Mercurial > repos > big-tiandm > mirplant2
comparison convert_bowtie_to_blast.pl @ 7:c3b93bce023e draft
Uploaded
author | big-tiandm |
---|---|
date | Fri, 25 Jul 2014 05:19:53 -0400 |
parents | faf38239b1a9 |
children |
comparison
equal
deleted
inserted
replaced
6:e08814f01490 | 7:c3b93bce023e |
---|---|
1 #!/usr/bin/perl | |
2 | |
3 | |
4 use warnings; | |
5 use strict; | |
6 use Getopt::Std; | |
7 | |
8 ######################################### USAGE ################################ | |
9 | |
10 my $usage= | |
11 "$0 file_bowtie_result file_solexa_seq file_chromosome | |
12 | |
13 This is a converter which changes Bowtie output into Blast format. | |
14 The input includes three files: a Bowtie result file (default Bowtie | |
15 output file), a fasta file consisting of small Reads and a chromosome | |
16 fasta file. It outputs the alignments in blast_parsed format. | |
17 | |
18 file_bowtie_result likes: | |
19 | |
20 AtFlower100010_x2 + MIR319c 508 AAGGAGATTCTTTCAGTCCAG IIIIIIIIIIIIIIIIIIIII 0 | |
21 AtFlower1000188_x1 + MIR2933a 421 TCGGAGAGGAAATTCGTCGGCG IIIIIIIIIIIIIIIIIIIIII 0 | |
22 | |
23 file_solexa_seq likes: | |
24 | |
25 >AtFlower100010_x2 | |
26 AAGGAGATTCTTTCAGTCCAG | |
27 | |
28 file_chromosome contains chromosome seq in fasta format | |
29 | |
30 "; | |
31 | |
32 | |
33 ####################################### INPUT FILES ############################ | |
34 | |
35 my $file_bowtie_result=shift or die $usage; | |
36 my $file_short_seq=shift or die $usage; | |
37 my $file_chromosome_seq=shift or die $usage; | |
38 | |
39 | |
40 ##################################### GLOBAL VARIBALES ######################### | |
41 | |
42 my %short_seq_length=(); | |
43 my %chromosome_length=(); | |
44 | |
45 | |
46 ######################################### MAIN ################################# | |
47 | |
48 #get the short sequence id and its length | |
49 sequence_length($file_short_seq,\%short_seq_length); | |
50 | |
51 #get the chromosome sequence id and its length | |
52 sequence_length($file_chromosome_seq,\%chromosome_length); | |
53 | |
54 #convert bowtie result format to blast format; | |
55 change_format($file_bowtie_result); | |
56 | |
57 exit; | |
58 | |
59 | |
60 ##################################### SUBROUTINES ############################## | |
61 | |
62 sub sequence_length{ | |
63 my ($file,$hash) = @_; | |
64 my ($id, $desc, $sequence, $seq_length) = (); | |
65 | |
66 open (FASTA, "<$file") or die "can not open $$file\n"; | |
67 while (<FASTA>) | |
68 { | |
69 chomp; | |
70 if (/^>(\S+)(.*)/) | |
71 { | |
72 $id = $1; | |
73 $desc = $2; | |
74 $sequence = ""; | |
75 while (<FASTA>){ | |
76 chomp; | |
77 if (/^>(\S+)(.*)/){ | |
78 $$hash{$id} = length $sequence; | |
79 $id = $1; | |
80 $desc = $2; | |
81 $sequence = ""; | |
82 next; | |
83 } | |
84 $sequence .= $_; | |
85 } | |
86 } | |
87 } | |
88 $seq_length=length($sequence); | |
89 $$hash{$id} = $seq_length; | |
90 close FASTA; | |
91 } | |
92 | |
93 | |
94 | |
95 | |
96 | |
97 sub change_format{ | |
98 #Change Bowtie format into blast format | |
99 my $file=shift @_; | |
100 open(FILE,"<$file")||die"can not open the bowtie result file:$!\n"; | |
101 #open(BLASTOUT,">blastout")||die"can not create the blastout file:$!\n"; | |
102 | |
103 while(<FILE>){ | |
104 chomp; | |
105 my @tmp=split("\t",$_); | |
106 #Clean the reads ID | |
107 my @tmp1=split(" ",$tmp[0]); | |
108 print "$tmp1[0]"."\t"."$short_seq_length{$tmp1[0]}"."\t"."1".'..'."$short_seq_length{$tmp1[0]}"."\t"."$tmp[2]"."\t"."$chromosome_length{$tmp[2]}"."\t"; | |
109 if($tmp[1] eq "+"){ | |
110 my $seq_end=$tmp[3] + $short_seq_length{$tmp1[0]}; | |
111 my $seq_bg=$tmp[3] + 1; | |
112 print "$seq_bg".'..'."$seq_end"."\t"."1e-04"."\t"."1.00"."\t"."42.1"."\t"."Plus / Plus"."\n"; | |
113 } | |
114 if($tmp[1] eq "-"){ | |
115 my $seq_end=$chromosome_length{$tmp[2]} - $tmp[3]; | |
116 my $seq_bg=$seq_end - $short_seq_length{$tmp1[0]} + 1; | |
117 print "$seq_bg".'..'."$seq_end"."\t"."1e-04"."\t"."1.00"."\t"."42.1"."\t"."Plus / Minus"."\n"; | |
118 } | |
119 } | |
120 | |
121 # close BLASTOUT; | |
122 | |
123 } | |
124 | |
125 | |
126 |