annotate quantify.pl @ 51:3202911efdae draft

Uploaded
author big-tiandm
date Fri, 05 Dec 2014 01:14:28 -0500
parents 7b5a48b972e9
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
50
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
1 #!/usr/bin/perl -w
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
2 #Filename:
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
3 #Author: Tian Dongmei
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
4 #Email: tiandm@big.ac.cn
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
5 #Date: 2013/7/19
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
6 #Modified:
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
7 #Description:
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
8 my $version=1.00;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
9
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
10 use File::Path;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
11 use strict;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
12 use File::Basename;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
13 #use Getopt::Std;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
14 use Getopt::Long;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
15 #use RNA;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
16
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
17 my %opts;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
18 GetOptions(\%opts,"r=s","p=s","m=s","mis:i","t:i","e:i","f:i","tag:s","o=s","h");
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
19 if (!(defined $opts{r} and defined $opts{p} and defined $opts{m} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
20 &usage;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
21 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
22
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
23 my $read=$opts{'r'};
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
24 my $pre=$opts{'p'};
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
25 my $mature=$opts{'m'};
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
26
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
27 my $dir=$opts{'o'};
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
28 unless ($dir=~/\/$/) {$dir .="/";}
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
29 if (not -d $dir) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
30 mkdir $dir;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
31 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
32
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
33 my $threads=defined $opts{'t'} ? $opts{'t'} : 1;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
34 my $mismatch=defined $opts{'mis'} ? $opts{'mis'} : 0;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
35
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
36 my $upstream = 2;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
37 my $downstream = 5;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
38
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
39 $upstream = $opts{'e'} if(defined $opts{'e'});
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
40 $downstream = $opts{'f'} if(defined $opts{'f'});
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
41
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
42 my $marks=defined $opts{'tag'} ? $opts{'tag'} : "";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
43
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
44 my $time=Time();
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
45
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
46 my $tmpdir="${dir}/known_miRNA_Express";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
47 if(not -d $tmpdir){
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
48 mkdir($tmpdir);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
49 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
50 chdir $tmpdir;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
51
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
52 `cp $pre ./`;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
53 my $pre_file_name=basename($pre);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
54
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
55 &mapping(); # matures align to precursors && reads align to precursors;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
56
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
57 my %pre_mature; # $pre_mature{pre_id}{matre_ID}{"mature"}[0]->start; $pre_mature{pre_id}{matre_ID}{"mature"}[1]->end;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
58 &maturePosOnPre(); # acknowledge mature positions on precursor
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
59
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
60 my %pre_read;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
61 &readPosOnPre(); # acknowledge reads positions on precursors
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
62
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
63 if(!(defined $opts{'tag'})){
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
64 foreach my $key (keys %pre_read) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
65 $pre_read{$key}[0][0]=~/:([\d|_]+)_x(\d+)$/;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
66 my @ss=split/_/,$1;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
67 for (my $i=1;$i<=@ss;$i++) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
68 $marks .="Smp$i;";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
69 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
70 last;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
71 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
72 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
73
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
74 my %pre;## read in precursor sequences #$pre{pre_id}="CGTA...."
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
75 &attachPre();
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
76
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
77 my $preno=scalar (keys %pre);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
78 print "Total Precursor Number is $preno !!!!\n";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
79
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
80 my %struc; #mature star loop; $struc{$key}{"struc"}=$str; $struc{$key}{"mfe"}=$mfe;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
81 &structure();
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
82
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
83
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
84 ##### analysis and print out && moRs
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
85 my $aln=$dir."known_microRNA_express.aln";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
86 my $list=$dir."known_microRNA_express.txt";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
87 my $moRs=$dir."known_microRNA_express.moRs";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
88
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
89 system("ln -s $mature $dir/known_microRNA_mature.fa ");
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
90 system("ln -s $pre $dir/known_microRNA_precursor.fa ");
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
91
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
92 open ALN,">$aln";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
93 open LIST,">$list";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
94 open MORS,">$moRs";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
95
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
96 $"="\t"; ##### @array print in \t
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
97
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
98 my @marks=split/\;/,$marks;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
99 #print LIST "#matueID\tpreID\tpos1\tpos2\tmatureExp\tstarExp\ttotalExp\n";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
100 print LIST "#matueID\tpreID\tpos1\tpos2";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
101 for (my $i=0;$i<@marks;$i++) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
102 print LIST "\t",$marks[$i],"_matureExp";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
103 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
104 for (my $i=0;$i<@marks;$i++) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
105 print LIST "\t",$marks[$i],"_starExp";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
106 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
107 for (my $i=0;$i<@marks;$i++) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
108 print LIST "\t",$marks[$i],"_totalExp";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
109 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
110 print LIST "\n";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
111 print ALN "#>precursor ID \n#precursor sequence\n#precursor structure (mfe)\n#RNA_seq\t@marks\ttotal\n";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
112 print MORS "#>precursor ID\tstrand\texpress_reads\texpress_reads\/total_reads\tblock_number\tprecursor_sequence\n#\tblock_start\tblock_end\t@marks\ttotal\ttag_number\tsequence\n";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
113 my %moRs;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
114
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
115 foreach my $key (keys %pre) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
116 print ALN ">$key\n$pre{$key}\n$struc{$key}{struc} ($struc{$key}{mfe})\n";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
117 next if(! (exists $pre_read{$key}));
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
118 my @array=@{$pre_read{$key}};
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
119 @array=sort{$a->[3]<=> $b->[3]} @array;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
120
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
121 my $length=length($pre{$key});
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
122
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
123 my $maxline=-1;my $max=0; ### storage the maxinum express read line
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
124 my $totalReadsNo=0;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
125 my @not_over=(); ### new read format better for moRs analysis
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
126
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
127 ####print out Aln file start
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
128 for (my $i=0;$i<@array;$i++) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
129 my $maps=$array[$i][3]+1;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
130 my $mape=$array[$i][3]+length($array[$i][4]);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
131 my $str="";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
132 $str .= "." x ($maps-1);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
133 $str .=$array[$i][4];
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
134 $str .="." x ($length-$mape);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
135 $str .=" ";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
136
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
137 $array[$i][0]=~/:([\d|_]+)_x(\d+)$/;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
138 my @sample=split /\_/,$1;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
139 my $total=$2;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
140 print ALN $str,"@sample","\t",$total,"\n";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
141
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
142 if($total>$max){$max=$total; $maxline=$i;}
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
143 $totalReadsNo+=$total;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
144
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
145 push @not_over,[$key,$maps,$mape,$array[$i][0],$total,"+"];
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
146 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
147 ####print out Aln file end
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
148
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
149 #### express list start
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
150 my ($ms,$me,$ss,$se);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
151 if (!(exists($pre_mature{$key}))) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
152 $ms=$array[$maxline][3]+1;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
153 $me=$array[$maxline][3]+length($array[$maxline][4]);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
154 ($ss,$se)=&other_pair($ms,$me,$struc{$key}{'struc'});
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
155
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
156 my ($mexp,$sexp,$texp)=&express($ms-$upstream,$me+$downstream,$ss-$upstream,$se+$downstream,\@array);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
157 print LIST "$key\t$key\tmature:$ms..$me\tstar:$ss..$se\t@$mexp\t@$sexp\t@$texp\n";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
158 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
159 else{
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
160 foreach my $maID (keys %{$pre_mature{$key}}) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
161 $ms=$pre_mature{$key}{$maID}{"mature"}[0];
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
162 $me=$pre_mature{$key}{$maID}{"mature"}[1];
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
163 $ss=$pre_mature{$key}{$maID}{"star"}[0];
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
164 $se=$pre_mature{$key}{$maID}{"star"}[1];
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
165 my ($mexp,$sexp,$texp)=&express($ms-$upstream,$me+$downstream,$ss-$upstream,$se+$downstream,\@array);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
166 print LIST "$maID\t$key\tmature:$ms..$me\tstar:$ss..$se\t@$mexp\t@$sexp\t@$texp\n";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
167 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
168 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
169 #### express list end
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
170
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
171 #### analysis moRs start
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
172 my @result; my @m_texp;my $m_texp=0; ### moRs informations
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
173
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
174 while (@not_over>0) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
175 my @over=@not_over;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
176 @not_over=();
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
177
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
178 #·á¶È×î¸ßtag
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
179 my $m_max=0;my $m_maxline=-1;my $m_start=0;my $m_end=0;my $m_exp=0;my @m_exp;my $m_no=1;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
180 for (my $i=0;$i<@over;$i++) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
181 my @m_array=@{$over[$i]};
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
182 if ($m_max<$m_array[4]) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
183 $m_max=$m_array[4];
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
184 $m_maxline=$i;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
185 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
186 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
187 $m_start=$over[$m_maxline][1];
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
188 $m_end=$over[$m_maxline][2];
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
189 $m_exp=$m_max;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
190 $over[$m_maxline][3]=~/:([\d|_]+)_x(\d+)$/;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
191 my @m_nums=split/_/,$1;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
192 for (my $j=0;$j<@m_nums;$j++) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
193 $m_exp[$j]=$m_nums[$j];
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
194 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
195
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
196 #ͳ¼ÆÒÔ·á¶È×î¸ßtagΪ×ø±êµÄreads, Á½¶ËλÖòîÒì²»³¬¹ý3nt
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
197 for (my $i=0;$i<@over;$i++) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
198 next if($i==$m_maxline);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
199 my @m_array=@{$over[$i]};
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
200 if (abs($m_array[1]-$m_start)<=3 && abs($m_array[2]-$m_end)<=3) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
201 $m_exp+=$m_array[4];
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
202 $m_no++;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
203 $m_array[3]=~/:([\d|_]+)_x(\d+)$/;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
204 my @m_nums=split/_/,$1;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
205 for (my $j=0;$j<@m_nums;$j++) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
206 $m_exp[$j] +=$m_nums[$j];
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
207 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
208 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
209 elsif($m_array[1]>=$m_end || $m_array[2]<=$m_start){push @not_over,[@{$over[$i]}];} #È¥³ý¿çÔ½blockµÄreads
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
210 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
211 if($m_exp>5){### 5¸öreads
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
212 $m_texp+=$m_exp;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
213 for (my $j=0;$j<@m_exp;$j++) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
214 $m_texp[$j]+=$m_exp[$j];
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
215 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
216 my $string=&subseq($pre{$key},$m_start,$m_end,"+");
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
217 push @result,"\t$m_start\t$m_end\t@m_exp\t$m_exp\t$m_no\t$string" ;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
218 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
219 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
220
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
221 my $str=scalar @result;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
222 my $percent=sprintf("%.2f",$m_texp/$totalReadsNo);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
223 $str=">$key\t+\t$m_texp\t$percent\t".$str."\t$pre{$key}";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
224 @{$moRs{$str}}=@result;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
225
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
226 #### analysis moRs end
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
227 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
228
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
229 ##### moRs print out start
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
230 foreach my $key (keys %moRs) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
231 my @tmp=split/\t/,$key;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
232 next if ($tmp[4]<=2);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
233 next if($tmp[3]<0.95);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
234 my @over;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
235 for (my $i=0;$i<@{$moRs{$key}};$i++) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
236 my @arrayi=split/\t/,$moRs{$key}[$i];
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
237 for (my $j=0;$j<@{$moRs{$key}};$j++) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
238 next if($i==$j);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
239 my @arrayj=split/\t/,$moRs{$key}[$j];
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
240 if ((($arrayj[1]-$arrayi[2]>=0 && $arrayj[1]-$arrayi[2] <=3) || ($arrayj[1]-$arrayi[2]>=18 && $arrayj[1]-$arrayi[2] <=25) )||(($arrayi[1]-$arrayj[2]>=0 && $arrayi[1]-$arrayj[2] <=3)||($arrayi[1]-$arrayj[2]>=18 && $arrayi[1]-$arrayj[2] <=25))) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
241 push @over,$moRs{$key}[$i];
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
242 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
243 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
244 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
245 if (@over>0) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
246 print MORS "$key\n";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
247 foreach (@{$moRs{$key}}) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
248 print MORS "$_\n";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
249 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
250 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
251 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
252 ###### moRs print out end
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
253 close ALN;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
254 close LIST;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
255 close MORS;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
256
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
257 $"=" ";##### reset
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
258
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
259
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
260 ################### Sub programs #################
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
261 sub express{
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
262 my ($ms,$me,$ss,$se,$read)=@_;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
263 my (@mexp,@sexp,@texp);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
264 $$read[0][0]=~/:([_|\d]+)_x(\d+)$/;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
265 my @numsample=split/_/,$1;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
266 for (my $i=0;$i<@numsample;$i++) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
267 $mexp[$i]=0;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
268 $sexp[$i]=0;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
269 $texp[$i]=0;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
270 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
271
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
272 for (my $i=0;$i<@{$read};$i++) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
273 my $start=$$read[$i][3]+1;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
274 my $end=$$read[$i][3]+length($$read[$i][4]);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
275 $$read[$i][0]=~/:([_|\d]+)_x(\d+)$/;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
276 my $expresses=$1;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
277 my @nums=split/_/,$expresses;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
278
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
279 for (my $j=0;$j<@nums;$j++) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
280 $texp[$j]+=$nums[$j];
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
281 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
282 if ($start>=$ms && $end<=$me) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
283 for (my $j=0;$j<@nums;$j++) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
284 $mexp[$j]+=$nums[$j];
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
285 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
286 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
287 if ($start>=$ss && $end<=$se) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
288 for (my $j=0;$j<@nums;$j++) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
289 $sexp[$j]+=$nums[$j];
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
290 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
291 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
292 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
293 return(\@mexp,\@sexp,\@texp);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
294 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
295
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
296 sub structure{
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
297 foreach my $key (keys %pre_mature) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
298 if (!(defined $pre{$key})){die "!!!!! No precursor sequence $key, please check it!\n";}
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
299 #my ($str,$mfe)=RNA::fold($pre{$key});
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
300 my $rnafold=`perl -e 'print "$pre{$key}"' | RNAfold --noPS`;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
301 my @rnafolds=split/\s+/,$rnafold;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
302 my $str=$rnafolds[1];
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
303 my $mfe=$rnafolds[-1];
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
304 $mfe=~s/\(//;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
305 $mfe=~s/\)//;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
306
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
307 $struc{$key}{"struc"}=$str;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
308 #$struc{$key}{"mfe"}=sprintf ("%.2f",$mfe);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
309 $struc{$key}{"mfe"}=$mfe;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
310
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
311 foreach my $id (keys %{$pre_mature{$key}}) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
312 ($pre_mature{$key}{$id}{"star"}[0],$pre_mature{$key}{$id}{"star"}[1])=&other_pair($pre_mature{$key}{$id}{"mature"}[0],$pre_mature{$key}{$id}{"mature"}[1],$str);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
313 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
314 =cut
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
315 ##### Nucleotide complementary
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
316 my @tmp=split//,$str;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
317 my %a2b;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
318 my @bps;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
319 for (my $i=0;$i<@tmp;$i++) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
320 if ($tmp[$i] eq "("){push @bps,$i+1 ; next;}
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
321 if ($tmp[$i] eq ")") {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
322 my $up=pop @bps;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
323 $a2b{$i+1}=$up;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
324 $a2b{$up}=$i+1;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
325 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
326 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
327
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
328 ##### search star position
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
329 foreach my $id (keys %{$pre_mature{$key}}) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
330 my $n=0;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
331 for (my $i=$pre_mature{$key}{$id}{"mature"}[0];$i<=$pre_mature{$key}{$id}{"mature"}[1] ; $i++) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
332 if (defined $a2b{$i}) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
333 my $a=$i; my $b=$a2b{$i};
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
334 if($a>$b){
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
335 $pre_mature{$key}{$id}{"star"}[0]=$b-$n+2;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
336 $pre_mature{$key}{$id}{"star"}[1]=$b-$n+2+($pre_mature{$key}{$id}{"mature"}[1]-$pre_mature{$key}{$id}{"mature"}[0]);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
337 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
338 if($a<$b{
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
339 $pre_mature{$key}{$id}{"star"}[1]=$b+$n+2;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
340 $pre_mature{$key}{$id}{"star"}[0]=$b+$n+2-($pre_mature{$key}{$id}{"mature"}[1]-$pre_mature{$key}{$id}{"mature"}[0]);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
341 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
342 last;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
343 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
344 $n++;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
345 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
346 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
347 =cut
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
348 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
349 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
350 sub other_pair{
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
351 my ($start,$end,$structure)=@_;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
352 ##### Nucleotide complementary
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
353 my @tmp=split//,$structure;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
354 my %a2b; my @bps;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
355 for (my $i=0;$i<@tmp;$i++) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
356 if ($tmp[$i] eq "("){push @bps,$i+1 ; next;}
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
357 if ($tmp[$i] eq ")") {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
358 my $up=pop @bps;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
359 $a2b{$i+1}=$up;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
360 $a2b{$up}=$i+1;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
361 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
362 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
363 ##### search star position
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
364 my $n=0;my $startpos; my $endpos;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
365 for (my $i=$start;$i<=$end ; $i++) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
366 if (defined $a2b{$i}) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
367 my $a=$i; my $b=$a2b{$i};
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
368 # if($a>$b){
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
369 # $startpos=$b-$n+2;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
370 # $endpos=$b-$n+2+($end-$start);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
371 # }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
372 # if($a<$b){
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
373 $endpos=$b+$n+2;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
374 if($endpos>length($structure)){$endpos=length($structure);}
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
375 $startpos=$b+$n+2-($end-$start);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
376 if($startpos<1){$startpos=1;}
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
377 # }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
378 last;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
379 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
380 $n++;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
381 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
382 return ($startpos,$endpos);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
383 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
384 sub attachPre{
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
385 open IN, "<$pre_file_name";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
386 my $name;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
387 while (my $aline=<IN>) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
388 chomp $aline;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
389 if ($aline=~/^>(\S+)/) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
390 $name=$1;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
391 next;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
392 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
393 $pre{$name} .=$aline;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
394 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
395 close IN;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
396 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
397 sub readPosOnPre{
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
398 open IN,"<read_mapped.bwt";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
399 while (my $aline=<IN>) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
400 chomp $aline;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
401 my @tmp=split/\t/,$aline;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
402 my $id=lc($tmp[2]);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
403 push @{$pre_read{$tmp[2]}},[@tmp];
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
404 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
405 close IN;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
406 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
407 sub maturePosOnPre{
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
408 open IN,"<mature_mapped.bwt";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
409 while (my $aline=<IN>) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
410 chomp $aline;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
411 my @tmp=split/\t/,$aline;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
412 my $mm=$tmp[0];
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
413 # $mm=~s/\-3P|\-5P//i;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
414 $mm=lc($mm);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
415 my $pm=$tmp[2];
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
416 $pm=lc($pm);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
417
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
418 # next if ($mm ne $pm);### stringent mapping let7a only allowed to map pre-let7a
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
419 next if($mm!~/$pm/);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
420 # print "$tmp[2]\t$tmp[0]\n";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
421 # $pre_mature{$tmp[2]}{$tmp[0]}{"mature"}[0]=$tmp[3]-$upstream;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
422 # $pre_mature{$tmp[2]}{$tmp[0]}{"mature"}[0]=0 if($pre_mature{$tmp[2]}{$tmp[0]}{"mature"}[0]<0);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
423 # $pre_mature{$tmp[2]}{$tmp[0]}{"mature"}[1]=$tmp[3]+length($tmp[4])-1+$downstream;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
424 $pre_mature{$tmp[2]}{$tmp[0]}{"mature"}[0]=$tmp[3]+1;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
425 $pre_mature{$tmp[2]}{$tmp[0]}{"mature"}[1]=$tmp[3]+length($tmp[4]);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
426 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
427 close IN;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
428 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
429 sub mapping{
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
430 my $err;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
431 ## build bowtie index
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
432 #print STDERR "building bowtie index\n";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
433 $err = `bowtie-build $pre_file_name miRNA_precursor`;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
434
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
435 ## map mature sequences against precursors
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
436 #print STDERR "mapping mature sequences against index\n";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
437 $err = `bowtie -p $threads -f -v 0 -a --best --strata --norc miRNA_precursor $mature > mature_mapped.bwt 2> run.log`;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
438
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
439 ## map reads against precursors
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
440 #print STDERR "mapping read sequences against index\n";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
441 $err=`bowtie -p $threads -f -v $mismatch -a --best --strata --norc miRNA_precursor $read --al mirbase_mapped.fa --un mirbase_not_mapped.fa > read_mapped.bwt 2> run.log`;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
442
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
443 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
444
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
445 sub subseq{
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
446 my $seq=shift;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
447 my $beg=shift;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
448 my $end=shift;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
449 my $strand=shift;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
450
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
451 my $subseq=substr($seq,$beg-1,$end-$beg+1);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
452 if ($strand eq "-") {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
453 $subseq=revcom($subseq);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
454 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
455 return uc $subseq;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
456 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
457
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
458 sub revcom{
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
459 my $seq=shift;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
460 $seq=~tr/ATCGatcg/TAGCtagc/;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
461 $seq=reverse $seq;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
462 return uc $seq;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
463 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
464
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
465 sub Time{
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
466 my $time=time();
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
467 my ($sec,$min,$hour,$day,$month,$year) = (localtime($time))[0,1,2,3,4,5,6];
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
468 $month++;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
469 $year+=1900;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
470 if (length($sec) == 1) {$sec = "0"."$sec";}
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
471 if (length($min) == 1) {$min = "0"."$min";}
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
472 if (length($hour) == 1) {$hour = "0"."$hour";}
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
473 if (length($day) == 1) {$day = "0"."$day";}
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
474 if (length($month) == 1) {$month = "0"."$month";}
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
475 #print "$year-$month-$day $hour:$min:$sec\n";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
476 return("$year-$month-$day-$hour-$min-$sec");
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
477 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
478
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
479 sub usage{
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
480 print <<"USAGE";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
481 Version $version
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
482 Usage:
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
483 $0 -r -p -m -mis -t -e -f -tag -o -time
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
484 mandatory parameters:
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
485 -p precursor.fa miRNA precursor sequences from miRBase # must be absolute path
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
486 -m mature.fa miRNA sequences from miRBase # must be absolute path
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
487 -r reads.fa your read sequences #must be absolute path
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
488
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
489 -o output directory
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
490
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
491 options:
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
492 -mis [int] number of allowed mismatches when mapping reads to precursors, default 0
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
493 -t [int] threads number,default 1
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
494 -e [int] number of nucleotides upstream of the mature sequence to consider, default 2
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
495 -f [int] number of nucleotides downstream of the mature sequence to consider, default 5
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
496 -tag [string] sample marks# eg. sampleA;sampleB;sampleC
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
497 -time sting #make directory time,default is the local time
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
498 -h help
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
499 USAGE
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
500 exit(1);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
501 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
502