Mercurial > repos > big-tiandm > sirna_plant
comparison phased_siRNA.pl @ 23:cad6a1dfb174 draft
Uploaded
author | big-tiandm |
---|---|
date | Wed, 05 Nov 2014 21:11:49 -0500 |
parents | 07745c0958dd |
children |
comparison
equal
deleted
inserted
replaced
22:b6686462d0cb | 23:cad6a1dfb174 |
---|---|
1 #!/usr/bin/perl -w | |
2 #Filename: | |
3 #Author: Tian Dongmei | |
4 #Email: tiandm@big.ac.cn | |
5 #Date: 2013/7/19 | |
6 #Modified: | |
7 #Description: | |
8 my $version=1.00; | |
9 | |
10 use strict; | |
11 use Getopt::Long; | |
12 #use Math::Cephes qw(:hypergeometrics); | |
13 | |
14 my %opts; | |
15 GetOptions(\%opts,"i=s","o=s","h"); | |
16 if (!(defined $opts{i} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments | |
17 &usage; | |
18 } | |
19 | |
20 my $filein=$opts{'i'}; | |
21 my $fileout=$opts{'o'}; | |
22 | |
23 open IN,"<$filein"; #input file | |
24 open OUT,">$fileout"; #output file | |
25 | |
26 while (my $aline=<IN>) { | |
27 chomp $aline; | |
28 if ($aline=~/^\#/) { | |
29 print OUT $aline,"\tp-value\n"; | |
30 next; | |
31 } | |
32 my @tmp=split/\t/,$aline; | |
33 my @pos=split/:|-/,$tmp[0]; | |
34 $tmp[1]=~s/nt//; | |
35 my $pv=&phase($tmp[1],$pos[1],$pos[2],$tmp[4]); | |
36 | |
37 print OUT $aline,"\t",$pv,"\n"; | |
38 } | |
39 close IN; | |
40 close OUT; | |
41 | |
42 sub phase{ | |
43 my ($tagL,$start,$end,$tags)=@_; | |
44 my @tmp=split/\;/,$tags; | |
45 my %tag; | |
46 for (my $i=0;$i<@tmp;$i++) { | |
47 my @aa=split/\,/,$tmp[$i]; | |
48 next if($aa[1]-$aa[0]+1 != $tagL); | |
49 # $tag{$aa[0].",".$aa[2]}+=$aa[3] if($aa[2] eq "+"); | |
50 # $tag{($aa[1]).",".$aa[2]}+=$aa[3] if($aa[2] eq "-"); | |
51 $tag{$aa[0]}+=$aa[3] if($aa[2] eq "+"); | |
52 $tag{($aa[1]+3)}+=$aa[3] if($aa[2] eq "-"); | |
53 } | |
54 | |
55 my $pv=&pvalue2(\%tag,$tagL,$start,$end); | |
56 | |
57 return $pv; | |
58 } | |
59 | |
60 sub pvalue2{ | |
61 my ($tag,$tagL,$start,$end)=@_; | |
62 | |
63 my $p=1; my $pp=1; | |
64 foreach my $ccs(keys %{$tag}){ | |
65 my $n=0; | |
66 my $k=0; | |
67 my $K=0; | |
68 my $N=0; | |
69 | |
70 my $cor= $ccs; | |
71 my $ss=$cor; | |
72 my $ee=($cor+$tagL*10-1)<$end ? $cor+$tagL*10-1 : $end; | |
73 | |
74 my $max=0; | |
75 for (my $i=$ss; $i<=$ee; $i++) # calculate n on the sense strand | |
76 { | |
77 my $x=$i; | |
78 if (defined $$tag{$x}) | |
79 { | |
80 if ($max<$$tag{$x}) {$max=$$tag{$x};} | |
81 $n +=$$tag{$x}; | |
82 $N++; | |
83 } | |
84 } | |
85 for (my $i=$ss; $i<=$ee; $i=$i+$tagL) # calculate k on the sense strand | |
86 { | |
87 my $x=$i; | |
88 if (defined $$tag{$x}) | |
89 { | |
90 $k +=$$tag{$x}; | |
91 $K++; | |
92 } | |
93 } | |
94 | |
95 | |
96 return $p if($K<3); | |
97 return $p if($max/$n>0.8); | |
98 | |
99 my $pn=0; | |
100 next if($n==$k); | |
101 $pn=10*$k/($n-$k)+1; | |
102 $pn = $pn ** ($K-2); | |
103 $pn = log($pn); | |
104 if ($p<$pn) { | |
105 $p=$pn; | |
106 } | |
107 | |
108 } | |
109 | |
110 return $p; | |
111 | |
112 } | |
113 | |
114 sub pvalue{ | |
115 my ($tag,$tagL,$start,$end)=@_; | |
116 | |
117 my $p=1; | |
118 foreach my $ccs(keys %{$tag}){ | |
119 my $n=-1; | |
120 my $k=-1; | |
121 | |
122 my ($cor, $str)=split(/,/, $ccs); | |
123 if ($str eq "+") # small RNAs on the Watson strand | |
124 { | |
125 my $ss=$cor; | |
126 my $ee=($cor+$tagL*11-1)<$end ? $cor+$tagL*11-1 : $end; | |
127 for (my $i=$ss; $i<=$ee; $i++) # calculate n on the sense strand | |
128 { | |
129 my $x=$i.","."+"; | |
130 if (defined $$tag{$x}) | |
131 { | |
132 $n=$n+1; | |
133 } | |
134 } | |
135 for (my $i=$ss; $i<=$ee; $i=$i+$tagL) # calculate k on the sense strand | |
136 { | |
137 my $x=$i.","."+"; | |
138 if (defined $$tag{$x}) | |
139 { | |
140 $k=$k+1; | |
141 } | |
142 } | |
143 | |
144 for (my $j=$ss-2; $j<=$ee-2; $j++) # calculate n on the antisense strand | |
145 { | |
146 my $x=$j.","."-"; | |
147 if (defined $$tag{$x}) | |
148 { | |
149 $n=$n+1; | |
150 } | |
151 } | |
152 | |
153 for (my $j=$ss+$tagL-2; $j<=$ee-2; $j=$j+$tagL) # calculate k on the antisense strand | |
154 { | |
155 my $x=$j.","."-"; | |
156 if (defined $$tag{$x}) | |
157 { | |
158 $k=$k+1; | |
159 } | |
160 } | |
161 } | |
162 | |
163 elsif ($str eq "-") # small RNAs on the Crick strand | |
164 { | |
165 my $ee=$cor; | |
166 my $ss=$cor-$tagL*11+1> $start ? $cor-$tagL*11+1 : $start; | |
167 for (my $i=$ss; $i<=$ee; $i++) # calculate n on the sense strand | |
168 { | |
169 my $x=$i.","."-"; | |
170 if (defined $$tag{$x}) | |
171 { | |
172 $n=$n+1; | |
173 } | |
174 } | |
175 for (my $i=$ss+$tagL-1; $i<=$ee; $i=$i+$tagL) # calculate k on the sense strand | |
176 { | |
177 my $x=$i.","."-"; | |
178 if (defined $$tag{$x}) | |
179 { | |
180 $k=$k+1; | |
181 } | |
182 } | |
183 | |
184 for (my $j=$ss+2; $j<=$ee+2; $j++) # calculate n on the antisense strand | |
185 { | |
186 my $x=$j.","."+"; | |
187 if (defined $$tag{$x}) | |
188 { | |
189 $n=$n+1; | |
190 } | |
191 } | |
192 for (my $j=$ss+2; $j<=$ee+2; $j=$j+$tagL) # calculate k on the antisense strand | |
193 { | |
194 my $x=$j.","."+"; | |
195 if (defined $$tag{$x}) | |
196 { | |
197 $k=$k+1; | |
198 } | |
199 } | |
200 } | |
201 | |
202 next if($k<3); | |
203 | |
204 my $pn=0; my $N=$tagL*11*2-1; my $M=21; | |
205 for (my $w=$k; $w<=$M; $w++) # calculate p-value from n and k | |
206 { | |
207 my $c=1; | |
208 my $rr=1; | |
209 my $rw=1; | |
210 | |
211 for (my $j=0; $j<=$w-1; $j++) | |
212 { | |
213 $c=$c*($M-$j)/($j+1); | |
214 } | |
215 for (my $x=0; $x<=$n-$w-1; $x++) | |
216 { | |
217 $rr=$rr*($N-$M-$x)/($x+1); | |
218 } | |
219 for (my $y=0; $y<=$n-1; $y++) | |
220 { | |
221 $rw=$rw*($y+1)/($N-$y); | |
222 } | |
223 my $pr=$c*$rr*$rw; | |
224 | |
225 $pn=$pn+$pr; | |
226 } | |
227 | |
228 $p=$pn<$p ? $pn :$p; | |
229 | |
230 if ($p<0.001) #select and output small RNA clusters with p<0.001 | |
231 | |
232 { | |
233 | |
234 return $p; | |
235 | |
236 } | |
237 | |
238 } | |
239 return $p; | |
240 } | |
241 | |
242 sub usage{ | |
243 print <<"USAGE"; | |
244 Version $version | |
245 Usage: | |
246 $0 -i -o | |
247 options: | |
248 -i input file | |
249 -o output file | |
250 -h help | |
251 USAGE | |
252 exit(1); | |
253 } | |
254 |