comparison phased_siRNA.pl @ 23:cad6a1dfb174 draft

Uploaded
author big-tiandm
date Wed, 05 Nov 2014 21:11:49 -0500
parents 07745c0958dd
children
comparison
equal deleted inserted replaced
22:b6686462d0cb 23:cad6a1dfb174
1 #!/usr/bin/perl -w
2 #Filename:
3 #Author: Tian Dongmei
4 #Email: tiandm@big.ac.cn
5 #Date: 2013/7/19
6 #Modified:
7 #Description:
8 my $version=1.00;
9
10 use strict;
11 use Getopt::Long;
12 #use Math::Cephes qw(:hypergeometrics);
13
14 my %opts;
15 GetOptions(\%opts,"i=s","o=s","h");
16 if (!(defined $opts{i} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments
17 &usage;
18 }
19
20 my $filein=$opts{'i'};
21 my $fileout=$opts{'o'};
22
23 open IN,"<$filein"; #input file
24 open OUT,">$fileout"; #output file
25
26 while (my $aline=<IN>) {
27 chomp $aline;
28 if ($aline=~/^\#/) {
29 print OUT $aline,"\tp-value\n";
30 next;
31 }
32 my @tmp=split/\t/,$aline;
33 my @pos=split/:|-/,$tmp[0];
34 $tmp[1]=~s/nt//;
35 my $pv=&phase($tmp[1],$pos[1],$pos[2],$tmp[4]);
36
37 print OUT $aline,"\t",$pv,"\n";
38 }
39 close IN;
40 close OUT;
41
42 sub phase{
43 my ($tagL,$start,$end,$tags)=@_;
44 my @tmp=split/\;/,$tags;
45 my %tag;
46 for (my $i=0;$i<@tmp;$i++) {
47 my @aa=split/\,/,$tmp[$i];
48 next if($aa[1]-$aa[0]+1 != $tagL);
49 # $tag{$aa[0].",".$aa[2]}+=$aa[3] if($aa[2] eq "+");
50 # $tag{($aa[1]).",".$aa[2]}+=$aa[3] if($aa[2] eq "-");
51 $tag{$aa[0]}+=$aa[3] if($aa[2] eq "+");
52 $tag{($aa[1]+3)}+=$aa[3] if($aa[2] eq "-");
53 }
54
55 my $pv=&pvalue2(\%tag,$tagL,$start,$end);
56
57 return $pv;
58 }
59
60 sub pvalue2{
61 my ($tag,$tagL,$start,$end)=@_;
62
63 my $p=1; my $pp=1;
64 foreach my $ccs(keys %{$tag}){
65 my $n=0;
66 my $k=0;
67 my $K=0;
68 my $N=0;
69
70 my $cor= $ccs;
71 my $ss=$cor;
72 my $ee=($cor+$tagL*10-1)<$end ? $cor+$tagL*10-1 : $end;
73
74 my $max=0;
75 for (my $i=$ss; $i<=$ee; $i++) # calculate n on the sense strand
76 {
77 my $x=$i;
78 if (defined $$tag{$x})
79 {
80 if ($max<$$tag{$x}) {$max=$$tag{$x};}
81 $n +=$$tag{$x};
82 $N++;
83 }
84 }
85 for (my $i=$ss; $i<=$ee; $i=$i+$tagL) # calculate k on the sense strand
86 {
87 my $x=$i;
88 if (defined $$tag{$x})
89 {
90 $k +=$$tag{$x};
91 $K++;
92 }
93 }
94
95
96 return $p if($K<3);
97 return $p if($max/$n>0.8);
98
99 my $pn=0;
100 next if($n==$k);
101 $pn=10*$k/($n-$k)+1;
102 $pn = $pn ** ($K-2);
103 $pn = log($pn);
104 if ($p<$pn) {
105 $p=$pn;
106 }
107
108 }
109
110 return $p;
111
112 }
113
114 sub pvalue{
115 my ($tag,$tagL,$start,$end)=@_;
116
117 my $p=1;
118 foreach my $ccs(keys %{$tag}){
119 my $n=-1;
120 my $k=-1;
121
122 my ($cor, $str)=split(/,/, $ccs);
123 if ($str eq "+") # small RNAs on the Watson strand
124 {
125 my $ss=$cor;
126 my $ee=($cor+$tagL*11-1)<$end ? $cor+$tagL*11-1 : $end;
127 for (my $i=$ss; $i<=$ee; $i++) # calculate n on the sense strand
128 {
129 my $x=$i.","."+";
130 if (defined $$tag{$x})
131 {
132 $n=$n+1;
133 }
134 }
135 for (my $i=$ss; $i<=$ee; $i=$i+$tagL) # calculate k on the sense strand
136 {
137 my $x=$i.","."+";
138 if (defined $$tag{$x})
139 {
140 $k=$k+1;
141 }
142 }
143
144 for (my $j=$ss-2; $j<=$ee-2; $j++) # calculate n on the antisense strand
145 {
146 my $x=$j.","."-";
147 if (defined $$tag{$x})
148 {
149 $n=$n+1;
150 }
151 }
152
153 for (my $j=$ss+$tagL-2; $j<=$ee-2; $j=$j+$tagL) # calculate k on the antisense strand
154 {
155 my $x=$j.","."-";
156 if (defined $$tag{$x})
157 {
158 $k=$k+1;
159 }
160 }
161 }
162
163 elsif ($str eq "-") # small RNAs on the Crick strand
164 {
165 my $ee=$cor;
166 my $ss=$cor-$tagL*11+1> $start ? $cor-$tagL*11+1 : $start;
167 for (my $i=$ss; $i<=$ee; $i++) # calculate n on the sense strand
168 {
169 my $x=$i.","."-";
170 if (defined $$tag{$x})
171 {
172 $n=$n+1;
173 }
174 }
175 for (my $i=$ss+$tagL-1; $i<=$ee; $i=$i+$tagL) # calculate k on the sense strand
176 {
177 my $x=$i.","."-";
178 if (defined $$tag{$x})
179 {
180 $k=$k+1;
181 }
182 }
183
184 for (my $j=$ss+2; $j<=$ee+2; $j++) # calculate n on the antisense strand
185 {
186 my $x=$j.","."+";
187 if (defined $$tag{$x})
188 {
189 $n=$n+1;
190 }
191 }
192 for (my $j=$ss+2; $j<=$ee+2; $j=$j+$tagL) # calculate k on the antisense strand
193 {
194 my $x=$j.","."+";
195 if (defined $$tag{$x})
196 {
197 $k=$k+1;
198 }
199 }
200 }
201
202 next if($k<3);
203
204 my $pn=0; my $N=$tagL*11*2-1; my $M=21;
205 for (my $w=$k; $w<=$M; $w++) # calculate p-value from n and k
206 {
207 my $c=1;
208 my $rr=1;
209 my $rw=1;
210
211 for (my $j=0; $j<=$w-1; $j++)
212 {
213 $c=$c*($M-$j)/($j+1);
214 }
215 for (my $x=0; $x<=$n-$w-1; $x++)
216 {
217 $rr=$rr*($N-$M-$x)/($x+1);
218 }
219 for (my $y=0; $y<=$n-1; $y++)
220 {
221 $rw=$rw*($y+1)/($N-$y);
222 }
223 my $pr=$c*$rr*$rw;
224
225 $pn=$pn+$pr;
226 }
227
228 $p=$pn<$p ? $pn :$p;
229
230 if ($p<0.001) #select and output small RNA clusters with p<0.001
231
232 {
233
234 return $p;
235
236 }
237
238 }
239 return $p;
240 }
241
242 sub usage{
243 print <<"USAGE";
244 Version $version
245 Usage:
246 $0 -i -o
247 options:
248 -i input file
249 -o output file
250 -h help
251 USAGE
252 exit(1);
253 }
254