Mercurial > repos > big-tiandm > sirna_plant
view phased_siRNA.pl @ 18:22d79320085c draft
Uploaded
author | big-tiandm |
---|---|
date | Thu, 30 Oct 2014 21:31:55 -0400 |
parents | 07745c0958dd |
children |
line wrap: on
line source
#!/usr/bin/perl -w #Filename: #Author: Tian Dongmei #Email: tiandm@big.ac.cn #Date: 2013/7/19 #Modified: #Description: my $version=1.00; use strict; use Getopt::Long; #use Math::Cephes qw(:hypergeometrics); my %opts; GetOptions(\%opts,"i=s","o=s","h"); if (!(defined $opts{i} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments &usage; } my $filein=$opts{'i'}; my $fileout=$opts{'o'}; open IN,"<$filein"; #input file open OUT,">$fileout"; #output file while (my $aline=<IN>) { chomp $aline; if ($aline=~/^\#/) { print OUT $aline,"\tp-value\n"; next; } my @tmp=split/\t/,$aline; my @pos=split/:|-/,$tmp[0]; $tmp[1]=~s/nt//; my $pv=&phase($tmp[1],$pos[1],$pos[2],$tmp[4]); print OUT $aline,"\t",$pv,"\n"; } close IN; close OUT; sub phase{ my ($tagL,$start,$end,$tags)=@_; my @tmp=split/\;/,$tags; my %tag; for (my $i=0;$i<@tmp;$i++) { my @aa=split/\,/,$tmp[$i]; next if($aa[1]-$aa[0]+1 != $tagL); # $tag{$aa[0].",".$aa[2]}+=$aa[3] if($aa[2] eq "+"); # $tag{($aa[1]).",".$aa[2]}+=$aa[3] if($aa[2] eq "-"); $tag{$aa[0]}+=$aa[3] if($aa[2] eq "+"); $tag{($aa[1]+3)}+=$aa[3] if($aa[2] eq "-"); } my $pv=&pvalue2(\%tag,$tagL,$start,$end); return $pv; } sub pvalue2{ my ($tag,$tagL,$start,$end)=@_; my $p=1; my $pp=1; foreach my $ccs(keys %{$tag}){ my $n=0; my $k=0; my $K=0; my $N=0; my $cor= $ccs; my $ss=$cor; my $ee=($cor+$tagL*10-1)<$end ? $cor+$tagL*10-1 : $end; my $max=0; for (my $i=$ss; $i<=$ee; $i++) # calculate n on the sense strand { my $x=$i; if (defined $$tag{$x}) { if ($max<$$tag{$x}) {$max=$$tag{$x};} $n +=$$tag{$x}; $N++; } } for (my $i=$ss; $i<=$ee; $i=$i+$tagL) # calculate k on the sense strand { my $x=$i; if (defined $$tag{$x}) { $k +=$$tag{$x}; $K++; } } return $p if($K<3); return $p if($max/$n>0.8); my $pn=0; next if($n==$k); $pn=10*$k/($n-$k)+1; $pn = $pn ** ($K-2); $pn = log($pn); if ($p<$pn) { $p=$pn; } } return $p; } sub pvalue{ my ($tag,$tagL,$start,$end)=@_; my $p=1; foreach my $ccs(keys %{$tag}){ my $n=-1; my $k=-1; my ($cor, $str)=split(/,/, $ccs); if ($str eq "+") # small RNAs on the Watson strand { my $ss=$cor; my $ee=($cor+$tagL*11-1)<$end ? $cor+$tagL*11-1 : $end; for (my $i=$ss; $i<=$ee; $i++) # calculate n on the sense strand { my $x=$i.","."+"; if (defined $$tag{$x}) { $n=$n+1; } } for (my $i=$ss; $i<=$ee; $i=$i+$tagL) # calculate k on the sense strand { my $x=$i.","."+"; if (defined $$tag{$x}) { $k=$k+1; } } for (my $j=$ss-2; $j<=$ee-2; $j++) # calculate n on the antisense strand { my $x=$j.","."-"; if (defined $$tag{$x}) { $n=$n+1; } } for (my $j=$ss+$tagL-2; $j<=$ee-2; $j=$j+$tagL) # calculate k on the antisense strand { my $x=$j.","."-"; if (defined $$tag{$x}) { $k=$k+1; } } } elsif ($str eq "-") # small RNAs on the Crick strand { my $ee=$cor; my $ss=$cor-$tagL*11+1> $start ? $cor-$tagL*11+1 : $start; for (my $i=$ss; $i<=$ee; $i++) # calculate n on the sense strand { my $x=$i.","."-"; if (defined $$tag{$x}) { $n=$n+1; } } for (my $i=$ss+$tagL-1; $i<=$ee; $i=$i+$tagL) # calculate k on the sense strand { my $x=$i.","."-"; if (defined $$tag{$x}) { $k=$k+1; } } for (my $j=$ss+2; $j<=$ee+2; $j++) # calculate n on the antisense strand { my $x=$j.","."+"; if (defined $$tag{$x}) { $n=$n+1; } } for (my $j=$ss+2; $j<=$ee+2; $j=$j+$tagL) # calculate k on the antisense strand { my $x=$j.","."+"; if (defined $$tag{$x}) { $k=$k+1; } } } next if($k<3); my $pn=0; my $N=$tagL*11*2-1; my $M=21; for (my $w=$k; $w<=$M; $w++) # calculate p-value from n and k { my $c=1; my $rr=1; my $rw=1; for (my $j=0; $j<=$w-1; $j++) { $c=$c*($M-$j)/($j+1); } for (my $x=0; $x<=$n-$w-1; $x++) { $rr=$rr*($N-$M-$x)/($x+1); } for (my $y=0; $y<=$n-1; $y++) { $rw=$rw*($y+1)/($N-$y); } my $pr=$c*$rr*$rw; $pn=$pn+$pr; } $p=$pn<$p ? $pn :$p; if ($p<0.001) #select and output small RNA clusters with p<0.001 { return $p; } } return $p; } sub usage{ print <<"USAGE"; Version $version Usage: $0 -i -o options: -i input file -o output file -h help USAGE exit(1); }