Mercurial > repos > big-tiandm > mirplant2
diff miRDeep_plant.pl @ 44:0c4e11018934 draft
Uploaded
author | big-tiandm |
---|---|
date | Thu, 30 Oct 2014 21:29:19 -0400 |
parents | dc5a29826c7d |
children | ca05d68aca13 |
line wrap: on
line diff
--- a/miRDeep_plant.pl Tue Oct 28 01:35:32 2014 -0400 +++ b/miRDeep_plant.pl Thu Oct 30 21:29:19 2014 -0400 @@ -3,7 +3,7 @@ use warnings; use strict; use Getopt::Std; - +use RNA; ################################# MIRDEEP ################################################# @@ -125,7 +125,7 @@ #parse signature file in blast_parsed format and resolve each potential precursor parse_file_blast_parsed($file_blast_parsed); -`rm -rf $tmpdir`; +#`rm -rf $tmpdir`; exit; @@ -360,14 +360,16 @@ #print sequence to temporary file, test randfold value, return 1 or 0 # print_file("pri_seq.fa",">pri_seq\n".$hash_comp{"pri_seq"}); - my $tmpfile=$tmpdir.$hash_comp{"pri_id"}; - open(FILE, ">$tmpfile"); - print FILE ">pri_seq\n",$hash_comp{"pri_seq"}; - close FILE; + #my $tmpfile=$tmpdir.$hash_comp{"pri_id"}; + #open(FILE, ">$tmpfile"); + #print FILE ">pri_seq\n",$hash_comp{"pri_seq"}; + #close FILE; # my $p_value=`randfold -s $tmpfile 999 | cut -f 3`; - my $p1=`randfold -s $tmpfile 999 | cut -f 3`; - my $p2=`randfold -s $tmpfile 999 | cut -f 3`; + #my $p1=`randfold -s $tmpfile 999 | cut -f 3`; + #my $p2=`randfold -s $tmpfile 999 | cut -f 3`; + my $p1=&randfold_pvalue($hash_comp{"pri_seq"},999); + my $p2=&randfold_pvalue($hash_comp{"pri_seq"},999); my $p_value=($p1+$p2)/2; wait; # system "rm $tmpfile"; @@ -377,18 +379,82 @@ return 0; } +sub randfold_pvalue{ + my $cpt_sup = 0; + my $cpt_inf = 0; + my $cpt_ega = 1; + + my ($seq,$number_of_randomizations)=@_; + my $str =$seq; + my $mfe = RNA::fold($seq,$str); -#sub print_file{ + for (my $i=0;$i<$number_of_randomizations;$i++) { + $seq = shuffle_sequence_dinucleotide($seq); + $str = $seq; + + my $rand_mfe = RNA::fold($str,$str); + + if ($rand_mfe < $mfe) { + $cpt_inf++; + } + if ($rand_mfe == $mfe) { + $cpt_ega++; + } + if ($rand_mfe > $mfe) { + $cpt_sup++; + } + } + + my $proba = ($cpt_ega + $cpt_inf) / ($number_of_randomizations + 1); - #print string to file + #print "$name\t$mfe\t$proba\n"; + return $proba; +} + +sub shuffle_sequence_dinucleotide { + + my ($str) = @_; -# my($file,$string)=@_; + # upper case and convert to ATGC + $str = uc($str); + $str =~ s/U/T/g; + + my @nuc = ('A','T','G','C'); + my $count_swap = 0; + # set maximum number of permutations + my $stop = length($str) * 10; + + while($count_swap < $stop) { + + my @pos; + + # look start and end letters + my $firstnuc = $nuc[int(rand 4)]; + my $thirdnuc = $nuc[int(rand 4)]; -# open(FILE, ">$file"); -# print FILE "$string"; -# close FILE; -#} - + # get positions for matching nucleotides + for (my $i=0;$i<(length($str)-2);$i++) { + if ((substr($str,$i,1) eq $firstnuc) && (substr($str,$i+2,1) eq $thirdnuc)) { + push (@pos,($i+1)); + $i++; + } + } + + # swap at random trinucleotides + my $max = scalar(@pos); + for (my $i=0;$i<$max;$i++) { + my $swap = int(rand($max)); + if ((abs($pos[$swap] - $pos[$i]) >= 3) && (substr($str,$pos[$i],1) ne substr($str,$pos[$swap],1))) { + $count_swap++; + my $w1 = substr($str,$pos[$i],1); + my $w2 = substr($str,$pos[$swap],1); + substr($str,$pos[$i],1,$w2); + substr($str,$pos[$swap],1,$w1); + } + } + } + return($str); +} sub test_nucleus_conservation{ @@ -1279,7 +1345,7 @@ my $freq=$1; return $freq; }else{ - print STDERR "Problem with read format\n"; + #print STDERR "Problem with read format\n"; return 0; } } @@ -1497,7 +1563,7 @@ #parameters of known precursors and background hairpins, scale and location my $a=1.339e-12;my $b=2.778e-13;my $c=45.834; my $ev=$e**($mfe_adj1*$c); - print STDERR "\n***",$ev,"**\t",$ev+$b,"\t"; + #print STDERR "\n***",$ev,"**\t",$ev+$b,"\t"; my $log_odds=($a/($b+$ev)); @@ -1506,7 +1572,7 @@ my $odds=$prob_test/$prob_background; my $log_odds_2=log($odds); - print STDERR "log_odds :",$log_odds,"\t",$log_odds_2,"\n"; + #print STDERR "log_odds :",$log_odds,"\t",$log_odds_2,"\n"; return $log_odds; }