mirplant2: miRDeep_plant.pl comparison

comparison miRDeep_plant.pl @ 44:0c4e11018934 draft

Uploaded

author	big-tiandm
date	Thu, 30 Oct 2014 21:29:19 -0400
parents	dc5a29826c7d
children	ca05d68aca13

comparison

equal deleted inserted replaced

-:4c0b1a94b882
+:0c4e11018934
 #!/usr/bin/perl
 use warnings;
 use strict;
 use Getopt::Std;
+use RNA;
 ################################# MIRDEEP #################################################
 ################################## USAGE ##################################################
 #if conservation is scored, the fasta file of known miRNA sequences is parsed
 if($options{s}){create_hash_nuclei($options{s})};
 #parse signature file in blast_parsed format and resolve each potential precursor
 parse_file_blast_parsed($file_blast_parsed);
-`rm -rf $tmpdir`;
+#`rm -rf $tmpdir`;
 exit;
 sub test_randfold{ # returns 1 when the mean of two randfold_pvalue estimates for $hash_comp{"pri_seq"} is <= 0.05, otherwise 0
 #test randfold p-value of the sequence in $hash_comp{"pri_seq"}, return 1 or 0
 #(the inserted lines below compute it in-process; the temporary-file steps are commented out)
 #    print_file("pri_seq.fa",">pri_seq\n".$hash_comp{"pri_seq"});
-	my $tmpfile=$tmpdir.$hash_comp{"pri_id"};
+	#my $tmpfile=$tmpdir.$hash_comp{"pri_id"};
-	open(FILE, ">$tmpfile");
+	#open(FILE, ">$tmpfile");
-	print FILE ">pri_seq\n",$hash_comp{"pri_seq"};
+	#print FILE ">pri_seq\n",$hash_comp{"pri_seq"};
-	close FILE;
+	#close FILE;
 #	my $p_value=`randfold -s $tmpfile 999 | cut -f 3`;
-	my $p1=`randfold -s $tmpfile 999 | cut -f 3`;
+	#my $p1=`randfold -s $tmpfile 999 | cut -f 3`;
-	my $p2=`randfold -s $tmpfile 999 | cut -f 3`;
+	#my $p2=`randfold -s $tmpfile 999 | cut -f 3`;
+	my $p1=&randfold_pvalue($hash_comp{"pri_seq"},999); # first estimate, 999 randomizations
+	my $p2=&randfold_pvalue($hash_comp{"pri_seq"},999); # second, independent estimate on the same sequence
 	my $p_value=($p1+$p2)/2; # average the two estimates
 	wait; # NOTE(review): the backtick calls above are commented out in the inserted lines, so this sub starts no child process - confirm whether wait is still needed
 #    system "rm $tmpfile";
 if($p_value<=0.05){return 1;} # 0.05 is the acceptance cutoff
 return 0;
 }
+sub randfold_pvalue{ # returns (1 + number of shuffles whose RNA::fold value is <= the input's) / (number_of_randomizations + 1)
-#sub print_file{
+	my $cpt_sup = 0; # shuffles scoring above the input; counted but not used in the result
+	my $cpt_inf = 0; # shuffles scoring below the input
-#print string to file
+	my $cpt_ega = 1; # shuffles scoring equal to the input; starts at 1, matching the +1 in the denominator below
-#    my($file,$string)=@_;
+	my ($seq,$number_of_randomizations)=@_; # $seq: sequence to test; $number_of_randomizations: how many shuffles to score
+	my $str =$seq; # second argument handed to RNA::fold - NOTE(review): its role depends on the RNA binding in use; confirm
-#    open(FILE, ">$file");
+	my $mfe = RNA::fold($seq,$str); # score of the unshuffled input - presumably the minimum free energy; verify against the RNA module docs
-#    print FILE "$string";
-#    close FILE;
+	for (my $i=0;$i<$number_of_randomizations;$i++) {
-#}
+		$seq = shuffle_sequence_dinucleotide($seq); # shuffles accumulate: each round reshuffles the previous round's output
+		$str = $seq;
+		my $rand_mfe = RNA::fold($str,$str); # score of this shuffle, obtained the same way as $mfe
+		if ($rand_mfe < $mfe) {
+			$cpt_inf++;
+		}
+		if ($rand_mfe == $mfe) {
+			$cpt_ega++;
+		}
+		if ($rand_mfe > $mfe) {
+			$cpt_sup++;
+		}
+	}
+	my $proba = ($cpt_ega + $cpt_inf) / ($number_of_randomizations + 1); # share of scores <= the input's, with the initial 1 in $cpt_ega included
+	#print "$name\t$mfe\t$proba\n";
+	return $proba;
+}
+# Shuffle a nucleotide sequence by repeatedly swapping the middle bases of
+# two trinucleotides that share the same first and third base. Because both
+# neighbours of each swapped base are identical, the dinucleotide counts of
+# the sequence are left unchanged.
+#
+# Parameters:
+#   $str - sequence to shuffle; it is upper-cased and U is rewritten to T
+#          before shuffling.
+# Returns:
+#   the shuffled sequence, with the same length and base composition as the
+#   normalised input.
+#
+# The loop aims for length($str) * 10 swaps. Inputs that admit no swap at
+# all (fewer than six bases, homopolymers, no pair of matching
+# trinucleotide frames) never advance the swap counter, so the search is
+# abandoned after $max_idle_rounds consecutive rounds without a swap and the
+# sequence is returned as it stands instead of looping forever.
+sub shuffle_sequence_dinucleotide {
+	my ($str) = @_;
+	# upper case and convert to ATGC
+	$str = uc($str);
+	$str =~ s/U/T/g;
+	my @nuc = ('A','T','G','C');
+	my $count_swap = 0;
+	# set maximum number of permutations
+	my $stop = length($str) * 10;
+	# consecutive rounds that produced no swap, and the point at which the
+	# sequence is treated as not shuffleable any further
+	my $idle_rounds = 0;
+	my $max_idle_rounds = 1000;
+	while($count_swap < $stop) {
+		my @pos;
+		my $swaps_before_round = $count_swap;
+		# look start and end letters
+		my $firstnuc = $nuc[int(rand 4)];
+		my $thirdnuc = $nuc[int(rand 4)];
+		# get positions for matching nucleotides
+		for (my $i=0;$i<(length($str)-2);$i++) {
+			if ((substr($str,$i,1) eq $firstnuc) && (substr($str,$i+2,1) eq $thirdnuc)) {
+				push (@pos,($i+1));
+				# skip one start position so recorded middle bases are never adjacent
+				$i++;
+			}
+		}
+		# swap at random trinucleotides
+		my $max = scalar(@pos);
+		for (my $i=0;$i<$max;$i++) {
+			my $swap = int(rand($max));
+			# partners must be at least 3 apart and carry different bases
+			if ((abs($pos[$swap] - $pos[$i]) >= 3) && (substr($str,$pos[$i],1) ne substr($str,$pos[$swap],1))) {
+				$count_swap++;
+				my $w1 = substr($str,$pos[$i],1);
+				my $w2 = substr($str,$pos[$swap],1);
+				substr($str,$pos[$i],1,$w2);
+				substr($str,$pos[$swap],1,$w1);
+			}
+		}
+		# give up once many rounds in a row have failed to swap anything
+		if ($count_swap == $swaps_before_round) {
+			$idle_rounds++;
+			last if $idle_rounds >= $max_idle_rounds;
+		}
+		else {
+			$idle_rounds = 0;
+		}
+	}
+	return($str);
+}
 sub test_nucleus_conservation{
 #test if nucleus is identical to nucleus from known miRNA, return 1 or 0
 if($query=~/x(\d+)/i){
 	my $freq=$1;
 	return $freq;
 }else{
-	print STDERR "Problem with read format\n";
+	#print STDERR "Problem with read format\n";
 	return 0;
 }
 }
 my $mfe_adj=max2(1,-$mfe);
 my $mfe_adj1=$mfe/$mlng;
 #parameters of known precursors and background hairpins, scale and location
 	my $a=1.339e-12;my $b=2.778e-13;my $c=45.834;
 	my $ev=$e**($mfe_adj1*$c);
-	print STDERR "\n***",$ev,"**\t",$ev+$b,"\t";
+	#print STDERR "\n***",$ev,"**\t",$ev+$b,"\t";
 	my $log_odds=($a/($b+$ev));
 	my $prob_test=prob_gumbel_discretized($mfe_adj,5.5,32);
 my $prob_background=prob_gumbel_discretized($mfe_adj,4.8,23);
 my $odds=$prob_test/$prob_background;
 my $log_odds_2=log($odds);
-	print STDERR "log_odds :",$log_odds,"\t",$log_odds_2,"\n";
+	#print STDERR "log_odds :",$log_odds,"\t",$log_odds_2,"\n";
 return $log_odds;
 }

Mercurial > repos > big-tiandm > mirplant2

comparison miRDeep_plant.pl @ 44:0c4e11018934 draft