Mercurial > repos > big-tiandm > mirplant2
diff randfold @ 44:0c4e11018934 draft
Uploaded
author | big-tiandm |
---|---|
date | Thu, 30 Oct 2014 21:29:19 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/randfold Thu Oct 30 21:29:19 2014 -0400 @@ -0,0 +1,139 @@ +# randfold (c) Eric Bonnet & Jan Wuyts 2003 +# randomization test for rna secondary structure + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +# This soft needs BioPerl and ViennaRNA packages +# See http://www.bioperl.org and http://www.tbi.univie.ac.at/~ivo/RNA/ +# See also README + +use strict; +use Bio::SeqIO; +use RNA; + +if (scalar(@ARGV) != 3) { + die "Usage $0 fasta_file number_of_randomizations type[m|d]\n"; +} + +my $in = Bio::SeqIO->new(-file => "$ARGV[0]", -format => "fasta"); +my $number_of_randomizations = $ARGV[1]; +my $type = $ARGV[2]; +if ($type !~ /[m|d]/) { + die "error: wrong type of randomization."; +} + +while (my $o = $in->next_seq) { + my $seq = uc($o->seq()); + my $name = $o->display_id(); + + my $cpt_sup = 0; + my $cpt_inf = 0; + my $cpt_ega = 1; + + my $str = $seq; + my $mfe = RNA::fold($seq,$str); + + for (my $i=0;$i<$number_of_randomizations;$i++) { + if ($type eq "d") { + $seq = shuffle_sequence_dinucleotide($seq); + $str = $seq; + } + elsif ($type eq "m") { + my @tmp = split //,$seq; + fisher_yates_shuffle(\@tmp); + $seq = join '',@tmp; + $str = $seq; + } + + my $rand_mfe = RNA::fold($str,$str); + + if ($rand_mfe < $mfe) { + $cpt_inf++; + } + if ($rand_mfe == $mfe) { + $cpt_ega++; + } + if ($rand_mfe > $mfe) { + $cpt_sup++; + } + } + + my $proba = ($cpt_ega + $cpt_inf) / ($number_of_randomizations + 1); + + print "$name\t$mfe\t$proba\n"; +} + +# fisher_yates_shuffle( \@array ) : +# generate a random permutation of @array in place +# input array ref +# return nothing +sub fisher_yates_shuffle { + my $array = shift; + my $i; + for ($i = @$array; --$i; ) { + my $j = int rand ($i+1); + next if $i == $j; + @$array[$i,$j] = @$array[$j,$i]; + } +} + +# shuffle a sequence while preserving dinucleotide distribution +# input sequence string +# return sequence string shuffled +sub shuffle_sequence_dinucleotide { + + my ($str) = @_; + + # upper case and convert to ATGC + $str = uc($str); + $str =~ s/U/T/g; + + my @nuc = ('A','T','G','C'); + my $count_swap = 0; + # set maximum number of permutations + my $stop = length($str) * 10; + + while($count_swap < $stop) { + + my @pos; + + # look start and end letters + my $firstnuc = $nuc[int(rand 4)]; + my $thirdnuc = $nuc[int(rand 4)]; + + # get positions for matching nucleotides + for (my $i=0;$i<(length($str)-2);$i++) { + if ((substr($str,$i,1) eq $firstnuc) && (substr($str,$i+2,1) eq $thirdnuc)) { + push (@pos,($i+1)); + $i++; + } + } + + # swap at random trinucleotides + my $max = scalar(@pos); + for (my $i=0;$i<$max;$i++) { + my $swap = int(rand($max)); + if ((abs($pos[$swap] - $pos[$i]) >= 3) && (substr($str,$pos[$i],1) ne substr($str,$pos[$swap],1))) { + $count_swap++; + my $w1 = substr($str,$pos[$i],1); + my $w2 = substr($str,$pos[$swap],1); + substr($str,$pos[$i],1,$w2); + substr($str,$pos[$swap],1,$w1); + } + } + } + return($str); +} +