Mercurial > repos > big-tiandm > mirplant2
view randfold @ 44:0c4e11018934 draft
Uploaded
author | big-tiandm |
---|---|
date | Thu, 30 Oct 2014 21:29:19 -0400 |
parents | |
children |
line wrap: on
line source
# randfold (c) Eric Bonnet & Jan Wuyts 2003 # randomization test for rna secondary structure # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # This soft needs BioPerl and ViennaRNA packages # See http://www.bioperl.org and http://www.tbi.univie.ac.at/~ivo/RNA/ # See also README use strict; use Bio::SeqIO; use RNA; if (scalar(@ARGV) != 3) { die "Usage $0 fasta_file number_of_randomizations type[m|d]\n"; } my $in = Bio::SeqIO->new(-file => "$ARGV[0]", -format => "fasta"); my $number_of_randomizations = $ARGV[1]; my $type = $ARGV[2]; if ($type !~ /[m|d]/) { die "error: wrong type of randomization."; } while (my $o = $in->next_seq) { my $seq = uc($o->seq()); my $name = $o->display_id(); my $cpt_sup = 0; my $cpt_inf = 0; my $cpt_ega = 1; my $str = $seq; my $mfe = RNA::fold($seq,$str); for (my $i=0;$i<$number_of_randomizations;$i++) { if ($type eq "d") { $seq = shuffle_sequence_dinucleotide($seq); $str = $seq; } elsif ($type eq "m") { my @tmp = split //,$seq; fisher_yates_shuffle(\@tmp); $seq = join '',@tmp; $str = $seq; } my $rand_mfe = RNA::fold($str,$str); if ($rand_mfe < $mfe) { $cpt_inf++; } if ($rand_mfe == $mfe) { $cpt_ega++; } if ($rand_mfe > $mfe) { $cpt_sup++; } } my $proba = ($cpt_ega + $cpt_inf) / ($number_of_randomizations + 1); print "$name\t$mfe\t$proba\n"; } # fisher_yates_shuffle( \@array ) : # generate a random permutation of @array in place # input array ref # return nothing sub fisher_yates_shuffle { my $array = shift; my $i; for ($i = @$array; --$i; ) { my $j = int rand ($i+1); next if $i == $j; @$array[$i,$j] = @$array[$j,$i]; } } # shuffle a sequence while preserving dinucleotide distribution # input sequence string # return sequence string shuffled sub shuffle_sequence_dinucleotide { my ($str) = @_; # upper case and convert to ATGC $str = uc($str); $str =~ s/U/T/g; my @nuc = ('A','T','G','C'); my $count_swap = 0; # set maximum number of permutations my $stop = length($str) * 10; while($count_swap < $stop) { my @pos; # look start and end letters my $firstnuc = $nuc[int(rand 4)]; my $thirdnuc = $nuc[int(rand 4)]; # get positions for matching nucleotides for (my $i=0;$i<(length($str)-2);$i++) { if ((substr($str,$i,1) eq $firstnuc) && (substr($str,$i+2,1) eq $thirdnuc)) { push (@pos,($i+1)); $i++; } } # swap at random trinucleotides my $max = scalar(@pos); for (my $i=0;$i<$max;$i++) { my $swap = int(rand($max)); if ((abs($pos[$swap] - $pos[$i]) >= 3) && (substr($str,$pos[$i],1) ne substr($str,$pos[$swap],1))) { $count_swap++; my $w1 = substr($str,$pos[$i],1); my $w2 = substr($str,$pos[$swap],1); substr($str,$pos[$i],1,$w2); substr($str,$pos[$swap],1,$w1); } } } return($str); }