mirplant2: randfold annotate

annotate randfold @ 44:0c4e11018934 draft

Uploaded

author	big-tiandm
date	Thu, 30 Oct 2014 21:29:19 -0400
parents
children

rev	line source
44 0c4e11018934 Uploaded big-tiandm parents: diff changeset	1 # randfold (c) Eric Bonnet & Jan Wuyts 2003
0c4e11018934 Uploaded big-tiandm parents: diff changeset	2 # randomization test for rna secondary structure
0c4e11018934 Uploaded big-tiandm parents: diff changeset	3
0c4e11018934 Uploaded big-tiandm parents: diff changeset	4 # This program is free software; you can redistribute it and/or
0c4e11018934 Uploaded big-tiandm parents: diff changeset	5 # modify it under the terms of the GNU General Public License
0c4e11018934 Uploaded big-tiandm parents: diff changeset	6 # as published by the Free Software Foundation; either version 2
0c4e11018934 Uploaded big-tiandm parents: diff changeset	7 # of the License, or (at your option) any later version.
0c4e11018934 Uploaded big-tiandm parents: diff changeset	8
0c4e11018934 Uploaded big-tiandm parents: diff changeset	9 # This program is distributed in the hope that it will be useful,
0c4e11018934 Uploaded big-tiandm parents: diff changeset	10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
0c4e11018934 Uploaded big-tiandm parents: diff changeset	11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
0c4e11018934 Uploaded big-tiandm parents: diff changeset	12 # GNU General Public License for more details.
0c4e11018934 Uploaded big-tiandm parents: diff changeset	13
0c4e11018934 Uploaded big-tiandm parents: diff changeset	14 # You should have received a copy of the GNU General Public License
0c4e11018934 Uploaded big-tiandm parents: diff changeset	15 # along with this program; if not, write to the Free Software
0c4e11018934 Uploaded big-tiandm parents: diff changeset	16 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
0c4e11018934 Uploaded big-tiandm parents: diff changeset	17
0c4e11018934 Uploaded big-tiandm parents: diff changeset	18 # This soft needs BioPerl and ViennaRNA packages
0c4e11018934 Uploaded big-tiandm parents: diff changeset	19 # See http://www.bioperl.org and http://www.tbi.univie.ac.at/~ivo/RNA/
0c4e11018934 Uploaded big-tiandm parents: diff changeset	20 # See also README
0c4e11018934 Uploaded big-tiandm parents: diff changeset	21
0c4e11018934 Uploaded big-tiandm parents: diff changeset	22 use strict;
0c4e11018934 Uploaded big-tiandm parents: diff changeset	23 use Bio::SeqIO;
0c4e11018934 Uploaded big-tiandm parents: diff changeset	24 use RNA;
0c4e11018934 Uploaded big-tiandm parents: diff changeset	25
0c4e11018934 Uploaded big-tiandm parents: diff changeset	26 if (scalar(@ARGV) != 3) {
0c4e11018934 Uploaded big-tiandm parents: diff changeset	27 die "Usage $0 fasta_file number_of_randomizations type[m\|d]\n";
0c4e11018934 Uploaded big-tiandm parents: diff changeset	28 }
0c4e11018934 Uploaded big-tiandm parents: diff changeset	29
0c4e11018934 Uploaded big-tiandm parents: diff changeset	30 my $in = Bio::SeqIO->new(-file => "$ARGV[0]", -format => "fasta");
0c4e11018934 Uploaded big-tiandm parents: diff changeset	31 my $number_of_randomizations = $ARGV[1];
0c4e11018934 Uploaded big-tiandm parents: diff changeset	32 my $type = $ARGV[2];
0c4e11018934 Uploaded big-tiandm parents: diff changeset	33 if ($type !~ /[m\|d]/) {
0c4e11018934 Uploaded big-tiandm parents: diff changeset	34 die "error: wrong type of randomization.";
0c4e11018934 Uploaded big-tiandm parents: diff changeset	35 }
0c4e11018934 Uploaded big-tiandm parents: diff changeset	36
0c4e11018934 Uploaded big-tiandm parents: diff changeset	37 while (my $o = $in->next_seq) {
0c4e11018934 Uploaded big-tiandm parents: diff changeset	38 my $seq = uc($o->seq());
0c4e11018934 Uploaded big-tiandm parents: diff changeset	39 my $name = $o->display_id();
0c4e11018934 Uploaded big-tiandm parents: diff changeset	40
0c4e11018934 Uploaded big-tiandm parents: diff changeset	41 my $cpt_sup = 0;
0c4e11018934 Uploaded big-tiandm parents: diff changeset	42 my $cpt_inf = 0;
0c4e11018934 Uploaded big-tiandm parents: diff changeset	43 my $cpt_ega = 1;
0c4e11018934 Uploaded big-tiandm parents: diff changeset	44
0c4e11018934 Uploaded big-tiandm parents: diff changeset	45 my $str = $seq;
0c4e11018934 Uploaded big-tiandm parents: diff changeset	46 my $mfe = RNA::fold($seq,$str);
0c4e11018934 Uploaded big-tiandm parents: diff changeset	47
0c4e11018934 Uploaded big-tiandm parents: diff changeset	48 for (my $i=0;$i<$number_of_randomizations;$i++) {
0c4e11018934 Uploaded big-tiandm parents: diff changeset	49 if ($type eq "d") {
0c4e11018934 Uploaded big-tiandm parents: diff changeset	50 $seq = shuffle_sequence_dinucleotide($seq);
0c4e11018934 Uploaded big-tiandm parents: diff changeset	51 $str = $seq;
0c4e11018934 Uploaded big-tiandm parents: diff changeset	52 }
0c4e11018934 Uploaded big-tiandm parents: diff changeset	53 elsif ($type eq "m") {
0c4e11018934 Uploaded big-tiandm parents: diff changeset	54 my @tmp = split //,$seq;
0c4e11018934 Uploaded big-tiandm parents: diff changeset	55 fisher_yates_shuffle(\@tmp);
0c4e11018934 Uploaded big-tiandm parents: diff changeset	56 $seq = join '',@tmp;
0c4e11018934 Uploaded big-tiandm parents: diff changeset	57 $str = $seq;
0c4e11018934 Uploaded big-tiandm parents: diff changeset	58 }
0c4e11018934 Uploaded big-tiandm parents: diff changeset	59
0c4e11018934 Uploaded big-tiandm parents: diff changeset	60 my $rand_mfe = RNA::fold($str,$str);
0c4e11018934 Uploaded big-tiandm parents: diff changeset	61
0c4e11018934 Uploaded big-tiandm parents: diff changeset	62 if ($rand_mfe < $mfe) {
0c4e11018934 Uploaded big-tiandm parents: diff changeset	63 $cpt_inf++;
0c4e11018934 Uploaded big-tiandm parents: diff changeset	64 }
0c4e11018934 Uploaded big-tiandm parents: diff changeset	65 if ($rand_mfe == $mfe) {
0c4e11018934 Uploaded big-tiandm parents: diff changeset	66 $cpt_ega++;
0c4e11018934 Uploaded big-tiandm parents: diff changeset	67 }
0c4e11018934 Uploaded big-tiandm parents: diff changeset	68 if ($rand_mfe > $mfe) {
0c4e11018934 Uploaded big-tiandm parents: diff changeset	69 $cpt_sup++;
0c4e11018934 Uploaded big-tiandm parents: diff changeset	70 }
0c4e11018934 Uploaded big-tiandm parents: diff changeset	71 }
0c4e11018934 Uploaded big-tiandm parents: diff changeset	72
0c4e11018934 Uploaded big-tiandm parents: diff changeset	73 my $proba = ($cpt_ega + $cpt_inf) / ($number_of_randomizations + 1);
0c4e11018934 Uploaded big-tiandm parents: diff changeset	74
0c4e11018934 Uploaded big-tiandm parents: diff changeset	75 print "$name\t$mfe\t$proba\n";
0c4e11018934 Uploaded big-tiandm parents: diff changeset	76 }
0c4e11018934 Uploaded big-tiandm parents: diff changeset	77
0c4e11018934 Uploaded big-tiandm parents: diff changeset	78 # fisher_yates_shuffle( \@array ) :
0c4e11018934 Uploaded big-tiandm parents: diff changeset	79 # generate a random permutation of @array in place
0c4e11018934 Uploaded big-tiandm parents: diff changeset	80 # input array ref
0c4e11018934 Uploaded big-tiandm parents: diff changeset	81 # return nothing
0c4e11018934 Uploaded big-tiandm parents: diff changeset	82 sub fisher_yates_shuffle {
0c4e11018934 Uploaded big-tiandm parents: diff changeset	83 my $array = shift;
0c4e11018934 Uploaded big-tiandm parents: diff changeset	84 my $i;
0c4e11018934 Uploaded big-tiandm parents: diff changeset	85 for ($i = @$array; --$i; ) {
0c4e11018934 Uploaded big-tiandm parents: diff changeset	86 my $j = int rand ($i+1);
0c4e11018934 Uploaded big-tiandm parents: diff changeset	87 next if $i == $j;
0c4e11018934 Uploaded big-tiandm parents: diff changeset	88 @$array[$i,$j] = @$array[$j,$i];
0c4e11018934 Uploaded big-tiandm parents: diff changeset	89 }
0c4e11018934 Uploaded big-tiandm parents: diff changeset	90 }
0c4e11018934 Uploaded big-tiandm parents: diff changeset	91
0c4e11018934 Uploaded big-tiandm parents: diff changeset	92 # shuffle a sequence while preserving dinucleotide distribution
0c4e11018934 Uploaded big-tiandm parents: diff changeset	93 # input sequence string
0c4e11018934 Uploaded big-tiandm parents: diff changeset	94 # return sequence string shuffled
0c4e11018934 Uploaded big-tiandm parents: diff changeset	95 sub shuffle_sequence_dinucleotide {
0c4e11018934 Uploaded big-tiandm parents: diff changeset	96
0c4e11018934 Uploaded big-tiandm parents: diff changeset	97 my ($str) = @_;
0c4e11018934 Uploaded big-tiandm parents: diff changeset	98
0c4e11018934 Uploaded big-tiandm parents: diff changeset	99 # upper case and convert to ATGC
0c4e11018934 Uploaded big-tiandm parents: diff changeset	100 $str = uc($str);
0c4e11018934 Uploaded big-tiandm parents: diff changeset	101 $str =~ s/U/T/g;
0c4e11018934 Uploaded big-tiandm parents: diff changeset	102
0c4e11018934 Uploaded big-tiandm parents: diff changeset	103 my @nuc = ('A','T','G','C');
0c4e11018934 Uploaded big-tiandm parents: diff changeset	104 my $count_swap = 0;
0c4e11018934 Uploaded big-tiandm parents: diff changeset	105 # set maximum number of permutations
0c4e11018934 Uploaded big-tiandm parents: diff changeset	106 my $stop = length($str) * 10;
0c4e11018934 Uploaded big-tiandm parents: diff changeset	107
0c4e11018934 Uploaded big-tiandm parents: diff changeset	108 while($count_swap < $stop) {
0c4e11018934 Uploaded big-tiandm parents: diff changeset	109
0c4e11018934 Uploaded big-tiandm parents: diff changeset	110 my @pos;
0c4e11018934 Uploaded big-tiandm parents: diff changeset	111
0c4e11018934 Uploaded big-tiandm parents: diff changeset	112 # look start and end letters
0c4e11018934 Uploaded big-tiandm parents: diff changeset	113 my $firstnuc = $nuc[int(rand 4)];
0c4e11018934 Uploaded big-tiandm parents: diff changeset	114 my $thirdnuc = $nuc[int(rand 4)];
0c4e11018934 Uploaded big-tiandm parents: diff changeset	115
0c4e11018934 Uploaded big-tiandm parents: diff changeset	116 # get positions for matching nucleotides
0c4e11018934 Uploaded big-tiandm parents: diff changeset	117 for (my $i=0;$i<(length($str)-2);$i++) {
0c4e11018934 Uploaded big-tiandm parents: diff changeset	118 if ((substr($str,$i,1) eq $firstnuc) && (substr($str,$i+2,1) eq $thirdnuc)) {
0c4e11018934 Uploaded big-tiandm parents: diff changeset	119 push (@pos,($i+1));
0c4e11018934 Uploaded big-tiandm parents: diff changeset	120 $i++;
0c4e11018934 Uploaded big-tiandm parents: diff changeset	121 }
0c4e11018934 Uploaded big-tiandm parents: diff changeset	122 }
0c4e11018934 Uploaded big-tiandm parents: diff changeset	123
0c4e11018934 Uploaded big-tiandm parents: diff changeset	124 # swap at random trinucleotides
0c4e11018934 Uploaded big-tiandm parents: diff changeset	125 my $max = scalar(@pos);
0c4e11018934 Uploaded big-tiandm parents: diff changeset	126 for (my $i=0;$i<$max;$i++) {
0c4e11018934 Uploaded big-tiandm parents: diff changeset	127 my $swap = int(rand($max));
0c4e11018934 Uploaded big-tiandm parents: diff changeset	128 if ((abs($pos[$swap] - $pos[$i]) >= 3) && (substr($str,$pos[$i],1) ne substr($str,$pos[$swap],1))) {
0c4e11018934 Uploaded big-tiandm parents: diff changeset	129 $count_swap++;
0c4e11018934 Uploaded big-tiandm parents: diff changeset	130 my $w1 = substr($str,$pos[$i],1);
0c4e11018934 Uploaded big-tiandm parents: diff changeset	131 my $w2 = substr($str,$pos[$swap],1);
0c4e11018934 Uploaded big-tiandm parents: diff changeset	132 substr($str,$pos[$i],1,$w2);
0c4e11018934 Uploaded big-tiandm parents: diff changeset	133 substr($str,$pos[$swap],1,$w1);
0c4e11018934 Uploaded big-tiandm parents: diff changeset	134 }
0c4e11018934 Uploaded big-tiandm parents: diff changeset	135 }
0c4e11018934 Uploaded big-tiandm parents: diff changeset	136 }
0c4e11018934 Uploaded big-tiandm parents: diff changeset	137 return($str);
0c4e11018934 Uploaded big-tiandm parents: diff changeset	138 }
0c4e11018934 Uploaded big-tiandm parents: diff changeset	139

Mercurial > repos > big-tiandm > mirplant2

annotate randfold @ 44:0c4e11018934 draft