44
|
1 # randfold (c) Eric Bonnet & Jan Wuyts 2003
|
|
2 # randomization test for rna secondary structure
|
|
3
|
|
4 # This program is free software; you can redistribute it and/or
|
|
5 # modify it under the terms of the GNU General Public License
|
|
6 # as published by the Free Software Foundation; either version 2
|
|
7 # of the License, or (at your option) any later version.
|
|
8
|
|
9 # This program is distributed in the hope that it will be useful,
|
|
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
12 # GNU General Public License for more details.
|
|
13
|
|
14 # You should have received a copy of the GNU General Public License
|
|
15 # along with this program; if not, write to the Free Software
|
|
16 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
|
17
|
|
18 # This soft needs BioPerl and ViennaRNA packages
|
|
19 # See http://www.bioperl.org and http://www.tbi.univie.ac.at/~ivo/RNA/
|
|
20 # See also README
|
|
21
|
|
22 use strict;
|
|
23 use Bio::SeqIO;
|
|
24 use RNA;
|
|
25
|
|
26 if (scalar(@ARGV) != 3) {
|
|
27 die "Usage $0 fasta_file number_of_randomizations type[m|d]\n";
|
|
28 }
|
|
29
|
|
30 my $in = Bio::SeqIO->new(-file => "$ARGV[0]", -format => "fasta");
|
|
31 my $number_of_randomizations = $ARGV[1];
|
|
32 my $type = $ARGV[2];
|
|
33 if ($type !~ /[m|d]/) {
|
|
34 die "error: wrong type of randomization.";
|
|
35 }
|
|
36
|
|
37 while (my $o = $in->next_seq) {
|
|
38 my $seq = uc($o->seq());
|
|
39 my $name = $o->display_id();
|
|
40
|
|
41 my $cpt_sup = 0;
|
|
42 my $cpt_inf = 0;
|
|
43 my $cpt_ega = 1;
|
|
44
|
|
45 my $str = $seq;
|
|
46 my $mfe = RNA::fold($seq,$str);
|
|
47
|
|
48 for (my $i=0;$i<$number_of_randomizations;$i++) {
|
|
49 if ($type eq "d") {
|
|
50 $seq = shuffle_sequence_dinucleotide($seq);
|
|
51 $str = $seq;
|
|
52 }
|
|
53 elsif ($type eq "m") {
|
|
54 my @tmp = split //,$seq;
|
|
55 fisher_yates_shuffle(\@tmp);
|
|
56 $seq = join '',@tmp;
|
|
57 $str = $seq;
|
|
58 }
|
|
59
|
|
60 my $rand_mfe = RNA::fold($str,$str);
|
|
61
|
|
62 if ($rand_mfe < $mfe) {
|
|
63 $cpt_inf++;
|
|
64 }
|
|
65 if ($rand_mfe == $mfe) {
|
|
66 $cpt_ega++;
|
|
67 }
|
|
68 if ($rand_mfe > $mfe) {
|
|
69 $cpt_sup++;
|
|
70 }
|
|
71 }
|
|
72
|
|
73 my $proba = ($cpt_ega + $cpt_inf) / ($number_of_randomizations + 1);
|
|
74
|
|
75 print "$name\t$mfe\t$proba\n";
|
|
76 }
|
|
77
|
|
78 # fisher_yates_shuffle( \@array ) :
|
|
79 # generate a random permutation of @array in place
|
|
80 # input array ref
|
|
81 # return nothing
|
|
82 sub fisher_yates_shuffle {
|
|
83 my $array = shift;
|
|
84 my $i;
|
|
85 for ($i = @$array; --$i; ) {
|
|
86 my $j = int rand ($i+1);
|
|
87 next if $i == $j;
|
|
88 @$array[$i,$j] = @$array[$j,$i];
|
|
89 }
|
|
90 }
|
|
91
|
|
92 # shuffle a sequence while preserving dinucleotide distribution
|
|
93 # input sequence string
|
|
94 # return sequence string shuffled
|
|
95 sub shuffle_sequence_dinucleotide {
|
|
96
|
|
97 my ($str) = @_;
|
|
98
|
|
99 # upper case and convert to ATGC
|
|
100 $str = uc($str);
|
|
101 $str =~ s/U/T/g;
|
|
102
|
|
103 my @nuc = ('A','T','G','C');
|
|
104 my $count_swap = 0;
|
|
105 # set maximum number of permutations
|
|
106 my $stop = length($str) * 10;
|
|
107
|
|
108 while($count_swap < $stop) {
|
|
109
|
|
110 my @pos;
|
|
111
|
|
112 # look start and end letters
|
|
113 my $firstnuc = $nuc[int(rand 4)];
|
|
114 my $thirdnuc = $nuc[int(rand 4)];
|
|
115
|
|
116 # get positions for matching nucleotides
|
|
117 for (my $i=0;$i<(length($str)-2);$i++) {
|
|
118 if ((substr($str,$i,1) eq $firstnuc) && (substr($str,$i+2,1) eq $thirdnuc)) {
|
|
119 push (@pos,($i+1));
|
|
120 $i++;
|
|
121 }
|
|
122 }
|
|
123
|
|
124 # swap at random trinucleotides
|
|
125 my $max = scalar(@pos);
|
|
126 for (my $i=0;$i<$max;$i++) {
|
|
127 my $swap = int(rand($max));
|
|
128 if ((abs($pos[$swap] - $pos[$i]) >= 3) && (substr($str,$pos[$i],1) ne substr($str,$pos[$swap],1))) {
|
|
129 $count_swap++;
|
|
130 my $w1 = substr($str,$pos[$i],1);
|
|
131 my $w2 = substr($str,$pos[$swap],1);
|
|
132 substr($str,$pos[$i],1,$w2);
|
|
133 substr($str,$pos[$swap],1,$w1);
|
|
134 }
|
|
135 }
|
|
136 }
|
|
137 return($str);
|
|
138 }
|
|
139
|