Mercurial > repos > big-tiandm > mirplant2
comparison randfold @ 44:0c4e11018934 draft
Uploaded
author | big-tiandm |
---|---|
date | Thu, 30 Oct 2014 21:29:19 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
43:4c0b1a94b882 | 44:0c4e11018934 |
---|---|
1 # randfold (c) Eric Bonnet & Jan Wuyts 2003 | |
2 # randomization test for rna secondary structure | |
3 | |
4 # This program is free software; you can redistribute it and/or | |
5 # modify it under the terms of the GNU General Public License | |
6 # as published by the Free Software Foundation; either version 2 | |
7 # of the License, or (at your option) any later version. | |
8 | |
9 # This program is distributed in the hope that it will be useful, | |
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 # GNU General Public License for more details. | |
13 | |
14 # You should have received a copy of the GNU General Public License | |
15 # along with this program; if not, write to the Free Software | |
16 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
17 | |
18 # This soft needs BioPerl and ViennaRNA packages | |
19 # See http://www.bioperl.org and http://www.tbi.univie.ac.at/~ivo/RNA/ | |
20 # See also README | |
21 | |
22 use strict; | |
23 use Bio::SeqIO; | |
24 use RNA; | |
25 | |
26 if (scalar(@ARGV) != 3) { | |
27 die "Usage $0 fasta_file number_of_randomizations type[m|d]\n"; | |
28 } | |
29 | |
30 my $in = Bio::SeqIO->new(-file => "$ARGV[0]", -format => "fasta"); | |
31 my $number_of_randomizations = $ARGV[1]; | |
32 my $type = $ARGV[2]; | |
33 if ($type !~ /[m|d]/) { | |
34 die "error: wrong type of randomization."; | |
35 } | |
36 | |
37 while (my $o = $in->next_seq) { | |
38 my $seq = uc($o->seq()); | |
39 my $name = $o->display_id(); | |
40 | |
41 my $cpt_sup = 0; | |
42 my $cpt_inf = 0; | |
43 my $cpt_ega = 1; | |
44 | |
45 my $str = $seq; | |
46 my $mfe = RNA::fold($seq,$str); | |
47 | |
48 for (my $i=0;$i<$number_of_randomizations;$i++) { | |
49 if ($type eq "d") { | |
50 $seq = shuffle_sequence_dinucleotide($seq); | |
51 $str = $seq; | |
52 } | |
53 elsif ($type eq "m") { | |
54 my @tmp = split //,$seq; | |
55 fisher_yates_shuffle(\@tmp); | |
56 $seq = join '',@tmp; | |
57 $str = $seq; | |
58 } | |
59 | |
60 my $rand_mfe = RNA::fold($str,$str); | |
61 | |
62 if ($rand_mfe < $mfe) { | |
63 $cpt_inf++; | |
64 } | |
65 if ($rand_mfe == $mfe) { | |
66 $cpt_ega++; | |
67 } | |
68 if ($rand_mfe > $mfe) { | |
69 $cpt_sup++; | |
70 } | |
71 } | |
72 | |
73 my $proba = ($cpt_ega + $cpt_inf) / ($number_of_randomizations + 1); | |
74 | |
75 print "$name\t$mfe\t$proba\n"; | |
76 } | |
77 | |
78 # fisher_yates_shuffle( \@array ) : | |
79 # generate a random permutation of @array in place | |
80 # input array ref | |
81 # return nothing | |
82 sub fisher_yates_shuffle { | |
83 my $array = shift; | |
84 my $i; | |
85 for ($i = @$array; --$i; ) { | |
86 my $j = int rand ($i+1); | |
87 next if $i == $j; | |
88 @$array[$i,$j] = @$array[$j,$i]; | |
89 } | |
90 } | |
91 | |
92 # shuffle a sequence while preserving dinucleotide distribution | |
93 # input sequence string | |
94 # return sequence string shuffled | |
95 sub shuffle_sequence_dinucleotide { | |
96 | |
97 my ($str) = @_; | |
98 | |
99 # upper case and convert to ATGC | |
100 $str = uc($str); | |
101 $str =~ s/U/T/g; | |
102 | |
103 my @nuc = ('A','T','G','C'); | |
104 my $count_swap = 0; | |
105 # set maximum number of permutations | |
106 my $stop = length($str) * 10; | |
107 | |
108 while($count_swap < $stop) { | |
109 | |
110 my @pos; | |
111 | |
112 # look start and end letters | |
113 my $firstnuc = $nuc[int(rand 4)]; | |
114 my $thirdnuc = $nuc[int(rand 4)]; | |
115 | |
116 # get positions for matching nucleotides | |
117 for (my $i=0;$i<(length($str)-2);$i++) { | |
118 if ((substr($str,$i,1) eq $firstnuc) && (substr($str,$i+2,1) eq $thirdnuc)) { | |
119 push (@pos,($i+1)); | |
120 $i++; | |
121 } | |
122 } | |
123 | |
124 # swap at random trinucleotides | |
125 my $max = scalar(@pos); | |
126 for (my $i=0;$i<$max;$i++) { | |
127 my $swap = int(rand($max)); | |
128 if ((abs($pos[$swap] - $pos[$i]) >= 3) && (substr($str,$pos[$i],1) ne substr($str,$pos[$swap],1))) { | |
129 $count_swap++; | |
130 my $w1 = substr($str,$pos[$i],1); | |
131 my $w2 = substr($str,$pos[$swap],1); | |
132 substr($str,$pos[$i],1,$w2); | |
133 substr($str,$pos[$swap],1,$w1); | |
134 } | |
135 } | |
136 } | |
137 return($str); | |
138 } | |
139 |