annotate PGAP-1.2.1/PGAP.pl @ 4:70b7a5270968 draft

Uploaded
author dereeper
date Thu, 24 Jun 2021 16:15:34 +0000
parents 83e62a1aeeeb
children d8c5bea1cce2
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1 #!/usr/bin/perl
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2 use strict;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
3 use warnings;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
4 use Getopt::Long;
4
70b7a5270968 Uploaded
dereeper
parents: 0
diff changeset
5 use File::Basename;
70b7a5270968 Uploaded
dereeper
parents: 0
diff changeset
6
70b7a5270968 Uploaded
dereeper
parents: 0
diff changeset
7 use lib dirname(__FILE__);
0
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
8
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
9 ### programs from BLAST
4
70b7a5270968 Uploaded
dereeper
parents: 0
diff changeset
10 my $formatdb=`which formatdb`;
70b7a5270968 Uploaded
dereeper
parents: 0
diff changeset
11 chomp($formatdb);
70b7a5270968 Uploaded
dereeper
parents: 0
diff changeset
12
70b7a5270968 Uploaded
dereeper
parents: 0
diff changeset
13 my $blastall=`which blastall`;
70b7a5270968 Uploaded
dereeper
parents: 0
diff changeset
14 chomp($blastall);
0
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
15
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
16 ### programs from mcl
4
70b7a5270968 Uploaded
dereeper
parents: 0
diff changeset
17 my $mcl=`which mcl`;
70b7a5270968 Uploaded
dereeper
parents: 0
diff changeset
18 chomp($mcl);
0
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
19
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
20 ### programs from mafft
4
70b7a5270968 Uploaded
dereeper
parents: 0
diff changeset
21 my $mafft=`which mafft`;
70b7a5270968 Uploaded
dereeper
parents: 0
diff changeset
22 chomp($mafft);
0
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
23
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
24 ### programs from PHYLIP
4
70b7a5270968 Uploaded
dereeper
parents: 0
diff changeset
25 my $seqboot=`which seqboot`;chomp($seqboot);
70b7a5270968 Uploaded
dereeper
parents: 0
diff changeset
26 my $neighbor=`which neighbor`;chomp($neighbor);
70b7a5270968 Uploaded
dereeper
parents: 0
diff changeset
27 my $consense=`which consense`;chomp($consense);
70b7a5270968 Uploaded
dereeper
parents: 0
diff changeset
28 my $dnaml=`which dnaml`;chomp($dnaml);
70b7a5270968 Uploaded
dereeper
parents: 0
diff changeset
29 my $dnadist=`which dnadist`;chomp($dnadist);
70b7a5270968 Uploaded
dereeper
parents: 0
diff changeset
30 my $dnapars=`which dnapars`;chomp($dnapars);
0
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
31
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
32 my $count_tree=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
33
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
34 my $sampleSize=8000; # when calculate the pan-genome size, we will sample $sampleSize combinations
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
35 # if the total combination number is larger than $sampleSize for specific genomes
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
36 # Surely, the number of $sampleSize is, the larger, the better.
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
37 # However, the larger the $sampleSize is, the more time would be consumed.
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
38 # we suggest the range: 5000 ~ 20,000
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
39
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
40 #####################################################################
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
41 # DOn't modify the following code, unless you know their functions
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
42 #####################################################################
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
43
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
44 my %opt=qw();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
45 GetOptions(\%opt,"strains:s","input:s","output:s","cluster!","pangenome!","variation!","evolution!","function!","method:s","thread:i","score:f","evalue:f","coverage:f","local:f","global:f","identity:f","bootstrap:i","help|h!");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
46
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
47 my @usage=qq(
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
48 ====== Pan-Genome Analysis Pipeline (PGAP) ======
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
49 Version 1.2.1
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
50
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
51 Usage: perl PGAP.pl [Options]
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
52
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
53 Options:
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
54 --strains String Input strains nicknames, and join them with '+', for example: A+B+C
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
55 --input String Input data directory
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
56 --output String Result output directory
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
57
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
58 --cluster Run homologous gene clustering
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
59 --pangenome Run pan-genome analysis
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
60 --variation Run homologous clusters variation analysis
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
61 --evolution Run evolution analysis
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
62 --function Run Function analysis
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
63
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
64 --method String GF for GeneFamily method, and MP for MultiParanoid method
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
65 for GF: fast, but not very accurate
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
66 evalue, score, indentity, coverage are employed
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
67 for MP: slow, but more accurate
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
68 score, coverage, local, global are employed
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
69 --thread Int Number of processors to use in blastall. [default:1]
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
70 --score Int Minimum score in blastall. [default:40]
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
71 --evalue Decimal Maximal E-value in blastall. [default:1e-10]
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
72 --coverage Decimal Minimum alignment coverage for two homologous proteins. [default:0.5]
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
73 --local Decimal Minimum local alignment overlap in MP method. [default:0.25]
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
74 --global Decimal Minimum global alignment overlap in MP method. [default:0.5]
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
75 --identity Decimal Minimum alignment indentity for two homologous proteins. [default:0.5]
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
76 --bootstrap Int Bootstrap times for phylogenetics tree. [default:1]
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
77
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
78 --h or help Display this message
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
79 );
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
80
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
81
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
82 ############# specified variable #############
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
83 my $inputDIR;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
84 my $outputDIR;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
85 my $run_cluster;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
86 my $run_pangenome;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
87 my $run_variation;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
88 my $run_evolution;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
89 my $run_function;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
90 my $method="";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
91 my $thread;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
92 my $score;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
93 my $identity;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
94 my $evalue;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
95 my $coverage;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
96 my $global;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
97 my $local;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
98 my $bootstrap;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
99
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
100
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
101 my %pep;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
102 my %nuc;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
103 my $spnum;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
104 my @clusters;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
105 my $Cluster;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
106 my @SpecieCombination;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
107 my @spID;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
108 my %genenum;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
109 my %aaAln;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
110 my %ntAln;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
111 my %cog;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
112 my %description;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
113 #my %aa4tree; ### AA sequence for Phylogenetic Tree
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
114 my %nt4tree; ### nucleotide sequence for Phylogenetic Tree
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
115 my @SNPPosition; ### SNP position
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
116 my $dieMessage="You did not run PGAP.pl in the program directory\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
117 my $section;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
118
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
119 ######### common temporary variable #############
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
120 my $i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
121 my $j;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
122 my $line;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
123 my %tmpHash;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
124 my @tmp;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
125 my $tmp;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
126 my $key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
127 my @row;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
128 my $inparacount;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
129 my $ClusterID;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
130 my $orth;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
131 my @content;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
132 my $clusterName;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
133 my @xdata;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
134 my @ydata;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
135 my @fit;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
136
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
137 my $fit_A;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
138 my $fit_A_interval;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
139 my $fit_B;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
140 my $fit_C;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
141 my $fit_C_interval;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
142 my $fit_Rsquare;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
143
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
144
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
145
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
146 #### check option
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
147
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
148 my $opt_error=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
149
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
150 if ((scalar(keys %opt) ==0) or (exists($opt{"help"})))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
151 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
152 print join("\n",@usage)."\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
153 exit;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
154 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
155
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
156
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
157 ###################### public info
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
158 ### strains name
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
159 my @species;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
160 if (exists($opt{"strains"}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
161 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
162 @species=split(/\+/,$opt{"strains"});
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
163 $spnum=scalar(@species);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
164 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
165 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
166 print "Please assign strains nick name!\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
167 exit;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
168 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
169
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
170 ### input data directory
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
171
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
172 if (exists($opt{"input"}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
173 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
174 $inputDIR=$opt{"input"};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
175 if ($inputDIR!~/\/$/)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
176 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
177 $inputDIR=$inputDIR."/";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
178 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
179 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
180 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
181 print "Please assign input data directory!\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
182 exit;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
183 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
184 ### output data directory
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
185
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
186 if (exists($opt{"output"}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
187 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
188 $outputDIR=$opt{"output"};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
189 if ($outputDIR!~/\/$/)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
190 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
191 $outputDIR=$outputDIR."/";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
192 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
193 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
194 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
195 print "Please assign result output directory!\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
196 exit;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
197 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
198
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
199 ###################### section info
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
200
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
201 if (exists($opt{"cluster"}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
202 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
203 $run_cluster=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
204 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
205 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
206 $run_cluster=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
207 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
208
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
209 if (exists($opt{"pangenome"}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
210 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
211 $run_pangenome=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
212 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
213 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
214 $run_pangenome=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
215 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
216
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
217 if (exists($opt{"variation"}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
218 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
219 $run_variation=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
220 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
221 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
222 $run_variation=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
223 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
224
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
225 if (exists($opt{"evolution"}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
226 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
227 $run_evolution=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
228 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
229 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
230 $run_evolution=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
231 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
232
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
233 if (exists($opt{"function"}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
234 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
235 $run_function=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
236 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
237 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
238 $run_function=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
239 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
240
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
241 if ($run_cluster)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
242 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
243 ### method
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
244 if (exists($opt{"method"}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
245 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
246 $method=uc($opt{"method"});
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
247 if ($method!~/^GF$/ and $method!~/^MP$/)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
248 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
249 print "Unknown method: ".$opt{"method"}."\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
250 exit;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
251 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
252 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
253 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
254 print "Please assign the cluster method!\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
255 exit;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
256 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
257
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
258 ##thread
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
259 if (exists($opt{"thread"}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
260 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
261 $thread=$opt{"thread"};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
262 if ($thread==0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
263 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
264 print "please assign an applicable thread value.\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
265 exit;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
266 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
267 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
268 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
269 $thread=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
270 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
271
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
272 ##score
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
273 if (exists($opt{"score"}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
274 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
275 $score=$opt{"score"};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
276 if ($score<=0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
277 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
278 print "please assign an applicable score value.\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
279 exit;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
280 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
281 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
282 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
283 $score=40;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
284 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
285
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
286 if ($method eq "GF")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
287 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
288 ###identity
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
289 if (exists($opt{"identity"}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
290 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
291 $identity=$opt{"identity"};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
292 if ($identity>1 or $identity<=0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
293 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
294 print "identity should be 0 ~ 1 \n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
295 exit;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
296 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
297 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
298 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
299 $identity=0.5;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
300 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
301
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
302 ###evalue
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
303 if (exists($opt{"evalue"}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
304 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
305 $evalue=$opt{"evalue"};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
306 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
307 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
308 $evalue=1e-10;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
309 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
310
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
311 ###coverage
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
312 if (exists($opt{"coverage"}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
313 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
314 $coverage=$opt{"coverage"};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
315 if ($coverage>1 or $coverage<=0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
316 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
317 print "coverage should be 0 ~ 1 \n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
318 exit;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
319 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
320 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
321 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
322 $coverage=0.5;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
323 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
324 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
325
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
326
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
327 if ($method eq "MP")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
328 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
329 ###global
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
330 if (exists($opt{"global"}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
331 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
332 $global=$opt{"global"};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
333 if ($global>1 or $global<=0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
334 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
335 print "global coverage should be 0 ~ 1 \n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
336 exit;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
337 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
338 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
339 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
340 $global=0.5;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
341 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
342 ###local
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
343 if (exists($opt{"local"}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
344 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
345 $local=$opt{"local"};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
346 if ($local<=0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
347 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
348 print "local coverage should be 0 ~ [global coverage value] \n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
349 exit;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
350 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
351 if ($local>$global)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
352 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
353 print "local coverage should be less than global coverage!\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
354 exit;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
355 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
356 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
357 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
358 $local=0.25;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
359 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
360 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
361 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
362
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
363 if ($run_evolution)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
364 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
365 if (exists($opt{"bootstrap"}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
366 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
367 $bootstrap=$opt{"bootstrap"};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
368 if ($bootstrap<=0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
369 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
370 print "please assign an applicable bootstrap value.\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
371 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
372 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
373 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
374 $bootstrap=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
375 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
376 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
377
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
378 print "Program begin at ".localtime()."\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
379 print "The following are the parameters for current process:\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
380 print "Strains: ".join(",",@species)."\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
381 print "Input directory: $inputDIR\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
382 print "Output directory: $outputDIR\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
383 if ($run_cluster)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
384 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
385 print "Cluster analysis: yes\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
386 print " Method: $method\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
387 print " Thread: $thread\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
388 if ($method eq "GF")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
389 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
390 print " E-value: $evalue\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
391 print " Identity: $identity\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
392 print " Coverage: $coverage\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
393 print " Score: $score\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
394 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
395 if ($method eq "MP")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
396 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
397 print " Local: $local\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
398 print " Global: $global\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
399 print " Score: $score\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
400 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
401 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
402 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
403 print "Cluster analysis: no\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
404 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
405 if ($run_pangenome)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
406 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
407 print "Pan-genome analysis: yes\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
408 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
409 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
410 print "Pan-genome analysis: no\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
411 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
412 if ($run_variation)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
413 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
414 print "Variation analysis: yes\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
415 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
416 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
417 print "Variation analysis: no\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
418 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
419 if ($run_evolution)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
420 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
421 print "Evolution analysis: yes\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
422 print " Bootstrap: $bootstrap\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
423 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
424 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
425 print "Evolution analysis: no\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
426 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
427 if ($run_function)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
428 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
429 print "Function analysis: yes\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
430 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
431 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
432 print "Function analysis: no\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
433 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
434
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
435 $section=$run_cluster.$run_pangenome.$run_variation.$run_evolution.$run_function;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
436
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
437 ###############################################
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
438 # section 0) check input file and program
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
439 ###############################################
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
440 if (!(-e $outputDIR))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
441 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
442 system("mkdir $outputDIR");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
443 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
444 system("chmod +rw $outputDIR");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
445
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
446 if (!(-w $outputDIR))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
447 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
448 print "There is no WRITE permission in $outputDIR\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
449 exit;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
450 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
451 @tmp=qw();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
452 &CheckInputFile(\@species,$inputDIR,$section,$method,\@tmp);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
453 &CheckExtraProgram($section,$method,\@tmp);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
454
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
455 if (scalar(@tmp)>0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
456 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
457 open(R,">".$outputDIR."0.error.message");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
458 print R join("",@tmp)."\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
459 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
460 print "error!\nlog are saved in ${outputDIR}0.error.message\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
461 exit;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
462 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
463
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
464
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
465 ############################################
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
466 # section 1) cluster analysis
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
467 ############################################
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
468
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
469 if ($run_cluster)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
470 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
471 print "\n\n############################################\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
472 print "# section 1) cluster analysis\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
473 print "############################################\n\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
474
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
475 #### cluster gene and return result to the array @clusters
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
476
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
477 if ($method eq "MP")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
478 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
479 print "Begin cluster gene with MP method ...\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
480 &MP();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
481 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
482 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
483 print "Begin cluster gene with GF method ...\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
484 &GF();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
485 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
486
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
487 #### output normal cluster format
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
488
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
489 &FormatClusterOutPut(\@species,"${outputDIR}1.Orthologs_Cluster.txt",\@clusters);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
490
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
491 #### Retrieve cluster
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
492
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
493 &RetrieveClusterFromFile("${outputDIR}1.Orthologs_Cluster.txt",\@clusters);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
494
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
495 ##### gene distribution in each strains
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
496 %tmpHash=();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
497 &GeneDistribution(\@clusters,\%tmpHash);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
498
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
499 open(R,">${outputDIR}1.Gene_Distribution_By_Conservation.txt");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
500 print R "SharedBy_Strains\t".join("\t",@species)."\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
501
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
502 for ($i=$spnum;$i>0;$i--)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
503 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
504 print R $i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
505 for ($j=0;$j<$spnum;$j++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
506 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
507 if (exists($tmpHash{$i."|".$j}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
508 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
509 print R "\t".$tmpHash{$i."|".$j};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
510 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
511 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
512 print R "\t0";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
513 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
514 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
515 print R "\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
516 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
517
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
518 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
519
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
520
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
521 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
522 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
523 print "Homologous gene clustering is skipped!\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
524 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
525
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
526
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
527 if ($run_pangenome)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
528 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
529 print "\n\n############################################\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
530 print "# section 2) Pan-genome analysis\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
531 print "############################################\n\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
532
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
533 #### Retrieve cluster
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
534 &RetrieveClusterFromFile("${outputDIR}1.Orthologs_Cluster.txt",\@clusters);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
535 chomp(@clusters);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
536
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
537 #### convert file into 0-1 matrix
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
538 for ($line=0;$line<@clusters;$line++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
539 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
540 @row=split(/\t/,$clusters[$line]);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
541 splice(@row,0,1);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
542 for ($i=0;$i<@row;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
543 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
544 if ($row[$i] eq "-")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
545 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
546 $row[$i]=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
547 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
548 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
549 $row[$i]=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
550 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
551 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
552 $clusters[$line]=join("\t",@row);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
553 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
554
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
555 #### fetch gene number of each strains
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
556 for ($i=0;$i<$spnum;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
557 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
558 open(F,"$inputDIR$species[$i].pep");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
559 @tmp=<F>;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
560 close(F);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
561 @tmp=grep(/^>/,@tmp);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
562 $genenum{$species[$i]}=scalar(@tmp);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
563 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
564
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
565 #### pan genome size and core genome size
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
566 print "Deducing pan genome size and core genome size for each composition...\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
567
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
568 open(PAN,">${outputDIR}2.PanGenome.Data.txt");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
569 print PAN "ClusterConservation\tTotalGeneNumber\tPanGenome\tCoreGenome\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
570
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
571 for ($i=1;$i<=scalar(@species);$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
572 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
573 #@SpecieCombination=&Combination(\@species,$i);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
574 #@SpecieCombination=&Combination($spnum,$i);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
575 if (&ChkCombinationValue($spnum,$i) !=0) ### transfer the array reference to the subroutine
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
576 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
577 &Combination($spnum,$i,\@SpecieCombination); ## if the combination number is less than sampleSize, then fecth all, else sample
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
578 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
579 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
580 &SampleCombination($spnum,$i,\@SpecieCombination);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
581 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
582
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
583 foreach $key (@SpecieCombination)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
584 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
585 ##### count total gene number in current combination
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
586 $tmp=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
587 @spID=split(/\t/,$key); #### speices id in current combination
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
588 foreach (@spID)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
589 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
590 $tmp=$tmp+$genenum{$species[$_]};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
591 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
592 ##### scan pangenome and coregenome
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
593 @tmp=split(/\t/,&PanGenomeNumber(\@spID));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
594 print PAN "$i\t$tmp\t".join("\t",@tmp)."\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
595
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
596 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
597 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
598
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
599 close(PAN);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
600
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
601 #### data fit
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
602
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
603 #### for model A
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
604
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
605 if ($spnum<3)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
606 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
607 print "There are $spnum strains. For pan-genome function fitting, at least 3 strains data are required.\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
608 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
609 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
610 open(R,">${outputDIR}2.PanGenome.Profile.txt");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
611 ##### genome number & pan-genome size
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
612 @xdata=qw();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
613 @ydata=qw();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
614 &ReadData2Array("${outputDIR}2.PanGenome.Data.txt",\@xdata,0,\@ydata,2);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
615 &SumData(\@xdata,\@ydata,"mean");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
616 ($fit_Rsquare, $fit_A, $fit_A_interval, $fit_B, $fit_C, $fit_C_interval)=&fit_model_A(\@xdata,\@ydata);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
617 print R "The relation bewteen genome number and pan-genome size\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
618 print R "Function model: y=A*x**B +C \n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
619 print R "\ty denotes pan-genome size, x denotes genome number, and A, B, C are fitting parameters.\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
620 print R "Fitting result:\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
621 print R "\ty = $fit_A *x**$fit_B + $fit_C\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
622 print R "\tR-square = $fit_Rsquare\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
623 print R "\tA 95% confidence interval: ($fit_A - $fit_A_interval , $fit_A + $fit_A_interval)\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
624 print R "\tC 95% confidence interval: ($fit_C - $fit_C_interval , $fit_C + $fit_C_interval)\n\n\n\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
625
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
626 ##### total gene number & pan-genome size
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
627 #@xdata=qw();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
628 #@ydata=qw();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
629 #&ReadData2Array("${outputDIR}2.PanGenome.Data.txt",\@xdata,1,\@ydata,2);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
630 #&SumDataByMedian(\@xdata,\@ydata);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
631 #($fit_Rsquare, $fit_A, $fit_B, $fit_C)=&fit_model_A(\@xdata,\@ydata);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
632 #print R "The relation bewteen total gene number and pan-genome size\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
633 #print R "$fit_Rsquare, $fit_A, $fit_B, $fit_C\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
634 #print R "\ty = $fit_A *x**$fit_B + $fit_C R-square = $fit_Rsquare\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
635 #print R "\tx: total gene number\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
636 #print R "\ty: pan-genome size\n\n\n\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
637
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
638 ##### genome number & core genome
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
639 @xdata=qw();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
640 @ydata=qw();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
641 &ReadData2Array("${outputDIR}2.PanGenome.Data.txt",\@xdata,0,\@ydata,3);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
642 &SumData(\@xdata,\@ydata,"mean");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
643 ($fit_Rsquare, $fit_A, $fit_A_interval, $fit_B, $fit_C, $fit_C_interval)=&fit_model_B(\@xdata,\@ydata);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
644 print R "The relation bewteen genome number and core genome size\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
645 print R "Function model: y=A*exp(B*x) +C \n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
646 print R "\ty denotes pan-genome size, x denotes genome number, and A, B, C are fitting parameters.\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
647 print R "Fitting result:\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
648 print R "\ty = $fit_A *exp($fit_B * x) + $fit_C R-square = $fit_Rsquare\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
649 print R "\tR-square = $fit_Rsquare\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
650 print R "\tA 95% confidence interval: ($fit_A - $fit_A_interval , $fit_A + $fit_A_interval)\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
651 print R "\tC 95% confidence interval: ($fit_C - $fit_C_interval , $fit_C + $fit_C_interval)\n\n\n\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
652 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
653 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
654
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
655 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
656
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
657
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
658 ############################################
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
659 # section 3) CDS variation analysis
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
660 ############################################
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
661
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
662 if ($run_variation)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
663 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
664 print "\n\n############################################\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
665 print "# section 3) CDS variation analysis\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
666 print "############################################\n\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
667
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
668 #### Retrieve cluster
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
669 &RetrieveClusterFromFile("${outputDIR}1.Orthologs_Cluster.txt",\@clusters);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
670 chomp(@clusters);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
671
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
672 ## protein
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
673 system("rm -rf *.pep");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
674 &PrepareFasta(\@species,$inputDIR,".pep"); ###prepare pep file
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
675 system("cat *.pep > All.faa && rm -rf *.pep && mv All.faa All.pep");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
676 &ReadSequenceInToHash("All.pep",\%pep);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
677 ## nucleic
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
678 system("rm -rf *.nuc");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
679 &PrepareFasta(\@species,$inputDIR,".nuc"); ###prepare nuc file
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
680 system("cat *.nuc > All.ffn && rm -rf *.nuc && mv All.ffn All.nuc");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
681 &ReadSequenceInToHash("All.nuc",\%nuc);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
682
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
683 ## scanning SNP
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
684 %nt4tree=();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
685 for ($i=0;$i<$spnum;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
686 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
687 $nt4tree{"S".$i}="";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
688 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
689
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
690 open(VAR,">${outputDIR}3.CDS.variation.txt");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
691 print VAR "ClusterID\tStrains_Number\tGene_Number\tPosition\taaType\tntType\tntProfile\tVariation type\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
692
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
693 open(VA,">${outputDIR}3.CDS.variation.analysis.txt");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
694 print VA "ClusterID\tInDel Base\tNonsynonymous mutation\tSynonymous mutation\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
695
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
696 for ($line=0;$line<@clusters;$line++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
697 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
698 @row=split(/\t|\,/,$clusters[$line]);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
699 $ClusterID=$row[0];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
700 splice(@row,0,1);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
701 @row=grep(/^S/,@row);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
702 if (scalar(@row) >=2)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
703 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
704 open(PEP,">$ClusterID.pep");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
705 open(NUC,">$ClusterID.nuc");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
706 foreach $key (@row)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
707 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
708 print PEP ">$key\n$pep{$key}\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
709 print NUC ">$key\n$nuc{$key}\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
710 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
711 close(PEP);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
712 close(NUC);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
713 system("$mafft --quiet $ClusterID.pep > $ClusterID.pal");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
714 #system("perl ./pal2nal.pl $ClusterID.pal $ClusterID.nuc -output fasta > $ClusterID.nal");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
715 $tmp=&pal2nal("$ClusterID.pal","$ClusterID.nuc","$ClusterID.nal");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
716 if ($tmp == 0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
717 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
718 system("rm -rf $ClusterID.*");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
719 next;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
720 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
721
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
722 @tmp=&DetectSNP();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
723 if (scalar(@tmp)>0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
724 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
725 print VA $ClusterID."\t".&VarAnalysis(\@tmp)."\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
726 print VAR join("",@tmp);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
727 ### core orthologs
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
728 @row=split(/\t/,$clusters[$line]);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
729 splice(@row,0,1);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
730 if ((&CountGeneInCluster(join("\t",@row)) ==$spnum) and (&CountSpeicesInCluster(join("\t",@row)) == $spnum) )
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
731 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
732 $count_tree++;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
733 %tmpHash=();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
734 foreach (@row)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
735 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
736 $tmpHash{$_}="";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
737 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
738 &RemoveHeadGap("$ClusterID.nal",\%tmpHash);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
739 &ExtractSNP4tree(\%tmpHash,\%nt4tree);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
740 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
741 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
742 system("rm -rf $ClusterID.*");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
743 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
744 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
745 close(VAR);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
746 close(VA);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
747
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
748 open(R,">${outputDIR}3.CDS.variation.for.evolution.txt");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
749 foreach $key (keys %nt4tree)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
750 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
751 $_=$key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
752 s/s//gi;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
753 print R ">$species[$_]\n$nt4tree{$key}\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
754 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
755 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
756 print $count_tree."\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
757
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
758 ###
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
759 system("rm All.nuc All.pep");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
760 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
761 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
762 print "CDS variation is skipped.\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
763 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
764
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
765 ############################################
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
766 # section 4) CDS variation analysis
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
767 ############################################
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
768
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
769 if ($run_evolution)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
770 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
771 #### Retrieve cluster
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
772 &RetrieveClusterFromFile("${outputDIR}1.Orthologs_Cluster.txt",\@clusters);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
773 chomp(@clusters);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
774
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
775
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
776
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
777 ##################
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
778 ##
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
779 ## Distance based
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
780 ##
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
781 ##################
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
782
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
783 ### caculate the distance between each two strains by cluster
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
784 &ClusterProfil4Specie(\%tmpHash); # caculate Clusters profile for each specie
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
785 &DistanceMatrix($spnum,\%tmpHash); # caculate distance matrix acoording to Clusters profile
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
786 ###output distance
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
787 open(DIST,">${outputDIR}4.Species_Distance_Clusters_Based.txt");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
788 ###header
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
789 printf DIST "%5d", $spnum;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
790 print DIST "\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
791 foreach $i (0..($spnum-1))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
792 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
793 $key="sp".$i."sp";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
794 printf DIST "%-10s",$key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
795 foreach $j (0..($spnum-1))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
796 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
797 printf DIST " %8f",$tmpHash{$i."-".$j};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
798 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
799 print DIST "\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
800 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
801 close(DIST);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
802
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
803 %tmpHash=();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
804
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
805 ### based on pan genome (distance)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
806 print "\nDraw pangenome based phylogenetic tree ...\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
807
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
808 &PanBasedTree("${outputDIR}4.Species_Distance_Clusters_Based.txt","${outputDIR}4.PanBased");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
809
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
810 ##################
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
811 ##
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
812 ## SNP based
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
813 ##
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
814 ##################
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
815 %tmpHash=();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
816 if (!(-e "${outputDIR}3.CDS.variation.for.evolution.txt"))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
817 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
818 print "Variation in core orthologs cluster is not found from ${outputDIR}3.CDS.variation.for.evolution.txt.\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
819 print "Maybe you have skipped CDS variation analysis.\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
820 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
821 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
822 &ReadSequenceInToHash("${outputDIR}3.CDS.variation.for.evolution.txt",\%tmpHash);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
823 open(R,">mlst.aln");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
824 for ($i=0;$i<@species;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
825 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
826 print R ">sp${i}sp\n".$tmpHash{$species[$i]}."\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
827 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
828 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
829 &fasta2phylip("mlst.aln","mlst.phylip");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
830 system("rm -rf mlst.aln");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
831
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
832 print "\nDraw SNP based phylogenetic tree ...\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
833 &SNPBasedTree("mlst.phylip","${outputDIR}4.SNPBased");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
834 system("rm -rf mlst.phylip")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
835 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
836
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
837 ########
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
838 # replace speices name
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
839 ########
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
840
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
841 opendir(DIR,"${outputDIR}");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
842 @tmp=readdir(DIR);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
843 closedir(DIR);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
844 @tmp=grep(/^4/,@tmp);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
845 foreach $tmp (@tmp)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
846 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
847 &ReplaceName(\@species,"${outputDIR}$tmp");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
848 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
849 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
850 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
851 print "Evolution analysis is skipped.\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
852 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
853
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
854
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
855 ############################################
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
856 # section 4) Function analysis
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
857 ############################################
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
858
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
859 if ($run_function)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
860 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
861 #### Retrieve cluster
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
862 &RetrieveClusterFromFile("${outputDIR}1.Orthologs_Cluster.txt",\@clusters);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
863 chomp(@clusters);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
864
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
865 #### prepare annotation file
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
866 &PrepareTable(\@species,$inputDIR,".function"); ###prepare location file
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
867 &ReadAnnotation(\@species,\%cog,\%description);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
868
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
869
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
870 #### assign function
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
871 open(R,">${outputDIR}5.Orthologs_Cluster_Function.txt");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
872 print R "ClusterID\tConservation_Level\tCOG\tDescription\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
873 for ($i=0;$i<@clusters;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
874 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
875 @row=split(/\t/,$clusters[$i]);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
876 $ClusterID=$row[0];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
877 splice(@row,0,1);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
878 print R $ClusterID."\t".&CountSpeicesInCluster(join("\t",@row))."\t".&getCOG(\@row,\%cog)."\t".&getDescription(\@row,\%description)."\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
879 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
880 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
881
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
882 #### COG distribution
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
883
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
884 ###Whole Clusters COG Distribution
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
885 &outputCOGStatistic("${outputDIR}5.Orthologs_Whole_Cluster_COG_Distribution.txt",&scanCOG("${outputDIR}5.Orthologs_Cluster_Function.txt",$spnum,1));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
886
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
887 ###Core Clusters COG Distribution
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
888 &outputCOGStatistic("${outputDIR}5.Orthologs_Core_Cluster_COG_Distribution.txt",&scanCOG("${outputDIR}5.Orthologs_Cluster_Function.txt",$spnum,$spnum));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
889
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
890 ###Dispensable Clusters COG Distribution
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
891 &outputCOGStatistic("${outputDIR}5.Orthologs_Dispensable_Cluster_COG_Distribution.txt",&scanCOG("${outputDIR}5.Orthologs_Cluster_Function.txt",($spnum-1),2));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
892
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
893 ###strains specifc Clusters COG Distribution
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
894 &outputCOGStatistic("${outputDIR}5.Orthologs_specifc_Cluster_COG_Distribution.txt",&scanCOG("${outputDIR}5.Orthologs_Cluster_Function.txt",1,1));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
895
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
896 system("rm -rf *.function");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
897
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
898 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
899 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
900 print "Function analysis is skipped.\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
901 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
902
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
903 sub outputCOGStatistic()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
904 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
905 (my $file,my $subcogcount)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
906 my @cogcat=("J A K L B","D Y V T M N Z W U O","C G E F H I P Q","R S -");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
907 my @cogdesc=("INFORMATION STORAGE AND PROCESSING","CELLULAR PROCESSES AND SIGNALING","METABOLISM","POORLY CHARACTERIZED");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
908 my @subcogcat=qw(J A K L B D Y V T M N Z W U O C G E F H I P Q R S -);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
909 my @subcogdesc=("[J] Translation, ribosomal structure and biogenesis","[A] RNA processing and modification","[K] Transcription","[L] Replication, recombination and repair","[B] Chromatin structure and dynamics","[D] Cell cycle control, cell division, chromosome partitioning","[Y] Nuclear structure","[V] Defense mechanisms","[T] Signal transduction mechanisms","[M] Cell wall/membrane/envelope biogenesis","[N] Cell motility","[Z] Cytoskeleton","[W] Extracellular structures","[U] Intracellular trafficking, secretion, and vesicular transport","[O] Posttranslational modification, protein turnover, chaperones","[C] Energy production and conversion","[G] Carbohydrate transport and metabolism","[E] Amino acid transport and metabolism","[F] Nucleotide transport and metabolism","[H] Coenzyme transport and metabolism","[I] Lipid transport and metabolism","[P] Inorganic ion transport and metabolism","[Q] Secondary metabolites biosynthesis, transport and catabolism","[R] General function prediction only","[S] Function unknown","[-] Unclassified");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
910 my %subcogdesc;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
911 my $key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
912 my @cog;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
913 my $i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
914 my $cognum;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
915
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
916 for ($i=0;$i<@subcogcat;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
917 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
918 $subcogdesc{$subcogcat[$i]}=$subcogdesc[$i];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
919 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
920
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
921 open(R,">$file");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
922 for ($i=0;$i<@cogcat;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
923 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
924 $cognum=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
925 foreach $key (split(" ",$cogcat[$i]))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
926 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
927 $cognum=$cognum+$$subcogcount{$key};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
928 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
929 print R $cogdesc[$i]." ( ".$cognum." )\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
930 foreach $key (split(" ",$cogcat[$i]))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
931 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
932 printf R "%-6d %s\n",$$subcogcount{$key},$subcogdesc{$key};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
933 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
934 print R "\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
935
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
936 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
937 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
938
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
939 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
940
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
941 sub scanCOG()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
942 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
943 (my $file,my $max_orth,my $min_orth)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
944 my @row;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
945 my @subcogcat=qw(J A K L B D Y V T M N Z W U O C G E F H I P Q R S -);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
946 my %subcogcount;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
947 my $cog;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
948 my $key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
949
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
950 foreach $key (@subcogcat)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
951 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
952 $subcogcount{$key}=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
953 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
954
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
955 @subcogcat=qw(J A K L B D Y V T M N Z W U O C G E F H I P Q R S);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
956
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
957 open(F,"$file");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
958 $_=<F>;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
959 while (<F>)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
960 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
961 @row=split(/\t/,$_);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
962 if ($row[1]>=$min_orth and $row[1]<=$max_orth)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
963 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
964 if ($row[2] eq "-")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
965 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
966 $subcogcount{"-"}++;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
967 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
968 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
969 $_=uc($row[2]);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
970 s/COG//gi;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
971 $cog=$_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
972 foreach $key (@subcogcat)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
973 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
974 if ($cog=~/$key/)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
975 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
976 $subcogcount{$key}++;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
977 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
978 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
979 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
980 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
981 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
982 close(F);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
983
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
984 return \%subcogcount;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
985 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
986
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
987
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
988
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
989
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
990 sub getCOG()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
991 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
992 (my $data,my $coghash)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
993 my $cog="";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
994 my @cog;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
995 my $key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
996 my %hash;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
997 my @gene=split(/\t|\,/,join("\t",@$data));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
998 @gene=grep(/^S/,@gene);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
999
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1000 foreach $key (@gene)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1001 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1002 if (($$coghash{$key} ne "-") and ($$coghash{$key} ne ""))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1003 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1004 $cog=$cog.",".$$coghash{$key};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1005 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1006 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1007 @cog=split(/,/,$cog);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1008 foreach $cog (@cog)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1009 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1010 if ($cog ne "")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1011 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1012 $hash{$cog}=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1013 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1014 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1015
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1016 $cog=join(",",(keys %hash));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1017 if ($cog eq "")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1018 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1019 $cog="-";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1020 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1021 return $cog;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1022 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1023
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1024 sub getDescription()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1025 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1026 (my $data,my $deschash)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1027 my $desc="";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1028 my $key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1029 my @gene=split(/\t|\,/,join("\t",@$data));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1030 @gene=grep(/^S/,@gene);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1031
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1032 foreach $key (@gene)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1033 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1034 if ( ($$deschash{$key} ne "") and ($$deschash{$key} ne "-") and ($$deschash{$key}!~/hypothetical/))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1035 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1036 $desc=$$deschash{$key};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1037 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1038 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1039
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1040 if ($desc eq "")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1041 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1042 $desc="hypothetical protein";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1043 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1044
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1045 return $desc;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1046 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1047
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1048
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1049
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1050 sub ReadAnnotation()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1051 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1052 (my $species,my $cog,my $description)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1053 my $i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1054 my @row;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1055
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1056 for ($i=0;$i<@$species;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1057 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1058 open(F,"$$species[$i].function");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1059 while (<F>)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1060 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1061 chomp($_);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1062 @row=split(/\t/,$_);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1063 if (scalar(@row)>=2)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1064 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1065 $$cog{$row[0]}=$row[1];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1066 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1067 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1068 $$cog{$row[0]}="-";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1069 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1070
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1071 if (scalar(@row)>=3)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1072 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1073 $$description{$row[0]}=$row[2];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1074 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1075 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1076 $$description{$row[0]}="hypothetical protein";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1077 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1078 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1079 close(F);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1080 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1081 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1082
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1083
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1084 sub SNPBasedTree()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1085 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1086 (my $infile,my $outfileprefix)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1087 my $tmpin=$infile;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1088 my $tmpout;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1089
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1090
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1091 #### boootstrap
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1092 print "\n#### seqboot ...\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1093 open(R,">seqboot.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1094 #print R "$tmpin\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1095 print R "R\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1096 print R "$bootstrap\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1097 print R "Y\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1098 print R "1\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1099 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1100
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1101 system("cp $tmpin infile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1102 system("$seqboot < seqboot.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1103 system("mv outfile 100dnaseq");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1104 system("rm -rf infile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1105 system("rm seqboot.cmd"); # 100dnasseq
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1106
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1107 #### dnaml
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1108 print "\n#### dnaml ...\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1109 open(R,">dnaml.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1110 #print R "100dnaseq\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1111 print R "T\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1112 print R "25\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1113 if ($bootstrap>1)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1114 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1115 print R "M\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1116 print R "D\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1117 #print R "100\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1118 print R "$bootstrap\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1119 print R "1\n"; # Random number seed (must be odd)?
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1120 print R "5\n"; # Number of times to jumble?
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1121 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1122 print R "Y\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1123 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1124
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1125 system("cp 100dnaseq infile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1126 system("$dnaml < dnaml.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1127 system("rm -rf outfile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1128 system("rm -rf infile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1129 system("mv outtree 100dnaseqtree"); # 100dnaseq, 100dnaseqtree
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1130
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1131 #### consense
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1132 print "\n#### dnaml consense ...\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1133
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1134 open(R,">consense.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1135 #print R "100dnaseqtree\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1136 print R "Y\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1137 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1138
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1139 system("cp 100dnaseqtree intree");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1140 system("$consense < consense.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1141 system("mv outfile ${outfileprefix}.ML.outfile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1142 system("mv outtree ${outfileprefix}.ML.tree");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1143 system("rm -rf infile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1144 system("rm -rf 100dnaseqtree"); # 100dnaseq
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1145
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1146 #### dnadist
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1147 print "\n#### dnadist ...\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1148 open(R,">dnadist.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1149 #print R "100dnaseq\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1150 print R "T\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1151 print R "25\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1152 if ($bootstrap>1)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1153 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1154 print R "M\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1155 print R "D\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1156 #print R "100\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1157 print R "$bootstrap\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1158 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1159 print R "Y\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1160 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1161
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1162 system("cp 100dnaseq infile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1163 system("$dnadist < dnadist.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1164 system("rm -rf 100dnaseq");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1165 system("rm -rf infile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1166 system("mv outfile 100dnadist"); # 100dnadist
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1167
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1168 #### Neighbor-joining tree
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1169 print "\n#### Neighbor-joining ...\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1170 open(R,">NJ.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1171 if ($bootstrap>1)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1172 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1173 #print R "100dnadist\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1174 print R "M\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1175 #print R "100\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1176 print R "$bootstrap\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1177 print R "1\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1178 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1179 print R "Y\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1180 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1181
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1182 system("cp 100dnadist infile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1183 system("$neighbor < NJ.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1184 system("mv outtree 100dnadistNJtree");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1185 system("rm outfile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1186 system("rm -rf infile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1187 system("rm -rf NJ.cmd"); # 100dnadist,100dnadistNJtree
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1188
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1189 #### NJ-consense
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1190 print "\n#### NJ-consense ...\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1191 open(R,">NJ-consense.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1192 #print R "100dnadistNJtree\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1193 print R "Y\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1194 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1195
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1196 system("cp 100dnadistNJtree intree");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1197 system("$consense < NJ-consense.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1198 system("mv outfile ${outfileprefix}.Neighbor-joining.outfile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1199 system("mv outtree ${outfileprefix}.Neighbor-joining.tree");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1200 system("rm -rf NJ-consense.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1201 system("rm -rf intree");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1202 system("rm -rf 100dnadistNJtree");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1203
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1204
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1205 #### UPGMA tree
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1206 print "\n#### UPGMA ...\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1207 open(R,">UPGMA.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1208 #print R "100dnadist\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1209 print R "N\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1210 if ($bootstrap>1)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1211 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1212 print R "M\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1213 #print R "100\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1214 print R "$bootstrap\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1215 print R "1\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1216 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1217 print R "Y\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1218 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1219
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1220 system("cp 100dnadist infile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1221 system("$neighbor < UPGMA.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1222 system("mv outtree 100dnadistUPGMAtree");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1223 system("rm -rf outfile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1224 system("rm -rf infile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1225 system("rm -rf UPGMA.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1226
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1227 #### UPGMA-consense
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1228 print "\n#### UPGMA-consense ...\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1229 open(R,">UPGMA-consense.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1230 #print R "100dnadistUPGMAtree\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1231 print R "Y\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1232 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1233
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1234 system("cp 100dnadistUPGMAtree intree");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1235 system("$consense < UPGMA-consense.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1236 system("mv outfile ${outfileprefix}.UPGMA.outfile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1237 system("mv outtree ${outfileprefix}.UPGMA.tree");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1238 system("rm -rf UPGMA-consense.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1239 system("rm -rf 100dnadistUPGMAtree");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1240 system("rm -rf intree");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1241
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1242 ###CLEAN TMP FILE
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1243
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1244 system("rm -rf *.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1245 system("rm -rf 100dnadist");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1246 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1247
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1248 sub PanBasedTree()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1249 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1250 (my $infile,my $outfileprefix)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1251 my $tmpin;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1252 my $tmpout;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1253
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1254 $tmpin=$infile;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1255 #### Neighbor-joining tree
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1256
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1257 open(R,">NJ.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1258 #print R "$tmpin\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1259 print R "Y\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1260 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1261
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1262 system("cp $tmpin infile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1263 system("$neighbor < NJ.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1264 system("mv outfile ${outfileprefix}.Neighbor-joining.outfile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1265 system("mv outtree ${outfileprefix}.Neighbor-joining.tree");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1266 system("rm -rf NJ.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1267 system("rm -rf infile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1268
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1269 #### UPGMA tree
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1270
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1271 open(R,">UPGMA.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1272 #print R "$tmpin\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1273 print R "N\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1274 print R "Y\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1275 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1276
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1277 system("cp $tmpin infile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1278 system("$neighbor < UPGMA.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1279 system("mv outfile ${outfileprefix}.UPGMA.outfile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1280 system("mv outtree ${outfileprefix}.UPGMA.tree");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1281 system("rm -rf UPGMA.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1282 system("rm -rf infile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1283
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1284
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1285 ###CLEAN TMP FILE
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1286 system("rm -rf *.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1287 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1288
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1289 sub DistanceMatrix()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1290 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1291 (my $spnum,my $hash)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1292 my $i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1293 my $j;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1294 my $k;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1295 my $dist;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1296 my $ref;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1297 my $query;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1298 foreach $i (0..($spnum-1))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1299 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1300 foreach $j ($i..($spnum-1))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1301 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1302 $ref=$$hash{$i};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1303 $query=$$hash{$j};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1304 $dist=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1305 for ($k=0;$k<length($ref);$k++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1306 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1307 if (substr($ref,$k,1) ne substr($query,$k,1))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1308 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1309 $dist++;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1310 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1311 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1312 $$hash{$i."-".$j}=$dist;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1313 $$hash{$j."-".$i}=$dist;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1314 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1315 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1316 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1317
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1318
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1319 sub ClusterProfil4Specie
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1320 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1321 (my $hash)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1322 my @row;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1323 my $i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1324
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1325 foreach (0..($spnum-1)) #initialization Hash
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1326 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1327 $$hash{$_}="";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1328 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1329
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1330 foreach (@clusters)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1331 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1332 @row=split(/\t/,$_);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1333 splice(@row,0,1);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1334 if (&CountSpeicesInCluster(join("\t",@row))>1)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1335 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1336 for ($i=0;$i<@row;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1337 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1338 if ($row[$i] eq "-")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1339 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1340 $$hash{$i}=$$hash{$i}."0";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1341 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1342 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1343 $$hash{$i}=$$hash{$i}."1";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1344 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1345 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1346 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1347 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1348 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1349
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1350 # &ExtractSNP4tree(\%tmpHash,\%nt4tree);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1351
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1352 sub ExtractSNP4tree()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1353 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1354 (my $hash,my $nt4treeRef)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1355 my $key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1356 my @row;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1357 my $i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1358 my $len;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1359 my @tribases;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1360 foreach $key (keys %$hash)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1361 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1362 $$hash{substr($key,0,index($key,"G"))}=$$hash{$key};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1363 delete($$hash{$key});
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1364 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1365
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1366 for ($i=0;$i<$spnum;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1367 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1368 $nt4tree{"S".$i}=$nt4tree{"S".$i}.$$hash{"S".$i};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1369 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1370 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1371
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1372
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1373 =pod
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1374 sub ExtractSNP4tree()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1375 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1376 (my $hash,my $nt4treeRef)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1377 my $key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1378 my @row;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1379 my $i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1380 my $len;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1381 my @tribases;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1382 foreach $key (keys %$hash)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1383 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1384 $$hash{substr($key,0,index($key,"G"))}=$$hash{$key};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1385 delete($$hash{$key});
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1386 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1387 @_=(keys %$hash);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1388 $len=length($_[0]);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1389 for ($j=0;3*$j<$len;$j++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1390 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1391 ##### scanning each codon
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1392 for ($i=0;$i<$spnum;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1393 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1394 $tribases[$i]=substr($$hash{"S".$i},3*$j,3);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1395 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1396 ##### checking each codon
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1397 if (&IsTheSame(@tribases) ==0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1398 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1399 for ($i=0;$i<@tribases;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1400 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1401 $nt4tree{"S".$i}=$nt4tree{"S".$i}.$tribases[$i];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1402 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1403 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1404 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1405 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1406 =cut
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1407
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1408
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1409 sub pal2nal()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1410 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1411 (my $pal,my $nuc, my $nal)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1412 my %aaAln=();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1413 my %ffn=();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1414 my %ntAln=();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1415 my %nt;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1416 my $dna;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1417 my $nt;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1418 my $key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1419 my $flag=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1420 my $i=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1421 my $j;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1422
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1423 ### read protein aligment result
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1424 &ReadAlignmentToHash("$pal",\%aaAln);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1425 ### read nt sequences
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1426 &ReadSequenceInToHash("$nuc",\%ffn);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1427 foreach $key (keys %ffn)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1428 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1429 $dna=$ffn{$key};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1430 #if (int(length($nt{$key})/3)*3 ne length($nt{$key}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1431 if (int(length($dna)/3)*3 ne length($dna))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1432 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1433 $flag=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1434 print "The length of nucleotide sequence is not 3 integer times.\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1435 last;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1436 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1437 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1438 for ($i=0;$i<(length($dna)/3);$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1439 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1440 $nt{$key."|".$i}=substr($dna,$i*3,3);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1441 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1442 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1443 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1444
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1445 if ($flag==0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1446 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1447 return 0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1448 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1449 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1450 foreach $key (keys %aaAln) ### replace aa with corresponding nt
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1451 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1452 $nt="";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1453 $i=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1454 for ($j=0;$j<length($aaAln{$key});$j++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1455 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1456 if (substr($aaAln{$key},$j,1) eq "-")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1457 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1458 $nt=$nt."---";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1459 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1460 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1461 $nt=$nt.$nt{$key."|".$i};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1462 $i++;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1463 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1464 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1465 $ntAln{$key}=$nt;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1466 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1467
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1468 ### output
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1469 open(R,">$nal");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1470 foreach (keys %ntAln)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1471 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1472 print R ">$_\n".$ntAln{$_}."\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1473 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1474 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1475
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1476 return 1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1477 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1478 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1479
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1480
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1481
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1482
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1483 sub DetectSNP()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1484 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1485 my %faa;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1486 my %ffn;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1487 my @row;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1488 my $count_gene;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1489 my $count_sp;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1490 my @genelist;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1491 my $i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1492 my $j;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1493 my $pepalnlen;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1494 my @cdsvar=qw();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1495 my $cdi=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1496 my @tribases;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1497 my @bases;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1498 my @aa;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1499
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1500
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1501 ### fetch gene list
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1502 open(F,"$ClusterID.pep");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1503 @genelist=<F>;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1504 close(F);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1505 @genelist=grep(/^>/,@genelist);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1506 chomp(@genelist);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1507 $_=join("\t",@genelist);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1508 s/>//g;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1509 @genelist=split(/\t/,$_);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1510
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1511 ### count gene number and species number
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1512 @row=split(/\t/,$clusters[$ClusterID-1]);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1513 splice(@row,0,1);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1514 $count_sp=&CountSpeicesInCluster(join("\t",@row));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1515 $count_gene=&CountGeneInCluster(join("\t",@row));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1516
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1517 ### read alignment sequences
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1518 &ReadAlignmentToHash("$ClusterID.pal",\%faa);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1519 &ReadAlignmentToHash("$ClusterID.nal",\%ffn);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1520
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1521 @_=(keys %faa);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1522 $pepalnlen=length($faa{$_[0]});
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1523 ### scan SNP
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1524 for ($i=1;$i<=$pepalnlen;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1525 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1526 @tmp=qw();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1527 @tribases=qw();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1528 for ($j=0;$j<@genelist;$j++) ### fetch triplet codon
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1529 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1530 $tribases[$j]=substr($ffn{$genelist[$j]},3*($i-1),3);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1531 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1532 if (&IsTheSame(@tribases) ==0) ### if triplet codon is not consistent
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1533 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1534 @aa=qw();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1535 for ($j=0;$j<@genelist;$j++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1536 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1537 $aa[$j]=substr($faa{$genelist[$j]},($i-1),1);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1538 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1539 if (&IsTheSame(@aa) ==0) ### aa is not consistent
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1540 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1541 if (join("",@aa) =~/-/)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1542 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1543 $cdsvar[$cdi++]=$ClusterID."\t".$count_sp."\t".$count_gene."\t".$i."\t".&CharType(\@aa)."\t-\t-\tInDel\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1544 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1545 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1546 #### base 1
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1547 for ($j=0;$j<@genelist;$j++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1548 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1549 $bases[$j]=substr($ffn{$genelist[$j]},3*($i-1),1);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1550 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1551 if (&IsTheSame(@bases) ==0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1552 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1553 $cdsvar[$cdi++]=$ClusterID."\t".$count_sp."\t".$count_gene."\t".($i+0.1)."\t".&CharType(\@aa)."\t".&CharType(\@bases)."\t".join("",@bases)."\tNonsynonymous mutation\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1554 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1555 #### base 2
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1556 for ($j=0;$j<@genelist;$j++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1557 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1558 $bases[$j]=substr($ffn{$genelist[$j]},3*($i-1)+1,1);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1559 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1560 if (&IsTheSame(@bases) ==0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1561 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1562 $cdsvar[$cdi++]=$ClusterID."\t".$count_sp."\t".$count_gene."\t".($i+0.2)."\t".&CharType(\@aa)."\t".&CharType(\@bases)."\t".join("",@bases)."\tNonsynonymous mutation\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1563 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1564 #### base 3
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1565 for ($j=0;$j<@genelist;$j++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1566 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1567 $bases[$j]=substr($ffn{$genelist[$j]},3*($i-1)+2,1);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1568 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1569 if (&IsTheSame(@bases) ==0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1570 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1571 $cdsvar[$cdi++]=$ClusterID."\t".$count_sp."\t".$count_gene."\t".($i+0.3)."\t".&CharType(\@aa)."\t".&CharType(\@bases)."\t".join("",@bases)."\tNonsynonymous mutation\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1572 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1573 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1574 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1575 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1576 #### base 1
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1577 for ($j=0;$j<@genelist;$j++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1578 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1579 $bases[$j]=substr($ffn{$genelist[$j]},3*($i-1),1);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1580 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1581 if (&IsTheSame(@bases) ==0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1582 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1583 $cdsvar[$cdi++]=$ClusterID."\t".$count_sp."\t".$count_gene."\t".($i+0.1)."\t".&CharType(\@aa)."\t".&CharType(\@bases)."\t".join("",@bases)."\tSynonymous mutation\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1584 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1585 #### base 2
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1586 for ($j=0;$j<@genelist;$j++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1587 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1588 $bases[$j]=substr($ffn{$genelist[$j]},3*($i-1)+1,1);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1589 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1590 if (&IsTheSame(@bases) ==0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1591 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1592 $cdsvar[$cdi++]=$ClusterID."\t".$count_sp."\t".$count_gene."\t".($i+0.2)."\t".&CharType(\@aa)."\t".&CharType(\@bases)."\t".join("",@bases)."\tSynonymous mutation\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1593 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1594 #### base 3
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1595 for ($j=0;$j<@genelist;$j++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1596 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1597 $bases[$j]=substr($ffn{$genelist[$j]},3*($i-1)+2,1);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1598 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1599 if (&IsTheSame(@bases) ==0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1600 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1601 $cdsvar[$cdi++]=$ClusterID."\t".$count_sp."\t".$count_gene."\t".($i+0.3)."\t".&CharType(\@aa)."\t".&CharType(\@bases)."\t".join("",@bases)."\tSynonymous mutation\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1602 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1603 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1604 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1605 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1606 return @cdsvar;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1607 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1608
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1609
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1610 sub VarAnalysis()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1611 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1612 (my $data)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1613 my @data=@$data;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1614 my $indel=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1615 my $syn=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1616 my $nonsyn=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1617 my @tmp;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1618 $indel=scalar(grep(/InDel$/,@data));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1619 $nonsyn=scalar(grep(/Nonsynonymous mutation$/,@data));;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1620 $syn=scalar(grep(/Synonymous mutation$/,@data));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1621 return "$indel\t$nonsyn\t$syn";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1622 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1623
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1624
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1625
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1626 sub CharType()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1627 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1628 (my $str)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1629 my %hash;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1630 my @data=@$str;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1631 foreach (@data)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1632 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1633 $hash{$_}=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1634 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1635 return join(",",(keys %hash));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1636 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1637
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1638 sub IsTheSame()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1639 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1640 (my @data)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1641 my %hash;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1642 foreach (@data)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1643 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1644 $hash{$_}=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1645 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1646 if (scalar(keys %hash) ==1)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1647 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1648 return 1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1649 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1650 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1651 return 0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1652 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1653 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1654
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1655
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1656
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1657 sub FormatClusterOutPut()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1658 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1659 (my $speices,my $file,my $cluster)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1660 my @row;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1661 my $gid=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1662 my $key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1663 my %hash;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1664 my $gene;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1665 my @tmp;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1666 my $i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1667 my $j;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1668 open(R,">$file");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1669 print R "ClutserID\t".join("\t",@$speices)."\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1670 foreach $key (@$cluster)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1671 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1672 @row=split(/\t/,$key);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1673 for ($i=0;$i<@row;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1674 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1675 if ($row[$i] ne "-")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1676 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1677 @tmp=split(/,/,$row[$i]);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1678 for ($j=0;$j<@tmp;$j++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1679 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1680 $_=$tmp[$j];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1681 s/^S[0-9]+G//;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1682 $tmp[$j]=$_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1683 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1684 $row[$i]=join(",",@tmp);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1685 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1686 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1687 print R $gid."\t".join("\t",@row)."\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1688 $gid++;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1689 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1690 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1691 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1692 sub RetrieveClusterFromFile()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1693 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1694 (my $file,my $clusters)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1695 my @content;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1696 my @row;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1697 my $spid;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1698 my $line=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1699 my $i=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1700 my $j;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1701 my @tmp;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1702 open(F,$file) or die "Could open $file\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1703 @content=<F>;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1704 close(F);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1705 splice(@content,0,1);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1706 chomp(@content);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1707 foreach (@content)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1708 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1709 @row=split(/\t/,$_);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1710 $$clusters[$line]=$row[0];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1711 splice(@row,0,1);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1712 for ($i=0;$i<@row;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1713 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1714 if ($row[$i] ne "-")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1715 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1716 @tmp=split(/,/,$row[$i]);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1717 for ($j=0;$j<@tmp;$j++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1718 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1719 $tmp[$j]="S${i}G".$tmp[$j];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1720 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1721 $row[$i]=join(",",@tmp);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1722 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1723 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1724 $$clusters[$line]=$$clusters[$line]."\t".join("\t",@row)."\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1725 $line++;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1726 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1727 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1728
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1729
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1730
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1731
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1732 sub GeneDistribution()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1733 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1734 (my $clusters,my $hash)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1735 my @row;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1736 my $spid;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1737 my $orth;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1738 my $key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1739 foreach (@$clusters)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1740 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1741 @row=split(/\t/,$_);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1742 splice(@row,0,1);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1743 $orth=&CountSpeicesInCluster(join("\t",@row));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1744 @row=split(/\t|\,/,join("\t",@row));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1745 foreach $key (@row)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1746 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1747 if ($key ne "-")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1748 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1749 $spid=substr($key,1,(index($key,'G')-1)); ###extract strains id
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1750 if (exists($$hash{$orth."|".$spid}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1751 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1752 $$hash{$orth."|".$spid}++;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1753 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1754 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1755 $$hash{$orth."|".$spid}=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1756 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1757 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1758 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1759 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1760 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1761
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1762 sub CountSpeicesInCluster()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1763 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1764 (my $str)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1765 chomp($str);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1766 my @list=split(/\t/,$str);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1767 my $key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1768 my $count=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1769
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1770 foreach $key (@list)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1771 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1772 if ($key ne "-")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1773 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1774 $count++;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1775 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1776 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1777 return $count;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1778 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1779
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1780 sub CountGeneInCluster()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1781 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1782 (my $str)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1783 chomp();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1784 my @list=split(/\t|\,/,$str);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1785 my $key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1786 my $count=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1787 foreach $key (@list)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1788 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1789 if ($key ne "-")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1790 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1791 $count++;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1792 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1793 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1794 return $count;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1795 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1796
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1797
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1798
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1799
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1800 sub GF()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1801 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1802 &PrepareFasta(\@species,$inputDIR,".pep"); ###prepare pep file
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1803 system("cat ".join(".pep ",@species).".pep > All.pep");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1804 system("grep '>' All.pep > genelist");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1805 system("$formatdb -p T -i All.pep");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1806 system("$blastall -p blastp -i All.pep -d All.pep -M BLOSUM45 -m9 -e $evalue -o All.blastp -a $thread");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1807 system("perl ./Blast_Filter.pl All.blastp All.pep $coverage $identity $score | $mcl - --abc -I 2.0 -o All.cluster");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1808 &FormatCluster("All.cluster","genelist",$spnum,\@clusters);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1809 #system("rm -rf *.pep* All.blastp All.cluster genelist");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1810 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1811
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1812 sub MP()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1813 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1814 # (my $species,my $inputDIR,my $thread,my $evalue,my $score,my $coverage,my $identity)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1815 my $i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1816 my $j;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1817 &PrepareFasta(\@species,$inputDIR,".pep"); ###prepare pep file
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1818 system("cat ".join(".pep ",@species).".pep > All.pep");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1819 system("grep '>' All.pep > genelist");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1820 system("rm -rf All.pep");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1821 for ($i=0;$i<$spnum;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1822 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1823 for ($j=$i+1;$j<$spnum;$j++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1824 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1825 system("perl ./inparanoid.pl $blastall $thread $formatdb $score $global $local $species[$i].pep $species[$j].pep");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1826 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1827 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1828 system("perl ./multiparanoid.pl -species ".join(".pep+",@species).".pep -unique 1");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1829 ###convert the MP result to table list based on gene
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1830 &MP_Result_to_Table("MP.Cluster","All.cluster");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1831 &FormatCluster("All.cluster","genelist",$spnum,\@clusters);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1832 system("rm -rf sqltable.* *.pep* MP.Cluster genelist");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1833 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1834
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1835 sub fasta2phylip()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1836 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1837 (my $input,my $output)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1838 use Bio::AlignIO;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1839 my $inputfilename = "10.aln";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1840 my $in= Bio::AlignIO->new(-file => $input ,
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1841 -format => 'fasta');
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1842 my $out = Bio::AlignIO->new(-file => ">$output" ,
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1843 -format => 'phylip');
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1844 while ( my $aln = $in->next_aln() )
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1845 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1846 $out->write_aln($aln);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1847 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1848 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1849
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1850
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1851 sub RemoveHeadGap()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1852 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1853 (my $nal,my $hash)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1854 my %aln;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1855 my $key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1856 my $gaplength=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1857 my $len1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1858 my $len2;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1859 &ReadSequenceInToHash("$nal",\%aln);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1860 foreach $key (keys %aln)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1861 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1862 $len1=length($aln{$key});
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1863 $_=$aln{$key};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1864 s/^-+//;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1865 $len2=length($_);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1866 if (($len1-$len2)>$gaplength)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1867 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1868 $gaplength=$len1-$len2;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1869 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1870 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1871 foreach $key (keys %aln)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1872 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1873 $$hash{$key}=$$hash{$key}.substr($aln{$key},$gaplength,(length($aln{$key})-$gaplength));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1874 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1875 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1876
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1877 sub PrepareFasta()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1878 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1879 (my $species,my $inputDIR,my $extention)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1880 my $sp;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1881 my $file;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1882 my $i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1883 my %hash;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1884 my $key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1885 for ($i=0;$i<@$species;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1886 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1887 $file=$inputDIR.$$species[$i].$extention;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1888 %hash=();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1889 &ReadSequenceInToHash($file,\%hash);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1890 open(R,">$$species[$i]${extention}") or die "Could write into $file\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1891 foreach $key (keys %hash)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1892 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1893 print R ">S${i}G$key\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1894 print R $hash{$key}."\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1895 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1896 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1897 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1898 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1899
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1900 sub PrepareTable()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1901 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1902 (my $species,my $inputDIR,my $extention)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1903 my @content;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1904 my $i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1905 my @row;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1906 my $file;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1907 for ($i=0;$i<@$species;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1908 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1909 $file=$inputDIR.$$species[$i].$extention;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1910 open(F,$file) or die "Could open $file\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1911 @content=<F>;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1912 close(F);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1913 chomp(@content);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1914 open(R,">$$species[$i]${extention}") or die "Could write into $file\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1915 foreach (@content)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1916 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1917 @row=split(/\t/,$_);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1918 $row[0]="S${i}G$row[0]";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1919 if ($extention eq ".location")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1920 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1921 $row[0]=$row[0]."\t".$row[0];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1922 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1923 print R join("\t",@row)."\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1924 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1925 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1926 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1927 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1928
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1929 sub CheckExtraProgram
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1930 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1931 #(my $section, my $method, my $tmparray)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1932 my @error;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1933 my $ei=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1934
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1935 #####cluster gene
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1936 if (substr($section,0,1) eq "1")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1937 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1938 ###MP: blastall formatdb
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1939 ###GF: blastall formatdb mcl
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1940
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1941 if (!(-e $formatdb))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1942 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1943 $error[$ei++]="formatdb is not found at $formatdb\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1944 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1945
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1946 if (!(-X $formatdb))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1947 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1948 $error[$ei++]="there is not premission to execute $formatdb\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1949 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1950
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1951 if (!(-e $blastall))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1952 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1953 $error[$ei++]="blastall is not found at $blastall\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1954 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1955
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1956 if (!(-X $blastall))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1957 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1958 $error[$ei++]="there is not premission to execute $blastall\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1959 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1960
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1961 if ($method eq "GF")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1962 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1963 if (!(-e $mcl))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1964 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1965 $error[$ei++]="mcl is not found at $mcl\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1966 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1967 if (!(-X $mcl))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1968 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1969 $error[$ei++]="there is not premission to execute $mcl\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1970 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1971 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1972 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1973
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1974 #####CDS variation
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1975 if (substr($section,2,1) eq "1")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1976 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1977 if (!(-e $mafft))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1978 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1979 $error[$ei++]="mafft is not found at $mafft\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1980 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1981 if (!(-X $mafft))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1982 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1983 $error[$ei++]="there is not premission to execute $mafft\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1984 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1985 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1986
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1987 #####CDS variation
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1988 if (substr($section,3,1) eq "1")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1989 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1990 if (!(-e $mafft))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1991 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1992 $error[$ei++]="mafft is not found at $mafft\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1993 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1994 if (!(-X $mafft))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1995 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1996 $error[$ei++]="there is not premission to execute $mafft\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1997 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1998 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1999 #####Evolution analysis
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2000 if (substr($section,3,1) eq "1")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2001 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2002 if (-e $seqboot)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2003 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2004 $error[$ei++]="there is not premission to execute $seqboot\n" if(!(-X $seqboot));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2005 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2006 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2007 $error[$ei++]="seqboot is not found at $seqboot\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2008 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2009 if (-e $dnaml)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2010 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2011 $error[$ei++]="there is not premission to execute $dnaml\n" if(!(-X $dnaml));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2012 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2013 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2014 $error[$ei++]="dnaml is not found at $dnaml\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2015 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2016 if (-e $dnadist)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2017 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2018 $error[$ei++]="there is not premission to execute $dnadist\n" if(!(-X $dnadist));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2019 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2020 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2021 $error[$ei++]="dnadist is not found at $dnadist\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2022 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2023 if (-e $neighbor)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2024 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2025 $error[$ei++]="there is not premission to execute $neighbor\n" if(!(-X $neighbor));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2026 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2027 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2028 $error[$ei++]="neighbor is not found at $neighbor\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2029 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2030 if (-e $consense)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2031 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2032 $error[$ei++]="there is not premission to execute $consense\n" if(!(-X $consense));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2033 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2034 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2035 $error[$ei++]="consense is not found at $consense\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2036 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2037 if (-e $dnapars)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2038 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2039 $error[$ei++]="there is not premission to execute $dnapars\n" if(!(-X $dnapars));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2040 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2041 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2042 $error[$ei++]="dnapars is not found at $dnapars\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2043 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2044 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2045 #@$tmparray=(@$tmparray,@error);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2046 @tmp=(@tmp,@error);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2047 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2048
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2049
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2050 sub CheckInputFile()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2051 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2052 (my $species,my $inputDIR,my $section,my $method,my $tmparray)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2053 ####cluster
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2054 if (substr($section,0,1) eq "1")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2055 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2056 if ($method eq "MM")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2057 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2058 @$tmparray=(@$tmparray,&chk2SEQ($species,$inputDIR)); ### check pep and nuc
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2059 @$tmparray=(@$tmparray,&chktab($species,$inputDIR,".location"));### chk pep nuc location
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2060 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2061 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2062 @$tmparray=(@$tmparray,&chk1SEQ($species,$inputDIR));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2063 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2064 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2065 ###CDS variation
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2066 if (substr($section,2,1) eq "1")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2067 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2068 @$tmparray=(@$tmparray,&chk2SEQ($species,$inputDIR));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2069 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2070 ###function analysis
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2071 if (substr($section,4,1) eq "1")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2072 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2073 @$tmparray=(@$tmparray,&chktab($species,$inputDIR,".function"));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2074 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2075 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2076
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2077
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2078 sub chk1SEQ()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2079 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2080 (my $species,my $inputDIR)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2081 my @error;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2082 my $ei=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2083 my $sp;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2084 my $pepfile;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2085 my %pep;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2086 foreach $sp (@$species)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2087 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2088 %pep=();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2089 $pepfile=$inputDIR.$sp.".pep";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2090 &ReadSequenceInToHash($pepfile,\%pep);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2091 if (scalar(keys %pep)<2)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2092 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2093 $error[$ei++]="format error in $pepfile\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2094 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2095 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2096 return @error;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2097 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2098
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2099 sub chk2SEQ()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2100 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2101 (my $species,my $inputDIR)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2102 my $sp;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2103 my %pep;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2104 my %nuc;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2105 my $pepfile;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2106 my $nucfile;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2107 my $key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2108 my @error;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2109 my $ei=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2110 foreach $sp (@$species)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2111 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2112 $pepfile=$inputDIR.$sp.".pep";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2113 $nucfile=$inputDIR.$sp.".nuc";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2114 %pep=();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2115 %nuc=();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2116 &ReadSequenceInToHash("$pepfile",\%pep);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2117 &ReadSequenceInToHash("$nucfile",\%nuc);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2118 if (scalar(keys %pep) ne scalar(keys %nuc))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2119 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2120 $error[$ei++]="Sequences number is not consistent in the following two file:\n\t$pepfile\n\t$nucfile\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2121 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2122 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2123 foreach $key (keys %pep)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2124 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2125 if (exists($nuc{$key}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2126 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2127 if (length($nuc{$key}) ne ((length($pep{$key})+1)*3))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2128 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2129 $error[$ei++]="the length of $key in $nucfile is not consistent with its corresponding protein length\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2130 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2131 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2132 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2133 $error[$ei++]="$key lost in $nucfile\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2134 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2135 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2136
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2137 foreach $key (keys %nuc)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2138 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2139 if (!exists($pep{$key}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2140 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2141 $error[$ei++]="1048 $key lost in $pepfile\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2142 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2143 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2144 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2145 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2146 return @error;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2147 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2148
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2149
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2150 sub chktab()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2151 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2152 (my $species,my $inputDIR,my $extention)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2153 my %pep;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2154 my @row;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2155 my $key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2156 my %tab;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2157 my @error;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2158 my $ei=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2159 my $sp;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2160 my $tabfile;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2161 my $pepfile;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2162 foreach $sp (@$species)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2163 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2164 %tab=();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2165 %pep=();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2166 $tabfile=$inputDIR.$sp.$extention;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2167 open(F,"$tabfile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2168 while (<F>)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2169 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2170 chomp();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2171 @row=split(/\t/,$_);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2172 if (scalar(@row)<3)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2173 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2174 $error[$ei++]="format error in $tabfile\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2175 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2176 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2177 $tab{$row[0]}=$row[1];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2178 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2179 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2180 close(F);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2181 $pepfile=$inputDIR.$sp.".pep";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2182 &ReadSequenceInToHash($pepfile,\%pep);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2183 foreach $key (keys %pep)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2184 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2185 if (!exists($tab{$key}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2186 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2187 $error[$ei++]="sequence $key lost infomation in $tabfile\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2188 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2189 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2190 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2191 return @error;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2192 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2193
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2194
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2195
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2196 sub ReadSequenceInToHash()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2197 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2198 use Bio::SeqIO;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2199 (my $file,my $hash)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2200 my $seq;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2201 my $in=Bio::SeqIO->new(-file=>"$file",-format=>"fasta");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2202 while ($seq=$in->next_seq())
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2203 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2204 #$$hash{$id."|".$seq->id}=$seq->seq();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2205 $$hash{$seq->id}=$seq->seq();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2206 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2207 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2208
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2209 sub ReadAlignmentToHash()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2210 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2211 (my $file,my $hash)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2212 my $name="";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2213 my $seq="";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2214 my @content;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2215 my $line;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2216 open(F,"$file");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2217 @content=<F>;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2218 close(F);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2219 chomp(@content);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2220 for ($line=0;$line<@content;$line++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2221 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2222 if ($content[$line]=~/^>/)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2223 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2224 if ($line>0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2225 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2226 $$hash{$name}=$seq;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2227 $name="";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2228 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2229
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2230 $_=$content[$line];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2231 s/^>//;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2232 $name=$_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2233 $seq="";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2234 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2235 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2236 if ($name ne "")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2237 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2238 $seq=$seq.$content[$line];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2239 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2240 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2241 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2242 $$hash{$name}=$seq;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2243 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2244
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2245
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2246
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2247 sub Combination()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2248 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2249 (my $m,my $n,my $comRef)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2250 my $str="";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2251 my %hash;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2252 my $fpos;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2253 my $num0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2254 my $rest;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2255 my $tmp;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2256 my $i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2257 my $j;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2258 my $key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2259 #my $m=scalar(@$array);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2260 my @combination;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2261
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2262 for ($i=1;$i<=$n;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2263 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2264 $str="1".$str;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2265 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2266
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2267 for ($i=1;$i<=($m-$n);$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2268 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2269 $str=$str."0";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2270 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2271
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2272 $hash{$str}=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2273 while ($str=~/10/)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2274 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2275 $fpos=index($str,"10");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2276 $_=$str;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2277 s/10/01/;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2278 $str=$_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2279 $tmp=substr($str,0,$fpos);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2280 $_=$tmp;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2281 s/0//g;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2282 $rest=$_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2283 $num0=$fpos-length($_);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2284 for ($i=1;$i<=$num0;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2285 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2286 $rest="$rest"."0";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2287 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2288 $str="$rest".substr($str,$fpos,$m-$fpos);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2289 $hash{$str}=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2290 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2291 $j=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2292 foreach $key (keys %hash)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2293 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2294 $combination[$j]="";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2295 for ($i=0;$i<$m;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2296 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2297 if (substr($key,$i,1) eq "1")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2298 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2299 if ($combination[$j] ne "")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2300 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2301 #$combination[$j]=$combination[$j]."\t".$$array[$i];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2302 $combination[$j]=$combination[$j]."\t".$i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2303 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2304 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2305 #$combination[$j]=$$array[$i]; ### For return species ID
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2306 $combination[$j]=$i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2307 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2308 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2309 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2310 $j++;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2311 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2312 @$comRef=@combination; ### update the data through the physic address
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2313 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2314
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2315 sub ChkCombinationValue()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2316 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2317 (my $m,my $n)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2318 my %hash;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2319 my %vhash;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2320 my $value=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2321 my $key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2322 my @row;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2323 my @sdA;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2324 my @sdB;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2325
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2326 ### initialization
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2327 $hash{$m."-".$n}=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2328
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2329 ### split combination
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2330 while (scalar(keys %hash)>0 and $value<=$sampleSize)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2331 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2332 foreach $key (keys %hash)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2333 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2334 if ($value > $sampleSize) ### threshold
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2335 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2336 last;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2337 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2338 if (!exists($hash{$key}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2339 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2340 next;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2341 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2342 @row=split(/-/,$key);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2343 #print $row[0]."|".$row[1]."\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2344 if ($row[0] eq $row[1])
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2345 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2346 $value=$value+$hash{$key};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2347 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2348 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2349 ##split
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2350 $sdA[0]=$row[0]-1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2351 $sdA[1]=$row[1];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2352 $sdB[0]=$row[0]-1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2353 $sdB[1]=$row[1]-1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2354 ##storing A
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2355 if (($sdA[0] eq $sdA[1]) or $sdA[1] ==0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2356 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2357 $value=$value+$hash{$key};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2358 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2359 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2360 if (exists($hash{$sdA[0]."-".$sdA[1]}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2361 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2362 $hash{$sdA[0]."-".$sdA[1]}=$hash{$sdA[0]."-".$sdA[1]}+$hash{$key};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2363 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2364 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2365 $hash{$sdA[0]."-".$sdA[1]}=$hash{$key};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2366 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2367 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2368
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2369 ##storing B
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2370 if (($sdB[0] eq $sdB[1]) or $sdB[1]==0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2371 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2372 $value=$value+$hash{$key};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2373 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2374 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2375 if (exists($hash{$sdB[0]."-".$sdB[1]}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2376 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2377 $hash{$sdB[0]."-".$sdB[1]}=$hash{$sdB[0]."-".$sdB[1]}+$hash{$key};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2378 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2379 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2380 $hash{$sdB[0]."-".$sdB[1]}=$hash{$key};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2381 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2382 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2383 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2384 #delete original combination
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2385 delete($hash{$key});
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2386 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2387 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2388
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2389 if ($value>$sampleSize)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2390 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2391 return 0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2392 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2393 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2394 return $value;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2395 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2396 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2397
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2398
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2399 sub SampleCombination()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2400 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2401 (my $m,my $n,my $comRef)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2402 my %hash;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2403 my $sampleTimes=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2404 my @randNum;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2405 my @sortID;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2406 my $i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2407 my $j;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2408 my $tmp;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2409 while ( scalar(keys %hash)<$sampleSize and $sampleTimes<($sampleSize*2))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2410 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2411 for ($i=0;$i<$m;$i++) # generate random data
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2412 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2413 $randNum[$i]=int(100000 * rand(100));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2414 $sortID[$i]=$i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2415 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2416
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2417 for ($i=0;$i<$m;$i++) # sorting random data
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2418 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2419 for ($j=0;$j<$m;$j++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2420 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2421 if ($randNum[$sortID[$i]]<$randNum[$sortID[$j]])
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2422 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2423 $tmp=$sortID[$i];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2424 $sortID[$i]=$sortID[$j];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2425 $sortID[$j]=$tmp;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2426 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2427 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2428 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2429
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2430 #storing data
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2431 $tmp=join("\t",sort {$a<=>$b} (splice(@sortID,0,$n)));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2432 $hash{$tmp}=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2433 $sampleTimes++;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2434 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2435 @$comRef=keys %hash;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2436 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2437
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2438
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2439 sub PanGenomeNumber()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2440 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2441 (my $spID)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2442 my $pan=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2443 my $core=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2444 my $count; #### counter;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2445 my @row;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2446
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2447 foreach (@clusters)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2448 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2449 $count=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2450 @row=split(/\t/,$_);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2451
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2452 foreach (@$spID)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2453 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2454 $count=$count+$row[$_];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2455 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2456
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2457 if ($count>0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2458 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2459 $pan++;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2460 if ($count == scalar(@$spID))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2461 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2462 $core++;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2463 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2464 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2465 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2466 return $pan."\t".$core;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2467 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2468
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2469 sub fit_model_A()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2470 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2471 ### model y = A * x**B + C
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2472 (my $xdata,my $ydata)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2473 my $i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2474 my $b;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2475 my $max_B=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2476 my $max_R=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2477 my $max_A=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2478 my $max_A_interval;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2479 my $max_C=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2480 my $max_C_interval;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2481 my $R=1e-100;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2482 my $start;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2483 my $end;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2484 my $step;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2485 my @xValues;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2486 my @yValues;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2487
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2488 $start=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2489 $step=0.001;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2490 $b=$start;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2491 $max_R=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2492 $R=1e-100;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2493
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2494 use Statistics::LineFit;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2495 use Statistics::Distributions;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2496
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2497 while ($max_R<=$R)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2498 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2499 if (($b < 0.02) and ($b >-0.02))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2500 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2501 $b=-0.02;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2502 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2503
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2504 for ($i=0;$i<@$xdata;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2505 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2506 $xValues[$i]=$$xdata[$i]**$b;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2507 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2508 @yValues=@$ydata;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2509 my $lineFit = Statistics::LineFit->new();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2510 $lineFit->setData (\@xValues, \@yValues) or die "Invalid data";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2511 (my $intercept, my $slope) = $lineFit->coefficients();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2512 my $rSquared = $lineFit->rSquared();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2513 my $meanSquaredError = $lineFit->meanSqError();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2514 my $durbinWatson = $lineFit->durbinWatson();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2515 my $sigma = $lineFit->sigma();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2516 (my $tStatIntercept, my $tStatSlope) = $lineFit->tStatistics();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2517 (my $varianceIntercept,my $varianceSlope) = $lineFit->varianceOfEstimates();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2518
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2519 $max_R=$R;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2520 $R=$rSquared;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2521 if ($max_R<=$R)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2522 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2523 $max_R=$R;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2524 ($max_C,$max_A)=$lineFit->coefficients();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2525 $max_A_interval=Statistics::Distributions::tdistr (($spnum-2),.025)*sqrt($varianceSlope);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2526 $max_C_interval=Statistics::Distributions::tdistr (($spnum-2),.025)*sqrt($varianceIntercept);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2527 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2528 $b=$b-$step;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2529 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2530 $max_B=$b;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2531 return ($max_R,$max_A,$max_A_interval,$max_B,$max_C,$max_C_interval);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2532
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2533 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2534
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2535 sub fit_model_B()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2536 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2537 ### model y = A * exp(x*B) + C
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2538 (my $xdata,my $ydata)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2539 my $i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2540 my $b;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2541 my $max_B=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2542 my $max_R=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2543 my $max_A=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2544 my $max_A_interval;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2545 my $max_C=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2546 my $max_C_interval;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2547 my $R=1e-100;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2548 my $start;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2549 my $end;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2550 my $step;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2551 my @xValues;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2552 my @yValues;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2553
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2554 $start=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2555 $step=0.001;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2556 $b=$start;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2557 $max_R=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2558 $R=1e-100;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2559
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2560 use Statistics::LineFit;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2561 use Statistics::Distributions;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2562
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2563 while ($max_R<=$R)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2564 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2565 if (($b < 0.02) and ($b >-0.02))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2566 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2567 $b=-0.02;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2568 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2569
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2570 for ($i=0;$i<@$xdata;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2571 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2572 $xValues[$i]=exp($$xdata[$i]*$b);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2573 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2574 @yValues=@$ydata;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2575 my $lineFit = Statistics::LineFit->new();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2576 $lineFit->setData (\@xValues, \@yValues) or die "Invalid data";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2577 (my $intercept, my $slope) = $lineFit->coefficients();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2578 my $rSquared = $lineFit->rSquared();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2579 my $meanSquaredError = $lineFit->meanSqError();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2580 my $durbinWatson = $lineFit->durbinWatson();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2581 my $sigma = $lineFit->sigma();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2582 (my $tStatIntercept, my $tStatSlope) = $lineFit->tStatistics();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2583 (my $varianceIntercept,my $varianceSlope) = $lineFit->varianceOfEstimates();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2584
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2585 $max_R=$R;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2586 $R=$rSquared;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2587 if ($max_R<=$R)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2588 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2589 $max_R=$R;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2590 ($max_C,$max_A)=$lineFit->coefficients();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2591 $max_A_interval=Statistics::Distributions::tdistr (($spnum-2),.025)*sqrt($varianceSlope);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2592 $max_C_interval=Statistics::Distributions::tdistr (($spnum-2),.025)*sqrt($varianceIntercept);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2593 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2594 $b=$b-$step;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2595 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2596 $max_B=$b;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2597 return ($max_R,$max_A,$max_A_interval,$max_B,$max_C,$max_C_interval);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2598 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2599
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2600
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2601 sub ReadData2Array()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2602 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2603 (my $file, my $array1,my $col1,my $array2,my $col2)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2604 my $i=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2605 open(F,$file);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2606 $_=<F>;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2607 while (<F>)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2608 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2609 chomp();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2610 @_=split(/\t/,$_);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2611 $$array1[$i]=$_[$col1];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2612 $$array2[$i]=$_[$col2];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2613 $i++;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2614 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2615 close(F);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2616 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2617
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2618 sub SumData()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2619 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2620 (my $xdata,my $ydata,my $SumMethod)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2621 my %hash;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2622 my $i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2623 my $key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2624 my $max=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2625 for ($i=0;$i<@$xdata;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2626 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2627 if (exists($hash{$$xdata[$i]}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2628 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2629 $hash{$$xdata[$i]}=$hash{$$xdata[$i]}." ".$$ydata[$i];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2630 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2631 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2632 $hash{$$xdata[$i]}=$$ydata[$i];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2633 if ($$xdata[$i]>$max)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2634 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2635 $max=$$xdata[$i];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2636 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2637 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2638 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2639 @$xdata=qw();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2640 @$ydata=qw();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2641 $i=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2642 foreach $i (1..$max)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2643 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2644 $$xdata[$i-1]=$i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2645 if ($SumMethod eq "median")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2646 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2647 $$ydata[$i-1]=&median($hash{$i});
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2648 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2649 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2650 $$ydata[$i-1]=&mean($hash{$i});
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2651 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2652 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2653 #print join(",",@$xdata)."\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2654 #print join(",",@$ydata)."\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2655 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2656
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2657 sub median()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2658 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2659 (my $data)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2660 my @data=split(/ /,$data);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2661 my $arraylen=scalar(@data);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2662 @data=sort{$a<=>$b} @data;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2663 if (int($arraylen/2)*2 == $arraylen)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2664 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2665 return ($data[$arraylen/2]+$data[$arraylen/2-1])/2;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2666 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2667 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2668 return $data[int($arraylen/2)];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2669 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2670 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2671
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2672 sub mean()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2673 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2674 (my $data)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2675 my @data=split(/ /,$data);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2676 my $sum=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2677 foreach (@data)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2678 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2679 $sum=$sum+$_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2680 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2681 return int(($sum/scalar(@data))*1000)/1000;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2682 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2683
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2684 sub ReplaceName()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2685 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2686 (my $sp,my $file)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2687 my @content;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2688 my $line;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2689 my $i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2690 my $target;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2691 open(F,$file);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2692 @content=<F>;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2693 close(F);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2694 for ($line=0;$line<@content;$line++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2695 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2696 for ($i=0;$i<@$sp;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2697 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2698 $_=$content[$line];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2699 $target="sp".$i."sp";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2700 s/$target/$$sp[$i]/;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2701 $content[$line]=$_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2702 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2703 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2704 open(R,">$file");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2705 print R @content;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2706 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2707 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2708
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2709 sub MP_Result_to_Table()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2710 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2711 (my $MPresult, my $outputfile)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2712 my %hash;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2713 my $maxid=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2714 my $i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2715 my @row;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2716
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2717 open(F,"$MPresult");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2718 $_=<F>;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2719 while (<F>)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2720 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2721 @row=split(/\t/,$_);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2722 if (exists($hash{$row[0]}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2723 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2724 $hash{$row[0]}=$hash{$row[0]}."\t".$row[2];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2725 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2726 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2727 $hash{$row[0]}=$row[2];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2728 if ($row[0]>$maxid)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2729 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2730 $maxid=$row[0];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2731 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2732 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2733 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2734 close(F);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2735
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2736 open(R,">$outputfile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2737 foreach $i (1..$maxid)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2738 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2739 print R $hash{$i}."\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2740 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2741 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2742 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2743
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2744
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2745
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2746 sub FormatCluster()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2747 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2748 (my $infile,my $genelist,my $spnum,my $cluster)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2749 my %hash;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2750 my %gene;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2751 my $key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2752 my @row;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2753 my $sp;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2754 my $line;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2755 my $i=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2756 my $j=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2757 my @content;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2758
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2759 ### record gene in clusters
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2760 open(F,"$infile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2761 @content=<F>;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2762 close(F);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2763 chomp(@content);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2764 for ($line=0;$line<@content;$line++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2765 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2766 @row=split(/\t/,$content[$line]);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2767 foreach $key (@row)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2768 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2769 $gene{$key}=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2770 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2771 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2772 ###retrieves gene which is not in clutsers
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2773
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2774 open(F,"$genelist");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2775 while ($key=<F>)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2776 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2777 if ($key=~/^>/)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2778 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2779 chomp($key);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2780 $_=$key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2781 s/^>//;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2782 $key=$_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2783 if (!exists($gene{$key}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2784 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2785 $content[$line]=$key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2786 $line++;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2787 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2788 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2789 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2790 close(F);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2791
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2792 #### initialization @cluster
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2793 @$cluster=qw();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2794 $j=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2795
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2796 foreach $line (@content)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2797 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2798 if ($line ne "")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2799 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2800 %hash=();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2801 @row=split(/\t/,$line);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2802 foreach $key (@row)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2803 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2804 $sp=substr($key,0,index($key,"G"));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2805 $gene{$key}=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2806 if (exists($hash{$sp}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2807 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2808 $hash{$sp}=$hash{$sp}.",".$key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2809 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2810 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2811 $hash{$sp}=$key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2812 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2813 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2814
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2815 $i=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2816 @row=qw();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2817
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2818 foreach $i (0..($spnum-1))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2819 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2820 if (exists($hash{"S$i"}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2821 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2822 $row[$i]=$hash{"S$i"};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2823 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2824 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2825 $row[$i]="-";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2826 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2827 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2828 $$cluster[$j++]=join("\t",@row);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2829 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2830 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2831 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2832
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2833