annotate PGAP-1.2.1/PGAP.pl @ 0:83e62a1aeeeb draft

Uploaded
author dereeper
date Thu, 24 Jun 2021 13:51:52 +0000
parents
children 70b7a5270968
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1 #!/usr/bin/perl
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2 use strict;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
3 use warnings;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
4 use Getopt::Long;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
5
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
6 ### programs from BLAST
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
7 my $formatdb="formatdb";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
8 my $blastall="blastall";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
9
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
10 ### programs from mcl
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
11 my $mcl="mcl";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
12
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
13 ### programs from mafft
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
14 my $mafft="mafft";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
15
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
16 ### programs from PHYLIP
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
17 my $seqboot="seqboot";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
18 my $neighbor="neighbor";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
19 my $consense="consense";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
20 my $dnaml="dnaml";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
21 my $dnadist="dnadist";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
22 my $dnapars="dnapars";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
23
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
24 my $count_tree=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
25
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
26 my $sampleSize=8000; # when calculate the pan-genome size, we will sample $sampleSize combinations
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
27 # if the total combination number is larger than $sampleSize for specific genomes
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
28 # Surely, the number of $sampleSize is, the larger, the better.
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
29 # However, the larger the $sampleSize is, the more time would be consumed.
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
30 # we suggest the range: 5000 ~ 20,000
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
31
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
32 #####################################################################
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
33 # DOn't modify the following code, unless you know their functions
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
34 #####################################################################
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
35
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
36 my %opt=qw();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
37 GetOptions(\%opt,"strains:s","input:s","output:s","cluster!","pangenome!","variation!","evolution!","function!","method:s","thread:i","score:f","evalue:f","coverage:f","local:f","global:f","identity:f","bootstrap:i","help|h!");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
38
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
39 my @usage=qq(
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
40 ====== Pan-Genome Analysis Pipeline (PGAP) ======
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
41 Version 1.2.1
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
42
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
43 Usage: perl PGAP.pl [Options]
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
44
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
45 Options:
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
46 --strains String Input strains nicknames, and join them with '+', for example: A+B+C
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
47 --input String Input data directory
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
48 --output String Result output directory
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
49
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
50 --cluster Run homologous gene clustering
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
51 --pangenome Run pan-genome analysis
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
52 --variation Run homologous clusters variation analysis
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
53 --evolution Run evolution analysis
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
54 --function Run Function analysis
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
55
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
56 --method String GF for GeneFamily method, and MP for MultiParanoid method
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
57 for GF: fast, but not very accurate
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
58 evalue, score, indentity, coverage are employed
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
59 for MP: slow, but more accurate
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
60 score, coverage, local, global are employed
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
61 --thread Int Number of processors to use in blastall. [default:1]
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
62 --score Int Minimum score in blastall. [default:40]
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
63 --evalue Decimal Maximal E-value in blastall. [default:1e-10]
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
64 --coverage Decimal Minimum alignment coverage for two homologous proteins. [default:0.5]
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
65 --local Decimal Minimum local alignment overlap in MP method. [default:0.25]
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
66 --global Decimal Minimum global alignment overlap in MP method. [default:0.5]
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
67 --identity Decimal Minimum alignment indentity for two homologous proteins. [default:0.5]
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
68 --bootstrap Int Bootstrap times for phylogenetics tree. [default:1]
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
69
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
70 --h or help Display this message
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
71 );
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
72
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
73
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
74 ############# specified variable #############
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
75 my $inputDIR;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
76 my $outputDIR;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
77 my $run_cluster;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
78 my $run_pangenome;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
79 my $run_variation;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
80 my $run_evolution;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
81 my $run_function;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
82 my $method="";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
83 my $thread;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
84 my $score;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
85 my $identity;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
86 my $evalue;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
87 my $coverage;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
88 my $global;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
89 my $local;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
90 my $bootstrap;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
91
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
92
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
93 my %pep;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
94 my %nuc;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
95 my $spnum;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
96 my @clusters;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
97 my $Cluster;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
98 my @SpecieCombination;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
99 my @spID;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
100 my %genenum;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
101 my %aaAln;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
102 my %ntAln;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
103 my %cog;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
104 my %description;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
105 #my %aa4tree; ### AA sequence for Phylogenetic Tree
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
106 my %nt4tree; ### nucleotide sequence for Phylogenetic Tree
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
107 my @SNPPosition; ### SNP position
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
108 my $dieMessage="You did not run PGAP.pl in the program directory\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
109 my $section;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
110
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
111 ######### common temporary variable #############
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
112 my $i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
113 my $j;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
114 my $line;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
115 my %tmpHash;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
116 my @tmp;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
117 my $tmp;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
118 my $key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
119 my @row;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
120 my $inparacount;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
121 my $ClusterID;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
122 my $orth;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
123 my @content;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
124 my $clusterName;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
125 my @xdata;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
126 my @ydata;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
127 my @fit;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
128
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
129 my $fit_A;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
130 my $fit_A_interval;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
131 my $fit_B;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
132 my $fit_C;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
133 my $fit_C_interval;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
134 my $fit_Rsquare;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
135
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
136
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
137
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
138 #### check option
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
139
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
140 my $opt_error=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
141
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
142 if ((scalar(keys %opt) ==0) or (exists($opt{"help"})))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
143 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
144 print join("\n",@usage)."\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
145 exit;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
146 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
147
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
148
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
149 ###################### public info
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
150 ### strains name
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
151 my @species;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
152 if (exists($opt{"strains"}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
153 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
154 @species=split(/\+/,$opt{"strains"});
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
155 $spnum=scalar(@species);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
156 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
157 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
158 print "Please assign strains nick name!\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
159 exit;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
160 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
161
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
162 ### input data directory
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
163
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
164 if (exists($opt{"input"}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
165 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
166 $inputDIR=$opt{"input"};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
167 if ($inputDIR!~/\/$/)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
168 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
169 $inputDIR=$inputDIR."/";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
170 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
171 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
172 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
173 print "Please assign input data directory!\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
174 exit;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
175 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
176 ### output data directory
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
177
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
178 if (exists($opt{"output"}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
179 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
180 $outputDIR=$opt{"output"};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
181 if ($outputDIR!~/\/$/)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
182 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
183 $outputDIR=$outputDIR."/";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
184 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
185 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
186 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
187 print "Please assign result output directory!\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
188 exit;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
189 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
190
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
191 ###################### section info
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
192
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
193 if (exists($opt{"cluster"}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
194 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
195 $run_cluster=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
196 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
197 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
198 $run_cluster=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
199 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
200
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
201 if (exists($opt{"pangenome"}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
202 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
203 $run_pangenome=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
204 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
205 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
206 $run_pangenome=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
207 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
208
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
209 if (exists($opt{"variation"}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
210 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
211 $run_variation=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
212 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
213 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
214 $run_variation=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
215 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
216
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
217 if (exists($opt{"evolution"}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
218 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
219 $run_evolution=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
220 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
221 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
222 $run_evolution=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
223 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
224
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
225 if (exists($opt{"function"}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
226 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
227 $run_function=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
228 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
229 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
230 $run_function=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
231 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
232
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
233 if ($run_cluster)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
234 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
235 ### method
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
236 if (exists($opt{"method"}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
237 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
238 $method=uc($opt{"method"});
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
239 if ($method!~/^GF$/ and $method!~/^MP$/)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
240 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
241 print "Unknown method: ".$opt{"method"}."\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
242 exit;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
243 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
244 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
245 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
246 print "Please assign the cluster method!\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
247 exit;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
248 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
249
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
250 ##thread
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
251 if (exists($opt{"thread"}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
252 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
253 $thread=$opt{"thread"};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
254 if ($thread==0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
255 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
256 print "please assign an applicable thread value.\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
257 exit;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
258 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
259 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
260 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
261 $thread=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
262 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
263
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
264 ##score
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
265 if (exists($opt{"score"}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
266 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
267 $score=$opt{"score"};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
268 if ($score<=0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
269 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
270 print "please assign an applicable score value.\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
271 exit;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
272 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
273 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
274 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
275 $score=40;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
276 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
277
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
278 if ($method eq "GF")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
279 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
280 ###identity
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
281 if (exists($opt{"identity"}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
282 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
283 $identity=$opt{"identity"};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
284 if ($identity>1 or $identity<=0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
285 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
286 print "identity should be 0 ~ 1 \n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
287 exit;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
288 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
289 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
290 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
291 $identity=0.5;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
292 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
293
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
294 ###evalue
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
295 if (exists($opt{"evalue"}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
296 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
297 $evalue=$opt{"evalue"};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
298 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
299 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
300 $evalue=1e-10;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
301 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
302
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
303 ###coverage
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
304 if (exists($opt{"coverage"}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
305 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
306 $coverage=$opt{"coverage"};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
307 if ($coverage>1 or $coverage<=0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
308 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
309 print "coverage should be 0 ~ 1 \n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
310 exit;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
311 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
312 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
313 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
314 $coverage=0.5;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
315 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
316 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
317
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
318
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
319 if ($method eq "MP")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
320 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
321 ###global
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
322 if (exists($opt{"global"}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
323 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
324 $global=$opt{"global"};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
325 if ($global>1 or $global<=0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
326 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
327 print "global coverage should be 0 ~ 1 \n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
328 exit;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
329 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
330 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
331 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
332 $global=0.5;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
333 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
334 ###local
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
335 if (exists($opt{"local"}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
336 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
337 $local=$opt{"local"};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
338 if ($local<=0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
339 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
340 print "local coverage should be 0 ~ [global coverage value] \n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
341 exit;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
342 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
343 if ($local>$global)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
344 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
345 print "local coverage should be less than global coverage!\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
346 exit;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
347 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
348 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
349 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
350 $local=0.25;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
351 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
352 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
353 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
354
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
355 if ($run_evolution)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
356 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
357 if (exists($opt{"bootstrap"}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
358 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
359 $bootstrap=$opt{"bootstrap"};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
360 if ($bootstrap<=0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
361 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
362 print "please assign an applicable bootstrap value.\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
363 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
364 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
365 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
366 $bootstrap=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
367 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
368 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
369
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
370 print "Program begin at ".localtime()."\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
371 print "The following are the parameters for current process:\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
372 print "Strains: ".join(",",@species)."\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
373 print "Input directory: $inputDIR\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
374 print "Output directory: $outputDIR\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
375 if ($run_cluster)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
376 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
377 print "Cluster analysis: yes\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
378 print " Method: $method\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
379 print " Thread: $thread\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
380 if ($method eq "GF")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
381 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
382 print " E-value: $evalue\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
383 print " Identity: $identity\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
384 print " Coverage: $coverage\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
385 print " Score: $score\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
386 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
387 if ($method eq "MP")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
388 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
389 print " Local: $local\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
390 print " Global: $global\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
391 print " Score: $score\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
392 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
393 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
394 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
395 print "Cluster analysis: no\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
396 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
397 if ($run_pangenome)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
398 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
399 print "Pan-genome analysis: yes\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
400 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
401 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
402 print "Pan-genome analysis: no\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
403 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
404 if ($run_variation)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
405 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
406 print "Variation analysis: yes\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
407 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
408 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
409 print "Variation analysis: no\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
410 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
411 if ($run_evolution)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
412 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
413 print "Evolution analysis: yes\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
414 print " Bootstrap: $bootstrap\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
415 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
416 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
417 print "Evolution analysis: no\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
418 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
419 if ($run_function)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
420 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
421 print "Function analysis: yes\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
422 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
423 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
424 print "Function analysis: no\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
425 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
426
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
427 $section=$run_cluster.$run_pangenome.$run_variation.$run_evolution.$run_function;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
428
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
429 ###############################################
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
430 # section 0) check input file and program
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
431 ###############################################
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
432 if (!(-e $outputDIR))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
433 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
434 system("mkdir $outputDIR");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
435 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
436 system("chmod +rw $outputDIR");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
437
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
438 if (!(-w $outputDIR))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
439 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
440 print "There is no WRITE permission in $outputDIR\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
441 exit;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
442 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
443 @tmp=qw();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
444 &CheckInputFile(\@species,$inputDIR,$section,$method,\@tmp);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
445 &CheckExtraProgram($section,$method,\@tmp);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
446
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
447 if (scalar(@tmp)>0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
448 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
449 open(R,">".$outputDIR."0.error.message");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
450 print R join("",@tmp)."\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
451 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
452 print "error!\nlog are saved in ${outputDIR}0.error.message\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
453 exit;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
454 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
455
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
456
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
457 ############################################
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
458 # section 1) cluster analysis
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
459 ############################################
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
460
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
461 if ($run_cluster)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
462 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
463 print "\n\n############################################\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
464 print "# section 1) cluster analysis\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
465 print "############################################\n\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
466
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
467 #### cluster gene and return result to the array @clusters
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
468
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
469 if ($method eq "MP")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
470 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
471 print "Begin cluster gene with MP method ...\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
472 &MP();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
473 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
474 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
475 print "Begin cluster gene with GF method ...\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
476 &GF();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
477 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
478
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
479 #### output normal cluster format
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
480
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
481 &FormatClusterOutPut(\@species,"${outputDIR}1.Orthologs_Cluster.txt",\@clusters);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
482
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
483 #### Retrieve cluster
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
484
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
485 &RetrieveClusterFromFile("${outputDIR}1.Orthologs_Cluster.txt",\@clusters);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
486
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
487 ##### gene distribution in each strains
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
488 %tmpHash=();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
489 &GeneDistribution(\@clusters,\%tmpHash);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
490
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
491 open(R,">${outputDIR}1.Gene_Distribution_By_Conservation.txt");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
492 print R "SharedBy_Strains\t".join("\t",@species)."\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
493
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
494 for ($i=$spnum;$i>0;$i--)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
495 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
496 print R $i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
497 for ($j=0;$j<$spnum;$j++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
498 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
499 if (exists($tmpHash{$i."|".$j}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
500 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
501 print R "\t".$tmpHash{$i."|".$j};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
502 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
503 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
504 print R "\t0";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
505 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
506 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
507 print R "\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
508 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
509
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
510 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
511
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
512
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
513 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
514 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
515 print "Homologous gene clustering is skipped!\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
516 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
517
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
518
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
519 if ($run_pangenome)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
520 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
521 print "\n\n############################################\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
522 print "# section 2) Pan-genome analysis\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
523 print "############################################\n\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
524
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
525 #### Retrieve cluster
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
526 &RetrieveClusterFromFile("${outputDIR}1.Orthologs_Cluster.txt",\@clusters);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
527 chomp(@clusters);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
528
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
529 #### convert file into 0-1 matrix
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
530 for ($line=0;$line<@clusters;$line++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
531 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
532 @row=split(/\t/,$clusters[$line]);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
533 splice(@row,0,1);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
534 for ($i=0;$i<@row;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
535 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
536 if ($row[$i] eq "-")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
537 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
538 $row[$i]=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
539 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
540 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
541 $row[$i]=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
542 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
543 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
544 $clusters[$line]=join("\t",@row);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
545 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
546
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
547 #### fetch gene number of each strains
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
548 for ($i=0;$i<$spnum;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
549 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
550 open(F,"$inputDIR$species[$i].pep");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
551 @tmp=<F>;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
552 close(F);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
553 @tmp=grep(/^>/,@tmp);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
554 $genenum{$species[$i]}=scalar(@tmp);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
555 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
556
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
557 #### pan genome size and core genome size
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
558 print "Deducing pan genome size and core genome size for each composition...\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
559
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
560 open(PAN,">${outputDIR}2.PanGenome.Data.txt");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
561 print PAN "ClusterConservation\tTotalGeneNumber\tPanGenome\tCoreGenome\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
562
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
563 for ($i=1;$i<=scalar(@species);$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
564 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
565 #@SpecieCombination=&Combination(\@species,$i);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
566 #@SpecieCombination=&Combination($spnum,$i);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
567 if (&ChkCombinationValue($spnum,$i) !=0) ### transfer the array reference to the subroutine
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
568 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
569 &Combination($spnum,$i,\@SpecieCombination); ## if the combination number is less than sampleSize, then fecth all, else sample
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
570 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
571 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
572 &SampleCombination($spnum,$i,\@SpecieCombination);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
573 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
574
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
575 foreach $key (@SpecieCombination)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
576 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
577 ##### count total gene number in current combination
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
578 $tmp=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
579 @spID=split(/\t/,$key); #### speices id in current combination
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
580 foreach (@spID)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
581 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
582 $tmp=$tmp+$genenum{$species[$_]};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
583 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
584 ##### scan pangenome and coregenome
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
585 @tmp=split(/\t/,&PanGenomeNumber(\@spID));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
586 print PAN "$i\t$tmp\t".join("\t",@tmp)."\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
587
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
588 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
589 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
590
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
591 close(PAN);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
592
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
593 #### data fit
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
594
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
595 #### for model A
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
596
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
597 if ($spnum<3)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
598 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
599 print "There are $spnum strains. For pan-genome function fitting, at least 3 strains data are required.\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
600 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
601 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
602 open(R,">${outputDIR}2.PanGenome.Profile.txt");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
603 ##### genome number & pan-genome size
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
604 @xdata=qw();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
605 @ydata=qw();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
606 &ReadData2Array("${outputDIR}2.PanGenome.Data.txt",\@xdata,0,\@ydata,2);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
607 &SumData(\@xdata,\@ydata,"mean");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
608 ($fit_Rsquare, $fit_A, $fit_A_interval, $fit_B, $fit_C, $fit_C_interval)=&fit_model_A(\@xdata,\@ydata);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
609 print R "The relation bewteen genome number and pan-genome size\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
610 print R "Function model: y=A*x**B +C \n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
611 print R "\ty denotes pan-genome size, x denotes genome number, and A, B, C are fitting parameters.\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
612 print R "Fitting result:\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
613 print R "\ty = $fit_A *x**$fit_B + $fit_C\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
614 print R "\tR-square = $fit_Rsquare\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
615 print R "\tA 95% confidence interval: ($fit_A - $fit_A_interval , $fit_A + $fit_A_interval)\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
616 print R "\tC 95% confidence interval: ($fit_C - $fit_C_interval , $fit_C + $fit_C_interval)\n\n\n\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
617
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
618 ##### total gene number & pan-genome size
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
619 #@xdata=qw();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
620 #@ydata=qw();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
621 #&ReadData2Array("${outputDIR}2.PanGenome.Data.txt",\@xdata,1,\@ydata,2);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
622 #&SumDataByMedian(\@xdata,\@ydata);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
623 #($fit_Rsquare, $fit_A, $fit_B, $fit_C)=&fit_model_A(\@xdata,\@ydata);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
624 #print R "The relation bewteen total gene number and pan-genome size\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
625 #print R "$fit_Rsquare, $fit_A, $fit_B, $fit_C\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
626 #print R "\ty = $fit_A *x**$fit_B + $fit_C R-square = $fit_Rsquare\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
627 #print R "\tx: total gene number\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
628 #print R "\ty: pan-genome size\n\n\n\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
629
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
630 ##### genome number & core genome
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
631 @xdata=qw();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
632 @ydata=qw();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
633 &ReadData2Array("${outputDIR}2.PanGenome.Data.txt",\@xdata,0,\@ydata,3);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
634 &SumData(\@xdata,\@ydata,"mean");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
635 ($fit_Rsquare, $fit_A, $fit_A_interval, $fit_B, $fit_C, $fit_C_interval)=&fit_model_B(\@xdata,\@ydata);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
636 print R "The relation bewteen genome number and core genome size\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
637 print R "Function model: y=A*exp(B*x) +C \n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
638 print R "\ty denotes pan-genome size, x denotes genome number, and A, B, C are fitting parameters.\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
639 print R "Fitting result:\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
640 print R "\ty = $fit_A *exp($fit_B * x) + $fit_C R-square = $fit_Rsquare\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
641 print R "\tR-square = $fit_Rsquare\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
642 print R "\tA 95% confidence interval: ($fit_A - $fit_A_interval , $fit_A + $fit_A_interval)\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
643 print R "\tC 95% confidence interval: ($fit_C - $fit_C_interval , $fit_C + $fit_C_interval)\n\n\n\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
644 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
645 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
646
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
647 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
648
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
649
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
650 ############################################
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
651 # section 3) CDS variation analysis
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
652 ############################################
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
653
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
654 if ($run_variation)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
655 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
656 print "\n\n############################################\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
657 print "# section 3) CDS variation analysis\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
658 print "############################################\n\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
659
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
660 #### Retrieve cluster
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
661 &RetrieveClusterFromFile("${outputDIR}1.Orthologs_Cluster.txt",\@clusters);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
662 chomp(@clusters);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
663
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
664 ## protein
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
665 system("rm -rf *.pep");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
666 &PrepareFasta(\@species,$inputDIR,".pep"); ###prepare pep file
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
667 system("cat *.pep > All.faa && rm -rf *.pep && mv All.faa All.pep");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
668 &ReadSequenceInToHash("All.pep",\%pep);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
669 ## nucleic
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
670 system("rm -rf *.nuc");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
671 &PrepareFasta(\@species,$inputDIR,".nuc"); ###prepare nuc file
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
672 system("cat *.nuc > All.ffn && rm -rf *.nuc && mv All.ffn All.nuc");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
673 &ReadSequenceInToHash("All.nuc",\%nuc);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
674
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
675 ## scanning SNP
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
676 %nt4tree=();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
677 for ($i=0;$i<$spnum;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
678 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
679 $nt4tree{"S".$i}="";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
680 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
681
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
682 open(VAR,">${outputDIR}3.CDS.variation.txt");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
683 print VAR "ClusterID\tStrains_Number\tGene_Number\tPosition\taaType\tntType\tntProfile\tVariation type\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
684
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
685 open(VA,">${outputDIR}3.CDS.variation.analysis.txt");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
686 print VA "ClusterID\tInDel Base\tNonsynonymous mutation\tSynonymous mutation\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
687
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
688 for ($line=0;$line<@clusters;$line++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
689 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
690 @row=split(/\t|\,/,$clusters[$line]);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
691 $ClusterID=$row[0];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
692 splice(@row,0,1);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
693 @row=grep(/^S/,@row);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
694 if (scalar(@row) >=2)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
695 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
696 open(PEP,">$ClusterID.pep");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
697 open(NUC,">$ClusterID.nuc");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
698 foreach $key (@row)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
699 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
700 print PEP ">$key\n$pep{$key}\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
701 print NUC ">$key\n$nuc{$key}\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
702 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
703 close(PEP);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
704 close(NUC);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
705 system("$mafft --quiet $ClusterID.pep > $ClusterID.pal");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
706 #system("perl ./pal2nal.pl $ClusterID.pal $ClusterID.nuc -output fasta > $ClusterID.nal");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
707 $tmp=&pal2nal("$ClusterID.pal","$ClusterID.nuc","$ClusterID.nal");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
708 if ($tmp == 0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
709 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
710 system("rm -rf $ClusterID.*");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
711 next;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
712 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
713
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
714 @tmp=&DetectSNP();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
715 if (scalar(@tmp)>0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
716 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
717 print VA $ClusterID."\t".&VarAnalysis(\@tmp)."\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
718 print VAR join("",@tmp);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
719 ### core orthologs
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
720 @row=split(/\t/,$clusters[$line]);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
721 splice(@row,0,1);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
722 if ((&CountGeneInCluster(join("\t",@row)) ==$spnum) and (&CountSpeicesInCluster(join("\t",@row)) == $spnum) )
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
723 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
724 $count_tree++;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
725 %tmpHash=();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
726 foreach (@row)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
727 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
728 $tmpHash{$_}="";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
729 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
730 &RemoveHeadGap("$ClusterID.nal",\%tmpHash);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
731 &ExtractSNP4tree(\%tmpHash,\%nt4tree);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
732 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
733 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
734 system("rm -rf $ClusterID.*");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
735 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
736 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
737 close(VAR);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
738 close(VA);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
739
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
740 open(R,">${outputDIR}3.CDS.variation.for.evolution.txt");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
741 foreach $key (keys %nt4tree)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
742 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
743 $_=$key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
744 s/s//gi;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
745 print R ">$species[$_]\n$nt4tree{$key}\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
746 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
747 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
748 print $count_tree."\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
749
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
750 ###
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
751 system("rm All.nuc All.pep");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
752 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
753 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
754 print "CDS variation is skipped.\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
755 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
756
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
757 ############################################
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
758 # section 4) CDS variation analysis
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
759 ############################################
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
760
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
761 if ($run_evolution)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
762 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
763 #### Retrieve cluster
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
764 &RetrieveClusterFromFile("${outputDIR}1.Orthologs_Cluster.txt",\@clusters);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
765 chomp(@clusters);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
766
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
767
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
768
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
769 ##################
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
770 ##
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
771 ## Distance based
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
772 ##
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
773 ##################
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
774
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
775 ### caculate the distance between each two strains by cluster
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
776 &ClusterProfil4Specie(\%tmpHash); # caculate Clusters profile for each specie
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
777 &DistanceMatrix($spnum,\%tmpHash); # caculate distance matrix acoording to Clusters profile
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
778 ###output distance
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
779 open(DIST,">${outputDIR}4.Species_Distance_Clusters_Based.txt");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
780 ###header
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
781 printf DIST "%5d", $spnum;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
782 print DIST "\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
783 foreach $i (0..($spnum-1))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
784 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
785 $key="sp".$i."sp";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
786 printf DIST "%-10s",$key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
787 foreach $j (0..($spnum-1))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
788 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
789 printf DIST " %8f",$tmpHash{$i."-".$j};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
790 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
791 print DIST "\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
792 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
793 close(DIST);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
794
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
795 %tmpHash=();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
796
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
797 ### based on pan genome (distance)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
798 print "\nDraw pangenome based phylogenetic tree ...\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
799
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
800 &PanBasedTree("${outputDIR}4.Species_Distance_Clusters_Based.txt","${outputDIR}4.PanBased");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
801
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
802 ##################
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
803 ##
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
804 ## SNP based
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
805 ##
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
806 ##################
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
807 %tmpHash=();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
808 if (!(-e "${outputDIR}3.CDS.variation.for.evolution.txt"))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
809 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
810 print "Variation in core orthologs cluster is not found from ${outputDIR}3.CDS.variation.for.evolution.txt.\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
811 print "Maybe you have skipped CDS variation analysis.\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
812 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
813 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
814 &ReadSequenceInToHash("${outputDIR}3.CDS.variation.for.evolution.txt",\%tmpHash);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
815 open(R,">mlst.aln");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
816 for ($i=0;$i<@species;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
817 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
818 print R ">sp${i}sp\n".$tmpHash{$species[$i]}."\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
819 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
820 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
821 &fasta2phylip("mlst.aln","mlst.phylip");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
822 system("rm -rf mlst.aln");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
823
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
824 print "\nDraw SNP based phylogenetic tree ...\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
825 &SNPBasedTree("mlst.phylip","${outputDIR}4.SNPBased");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
826 system("rm -rf mlst.phylip")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
827 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
828
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
829 ########
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
830 # replace speices name
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
831 ########
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
832
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
833 opendir(DIR,"${outputDIR}");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
834 @tmp=readdir(DIR);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
835 closedir(DIR);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
836 @tmp=grep(/^4/,@tmp);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
837 foreach $tmp (@tmp)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
838 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
839 &ReplaceName(\@species,"${outputDIR}$tmp");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
840 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
841 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
842 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
843 print "Evolution analysis is skipped.\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
844 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
845
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
846
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
847 ############################################
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
848 # section 4) Function analysis
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
849 ############################################
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
850
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
851 if ($run_function)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
852 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
853 #### Retrieve cluster
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
854 &RetrieveClusterFromFile("${outputDIR}1.Orthologs_Cluster.txt",\@clusters);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
855 chomp(@clusters);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
856
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
857 #### prepare annotation file
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
858 &PrepareTable(\@species,$inputDIR,".function"); ###prepare location file
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
859 &ReadAnnotation(\@species,\%cog,\%description);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
860
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
861
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
862 #### assign function
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
863 open(R,">${outputDIR}5.Orthologs_Cluster_Function.txt");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
864 print R "ClusterID\tConservation_Level\tCOG\tDescription\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
865 for ($i=0;$i<@clusters;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
866 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
867 @row=split(/\t/,$clusters[$i]);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
868 $ClusterID=$row[0];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
869 splice(@row,0,1);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
870 print R $ClusterID."\t".&CountSpeicesInCluster(join("\t",@row))."\t".&getCOG(\@row,\%cog)."\t".&getDescription(\@row,\%description)."\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
871 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
872 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
873
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
874 #### COG distribution
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
875
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
876 ###Whole Clusters COG Distribution
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
877 &outputCOGStatistic("${outputDIR}5.Orthologs_Whole_Cluster_COG_Distribution.txt",&scanCOG("${outputDIR}5.Orthologs_Cluster_Function.txt",$spnum,1));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
878
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
879 ###Core Clusters COG Distribution
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
880 &outputCOGStatistic("${outputDIR}5.Orthologs_Core_Cluster_COG_Distribution.txt",&scanCOG("${outputDIR}5.Orthologs_Cluster_Function.txt",$spnum,$spnum));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
881
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
882 ###Dispensable Clusters COG Distribution
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
883 &outputCOGStatistic("${outputDIR}5.Orthologs_Dispensable_Cluster_COG_Distribution.txt",&scanCOG("${outputDIR}5.Orthologs_Cluster_Function.txt",($spnum-1),2));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
884
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
885 ###strains specifc Clusters COG Distribution
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
886 &outputCOGStatistic("${outputDIR}5.Orthologs_specifc_Cluster_COG_Distribution.txt",&scanCOG("${outputDIR}5.Orthologs_Cluster_Function.txt",1,1));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
887
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
888 system("rm -rf *.function");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
889
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
890 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
891 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
892 print "Function analysis is skipped.\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
893 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
894
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
895 sub outputCOGStatistic()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
896 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
897 (my $file,my $subcogcount)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
898 my @cogcat=("J A K L B","D Y V T M N Z W U O","C G E F H I P Q","R S -");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
899 my @cogdesc=("INFORMATION STORAGE AND PROCESSING","CELLULAR PROCESSES AND SIGNALING","METABOLISM","POORLY CHARACTERIZED");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
900 my @subcogcat=qw(J A K L B D Y V T M N Z W U O C G E F H I P Q R S -);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
901 my @subcogdesc=("[J] Translation, ribosomal structure and biogenesis","[A] RNA processing and modification","[K] Transcription","[L] Replication, recombination and repair","[B] Chromatin structure and dynamics","[D] Cell cycle control, cell division, chromosome partitioning","[Y] Nuclear structure","[V] Defense mechanisms","[T] Signal transduction mechanisms","[M] Cell wall/membrane/envelope biogenesis","[N] Cell motility","[Z] Cytoskeleton","[W] Extracellular structures","[U] Intracellular trafficking, secretion, and vesicular transport","[O] Posttranslational modification, protein turnover, chaperones","[C] Energy production and conversion","[G] Carbohydrate transport and metabolism","[E] Amino acid transport and metabolism","[F] Nucleotide transport and metabolism","[H] Coenzyme transport and metabolism","[I] Lipid transport and metabolism","[P] Inorganic ion transport and metabolism","[Q] Secondary metabolites biosynthesis, transport and catabolism","[R] General function prediction only","[S] Function unknown","[-] Unclassified");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
902 my %subcogdesc;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
903 my $key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
904 my @cog;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
905 my $i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
906 my $cognum;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
907
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
908 for ($i=0;$i<@subcogcat;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
909 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
910 $subcogdesc{$subcogcat[$i]}=$subcogdesc[$i];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
911 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
912
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
913 open(R,">$file");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
914 for ($i=0;$i<@cogcat;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
915 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
916 $cognum=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
917 foreach $key (split(" ",$cogcat[$i]))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
918 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
919 $cognum=$cognum+$$subcogcount{$key};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
920 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
921 print R $cogdesc[$i]." ( ".$cognum." )\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
922 foreach $key (split(" ",$cogcat[$i]))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
923 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
924 printf R "%-6d %s\n",$$subcogcount{$key},$subcogdesc{$key};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
925 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
926 print R "\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
927
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
928 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
929 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
930
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
931 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
932
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
933 sub scanCOG()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
934 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
935 (my $file,my $max_orth,my $min_orth)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
936 my @row;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
937 my @subcogcat=qw(J A K L B D Y V T M N Z W U O C G E F H I P Q R S -);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
938 my %subcogcount;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
939 my $cog;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
940 my $key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
941
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
942 foreach $key (@subcogcat)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
943 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
944 $subcogcount{$key}=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
945 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
946
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
947 @subcogcat=qw(J A K L B D Y V T M N Z W U O C G E F H I P Q R S);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
948
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
949 open(F,"$file");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
950 $_=<F>;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
951 while (<F>)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
952 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
953 @row=split(/\t/,$_);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
954 if ($row[1]>=$min_orth and $row[1]<=$max_orth)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
955 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
956 if ($row[2] eq "-")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
957 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
958 $subcogcount{"-"}++;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
959 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
960 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
961 $_=uc($row[2]);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
962 s/COG//gi;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
963 $cog=$_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
964 foreach $key (@subcogcat)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
965 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
966 if ($cog=~/$key/)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
967 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
968 $subcogcount{$key}++;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
969 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
970 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
971 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
972 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
973 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
974 close(F);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
975
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
976 return \%subcogcount;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
977 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
978
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
979
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
980
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
981
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
982 sub getCOG()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
983 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
984 (my $data,my $coghash)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
985 my $cog="";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
986 my @cog;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
987 my $key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
988 my %hash;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
989 my @gene=split(/\t|\,/,join("\t",@$data));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
990 @gene=grep(/^S/,@gene);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
991
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
992 foreach $key (@gene)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
993 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
994 if (($$coghash{$key} ne "-") and ($$coghash{$key} ne ""))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
995 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
996 $cog=$cog.",".$$coghash{$key};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
997 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
998 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
999 @cog=split(/,/,$cog);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1000 foreach $cog (@cog)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1001 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1002 if ($cog ne "")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1003 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1004 $hash{$cog}=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1005 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1006 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1007
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1008 $cog=join(",",(keys %hash));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1009 if ($cog eq "")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1010 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1011 $cog="-";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1012 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1013 return $cog;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1014 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1015
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1016 sub getDescription()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1017 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1018 (my $data,my $deschash)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1019 my $desc="";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1020 my $key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1021 my @gene=split(/\t|\,/,join("\t",@$data));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1022 @gene=grep(/^S/,@gene);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1023
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1024 foreach $key (@gene)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1025 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1026 if ( ($$deschash{$key} ne "") and ($$deschash{$key} ne "-") and ($$deschash{$key}!~/hypothetical/))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1027 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1028 $desc=$$deschash{$key};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1029 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1030 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1031
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1032 if ($desc eq "")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1033 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1034 $desc="hypothetical protein";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1035 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1036
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1037 return $desc;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1038 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1039
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1040
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1041
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1042 sub ReadAnnotation()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1043 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1044 (my $species,my $cog,my $description)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1045 my $i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1046 my @row;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1047
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1048 for ($i=0;$i<@$species;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1049 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1050 open(F,"$$species[$i].function");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1051 while (<F>)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1052 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1053 chomp($_);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1054 @row=split(/\t/,$_);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1055 if (scalar(@row)>=2)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1056 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1057 $$cog{$row[0]}=$row[1];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1058 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1059 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1060 $$cog{$row[0]}="-";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1061 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1062
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1063 if (scalar(@row)>=3)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1064 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1065 $$description{$row[0]}=$row[2];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1066 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1067 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1068 $$description{$row[0]}="hypothetical protein";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1069 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1070 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1071 close(F);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1072 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1073 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1074
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1075
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1076 sub SNPBasedTree()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1077 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1078 (my $infile,my $outfileprefix)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1079 my $tmpin=$infile;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1080 my $tmpout;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1081
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1082
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1083 #### boootstrap
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1084 print "\n#### seqboot ...\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1085 open(R,">seqboot.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1086 #print R "$tmpin\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1087 print R "R\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1088 print R "$bootstrap\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1089 print R "Y\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1090 print R "1\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1091 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1092
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1093 system("cp $tmpin infile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1094 system("$seqboot < seqboot.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1095 system("mv outfile 100dnaseq");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1096 system("rm -rf infile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1097 system("rm seqboot.cmd"); # 100dnasseq
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1098
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1099 #### dnaml
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1100 print "\n#### dnaml ...\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1101 open(R,">dnaml.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1102 #print R "100dnaseq\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1103 print R "T\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1104 print R "25\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1105 if ($bootstrap>1)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1106 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1107 print R "M\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1108 print R "D\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1109 #print R "100\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1110 print R "$bootstrap\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1111 print R "1\n"; # Random number seed (must be odd)?
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1112 print R "5\n"; # Number of times to jumble?
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1113 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1114 print R "Y\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1115 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1116
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1117 system("cp 100dnaseq infile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1118 system("$dnaml < dnaml.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1119 system("rm -rf outfile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1120 system("rm -rf infile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1121 system("mv outtree 100dnaseqtree"); # 100dnaseq, 100dnaseqtree
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1122
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1123 #### consense
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1124 print "\n#### dnaml consense ...\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1125
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1126 open(R,">consense.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1127 #print R "100dnaseqtree\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1128 print R "Y\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1129 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1130
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1131 system("cp 100dnaseqtree intree");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1132 system("$consense < consense.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1133 system("mv outfile ${outfileprefix}.ML.outfile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1134 system("mv outtree ${outfileprefix}.ML.tree");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1135 system("rm -rf infile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1136 system("rm -rf 100dnaseqtree"); # 100dnaseq
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1137
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1138 #### dnadist
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1139 print "\n#### dnadist ...\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1140 open(R,">dnadist.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1141 #print R "100dnaseq\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1142 print R "T\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1143 print R "25\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1144 if ($bootstrap>1)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1145 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1146 print R "M\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1147 print R "D\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1148 #print R "100\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1149 print R "$bootstrap\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1150 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1151 print R "Y\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1152 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1153
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1154 system("cp 100dnaseq infile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1155 system("$dnadist < dnadist.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1156 system("rm -rf 100dnaseq");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1157 system("rm -rf infile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1158 system("mv outfile 100dnadist"); # 100dnadist
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1159
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1160 #### Neighbor-joining tree
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1161 print "\n#### Neighbor-joining ...\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1162 open(R,">NJ.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1163 if ($bootstrap>1)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1164 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1165 #print R "100dnadist\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1166 print R "M\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1167 #print R "100\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1168 print R "$bootstrap\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1169 print R "1\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1170 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1171 print R "Y\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1172 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1173
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1174 system("cp 100dnadist infile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1175 system("$neighbor < NJ.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1176 system("mv outtree 100dnadistNJtree");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1177 system("rm outfile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1178 system("rm -rf infile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1179 system("rm -rf NJ.cmd"); # 100dnadist,100dnadistNJtree
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1180
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1181 #### NJ-consense
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1182 print "\n#### NJ-consense ...\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1183 open(R,">NJ-consense.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1184 #print R "100dnadistNJtree\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1185 print R "Y\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1186 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1187
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1188 system("cp 100dnadistNJtree intree");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1189 system("$consense < NJ-consense.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1190 system("mv outfile ${outfileprefix}.Neighbor-joining.outfile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1191 system("mv outtree ${outfileprefix}.Neighbor-joining.tree");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1192 system("rm -rf NJ-consense.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1193 system("rm -rf intree");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1194 system("rm -rf 100dnadistNJtree");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1195
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1196
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1197 #### UPGMA tree
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1198 print "\n#### UPGMA ...\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1199 open(R,">UPGMA.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1200 #print R "100dnadist\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1201 print R "N\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1202 if ($bootstrap>1)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1203 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1204 print R "M\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1205 #print R "100\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1206 print R "$bootstrap\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1207 print R "1\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1208 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1209 print R "Y\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1210 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1211
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1212 system("cp 100dnadist infile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1213 system("$neighbor < UPGMA.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1214 system("mv outtree 100dnadistUPGMAtree");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1215 system("rm -rf outfile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1216 system("rm -rf infile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1217 system("rm -rf UPGMA.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1218
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1219 #### UPGMA-consense
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1220 print "\n#### UPGMA-consense ...\n\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1221 open(R,">UPGMA-consense.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1222 #print R "100dnadistUPGMAtree\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1223 print R "Y\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1224 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1225
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1226 system("cp 100dnadistUPGMAtree intree");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1227 system("$consense < UPGMA-consense.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1228 system("mv outfile ${outfileprefix}.UPGMA.outfile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1229 system("mv outtree ${outfileprefix}.UPGMA.tree");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1230 system("rm -rf UPGMA-consense.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1231 system("rm -rf 100dnadistUPGMAtree");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1232 system("rm -rf intree");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1233
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1234 ###CLEAN TMP FILE
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1235
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1236 system("rm -rf *.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1237 system("rm -rf 100dnadist");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1238 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1239
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1240 sub PanBasedTree()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1241 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1242 (my $infile,my $outfileprefix)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1243 my $tmpin;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1244 my $tmpout;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1245
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1246 $tmpin=$infile;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1247 #### Neighbor-joining tree
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1248
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1249 open(R,">NJ.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1250 #print R "$tmpin\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1251 print R "Y\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1252 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1253
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1254 system("cp $tmpin infile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1255 system("$neighbor < NJ.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1256 system("mv outfile ${outfileprefix}.Neighbor-joining.outfile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1257 system("mv outtree ${outfileprefix}.Neighbor-joining.tree");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1258 system("rm -rf NJ.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1259 system("rm -rf infile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1260
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1261 #### UPGMA tree
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1262
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1263 open(R,">UPGMA.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1264 #print R "$tmpin\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1265 print R "N\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1266 print R "Y\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1267 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1268
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1269 system("cp $tmpin infile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1270 system("$neighbor < UPGMA.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1271 system("mv outfile ${outfileprefix}.UPGMA.outfile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1272 system("mv outtree ${outfileprefix}.UPGMA.tree");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1273 system("rm -rf UPGMA.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1274 system("rm -rf infile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1275
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1276
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1277 ###CLEAN TMP FILE
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1278 system("rm -rf *.cmd");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1279 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1280
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1281 sub DistanceMatrix()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1282 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1283 (my $spnum,my $hash)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1284 my $i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1285 my $j;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1286 my $k;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1287 my $dist;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1288 my $ref;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1289 my $query;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1290 foreach $i (0..($spnum-1))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1291 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1292 foreach $j ($i..($spnum-1))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1293 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1294 $ref=$$hash{$i};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1295 $query=$$hash{$j};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1296 $dist=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1297 for ($k=0;$k<length($ref);$k++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1298 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1299 if (substr($ref,$k,1) ne substr($query,$k,1))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1300 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1301 $dist++;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1302 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1303 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1304 $$hash{$i."-".$j}=$dist;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1305 $$hash{$j."-".$i}=$dist;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1306 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1307 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1308 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1309
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1310
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1311 sub ClusterProfil4Specie
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1312 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1313 (my $hash)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1314 my @row;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1315 my $i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1316
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1317 foreach (0..($spnum-1)) #initialization Hash
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1318 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1319 $$hash{$_}="";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1320 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1321
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1322 foreach (@clusters)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1323 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1324 @row=split(/\t/,$_);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1325 splice(@row,0,1);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1326 if (&CountSpeicesInCluster(join("\t",@row))>1)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1327 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1328 for ($i=0;$i<@row;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1329 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1330 if ($row[$i] eq "-")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1331 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1332 $$hash{$i}=$$hash{$i}."0";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1333 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1334 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1335 $$hash{$i}=$$hash{$i}."1";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1336 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1337 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1338 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1339 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1340 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1341
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1342 # &ExtractSNP4tree(\%tmpHash,\%nt4tree);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1343
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1344 sub ExtractSNP4tree()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1345 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1346 (my $hash,my $nt4treeRef)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1347 my $key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1348 my @row;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1349 my $i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1350 my $len;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1351 my @tribases;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1352 foreach $key (keys %$hash)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1353 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1354 $$hash{substr($key,0,index($key,"G"))}=$$hash{$key};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1355 delete($$hash{$key});
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1356 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1357
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1358 for ($i=0;$i<$spnum;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1359 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1360 $nt4tree{"S".$i}=$nt4tree{"S".$i}.$$hash{"S".$i};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1361 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1362 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1363
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1364
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1365 =pod
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1366 sub ExtractSNP4tree()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1367 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1368 (my $hash,my $nt4treeRef)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1369 my $key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1370 my @row;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1371 my $i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1372 my $len;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1373 my @tribases;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1374 foreach $key (keys %$hash)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1375 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1376 $$hash{substr($key,0,index($key,"G"))}=$$hash{$key};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1377 delete($$hash{$key});
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1378 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1379 @_=(keys %$hash);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1380 $len=length($_[0]);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1381 for ($j=0;3*$j<$len;$j++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1382 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1383 ##### scanning each codon
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1384 for ($i=0;$i<$spnum;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1385 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1386 $tribases[$i]=substr($$hash{"S".$i},3*$j,3);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1387 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1388 ##### checking each codon
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1389 if (&IsTheSame(@tribases) ==0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1390 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1391 for ($i=0;$i<@tribases;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1392 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1393 $nt4tree{"S".$i}=$nt4tree{"S".$i}.$tribases[$i];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1394 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1395 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1396 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1397 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1398 =cut
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1399
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1400
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1401 sub pal2nal()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1402 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1403 (my $pal,my $nuc, my $nal)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1404 my %aaAln=();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1405 my %ffn=();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1406 my %ntAln=();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1407 my %nt;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1408 my $dna;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1409 my $nt;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1410 my $key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1411 my $flag=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1412 my $i=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1413 my $j;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1414
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1415 ### read protein aligment result
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1416 &ReadAlignmentToHash("$pal",\%aaAln);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1417 ### read nt sequences
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1418 &ReadSequenceInToHash("$nuc",\%ffn);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1419 foreach $key (keys %ffn)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1420 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1421 $dna=$ffn{$key};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1422 #if (int(length($nt{$key})/3)*3 ne length($nt{$key}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1423 if (int(length($dna)/3)*3 ne length($dna))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1424 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1425 $flag=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1426 print "The length of nucleotide sequence is not 3 integer times.\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1427 last;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1428 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1429 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1430 for ($i=0;$i<(length($dna)/3);$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1431 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1432 $nt{$key."|".$i}=substr($dna,$i*3,3);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1433 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1434 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1435 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1436
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1437 if ($flag==0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1438 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1439 return 0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1440 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1441 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1442 foreach $key (keys %aaAln) ### replace aa with corresponding nt
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1443 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1444 $nt="";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1445 $i=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1446 for ($j=0;$j<length($aaAln{$key});$j++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1447 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1448 if (substr($aaAln{$key},$j,1) eq "-")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1449 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1450 $nt=$nt."---";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1451 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1452 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1453 $nt=$nt.$nt{$key."|".$i};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1454 $i++;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1455 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1456 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1457 $ntAln{$key}=$nt;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1458 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1459
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1460 ### output
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1461 open(R,">$nal");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1462 foreach (keys %ntAln)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1463 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1464 print R ">$_\n".$ntAln{$_}."\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1465 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1466 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1467
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1468 return 1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1469 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1470 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1471
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1472
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1473
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1474
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1475 sub DetectSNP()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1476 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1477 my %faa;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1478 my %ffn;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1479 my @row;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1480 my $count_gene;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1481 my $count_sp;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1482 my @genelist;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1483 my $i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1484 my $j;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1485 my $pepalnlen;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1486 my @cdsvar=qw();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1487 my $cdi=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1488 my @tribases;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1489 my @bases;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1490 my @aa;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1491
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1492
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1493 ### fetch gene list
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1494 open(F,"$ClusterID.pep");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1495 @genelist=<F>;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1496 close(F);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1497 @genelist=grep(/^>/,@genelist);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1498 chomp(@genelist);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1499 $_=join("\t",@genelist);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1500 s/>//g;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1501 @genelist=split(/\t/,$_);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1502
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1503 ### count gene number and species number
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1504 @row=split(/\t/,$clusters[$ClusterID-1]);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1505 splice(@row,0,1);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1506 $count_sp=&CountSpeicesInCluster(join("\t",@row));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1507 $count_gene=&CountGeneInCluster(join("\t",@row));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1508
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1509 ### read alignment sequences
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1510 &ReadAlignmentToHash("$ClusterID.pal",\%faa);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1511 &ReadAlignmentToHash("$ClusterID.nal",\%ffn);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1512
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1513 @_=(keys %faa);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1514 $pepalnlen=length($faa{$_[0]});
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1515 ### scan SNP
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1516 for ($i=1;$i<=$pepalnlen;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1517 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1518 @tmp=qw();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1519 @tribases=qw();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1520 for ($j=0;$j<@genelist;$j++) ### fetch triplet codon
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1521 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1522 $tribases[$j]=substr($ffn{$genelist[$j]},3*($i-1),3);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1523 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1524 if (&IsTheSame(@tribases) ==0) ### if triplet codon is not consistent
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1525 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1526 @aa=qw();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1527 for ($j=0;$j<@genelist;$j++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1528 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1529 $aa[$j]=substr($faa{$genelist[$j]},($i-1),1);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1530 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1531 if (&IsTheSame(@aa) ==0) ### aa is not consistent
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1532 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1533 if (join("",@aa) =~/-/)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1534 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1535 $cdsvar[$cdi++]=$ClusterID."\t".$count_sp."\t".$count_gene."\t".$i."\t".&CharType(\@aa)."\t-\t-\tInDel\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1536 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1537 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1538 #### base 1
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1539 for ($j=0;$j<@genelist;$j++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1540 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1541 $bases[$j]=substr($ffn{$genelist[$j]},3*($i-1),1);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1542 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1543 if (&IsTheSame(@bases) ==0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1544 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1545 $cdsvar[$cdi++]=$ClusterID."\t".$count_sp."\t".$count_gene."\t".($i+0.1)."\t".&CharType(\@aa)."\t".&CharType(\@bases)."\t".join("",@bases)."\tNonsynonymous mutation\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1546 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1547 #### base 2
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1548 for ($j=0;$j<@genelist;$j++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1549 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1550 $bases[$j]=substr($ffn{$genelist[$j]},3*($i-1)+1,1);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1551 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1552 if (&IsTheSame(@bases) ==0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1553 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1554 $cdsvar[$cdi++]=$ClusterID."\t".$count_sp."\t".$count_gene."\t".($i+0.2)."\t".&CharType(\@aa)."\t".&CharType(\@bases)."\t".join("",@bases)."\tNonsynonymous mutation\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1555 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1556 #### base 3
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1557 for ($j=0;$j<@genelist;$j++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1558 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1559 $bases[$j]=substr($ffn{$genelist[$j]},3*($i-1)+2,1);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1560 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1561 if (&IsTheSame(@bases) ==0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1562 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1563 $cdsvar[$cdi++]=$ClusterID."\t".$count_sp."\t".$count_gene."\t".($i+0.3)."\t".&CharType(\@aa)."\t".&CharType(\@bases)."\t".join("",@bases)."\tNonsynonymous mutation\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1564 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1565 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1566 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1567 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1568 #### base 1
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1569 for ($j=0;$j<@genelist;$j++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1570 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1571 $bases[$j]=substr($ffn{$genelist[$j]},3*($i-1),1);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1572 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1573 if (&IsTheSame(@bases) ==0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1574 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1575 $cdsvar[$cdi++]=$ClusterID."\t".$count_sp."\t".$count_gene."\t".($i+0.1)."\t".&CharType(\@aa)."\t".&CharType(\@bases)."\t".join("",@bases)."\tSynonymous mutation\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1576 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1577 #### base 2
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1578 for ($j=0;$j<@genelist;$j++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1579 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1580 $bases[$j]=substr($ffn{$genelist[$j]},3*($i-1)+1,1);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1581 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1582 if (&IsTheSame(@bases) ==0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1583 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1584 $cdsvar[$cdi++]=$ClusterID."\t".$count_sp."\t".$count_gene."\t".($i+0.2)."\t".&CharType(\@aa)."\t".&CharType(\@bases)."\t".join("",@bases)."\tSynonymous mutation\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1585 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1586 #### base 3
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1587 for ($j=0;$j<@genelist;$j++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1588 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1589 $bases[$j]=substr($ffn{$genelist[$j]},3*($i-1)+2,1);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1590 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1591 if (&IsTheSame(@bases) ==0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1592 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1593 $cdsvar[$cdi++]=$ClusterID."\t".$count_sp."\t".$count_gene."\t".($i+0.3)."\t".&CharType(\@aa)."\t".&CharType(\@bases)."\t".join("",@bases)."\tSynonymous mutation\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1594 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1595 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1596 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1597 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1598 return @cdsvar;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1599 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1600
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1601
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1602 sub VarAnalysis()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1603 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1604 (my $data)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1605 my @data=@$data;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1606 my $indel=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1607 my $syn=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1608 my $nonsyn=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1609 my @tmp;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1610 $indel=scalar(grep(/InDel$/,@data));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1611 $nonsyn=scalar(grep(/Nonsynonymous mutation$/,@data));;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1612 $syn=scalar(grep(/Synonymous mutation$/,@data));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1613 return "$indel\t$nonsyn\t$syn";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1614 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1615
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1616
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1617
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1618 sub CharType()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1619 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1620 (my $str)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1621 my %hash;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1622 my @data=@$str;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1623 foreach (@data)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1624 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1625 $hash{$_}=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1626 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1627 return join(",",(keys %hash));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1628 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1629
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1630 sub IsTheSame()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1631 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1632 (my @data)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1633 my %hash;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1634 foreach (@data)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1635 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1636 $hash{$_}=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1637 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1638 if (scalar(keys %hash) ==1)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1639 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1640 return 1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1641 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1642 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1643 return 0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1644 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1645 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1646
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1647
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1648
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1649 sub FormatClusterOutPut()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1650 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1651 (my $speices,my $file,my $cluster)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1652 my @row;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1653 my $gid=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1654 my $key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1655 my %hash;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1656 my $gene;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1657 my @tmp;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1658 my $i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1659 my $j;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1660 open(R,">$file");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1661 print R "ClutserID\t".join("\t",@$speices)."\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1662 foreach $key (@$cluster)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1663 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1664 @row=split(/\t/,$key);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1665 for ($i=0;$i<@row;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1666 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1667 if ($row[$i] ne "-")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1668 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1669 @tmp=split(/,/,$row[$i]);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1670 for ($j=0;$j<@tmp;$j++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1671 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1672 $_=$tmp[$j];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1673 s/^S[0-9]+G//;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1674 $tmp[$j]=$_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1675 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1676 $row[$i]=join(",",@tmp);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1677 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1678 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1679 print R $gid."\t".join("\t",@row)."\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1680 $gid++;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1681 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1682 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1683 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1684 sub RetrieveClusterFromFile()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1685 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1686 (my $file,my $clusters)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1687 my @content;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1688 my @row;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1689 my $spid;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1690 my $line=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1691 my $i=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1692 my $j;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1693 my @tmp;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1694 open(F,$file) or die "Could open $file\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1695 @content=<F>;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1696 close(F);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1697 splice(@content,0,1);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1698 chomp(@content);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1699 foreach (@content)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1700 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1701 @row=split(/\t/,$_);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1702 $$clusters[$line]=$row[0];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1703 splice(@row,0,1);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1704 for ($i=0;$i<@row;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1705 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1706 if ($row[$i] ne "-")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1707 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1708 @tmp=split(/,/,$row[$i]);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1709 for ($j=0;$j<@tmp;$j++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1710 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1711 $tmp[$j]="S${i}G".$tmp[$j];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1712 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1713 $row[$i]=join(",",@tmp);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1714 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1715 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1716 $$clusters[$line]=$$clusters[$line]."\t".join("\t",@row)."\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1717 $line++;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1718 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1719 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1720
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1721
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1722
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1723
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1724 sub GeneDistribution()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1725 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1726 (my $clusters,my $hash)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1727 my @row;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1728 my $spid;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1729 my $orth;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1730 my $key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1731 foreach (@$clusters)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1732 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1733 @row=split(/\t/,$_);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1734 splice(@row,0,1);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1735 $orth=&CountSpeicesInCluster(join("\t",@row));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1736 @row=split(/\t|\,/,join("\t",@row));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1737 foreach $key (@row)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1738 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1739 if ($key ne "-")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1740 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1741 $spid=substr($key,1,(index($key,'G')-1)); ###extract strains id
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1742 if (exists($$hash{$orth."|".$spid}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1743 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1744 $$hash{$orth."|".$spid}++;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1745 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1746 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1747 $$hash{$orth."|".$spid}=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1748 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1749 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1750 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1751 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1752 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1753
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1754 sub CountSpeicesInCluster()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1755 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1756 (my $str)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1757 chomp($str);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1758 my @list=split(/\t/,$str);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1759 my $key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1760 my $count=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1761
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1762 foreach $key (@list)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1763 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1764 if ($key ne "-")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1765 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1766 $count++;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1767 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1768 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1769 return $count;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1770 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1771
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1772 sub CountGeneInCluster()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1773 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1774 (my $str)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1775 chomp();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1776 my @list=split(/\t|\,/,$str);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1777 my $key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1778 my $count=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1779 foreach $key (@list)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1780 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1781 if ($key ne "-")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1782 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1783 $count++;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1784 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1785 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1786 return $count;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1787 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1788
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1789
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1790
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1791
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1792 sub GF()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1793 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1794 &PrepareFasta(\@species,$inputDIR,".pep"); ###prepare pep file
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1795 system("cat ".join(".pep ",@species).".pep > All.pep");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1796 system("grep '>' All.pep > genelist");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1797 system("$formatdb -p T -i All.pep");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1798 system("$blastall -p blastp -i All.pep -d All.pep -M BLOSUM45 -m9 -e $evalue -o All.blastp -a $thread");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1799 system("perl ./Blast_Filter.pl All.blastp All.pep $coverage $identity $score | $mcl - --abc -I 2.0 -o All.cluster");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1800 &FormatCluster("All.cluster","genelist",$spnum,\@clusters);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1801 #system("rm -rf *.pep* All.blastp All.cluster genelist");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1802 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1803
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1804 sub MP()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1805 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1806 # (my $species,my $inputDIR,my $thread,my $evalue,my $score,my $coverage,my $identity)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1807 my $i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1808 my $j;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1809 &PrepareFasta(\@species,$inputDIR,".pep"); ###prepare pep file
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1810 system("cat ".join(".pep ",@species).".pep > All.pep");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1811 system("grep '>' All.pep > genelist");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1812 system("rm -rf All.pep");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1813 for ($i=0;$i<$spnum;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1814 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1815 for ($j=$i+1;$j<$spnum;$j++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1816 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1817 system("perl ./inparanoid.pl $blastall $thread $formatdb $score $global $local $species[$i].pep $species[$j].pep");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1818 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1819 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1820 system("perl ./multiparanoid.pl -species ".join(".pep+",@species).".pep -unique 1");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1821 ###convert the MP result to table list based on gene
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1822 &MP_Result_to_Table("MP.Cluster","All.cluster");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1823 &FormatCluster("All.cluster","genelist",$spnum,\@clusters);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1824 system("rm -rf sqltable.* *.pep* MP.Cluster genelist");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1825 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1826
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1827 sub fasta2phylip()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1828 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1829 (my $input,my $output)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1830 use Bio::AlignIO;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1831 my $inputfilename = "10.aln";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1832 my $in= Bio::AlignIO->new(-file => $input ,
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1833 -format => 'fasta');
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1834 my $out = Bio::AlignIO->new(-file => ">$output" ,
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1835 -format => 'phylip');
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1836 while ( my $aln = $in->next_aln() )
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1837 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1838 $out->write_aln($aln);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1839 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1840 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1841
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1842
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1843 sub RemoveHeadGap()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1844 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1845 (my $nal,my $hash)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1846 my %aln;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1847 my $key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1848 my $gaplength=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1849 my $len1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1850 my $len2;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1851 &ReadSequenceInToHash("$nal",\%aln);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1852 foreach $key (keys %aln)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1853 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1854 $len1=length($aln{$key});
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1855 $_=$aln{$key};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1856 s/^-+//;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1857 $len2=length($_);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1858 if (($len1-$len2)>$gaplength)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1859 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1860 $gaplength=$len1-$len2;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1861 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1862 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1863 foreach $key (keys %aln)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1864 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1865 $$hash{$key}=$$hash{$key}.substr($aln{$key},$gaplength,(length($aln{$key})-$gaplength));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1866 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1867 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1868
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1869 sub PrepareFasta()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1870 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1871 (my $species,my $inputDIR,my $extention)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1872 my $sp;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1873 my $file;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1874 my $i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1875 my %hash;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1876 my $key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1877 for ($i=0;$i<@$species;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1878 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1879 $file=$inputDIR.$$species[$i].$extention;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1880 %hash=();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1881 &ReadSequenceInToHash($file,\%hash);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1882 open(R,">$$species[$i]${extention}") or die "Could write into $file\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1883 foreach $key (keys %hash)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1884 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1885 print R ">S${i}G$key\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1886 print R $hash{$key}."\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1887 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1888 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1889 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1890 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1891
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1892 sub PrepareTable()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1893 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1894 (my $species,my $inputDIR,my $extention)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1895 my @content;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1896 my $i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1897 my @row;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1898 my $file;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1899 for ($i=0;$i<@$species;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1900 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1901 $file=$inputDIR.$$species[$i].$extention;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1902 open(F,$file) or die "Could open $file\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1903 @content=<F>;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1904 close(F);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1905 chomp(@content);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1906 open(R,">$$species[$i]${extention}") or die "Could write into $file\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1907 foreach (@content)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1908 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1909 @row=split(/\t/,$_);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1910 $row[0]="S${i}G$row[0]";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1911 if ($extention eq ".location")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1912 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1913 $row[0]=$row[0]."\t".$row[0];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1914 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1915 print R join("\t",@row)."\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1916 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1917 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1918 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1919 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1920
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1921 sub CheckExtraProgram
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1922 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1923 #(my $section, my $method, my $tmparray)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1924 my @error;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1925 my $ei=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1926
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1927 #####cluster gene
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1928 if (substr($section,0,1) eq "1")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1929 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1930 ###MP: blastall formatdb
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1931 ###GF: blastall formatdb mcl
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1932
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1933 if (!(-e $formatdb))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1934 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1935 $error[$ei++]="formatdb is not found at $formatdb\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1936 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1937
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1938 if (!(-X $formatdb))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1939 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1940 $error[$ei++]="there is not premission to execute $formatdb\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1941 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1942
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1943 if (!(-e $blastall))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1944 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1945 $error[$ei++]="blastall is not found at $blastall\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1946 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1947
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1948 if (!(-X $blastall))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1949 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1950 $error[$ei++]="there is not premission to execute $blastall\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1951 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1952
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1953 if ($method eq "GF")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1954 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1955 if (!(-e $mcl))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1956 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1957 $error[$ei++]="mcl is not found at $mcl\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1958 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1959 if (!(-X $mcl))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1960 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1961 $error[$ei++]="there is not premission to execute $mcl\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1962 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1963 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1964 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1965
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1966 #####CDS variation
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1967 if (substr($section,2,1) eq "1")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1968 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1969 if (!(-e $mafft))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1970 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1971 $error[$ei++]="mafft is not found at $mafft\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1972 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1973 if (!(-X $mafft))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1974 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1975 $error[$ei++]="there is not premission to execute $mafft\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1976 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1977 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1978
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1979 #####CDS variation
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1980 if (substr($section,3,1) eq "1")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1981 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1982 if (!(-e $mafft))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1983 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1984 $error[$ei++]="mafft is not found at $mafft\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1985 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1986 if (!(-X $mafft))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1987 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1988 $error[$ei++]="there is not premission to execute $mafft\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1989 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1990 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1991 #####Evolution analysis
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1992 if (substr($section,3,1) eq "1")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1993 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1994 if (-e $seqboot)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1995 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1996 $error[$ei++]="there is not premission to execute $seqboot\n" if(!(-X $seqboot));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1997 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1998 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1999 $error[$ei++]="seqboot is not found at $seqboot\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2000 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2001 if (-e $dnaml)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2002 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2003 $error[$ei++]="there is not premission to execute $dnaml\n" if(!(-X $dnaml));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2004 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2005 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2006 $error[$ei++]="dnaml is not found at $dnaml\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2007 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2008 if (-e $dnadist)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2009 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2010 $error[$ei++]="there is not premission to execute $dnadist\n" if(!(-X $dnadist));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2011 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2012 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2013 $error[$ei++]="dnadist is not found at $dnadist\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2014 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2015 if (-e $neighbor)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2016 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2017 $error[$ei++]="there is not premission to execute $neighbor\n" if(!(-X $neighbor));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2018 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2019 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2020 $error[$ei++]="neighbor is not found at $neighbor\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2021 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2022 if (-e $consense)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2023 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2024 $error[$ei++]="there is not premission to execute $consense\n" if(!(-X $consense));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2025 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2026 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2027 $error[$ei++]="consense is not found at $consense\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2028 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2029 if (-e $dnapars)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2030 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2031 $error[$ei++]="there is not premission to execute $dnapars\n" if(!(-X $dnapars));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2032 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2033 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2034 $error[$ei++]="dnapars is not found at $dnapars\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2035 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2036 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2037 #@$tmparray=(@$tmparray,@error);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2038 @tmp=(@tmp,@error);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2039 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2040
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2041
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2042 sub CheckInputFile()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2043 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2044 (my $species,my $inputDIR,my $section,my $method,my $tmparray)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2045 ####cluster
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2046 if (substr($section,0,1) eq "1")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2047 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2048 if ($method eq "MM")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2049 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2050 @$tmparray=(@$tmparray,&chk2SEQ($species,$inputDIR)); ### check pep and nuc
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2051 @$tmparray=(@$tmparray,&chktab($species,$inputDIR,".location"));### chk pep nuc location
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2052 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2053 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2054 @$tmparray=(@$tmparray,&chk1SEQ($species,$inputDIR));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2055 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2056 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2057 ###CDS variation
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2058 if (substr($section,2,1) eq "1")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2059 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2060 @$tmparray=(@$tmparray,&chk2SEQ($species,$inputDIR));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2061 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2062 ###function analysis
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2063 if (substr($section,4,1) eq "1")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2064 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2065 @$tmparray=(@$tmparray,&chktab($species,$inputDIR,".function"));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2066 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2067 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2068
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2069
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2070 sub chk1SEQ()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2071 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2072 (my $species,my $inputDIR)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2073 my @error;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2074 my $ei=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2075 my $sp;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2076 my $pepfile;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2077 my %pep;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2078 foreach $sp (@$species)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2079 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2080 %pep=();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2081 $pepfile=$inputDIR.$sp.".pep";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2082 &ReadSequenceInToHash($pepfile,\%pep);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2083 if (scalar(keys %pep)<2)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2084 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2085 $error[$ei++]="format error in $pepfile\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2086 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2087 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2088 return @error;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2089 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2090
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2091 sub chk2SEQ()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2092 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2093 (my $species,my $inputDIR)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2094 my $sp;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2095 my %pep;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2096 my %nuc;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2097 my $pepfile;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2098 my $nucfile;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2099 my $key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2100 my @error;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2101 my $ei=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2102 foreach $sp (@$species)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2103 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2104 $pepfile=$inputDIR.$sp.".pep";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2105 $nucfile=$inputDIR.$sp.".nuc";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2106 %pep=();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2107 %nuc=();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2108 &ReadSequenceInToHash("$pepfile",\%pep);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2109 &ReadSequenceInToHash("$nucfile",\%nuc);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2110 if (scalar(keys %pep) ne scalar(keys %nuc))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2111 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2112 $error[$ei++]="Sequences number is not consistent in the following two file:\n\t$pepfile\n\t$nucfile\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2113 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2114 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2115 foreach $key (keys %pep)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2116 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2117 if (exists($nuc{$key}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2118 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2119 if (length($nuc{$key}) ne ((length($pep{$key})+1)*3))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2120 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2121 $error[$ei++]="the length of $key in $nucfile is not consistent with its corresponding protein length\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2122 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2123 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2124 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2125 $error[$ei++]="$key lost in $nucfile\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2126 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2127 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2128
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2129 foreach $key (keys %nuc)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2130 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2131 if (!exists($pep{$key}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2132 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2133 $error[$ei++]="1048 $key lost in $pepfile\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2134 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2135 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2136 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2137 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2138 return @error;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2139 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2140
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2141
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2142 sub chktab()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2143 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2144 (my $species,my $inputDIR,my $extention)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2145 my %pep;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2146 my @row;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2147 my $key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2148 my %tab;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2149 my @error;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2150 my $ei=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2151 my $sp;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2152 my $tabfile;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2153 my $pepfile;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2154 foreach $sp (@$species)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2155 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2156 %tab=();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2157 %pep=();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2158 $tabfile=$inputDIR.$sp.$extention;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2159 open(F,"$tabfile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2160 while (<F>)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2161 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2162 chomp();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2163 @row=split(/\t/,$_);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2164 if (scalar(@row)<3)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2165 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2166 $error[$ei++]="format error in $tabfile\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2167 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2168 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2169 $tab{$row[0]}=$row[1];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2170 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2171 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2172 close(F);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2173 $pepfile=$inputDIR.$sp.".pep";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2174 &ReadSequenceInToHash($pepfile,\%pep);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2175 foreach $key (keys %pep)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2176 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2177 if (!exists($tab{$key}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2178 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2179 $error[$ei++]="sequence $key lost infomation in $tabfile\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2180 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2181 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2182 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2183 return @error;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2184 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2185
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2186
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2187
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2188 sub ReadSequenceInToHash()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2189 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2190 use Bio::SeqIO;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2191 (my $file,my $hash)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2192 my $seq;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2193 my $in=Bio::SeqIO->new(-file=>"$file",-format=>"fasta");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2194 while ($seq=$in->next_seq())
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2195 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2196 #$$hash{$id."|".$seq->id}=$seq->seq();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2197 $$hash{$seq->id}=$seq->seq();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2198 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2199 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2200
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2201 sub ReadAlignmentToHash()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2202 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2203 (my $file,my $hash)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2204 my $name="";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2205 my $seq="";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2206 my @content;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2207 my $line;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2208 open(F,"$file");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2209 @content=<F>;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2210 close(F);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2211 chomp(@content);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2212 for ($line=0;$line<@content;$line++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2213 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2214 if ($content[$line]=~/^>/)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2215 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2216 if ($line>0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2217 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2218 $$hash{$name}=$seq;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2219 $name="";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2220 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2221
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2222 $_=$content[$line];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2223 s/^>//;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2224 $name=$_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2225 $seq="";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2226 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2227 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2228 if ($name ne "")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2229 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2230 $seq=$seq.$content[$line];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2231 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2232 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2233 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2234 $$hash{$name}=$seq;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2235 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2236
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2237
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2238
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2239 sub Combination()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2240 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2241 (my $m,my $n,my $comRef)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2242 my $str="";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2243 my %hash;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2244 my $fpos;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2245 my $num0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2246 my $rest;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2247 my $tmp;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2248 my $i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2249 my $j;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2250 my $key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2251 #my $m=scalar(@$array);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2252 my @combination;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2253
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2254 for ($i=1;$i<=$n;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2255 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2256 $str="1".$str;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2257 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2258
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2259 for ($i=1;$i<=($m-$n);$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2260 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2261 $str=$str."0";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2262 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2263
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2264 $hash{$str}=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2265 while ($str=~/10/)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2266 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2267 $fpos=index($str,"10");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2268 $_=$str;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2269 s/10/01/;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2270 $str=$_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2271 $tmp=substr($str,0,$fpos);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2272 $_=$tmp;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2273 s/0//g;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2274 $rest=$_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2275 $num0=$fpos-length($_);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2276 for ($i=1;$i<=$num0;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2277 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2278 $rest="$rest"."0";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2279 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2280 $str="$rest".substr($str,$fpos,$m-$fpos);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2281 $hash{$str}=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2282 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2283 $j=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2284 foreach $key (keys %hash)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2285 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2286 $combination[$j]="";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2287 for ($i=0;$i<$m;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2288 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2289 if (substr($key,$i,1) eq "1")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2290 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2291 if ($combination[$j] ne "")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2292 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2293 #$combination[$j]=$combination[$j]."\t".$$array[$i];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2294 $combination[$j]=$combination[$j]."\t".$i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2295 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2296 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2297 #$combination[$j]=$$array[$i]; ### For return species ID
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2298 $combination[$j]=$i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2299 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2300 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2301 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2302 $j++;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2303 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2304 @$comRef=@combination; ### update the data through the physic address
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2305 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2306
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2307 sub ChkCombinationValue()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2308 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2309 (my $m,my $n)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2310 my %hash;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2311 my %vhash;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2312 my $value=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2313 my $key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2314 my @row;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2315 my @sdA;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2316 my @sdB;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2317
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2318 ### initialization
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2319 $hash{$m."-".$n}=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2320
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2321 ### split combination
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2322 while (scalar(keys %hash)>0 and $value<=$sampleSize)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2323 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2324 foreach $key (keys %hash)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2325 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2326 if ($value > $sampleSize) ### threshold
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2327 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2328 last;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2329 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2330 if (!exists($hash{$key}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2331 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2332 next;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2333 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2334 @row=split(/-/,$key);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2335 #print $row[0]."|".$row[1]."\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2336 if ($row[0] eq $row[1])
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2337 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2338 $value=$value+$hash{$key};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2339 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2340 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2341 ##split
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2342 $sdA[0]=$row[0]-1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2343 $sdA[1]=$row[1];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2344 $sdB[0]=$row[0]-1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2345 $sdB[1]=$row[1]-1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2346 ##storing A
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2347 if (($sdA[0] eq $sdA[1]) or $sdA[1] ==0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2348 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2349 $value=$value+$hash{$key};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2350 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2351 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2352 if (exists($hash{$sdA[0]."-".$sdA[1]}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2353 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2354 $hash{$sdA[0]."-".$sdA[1]}=$hash{$sdA[0]."-".$sdA[1]}+$hash{$key};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2355 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2356 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2357 $hash{$sdA[0]."-".$sdA[1]}=$hash{$key};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2358 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2359 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2360
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2361 ##storing B
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2362 if (($sdB[0] eq $sdB[1]) or $sdB[1]==0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2363 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2364 $value=$value+$hash{$key};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2365 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2366 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2367 if (exists($hash{$sdB[0]."-".$sdB[1]}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2368 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2369 $hash{$sdB[0]."-".$sdB[1]}=$hash{$sdB[0]."-".$sdB[1]}+$hash{$key};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2370 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2371 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2372 $hash{$sdB[0]."-".$sdB[1]}=$hash{$key};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2373 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2374 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2375 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2376 #delete original combination
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2377 delete($hash{$key});
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2378 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2379 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2380
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2381 if ($value>$sampleSize)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2382 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2383 return 0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2384 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2385 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2386 return $value;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2387 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2388 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2389
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2390
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2391 sub SampleCombination()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2392 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2393 (my $m,my $n,my $comRef)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2394 my %hash;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2395 my $sampleTimes=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2396 my @randNum;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2397 my @sortID;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2398 my $i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2399 my $j;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2400 my $tmp;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2401 while ( scalar(keys %hash)<$sampleSize and $sampleTimes<($sampleSize*2))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2402 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2403 for ($i=0;$i<$m;$i++) # generate random data
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2404 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2405 $randNum[$i]=int(100000 * rand(100));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2406 $sortID[$i]=$i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2407 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2408
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2409 for ($i=0;$i<$m;$i++) # sorting random data
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2410 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2411 for ($j=0;$j<$m;$j++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2412 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2413 if ($randNum[$sortID[$i]]<$randNum[$sortID[$j]])
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2414 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2415 $tmp=$sortID[$i];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2416 $sortID[$i]=$sortID[$j];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2417 $sortID[$j]=$tmp;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2418 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2419 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2420 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2421
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2422 #storing data
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2423 $tmp=join("\t",sort {$a<=>$b} (splice(@sortID,0,$n)));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2424 $hash{$tmp}=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2425 $sampleTimes++;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2426 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2427 @$comRef=keys %hash;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2428 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2429
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2430
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2431 sub PanGenomeNumber()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2432 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2433 (my $spID)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2434 my $pan=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2435 my $core=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2436 my $count; #### counter;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2437 my @row;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2438
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2439 foreach (@clusters)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2440 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2441 $count=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2442 @row=split(/\t/,$_);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2443
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2444 foreach (@$spID)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2445 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2446 $count=$count+$row[$_];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2447 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2448
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2449 if ($count>0)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2450 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2451 $pan++;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2452 if ($count == scalar(@$spID))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2453 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2454 $core++;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2455 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2456 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2457 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2458 return $pan."\t".$core;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2459 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2460
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2461 sub fit_model_A()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2462 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2463 ### model y = A * x**B + C
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2464 (my $xdata,my $ydata)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2465 my $i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2466 my $b;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2467 my $max_B=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2468 my $max_R=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2469 my $max_A=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2470 my $max_A_interval;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2471 my $max_C=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2472 my $max_C_interval;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2473 my $R=1e-100;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2474 my $start;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2475 my $end;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2476 my $step;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2477 my @xValues;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2478 my @yValues;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2479
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2480 $start=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2481 $step=0.001;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2482 $b=$start;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2483 $max_R=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2484 $R=1e-100;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2485
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2486 use Statistics::LineFit;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2487 use Statistics::Distributions;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2488
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2489 while ($max_R<=$R)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2490 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2491 if (($b < 0.02) and ($b >-0.02))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2492 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2493 $b=-0.02;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2494 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2495
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2496 for ($i=0;$i<@$xdata;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2497 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2498 $xValues[$i]=$$xdata[$i]**$b;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2499 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2500 @yValues=@$ydata;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2501 my $lineFit = Statistics::LineFit->new();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2502 $lineFit->setData (\@xValues, \@yValues) or die "Invalid data";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2503 (my $intercept, my $slope) = $lineFit->coefficients();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2504 my $rSquared = $lineFit->rSquared();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2505 my $meanSquaredError = $lineFit->meanSqError();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2506 my $durbinWatson = $lineFit->durbinWatson();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2507 my $sigma = $lineFit->sigma();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2508 (my $tStatIntercept, my $tStatSlope) = $lineFit->tStatistics();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2509 (my $varianceIntercept,my $varianceSlope) = $lineFit->varianceOfEstimates();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2510
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2511 $max_R=$R;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2512 $R=$rSquared;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2513 if ($max_R<=$R)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2514 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2515 $max_R=$R;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2516 ($max_C,$max_A)=$lineFit->coefficients();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2517 $max_A_interval=Statistics::Distributions::tdistr (($spnum-2),.025)*sqrt($varianceSlope);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2518 $max_C_interval=Statistics::Distributions::tdistr (($spnum-2),.025)*sqrt($varianceIntercept);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2519 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2520 $b=$b-$step;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2521 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2522 $max_B=$b;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2523 return ($max_R,$max_A,$max_A_interval,$max_B,$max_C,$max_C_interval);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2524
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2525 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2526
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2527 sub fit_model_B()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2528 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2529 ### model y = A * exp(x*B) + C
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2530 (my $xdata,my $ydata)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2531 my $i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2532 my $b;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2533 my $max_B=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2534 my $max_R=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2535 my $max_A=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2536 my $max_A_interval;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2537 my $max_C=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2538 my $max_C_interval;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2539 my $R=1e-100;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2540 my $start;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2541 my $end;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2542 my $step;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2543 my @xValues;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2544 my @yValues;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2545
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2546 $start=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2547 $step=0.001;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2548 $b=$start;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2549 $max_R=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2550 $R=1e-100;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2551
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2552 use Statistics::LineFit;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2553 use Statistics::Distributions;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2554
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2555 while ($max_R<=$R)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2556 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2557 if (($b < 0.02) and ($b >-0.02))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2558 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2559 $b=-0.02;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2560 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2561
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2562 for ($i=0;$i<@$xdata;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2563 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2564 $xValues[$i]=exp($$xdata[$i]*$b);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2565 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2566 @yValues=@$ydata;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2567 my $lineFit = Statistics::LineFit->new();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2568 $lineFit->setData (\@xValues, \@yValues) or die "Invalid data";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2569 (my $intercept, my $slope) = $lineFit->coefficients();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2570 my $rSquared = $lineFit->rSquared();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2571 my $meanSquaredError = $lineFit->meanSqError();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2572 my $durbinWatson = $lineFit->durbinWatson();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2573 my $sigma = $lineFit->sigma();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2574 (my $tStatIntercept, my $tStatSlope) = $lineFit->tStatistics();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2575 (my $varianceIntercept,my $varianceSlope) = $lineFit->varianceOfEstimates();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2576
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2577 $max_R=$R;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2578 $R=$rSquared;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2579 if ($max_R<=$R)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2580 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2581 $max_R=$R;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2582 ($max_C,$max_A)=$lineFit->coefficients();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2583 $max_A_interval=Statistics::Distributions::tdistr (($spnum-2),.025)*sqrt($varianceSlope);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2584 $max_C_interval=Statistics::Distributions::tdistr (($spnum-2),.025)*sqrt($varianceIntercept);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2585 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2586 $b=$b-$step;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2587 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2588 $max_B=$b;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2589 return ($max_R,$max_A,$max_A_interval,$max_B,$max_C,$max_C_interval);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2590 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2591
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2592
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2593 sub ReadData2Array()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2594 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2595 (my $file, my $array1,my $col1,my $array2,my $col2)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2596 my $i=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2597 open(F,$file);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2598 $_=<F>;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2599 while (<F>)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2600 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2601 chomp();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2602 @_=split(/\t/,$_);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2603 $$array1[$i]=$_[$col1];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2604 $$array2[$i]=$_[$col2];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2605 $i++;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2606 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2607 close(F);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2608 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2609
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2610 sub SumData()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2611 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2612 (my $xdata,my $ydata,my $SumMethod)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2613 my %hash;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2614 my $i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2615 my $key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2616 my $max=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2617 for ($i=0;$i<@$xdata;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2618 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2619 if (exists($hash{$$xdata[$i]}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2620 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2621 $hash{$$xdata[$i]}=$hash{$$xdata[$i]}." ".$$ydata[$i];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2622 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2623 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2624 $hash{$$xdata[$i]}=$$ydata[$i];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2625 if ($$xdata[$i]>$max)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2626 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2627 $max=$$xdata[$i];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2628 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2629 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2630 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2631 @$xdata=qw();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2632 @$ydata=qw();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2633 $i=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2634 foreach $i (1..$max)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2635 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2636 $$xdata[$i-1]=$i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2637 if ($SumMethod eq "median")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2638 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2639 $$ydata[$i-1]=&median($hash{$i});
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2640 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2641 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2642 $$ydata[$i-1]=&mean($hash{$i});
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2643 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2644 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2645 #print join(",",@$xdata)."\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2646 #print join(",",@$ydata)."\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2647 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2648
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2649 sub median()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2650 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2651 (my $data)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2652 my @data=split(/ /,$data);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2653 my $arraylen=scalar(@data);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2654 @data=sort{$a<=>$b} @data;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2655 if (int($arraylen/2)*2 == $arraylen)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2656 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2657 return ($data[$arraylen/2]+$data[$arraylen/2-1])/2;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2658 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2659 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2660 return $data[int($arraylen/2)];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2661 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2662 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2663
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2664 sub mean()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2665 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2666 (my $data)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2667 my @data=split(/ /,$data);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2668 my $sum=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2669 foreach (@data)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2670 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2671 $sum=$sum+$_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2672 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2673 return int(($sum/scalar(@data))*1000)/1000;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2674 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2675
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2676 sub ReplaceName()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2677 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2678 (my $sp,my $file)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2679 my @content;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2680 my $line;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2681 my $i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2682 my $target;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2683 open(F,$file);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2684 @content=<F>;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2685 close(F);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2686 for ($line=0;$line<@content;$line++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2687 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2688 for ($i=0;$i<@$sp;$i++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2689 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2690 $_=$content[$line];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2691 $target="sp".$i."sp";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2692 s/$target/$$sp[$i]/;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2693 $content[$line]=$_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2694 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2695 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2696 open(R,">$file");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2697 print R @content;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2698 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2699 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2700
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2701 sub MP_Result_to_Table()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2702 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2703 (my $MPresult, my $outputfile)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2704 my %hash;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2705 my $maxid=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2706 my $i;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2707 my @row;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2708
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2709 open(F,"$MPresult");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2710 $_=<F>;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2711 while (<F>)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2712 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2713 @row=split(/\t/,$_);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2714 if (exists($hash{$row[0]}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2715 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2716 $hash{$row[0]}=$hash{$row[0]}."\t".$row[2];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2717 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2718 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2719 $hash{$row[0]}=$row[2];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2720 if ($row[0]>$maxid)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2721 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2722 $maxid=$row[0];
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2723 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2724 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2725 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2726 close(F);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2727
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2728 open(R,">$outputfile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2729 foreach $i (1..$maxid)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2730 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2731 print R $hash{$i}."\n";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2732 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2733 close(R);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2734 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2735
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2736
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2737
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2738 sub FormatCluster()
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2739 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2740 (my $infile,my $genelist,my $spnum,my $cluster)=@_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2741 my %hash;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2742 my %gene;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2743 my $key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2744 my @row;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2745 my $sp;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2746 my $line;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2747 my $i=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2748 my $j=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2749 my @content;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2750
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2751 ### record gene in clusters
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2752 open(F,"$infile");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2753 @content=<F>;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2754 close(F);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2755 chomp(@content);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2756 for ($line=0;$line<@content;$line++)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2757 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2758 @row=split(/\t/,$content[$line]);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2759 foreach $key (@row)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2760 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2761 $gene{$key}=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2762 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2763 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2764 ###retrieves gene which is not in clutsers
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2765
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2766 open(F,"$genelist");
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2767 while ($key=<F>)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2768 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2769 if ($key=~/^>/)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2770 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2771 chomp($key);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2772 $_=$key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2773 s/^>//;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2774 $key=$_;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2775 if (!exists($gene{$key}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2776 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2777 $content[$line]=$key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2778 $line++;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2779 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2780 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2781 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2782 close(F);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2783
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2784 #### initialization @cluster
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2785 @$cluster=qw();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2786 $j=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2787
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2788 foreach $line (@content)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2789 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2790 if ($line ne "")
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2791 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2792 %hash=();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2793 @row=split(/\t/,$line);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2794 foreach $key (@row)
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2795 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2796 $sp=substr($key,0,index($key,"G"));
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2797 $gene{$key}=1;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2798 if (exists($hash{$sp}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2799 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2800 $hash{$sp}=$hash{$sp}.",".$key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2801 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2802 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2803 $hash{$sp}=$key;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2804 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2805 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2806
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2807 $i=0;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2808 @row=qw();
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2809
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2810 foreach $i (0..($spnum-1))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2811 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2812 if (exists($hash{"S$i"}))
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2813 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2814 $row[$i]=$hash{"S$i"};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2815 }else
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2816 {
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2817 $row[$i]="-";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2818 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2819 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2820 $$cluster[$j++]=join("\t",@row);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2821 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2822 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2823 }
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2824
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2825