0
|
1 #!/usr/bin/perl -w
|
|
2 #Filename:
|
|
3 #Author: Tian Dongmei
|
|
4 #Email: tiandm@big.ac.cn
|
|
5 #Date: 2014-5-29
|
|
6 #Modified:
|
|
7 #Description:
|
|
8 my $version=1.00;
|
|
9
|
|
10 use strict;
|
|
11 use Getopt::Long;
|
|
12 use File::Basename;
|
|
13
|
|
14 my %opts;
|
|
15 GetOptions(\%opts,"i=s","format=s","o=s","h");
|
|
16 if (!(defined $opts{o} and defined $opts{format} and defined $opts{i} ) || defined $opts{h}) { #necessary arguments
|
|
17 &usage;
|
|
18 }
|
|
19 my ($config,$prepath,$rfampath,$genomepath,$clusterpath,$annotatepath,$degpath);
|
|
20 my ($predir,$rfamdir,$genomedir,$clusterdir,$annotatedir,$degdir);
|
|
21 open IN,"<$opts{i}";
|
|
22 $config=<IN>; chomp $config;
|
|
23 $prepath=<IN>; chomp $prepath;
|
|
24 $rfampath=<IN>;chomp $rfampath;
|
|
25 $genomepath=<IN>; chomp $genomepath;
|
|
26 $clusterpath=<IN>; chomp $clusterpath;
|
|
27 $annotatepath=<IN>; chomp $annotatepath;
|
15
|
28 my $deg_tag=1;
|
|
29 if(eof){$deg_tag=0;}
|
|
30 else{
|
|
31 $degpath=<IN>; chomp $degpath;
|
|
32 }
|
0
|
33 close IN;
|
|
34 my @tmp=split/\//,$prepath;
|
|
35 $predir=$tmp[-1];
|
|
36 @tmp=split/\//,$rfampath;
|
|
37 $rfamdir=$tmp[-1];
|
|
38 @tmp=split/\//,$genomepath;
|
|
39 $genomedir=$tmp[-1];
|
|
40 @tmp=split/\//,$clusterpath;
|
|
41 $clusterdir=$tmp[-1];
|
|
42 @tmp=split/\//,$annotatepath;
|
|
43 $annotatedir=$tmp[-1];
|
15
|
44 #@tmp=split/\//,$degpath;
|
|
45 #$degdir=$tmp[-1];
|
0
|
46
|
|
47 my $dir=dirname($opts{'o'});
|
|
48
|
|
49 open OUT ,">$opts{'o'}";
|
|
50 print OUT "<HTML>\n <HEAD>\n <TITLE> Analysis Report </TITLE>\n </HEAD>
|
|
51 <BODY bgcolor=\"lightgray\">\n <h1 align=\"center\">\n <font face=\"ºÚÌå\">\n <b>Small RNA Analysis Report</b>\n </font>\n </h1>
|
|
52 <h2>1. Sequence No. and quality</h2>
|
|
53 <h3>1.1 Sequece No.</h3>
|
|
54 ";
|
|
55
|
|
56 ### raw data no
|
|
57 open IN,"<$config";
|
|
58 my @files;my @marks; my @rawNo;
|
|
59 while (my $aline=<IN>) {
|
|
60 chomp $aline;
|
|
61 my @tmp=split/\t/,$aline;
|
|
62 push @files,$tmp[0];
|
|
63
|
|
64 my $no=`less $tmp[0] |wc -l `;
|
|
65 chomp $no;
|
|
66 if ($opts{'format'} eq "fq" || $opts{'format'} eq "fastq") {
|
|
67 $no=$no/4;
|
|
68 }
|
|
69 else{
|
|
70 $no=$no/2;
|
|
71 }
|
|
72 push @rawNo,$no;
|
|
73
|
|
74 push @marks,$tmp[1];
|
|
75 }
|
|
76 close IN;
|
|
77
|
|
78 ### preprocess
|
|
79 unless ($prepath=~/\/$/) {
|
|
80 $prepath .="/";
|
|
81 }
|
|
82
|
|
83 my @trimNo;my @collapse;
|
|
84 my $collapsefile=$prepath."collapse_reads.fa";
|
|
85 open IN,"<$collapsefile";
|
|
86 while (my $aline=<IN>) {
|
|
87 chomp $aline;
|
|
88 <IN>;
|
|
89 $aline=~/:([\d|_]+)_x(\d+)$/;
|
|
90 my @lng=split/_/,$1;
|
|
91 for (my $i=0;$i<@lng;$i++) {
|
|
92 if ($lng[$i]>0) {
|
|
93 $trimNo[$i] +=$lng[$i];
|
|
94 $collapse[$i] ++;
|
|
95 }
|
|
96 }
|
|
97 }
|
|
98 close IN;
|
|
99
|
|
100 my @cleanR;my @cleanT;
|
|
101 my $clean=$prepath."collapse_reads_18-40.fa";
|
|
102 open IN,"<$clean";
|
|
103 while (my $aline=<IN>) {
|
|
104 chomp $aline;
|
|
105 <IN>;
|
|
106 $aline=~/:([\d|_]+)_x(\d+)$/;
|
|
107 my @lng=split/_/,$1;
|
|
108 for (my $i=0;$i<@lng;$i++) {
|
|
109 if ($lng[$i]>0) {
|
|
110 $cleanR[$i] +=$lng[$i];
|
|
111 $cleanT[$i] ++;
|
|
112 }
|
|
113 }
|
|
114 }
|
|
115 close IN;
|
|
116
|
|
117 my @filterR;my @filterT;
|
|
118 my $filter=$prepath."collapse_reads_out.fa";
|
|
119 open IN,"<$filter";
|
|
120 while (my $aline=<IN>) {
|
|
121 chomp $aline;
|
|
122 <IN>;
|
|
123 $aline=~/:([\d|_]+)_x(\d+)$/;
|
|
124 my @lng=split/_/,$1;
|
|
125 for (my $i=0;$i<@lng;$i++) {
|
|
126 if ($lng[$i]>0) {
|
|
127 $filterR[$i] +=$lng[$i];
|
|
128 $filterT[$i] ++;
|
|
129 }
|
|
130 }
|
|
131 }
|
|
132 close IN;
|
|
133
|
|
134
|
|
135 print OUT "<table border=\"1\">
|
|
136 <tr align=\"center\">
|
|
137 <th> </th>
|
|
138 ";
|
|
139 foreach (@marks) {
|
|
140 print OUT "<th> $_ </th>\n";
|
|
141 }
|
|
142 print OUT "</tr>
|
|
143 <tr align=\"center\">
|
|
144 <th align=\"left\">Raw Reads No. </th>
|
|
145 ";
|
|
146 foreach (@rawNo) {
|
|
147 print OUT "<td> $_ </td>\n";
|
|
148 }
|
|
149 print OUT "</tr>
|
|
150 <tr align=\"center\">
|
|
151 <th align=\"left\">Reads No. After Trimed 3\' adapter </th>
|
|
152 ";
|
|
153 foreach (@trimNo) {
|
|
154 print OUT "<td> $_ </td>\n";
|
|
155 }
|
|
156 print OUT "</tr>
|
|
157 <tr align=\"center\">
|
|
158 <th align=\"left\">Unique Tags No. </th>
|
|
159 ";
|
|
160 foreach (@collapse) {
|
|
161 print OUT "<td> $_ </td>\n";
|
|
162 }
|
|
163 print OUT "</tr>
|
|
164 <tr align=\"center\">
|
|
165 <th align=\"left\">Clean Reads No. </th>
|
|
166 ";
|
|
167 foreach (@cleanR) {
|
|
168 print OUT "<td> $_ </td>\n";
|
|
169 }
|
|
170 print OUT "</tr>
|
|
171 <tr align=\"center\">
|
|
172 <th align=\"left\">Clean Tags No. </th>
|
|
173 ";
|
|
174 foreach (@cleanT) {
|
|
175 print OUT "<td> $_ </td>\n";
|
|
176 }
|
|
177 print OUT "</tr>
|
|
178 <tr align=\"center\">
|
|
179 <th align=\"left\">Filter Reads No. \(reads count \>3\) </th>
|
|
180 ";
|
|
181 foreach (@filterR) {
|
|
182 print OUT "<td> $_ </td>\n";
|
|
183 }
|
|
184 print OUT "</tr>
|
|
185 <tr align=\"center\">
|
|
186 <th align=\"left\">Filter Tags No. \(reads count \>3\) </th>
|
|
187 ";
|
|
188 foreach (@filterT) {
|
|
189 print OUT "<td> $_ </td>\n";
|
|
190 }
|
|
191 print OUT "</tr>\n</table>";
|
|
192 print OUT "<p>
|
|
193 Note:<br />
|
|
194 The raw data file path is: <b>$files[0]</b><br />
|
|
195 ";
|
|
196 for (my $i=1;$i<@files;$i++) {
|
|
197 print OUT "          <b>$files[$i]</b><br />";
|
|
198 }
|
|
199 print OUT "The collapsed file path is: <b>$collapsefile</b><br />
|
|
200 The clean data file path is: <b>$clean</b><br />
|
|
201 The filter (remain total reads>3) data file path is: <b>$filter</b><br />
|
|
202 </p>
|
|
203 <h2> 1. Sequence length count</h2>
|
|
204 <h3> 1.1 Reads length</h3>
|
|
205 ";
|
|
206
|
|
207 print OUT "<img src=\"./$predir/Reads_length_after_count_filter.png\" alt=\"Reads_length_after_count_filter.png\" width=\"400\" height=\"300\"/>
|
|
208 <h3> 1.2 Tags length count</h3>
|
|
209 <img src=\"./$predir/Tags_length_after_count_filter.png\" alt=\"Tags_length_after_count_filter.png\" width=\"400\" height=\"300\"/>
|
|
210 <p> Note:<br />The sequence length data: <a href=\"./$predir/reads_length_distribution_after_count_filter.txt\"> length file</a>
|
|
211 </p>
|
|
212 ";
|
|
213
|
|
214 #### rfam
|
|
215 unless ($rfampath=~/\/$/) {
|
|
216 $rfampath .="/";
|
|
217 }
|
|
218 unless ($genomepath=~/\/$/) {
|
|
219 $genomepath .="/";
|
|
220 }
|
|
221 print OUT "<h2>2. Rfam non-miRNA annotation</h2>
|
|
222 <h3>2.1 Reads count</h3>
|
|
223 <table border=\"1\">
|
|
224 <tr align=\"center\">
|
|
225 ";
|
|
226
|
|
227 my @rfamR; my @rfamT;
|
|
228 my $tag=1;
|
|
229 open IN,"<$dir/rfam_match/rfam_non-miRNA_annotation.txt";
|
|
230 while (my $aline=<IN>) {
|
|
231 chomp $aline;
|
|
232 $tag=0 if($aline=~/tags\s+number/);
|
|
233 next if($aline=~/^\#/);
|
|
234 next if($aline=~/^\s*$/);
|
|
235 my @tmp=split/\s+/,$aline;
|
|
236 if($tag == 1){push @rfamR,[@tmp];}
|
|
237 else{push @rfamT,[@tmp];}
|
|
238 }
|
|
239 close IN;
|
|
240
|
|
241
|
|
242 print OUT "<th>RNA Name</th>\n";
|
|
243 foreach (@marks) {
|
|
244 print OUT "<th> $_ </th>\n";
|
|
245 }
|
|
246 for (my $i=0;$i<@rfamR;$i++) {
|
|
247 print OUT "</tr>
|
|
248 <tr align=\"center\">
|
|
249 <th align=\"left\">$rfamR[$i][0]</th>
|
|
250 ";
|
|
251 for (my $j=1;$j<@{$rfamR[$i]} ;$j++) {
|
|
252 print OUT "<td> $rfamR[$i][$j]</td>\n";
|
|
253 }
|
|
254 }
|
|
255
|
|
256 print OUT "</tr>\n</table>
|
|
257 <h3>2.2 Tags count</h3>
|
|
258 <table border=\"1\">
|
|
259 <tr align=\"center\">
|
|
260 <th>RNA Name</th>\n";
|
|
261 foreach (@marks) {
|
|
262 print OUT "<th> $_ </th>\n";
|
|
263 }
|
|
264 for (my $i=0;$i<@rfamT;$i++) {
|
|
265 print OUT "</tr>
|
|
266 <tr align=\"center\">
|
|
267 <th align=\"left\">$rfamT[$i][0]</th>
|
|
268 ";
|
|
269 for (my $j=1;$j<@{$rfamT[$i]} ;$j++) {
|
|
270 print OUT "<td> $rfamT[$i][$j]</td>\n";
|
|
271 }
|
|
272 }
|
|
273 print OUT "</tr>\n</table>
|
|
274 <p>Note:<br />The rfam mapping results is: <b>$rfampath</b>";
|
|
275 print OUT "<b>rfam_mapped.bwt</b></p>";
|
|
276
|
|
277 open IN,"<$dir/genome_match/genome_mapped.bwt";
|
|
278 my @genome_r_u;
|
|
279 my @genome_r_m;
|
|
280 my @genome_t_u;
|
|
281 my @genome_t_m;
|
|
282 my $tags_map_number=0;
|
|
283 while (my $aline=<IN>) {
|
|
284 chomp $aline;
|
|
285 my @temp=split/\t/,$aline;
|
|
286 if ($temp[6]==0) {
|
|
287 $aline=~/:([\d|_]+)_x(\d+)/;
|
|
288 my @lng=split/_/,$1;
|
|
289 for (my $i=0;$i<@lng;$i++) {
|
|
290 if ($lng[$i]>0) {
|
|
291 $genome_r_u[$i] +=$lng[$i];
|
|
292 $genome_t_u[$i] ++;
|
|
293 }
|
|
294 }
|
|
295 $tags_map_number++;
|
|
296 }
|
|
297 if ($temp[6]>0) {
|
|
298 $aline=~/:([\d|_]+)_x(\d+)/;
|
|
299 my @lng=split/_/,$1;
|
|
300 for (my $i=0;$i<@lng;$i++) {
|
|
301 if ($lng[$i]>0) {
|
|
302 $genome_r_m[$i] +=$lng[$i];
|
|
303 $genome_t_m[$i] ++;
|
|
304 }
|
|
305 }
|
|
306 for (my $i=0;$i<$temp[6] ;$i++) {
|
|
307 my $next=<IN>;
|
|
308 }
|
|
309 $tags_map_number++;
|
|
310 }
|
|
311 }
|
|
312 close IN;
|
|
313 #<h3>3.1 Reads count</h3>
|
|
314 #<table border=\"1\">
|
|
315 #<tr align=\"center\">
|
|
316 print OUT "<h2>3. genome mapping result</h2>
|
|
317 <table border=\"1\">
|
|
318 <tr align=\"center\">
|
|
319 <th align=\"left\">Map</th>\n
|
|
320 ";
|
|
321 foreach (@marks) {
|
|
322 print OUT "<th> $_ </th>\n";
|
|
323 }
|
|
324 print OUT "</tr>
|
|
325 <tr align=\"center\">
|
|
326 <th align=\"left\">Uniq Map Reads No.</th>
|
|
327 ";
|
|
328 for (my $i=0;$i<@genome_r_u ;$i++) {
|
|
329 print OUT "<td> $genome_r_u[$i]</td>\n";
|
|
330 }
|
|
331
|
|
332 print OUT "</tr>
|
|
333 <tr align=\"center\">
|
|
334 <th align=\"left\">Uniq Map Tags No.</th>
|
|
335 ";
|
|
336 for (my $i=0;$i<@genome_t_u ;$i++) {
|
|
337 print OUT "<td> $genome_t_u[$i]</td>\n";
|
|
338 }
|
|
339
|
|
340 print OUT "</tr>
|
|
341 <tr align=\"center\">
|
|
342 <th align=\"left\">Multiple Map Reads No.</th>
|
|
343 ";
|
|
344 for (my $i=0;$i<@genome_r_m ;$i++) {
|
|
345 print OUT "<td> $genome_r_m[$i]</td>\n";
|
|
346 }
|
|
347
|
|
348 print OUT "</tr>
|
|
349 <tr align=\"center\">
|
|
350 <th align=\"left\">Multiple Map Tags No.</th>
|
|
351 ";
|
|
352 for (my $i=0;$i<@genome_t_m ;$i++) {
|
|
353 print OUT "<td> $genome_t_m[$i]</td>\n";
|
|
354 }
|
|
355
|
|
356 print OUT "</tr>\n</table>
|
|
357 <p>Note:<br />The genome mapping results is: <b>$genomepath</b>";
|
|
358 print OUT "<b>genome_mapped.bwt</b></p>";
|
|
359
|
|
360 my $cluster="$clusterpath/sample_reads.cluster";
|
|
361 my $cluster_number=`less $cluster |wc -l `;
|
|
362 $cluster_number=$cluster_number-1;
|
|
363 my (%cluster_length,@exp,@rpkm);
|
|
364 my @exp_range=qw(0 1--9 10--99 100--999 1000--9999 10000--99999 100000--**);
|
|
365 my @rpkm_range=qw(0 0--0.25 0.25--0.5 0.5--1 1.0--5.0 5.0--10 10--50 50--100 100--500 500--1000 1000--**);
|
|
366
|
|
367 open IN,"<$cluster";
|
|
368 while (my $aline=<IN>) {
|
|
369 next if($aline=~/^\"/);
|
|
370 chomp $aline;
|
|
371 my @temp=split/\t/,$aline;
|
|
372 my @id=split/:|-/,$temp[0];
|
|
373 $cluster_length{$id[2]-$id[1]+1}++;
|
|
374 for (my $i=0;$i<@marks ;$i++) {
|
|
375 if ($temp[$i+3] == 0) {$exp[$i][0]++;}
|
|
376 elsif ($temp[$i+3]>0 && $temp[$i+3]<= 10 ){$exp[$i][1]++;}
|
|
377 elsif ($temp[$i+3]>10 && $temp[$i+3]<=100){$exp[$i][2]++;}
|
|
378 elsif ($temp[$i+3]>100 && $temp[$i+3]<=1000){$exp[$i][3]++;}
|
|
379 elsif ($temp[$i+3]>1000 && $temp[$i+3]<=10000){$exp[$i][4]++;}
|
|
380 elsif ($temp[$i+3]>10000 && $temp[$i+3]<=100000){$exp[$i][5]++;}
|
|
381 elsif ($temp[$i+3]>100000){$exp[$i][6]++;}
|
|
382 }
|
|
383 }
|
|
384 close IN;
|
|
385
|
|
386 my $cluster_rpkm="$clusterpath/sample_rpkm.cluster";
|
|
387 open IN,"<$cluster_rpkm";
|
|
388 while (my $aline=<IN>) {
|
|
389 next if($aline=~/^\#/);
|
|
390 chomp $aline;
|
|
391 my @temp=split/\t/,$aline;
|
|
392 for (my $i=0;$i<@marks ;$i++) {
|
|
393 if ($temp[$i+3]==0) {$rpkm[$i][0]++;}
|
|
394 elsif($temp[$i+3]>0 && $temp[$i+3]<=0.25){$rpkm[$i][1]++;}
|
|
395 elsif($temp[$i+3]>0.25 && $temp[$i+3]<=0.5){$rpkm[$i][2]++;}
|
|
396 elsif($temp[$i+3]>0.5 && $temp[$i+3]<=1){$rpkm[$i][3]++;}
|
|
397 elsif($temp[$i+3]>1 && $temp[$i+3]<=5){$rpkm[$i][4]++;}
|
|
398 elsif($temp[$i+3]>5 && $temp[$i+3]<=10){$rpkm[$i][5]++;}
|
|
399 elsif($temp[$i+3]>10 && $temp[$i+3]<=50){$rpkm[$i][6]++;}
|
|
400 elsif($temp[$i+3]>50 && $temp[$i+3]<=100){$rpkm[$i][7]++;}
|
|
401 elsif($temp[$i+3]>100 && $temp[$i+3]<=500){$rpkm[$i][8]++;}
|
|
402 elsif($temp[$i+3]>500 && $temp[$i+3]<=1000){$rpkm[$i][9]++;}
|
|
403 else{$rpkm[$i][10]++;}
|
|
404 }
|
|
405 }
|
|
406
|
|
407 close IN;
|
|
408
|
|
409 my $cluster_length_file="$clusterpath/cluster_length.txt";
|
|
410 open LEN,">$cluster_length_file";
|
|
411 print LEN "\#length\tcluster_number\n";
|
|
412 foreach my $key (sort keys %cluster_length) {
|
|
413 print LEN "$key\t$cluster_length{$key}\n";
|
|
414 }
|
|
415 close LEN;
|
|
416 print OUT "<h2>4. cluster result</h2>
|
|
417 <h3>4.1 Cluster count</h3>
|
|
418 <table border=\"1\">
|
|
419 <tr align=\"center\">
|
|
420 <th align=\"left\"> </th>
|
|
421 <td>Merged samples</td></tr>
|
|
422 <tr align=\"center\">
|
|
423 <th align=\"left\">Tags number</th>
|
|
424 <td>$tags_map_number</td></tr>
|
|
425 <tr align=\"center\">
|
|
426 <th align=\"left\">Cluster number</th>
|
|
427 <td>$cluster_number</td></tr>\n</table>
|
|
428 ";
|
|
429
|
|
430 print OUT "<h3>4.2 Cluster length</h3>
|
|
431 <p> Note:<br />The clusters length data: <a href=\"./$clusterdir/cluster_length.txt\"> length file</a>
|
|
432 </p>
|
|
433 ";
|
|
434 print OUT "<h3>4.3 Quantify</h3>
|
|
435 <table border=\"1\">
|
|
436 <tr align=\"center\">
|
|
437 <th align=\"left\">Reads Range</th>\n
|
|
438 ";
|
|
439 foreach (@marks) {
|
|
440 print OUT "<th> $_ </th>\n";
|
|
441 }
|
|
442 for (my $i=0;$i<@exp_range;$i++) {
|
|
443 print OUT "</tr>
|
|
444 <tr align=\"center\">
|
|
445 <th align=\"left\">$exp_range[$i]</th>
|
|
446 ";
|
|
447 for (my $j=0;$j<@marks ;$j++) {
|
|
448 if (!(defined($exp[$i][$j]))) {
|
|
449 print OUT "<td> 0</td>\n";
|
|
450 }
|
|
451 else{print OUT "<td> $exp[$i][$j]</td>\n";}
|
|
452 }
|
|
453 }
|
|
454 print OUT "</tr>\n</table>";
|
|
455
|
|
456 print OUT "\n<table border=\"1\">
|
|
457 <tr align=\"center\">
|
|
458 <th align=\"left\">RPKM Range</th>\n
|
|
459 ";
|
|
460 foreach (@marks) {
|
|
461 print OUT "<th> $_ </th>\n";
|
|
462 }
|
|
463 for (my $i=0;$i<@rpkm_range;$i++) {
|
|
464 print OUT "</tr>
|
|
465 <tr align=\"center\">
|
|
466 <th align=\"left\">$rpkm_range[$i]</th>
|
|
467 ";
|
|
468 for (my $j=0;$j<@marks ;$j++) {
|
|
469 if (!(defined($rpkm[$i][$j]))) {
|
|
470 print OUT "<td> 0</td>\n";
|
|
471 }
|
|
472 else{print OUT "<td> $rpkm[$i][$j]</td>\n";}
|
|
473 }
|
|
474 }
|
|
475 print OUT "</tr>\n</table>";
|
|
476
|
|
477 my $annotate="$annotatepath/sample_c_p.anno";
|
|
478 my (%posit,%repeat,%nat1,%nat2);
|
|
479 my (@phase,@long,@repeat,@nat);
|
|
480 for (my $j=0;$j<@marks ;$j++) {
|
|
481 $phase[$j]=0;
|
|
482 $long[$j]=0;
|
|
483 $repeat[$j]=0;
|
|
484 $nat[$j]=0;
|
|
485 }
|
15
|
486
|
|
487 my $class_anno=6;
|
0
|
488 open ANNO,"<$annotate";
|
|
489 while (my $aline=<ANNO>) {
|
|
490 chomp $aline;
|
|
491 my @temp=split/\t/,$aline;
|
15
|
492 if($aline=~/^\#/){
|
|
493 if (@temp != 10+@marks) {
|
|
494 $class_anno=0;
|
|
495 }
|
|
496 next;
|
|
497 }
|
|
498 for (my $i=3+@marks+$class_anno;$i<@temp;$i++) {
|
0
|
499 my @posit=split/\;/,$temp[$i];
|
|
500 for (my $j=0;$j<@marks ;$j++) {
|
|
501 if ($temp[3+$j]>0) {
|
|
502 $posit{$posit[0]}[$j]++;
|
|
503 }
|
|
504 else{
|
|
505 if (!(defined($posit{$posit[0]}[$j]))) {
|
|
506 $posit{$posit[0]}[$j]=0;
|
|
507 }
|
|
508 }
|
|
509 }
|
|
510 }
|
15
|
511 if ($class_anno) {
|
|
512 for (my $j=0;$j<@marks ;$j++) {
|
|
513 if ($temp[3+$j]>0) {
|
|
514 if ($temp[6] eq "phase") {
|
|
515 $phase[$j]++;
|
|
516 }
|
|
517 if ($temp[7] eq "long") {
|
|
518 $long[$j]++;
|
0
|
519 }
|
15
|
520 if ($temp[8] ne "\/") {
|
|
521 $repeat[$j]++;
|
|
522 my @rr=split/\;/,$temp[8];
|
|
523 foreach (@rr) {
|
|
524 $repeat{$_}[$j]++;
|
|
525 }
|
|
526 }
|
|
527 if ($temp[9] ne "\/") {
|
|
528 $nat[$j]++;
|
|
529 my @nn1=split/\;/,$temp[9];
|
|
530 my @nn2=split/\;/,$temp[10];
|
|
531 for (my $k=0;$k<@nn1 ;$k++) {
|
|
532 $nat1{$nn1[$k]}[$j]++;
|
|
533 $nat2{$nn2[$k]}[$j]++;
|
|
534 }
|
0
|
535 }
|
|
536 }
|
|
537 }
|
|
538 }
|
|
539 }
|
|
540 close ANNO;
|
|
541
|
|
542 print OUT "<h2>5. Cluster Annotate</h2>
|
|
543 <h3>5.1 Cluster genome position annotate</h3>
|
|
544 <table border=\"1\">
|
|
545 <tr align=\"center\">
|
|
546 <th align=\"left\">clusters number</th>\n
|
|
547 ";
|
|
548
|
|
549 foreach (@marks) {
|
|
550 print OUT "<th> $_ </th>\n";
|
|
551 }
|
|
552 foreach my $key (sort keys %posit) {
|
|
553 print OUT "</tr>
|
|
554 <tr align=\"center\">
|
|
555 <th align=\"left\">$key</th>
|
|
556 ";
|
|
557 foreach (@{$posit{$key}}) {
|
|
558 print OUT "<td> $_</td>\n";
|
|
559 }
|
|
560 }
|
|
561 print OUT "</tr>\n</table>";
|
|
562 print OUT "<p>
|
|
563 Note:<br />
|
|
564 One cluster mybe annotate to multiple genes<br />
|
|
565 ";
|
|
566
|
15
|
567 if ($class_anno) {
|
|
568 print OUT "<h3>5.2 Cluster source mechanism annotate</h3>
|
|
569 <table border=\"1\">
|
|
570 <tr align=\"center\">
|
|
571 <th align=\"left\">clusters number</th>\n
|
|
572 ";
|
0
|
573
|
15
|
574 foreach (@marks) {
|
|
575 print OUT "<th> $_ </th>\n";
|
|
576 }
|
|
577 print OUT "</tr>
|
|
578 <tr align=\"center\">
|
|
579 <th align=\"left\">Phase</th>\n
|
|
580 ";
|
|
581 foreach (@phase) {
|
|
582 print OUT "<td> $_ </td>\n";
|
|
583 }
|
0
|
584
|
15
|
585 print OUT "</tr>
|
|
586 <tr align=\"center\">
|
|
587 <th align=\"left\">Long</th>\n
|
|
588 ";
|
|
589 foreach (@long) {
|
|
590 print OUT "<td> $_ </td>\n";
|
|
591 }
|
0
|
592
|
15
|
593 print OUT "</tr>
|
|
594 <tr align=\"center\">
|
|
595 <th align=\"left\">Repeat</th>\n
|
|
596 ";
|
|
597 foreach (@repeat) {
|
|
598 print OUT "<td> $_ </td>\n";
|
|
599 }
|
0
|
600
|
15
|
601 print OUT "</tr>
|
|
602 <tr align=\"center\">
|
|
603 <th align=\"left\">Nat</th>\n
|
|
604 ";
|
|
605 foreach (@nat) {
|
|
606 print OUT "<td> $_ </td>\n";
|
|
607 }
|
|
608 print OUT "</tr>\n</table>";
|
0
|
609
|
15
|
610 print OUT "<p>
|
|
611 Repeat subclass annotate:
|
|
612 ";
|
0
|
613
|
15
|
614 print OUT "<table border=\"1\">
|
|
615 <tr align=\"center\">
|
|
616 <th align=\"left\">Repeat subclass</th>\n
|
|
617 ";
|
|
618 foreach (@marks) {
|
|
619 print OUT "<th> $_ </th>\n";
|
|
620 }
|
0
|
621
|
15
|
622 foreach my $key (sort keys %repeat) {
|
|
623 print OUT "</tr>
|
|
624 <tr align=\"center\">
|
|
625 <th align=\"left\">$key</th>
|
|
626 ";
|
|
627 for (my $i=0;$i<@marks ;$i++) {
|
|
628 if (defined($repeat{$key}[$i])) {
|
|
629 print OUT "<td> $repeat{$key}[$i] </td>\n";
|
|
630 }
|
|
631 else{print OUT "<td> 0 </td>\n";}
|
0
|
632 }
|
|
633 }
|
15
|
634 print OUT "</tr>\n</table>";
|
0
|
635
|
|
636
|
15
|
637 print OUT "<p>
|
|
638 Nat subclass1 annotate:
|
|
639 ";
|
0
|
640
|
15
|
641 print OUT "<table border=\"1\">
|
|
642 <tr align=\"center\">
|
|
643 <th align=\"left\">Nat subclass1</th>\n
|
0
|
644 ";
|
15
|
645 foreach (@marks) {
|
|
646 print OUT "<th> $_ </th>\n";
|
|
647 }
|
|
648 foreach my $key (sort keys %nat1) {
|
|
649 print OUT "</tr>
|
|
650 <tr align=\"center\">
|
|
651 <th align=\"left\">$key</th>
|
|
652 ";
|
|
653 for (my $i=0;$i<@marks ;$i++) {
|
|
654 if (defined($nat1{$key}[$i])) {
|
|
655 print OUT "<td> $nat1{$key}[$i] </td>\n";
|
|
656 }
|
|
657 else{print OUT "<td> 0 </td>\n";}
|
0
|
658 }
|
|
659 }
|
15
|
660 print OUT "</tr>\n</table>";
|
|
661
|
|
662 print OUT "<p>
|
|
663 Nat subclass2 annotate:
|
|
664 ";
|
0
|
665
|
15
|
666 print OUT "<table border=\"1\">
|
|
667 <tr align=\"center\">
|
|
668 <th align=\"left\">Nat subclass2</th>\n
|
|
669 ";
|
|
670 foreach (@marks) {
|
|
671 print OUT "<th> $_ </th>\n";
|
|
672 }
|
|
673 foreach my $key (sort keys %nat2) {
|
|
674 print OUT "</tr>
|
|
675 <tr align=\"center\">
|
|
676 <th align=\"left\">$key</th>
|
|
677 ";
|
|
678 for (my $i=0;$i<@marks ;$i++) {
|
|
679 if (defined($nat2{$key}[$i])) {
|
|
680 print OUT "<td> $nat2{$key}[$i] </td>\n";
|
|
681 }
|
|
682 else{print OUT "<td> 0 </td>\n";}
|
|
683 }
|
|
684 }
|
|
685 print OUT "</tr>\n</table>";
|
|
686 print OUT "<p>
|
|
687 Note:<br />
|
|
688 One cluster mybe annotate to multiple repeats or nats<br />
|
|
689 ";
|
0
|
690 }
|
15
|
691 else {
|
|
692 print OUT "<h3>5.2 Cluster source mechanism annotate</h3>
|
|
693 <br />Do not do source mechanism annotate <br />";
|
|
694
|
0
|
695 }
|
|
696
|
15
|
697 if ($deg_tag) {
|
|
698 my $deg_file=`ls $degpath`;
|
|
699 chomp $deg_file;
|
|
700 my @deg_file=split/\n/,$deg_file;
|
|
701 my %deg;
|
|
702 foreach (@deg_file) {
|
|
703 my $output="$degpath/$_/output_score.txt";
|
|
704 open IN,"<$output";
|
|
705 $deg{$_}[0]=0;
|
|
706 $deg{$_}[1]=0;
|
|
707 $deg{$_}[2]=0;
|
|
708 while (my $aline=<IN>) {
|
|
709 next if ($aline=~/^\"/);
|
|
710 chomp $aline;
|
|
711 my @temp=split/\t/,$aline;
|
|
712 if ($temp[9] eq "TRUE") {
|
|
713 $deg{$_}[0]++;
|
|
714 if ($temp[4] >0) {
|
|
715 $deg{$_}[1]++;
|
|
716 }
|
|
717 if ($temp[4] <0) {
|
|
718 $deg{$_}[2]++;
|
|
719 }
|
0
|
720 }
|
|
721 }
|
15
|
722 close IN;
|
0
|
723 }
|
|
724
|
15
|
725 print OUT "<h2>6. DEG</h2>
|
|
726 <table border=\"1\">
|
|
727 <tr align=\"center\">
|
|
728 <th align=\"left\">Genes number</th>\n
|
|
729 <th> DEG </th>\n
|
|
730 <th> UP </th>\n
|
|
731 <th> DOWN </th>\n
|
|
732 ";
|
0
|
733
|
15
|
734 foreach my $key (sort keys %deg) {
|
|
735 print OUT "</tr>
|
|
736 <tr align=\"center\">
|
|
737 <th align=\"left\">$key</th>
|
|
738 ";
|
|
739 for (my $i=0;$i<@{$deg{$key}} ;$i++) {
|
|
740 print OUT "<td> $deg{$key}[$i] </td>\n";
|
|
741 }
|
0
|
742 }
|
15
|
743 print OUT "</tr>\n</table>";
|
0
|
744 }
|
15
|
745 else{
|
|
746 print OUT "<h2>6. DEG</h2>
|
|
747 <br />Do not do DE clusters <br />";
|
|
748 }
|
0
|
749
|
|
750 print OUT "
|
|
751 </BODY>
|
|
752 </HTML>
|
|
753 ";
|
|
754 close OUT;
|
|
755
|
|
756
|
|
757
|
|
758
|
|
759 sub usage{
|
|
760 print <<"USAGE";
|
|
761 Version $version
|
|
762 Usage:
|
|
763 $0 -o
|
|
764 options:
|
|
765 -i
|
|
766 -format
|
|
767 -o output file
|
|
768 -h help
|
|
769 USAGE
|
|
770 exit(1);
|
|
771 }
|