comparison html_preprocess.pl @ 47:c75593f79aa9 draft

Uploaded
author big-tiandm
date Wed, 03 Dec 2014 01:54:29 -0500
parents
children
comparison
equal deleted inserted replaced
46:ca05d68aca13 47:c75593f79aa9
1 #!/usr/bin/perl -w
2 #Filename:
3 #Author: Tian Dongmei
4 #Email: tiandm@big.ac.cn
5 #Date: 2014-5-29
6 #Modified:
7 #Description:
8 my $version=1.00;
9
10 use strict;
11 use Getopt::Long;
12 use File::Basename;
13
# Parse the command line into %opts.
#   -i, -format and -o are mandatory; -min and -max are optional integers;
#   -h requests the help text.
# A parse error, a missing mandatory option, or -h prints the usage
# message and exits via usage().
my %opts;
my $parsed_ok = GetOptions(\%opts, "i=s", "format=s", "min=i", "max=i", "o=s", "h");
my @missing = grep { !defined $opts{$_} } qw(i format o);
if (!$parsed_ok || @missing || defined $opts{h}) {
    # Called with parentheses on purpose: the bare "&usage;" form would
    # silently forward the caller's @_.
    usage();
}
# Read the list file given with -i.  Its lines are consumed in this order:
#   1. $config      - sample configuration file
#   2. $prepath     - preprocess result directory
#   3. $genomepath  - genome mapping result directory
#   4. $rfampath    - Rfam result directory (may be absent)
my ($config,$prepath,$rfampath,$knownpath,$genomepath,$novelpath);
my ($predir,$rfamdir,$knowndir,$genomedir,$noveldir);
open my $list_fh, '<', $opts{i}
    or die "Cannot open input list '$opts{i}': $!\n";
$config     = <$list_fh>;
$prepath    = <$list_fh>;
$genomepath = <$list_fh>;
$rfampath   = <$list_fh>;    # left un-chomped here; it is chomped where it is used
close $list_fh;

# The first three lines are required; fail early instead of running
# chomp/split on undef further down.
defined $genomepath
    or die "Input list '$opts{i}' must contain at least three lines\n";
chomp($config, $prepath, $genomepath);

# Keep the last path component of each directory.
my @tmp = split /\//, $prepath;
$predir = $tmp[-1];
@tmp = split /\//, $genomepath;
$genomedir = $tmp[-1];
31
# Directory that holds the report; other result files are looked up here.
my $dir = dirname($opts{'o'});

# Create the report and write the page header.  OUT stays a bareword
# handle because every later section of the script prints to it.
my $report_path = $opts{'o'};
open OUT, '>', $report_path
    or die "Cannot write report '$report_path': $!\n";
# The heading font used to be the raw GBK bytes of the font name, which
# show up as mojibake because the page declares no charset; the ASCII
# family name "SimHei" selects the same font in any encoding.
print OUT "<HTML>\n <HEAD>\n <TITLE> Analysis Report </TITLE>\n </HEAD>
<BODY bgcolor=\"lightgray\">\n <h1 align=\"center\">\n <font face=\"SimHei\">\n <b>Preprocess Report</b>\n </font>\n </h1>
<h2>1. Sequence No. and quality</h2>
<h3>1.1 Sequece No.</h3>
";
40
### raw data no
# Read the sample configuration (tab separated: column 1 = raw read file,
# column 2 = sample mark) and count the raw reads of every sample.
#   @files  - raw read file of each sample
#   @marks  - mark (column heading) of each sample
#   @rawNo  - number of raw reads of each sample
open my $config_fh, '<', $config
    or die "Cannot open sample config '$config': $!\n";
my @files; my @marks; my @rawNo;

# FASTQ records span four lines; everything else is treated as two-line records.
my $lines_per_read =
    ($opts{'format'} eq "fq" || $opts{'format'} eq "fastq") ? 4 : 2;

while (my $aline = <$config_fh>) {
    chomp $aline;
    next if $aline =~ /^\s*$/;    # a blank line is not a sample
    my @fields = split /\t/, $aline;
    push @files, $fields[0];

    # Still read through "less", as before, but with a list-form pipe open:
    # the file name never reaches a shell, so spaces or metacharacters in it
    # cannot break or hijack the command.
    open my $reader, '-|', 'less', $fields[0]
        or die "Cannot run less on '$fields[0]': $!\n";
    my $no = 0;
    while (my $raw_line = <$reader>) {
        $no++;                    # also counts a final line with no newline
    }
    close $reader;
    push @rawNo, $no / $lines_per_read;

    push @marks, $fields[1];
}
close $config_fh;
62
### preprocess
# Make sure the preprocess directory ends with a slash before building paths.
$prepath .= "/" unless $prepath =~ /\/$/;

# Per-sample totals after adapter trimming / collapsing:
#   @trimNo   - summed read counts,  @collapse - number of distinct tags
my @trimNo; my @collapse;
my $collapsefile = $prepath."collapse_reads.fa";
_tally_sample_counts($collapsefile, \@trimNo, \@collapse);

# The same totals for the length-filtered ("clean") collapsed file.
my @cleanR; my @cleanT;
my $clean = $prepath."collapse_reads_$opts{min}_$opts{max}.fa";
_tally_sample_counts($clean, \@cleanR, \@cleanT);

# Accumulate per-sample read and tag totals from a collapsed FASTA file.
#   $path  - FASTA file read as header/sequence line pairs
#   $reads - array ref; element i grows by the count of sample i
#   $tags  - array ref; element i grows by 1 for every header whose
#            count for sample i is positive
# Headers are expected to end in ":<n1>_<n2>_..._x<total>".
# Dies if the file cannot be opened.
sub _tally_sample_counts {
    my ($path, $reads, $tags) = @_;
    open my $fasta_fh, '<', $path
        or die "Cannot open collapsed reads '$path': $!\n";
    while (my $header = <$fasta_fh>) {
        chomp $header;
        <$fasta_fh>;    # skip the sequence line that follows the header
        # Skip headers that do not match; otherwise the captures of an
        # earlier header would be counted a second time.
        next unless $header =~ /:([\d_]+)_x(\d+)$/;
        my @per_sample = split /_/, $1;
        for my $i (0 .. $#per_sample) {
            next unless $per_sample[$i] > 0;
            $reads->[$i] += $per_sample[$i];
            $tags->[$i]++;
        }
    }
    close $fasta_fh;
    return;
}
101
# Summary table: one column per sample mark, one row per statistic.
# Each entry pairs the row label with the array holding its per-sample values.
my @summary_rows = (
    [ "Raw Reads No. ",                      \@rawNo    ],
    [ "Reads No. After Trimed 3' adapter ",  \@trimNo   ],
    [ "Unique Tags No. ",                    \@collapse ],
    [ "Clean Reads No. ",                    \@cleanR   ],
    [ "Clean Tags No. ",                     \@cleanT   ],
);

print OUT "<table border=\"1\">\n<tr align=\"center\">\n<th>&nbsp;</th>\n";
print OUT "<th> $_ </th>\n" for @marks;

for my $summary_row (@summary_rows) {
    my ($label, $values) = @{$summary_row};
    # Every data row first closes the row written before it.
    print OUT "</tr>\n<tr align=\"center\">\n<th align=\"left\">$label</th>\n";
    print OUT "<td> $_ </td>\n" for @{$values};
}
print OUT "</tr>\n</table>";

# Notes below the table: where the raw, collapsed and clean files live.
print OUT "<p>\nNote:<br />\nThe raw data file path is: <b>$files[0]</b><br />\n";
for my $other_file (@files[1 .. $#files]) {
    # The entity padding lines further files up under the first one.
    print OUT "&nbsp;&nbsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;<b>$other_file</b><br />";
}
print OUT "The collapsed file path is: <b>$collapsefile</b><br />\n"
        . "The clean data file path is: <b>$clean</b><br />\n"
        . "</p>\n"
        . "<h2> 1. Sequence length count</h2>\n"
        . "<h3> 1.1 Reads length count </h3>\n";
print OUT "\n";
159
# Load the length distribution tables.
# %length maps a section name ("Tags Length" / "Reads Length") to a list of
# rows; each row is [ first tab column, whitespace-split second tab column ].
# Rows go to "Tags Length" until a line starting with "Reads" is seen; that
# line and everything after it go to "Reads Length".
my (%length); my $key="Tags Length";
my $distribution_file = "$prepath/reads_length_distribution.txt";
open my $dist_fh, '<', $distribution_file
    or die "Cannot open length distribution '$distribution_file': $!\n";
while (my $aline = <$dist_fh>) {
    chomp $aline;
    next if $aline =~ /^\s*$/;
    $key = "Reads Length" if $aline =~ /^Reads/;
    my ($label, $values) = split /\t/, $aline;
    # A line without a tab has no value column; treat it as an empty one
    # instead of splitting undef.
    my @array = defined $values ? split(/\s/, $values) : ();
    push @{ $length{$key} }, [ $label, @array ];
}
close $dist_fh;
171
# Write the "Reads Length" table, then the "Tags Length" table with its
# sub-heading.  Both share one layout; only the padding in front of each
# row label differs (none for reads, one space for tags).
my $hashkey = "Reads Length";
_print_length_table($length{$hashkey}, "");

print OUT "<h3> 1.2 Tags length count </h3>";

$hashkey = "Tags Length";
_print_length_table($length{$hashkey}, " ");

# Print one length table to OUT.
#   $rows      - array ref of rows; row 0 supplies the column headings,
#                every later row is [ label, value, value, ... ]
#   $label_pad - text placed immediately before each row label
sub _print_length_table {
    my ($rows, $label_pad) = @_;

    print OUT "<table border=\"1\">\n<tr align=\"center\">";
    print OUT "<th> $_ </th>\n" for @{ $rows->[0] };
    print OUT "</tr>";

    for my $row (@{$rows}[1 .. $#{$rows}]) {
        my ($label, @cells) = @{$row};
        print OUT "<tr align=\"center\">\n<th >$label_pad$label </th>\n";
        print OUT "<td> $_ </td>\n" for @cells;
        print OUT "</tr>\n";
    }

    print OUT "</table>\n";
    return;
}
212
# Copy the pre-rendered length distribution fragment into the report.
print OUT "<h2> 2. Sequence length distribution </h2>";
my $length = $prepath."length.html";
if (open my $fragment_fh, '<', $length) {
    while (my $aline = <$fragment_fh>) {
        chomp $aline;
        print OUT "$aline\n";    # every copied line ends with a newline
    }
    close $fragment_fh;          # this handle used to be left open
}
else {
    # The fragment is only embedded content: report the problem and carry
    # on with the rest of the report.
    warn "Cannot open length fragment '$length': $!\n";
}
220
221 #print OUT "<p> Note:<br />The sequence length data: <a href=\"./$predir/reads_length_distribution.txt\"> length file</a>
222 #</p>
223 #";
224
225
226
227
228 ####genome map
229 #unless ($genomedir=~/\/$/) {
230 # $genomedir .="/";
231 #}
232
print OUT "<h2>2. Genome Alignment Result</h2>
<h3>2.1 Mapping count</h3>
";

# Per-sample totals of the genome-mapped collapsed reads:
#   @gread - summed read counts,  @gtag - number of distinct tags
# Headers are expected to end in ":<n1>_<n2>_..._x<total>".
my $mapped_fasta = "$genomepath/genome_mapped.fa";
open my $mapped_fh, '<', $mapped_fasta
    or die "Cannot open genome mapped reads '$mapped_fasta': $!\n";
my (@gread,@gtag);
while (my $header = <$mapped_fh>) {
    chomp $header;
    <$mapped_fh>;    # skip the sequence line that follows the header
    # Skip headers that do not match, so the captures of an earlier header
    # are never counted twice.
    next unless $header =~ /:([\d_]+)_x(\d+)$/;
    my @per_sample = split /_/, $1;
    for my $i (0 .. $#per_sample) {
        next unless $per_sample[$i] > 0;
        $gread[$i] += $per_sample[$i];
        $gtag[$i]++;
    }
}
close $mapped_fh;
252
# Genome mapping table: counts and percentages of mapped reads and tags,
# one column per sample mark.
print OUT "<table border=\"1\">
<tr align=\"center\">
<th>&nbsp;</th>
";
foreach (@marks) {
    print OUT "<th> $_ </th>\n";
}
print OUT "</tr>
<tr align=\"center\">
<th align=\"left\">Genome Mapped Reads No. </th>
";
foreach (@gread) {
    print OUT "<td> $_ </td>\n";
}
print OUT "</tr>
<tr align=\"center\">
<th align=\"left\">Genome Mapped Reads Percent </th>
";

# Mapped reads as a share of the clean reads of the same sample.
for my $i (0 .. $#gread) {
    my $per = _format_percent($gread[$i], $cleanR[$i]);
    print OUT "<td> $per\%</td>\n";
}

print OUT "</tr>
<tr align=\"center\">
<th align=\"left\">Genome Mapped Tags No. </th>
";
foreach (@gtag) {
    print OUT "<td> $_ </td>\n";
}
print OUT "</tr>
<tr align=\"center\">
<th align=\"left\">Genome Mapped Tags Percent </th>
";

# Mapped tags as a share of the clean tags of the same sample.
for my $i (0 .. $#gtag) {
    my $per = _format_percent($gtag[$i], $cleanT[$i]);
    print OUT "<td> $per\%</td>\n";
}
print OUT "</tr>\n</table>";
print OUT "<p>
Note:<br />
The genome mapped bwt file path is: <b>$genomedir/genome_mapped.bwt</b><br />
The genome mapped FASTA file path is: <b>$genomedir/genome_mapped.fa</b>
<br />
";

# Return $part / $total * 100 with two decimals.
# A missing or zero $total yields "NA": dividing by it would abort the
# script with "Illegal division by zero" and leave a half-written report.
# A missing $part counts as 0.
sub _format_percent {
    my ($part, $total) = @_;
    return "NA" unless $total;
    return sprintf("%.2f", ($part || 0) / $total * 100);
}
300
301
302
#### rfam
# Optional section: written only when the fourth line of the -i list was
# present and contains "rfam_match".
if (defined $rfampath && $rfampath =~ /rfam_match/) {
    chomp $rfampath;
    @tmp = split /\//, $rfampath;
    $rfamdir = $tmp[-1];
    $rfampath .= "/" unless $rfampath =~ /\/$/;

    # Load the annotation counts.  Data lines go to @rfamR (read counts)
    # until a line matching "tags number" is seen, and to @rfamT (tag
    # counts) afterwards.  Lines starting with "#" and blank lines are skipped.
    my @rfamR; my @rfamT;
    my $annotation = "$dir/rfam_non-miRNA_annotation.txt";
    if (open my $rfam_fh, '<', $annotation) {
        my $target = \@rfamR;
        while (my $aline = <$rfam_fh>) {
            chomp $aline;
            $target = \@rfamT if $aline =~ /tags\s+number/;
            next if $aline =~ /^\#/;
            next if $aline =~ /^\s*$/;
            push @{$target}, [ split /\s+/, $aline ];
        }
        close $rfam_fh;
    }
    else {
        # Keep writing the report (the tables stay empty) but say why.
        warn "Cannot open Rfam annotation '$annotation': $!\n";
    }

    print OUT "<h2>3. Rfam non-miRNA annotation</h2>\n";

    # Both tables share one layout: a heading row of sample marks, then one
    # row per RNA class (column 0 is the name, the rest are the counts).
    my @rfam_tables = (
        [ "3.1 Reads count", \@rfamR ],
        [ "3.2 Tags count",  \@rfamT ],
    );
    for my $table (@rfam_tables) {
        my ($title, $rows) = @{$table};
        print OUT "<h3>$title</h3>\n<table border=\"1\">\n<tr align=\"center\">\n";
        print OUT "<th>RNA Name</th>\n";
        print OUT "<th> $_ </th>\n" for @marks;
        for my $row (@{$rows}) {
            my ($name, @counts) = @{$row};
            # Every data row first closes the row written before it.
            print OUT "</tr>\n<tr align=\"center\">\n<th align=\"left\">$name</th>\n";
            print OUT "<td> $_</td>\n" for @counts;
        }
        print OUT "</tr>\n</table>\n";
    }

    print OUT "<p>Note:<br />The rfam mapping results is: <b>$rfampath</b>";
    print OUT "<b>rfam_mapped.bwt</b></p>\n";
}
369
370
# Finish the HTML document.
print OUT "
</BODY>
</HTML>
";
# Buffered write errors (full disk, quota) only surface at close time, so
# the result of close is checked on this write handle.
close OUT or die "Error while closing report '$opts{'o'}': $!\n";
376
# Print the help text to STDOUT and exit with status 1.
# Called when a mandatory option is missing or -h is given.
# The text lists every option the script accepts; it used to mention only
# -o and -h although -i and -format are mandatory.
sub usage{
print <<"USAGE";
Version $version
Usage:
$0 -i <list file> -format <fq|fastq|fa> -o <report.html> [-min <int> -max <int>]
options:
-i input list file; its lines give, in order: the sample config file, the preprocess directory, the genome mapping directory and (optionally) the Rfam directory
-format format of the raw read files: fq or fastq (4 lines per read), anything else is counted as 2 lines per read
-min minimum length used in the clean file name collapse_reads_<min>_<max>.fa
-max maximum length used in the clean file name collapse_reads_<min>_<max>.fa
-o output file
-h help
USAGE
exit(1);
}
388