Mercurial > repos > big-tiandm > mirplant2
comparison html_preprocess.pl @ 47:c75593f79aa9 draft
Uploaded
author | big-tiandm |
---|---|
date | Wed, 03 Dec 2014 01:54:29 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
46:ca05d68aca13 | 47:c75593f79aa9 |
---|---|
1 #!/usr/bin/perl -w | |
2 #Filename: | |
3 #Author: Tian Dongmei | |
4 #Email: tiandm@big.ac.cn | |
5 #Date: 2014-5-29 | |
6 #Modified: | |
7 #Description: | |
8 my $version=1.00; | |
9 | |
10 use strict; | |
11 use Getopt::Long; | |
12 use File::Basename; | |
13 | |
14 my %opts; | |
15 GetOptions(\%opts,"i=s","format=s","min=i","max=i","o=s","h"); | |
16 if (!(defined $opts{o} and defined $opts{format} and defined $opts{i} ) || defined $opts{h}) { #necessary arguments | |
17 &usage; | |
18 } | |
19 my ($config,$prepath,$rfampath,$knownpath,$genomepath,$novelpath); | |
20 my ($predir,$rfamdir,$knowndir,$genomedir,$noveldir); | |
21 open IN,"<$opts{i}"; | |
22 $config=<IN>; chomp $config; | |
23 $prepath=<IN>; chomp $prepath; | |
24 $genomepath=<IN>; chomp $genomepath; | |
25 $rfampath=<IN>; | |
26 close IN; | |
27 my @tmp=split/\//,$prepath; | |
28 $predir=$tmp[-1]; | |
29 @tmp=split/\//,$genomepath; | |
30 $genomedir=$tmp[-1]; | |
31 | |
32 my $dir=dirname($opts{'o'}); | |
33 | |
34 open OUT ,">$opts{'o'}"; | |
35 print OUT "<HTML>\n <HEAD>\n <TITLE> Analysis Report </TITLE>\n </HEAD> | |
36 <BODY bgcolor=\"lightgray\">\n <h1 align=\"center\">\n <font face=\"ºÚÌå\">\n <b>Preprocess Report</b>\n </font>\n </h1> | |
37 <h2>1. Sequence No. and quality</h2> | |
38 <h3>1.1 Sequece No.</h3> | |
39 "; | |
40 | |
41 ### raw data no | |
42 open IN,"<$config"; | |
43 my @files;my @marks; my @rawNo; | |
44 while (my $aline=<IN>) { | |
45 chomp $aline; | |
46 my @tmp=split/\t/,$aline; | |
47 push @files,$tmp[0]; | |
48 | |
49 my $no=`less $tmp[0] |wc -l `; | |
50 chomp $no; | |
51 if ($opts{'format'} eq "fq" || $opts{'format'} eq "fastq") { | |
52 $no=$no/4; | |
53 } | |
54 else{ | |
55 $no=$no/2; | |
56 } | |
57 push @rawNo,$no; | |
58 | |
59 push @marks,$tmp[1]; | |
60 } | |
61 close IN; | |
62 | |
63 ### preprocess | |
64 unless ($prepath=~/\/$/) { | |
65 $prepath .="/"; | |
66 } | |
67 | |
68 my @trimNo;my @collapse; | |
69 my $collapsefile=$prepath."collapse_reads.fa"; | |
70 open IN,"<$collapsefile"; | |
71 while (my $aline=<IN>) { | |
72 chomp $aline; | |
73 <IN>; | |
74 $aline=~/:([\d|_]+)_x(\d+)$/; | |
75 my @lng=split/_/,$1; | |
76 for (my $i=0;$i<@lng;$i++) { | |
77 if ($lng[$i]>0) { | |
78 $trimNo[$i] +=$lng[$i]; | |
79 $collapse[$i] ++; | |
80 } | |
81 } | |
82 } | |
83 close IN; | |
84 | |
85 my @cleanR;my @cleanT; | |
86 my $clean=$prepath."collapse_reads_$opts{min}_$opts{max}.fa"; | |
87 open IN,"<$clean"; | |
88 while (my $aline=<IN>) { | |
89 chomp $aline; | |
90 <IN>; | |
91 $aline=~/:([\d|_]+)_x(\d+)$/; | |
92 my @lng=split/_/,$1; | |
93 for (my $i=0;$i<@lng;$i++) { | |
94 if ($lng[$i]>0) { | |
95 $cleanR[$i] +=$lng[$i]; | |
96 $cleanT[$i] ++; | |
97 } | |
98 } | |
99 } | |
100 close IN; | |
101 | |
102 print OUT "<table border=\"1\"> | |
103 <tr align=\"center\"> | |
104 <th> </th> | |
105 "; | |
106 foreach (@marks) { | |
107 print OUT "<th> $_ </th>\n"; | |
108 } | |
109 print OUT "</tr> | |
110 <tr align=\"center\"> | |
111 <th align=\"left\">Raw Reads No. </th> | |
112 "; | |
113 foreach (@rawNo) { | |
114 print OUT "<td> $_ </td>\n"; | |
115 } | |
116 print OUT "</tr> | |
117 <tr align=\"center\"> | |
118 <th align=\"left\">Reads No. After Trimed 3\' adapter </th> | |
119 "; | |
120 foreach (@trimNo) { | |
121 print OUT "<td> $_ </td>\n"; | |
122 } | |
123 print OUT "</tr> | |
124 <tr align=\"center\"> | |
125 <th align=\"left\">Unique Tags No. </th> | |
126 "; | |
127 foreach (@collapse) { | |
128 print OUT "<td> $_ </td>\n"; | |
129 } | |
130 print OUT "</tr> | |
131 <tr align=\"center\"> | |
132 <th align=\"left\">Clean Reads No. </th> | |
133 "; | |
134 foreach (@cleanR) { | |
135 print OUT "<td> $_ </td>\n"; | |
136 } | |
137 print OUT "</tr> | |
138 <tr align=\"center\"> | |
139 <th align=\"left\">Clean Tags No. </th> | |
140 "; | |
141 foreach (@cleanT) { | |
142 print OUT "<td> $_ </td>\n"; | |
143 } | |
144 print OUT "</tr>\n</table>"; | |
145 print OUT "<p> | |
146 Note:<br /> | |
147 The raw data file path is: <b>$files[0]</b><br /> | |
148 "; | |
149 for (my $i=1;$i<@files;$i++) { | |
150 print OUT "          <b>$files[$i]</b><br />"; | |
151 } | |
152 print OUT "The collapsed file path is: <b>$collapsefile</b><br /> | |
153 The clean data file path is: <b>$clean</b><br /> | |
154 </p> | |
155 <h2> 1. Sequence length count</h2> | |
156 <h3> 1.1 Reads length count </h3> | |
157 "; | |
158 print OUT "\n"; | |
159 | |
160 my (%length); my $key="Tags Length"; | |
161 open IN,"<$prepath/reads_length_distribution.txt"; | |
162 while (my $aline=<IN>) { | |
163 chomp $aline; | |
164 next if($aline=~/^\s*$/); | |
165 if ($aline=~/^Reads/) { $key="Reads Length";} | |
166 my @tmp=split/\t/,$aline; | |
167 my @array=split/\s/,$tmp[1]; | |
168 push @{$length{$key}},[$tmp[0],@array]; | |
169 } | |
170 close IN; | |
171 | |
172 print OUT "<table border=\"1\"> | |
173 <tr align=\"center\">"; | |
174 my $hashkey="Reads Length"; | |
175 foreach (@{$length{$hashkey}[0]}) { | |
176 print OUT "<th> $_ </th>\n"; | |
177 } | |
178 print OUT "</tr>"; | |
179 | |
180 for (my $i=1;$i<@{$length{$hashkey}};$i++) { | |
181 print OUT "<tr align=\"center\"> | |
182 <th >$length{$hashkey}[$i][0] </th> | |
183 "; | |
184 for(my $j=1;$j<@{$length{$hashkey}[$i]};$j++) { | |
185 print OUT "<td> $length{$hashkey}[$i][$j] </td>\n"; | |
186 } | |
187 print OUT "</tr>\n"; | |
188 } | |
189 print OUT "</table>\n"; | |
190 | |
191 print OUT "<h3> 1.2 Tags length count </h3>"; | |
192 | |
193 print OUT "<table border=\"1\"> | |
194 <tr align=\"center\">"; | |
195 $hashkey="Tags Length"; | |
196 foreach (@{$length{$hashkey}[0]}) { | |
197 print OUT "<th> $_ </th>\n"; | |
198 } | |
199 print OUT "</tr>"; | |
200 | |
201 for (my $i=1;$i<@{$length{$hashkey}};$i++) { | |
202 print OUT "<tr align=\"center\"> | |
203 <th > $length{$hashkey}[$i][0] </th> | |
204 "; | |
205 for(my $j=1;$j<@{$length{$hashkey}[$i]};$j++) { | |
206 print OUT "<td> $length{$hashkey}[$i][$j] </td>\n"; | |
207 } | |
208 print OUT "</tr>\n"; | |
209 } | |
210 | |
211 print OUT "</table>\n"; | |
212 | |
213 print OUT "<h2> 2. Sequence length distribution </h2>"; | |
214 my $length=$prepath."length.html"; | |
215 open IN,"<$length"; | |
216 while (my $aline=<IN>) { | |
217 chomp $aline; | |
218 print OUT "$aline\n"; | |
219 } | |
220 | |
221 #print OUT "<p> Note:<br />The sequence length data: <a href=\"./$predir/reads_length_distribution.txt\"> length file</a> | |
222 #</p> | |
223 #"; | |
224 | |
225 | |
226 | |
227 | |
228 ####genome map | |
229 #unless ($genomedir=~/\/$/) { | |
230 # $genomedir .="/"; | |
231 #} | |
232 | |
233 print OUT "<h2>2. Genome Alignment Result</h2> | |
234 <h3>2.1 Mapping count</h3> | |
235 "; | |
236 | |
237 open IN,"<$genomepath/genome_mapped.fa"; | |
238 my (@gread,@gtag); | |
239 while (my $aline=<IN>) { | |
240 chomp $aline; | |
241 <IN>; | |
242 $aline=~/:([\d|_]+)_x(\d+)$/; | |
243 my @sss=split/_/,$1; | |
244 for (my $i=0;$i<@sss;$i++) { | |
245 if ($sss[$i]>0) { | |
246 $gread[$i] +=$sss[$i]; | |
247 $gtag[$i] ++; | |
248 } | |
249 } | |
250 } | |
251 close IN; | |
252 | |
253 print OUT "<table border=\"1\"> | |
254 <tr align=\"center\"> | |
255 <th> </th> | |
256 "; | |
257 foreach (@marks) { | |
258 print OUT "<th> $_ </th>\n"; | |
259 } | |
260 print OUT "</tr> | |
261 <tr align=\"center\"> | |
262 <th align=\"left\">Genome Mapped Reads No. </th> | |
263 "; | |
264 foreach (@gread) { | |
265 print OUT "<td> $_ </td>\n"; | |
266 } | |
267 print OUT "</tr> | |
268 <tr align=\"center\"> | |
269 <th align=\"left\">Genome Mapped Reads Percent </th> | |
270 "; | |
271 | |
272 for (my $i=0;$i<@gread;$i++) { | |
273 my $per=sprintf ("%.2f",$gread[$i]/$cleanR[$i]*100); | |
274 print OUT "<td> $per\%</td>\n"; | |
275 } | |
276 | |
277 print OUT "</tr> | |
278 <tr align=\"center\"> | |
279 <th align=\"left\">Genome Mapped Tags No. </th> | |
280 "; | |
281 foreach (@gtag) { | |
282 print OUT "<td> $_ </td>\n"; | |
283 } | |
284 print OUT "</tr> | |
285 <tr align=\"center\"> | |
286 <th align=\"left\">Genome Mapped Tags Percent </th> | |
287 "; | |
288 | |
289 for (my $i=0;$i<@gtag;$i++) { | |
290 my $per=sprintf ("%.2f",$gtag[$i]/$cleanT[$i]*100); | |
291 print OUT "<td> $per\%</td>\n"; | |
292 } | |
293 print OUT "</tr>\n</table>"; | |
294 print OUT "<p> | |
295 Note:<br /> | |
296 The genome mapped bwt file path is: <b>$genomedir/genome_mapped.bwt</b><br /> | |
297 The genome mapped FASTA file path is: <b>$genomedir/genome_mapped.fa</b> | |
298 <br /> | |
299 "; | |
300 | |
301 | |
302 | |
303 #### rfam | |
304 if(defined $rfampath && $rfampath=~/rfam_match/){ | |
305 chomp $rfampath; | |
306 @tmp=split/\//,$rfampath; | |
307 $rfamdir=$tmp[-1]; | |
308 | |
309 unless ($rfampath=~/\/$/) { | |
310 $rfampath .="/"; | |
311 } | |
312 print OUT "<h2>3. Rfam non-miRNA annotation</h2> | |
313 <h3>3.1 Reads count</h3> | |
314 <table border=\"1\"> | |
315 <tr align=\"center\"> | |
316 "; | |
317 | |
318 my @rfamR; my @rfamT; | |
319 my $tag=1; | |
320 open IN,"<$dir/rfam_non-miRNA_annotation.txt"; | |
321 while (my $aline=<IN>) { | |
322 chomp $aline; | |
323 $tag=0 if($aline=~/tags\s+number/); | |
324 next if($aline=~/^\#/); | |
325 next if($aline=~/^\s*$/); | |
326 my @tmp=split/\s+/,$aline; | |
327 if($tag == 1){push @rfamR,[@tmp];} | |
328 else{push @rfamT,[@tmp];} | |
329 } | |
330 close IN; | |
331 | |
332 | |
333 print OUT "<th>RNA Name</th>\n"; | |
334 foreach (@marks) { | |
335 print OUT "<th> $_ </th>\n"; | |
336 } | |
337 for (my $i=0;$i<@rfamR;$i++) { | |
338 print OUT "</tr> | |
339 <tr align=\"center\"> | |
340 <th align=\"left\">$rfamR[$i][0]</th> | |
341 "; | |
342 for (my $j=1;$j<@{$rfamR[$i]} ;$j++) { | |
343 print OUT "<td> $rfamR[$i][$j]</td>\n"; | |
344 } | |
345 } | |
346 | |
347 print OUT "</tr>\n</table> | |
348 <h3>3.2 Tags count</h3> | |
349 <table border=\"1\"> | |
350 <tr align=\"center\"> | |
351 <th>RNA Name</th>\n"; | |
352 foreach (@marks) { | |
353 print OUT "<th> $_ </th>\n"; | |
354 } | |
355 for (my $i=0;$i<@rfamT;$i++) { | |
356 print OUT "</tr> | |
357 <tr align=\"center\"> | |
358 <th align=\"left\">$rfamT[$i][0]</th> | |
359 "; | |
360 for (my $j=1;$j<@{$rfamT[$i]} ;$j++) { | |
361 print OUT "<td> $rfamT[$i][$j]</td>\n"; | |
362 } | |
363 } | |
364 print OUT "</tr>\n</table> | |
365 <p>Note:<br />The rfam mapping results is: <b>$rfampath</b>"; | |
366 print OUT "<b>rfam_mapped.bwt</b></p> | |
367 "; | |
368 } | |
369 | |
370 | |
371 print OUT " | |
372 </BODY> | |
373 </HTML> | |
374 "; | |
375 close OUT; | |
376 | |
377 sub usage{ | |
378 print <<"USAGE"; | |
379 Version $version | |
380 Usage: | |
381 $0 -o | |
382 options: | |
383 -o output file | |
384 -h help | |
385 USAGE | |
386 exit(1); | |
387 } | |
388 |