annotate html_preprocess.pl @ 53:f5a2e8308836 draft default tip

Uploaded
author big-tiandm
date Mon, 08 Dec 2014 01:51:16 -0500
parents c75593f79aa9
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
47
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
1 #!/usr/bin/perl -w
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
2 #Filename:
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
3 #Author: Tian Dongmei
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
4 #Email: tiandm@big.ac.cn
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
5 #Date: 2014-5-29
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
6 #Modified:
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
7 #Description:
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
8 my $version=1.00;
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
9
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
10 use strict;
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
11 use Getopt::Long;
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
12 use File::Basename;
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
13
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
# Parse the command line. -i, -format and -o are mandatory; -h asks for help.
# GetOptions returns false when it meets an unknown or malformed option, so
# that case is routed to the usage message as well instead of being ignored.
my %opts;
my $options_ok = GetOptions(\%opts, "i=s", "format=s", "min=i", "max=i", "o=s", "h");
if (   !$options_ok
    || defined $opts{h}
    || !(defined $opts{o} and defined $opts{format} and defined $opts{i}))
{
    usage();    # prints the help text and exits
}
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
# The -i file lists, one per line: the sample configuration file, the
# preprocess result directory, the genome mapping directory and, optionally,
# the Rfam result directory.
my ($config,$prepath,$rfampath,$knownpath,$genomepath,$novelpath);
my ($predir,$rfamdir,$knowndir,$genomedir,$noveldir);
{
    # Three-argument open with a lexical handle: the file name can no longer
    # be mistaken for an open mode, and a failure is reported immediately.
    open my $list_fh, '<', $opts{i}
        or die "Cannot open path list '$opts{i}': $!\n";
    $config     = <$list_fh>;
    $prepath    = <$list_fh>;
    $genomepath = <$list_fh>;
    $rfampath   = <$list_fh>;    # optional; chomped later, only when it is used
    close $list_fh;
}
defined $genomepath
    or die "Path list '$opts{i}' must contain at least three lines\n";
chomp($config, $prepath, $genomepath);

# Keep only the last path component of each directory.
my @tmp = split m{/}, $prepath;
$predir = $tmp[-1];
@tmp = split m{/}, $genomepath;
$genomedir = $tmp[-1];
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
31
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
# Directory that will hold the report file.
my $dir = dirname($opts{'o'});

# Create the report. The three-argument form keeps the file name from being
# parsed as an open mode, and the script stops if the file cannot be written.
open(OUT, '>', $opts{'o'})
    or die "Cannot write report '$opts{o}': $!\n";
print OUT "<HTML>\n <HEAD>\n <TITLE> Analysis Report </TITLE>\n </HEAD>
<BODY bgcolor=\"lightgray\">\n <h1 align=\"center\">\n <font face=\"ºÚÌå\">\n <b>Preprocess Report</b>\n </font>\n </h1>
<h2>1. Sequence No. and quality</h2>
<h3>1.1 Sequece No.</h3>
";
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
40
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
### raw data no
# Each line of the configuration file is "<sequence file><TAB><sample mark>".
# For every sample the number of raw records is derived from the line count of
# its sequence file.
my @files;my @marks; my @rawNo;
{
    open my $config_fh, '<', $config
        or die "Cannot open sample configuration '$config': $!\n";

    # fq/fastq input is counted as four lines per record, anything else as two.
    my $lines_per_record =
        ($opts{'format'} eq "fq" || $opts{'format'} eq "fastq") ? 4 : 2;

    while (my $aline = <$config_fh>) {
        chomp $aline;
        next if $aline =~ /^\s*$/;    # ignore blank lines
        my ($seq_file, $mark) = split /\t/, $aline;
        push @files, $seq_file;

        # Read the file through `less` as before, but with a list-form pipe
        # open: no shell is involved, so spaces or metacharacters in the file
        # name cannot break or hijack the command.
        open my $pager, '-|', 'less', $seq_file
            or die "Cannot run less on '$seq_file': $!\n";
        my $line_count = 0;
        while (defined(my $unused = <$pager>)) {
            $line_count++;
        }
        close $pager
            or warn "less did not finish cleanly for '$seq_file'\n";

        push @rawNo, $line_count / $lines_per_record;
        push @marks, $mark;
    }
    close $config_fh;
}
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
62
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
### preprocess
unless ($prepath=~/\/$/) {
    $prepath .="/";
}

# Tally a collapsed FASTA file whose records are two lines long and whose
# header lines end in ":<n1>_<n2>_..._x<total>".
# Returns two array references indexed by position in that list: the summed
# counts, and the number of records whose count at that position is positive.
sub _count_reads_and_tags {
    my ($fasta) = @_;
    my (@reads, @tags);
    open my $fa_fh, '<', $fasta
        or die "Cannot open '$fasta': $!\n";
    while (my $header = <$fa_fh>) {
        chomp $header;
        my $sequence = <$fa_fh>;    # second line of the record is not needed
        # Skip headers that do not match; otherwise $1 would still hold the
        # counts of the previous record and they would be added twice.
        next unless $header =~ /:([\d|_]+)_x(\d+)$/;
        my @per_sample = split /_/, $1;
        for my $i (0 .. $#per_sample) {
            next unless $per_sample[$i] > 0;
            $reads[$i] += $per_sample[$i];
            $tags[$i]++;
        }
    }
    close $fa_fh;
    return (\@reads, \@tags);
}

my $collapsefile=$prepath."collapse_reads.fa";
my ($trimmed_reads, $trimmed_tags) = _count_reads_and_tags($collapsefile);
my @trimNo   = @$trimmed_reads;
my @collapse = @$trimmed_tags;

# The clean reads file name embeds -min and -max, so both must be present.
defined $opts{min} && defined $opts{max}
    or die "Options -min and -max are required to locate the clean reads file\n";
my $clean=$prepath."collapse_reads_$opts{min}_$opts{max}.fa";
my ($clean_reads, $clean_tags) = _count_reads_and_tags($clean);
my @cleanR = @$clean_reads;
my @cleanT = @$clean_tags;
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
101
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
# Summary table: one column per sample mark, one row per counter.
my @summary_rows = (
    [ "Raw Reads No. ",                      \@rawNo    ],
    [ "Reads No. After Trimed 3' adapter ",  \@trimNo   ],
    [ "Unique Tags No. ",                    \@collapse ],
    [ "Clean Reads No. ",                    \@cleanR   ],
    [ "Clean Tags No. ",                     \@cleanT   ],
);

print OUT qq{<table border="1">\n<tr align="center">\n<th>&nbsp;</th>\n};
print OUT "<th> $_ </th>\n" for @marks;
for my $summary_row (@summary_rows) {
    my ($label, $counts) = @$summary_row;
    # Each row first closes the previous one, then opens itself.
    print OUT qq{</tr>\n<tr align="center">\n<th align="left">$label</th>\n};
    print OUT "<td> $_ </td>\n" for @$counts;
}
print OUT "</tr>\n</table>";

# Notes: the input files behind the numbers above.
print OUT "<p>\nNote:<br />\nThe raw data file path is: <b>$files[0]</b><br />\n";
for my $extra_file (@files[1 .. $#files]) {
    # Further raw files are indented so they line up under the first one.
    print OUT "&nbsp;&nbsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;<b>$extra_file</b><br />";
}
print OUT "The collapsed file path is: <b>$collapsefile</b><br />\n",
          "The clean data file path is: <b>$clean</b><br />\n",
          "</p>\n",
          "<h2> 1. Sequence length count</h2>\n",
          "<h3> 1.1 Reads length count </h3>\n",
          "\n";
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
159
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
# Load reads_length_distribution.txt into %length. Rows are filed under
# "Tags Length" until a line starting with "Reads" is seen, and under
# "Reads Length" from that line on. Each row is [label, values...]; the first
# row stored under a key is used as that table's header.
my (%length); my $key="Tags Length";
{
    my $distribution_file = "$prepath/reads_length_distribution.txt";
    open my $dist_fh, '<', $distribution_file
        or die "Cannot open '$distribution_file': $!\n";
    while (my $aline = <$dist_fh>) {
        chomp $aline;
        next if $aline =~ /^\s*$/;
        $key = "Reads Length" if $aline =~ /^Reads/;
        my ($row_label, $rest) = split /\t/, $aline;
        # A line without a tab has no value column; store the label alone.
        my @values = defined $rest ? split(/\s/, $rest) : ();
        push @{ $length{$key} }, [ $row_label, @values ];
    }
    close $dist_fh;
}

# Reads table.
my $hashkey="Reads Length";
{
    my ($header, @body) = @{ $length{$hashkey} || [] };
    print OUT "<table border=\"1\">\n<tr align=\"center\">";
    print OUT "<th> $_ </th>\n" foreach @{ $header || [] };
    print OUT "</tr>";
    foreach my $row (@body) {
        my ($row_label, @cells) = @$row;
        print OUT "<tr align=\"center\">\n<th >$row_label </th>\n";
        print OUT "<td> $_ </td>\n" foreach @cells;
        print OUT "</tr>\n";
    }
    print OUT "</table>\n";
}

print OUT "<h3> 1.2 Tags length count </h3>";

# Tags table.
$hashkey="Tags Length";
{
    my ($header, @body) = @{ $length{$hashkey} || [] };
    print OUT "<table border=\"1\">\n<tr align=\"center\">";
    print OUT "<th> $_ </th>\n" foreach @{ $header || [] };
    print OUT "</tr>";
    foreach my $row (@body) {
        my ($row_label, @cells) = @$row;
        print OUT "<tr align=\"center\">\n<th > $row_label </th>\n";
        print OUT "<td> $_ </td>\n" foreach @cells;
        print OUT "</tr>\n";
    }
    print OUT "</table>\n";
}
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
212
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
print OUT "<h2> 2. Sequence length distribution </h2>";
# Copy the HTML fragment length.html from the preprocess directory into the
# report, line by line. A missing fragment is reported on STDERR and the
# report continues without it.
my $length=$prepath."length.html";
if (open my $length_fh, '<', $length) {
    while (my $aline = <$length_fh>) {
        chomp $aline;
        print OUT "$aline\n";
    }
    close $length_fh;    # the original never released this handle
}
else {
    warn "Cannot open length distribution fragment '$length': $!\n";
}
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
220
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
221 #print OUT "<p> Note:<br />The sequence length data: <a href=\"./$predir/reads_length_distribution.txt\"> length file</a>
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
222 #</p>
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
223 #";
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
224
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
225
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
226
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
227
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
228 ####genome map
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
229 #unless ($genomedir=~/\/$/) {
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
230 # $genomedir .="/";
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
231 #}
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
232
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
print OUT "<h2>2. Genome Alignment Result</h2>
<h3>2.1 Mapping count</h3>
";

# Tally genome_mapped.fa. Records are two lines long and header lines end in
# ":<n1>_<n2>_..._x<total>". @gread sums the counts per position, @gtag counts
# the records whose count at that position is positive.
my (@gread,@gtag);
{
    my $mapped_fasta = "$genomepath/genome_mapped.fa";
    open my $mapped_fh, '<', $mapped_fasta
        or die "Cannot open '$mapped_fasta': $!\n";
    while (my $aline = <$mapped_fh>) {
        chomp $aline;
        my $sequence = <$mapped_fh>;    # second line of the record is not needed
        # Skip headers that do not match; otherwise $1 would still hold the
        # counts of the previous record and they would be added twice.
        next unless $aline =~ /:([\d|_]+)_x(\d+)$/;
        my @sss = split /_/, $1;
        for my $i (0 .. $#sss) {
            if ($sss[$i] > 0) {
                $gread[$i] += $sss[$i];
                $gtag[$i]++;
            }
        }
    }
    close $mapped_fh;
}

# Percentage with two decimals. A missing or zero total yields 0.00 instead of
# aborting the script with "Illegal division by zero".
my $percent_of = sub {
    my ($part, $whole) = @_;
    return sprintf("%.2f", 0) unless $whole;
    return sprintf("%.2f", ($part || 0) / $whole * 100);
};

print OUT "<table border=\"1\">
<tr align=\"center\">
<th>&nbsp;</th>
";
foreach (@marks) {
    print OUT "<th> $_ </th>\n";
}
print OUT "</tr>
<tr align=\"center\">
<th align=\"left\">Genome Mapped Reads No. </th>
";
foreach (@gread) {
    print OUT "<td> $_ </td>\n";
}
print OUT "</tr>
<tr align=\"center\">
<th align=\"left\">Genome Mapped Reads Percent </th>
";

for my $i (0 .. $#gread) {
    my $per = $percent_of->($gread[$i], $cleanR[$i]);
    print OUT "<td> $per\%</td>\n";
}

print OUT "</tr>
<tr align=\"center\">
<th align=\"left\">Genome Mapped Tags No. </th>
";
foreach (@gtag) {
    print OUT "<td> $_ </td>\n";
}
print OUT "</tr>
<tr align=\"center\">
<th align=\"left\">Genome Mapped Tags Percent </th>
";

for my $i (0 .. $#gtag) {
    my $per = $percent_of->($gtag[$i], $cleanT[$i]);
    print OUT "<td> $per\%</td>\n";
}
print OUT "</tr>\n</table>";
print OUT "<p>
Note:<br />
The genome mapped bwt file path is: <b>$genomedir/genome_mapped.bwt</b><br />
The genome mapped FASTA file path is: <b>$genomedir/genome_mapped.fa</b>
<br />
";
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
300
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
301
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
302
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
#### rfam
# The Rfam section is written only when the fourth line of the path list is
# present and contains "rfam_match".
if(defined $rfampath && $rfampath=~/rfam_match/){
    chomp $rfampath;
    @tmp=split/\//,$rfampath;
    $rfamdir=$tmp[-1];
    $rfampath .= "/" unless $rfampath =~ /\/$/;

    # Read the annotation table stored next to the report. Rows before the
    # line matching "tags number" are read counts, rows from that line on are
    # tag counts; '#' lines and blank lines are skipped.
    my (@rfamR, @rfamT);
    my $annotation = "$dir/rfam_non-miRNA_annotation.txt";
    open my $rfam_fh, '<', $annotation
        or die "Cannot open '$annotation': $!\n";
    my $in_reads_part = 1;
    while (my $aline = <$rfam_fh>) {
        chomp $aline;
        $in_reads_part = 0 if $aline =~ /tags\s+number/;
        next if $aline =~ /^\#/;
        next if $aline =~ /^\s*$/;
        my @fields = split /\s+/, $aline;
        if ($in_reads_part) {
            push @rfamR, [@fields];
        }
        else {
            push @rfamT, [@fields];
        }
    }
    close $rfam_fh;

    print OUT "<h2>3. Rfam non-miRNA annotation</h2>\n";
    # Both tables share one layout: an "RNA Name" column followed by one
    # column per sample mark.
    for my $section ( [ "3.1 Reads count", \@rfamR ], [ "3.2 Tags count", \@rfamT ] ) {
        my ($title, $rows) = @$section;
        print OUT "<h3>$title</h3>\n<table border=\"1\">\n<tr align=\"center\">\n<th>RNA Name</th>\n";
        print OUT "<th> $_ </th>\n" foreach @marks;
        foreach my $row (@$rows) {
            my ($rna_name, @counts) = @$row;
            # Each row first closes the previous one, then opens itself.
            print OUT "</tr>\n<tr align=\"center\">\n<th align=\"left\">$rna_name</th>\n";
            print OUT "<td> $_</td>\n" foreach @counts;
        }
        print OUT "</tr>\n</table>\n";
    }
    print OUT "<p>Note:<br />The rfam mapping results is: <b>$rfampath</b>";
    print OUT "<b>rfam_mapped.bwt</b></p>\n";
}
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
369
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
370
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
# Finish the HTML document. Buffered write errors (for example a full disk)
# only surface when the handle is closed, so the result of close is checked.
print OUT "
</BODY>
</HTML>
";
close(OUT) or die "Failed to finish writing report '$opts{o}': $!\n";
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
376
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
# Print the command-line help on STDOUT and terminate the script with exit
# status 1. The text now lists every option accepted by GetOptions; the old
# text mentioned only -o and -h although -i and -format are mandatory.
sub usage{
    print <<"USAGE";
Version $version
Usage:
$0 -i <path list> -format <format> -o <report.html> [-min <int>] [-max <int>]
options:
-i       file listing, one per line: the sample configuration file, the
         preprocess directory, the genome mapping directory and, optionally,
         the Rfam directory
-format  fq or fastq: raw files are counted as four lines per record;
         any other value: two lines per record
-min     integer used in the clean reads file name collapse_reads_<min>_<max>.fa
-max     integer used in the clean reads file name collapse_reads_<min>_<max>.fa
-o       output file
-h       help
USAGE
    exit(1);
}
c75593f79aa9 Uploaded
big-tiandm
parents:
diff changeset
388