Mercurial > repos > big-tiandm > sirna_plant
view html.pl @ 26:80d514e98f72 draft
Uploaded
author | big-tiandm |
---|---|
date | Thu, 06 Nov 2014 02:18:09 -0500 |
parents | 9dcffd531c76 |
children |
line wrap: on
line source
#!/usr/bin/perl -w #Filename: #Author: Tian Dongmei #Email: tiandm@big.ac.cn #Date: 2014-5-29 #Modified: #Description: my $version=1.00; use strict; use Getopt::Long; use File::Basename; my %opts; GetOptions(\%opts,"i=s","format=s","o=s","h"); if (!(defined $opts{o} and defined $opts{format} and defined $opts{i} ) || defined $opts{h}) { #necessary arguments &usage; } my ($config,$prepath,$rfampath,$genomepath,$clusterpath,$annotatepath,$plotpath,$degpath); my ($predir,$rfamdir,$genomedir,$clusterdir,$annotatedir,$plotdir,$degdir); open IN,"<$opts{i}"; $config=<IN>; chomp $config; $prepath=<IN>; chomp $prepath; $rfampath=<IN>;chomp $rfampath; $genomepath=<IN>; chomp $genomepath; $clusterpath=<IN>; chomp $clusterpath; $annotatepath=<IN>; chomp $annotatepath; $plotpath=<IN>; chomp $plotpath; my $deg_tag=1; if(eof){$deg_tag=0;} else{ $degpath=<IN>; chomp $degpath; } close IN; my @tmp=split/\//,$prepath; $predir=$tmp[-1]; @tmp=split/\//,$rfampath; $rfamdir=$tmp[-1]; @tmp=split/\//,$genomepath; $genomedir=$tmp[-1]; @tmp=split/\//,$clusterpath; $clusterdir=$tmp[-1]; @tmp=split/\//,$annotatepath; $annotatedir=$tmp[-1]; @tmp=split/\//,$plotpath; $plotdir=$tmp[-1]; my $dir=dirname($opts{'o'}); open OUT ,">$opts{'o'}"; print OUT "<HTML>\n <HEAD>\n <TITLE> Analysis Report </TITLE>\n </HEAD> <BODY bgcolor=\"lightgray\">\n <h1 align=\"center\">\n <font face=\"ºÚÌå\">\n <b>Small RNA Analysis Report</b>\n </font>\n </h1> <h2>1. Sequence No. and quality</h2> <h3>1.1 Sequece No.</h3> "; ### raw data no open IN,"<$config"; my @files;my @marks; my @rawNo; while (my $aline=<IN>) { chomp $aline; my @tmp=split/\t/,$aline; push @files,$tmp[0]; my $no=`less $tmp[0] |wc -l `; chomp $no; if ($opts{'format'} eq "fq" || $opts{'format'} eq "fastq") { $no=$no/4; } else{ $no=$no/2; } push @rawNo,$no; push @marks,$tmp[1]; } close IN; ### preprocess unless ($prepath=~/\/$/) { $prepath .="/"; } my @trimNo;my @collapse; my $collapsefile=$prepath."collapse_reads.fa"; open IN,"<$collapsefile"; while (my $aline=<IN>) { chomp $aline; <IN>; $aline=~/:([\d|_]+)_x(\d+)$/; my @lng=split/_/,$1; for (my $i=0;$i<@lng;$i++) { if ($lng[$i]>0) { $trimNo[$i] +=$lng[$i]; $collapse[$i] ++; } } } close IN; my @cleanR;my @cleanT; my $clean=$prepath."collapse_reads_18-40.fa"; open IN,"<$clean"; while (my $aline=<IN>) { chomp $aline; <IN>; $aline=~/:([\d|_]+)_x(\d+)$/; my @lng=split/_/,$1; for (my $i=0;$i<@lng;$i++) { if ($lng[$i]>0) { $cleanR[$i] +=$lng[$i]; $cleanT[$i] ++; } } } close IN; my @filterR;my @filterT; my $filter=$prepath."collapse_reads_out.fa"; open IN,"<$filter"; while (my $aline=<IN>) { chomp $aline; <IN>; $aline=~/:([\d|_]+)_x(\d+)$/; my @lng=split/_/,$1; for (my $i=0;$i<@lng;$i++) { if ($lng[$i]>0) { $filterR[$i] +=$lng[$i]; $filterT[$i] ++; } } } close IN; print OUT "<table border=\"1\"> <tr align=\"center\"> <th> </th> "; foreach (@marks) { print OUT "<th> $_ </th>\n"; } print OUT "</tr> <tr align=\"center\"> <th align=\"left\">Raw Reads No. </th> "; foreach (@rawNo) { print OUT "<td> $_ </td>\n"; } print OUT "</tr> <tr align=\"center\"> <th align=\"left\">Reads No. After Trimed 3\' adapter </th> "; foreach (@trimNo) { print OUT "<td> $_ </td>\n"; } print OUT "</tr> <tr align=\"center\"> <th align=\"left\">Unique Tags No. </th> "; foreach (@collapse) { print OUT "<td> $_ </td>\n"; } print OUT "</tr> <tr align=\"center\"> <th align=\"left\">Clean Reads No. </th> "; foreach (@cleanR) { print OUT "<td> $_ </td>\n"; } print OUT "</tr> <tr align=\"center\"> <th align=\"left\">Clean Tags No. </th> "; foreach (@cleanT) { print OUT "<td> $_ </td>\n"; } print OUT "</tr> <tr align=\"center\"> <th align=\"left\">Filter Reads No. \(reads count \>3\) </th> "; foreach (@filterR) { print OUT "<td> $_ </td>\n"; } print OUT "</tr> <tr align=\"center\"> <th align=\"left\">Filter Tags No. \(reads count \>3\) </th> "; foreach (@filterT) { print OUT "<td> $_ </td>\n"; } print OUT "</tr>\n</table>"; print OUT "<p> Note:<br /> The raw data file path is: <b>$files[0]</b><br /> "; for (my $i=1;$i<@files;$i++) { print OUT "          <b>$files[$i]</b><br />"; } print OUT "The collapsed file path is: <b>$collapsefile</b><br /> The clean data file path is: <b>$clean</b><br /> The filter (remain total reads>3) data file path is: <b>$filter</b><br /> </p> <h2> 1. Sequence length count</h2> "; print OUT "\n"; my $length=$prepath."length.html"; open IN,"<$length"; while (my $aline=<IN>) { chomp $aline; print OUT "$aline\n"; } close IN; print OUT "<p> Note:<br />The sequence length data: <a href=\"./$predir/reads_length_distribution_after_count_filter.txt\"> length file</a> </p> "; #### rfam unless ($rfampath=~/\/$/) { $rfampath .="/"; } unless ($genomepath=~/\/$/) { $genomepath .="/"; } print OUT "<h2>2. Rfam non-miRNA annotation</h2> <h3>2.1 Reads count</h3> <table border=\"1\"> <tr align=\"center\"> "; my @rfamR; my @rfamT; my $tag=1; open IN,"<$dir/rfam_match/rfam_non-miRNA_annotation.txt"; while (my $aline=<IN>) { chomp $aline; $tag=0 if($aline=~/tags\s+number/); next if($aline=~/^\#/); next if($aline=~/^\s*$/); my @tmp=split/\s+/,$aline; if($tag == 1){push @rfamR,[@tmp];} else{push @rfamT,[@tmp];} } close IN; print OUT "<th>RNA Name</th>\n"; foreach (@marks) { print OUT "<th> $_ </th>\n"; } for (my $i=0;$i<@rfamR;$i++) { print OUT "</tr> <tr align=\"center\"> <th align=\"left\">$rfamR[$i][0]</th> "; for (my $j=1;$j<@{$rfamR[$i]} ;$j++) { print OUT "<td> $rfamR[$i][$j]</td>\n"; } } print OUT "</tr>\n</table> <h3>2.2 Tags count</h3> <table border=\"1\"> <tr align=\"center\"> <th>RNA Name</th>\n"; foreach (@marks) { print OUT "<th> $_ </th>\n"; } for (my $i=0;$i<@rfamT;$i++) { print OUT "</tr> <tr align=\"center\"> <th align=\"left\">$rfamT[$i][0]</th> "; for (my $j=1;$j<@{$rfamT[$i]} ;$j++) { print OUT "<td> $rfamT[$i][$j]</td>\n"; } } print OUT "</tr>\n</table> <p>Note:<br />The rfam mapping results is: <b>$rfampath</b>"; print OUT "<b>rfam_mapped.bwt</b></p>"; open IN,"<$dir/genome_match/genome_mapped.bwt"; my @genome_r_u; my @genome_r_m; my @genome_t_u; my @genome_t_m; my $tags_map_number=0; while (my $aline=<IN>) { chomp $aline; my @temp=split/\t/,$aline; if ($temp[6]==0) { $aline=~/:([\d|_]+)_x(\d+)/; my @lng=split/_/,$1; for (my $i=0;$i<@lng;$i++) { if ($lng[$i]>0) { $genome_r_u[$i] +=$lng[$i]; $genome_t_u[$i] ++; } } $tags_map_number++; } if ($temp[6]>0) { $aline=~/:([\d|_]+)_x(\d+)/; my @lng=split/_/,$1; for (my $i=0;$i<@lng;$i++) { if ($lng[$i]>0) { $genome_r_m[$i] +=$lng[$i]; $genome_t_m[$i] ++; } } for (my $i=0;$i<$temp[6] ;$i++) { my $next=<IN>; } $tags_map_number++; } } close IN; #<h3>3.1 Reads count</h3> #<table border=\"1\"> #<tr align=\"center\"> print OUT "<h2>3. genome mapping result</h2> <table border=\"1\"> <tr align=\"center\"> <th align=\"left\">Map</th>\n "; foreach (@marks) { print OUT "<th> $_ </th>\n"; } print OUT "</tr> <tr align=\"center\"> <th align=\"left\">Uniq Map Reads No.</th> "; for (my $i=0;$i<@genome_r_u ;$i++) { print OUT "<td> $genome_r_u[$i]</td>\n"; } print OUT "</tr> <tr align=\"center\"> <th align=\"left\">Uniq Map Tags No.</th> "; for (my $i=0;$i<@genome_t_u ;$i++) { print OUT "<td> $genome_t_u[$i]</td>\n"; } print OUT "</tr> <tr align=\"center\"> <th align=\"left\">Multiple Map Reads No.</th> "; for (my $i=0;$i<@genome_r_m ;$i++) { print OUT "<td> $genome_r_m[$i]</td>\n"; } print OUT "</tr> <tr align=\"center\"> <th align=\"left\">Multiple Map Tags No.</th> "; for (my $i=0;$i<@genome_t_m ;$i++) { print OUT "<td> $genome_t_m[$i]</td>\n"; } print OUT "</tr>\n</table> <p>Note:<br />The genome mapping results is: <b>$genomepath</b>"; print OUT "<b>genome_mapped.bwt</b></p>"; my $cluster="$clusterpath/sample_reads.cluster"; my $cluster_number=`less $cluster |wc -l `; $cluster_number=$cluster_number-1; my (%cluster_length,@exp,@rpkm); my @exp_range=qw(0 \(0--10] \(10--100] \(100--1000] \(1000--10000] \(10000--100000] \(100000--**\)); my @rpkm_range=qw(0 \(0--0.25] \(0.25--0.5] \(0.5--1] \(1.0-5.0] \(5--10] \(10--50] \(50--100] \(100--500] \(500--1000] \(1000--**]); open IN,"<$cluster"; while (my $aline=<IN>) { next if($aline=~/^\"/); chomp $aline; my @temp=split/\t/,$aline; my @id=split/:|-/,$temp[0]; $cluster_length{$id[2]-$id[1]+1}++; for (my $i=0;$i<@marks ;$i++) { if ($temp[$i+3] == 0) {$exp[0][$i]++;} elsif ($temp[$i+3]>0 && $temp[$i+3]<= 10 ){$exp[1][$i]++;} elsif ($temp[$i+3]>10 && $temp[$i+3]<=100){$exp[2][$i]++;} elsif ($temp[$i+3]>100 && $temp[$i+3]<=1000){$exp[3][$i]++;} elsif ($temp[$i+3]>1000 && $temp[$i+3]<=10000){$exp[4][$i]++;} elsif ($temp[$i+3]>10000 && $temp[$i+3]<=100000){$exp[5][$i]++;} elsif ($temp[$i+3]>100000){$exp[6][$i]++;} } } close IN; my $cluster_rpkm="$clusterpath/sample_rpkm.cluster"; open IN,"<$cluster_rpkm"; while (my $aline=<IN>) { next if($aline=~/^\#/); chomp $aline; my @temp=split/\t/,$aline; for (my $i=0;$i<@marks ;$i++) { if ($temp[$i+3]==0) {$rpkm[0][$i]++;} elsif($temp[$i+3]>0 && $temp[$i+3]<=0.25){$rpkm[1][$i]++;} elsif($temp[$i+3]>0.25 && $temp[$i+3]<=0.5){$rpkm[2][$i]++;} elsif($temp[$i+3]>0.5 && $temp[$i+3]<=1){$rpkm[3][$i]++;} elsif($temp[$i+3]>1 && $temp[$i+3]<=5){$rpkm[4][$i]++;} elsif($temp[$i+3]>5 && $temp[$i+3]<=10){$rpkm[5][$i]++;} elsif($temp[$i+3]>10 && $temp[$i+3]<=50){$rpkm[6][$i]++;} elsif($temp[$i+3]>50 && $temp[$i+3]<=100){$rpkm[7][$i]++;} elsif($temp[$i+3]>100 && $temp[$i+3]<=500){$rpkm[8][$i]++;} elsif($temp[$i+3]>500 && $temp[$i+3]<=1000){$rpkm[9][$i]++;} else{$rpkm[10][$i]++;} } } close IN; my $cluster_length_file="$clusterpath/cluster_length.txt"; open LEN,">$cluster_length_file"; print LEN "\#length\tcluster_number\n"; foreach my $key (sort keys %cluster_length) { print LEN "$key\t$cluster_length{$key}\n"; } close LEN; print OUT "<h2>4. cluster result</h2> <h3>4.1 Cluster count</h3> <table border=\"1\"> <tr align=\"center\"> <th align=\"left\"> </th> <td>Merged samples</td></tr> <tr align=\"center\"> <th align=\"left\">Tags number</th> <td>$tags_map_number</td></tr> <tr align=\"center\"> <th align=\"left\">Cluster number</th> <td>$cluster_number</td></tr>\n</table> "; print OUT "<h3>4.2 Cluster length</h3> <p> Note:<br />The clusters length data: <a href=\"./$clusterdir/cluster_length.txt\"> length file</a> </p> "; print OUT "<h3>4.3 Quantify</h3> <table border=\"1\"> <tr align=\"center\"> <th align=\"left\">Reads Range</th>\n "; foreach (@marks) { print OUT "<th> $_ </th>\n"; } for (my $i=0;$i<@exp_range;$i++) { print OUT "</tr> <tr align=\"center\"> <th align=\"left\">$exp_range[$i]</th> "; for (my $j=0;$j<@marks ;$j++) { if (!(defined($exp[$i][$j]))) { print OUT "<td> 0</td>\n"; } else{print OUT "<td> $exp[$i][$j]</td>\n";} } } print OUT "</tr>\n</table>"; print OUT "\n<table border=\"1\"> <tr align=\"center\"> <th align=\"left\">RPKM Range</th>\n "; foreach (@marks) { print OUT "<th> $_ </th>\n"; } for (my $i=0;$i<@rpkm_range;$i++) { print OUT "</tr> <tr align=\"center\"> <th align=\"left\">$rpkm_range[$i]</th> "; for (my $j=0;$j<@marks ;$j++) { if (!(defined($rpkm[$i][$j]))) { print OUT "<td> 0</td>\n"; } else{print OUT "<td> $rpkm[$i][$j]</td>\n";} } } print OUT "</tr>\n</table>"; my $annotate="$annotatepath/sample_c_p.anno"; my (%posit,%repeat,%nat1,%nat2); my (@phase,@long,@repeat,@nat); for (my $j=0;$j<@marks ;$j++) { $phase[$j]=0; $long[$j]=0; $repeat[$j]=0; $nat[$j]=0; } my $class_anno=1; open ANNO,"<$annotate"; while (my $aline=<ANNO>) { chomp $aline; my @temp=split/\t/,$aline; if($aline=~/^\#/){ if (@temp != 10+@marks) { $class_anno=0; } next; } for (my $i=3+@marks+$class_anno;$i<@temp;$i++) { my @posit=split/\;/,$temp[$i]; for (my $j=0;$j<@marks ;$j++) { if ($temp[3+$j]>0) { $posit{$posit[0]}[$j]++; } else{ if (!(defined($posit{$posit[0]}[$j]))) { $posit{$posit[0]}[$j]=0; } } } } if ($class_anno) { for (my $j=0;$j<@marks ;$j++) { if ($temp[3+$j]>0) { if ($temp[6] eq "phase") { $phase[$j]++; } if ($temp[7] eq "long") { $long[$j]++; } if ($temp[8] ne "\/") { $repeat[$j]++; my @rr=split/\;/,$temp[8]; foreach (@rr) { $repeat{$_}[$j]++; } } if ($temp[9] ne "\/") { $nat[$j]++; my @nn1=split/\;/,$temp[9]; my @nn2=split/\;/,$temp[10]; for (my $k=0;$k<@nn1 ;$k++) { $nat1{$nn1[$k]}[$j]++; $nat2{$nn2[$k]}[$j]++; } } } } } } close ANNO; print OUT "<h2>5. Cluster Annotate</h2> <h3>5.1 Cluster genome position annotate</h3> <table border=\"1\"> <tr align=\"center\"> <th align=\"left\">clusters number</th>\n "; foreach (@marks) { print OUT "<th> $_ </th>\n"; } foreach my $key (sort keys %posit) { print OUT "</tr> <tr align=\"center\"> <th align=\"left\">$key</th> "; foreach (@{$posit{$key}}) { print OUT "<td> $_</td>\n"; } } print OUT "</tr>\n</table>"; print OUT "<p> Note:<br /> One cluster mybe annotate to multiple genes<br /> "; if ($class_anno) { print OUT "<h3>5.2 Cluster source mechanism annotate</h3> <table border=\"1\"> <tr align=\"center\"> <th align=\"left\">clusters number</th>\n "; foreach (@marks) { print OUT "<th> $_ </th>\n"; } print OUT "</tr> <tr align=\"center\"> <th align=\"left\">Phase</th>\n "; foreach (@phase) { print OUT "<td> $_ </td>\n"; } print OUT "</tr> <tr align=\"center\"> <th align=\"left\">Long</th>\n "; foreach (@long) { print OUT "<td> $_ </td>\n"; } print OUT "</tr> <tr align=\"center\"> <th align=\"left\">Repeat</th>\n "; foreach (@repeat) { print OUT "<td> $_ </td>\n"; } print OUT "</tr> <tr align=\"center\"> <th align=\"left\">Nat</th>\n "; foreach (@nat) { print OUT "<td> $_ </td>\n"; } print OUT "</tr>\n</table>"; print OUT "<p> Repeat subclass annotate: "; print OUT "<table border=\"1\"> <tr align=\"center\"> <th align=\"left\">Repeat subclass</th>\n "; foreach (@marks) { print OUT "<th> $_ </th>\n"; } foreach my $key (sort keys %repeat) { print OUT "</tr> <tr align=\"center\"> <th align=\"left\">$key</th> "; for (my $i=0;$i<@marks ;$i++) { if (defined($repeat{$key}[$i])) { print OUT "<td> $repeat{$key}[$i] </td>\n"; } else{print OUT "<td> 0 </td>\n";} } } print OUT "</tr>\n</table>"; print OUT "<p> Nat subclass1 annotate: "; print OUT "<table border=\"1\"> <tr align=\"center\"> <th align=\"left\">Nat subclass1</th>\n "; foreach (@marks) { print OUT "<th> $_ </th>\n"; } foreach my $key (sort keys %nat1) { print OUT "</tr> <tr align=\"center\"> <th align=\"left\">$key</th> "; for (my $i=0;$i<@marks ;$i++) { if (defined($nat1{$key}[$i])) { print OUT "<td> $nat1{$key}[$i] </td>\n"; } else{print OUT "<td> 0 </td>\n";} } } print OUT "</tr>\n</table>"; print OUT "<p> Nat subclass2 annotate: "; print OUT "<table border=\"1\"> <tr align=\"center\"> <th align=\"left\">Nat subclass2</th>\n "; foreach (@marks) { print OUT "<th> $_ </th>\n"; } foreach my $key (sort keys %nat2) { print OUT "</tr> <tr align=\"center\"> <th align=\"left\">$key</th> "; for (my $i=0;$i<@marks ;$i++) { if (defined($nat2{$key}[$i])) { print OUT "<td> $nat2{$key}[$i] </td>\n"; } else{print OUT "<td> 0 </td>\n";} } } print OUT "</tr>\n</table>"; print OUT "<p> Note:<br /> One cluster mybe annotate to multiple repeats or nats<br /> "; } else { print OUT "<h3>5.2 Cluster source mechanism annotate</h3> <br />Do not do source mechanism annotate <br />"; } print OUT "<h2>6. Graph of Clusters of all samples</h2> \n"; my $plot=$plotpath."cluster.html"; open IN,"<$plot"; while (my $aline=<IN>) { chomp $aline; print OUT "$aline\n"; } close IN; if ($deg_tag) { my $deg_file=`ls $degpath`; chomp $deg_file; my @deg_file=split/\n/,$deg_file; my %deg; foreach (@deg_file) { my $output="$degpath/$_/output_score.txt"; open IN,"<$output"; $deg{$_}[0]=0; $deg{$_}[1]=0; $deg{$_}[2]=0; while (my $aline=<IN>) { next if ($aline=~/^\"/); chomp $aline; my @temp=split/\t/,$aline; if ($temp[9] eq "TRUE") { $deg{$_}[0]++; if ($temp[4] >0) { $deg{$_}[1]++; } if ($temp[4] <0) { $deg{$_}[2]++; } } } close IN; } print OUT "<h2>7. DEG</h2> <table border=\"1\"> <tr align=\"center\"> <th align=\"left\">Genes number</th>\n <th> DEG </th>\n <th> UP </th>\n <th> DOWN </th>\n "; foreach my $key (sort keys %deg) { print OUT "</tr> <tr align=\"center\"> <th align=\"left\">$key</th> "; for (my $i=0;$i<@{$deg{$key}} ;$i++) { print OUT "<td> $deg{$key}[$i] </td>\n"; } } print OUT "</tr>\n</table>"; } else{ print OUT "<h2>7. DEG</h2> <br />Do not do DE clusters <br />"; } print OUT " </BODY> </HTML> "; close OUT; sub usage{ print <<"USAGE"; Version $version Usage: $0 -o options: -i -format -o output file -h help USAGE exit(1); }