#!/usr/bin/perl -w
#Filename: html.pl
#Author: Tian Dongmei
#Email: tiandm@big.ac.cn
#Date: 2014-5-29
#Modified:
#Description: Write an HTML summary report (read/tag counts per sample,
#             Rfam annotation tables, links to microRNA result files).
#
# Provenance: Mercurial changeset 5ac42e277c76620eb52ff8a010d00bedda8d6972
#             (parent 0a69f39fa9fff5261303a3d0fb071b54f186fade), user
#             big-tiandm, Fri Jul 25 05:20:34 2014 -0400, "Uploaded";
#             the changeset adds this file as html.pl.
#
# NOTE(review): the text this file was restored from had every piece of
# markup stripped: the <IN> readline operators and all HTML tags inside the
# print statements. The readline operators are unambiguous and were put
# back. The HTML tags, the link/image targets and the use of the
# $predir/$rfamdir/$knowndir/$noveldir directory names in those targets are
# a RECONSTRUCTION -- confirm them against the upstream html.pl before
# relying on the exact markup. All visible report wording is kept verbatim.

use strict;
use warnings;
use Getopt::Long;
use File::Basename;

my $version=1.00;

my %opts;
GetOptions(\%opts,"i=s","format=s","o=s","h");
if (!(defined $opts{o} and defined $opts{format} and defined $opts{i} ) || defined $opts{h}) { #necessary arguments
    usage();
}

# The -i file holds six paths, one per line, in this fixed order.
my ($config,$prepath,$rfampath,$knownpath,$genomepath,$novelpath)
    = read_path_list($opts{i});

# Directory names are taken before any trailing "/" is appended below.
# $genomepath is read to keep the line order but is not used in the report.
my $predir   = last_component($prepath);
my $rfamdir  = last_component($rfampath);
my $knowndir = last_component($knownpath);
my $noveldir = last_component($novelpath);

my $dir=dirname($opts{'o'});

### raw data no
# Each config line is "<reads file>\t<sample mark>".
my (@files,@marks,@rawNo);
open my $cfg, '<', $config or die "Cannot open config file $config: $!\n";
while (my $aline=<$cfg>) {
    chomp $aline;
    next if $aline=~/^\s*$/;    # a blank line names no file to count
    my ($file,$mark)=split/\t/,$aline;
    push @files,$file;
    push @rawNo,count_records($file,$opts{'format'});
    push @marks,(defined $mark ? $mark : '');
}
close $cfg;

### preprocess
$prepath   .= "/" unless $prepath=~/\/$/;
my $collapsefile=$prepath."collapse_reads.fa";
my $clean=$prepath."collapse_reads_19_28.fa";
my ($trimNo,$collapse)=tally_collapsed($collapsefile);
my ($cleanR,$cleanT)=tally_collapsed($clean);

#### rfam
$rfampath  .= "/" unless $rfampath=~/\/$/;
my ($rfamR,$rfamT)=parse_rfam_annotation("$dir/rfam_non-miRNA_annotation.txt");

### report
# All inputs are read before the output is opened so that a failure above
# does not leave a half-written report behind.
my $samples=scalar @marks;
open my $out, '>', $opts{'o'} or die "Cannot write $opts{'o'}: $!\n";

print {$out} "<html>\n<head>\n<title>Analysis Report</title>\n</head>\n<body>\n";
print {$out} "<h1 align=\"center\">Small RNA Analysis Report</h1>\n";
print {$out} "<h2>1. Sequence No. and quality</h2>\n";
print {$out} "<h3>1.1 Sequece No.</h3>\n";

print {$out} "<table border=\"1\">\n";
print {$out} table_row('&nbsp;','th',@marks);
print {$out} table_row('Raw Reads No.','td',@rawNo);
print {$out} table_row("Reads No. After Trimed 3' adapter",'td',padded($trimNo,$samples));
print {$out} table_row('Unique Tags No.','td',padded($collapse,$samples));
print {$out} table_row('Clean Reads No.','td',padded($cleanR,$samples));
print {$out} table_row('Clean Tags No.','td',padded($cleanT,$samples));
print {$out} "</table>\n";

my $indent='&nbsp;' x 11;
print {$out} "<p>\nNote:<br />\n";
print {$out} "The raw data file path is: "
    . join("<br />\n$indent",map { esc($_) } @files) . "<br />\n";
print {$out} "The collapsed file path is: " . esc($collapsefile) . "<br />\n";
print {$out} "The clean data file path is: " . esc($clean) . "<br />\n";
print {$out} "</p>\n";

print {$out} "<h2>1. Sequence length count</h2>\n";
print {$out} "<h3>1.1 Reads length</h3>\n";
print {$out} image_tag($predir,'Reads_length.png');
print {$out} "<h3>1.2 Tags length count</h3>\n";
print {$out} image_tag($predir,'Tags_length.png');
# TODO(review): "length file" was presumably a link; its target file name is
# not recoverable from the stripped text, so it is emitted as plain text.
print {$out} "<p>\nNote:<br />\nThe sequence length data: length file\n</p>\n";

print {$out} "<h2>2. Rfam non-miRNA annotation</h2>\n";
print {$out} "<h3>2.1 Reads count</h3>\n";
print {$out} rfam_table(\@marks,$rfamR);
print {$out} "<h3>2.2 Tags count</h3>\n";
print {$out} rfam_table(\@marks,$rfamT);
print {$out} "<p>\nNote:<br />\nThe rfam mapping results is: "
    . file_link($rfamdir,'rfam_mapped.bwt',$rfampath."rfam_mapped.bwt")
    . "\n</p>\n";

print {$out} "<h2>3. MicroRNA result</h2>\n";
print {$out} "<h3>3.1 known microRNA</h3>\n<p>\n";
print {$out} "The known microRNA express list: " . file_link($knowndir,'known_microRNA_express.txt') . "<br />\n";
print {$out} "The known microRNA alngment file: " . file_link($knowndir,'known_microRNA_express.aln') . "<br />\n";
print {$out} "The known moRs file: " . file_link($knowndir,'known_microRNA_express.moRs') . "<br />\n";
print {$out} "The known microRNA mature sequence file: " . file_link($knowndir,'known_microRNA_mature.fa') . "<br />\n";
print {$out} "The knowm microRNA precursor sequence file: " . file_link($knowndir,'known_microRNA_precursor.fa') . "\n";
print {$out} "</p>\n";

print {$out} "<h3>3.2 novel microRNA</h3>\n<p>\n";
print {$out} "The novel microRNA prediction file: " . file_link($noveldir,'microRNA_prediction.mrd') . "<br />\n";
print {$out} "The novel microRNA express list: " . file_link($noveldir,'novel_microRNA_express.txt') . "<br />\n";
print {$out} "The novel microRNA mature sequence file: " . file_link($noveldir,'novel_microRNA_mature.fa') . "<br />\n";
print {$out} "The novel microRNA precursor sequence file: " . file_link($noveldir,'novel_microRNA_precursor.fa') . "\n";
print {$out} "</p>\n";

print {$out} "</body>\n</html>\n";
close $out or die "Cannot finish writing $opts{'o'}: $!\n";

# Read the first six lines of the -i file and return them chomped.
# Dies when the file cannot be opened or holds fewer than six lines.
sub read_path_list {
    my ($list_file)=@_;
    open my $fh, '<', $list_file or die "Cannot open $list_file: $!\n";
    my @paths;
    while (@paths < 6) {
        my $line=<$fh>;
        die "$list_file: expected 6 lines, found ".scalar(@paths)."\n"
            unless defined $line;
        chomp $line;
        push @paths,$line;
    }
    close $fh;
    return @paths;
}

# Return the last "/"-separated component of a path ('' for an empty path).
# split drops trailing empty fields, so "a/b/" yields "b".
sub last_component {
    my ($path)=@_;
    my @parts=split/\//,$path;
    return @parts ? $parts[-1] : '';
}

# Count the reads in a raw data file: line count divided by 4 for the
# formats "fq"/"fastq", by 2 for any other format value.
# The file is still read through `less`, as before, but via a list-form
# pipe so the file name is never interpreted by a shell.
sub count_records {
    my ($file,$format)=@_;
    my $lines=0;
    if (open my $pipe, '-|', 'less', $file) {
        $lines++ while <$pipe>;
        close $pipe;
    }
    else {
        warn "Cannot run less on $file: $!\n";
    }
    my $lines_per_read=($format eq "fq" || $format eq "fastq") ? 4 : 2;
    return $lines/$lines_per_read;
}

# Tally a collapsed FASTA file whose header lines end in
# ":<n1>_<n2>_..._x<total>", one count per sample.
# Returns (\@reads, \@tags): per sample, the summed counts and the number
# of records with a count above zero. A missing file yields empty tallies.
sub tally_collapsed {
    my ($path)=@_;
    my (@reads,@tags);
    my $fh;
    unless (open $fh, '<', $path) {
        warn "Cannot open $path: $!\n";
        return (\@reads,\@tags);
    }
    while (my $header=<$fh>) {
        chomp $header;
        <$fh>;    # every header is followed by one sequence line; skip it
        # Skip headers that do not match, rather than reusing a stale $1.
        next unless $header=~/:([\d|_]+)_x(\d+)$/;
        my @per_sample=split/_/,$1;
        for my $i (0 .. $#per_sample) {
            next unless $per_sample[$i]=~/^\d+$/ && $per_sample[$i]>0;
            $reads[$i] +=$per_sample[$i];
            $tags[$i] ++;
        }
    }
    close $fh;
    return (\@reads,\@tags);
}

# Parse the Rfam annotation text file into two lists of whitespace-split
# rows: rows seen before a line matching /tags\s+number/ (read counts) and
# rows from that line on (tag counts). "#" lines and blank lines are
# dropped. A missing file yields two empty lists.
sub parse_rfam_annotation {
    my ($path)=@_;
    my (@reads,@tags);
    my $fh;
    unless (open $fh, '<', $path) {
        warn "Cannot open $path: $!\n";
        return (\@reads,\@tags);
    }
    my $in_reads=1;
    while (my $aline=<$fh>) {
        chomp $aline;
        $in_reads=0 if($aline=~/tags\s+number/);
        next if($aline=~/^\#/);
        next if($aline=~/^\s*$/);
        my @fields=split/\s+/,$aline;
        if ($in_reads) { push @reads,[@fields]; }
        else           { push @tags,[@fields]; }
    }
    close $fh;
    return (\@reads,\@tags);
}

# Return the counts as a list at least $n long with undefined slots as 0,
# so a sample without any counted record still gets a table cell.
sub padded {
    my ($counts,$n)=@_;
    my $last=@$counts > $n ? $#$counts : $n-1;
    return map { defined $counts->[$_] ? $counts->[$_] : 0 } 0 .. $last;
}

# Escape text for use in HTML content or a double-quoted attribute.
sub esc {
    my ($text)=@_;
    $text='' unless defined $text;
    $text=~s/&/&amp;/g;
    $text=~s/</&lt;/g;
    $text=~s/>/&gt;/g;
    $text=~s/"/&quot;/g;
    return $text;
}

# Build one table row: a header cell holding $label_html (already HTML)
# followed by one escaped <$cell_tag> cell per value.
sub table_row {
    my ($label_html,$cell_tag,@cells)=@_;
    my $row="<tr align=\"center\">\n<th>$label_html</th>\n";
    $row .= "<$cell_tag> ".esc($_)." </$cell_tag>\n" for @cells;
    return $row."</tr>\n";
}

# Build an Rfam table: a "RNA Name" header row with the sample marks, then
# one row per annotation row (first field is the name, the rest are counts).
sub rfam_table {
    my ($marks,$rows)=@_;
    my $html="<table border=\"1\">\n".table_row('RNA Name','th',@$marks);
    for my $row (@$rows) {
        my ($name,@counts)=@$row;
        $html .= table_row(esc($name),'td',@counts);
    }
    return $html."</table>\n";
}

# Build a relative link "./<subdir>/<name>"; the link text defaults to the
# file name. NOTE(review): link targets are reconstructed, see file header.
sub file_link {
    my ($subdir,$name,$text)=@_;
    $text=$name unless defined $text;
    my $href=(defined $subdir && length $subdir) ? "./$subdir/$name" : "./$name";
    return '<a href="'.esc($href).'">'.esc($text).'</a>';
}

# Build an <img> tag for a file under <subdir>, using the file name as the
# alt text. NOTE(review): image location is reconstructed, see file header.
sub image_tag {
    my ($subdir,$name)=@_;
    my $src=(defined $subdir && length $subdir) ? "./$subdir/$name" : "./$name";
    return '<img src="'.esc($src).'" alt="'.esc($name).'" />'."\n";
}

# Print the command-line help and exit with status 1.
sub usage{
print <<"USAGE";
Version $version
Usage:
$0 -i <path list> -format <format> -o <output file>
options:
-i       file with six lines, in order: config file, preprocess path,
         rfam path, known path, genome path, novel path
-format  raw data format: fq or fastq (4 lines per read); any other
         value is counted as 2 lines per read
-o       output file
-h       help
USAGE
exit(1);
}