# HG changeset patch
# User big-tiandm
# Date 1415168246 18000
# Node ID e0884a4b996b8af1ab7c5f06999fcf73b93060a0
# Parent 22d79320085cbe5c54ac48b571dfb3d4f9e9524b
Uploaded
diff -r 22d79320085c -r e0884a4b996b Length_Distibution.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Length_Distibution.pl Wed Nov 05 01:17:26 2014 -0500
@@ -0,0 +1,219 @@
+#!/usr/bin/perl -w
+#==========================================================================================
+# Date:
+# Title:
+# Comment: Program to plot gene structure
+# Input: 1. input file of Gene region annotation which format like GenePred
+# 2. input file of Transcripts region annotation which format like GenePred
+# 3. input file of gene snp detail info
+# Output: output file of gene structure graph by html or svg formt
+# Test Usage:
+#========================================================================================
+#use strict;
+my $version=1.00;
+use SVG;
+use Getopt::Long;
+my %opt;
+GetOptions(\%opt,"i=s","o=s",,"h");
+if (!(defined $opt{i} and defined $opt{o}) || defined $opt{h}) {
+&usage;
+}
+#===============================Define Attribute==========================================
+my %attribute=(
+ canvas=>{
+ 'width'=>1500,
+ 'height'=>1800
+ },
+ text=>{
+ 'stroke'=>"#000000",
+ 'fill'=>"none",
+ 'stroke-width'=>0.5
+ #'stroke-width2'=>1
+ },
+ line=>{
+ 'stroke'=>"black",
+ 'stroke-width'=>1
+ },
+ font=>{
+ 'fill'=>"#000000",
+ 'font-size'=>12,
+ 'font-size2'=>10,
+ 'font-weight'=>'bold',
+ 'font-family'=>"Arial"
+ #'font-family2'=>"ArialNarrow-bold"
+ },
+ rect=>{
+ 'fill'=>"lightgreen",
+ 'stroke'=>"black",
+ 'stroke-width'=>0.5
+ },
+ readwidth=>0.5
+);
+#my $Xscale=600/$length;#定义X轴比例尺 1:1000 x轴的坐标长度都要按照此比例尺换算
+#========================================data============================
+open(IN,"$opt{i}")||die"cannot open the file $opt{i}";
+my @R_length;
+my @T_length;
+my $R_number=0;
+my $T_number=0;
+my $R_max=0;
+my $T_max=0;
+
+my $title=;
+chomp $title;
+my @title=split/\t/,$title;
+my @mark=split/\s+/,$title[1];
+my $sample_number=@mark;
+while (my $aline=) {
+ if ($aline=~/^\s/) {
+ my $T_title=;
+ chomp $T_title;
+ while (my $a_aline=) {
+ chomp $a_aline;
+ my @temp=split/\t/,$a_aline;
+ my @number=split/\s+/,$temp[1];
+ for (my $i=0;$i<@number ;$i++) {
+ if ($R_max<$number[$i]) {
+ $R_max=$number[$i];
+ }
+ }
+ push @R_length,[$temp[0],@number];
+ $R_number++;
+ }
+ }
+ else {
+ chomp $aline;
+ my @temp=split/\t/,$aline;
+ my @number=split/\s+/,$temp[1];
+ for (my $i=0;$i<@number ;$i++) {
+ if ($T_max<$number[$i]) {
+ $T_max=$number[$i];
+ }
+ }
+ push @T_length,[$temp[0],@number];
+ $T_number++;
+ }
+}
+close IN;
+print "Tag max: $T_max\nRead max: $R_max\n";
+my $kd_number=5;
+##=======================Reads 纵坐标刻度==========================
+my $r=1;
+my $rr=1;
+my $R=$R_max;
+while ($R>10) {
+ $R=$R/10;
+ $r=$r*10;
+ $rr++;
+}
+$R=int($R+0.5);
+my $R_xg=$R/$kd_number*$r;#纵坐标一小格大小(一共10格)
+my $R_kedu_scale_x=6*$rr;#纵坐标刻度文字
+##=======================Tags 纵坐标刻度==========================
+my $t=1;
+my $tt=1;
+my $T=$T_max;
+while ($T>10) {
+ $T=$T/10;
+ $t=$t*10;
+ $tt++;
+}
+$T=int($T+0.5);
+my $T_xg=$T/$kd_number*$t;#纵坐标一小格大小(一共10格)
+my $T_kedu_scale_x=6*$tt;#纵坐标刻度文字
+
+#############################s#define start coordinate and scale
+my $XOFFSET=50;
+my $YOFFSET=60;
+my $width=800;
+my $heigth=800;
+my $X_width=600;
+#my $height=1600;
+#### Starting ####
+#新建画布
+my $svg=SVG->new(width=>$width,height=>$heigth);
+####坐标轴
+my $axisL=300;#read 纵坐标长度
+my $x_margin = 50;
+#=========Reads number setting==========================================
+my $Y_R_title=30;#标题的纵向宽度
+my $Y_R_0=$YOFFSET+$axisL+$Y_R_title;
+my $X_R_0=$XOFFSET+$x_margin;
+my $R_Yscale=$axisL/$R_xg/$kd_number;
+my $R_Xscale=$X_width/$R_number/($sample_number+1);
+#=====================================Reads Y axis======================
+$svg->line('x1',$X_R_0,'y1',$Y_R_0,'x2',$X_R_0,'y2',$Y_R_0-$axisL,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
+for (my $i=1;$i<$kd_number ;$i++) {
+ $svg->line('x1',$X_R_0-5,'y1',$Y_R_0-$i*$R_xg*$R_Yscale,'x2',$X_R_0,'y2',$Y_R_0-$i*$R_xg*$R_Yscale,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
+ $svg->text('x',$X_R_0-$R_kedu_scale_x,'y',$Y_R_0-$i*$R_xg*$R_Yscale+4,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',6,'font-family',$attribute{font}{'font-family'},'-cdata',$i*$R_xg);
+}
+#=====================================Reads X axis======================
+$svg->line('x1',$X_R_0,'y1',$Y_R_0,'x2',$X_R_0+$X_width,'y2',$Y_R_0,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
+
+#print "$R_number\t$sample_number\n";
+for ($i=0;$i<$R_number ;$i++) {
+ for (my $j=1;$j<$sample_number+1 ;$j++) {
+ my $red=$j/$sample_number*255;
+ $svg->rect('x',$X_R_0+($j+$i*($sample_number+1))*$R_Xscale,'y',$Y_R_0-$R_length[$i][$j]*$R_Yscale,'width',$R_Xscale,'height',$R_length[$i][$j]*$R_Yscale,'stroke',"black",'stroke-width',"0.5",'fill',"rgb($red,125,0)");
+ }
+ $svg->text('x',$X_R_0+(1+$sample_number/2+$i*($sample_number+1))*$R_Xscale,'y',$Y_R_0+15,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',6,'font-family',$attribute{font}{'font-family'},'-cdata',$R_length[$i][0]);
+}
+#===Reads number title
+$svg->text('x',$XOFFSET+400,'y',$YOFFSET,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',"1",'font-size',15,'font-family',$attribute{font}{'font-family'},'-cdata',"Reads Length Distribution");
+#===Reads
+for (my $i=0;$i<$sample_number ;$i++) {
+ my $red=($i+1)/$sample_number*255;
+ $svg->rect('x',$X_R_0+550,'y',$YOFFSET+$Y_R_title+20*$i,'width',15,'height',10,'stroke',"black",'stroke-width',"0.5",'fill',"rgb($red,125,0)");
+ $svg->text('x',$X_R_0+550+30,'y',$YOFFSET+$Y_R_title+20*$i+10,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',10,'font-family',$attribute{font}{'font-family'},'-cdata',$mark[$i]);
+}
+####==================================================================================
+#=========================================Tag s
+my $Y_T_title=30;#标题的纵向宽度
+my $Y_T_0=$Y_R_0+$axisL+$Y_R_title+50;#length size
+my $X_T_0=$XOFFSET+$x_margin;
+my $T_Yscale=$axisL/$T_xg/$kd_number;
+my $T_Xscale=$X_width/$T_number/($sample_number+1);
+#=====================================Tags Y axis======================
+$svg->line('x1',$X_T_0,'y1',$Y_T_0,'x2',$X_T_0,'y2',$Y_T_0-$axisL,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
+for (my $i=1;$i<$kd_number ;$i++) {
+ $svg->line('x1',$X_T_0-5,'y1',$Y_T_0-$i*$T_xg*$T_Yscale,'x2',$X_T_0,'y2',$Y_T_0-$i*$T_xg*$T_Yscale,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
+ $svg->text('x',$X_T_0-$T_kedu_scale_x,'y',$Y_T_0-$i*$T_xg*$T_Yscale+4,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',6,'font-family',$attribute{font}{'font-family'},'-cdata',$i*$T_xg);
+}
+#=====================================Tags X axis======================
+$svg->line('x1',$X_T_0,'y1',$Y_T_0,'x2',$X_T_0+$X_width,'y2',$Y_T_0,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
+
+#print "$R_number\t$sample_number\n";
+for ($i=0;$i<$T_number ;$i++) {
+ for (my $j=1;$j<$sample_number+1 ;$j++) {
+ my $red=$j/$sample_number*255;
+ $svg->rect('x',$X_T_0+($j+$i*($sample_number+1))*$T_Xscale,'y',$Y_T_0-$T_length[$i][$j]*$T_Yscale,'width',$T_Xscale,'height',$T_length[$i][$j]*$T_Yscale,'stroke',"black",'stroke-width',"0.5",'fill',"rgb($red,125,0)");
+ }
+ $svg->text('x',$X_T_0+(1+$sample_number/2+$i*($sample_number+1))*$T_Xscale,'y',$Y_T_0+15,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',6,'font-family',$attribute{font}{'font-family'},'-cdata',$T_length[$i][0]);
+}
+#===Reads number title
+$svg->text('x',$XOFFSET+400,'y',$Y_R_0+30+$Y_T_title,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',"1",'font-size',15,'font-family',$attribute{font}{'font-family'},'-cdata',"Tags Length Distribution");
+#===Reads
+for (my $i=0;$i<$sample_number ;$i++) {
+ my $red=($i+1)/$sample_number*255;
+ $svg->rect('x',$X_T_0+550,'y',$Y_R_0+30+$Y_T_title+20*$i,'width',15,'height',10,'stroke',"black",'stroke-width',"0.5",'fill',"rgb($red,125,0)");
+ $svg->text('x',$X_T_0+550+30,'y',$Y_R_0+30+$Y_T_title+20*$i+10,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',10,'font-family',$attribute{font}{'font-family'},'-cdata',$mark[$i]);
+}
+
+
+
+
+open (OUT,">$opt{o}");
+print OUT $svg->xmlify();
+
+sub usage{
+print <<"USAGE";
+Version $version
+Usage:
+$0
+options:
+-i
+-o svg output
+-h help
+USAGE
+exit(1);
+}
\ No newline at end of file
diff -r 22d79320085c -r e0884a4b996b html.pl
--- a/html.pl Thu Oct 30 21:31:55 2014 -0400
+++ b/html.pl Wed Nov 05 01:17:26 2014 -0500
@@ -13,11 +13,11 @@
my %opts;
GetOptions(\%opts,"i=s","format=s","o=s","h");
-if (!(defined $opts{o} and defined $opts{format} and defined $opts{i} ) || defined $opts{h}) { #necessary arguments
+if (!(defined $opts{o} and defined $opts{format} and defined $opts{i} ) || defined $opts{h}) { #necessary arguments
&usage;
}
-my ($config,$prepath,$rfampath,$genomepath,$clusterpath,$annotatepath,$degpath);
-my ($predir,$rfamdir,$genomedir,$clusterdir,$annotatedir,$degdir);
+my ($config,$prepath,$rfampath,$genomepath,$clusterpath,$annotatepath,$plotpath,$degpath);
+my ($predir,$rfamdir,$genomedir,$clusterdir,$annotatedir,$plotdir,$degdir);
open IN,"<$opts{i}";
$config=; chomp $config;
$prepath=; chomp $prepath;
@@ -25,6 +25,7 @@
$genomepath=; chomp $genomepath;
$clusterpath=; chomp $clusterpath;
$annotatepath=; chomp $annotatepath;
+$plotpath=; chomp $plotpath;
my $deg_tag=1;
if(eof){$deg_tag=0;}
else{
@@ -41,8 +42,8 @@
$clusterdir=$tmp[-1];
@tmp=split/\//,$annotatepath;
$annotatedir=$tmp[-1];
-#@tmp=split/\//,$degpath;
-#$degdir=$tmp[-1];
+@tmp=split/\//,$plotpath;
+$plotdir=$tmp[-1];
my $dir=dirname($opts{'o'});
@@ -201,13 +202,18 @@
The filter (remain total reads>3) data file path is: $filter
1. Sequence length count
- 1.1 Reads length
";
+print OUT "\n";
-print OUT "
- 1.2 Tags length count
-
- Note:
The sequence length data: length file
+my $length=$prepath."length.html";
+open IN,"<$length";
+while (my $aline=) {
+ chomp $aline;
+ print OUT "$aline\n";
+}
+close IN;
+
+print OUT " Note:
The sequence length data: length file
";
@@ -361,8 +367,8 @@
my $cluster_number=`less $cluster |wc -l `;
$cluster_number=$cluster_number-1;
my (%cluster_length,@exp,@rpkm);
-my @exp_range=qw(0 1--9 10--99 100--999 1000--9999 10000--99999 100000--**);
-my @rpkm_range=qw(0 0--0.25 0.25--0.5 0.5--1 1.0--5.0 5.0--10 10--50 50--100 100--500 500--1000 1000--**);
+my @exp_range=qw(0 \(0--10] \(10--100] \(100--1000] \(1000--10000] \(10000--100000] \(100000--**\));
+my @rpkm_range=qw(0 \(0--0.25] \(0.25--0.5] \(0.5--1] \(1.0-5.0] \(5--10] \(10--50] \(50--100] \(100--500] \(500--1000] \(1000--**]);
open IN,"<$cluster";
while (my $aline=) {
@@ -372,13 +378,13 @@
my @id=split/:|-/,$temp[0];
$cluster_length{$id[2]-$id[1]+1}++;
for (my $i=0;$i<@marks ;$i++) {
- if ($temp[$i+3] == 0) {$exp[$i][0]++;}
- elsif ($temp[$i+3]>0 && $temp[$i+3]<= 10 ){$exp[$i][1]++;}
- elsif ($temp[$i+3]>10 && $temp[$i+3]<=100){$exp[$i][2]++;}
- elsif ($temp[$i+3]>100 && $temp[$i+3]<=1000){$exp[$i][3]++;}
- elsif ($temp[$i+3]>1000 && $temp[$i+3]<=10000){$exp[$i][4]++;}
- elsif ($temp[$i+3]>10000 && $temp[$i+3]<=100000){$exp[$i][5]++;}
- elsif ($temp[$i+3]>100000){$exp[$i][6]++;}
+ if ($temp[$i+3] == 0) {$exp[0][$i]++;}
+ elsif ($temp[$i+3]>0 && $temp[$i+3]<= 10 ){$exp[1][$i]++;}
+ elsif ($temp[$i+3]>10 && $temp[$i+3]<=100){$exp[2][$i]++;}
+ elsif ($temp[$i+3]>100 && $temp[$i+3]<=1000){$exp[3][$i]++;}
+ elsif ($temp[$i+3]>1000 && $temp[$i+3]<=10000){$exp[4][$i]++;}
+ elsif ($temp[$i+3]>10000 && $temp[$i+3]<=100000){$exp[5][$i]++;}
+ elsif ($temp[$i+3]>100000){$exp[6][$i]++;}
}
}
close IN;
@@ -390,17 +396,17 @@
chomp $aline;
my @temp=split/\t/,$aline;
for (my $i=0;$i<@marks ;$i++) {
- if ($temp[$i+3]==0) {$rpkm[$i][0]++;}
- elsif($temp[$i+3]>0 && $temp[$i+3]<=0.25){$rpkm[$i][1]++;}
- elsif($temp[$i+3]>0.25 && $temp[$i+3]<=0.5){$rpkm[$i][2]++;}
- elsif($temp[$i+3]>0.5 && $temp[$i+3]<=1){$rpkm[$i][3]++;}
- elsif($temp[$i+3]>1 && $temp[$i+3]<=5){$rpkm[$i][4]++;}
- elsif($temp[$i+3]>5 && $temp[$i+3]<=10){$rpkm[$i][5]++;}
- elsif($temp[$i+3]>10 && $temp[$i+3]<=50){$rpkm[$i][6]++;}
- elsif($temp[$i+3]>50 && $temp[$i+3]<=100){$rpkm[$i][7]++;}
- elsif($temp[$i+3]>100 && $temp[$i+3]<=500){$rpkm[$i][8]++;}
- elsif($temp[$i+3]>500 && $temp[$i+3]<=1000){$rpkm[$i][9]++;}
- else{$rpkm[$i][10]++;}
+ if ($temp[$i+3]==0) {$rpkm[0][$i]++;}
+ elsif($temp[$i+3]>0 && $temp[$i+3]<=0.25){$rpkm[1][$i]++;}
+ elsif($temp[$i+3]>0.25 && $temp[$i+3]<=0.5){$rpkm[2][$i]++;}
+ elsif($temp[$i+3]>0.5 && $temp[$i+3]<=1){$rpkm[3][$i]++;}
+ elsif($temp[$i+3]>1 && $temp[$i+3]<=5){$rpkm[4][$i]++;}
+ elsif($temp[$i+3]>5 && $temp[$i+3]<=10){$rpkm[5][$i]++;}
+ elsif($temp[$i+3]>10 && $temp[$i+3]<=50){$rpkm[6][$i]++;}
+ elsif($temp[$i+3]>50 && $temp[$i+3]<=100){$rpkm[7][$i]++;}
+ elsif($temp[$i+3]>100 && $temp[$i+3]<=500){$rpkm[8][$i]++;}
+ elsif($temp[$i+3]>500 && $temp[$i+3]<=1000){$rpkm[9][$i]++;}
+ else{$rpkm[10][$i]++;}
}
}
@@ -484,7 +490,7 @@
$nat[$j]=0;
}
-my $class_anno=6;
+my $class_anno=1;
open ANNO,"<$annotate";
while (my $aline=) {
chomp $aline;
@@ -694,6 +700,17 @@
}
+print OUT "6. Graph of Clusters of all samples
\n";
+
+my $plot=$plotpath."cluster.html";
+open IN,"<$plot";
+while (my $aline=) {
+ chomp $aline;
+ print OUT "$aline\n";
+}
+close IN;
+
+
if ($deg_tag) {
my $deg_file=`ls $degpath`;
chomp $deg_file;
@@ -722,7 +739,7 @@
close IN;
}
- print OUT "6. DEG
+ print OUT "7. DEG
Genes number | \n
@@ -743,7 +760,7 @@
print OUT "
\n
";
}
else{
- print OUT "6. DEG
+ print OUT "7. DEG
Do not do DE clusters
";
}
diff -r 22d79320085c -r e0884a4b996b nibls.pl
--- a/nibls.pl Thu Oct 30 21:31:55 2014 -0400
+++ b/nibls.pl Wed Nov 05 01:17:26 2014 -0500
@@ -80,8 +80,8 @@
# $length1=40;
# }
my $total;
- foreach (0..$#data) {
- $total+=$_;
+ for (my $s=0;$s<@data ;$s++) {
+ $total+=$data[$s];
}
push @data,$total;
# push @data,$length1;
@@ -258,10 +258,10 @@
my $c=$1;
my $s=$2;
my $e=$3;
- my @data;
+ # my @data;
foreach my $str (keys %{$molecules{$c}{$s}{$e}}) {
- push @tag,($s.",".$e.",".$str);
- @data=split(/;/,$molecules{$c}{$s}{$e}{$str});
+ my @data=split(/;/,$molecules{$c}{$s}{$e}{$str});
+ push @tag,($s.",".$e.",".$str.",".$data[-1]);
# for (my $i=0;$i<$#old_data ;$i++) {
# $data[$i]+=$old_data[$i];
# }
@@ -279,14 +279,14 @@
$end = $e if $e > $end;
}
my $tag=join";",@tag;
-
+ my $tag_number=@tag;
my ($max_length,$max_p,@cluster_exp)=Max_length(\@sample);
if ($max_length==40) {
$max_length="\>30";
}
my $cluster_exp=join"\t",@cluster_exp;
my $gff = $chr."\:$start\-$end\t".$max_length."nt\t".$max_p."\t" . $cluster_exp;
- print CLUSTER "$chr\:$start\-$end\t$max_length"."nt\t$max_p\t$tag\n";
+ print CLUSTER "$chr\:$start\-$end\t$max_length"."nt\t$max_p\t$tag_number\t$tag\n";
print OUT $gff, "\n";
}
diff -r 22d79320085c -r e0884a4b996b sRNA_plot.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sRNA_plot.pl Wed Nov 05 01:17:26 2014 -0500
@@ -0,0 +1,411 @@
+#!/usr/bin/perl -w
+#==========================================================================================
+# Date:
+# Title:
+# Comment: Program to plot gene structure
+# Input: 1.
+# 2.
+# 3.
+# Output: output file of gene structure graph by html or svg formt
+# Test Usage:
+#========================================================================================
+#use strict;
+my $version=1.00;
+use SVG;
+use Getopt::Long;
+my %opt;
+GetOptions(\%opt,"g=s","l=s","span=s","c=s","o=s","out=s","cen:s","mark=s","h");
+if (!( defined $opt{o}) || defined $opt{h}) {
+&usage;
+}
+my $span=$opt{span};
+#my $sample_cloumn=$opt{n};
+my $mark=$opt{mark};
+my @mark=split/\#/,$mark;
+my $genelist=$opt{g};
+#===============================Define Attribute==========================================
+my %attribute=(
+ canvas=>{
+ 'width'=>1500,
+ 'height'=>1800
+ },
+ text=>{
+ 'stroke'=>"#000000",
+ 'fill'=>"none",
+ 'stroke-width'=>0.5
+ },
+ line=>{
+ 'stroke'=>"black",
+ 'stroke-width'=>1
+ },
+ csv=>{
+ 'stroke'=>"red",
+ 'stroke-width'=>0.5
+ },
+ exon=>{
+ 'stroke'=>"black",
+ 'stroke-width'=>1
+ },
+ intron=>{
+ 'stroke'=>"black",
+ 'stroke-width'=>1.5
+ },
+ font=>{
+ 'fill'=>"#000000",
+ 'font-size'=>12,
+ 'font-size2'=>10,
+ #'font-weight'=>'bold',
+ 'font-family'=>"Arial"
+ #'font-family'=>"ArialNarrow-bold"
+ },
+ rect=>{
+ 'fill'=>"lightgreen",
+ 'stroke'=>"black",
+ 'stroke-width'=>0.5
+ },
+ readwidth=>0.5
+);
+#############################s#define start coordinate and scale
+open(TXT,">$opt{out}");
+open(LENGTH,"$opt{l}")||die"cannot open the file $opt{l}";
+my %length;
+while (my $aline=) {
+ chomp $aline;
+ next if($aline=~/^\#/);
+ my @temp=split/\t/,$aline;
+ $temp[0]=~s/^c/C/;
+ $length{$temp[0]}=$temp[1];
+}
+close LENGTH;
+#---------------------------------------------------------------
+open(GENE,"$opt{g}")||die"cannot open the file $opt{g}";
+my %genelist;
+while (my $aline=) {
+ chomp $aline;#LOC_Os01g01280 Chr1 133291 134685 +
+ next if($aline=~/^\#/);
+ my @temp=split/\t/,$aline;
+ if ($temp[1]=~/^Chr(\d)$/) {
+ $temp[1]="Chr0$1";
+ }
+ push @{$genelist{$temp[1]}},[$temp[0],$temp[2],$temp[3]];
+
+}
+close GENE;
+#my %have_gene;
+#foreach my $chr (sort keys %genelist) {
+# my @genelist=sort{$a->[1] <=> $b->[1]}@{$genelist{$chr}};
+# my $start=$genelist[0][1];
+# my $end=$genelist[0][2];
+# for (my $i=0;$i<@genelist ;$i++) {
+# if ($gene) {
+# }
+# }
+#}
+
+my %gene_desity;
+foreach my $chr (sort keys %genelist) {
+ my @genelist=sort{$a->[1] <=> $b->[1]}@{$genelist{$chr}};
+ for (my $i=0;$i<@genelist ;$i++) {
+ my $start=int($genelist[$i][1]/$span);
+ my $end=int($genelist[$i][2]/$span);
+ #my @t_rpkm=split/\t/,$target_rpkm{$genelist[$i][0]};
+ if ($start==$end) {
+ $gene_desity{$chr}[$start]++;
+ }
+ else{
+ for (my $k=$start;$k<=$end ;$k++) {
+ $gene_desity{$chr}[$k]++;
+ }
+ }
+ }
+}
+#------------------------------------------region_gene_number-------------------------
+my $max_gene_number=0;
+my $total=0;
+foreach my $chr (sort keys %genelist) {
+ for (my $i=0;$i<@{$gene_desity{$chr}} ;$i++) {
+ if (!(defined($gene_desity{$chr}[$i]))) {
+ $gene_desity{$chr}[$i]=0;
+ }
+ if ($gene_desity{$chr}[$i]>$max_gene_number) {
+ $max_gene_number=$gene_desity{$chr}[$i];
+ #print "$gene_desity{$chr}[$i]\n";
+ }
+ #print TXT "$i\t$gene_desity[$i]\n";
+ $total+=$gene_desity{$chr}[$i];
+ #print "$chr\t$i\t$gene_desity{$chr}[$i]\n";
+ }
+}
+#print "Gene max:$max_gene_number\ntotal:$total\n";
+
+#---------------------------------------------------------------
+my %centromere;
+if (defined($opt{cen})) {
+ open CEN,"$opt{cen}";
+ while (my $aline=) {
+ chomp $aline;
+ next if($aline=~/^\#/);
+ my @temp=split/\t/,$aline;
+ $temp[0]=~s/^c/C/;
+ $centromere{$temp[0]}[0]=$temp[1];
+ $centromere{$temp[0]}[1]=$temp[2];
+ }
+ close CEN;
+}
+
+#---------------------------------------------------------------
+my $max_length=0;
+foreach my $chr (keys %length) {
+ if ($max_length<$length{$chr}) {
+ $max_length=$length{$chr};
+ }
+ print "$chr\n";
+}
+#====================================cluster data=======================================
+open(CLUSTER,"$opt{c}")||die"cannot open the file $opt{c}";
+my %cluster;
+my %cluster_density;
+#my @sample=qw(39B3 3PA3 3LC3);
+my @cluster_non_add;
+while (my $aline=) {
+ next if($aline=~/^\#/);
+ chomp $aline;##Chr MajorLength Percent end 19B1
+ my @temp=split/\t/,$aline;
+ my @ID=split/\:/,$temp[0];
+ my @posi=split/\-/,$ID[1];
+ my @all_rpkm=@temp;
+ shift @all_rpkm;
+ shift @all_rpkm;
+ shift @all_rpkm;
+# for (my $s=0;$s<@all_rpkm ;$s++) {#log transfer
+# $all_rpkm[$s]=log2($all_rpkm[$s]);
+# }
+ push @{$cluster{$ID[0]}},[$temp[0],$posi[0],$posi[1],@all_rpkm];#ID start end rpkm(19B1,1PA1,1LC1);
+}
+close CLUSTER;
+my %max_cluster;
+my $chr_number=0;
+print "@mark\n$mark\n";
+foreach my $chr (sort keys %cluster) {
+ for (my $i=0;$i<@mark ;$i++) {
+ $max_cluster{$chr}[$i]=0;
+ }
+ $chr_number++;
+}
+foreach my $chr (sort keys %cluster) {
+ @{$cluster{$chr}}=sort{$a->[1] <=> $b->[1]}@{$cluster{$chr}};
+ for (my $i=0;$i<$#{$cluster{$chr}} ;$i++) {
+ for (my $s=0;$s<@mark;$s++) {
+ if ($cluster{$chr}[$i][3+$s]>$max_cluster{$chr}) {
+ $max_cluster{$chr}[$s]=$cluster{$chr}[$i][3+$s];
+ }
+ }
+ }
+
+}
+foreach my $chr (sort keys %max_cluster) {
+ for (my $s=0; $s<@mark;$s++) {
+ # print "$max_cluster{$chr}[$s]\n";
+ }
+}
+#---------------------------------------------------------------------------------------
+foreach my $chr(keys %cluster) {
+ for(my $i=0;$i<$#{$cluster{$chr}};$i++) {
+ my $start=int($cluster{$chr}[$i][1]/$span);
+ my $end=int($cluster{$chr}[$i][2]/$span);
+ if ($start==$end) {
+ for (my $s=0;$s<@mark ;$s++) {
+ $cluster_density{$chr}[$start][$s]+=$cluster{$chr}[$i][3+$s];
+ }
+
+ }
+ else{
+ for (my $m=$start;$m<=$end ;$m++) {
+ for (my $s=0;$s<@mark ;$s++) {
+ $cluster_density{$chr}[$m][$s]+=$cluster{$chr}[$i][3+$s];
+ }
+ }
+ }
+ }
+}
+my %max_cluster_density;
+my $max_all_density=0;
+foreach my $chr (sort keys %cluster) {#
+ for (my $s=0;$s<@mark ;$s++) {
+ for (my $i=0;$i<$#{$cluster{$chr}} ;$i++) {
+ $max_cluster_density{$chr}[$s]=0;
+ }
+ }
+
+}
+foreach my $chr (sort keys %cluster_density) {
+ print "$#{$cluster_density{$chr}}\n";
+ for (my $k=0;$k<$#{$cluster_density{$chr}} ;$k++) {
+ print TXT "$chr\t$k";
+ for (my $s=0;$s<@mark;$s++) {
+ if (!(defined($cluster_density{$chr}[$k][$s]))) {
+ $cluster_density{$chr}[$k][$s]=0;
+ }
+ if ($cluster_density{$chr}[$k][$s]>$max_cluster_density{$chr}[$s]) {
+ $max_cluster_density{$chr}[$s]=$cluster_density{$chr}[$k][$s];
+ }
+ if ($cluster_density{$chr}[$k][$s]>$max_all_density) {
+ $max_all_density=$cluster_density{$chr}[$k][$s];
+ }
+ print TXT "\t$cluster_density{$chr}[$k][$s]";
+ }
+ print TXT "\n";
+ }
+}
+print "max density: $max_all_density\n";
+#--------------------------------------------------------------------
+my $top_margin=30;
+my $tail_margin=30;
+my $XOFFSET=50;
+my $YOFFSET=60;
+my $chr_length=600;
+my $Xscale=$chr_length/$max_length;#定义X轴比例尺 1:1000 x轴的坐标长度都要按照此比例尺换算
+#my $high_cov=$high_cov9B1=0.5;#定义峰图最高峰
+#my $Yscale=1/$high_cov;#定义Y轴比例尺 1:60 y轴的坐标长度都要按照此比例尺换算
+#========================================New canvas============================
+#### Starting ####
+#新建画布
+my $width=1000;
+my $heigth=100+130*$chr_number;
+my $svg=SVG->new(width=>$width, height=>$heigth);
+#画图起始点
+my $canvas_start_x=$XOFFSET;
+my $canvas_end_x=$XOFFSET+$max_length*$Xscale;#按照比例尺 画线
+my $canvas_start_y=$YOFFSET;
+my $canvas_end_y=$YOFFSET;
+my $chr_heigth=$heigth-$YOFFSET-$tail_margin;
+print "chr number:$chr_number\n";
+my $one_chr_heigth=$chr_heigth/$chr_number;
+my $Yscale=($one_chr_heigth-15)/$max_all_density;
+#my $chr_width=$YOFFSET;
+#my $chr_start_y;
+#my $chr_end_y;
+#my $Yscale=0.01;
+#=======================================title of the graph===============================
+#my $span_k=$span/1000;
+#$svg->text('x',$width/2,'y',$YOFFSET-20,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',15,'font-family',$attribute{font}{'font-family'},'-cdata',"Clusters rpkm/"."$span_k"."kb Distribution");
+#=======================================the top max chr line=============================
+$svg->line(id=>'l1',x1=>$canvas_start_x,y1=>$canvas_start_y,x2=>$canvas_end_x,y2=>$canvas_end_y,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
+$long_scale=int ($max_length/10);#十等分 大刻度
+#大坐标刻度
+for ($i=0;$i<=10;$i++) {
+ my $long_x_start=$XOFFSET+$long_scale*$i*$Xscale;
+ my $long_x_end=$long_x_start;
+ my $long_y_start=$YOFFSET;
+ my $long_y_end=$YOFFSET-5;
+ $svg->line('x1',$long_x_start,'y1',$long_y_start,'x2',$long_x_end,'y2',$long_y_end,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
+ my $Bscale=$long_scale*$i;
+ my $cdata=int ($Bscale/1000000);
+ $svg->text('x',$long_x_start,'y',$long_y_start-10,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',$cdata."M");
+}
+#=========================================================================================
+my $cc=1;
+foreach my $chr (sort keys %length) {
+ my $chr_end_x=$XOFFSET+$length{$chr}*$Xscale;
+ my $chr_start_x=$XOFFSET;
+ my $chr_start_y=$YOFFSET+$cc*$one_chr_heigth;
+ my $chr_end_y=$chr_start_y;
+ #$chr_start_y+=$chr_width;
+ #$chr_end_y+=$chr_width;
+# for (my $i=0;$i<@{$gene_desity{$chr}};$i++) {
+# print "$chr\t$i\t$gene_desity{$chr}[$i]\n";
+# my $red=$gene_desity{$chr}[$i]/$max_gene_number*255;
+# my $green=$gene_desity{$chr}[$i]/$max_gene_number*255;
+# print "$red\t$green\t0\n";
+# $svg->rect('x',$chr_start_x+$i*$span*$Xscale,'y',$chr_start_y,'width',$span*$Xscale,'height',8,'stroke',"rgb($red,$green,0)",'stroke-width',0.1,'fill',"rgb($red,$green,0)");
+# }
+
+ $svg->line(x1=>$chr_start_x,y1=>$chr_start_y,x2=>$chr_end_x,y2=>$chr_end_y,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
+ $svg->text('x',$XOFFSET-40,'y',$chr_start_y,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',$chr);
+ my $m_length=$length{$chr}%1000000;
+ $svg->text('x',$chr_end_x+20,'y',$chr_start_y,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',$m_length."M");
+
+
+ if (defined($centromere{$chr}[0])) {
+ $svg->rect('x',$XOFFSET+$centromere{$chr}[0]*$Xscale,'y',$chr_start_y-2,'width',($centromere{$chr}[1]-$centromere{$chr}[0]+1)*$Xscale,'height',5,'stroke',"blue",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"blue");
+ }
+ for (my $s=0;$s<@mark ;$s++) {
+ for (my $i=0;$i<$#{$cluster_density{$chr}}-1 ;$i++) {
+ #if ($cluster_density{$chr}[$i]*$Yscale>40) {
+ #$cluster_density{$chr}[$i]=40/$Yscale;
+ #$svg->rect('x',$XOFFSET+$i*$span*$Xscale,'y',$chr_start_y-45,'width',$span*$Xscale,'height',5,'stroke',"green",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"green");
+ #}
+ #print "$i\t$cluster_density{$chr}[$i][$s]\t$cluster_density{$chr}[$i+1][$s]\n";
+ my $cluster_density_start_x=$XOFFSET+$i*$span*$Xscale;
+ my $cluster_density_end_x=$XOFFSET+($i+1)*$span*$Xscale;
+ my $cluster_density_start_y=$chr_start_y-$cluster_density{$chr}[$i][$s]*$Yscale;
+ my $cluster_density_end_y=$chr_start_y-$cluster_density{$chr}[$i+1][$s]*$Yscale;
+ my $c_red=($s+1)/@mark*255;
+ $svg->line('x1',$cluster_density_start_x,'y1',$cluster_density_start_y,'x2',$cluster_density_end_x,'y2',$cluster_density_end_y,'stroke',"rgb($c_red,125,0)",'stroke-width',0.3);
+ }
+
+ }
+ #=======Y axis
+ $svg->line(x1=>$chr_start_x,y1=>$chr_start_y,x2=>$chr_start_x,y2=>$chr_start_y-$one_chr_heigth+15,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
+ #=======Y axis ===>3 xiaoge
+ my $s10=1;
+ my $e10=0;
+ my $chr_max=$max_all_density;
+ while ($chr_max>10) {
+ $chr_max=int($chr_max/10);
+ $s10=$s10*10;
+ $e10++;
+ }
+ $chr_max=$chr_max/2;
+ #print "*****$max_all_density\t$chr_max\t$s10\n";
+ for (my $i=1;$i<3 ;$i++) {
+ my $y1=$chr_start_y-$chr_max*$s10*$Yscale*$i;
+ my $xiaoge_Y=$chr_max*$i;
+ $svg->line('x1',$chr_start_x,'y1',$y1,'x2',$chr_start_x+3,'y2',$y1,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
+ $svg->text('x',$chr_start_x-26,'y',$y1+4,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',8,'font-family',$attribute{font}{'font-family'},'-cdata',$xiaoge_Y."e".$e10);
+ }
+ $cc++;
+}
+
+for (my $s=0;$s<@mark ;$s++) {
+ my $c_red=($s+1)/@mark*255;
+ print "**$c_red\n";
+ $svg->line('x1',$canvas_end_x+100,'y1',$YOFFSET+$s*20+30,'x2',$canvas_end_x+130,'y2',$YOFFSET+$s*20+30,'stroke',"rgb($c_red,125,0)",'stroke-width',1);
+ $svg->text('x',$canvas_end_x+150,'y',$YOFFSET+$s*20+5+30,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',10,'font-family',$attribute{font}{'font-family'},'-cdata',$mark[$s]);
+}
+#
+#
+if (defined($opt{cen})) {
+ $svg->rect('x',$canvas_end_x+100,'y',$YOFFSET+@mark*20+30,'width',30,'height',5,'stroke',"blue",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"blue");
+ $svg->text('x',$canvas_end_x+150,'y',$YOFFSET+@mark*20+30+5,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',10,'font-family',$attribute{font}{'font-family'},'-cdata',"centromere");
+}
+
+close TXT;
+
+open (OUT,">$opt{o}");
+print OUT $svg->xmlify();
+
+sub log2 {
+ my $n = shift;
+ return log($n)/log(2);
+}
+
+sub usage{
+print <<"USAGE";
+Version $version
+Usage:
+$0
+options:
+-g genelist
+-span
+-n sample cloumn
+-mark sample name
+-o output graph file name with html or svg extension
+-c cluster file input
+-out txt output
+-l length of chr
+-cen centromere
+-h help
+USAGE
+exit(1);
+}
\ No newline at end of file
diff -r 22d79320085c -r e0884a4b996b siRNA.pl
--- a/siRNA.pl Thu Oct 30 21:31:55 2014 -0400
+++ b/siRNA.pl Wed Nov 05 01:17:26 2014 -0500
@@ -139,12 +139,20 @@
my $cluster_file;
my $annotate_dir;
my $deg_dir;
+my $plot_dir;
my %id;
for (my $i=0;$i<@mark ;$i++) {
$id{$mark[$i]}=$i+4;
}
-
+print "\n######## tiandm test start ###########\n";
+print "\@mark: @mark\n\%id keys number:";
+print scalar keys %id;
+print "\n";
+foreach my $kyess (keys %id){
+ print $kyess," --> $id{$kyess}\n";
+}
+print "\n######## tiandm test end ############\n";
group_and_filter(); #collapse reads to tags
rfam();
@@ -280,6 +288,8 @@
print "perl $path\/filterReadsByLength_1.pl -i $group_out_file -o $l_out -min 18 -max 40 -mark $sample_mark\n\n";
my $cout_f=`perl $path\/filterReadsByCount.pl -i $l_out -o $filter_out -mark $sample_mark`;
print "perl $path\/filterReadsByCount.pl -i $l_out -o $filter_out -mark $sample_mark\n\n";
+ my $plot_l_D=`perl $path/Length_Distibution.pl -i $dir/preProcess/reads_length_distribution_after_count_filter.txt -o $dir/preProcess/length.html `;
+ print "perl $path\/Length_Distibution.pl -i $dir\/preProcess\/reads_length_distribution_after_count_filter.txt -o $dir\/preProcess\/length\.html\n\n";
return 0;
}
@@ -341,8 +351,8 @@
print "Using converntional method\n perl $path\/conventional.pl -i $file -d $distance_of_merged_tag -n $sample_number -mark $sample_mark -o $read -t $read_txt\n\n";
}
elsif($cluster_mothod eq "T"){
- my $cluster=`perl $path\/nibls.pl -f $file -m $distance_of_merged_tag -o $read -t $read_txt -mark $sample_mark`;
- print "Using nibls method\n perl $path\/nibls.pl -f $file -m $distance_of_merged_tag -o $read -t $dir\/cluster.txt -mark $sample_mark\n\n";
+ my $cluster=`perl $path\/nibls.pl -f $file -m $distance_of_merged_tag -o $read -t $read_txt -k $sample_mark`;
+ print "Using nibls method\n perl $path\/nibls.pl -f $file -m $distance_of_merged_tag -o $read -t $dir\/cluster.txt -k $sample_mark\n\n";
}
else{print "\-p is wrong!\n\n";}
return 0;
@@ -466,41 +476,16 @@
}
sub plot{
- my $plot_file="$dir\/plot\/";
- mkdir ("$plot_file");
- my $genome_plot="$dir\/plot\/genome\/";
- mkdir ("$genome_plot");
- #genome cluster
+ $plot_dir="$dir\/plot\/";
+ mkdir ("$plot_dir");
my $span=defined($options{span})?$options{span}:50000;
- foreach (1..$sample_number) {
- my $mark=$mark[$_-1];
- my $cen="";
- if (defined $options{cen}) {
- $cen="-cen $options{cen}";
- }
- my $plot=`perl $path\/sRNA_rpkm_distribution_along_genome.pl -c $rpkm -n $_ -mark $mark -span $span -l $dir\/ref\/genome\.length $cen -o $genome_plot\/$mark\.html -out $genome_plot\/$mark\.txt`;
- print "perl $path\/sRNA_rpkm_distribution_along_genome.pl -c $rpkm -n $_ -mark $mark -span $span -l $dir\/ref\/genome\.length $cen -o $genome_plot\/$mark\.html -out $genome_plot\/$mark\.txt\n\n";
+ my $cen="";
+ if (defined $options{cen}) {
+ $cen="-cen $options{cen}";
}
+ my $plot=`perl $path/sRNA_plot.pl -c $rpkm -g $dir/ref/genelist.txt -span 50000 -mark $sample_mark -l $dir/ref/genome\.length $cen -o $plot_dir/cluster.html -out $plot_dir/cluster.txt `;
+ "print perl $path/sRNA_plot.pl -c $rpkm -g $dir/ref/genelist.txt -span 50000 -mark $sample_mark -l $dir/ref/genome.length $cen -o $plot_dir/cluster.html -out $plot_dir/cluster.txt \n";
- my $chr_plot_dir="$dir\/plot\/chr\/";
- mkdir("$chr_plot_dir");
- my %chr;
- open LEN,"<$dir\/ref\/genome\.length";
- while (my $aline=) {
- next if($aline=~/^\#/);
- chomp $aline;
- my @temp=split/\t/,$aline;
- $chr{$temp[0]}=$temp[1];
- }
- close LEN;
- foreach my $chr (sort keys %chr) {
- my $cen="";
- if (defined $options{cen}) {
- $cen="-cen $options{cen}";
- }
- my $chr_plot=`perl $path\/chr_plot.pl -l $chr{$chr} -chro $chr -g $dir\/ref\/genelist.txt -span $span -c $rpkm -mark $sample_mark -o $chr_plot_dir\/$chr\.html`;
- print "perl $path\/chr_plot.pl -l $chr{$chr} -chro $chr -g $dir\/ref\/genelist.txt -span $span -c $rpkm -mark $sample_mark -o $chr_plot_dir\/$chr\.html\n";
- }
}
sub html{
@@ -512,6 +497,7 @@
print PA "$dir"."genome_match\n";
print PA "$cluster_file\n";
print PA "$annotate_dir\n";
+ print PA "$plot_dir\n";
if (defined($deg_dir)) {
print PA "$deg_dir\n";
}
diff -r 22d79320085c -r e0884a4b996b tool_dependencies.xml