comparison sRNA_plot.pl @ 23:cad6a1dfb174 draft

Uploaded
author big-tiandm
date Wed, 05 Nov 2014 21:11:49 -0500
parents e0884a4b996b
children
comparison
equal deleted inserted replaced
22:b6686462d0cb 23:cad6a1dfb174
1 #!/usr/bin/perl -w
2 #==========================================================================================
3 # Date:
4 # Title:
5 # Comment: Program to plot gene structure
6 # Input: 1.
7 # 2.
8 # 3.
9 # Output: output file of gene structure graph by html or svg formt
10 # Test Usage:
11 #========================================================================================
12 #use strict;
13 my $version=1.00;
14 use SVG;
15 use Getopt::Long;
16 my %opt;
17 GetOptions(\%opt,"g=s","l=s","span=s","c=s","o=s","out=s","cen:s","mark=s","h");
18 if (!( defined $opt{o}) || defined $opt{h}) {
19 &usage;
20 }
21 my $span=$opt{span};
22 #my $sample_cloumn=$opt{n};
23 my $mark=$opt{mark};
24 my @mark=split/\#/,$mark;
25 my $genelist=$opt{g};
26 #===============================Define Attribute==========================================
27 my %attribute=(
28 canvas=>{
29 'width'=>1500,
30 'height'=>1800
31 },
32 text=>{
33 'stroke'=>"#000000",
34 'fill'=>"none",
35 'stroke-width'=>0.5
36 },
37 line=>{
38 'stroke'=>"black",
39 'stroke-width'=>1
40 },
41 csv=>{
42 'stroke'=>"red",
43 'stroke-width'=>0.5
44 },
45 exon=>{
46 'stroke'=>"black",
47 'stroke-width'=>1
48 },
49 intron=>{
50 'stroke'=>"black",
51 'stroke-width'=>1.5
52 },
53 font=>{
54 'fill'=>"#000000",
55 'font-size'=>12,
56 'font-size2'=>10,
57 #'font-weight'=>'bold',
58 'font-family'=>"Arial"
59 #'font-family'=>"ArialNarrow-bold"
60 },
61 rect=>{
62 'fill'=>"lightgreen",
63 'stroke'=>"black",
64 'stroke-width'=>0.5
65 },
66 readwidth=>0.5
67 );
68 #############################s#define start coordinate and scale
69 open(TXT,">$opt{out}");
70 open(LENGTH,"$opt{l}")||die"cannot open the file $opt{l}";
71 my %length;
72 while (my $aline=<LENGTH>) {
73 chomp $aline;
74 next if($aline=~/^\#/);
75 my @temp=split/\t/,$aline;
76 $temp[0]=~s/^c/C/;
77 $length{$temp[0]}=$temp[1];
78 }
79 close LENGTH;
80 #---------------------------------------------------------------
81 open(GENE,"$opt{g}")||die"cannot open the file $opt{g}";
82 my %genelist;
83 while (my $aline=<GENE>) {
84 chomp $aline;#LOC_Os01g01280 Chr1 133291 134685 +
85 next if($aline=~/^\#/);
86 my @temp=split/\t/,$aline;
87 if ($temp[1]=~/^Chr(\d)$/) {
88 $temp[1]="Chr0$1";
89 }
90 push @{$genelist{$temp[1]}},[$temp[0],$temp[2],$temp[3]];
91
92 }
93 close GENE;
94 #my %have_gene;
95 #foreach my $chr (sort keys %genelist) {
96 # my @genelist=sort{$a->[1] <=> $b->[1]}@{$genelist{$chr}};
97 # my $start=$genelist[0][1];
98 # my $end=$genelist[0][2];
99 # for (my $i=0;$i<@genelist ;$i++) {
100 # if ($gene) {
101 # }
102 # }
103 #}
104
105 my %gene_desity;
106 foreach my $chr (sort keys %genelist) {
107 my @genelist=sort{$a->[1] <=> $b->[1]}@{$genelist{$chr}};
108 for (my $i=0;$i<@genelist ;$i++) {
109 my $start=int($genelist[$i][1]/$span);
110 my $end=int($genelist[$i][2]/$span);
111 #my @t_rpkm=split/\t/,$target_rpkm{$genelist[$i][0]};
112 if ($start==$end) {
113 $gene_desity{$chr}[$start]++;
114 }
115 else{
116 for (my $k=$start;$k<=$end ;$k++) {
117 $gene_desity{$chr}[$k]++;
118 }
119 }
120 }
121 }
122 #------------------------------------------region_gene_number-------------------------
123 my $max_gene_number=0;
124 my $total=0;
125 foreach my $chr (sort keys %genelist) {
126 for (my $i=0;$i<@{$gene_desity{$chr}} ;$i++) {
127 if (!(defined($gene_desity{$chr}[$i]))) {
128 $gene_desity{$chr}[$i]=0;
129 }
130 if ($gene_desity{$chr}[$i]>$max_gene_number) {
131 $max_gene_number=$gene_desity{$chr}[$i];
132 #print "$gene_desity{$chr}[$i]\n";
133 }
134 #print TXT "$i\t$gene_desity[$i]\n";
135 $total+=$gene_desity{$chr}[$i];
136 #print "$chr\t$i\t$gene_desity{$chr}[$i]\n";
137 }
138 }
139 #print "Gene max:$max_gene_number\ntotal:$total\n";
140
141 #---------------------------------------------------------------
142 my %centromere;
143 if (defined($opt{cen})) {
144 open CEN,"$opt{cen}";
145 while (my $aline=<CEN>) {
146 chomp $aline;
147 next if($aline=~/^\#/);
148 my @temp=split/\t/,$aline;
149 $temp[0]=~s/^c/C/;
150 $centromere{$temp[0]}[0]=$temp[1];
151 $centromere{$temp[0]}[1]=$temp[2];
152 }
153 close CEN;
154 }
155
156 #---------------------------------------------------------------
157 my $max_length=0;
158 foreach my $chr (keys %length) {
159 if ($max_length<$length{$chr}) {
160 $max_length=$length{$chr};
161 }
162 print "$chr\n";
163 }
164 #====================================cluster data=======================================
165 open(CLUSTER,"$opt{c}")||die"cannot open the file $opt{c}";
166 my %cluster;
167 my %cluster_density;
168 #my @sample=qw(39B3 3PA3 3LC3);
169 my @cluster_non_add;
170 while (my $aline=<CLUSTER>) {
171 next if($aline=~/^\#/);
172 chomp $aline;##Chr MajorLength Percent end 19B1
173 my @temp=split/\t/,$aline;
174 my @ID=split/\:/,$temp[0];
175 my @posi=split/\-/,$ID[1];
176 my @all_rpkm=@temp;
177 shift @all_rpkm;
178 shift @all_rpkm;
179 shift @all_rpkm;
180 # for (my $s=0;$s<@all_rpkm ;$s++) {#log transfer
181 # $all_rpkm[$s]=log2($all_rpkm[$s]);
182 # }
183 push @{$cluster{$ID[0]}},[$temp[0],$posi[0],$posi[1],@all_rpkm];#ID start end rpkm(19B1,1PA1,1LC1);
184 }
185 close CLUSTER;
186 my %max_cluster;
187 my $chr_number=0;
188 print "@mark\n$mark\n";
189 foreach my $chr (sort keys %cluster) {
190 for (my $i=0;$i<@mark ;$i++) {
191 $max_cluster{$chr}[$i]=0;
192 }
193 $chr_number++;
194 }
195 foreach my $chr (sort keys %cluster) {
196 @{$cluster{$chr}}=sort{$a->[1] <=> $b->[1]}@{$cluster{$chr}};
197 for (my $i=0;$i<$#{$cluster{$chr}} ;$i++) {
198 for (my $s=0;$s<@mark;$s++) {
199 if ($cluster{$chr}[$i][3+$s]>$max_cluster{$chr}) {
200 $max_cluster{$chr}[$s]=$cluster{$chr}[$i][3+$s];
201 }
202 }
203 }
204
205 }
206 foreach my $chr (sort keys %max_cluster) {
207 for (my $s=0; $s<@mark;$s++) {
208 # print "$max_cluster{$chr}[$s]\n";
209 }
210 }
211 #---------------------------------------------------------------------------------------
212 foreach my $chr(keys %cluster) {
213 for(my $i=0;$i<$#{$cluster{$chr}};$i++) {
214 my $start=int($cluster{$chr}[$i][1]/$span);
215 my $end=int($cluster{$chr}[$i][2]/$span);
216 if ($start==$end) {
217 for (my $s=0;$s<@mark ;$s++) {
218 $cluster_density{$chr}[$start][$s]+=$cluster{$chr}[$i][3+$s];
219 }
220
221 }
222 else{
223 for (my $m=$start;$m<=$end ;$m++) {
224 for (my $s=0;$s<@mark ;$s++) {
225 $cluster_density{$chr}[$m][$s]+=$cluster{$chr}[$i][3+$s];
226 }
227 }
228 }
229 }
230 }
231 my %max_cluster_density;
232 my $max_all_density=0;
233 foreach my $chr (sort keys %cluster) {#
234 for (my $s=0;$s<@mark ;$s++) {
235 for (my $i=0;$i<$#{$cluster{$chr}} ;$i++) {
236 $max_cluster_density{$chr}[$s]=0;
237 }
238 }
239
240 }
241 foreach my $chr (sort keys %cluster_density) {
242 print "$#{$cluster_density{$chr}}\n";
243 for (my $k=0;$k<$#{$cluster_density{$chr}} ;$k++) {
244 print TXT "$chr\t$k";
245 for (my $s=0;$s<@mark;$s++) {
246 if (!(defined($cluster_density{$chr}[$k][$s]))) {
247 $cluster_density{$chr}[$k][$s]=0;
248 }
249 if ($cluster_density{$chr}[$k][$s]>$max_cluster_density{$chr}[$s]) {
250 $max_cluster_density{$chr}[$s]=$cluster_density{$chr}[$k][$s];
251 }
252 if ($cluster_density{$chr}[$k][$s]>$max_all_density) {
253 $max_all_density=$cluster_density{$chr}[$k][$s];
254 }
255 print TXT "\t$cluster_density{$chr}[$k][$s]";
256 }
257 print TXT "\n";
258 }
259 }
260 print "max density: $max_all_density\n";
261 #--------------------------------------------------------------------
262 my $top_margin=30;
263 my $tail_margin=30;
264 my $XOFFSET=50;
265 my $YOFFSET=60;
266 my $chr_length=600;
267 my $Xscale=$chr_length/$max_length;#定义X轴比例尺 1:1000 x轴的坐标长度都要按照此比例尺换算
268 #my $high_cov=$high_cov9B1=0.5;#定义峰图最高峰
269 #my $Yscale=1/$high_cov;#定义Y轴比例尺 1:60 y轴的坐标长度都要按照此比例尺换算
270 #========================================New canvas============================
271 #### Starting ####
272 #新建画布
273 my $width=1000;
274 my $heigth=100+130*$chr_number;
275 my $svg=SVG->new(width=>$width, height=>$heigth);
276 #画图起始点
277 my $canvas_start_x=$XOFFSET;
278 my $canvas_end_x=$XOFFSET+$max_length*$Xscale;#按照比例尺 画线
279 my $canvas_start_y=$YOFFSET;
280 my $canvas_end_y=$YOFFSET;
281 my $chr_heigth=$heigth-$YOFFSET-$tail_margin;
282 print "chr number:$chr_number\n";
283 my $one_chr_heigth=$chr_heigth/$chr_number;
284 my $Yscale=($one_chr_heigth-15)/$max_all_density;
285 #my $chr_width=$YOFFSET;
286 #my $chr_start_y;
287 #my $chr_end_y;
288 #my $Yscale=0.01;
289 #=======================================title of the graph===============================
290 #my $span_k=$span/1000;
291 #$svg->text('x',$width/2,'y',$YOFFSET-20,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',15,'font-family',$attribute{font}{'font-family'},'-cdata',"Clusters rpkm/"."$span_k"."kb Distribution");
292 #=======================================the top max chr line=============================
293 $svg->line(id=>'l1',x1=>$canvas_start_x,y1=>$canvas_start_y,x2=>$canvas_end_x,y2=>$canvas_end_y,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
294 $long_scale=int ($max_length/10);#十等分 大刻度
295 #大坐标刻度
296 for ($i=0;$i<=10;$i++) {
297 my $long_x_start=$XOFFSET+$long_scale*$i*$Xscale;
298 my $long_x_end=$long_x_start;
299 my $long_y_start=$YOFFSET;
300 my $long_y_end=$YOFFSET-5;
301 $svg->line('x1',$long_x_start,'y1',$long_y_start,'x2',$long_x_end,'y2',$long_y_end,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
302 my $Bscale=$long_scale*$i;
303 my $cdata=int ($Bscale/1000000);
304 $svg->text('x',$long_x_start,'y',$long_y_start-10,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',$cdata."M");
305 }
306 #=========================================================================================
307 my $cc=1;
308 foreach my $chr (sort keys %length) {
309 my $chr_end_x=$XOFFSET+$length{$chr}*$Xscale;
310 my $chr_start_x=$XOFFSET;
311 my $chr_start_y=$YOFFSET+$cc*$one_chr_heigth;
312 my $chr_end_y=$chr_start_y;
313 #$chr_start_y+=$chr_width;
314 #$chr_end_y+=$chr_width;
315 # for (my $i=0;$i<@{$gene_desity{$chr}};$i++) {
316 # print "$chr\t$i\t$gene_desity{$chr}[$i]\n";
317 # my $red=$gene_desity{$chr}[$i]/$max_gene_number*255;
318 # my $green=$gene_desity{$chr}[$i]/$max_gene_number*255;
319 # print "$red\t$green\t0\n";
320 # $svg->rect('x',$chr_start_x+$i*$span*$Xscale,'y',$chr_start_y,'width',$span*$Xscale,'height',8,'stroke',"rgb($red,$green,0)",'stroke-width',0.1,'fill',"rgb($red,$green,0)");
321 # }
322
323 $svg->line(x1=>$chr_start_x,y1=>$chr_start_y,x2=>$chr_end_x,y2=>$chr_end_y,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
324 $svg->text('x',$XOFFSET-40,'y',$chr_start_y,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',$chr);
325 my $m_length=$length{$chr}%1000000;
326 $svg->text('x',$chr_end_x+20,'y',$chr_start_y,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',$m_length."M");
327
328
329 if (defined($centromere{$chr}[0])) {
330 $svg->rect('x',$XOFFSET+$centromere{$chr}[0]*$Xscale,'y',$chr_start_y-2,'width',($centromere{$chr}[1]-$centromere{$chr}[0]+1)*$Xscale,'height',5,'stroke',"blue",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"blue");
331 }
332 for (my $s=0;$s<@mark ;$s++) {
333 for (my $i=0;$i<$#{$cluster_density{$chr}}-1 ;$i++) {
334 #if ($cluster_density{$chr}[$i]*$Yscale>40) {
335 #$cluster_density{$chr}[$i]=40/$Yscale;
336 #$svg->rect('x',$XOFFSET+$i*$span*$Xscale,'y',$chr_start_y-45,'width',$span*$Xscale,'height',5,'stroke',"green",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"green");
337 #}
338 #print "$i\t$cluster_density{$chr}[$i][$s]\t$cluster_density{$chr}[$i+1][$s]\n";
339 my $cluster_density_start_x=$XOFFSET+$i*$span*$Xscale;
340 my $cluster_density_end_x=$XOFFSET+($i+1)*$span*$Xscale;
341 my $cluster_density_start_y=$chr_start_y-$cluster_density{$chr}[$i][$s]*$Yscale;
342 my $cluster_density_end_y=$chr_start_y-$cluster_density{$chr}[$i+1][$s]*$Yscale;
343 my $c_red=($s+1)/@mark*255;
344 $svg->line('x1',$cluster_density_start_x,'y1',$cluster_density_start_y,'x2',$cluster_density_end_x,'y2',$cluster_density_end_y,'stroke',"rgb($c_red,125,0)",'stroke-width',0.3);
345 }
346
347 }
348 #=======Y axis
349 $svg->line(x1=>$chr_start_x,y1=>$chr_start_y,x2=>$chr_start_x,y2=>$chr_start_y-$one_chr_heigth+15,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
350 #=======Y axis ===>3 xiaoge
351 my $s10=1;
352 my $e10=0;
353 my $chr_max=$max_all_density;
354 while ($chr_max>10) {
355 $chr_max=int($chr_max/10);
356 $s10=$s10*10;
357 $e10++;
358 }
359 $chr_max=$chr_max/2;
360 #print "*****$max_all_density\t$chr_max\t$s10\n";
361 for (my $i=1;$i<3 ;$i++) {
362 my $y1=$chr_start_y-$chr_max*$s10*$Yscale*$i;
363 my $xiaoge_Y=$chr_max*$i;
364 $svg->line('x1',$chr_start_x,'y1',$y1,'x2',$chr_start_x+3,'y2',$y1,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
365 $svg->text('x',$chr_start_x-26,'y',$y1+4,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',8,'font-family',$attribute{font}{'font-family'},'-cdata',$xiaoge_Y."e".$e10);
366 }
367 $cc++;
368 }
369
370 for (my $s=0;$s<@mark ;$s++) {
371 my $c_red=($s+1)/@mark*255;
372 print "**$c_red\n";
373 $svg->line('x1',$canvas_end_x+100,'y1',$YOFFSET+$s*20+30,'x2',$canvas_end_x+130,'y2',$YOFFSET+$s*20+30,'stroke',"rgb($c_red,125,0)",'stroke-width',1);
374 $svg->text('x',$canvas_end_x+150,'y',$YOFFSET+$s*20+5+30,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',10,'font-family',$attribute{font}{'font-family'},'-cdata',$mark[$s]);
375 }
376 #
377 #
378 if (defined($opt{cen})) {
379 $svg->rect('x',$canvas_end_x+100,'y',$YOFFSET+@mark*20+30,'width',30,'height',5,'stroke',"blue",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"blue");
380 $svg->text('x',$canvas_end_x+150,'y',$YOFFSET+@mark*20+30+5,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',10,'font-family',$attribute{font}{'font-family'},'-cdata',"centromere");
381 }
382
383 close TXT;
384
385 open (OUT,">$opt{o}");
386 print OUT $svg->xmlify();
387
388 sub log2 {
389 my $n = shift;
390 return log($n)/log(2);
391 }
392
393 sub usage{
394 print <<"USAGE";
395 Version $version
396 Usage:
397 $0
398 options:
399 -g genelist
400 -span
401 -n sample cloumn
402 -mark sample name
403 -o output graph file name with html or svg extension
404 -c cluster file input
405 -out txt output
406 -l length of chr
407 -cen centromere
408 -h help
409 USAGE
410 exit(1);
411 }