Mercurial > repos > big-tiandm > sirna_plant
comparison sRNA_plot.pl @ 23:cad6a1dfb174 draft
Uploaded
author | big-tiandm |
---|---|
date | Wed, 05 Nov 2014 21:11:49 -0500 |
parents | e0884a4b996b |
children |
comparison
equal
deleted
inserted
replaced
22:b6686462d0cb | 23:cad6a1dfb174 |
---|---|
1 #!/usr/bin/perl -w | |
2 #========================================================================================== | |
3 # Date: | |
4 # Title: | |
5 # Comment: Program to plot gene structure | |
6 # Input: 1. | |
7 # 2. | |
8 # 3. | |
9 # Output: output file of gene structure graph by html or svg formt | |
10 # Test Usage: | |
11 #======================================================================================== | |
12 #use strict; | |
13 my $version=1.00; | |
14 use SVG; | |
15 use Getopt::Long; | |
16 my %opt; | |
17 GetOptions(\%opt,"g=s","l=s","span=s","c=s","o=s","out=s","cen:s","mark=s","h"); | |
18 if (!( defined $opt{o}) || defined $opt{h}) { | |
19 &usage; | |
20 } | |
21 my $span=$opt{span}; | |
22 #my $sample_cloumn=$opt{n}; | |
23 my $mark=$opt{mark}; | |
24 my @mark=split/\#/,$mark; | |
25 my $genelist=$opt{g}; | |
26 #===============================Define Attribute========================================== | |
27 my %attribute=( | |
28 canvas=>{ | |
29 'width'=>1500, | |
30 'height'=>1800 | |
31 }, | |
32 text=>{ | |
33 'stroke'=>"#000000", | |
34 'fill'=>"none", | |
35 'stroke-width'=>0.5 | |
36 }, | |
37 line=>{ | |
38 'stroke'=>"black", | |
39 'stroke-width'=>1 | |
40 }, | |
41 csv=>{ | |
42 'stroke'=>"red", | |
43 'stroke-width'=>0.5 | |
44 }, | |
45 exon=>{ | |
46 'stroke'=>"black", | |
47 'stroke-width'=>1 | |
48 }, | |
49 intron=>{ | |
50 'stroke'=>"black", | |
51 'stroke-width'=>1.5 | |
52 }, | |
53 font=>{ | |
54 'fill'=>"#000000", | |
55 'font-size'=>12, | |
56 'font-size2'=>10, | |
57 #'font-weight'=>'bold', | |
58 'font-family'=>"Arial" | |
59 #'font-family'=>"ArialNarrow-bold" | |
60 }, | |
61 rect=>{ | |
62 'fill'=>"lightgreen", | |
63 'stroke'=>"black", | |
64 'stroke-width'=>0.5 | |
65 }, | |
66 readwidth=>0.5 | |
67 ); | |
68 #############################s#define start coordinate and scale | |
69 open(TXT,">$opt{out}"); | |
70 open(LENGTH,"$opt{l}")||die"cannot open the file $opt{l}"; | |
71 my %length; | |
72 while (my $aline=<LENGTH>) { | |
73 chomp $aline; | |
74 next if($aline=~/^\#/); | |
75 my @temp=split/\t/,$aline; | |
76 $temp[0]=~s/^c/C/; | |
77 $length{$temp[0]}=$temp[1]; | |
78 } | |
79 close LENGTH; | |
80 #--------------------------------------------------------------- | |
81 open(GENE,"$opt{g}")||die"cannot open the file $opt{g}"; | |
82 my %genelist; | |
83 while (my $aline=<GENE>) { | |
84 chomp $aline;#LOC_Os01g01280 Chr1 133291 134685 + | |
85 next if($aline=~/^\#/); | |
86 my @temp=split/\t/,$aline; | |
87 if ($temp[1]=~/^Chr(\d)$/) { | |
88 $temp[1]="Chr0$1"; | |
89 } | |
90 push @{$genelist{$temp[1]}},[$temp[0],$temp[2],$temp[3]]; | |
91 | |
92 } | |
93 close GENE; | |
94 #my %have_gene; | |
95 #foreach my $chr (sort keys %genelist) { | |
96 # my @genelist=sort{$a->[1] <=> $b->[1]}@{$genelist{$chr}}; | |
97 # my $start=$genelist[0][1]; | |
98 # my $end=$genelist[0][2]; | |
99 # for (my $i=0;$i<@genelist ;$i++) { | |
100 # if ($gene) { | |
101 # } | |
102 # } | |
103 #} | |
104 | |
105 my %gene_desity; | |
106 foreach my $chr (sort keys %genelist) { | |
107 my @genelist=sort{$a->[1] <=> $b->[1]}@{$genelist{$chr}}; | |
108 for (my $i=0;$i<@genelist ;$i++) { | |
109 my $start=int($genelist[$i][1]/$span); | |
110 my $end=int($genelist[$i][2]/$span); | |
111 #my @t_rpkm=split/\t/,$target_rpkm{$genelist[$i][0]}; | |
112 if ($start==$end) { | |
113 $gene_desity{$chr}[$start]++; | |
114 } | |
115 else{ | |
116 for (my $k=$start;$k<=$end ;$k++) { | |
117 $gene_desity{$chr}[$k]++; | |
118 } | |
119 } | |
120 } | |
121 } | |
122 #------------------------------------------region_gene_number------------------------- | |
123 my $max_gene_number=0; | |
124 my $total=0; | |
125 foreach my $chr (sort keys %genelist) { | |
126 for (my $i=0;$i<@{$gene_desity{$chr}} ;$i++) { | |
127 if (!(defined($gene_desity{$chr}[$i]))) { | |
128 $gene_desity{$chr}[$i]=0; | |
129 } | |
130 if ($gene_desity{$chr}[$i]>$max_gene_number) { | |
131 $max_gene_number=$gene_desity{$chr}[$i]; | |
132 #print "$gene_desity{$chr}[$i]\n"; | |
133 } | |
134 #print TXT "$i\t$gene_desity[$i]\n"; | |
135 $total+=$gene_desity{$chr}[$i]; | |
136 #print "$chr\t$i\t$gene_desity{$chr}[$i]\n"; | |
137 } | |
138 } | |
139 #print "Gene max:$max_gene_number\ntotal:$total\n"; | |
140 | |
141 #--------------------------------------------------------------- | |
142 my %centromere; | |
143 if (defined($opt{cen})) { | |
144 open CEN,"$opt{cen}"; | |
145 while (my $aline=<CEN>) { | |
146 chomp $aline; | |
147 next if($aline=~/^\#/); | |
148 my @temp=split/\t/,$aline; | |
149 $temp[0]=~s/^c/C/; | |
150 $centromere{$temp[0]}[0]=$temp[1]; | |
151 $centromere{$temp[0]}[1]=$temp[2]; | |
152 } | |
153 close CEN; | |
154 } | |
155 | |
156 #--------------------------------------------------------------- | |
157 my $max_length=0; | |
158 foreach my $chr (keys %length) { | |
159 if ($max_length<$length{$chr}) { | |
160 $max_length=$length{$chr}; | |
161 } | |
162 print "$chr\n"; | |
163 } | |
164 #====================================cluster data======================================= | |
165 open(CLUSTER,"$opt{c}")||die"cannot open the file $opt{c}"; | |
166 my %cluster; | |
167 my %cluster_density; | |
168 #my @sample=qw(39B3 3PA3 3LC3); | |
169 my @cluster_non_add; | |
170 while (my $aline=<CLUSTER>) { | |
171 next if($aline=~/^\#/); | |
172 chomp $aline;##Chr MajorLength Percent end 19B1 | |
173 my @temp=split/\t/,$aline; | |
174 my @ID=split/\:/,$temp[0]; | |
175 my @posi=split/\-/,$ID[1]; | |
176 my @all_rpkm=@temp; | |
177 shift @all_rpkm; | |
178 shift @all_rpkm; | |
179 shift @all_rpkm; | |
180 # for (my $s=0;$s<@all_rpkm ;$s++) {#log transfer | |
181 # $all_rpkm[$s]=log2($all_rpkm[$s]); | |
182 # } | |
183 push @{$cluster{$ID[0]}},[$temp[0],$posi[0],$posi[1],@all_rpkm];#ID start end rpkm(19B1,1PA1,1LC1); | |
184 } | |
185 close CLUSTER; | |
186 my %max_cluster; | |
187 my $chr_number=0; | |
188 print "@mark\n$mark\n"; | |
189 foreach my $chr (sort keys %cluster) { | |
190 for (my $i=0;$i<@mark ;$i++) { | |
191 $max_cluster{$chr}[$i]=0; | |
192 } | |
193 $chr_number++; | |
194 } | |
195 foreach my $chr (sort keys %cluster) { | |
196 @{$cluster{$chr}}=sort{$a->[1] <=> $b->[1]}@{$cluster{$chr}}; | |
197 for (my $i=0;$i<$#{$cluster{$chr}} ;$i++) { | |
198 for (my $s=0;$s<@mark;$s++) { | |
199 if ($cluster{$chr}[$i][3+$s]>$max_cluster{$chr}) { | |
200 $max_cluster{$chr}[$s]=$cluster{$chr}[$i][3+$s]; | |
201 } | |
202 } | |
203 } | |
204 | |
205 } | |
206 foreach my $chr (sort keys %max_cluster) { | |
207 for (my $s=0; $s<@mark;$s++) { | |
208 # print "$max_cluster{$chr}[$s]\n"; | |
209 } | |
210 } | |
211 #--------------------------------------------------------------------------------------- | |
212 foreach my $chr(keys %cluster) { | |
213 for(my $i=0;$i<$#{$cluster{$chr}};$i++) { | |
214 my $start=int($cluster{$chr}[$i][1]/$span); | |
215 my $end=int($cluster{$chr}[$i][2]/$span); | |
216 if ($start==$end) { | |
217 for (my $s=0;$s<@mark ;$s++) { | |
218 $cluster_density{$chr}[$start][$s]+=$cluster{$chr}[$i][3+$s]; | |
219 } | |
220 | |
221 } | |
222 else{ | |
223 for (my $m=$start;$m<=$end ;$m++) { | |
224 for (my $s=0;$s<@mark ;$s++) { | |
225 $cluster_density{$chr}[$m][$s]+=$cluster{$chr}[$i][3+$s]; | |
226 } | |
227 } | |
228 } | |
229 } | |
230 } | |
231 my %max_cluster_density; | |
232 my $max_all_density=0; | |
233 foreach my $chr (sort keys %cluster) {# | |
234 for (my $s=0;$s<@mark ;$s++) { | |
235 for (my $i=0;$i<$#{$cluster{$chr}} ;$i++) { | |
236 $max_cluster_density{$chr}[$s]=0; | |
237 } | |
238 } | |
239 | |
240 } | |
241 foreach my $chr (sort keys %cluster_density) { | |
242 print "$#{$cluster_density{$chr}}\n"; | |
243 for (my $k=0;$k<$#{$cluster_density{$chr}} ;$k++) { | |
244 print TXT "$chr\t$k"; | |
245 for (my $s=0;$s<@mark;$s++) { | |
246 if (!(defined($cluster_density{$chr}[$k][$s]))) { | |
247 $cluster_density{$chr}[$k][$s]=0; | |
248 } | |
249 if ($cluster_density{$chr}[$k][$s]>$max_cluster_density{$chr}[$s]) { | |
250 $max_cluster_density{$chr}[$s]=$cluster_density{$chr}[$k][$s]; | |
251 } | |
252 if ($cluster_density{$chr}[$k][$s]>$max_all_density) { | |
253 $max_all_density=$cluster_density{$chr}[$k][$s]; | |
254 } | |
255 print TXT "\t$cluster_density{$chr}[$k][$s]"; | |
256 } | |
257 print TXT "\n"; | |
258 } | |
259 } | |
260 print "max density: $max_all_density\n"; | |
261 #-------------------------------------------------------------------- | |
262 my $top_margin=30; | |
263 my $tail_margin=30; | |
264 my $XOFFSET=50; | |
265 my $YOFFSET=60; | |
266 my $chr_length=600; | |
267 my $Xscale=$chr_length/$max_length;#定义X轴比例尺 1:1000 x轴的坐标长度都要按照此比例尺换算 | |
268 #my $high_cov=$high_cov9B1=0.5;#定义峰图最高峰 | |
269 #my $Yscale=1/$high_cov;#定义Y轴比例尺 1:60 y轴的坐标长度都要按照此比例尺换算 | |
270 #========================================New canvas============================ | |
271 #### Starting #### | |
272 #新建画布 | |
273 my $width=1000; | |
274 my $heigth=100+130*$chr_number; | |
275 my $svg=SVG->new(width=>$width, height=>$heigth); | |
276 #画图起始点 | |
277 my $canvas_start_x=$XOFFSET; | |
278 my $canvas_end_x=$XOFFSET+$max_length*$Xscale;#按照比例尺 画线 | |
279 my $canvas_start_y=$YOFFSET; | |
280 my $canvas_end_y=$YOFFSET; | |
281 my $chr_heigth=$heigth-$YOFFSET-$tail_margin; | |
282 print "chr number:$chr_number\n"; | |
283 my $one_chr_heigth=$chr_heigth/$chr_number; | |
284 my $Yscale=($one_chr_heigth-15)/$max_all_density; | |
285 #my $chr_width=$YOFFSET; | |
286 #my $chr_start_y; | |
287 #my $chr_end_y; | |
288 #my $Yscale=0.01; | |
289 #=======================================title of the graph=============================== | |
290 #my $span_k=$span/1000; | |
291 #$svg->text('x',$width/2,'y',$YOFFSET-20,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',15,'font-family',$attribute{font}{'font-family'},'-cdata',"Clusters rpkm/"."$span_k"."kb Distribution"); | |
292 #=======================================the top max chr line============================= | |
293 $svg->line(id=>'l1',x1=>$canvas_start_x,y1=>$canvas_start_y,x2=>$canvas_end_x,y2=>$canvas_end_y,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); | |
294 $long_scale=int ($max_length/10);#十等分 大刻度 | |
295 #大坐标刻度 | |
296 for ($i=0;$i<=10;$i++) { | |
297 my $long_x_start=$XOFFSET+$long_scale*$i*$Xscale; | |
298 my $long_x_end=$long_x_start; | |
299 my $long_y_start=$YOFFSET; | |
300 my $long_y_end=$YOFFSET-5; | |
301 $svg->line('x1',$long_x_start,'y1',$long_y_start,'x2',$long_x_end,'y2',$long_y_end,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); | |
302 my $Bscale=$long_scale*$i; | |
303 my $cdata=int ($Bscale/1000000); | |
304 $svg->text('x',$long_x_start,'y',$long_y_start-10,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',$cdata."M"); | |
305 } | |
306 #========================================================================================= | |
307 my $cc=1; | |
308 foreach my $chr (sort keys %length) { | |
309 my $chr_end_x=$XOFFSET+$length{$chr}*$Xscale; | |
310 my $chr_start_x=$XOFFSET; | |
311 my $chr_start_y=$YOFFSET+$cc*$one_chr_heigth; | |
312 my $chr_end_y=$chr_start_y; | |
313 #$chr_start_y+=$chr_width; | |
314 #$chr_end_y+=$chr_width; | |
315 # for (my $i=0;$i<@{$gene_desity{$chr}};$i++) { | |
316 # print "$chr\t$i\t$gene_desity{$chr}[$i]\n"; | |
317 # my $red=$gene_desity{$chr}[$i]/$max_gene_number*255; | |
318 # my $green=$gene_desity{$chr}[$i]/$max_gene_number*255; | |
319 # print "$red\t$green\t0\n"; | |
320 # $svg->rect('x',$chr_start_x+$i*$span*$Xscale,'y',$chr_start_y,'width',$span*$Xscale,'height',8,'stroke',"rgb($red,$green,0)",'stroke-width',0.1,'fill',"rgb($red,$green,0)"); | |
321 # } | |
322 | |
323 $svg->line(x1=>$chr_start_x,y1=>$chr_start_y,x2=>$chr_end_x,y2=>$chr_end_y,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); | |
324 $svg->text('x',$XOFFSET-40,'y',$chr_start_y,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',$chr); | |
325 my $m_length=$length{$chr}%1000000; | |
326 $svg->text('x',$chr_end_x+20,'y',$chr_start_y,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',$m_length."M"); | |
327 | |
328 | |
329 if (defined($centromere{$chr}[0])) { | |
330 $svg->rect('x',$XOFFSET+$centromere{$chr}[0]*$Xscale,'y',$chr_start_y-2,'width',($centromere{$chr}[1]-$centromere{$chr}[0]+1)*$Xscale,'height',5,'stroke',"blue",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"blue"); | |
331 } | |
332 for (my $s=0;$s<@mark ;$s++) { | |
333 for (my $i=0;$i<$#{$cluster_density{$chr}}-1 ;$i++) { | |
334 #if ($cluster_density{$chr}[$i]*$Yscale>40) { | |
335 #$cluster_density{$chr}[$i]=40/$Yscale; | |
336 #$svg->rect('x',$XOFFSET+$i*$span*$Xscale,'y',$chr_start_y-45,'width',$span*$Xscale,'height',5,'stroke',"green",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"green"); | |
337 #} | |
338 #print "$i\t$cluster_density{$chr}[$i][$s]\t$cluster_density{$chr}[$i+1][$s]\n"; | |
339 my $cluster_density_start_x=$XOFFSET+$i*$span*$Xscale; | |
340 my $cluster_density_end_x=$XOFFSET+($i+1)*$span*$Xscale; | |
341 my $cluster_density_start_y=$chr_start_y-$cluster_density{$chr}[$i][$s]*$Yscale; | |
342 my $cluster_density_end_y=$chr_start_y-$cluster_density{$chr}[$i+1][$s]*$Yscale; | |
343 my $c_red=($s+1)/@mark*255; | |
344 $svg->line('x1',$cluster_density_start_x,'y1',$cluster_density_start_y,'x2',$cluster_density_end_x,'y2',$cluster_density_end_y,'stroke',"rgb($c_red,125,0)",'stroke-width',0.3); | |
345 } | |
346 | |
347 } | |
348 #=======Y axis | |
349 $svg->line(x1=>$chr_start_x,y1=>$chr_start_y,x2=>$chr_start_x,y2=>$chr_start_y-$one_chr_heigth+15,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); | |
350 #=======Y axis ===>3 xiaoge | |
351 my $s10=1; | |
352 my $e10=0; | |
353 my $chr_max=$max_all_density; | |
354 while ($chr_max>10) { | |
355 $chr_max=int($chr_max/10); | |
356 $s10=$s10*10; | |
357 $e10++; | |
358 } | |
359 $chr_max=$chr_max/2; | |
360 #print "*****$max_all_density\t$chr_max\t$s10\n"; | |
361 for (my $i=1;$i<3 ;$i++) { | |
362 my $y1=$chr_start_y-$chr_max*$s10*$Yscale*$i; | |
363 my $xiaoge_Y=$chr_max*$i; | |
364 $svg->line('x1',$chr_start_x,'y1',$y1,'x2',$chr_start_x+3,'y2',$y1,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); | |
365 $svg->text('x',$chr_start_x-26,'y',$y1+4,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',8,'font-family',$attribute{font}{'font-family'},'-cdata',$xiaoge_Y."e".$e10); | |
366 } | |
367 $cc++; | |
368 } | |
369 | |
370 for (my $s=0;$s<@mark ;$s++) { | |
371 my $c_red=($s+1)/@mark*255; | |
372 print "**$c_red\n"; | |
373 $svg->line('x1',$canvas_end_x+100,'y1',$YOFFSET+$s*20+30,'x2',$canvas_end_x+130,'y2',$YOFFSET+$s*20+30,'stroke',"rgb($c_red,125,0)",'stroke-width',1); | |
374 $svg->text('x',$canvas_end_x+150,'y',$YOFFSET+$s*20+5+30,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',10,'font-family',$attribute{font}{'font-family'},'-cdata',$mark[$s]); | |
375 } | |
376 # | |
377 # | |
378 if (defined($opt{cen})) { | |
379 $svg->rect('x',$canvas_end_x+100,'y',$YOFFSET+@mark*20+30,'width',30,'height',5,'stroke',"blue",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"blue"); | |
380 $svg->text('x',$canvas_end_x+150,'y',$YOFFSET+@mark*20+30+5,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',10,'font-family',$attribute{font}{'font-family'},'-cdata',"centromere"); | |
381 } | |
382 | |
383 close TXT; | |
384 | |
385 open (OUT,">$opt{o}"); | |
386 print OUT $svg->xmlify(); | |
387 | |
388 sub log2 { | |
389 my $n = shift; | |
390 return log($n)/log(2); | |
391 } | |
392 | |
393 sub usage{ | |
394 print <<"USAGE"; | |
395 Version $version | |
396 Usage: | |
397 $0 | |
398 options: | |
399 -g genelist | |
400 -span | |
401 -n sample cloumn | |
402 -mark sample name | |
403 -o output graph file name with html or svg extension | |
404 -c cluster file input | |
405 -out txt output | |
406 -l length of chr | |
407 -cen centromere | |
408 -h help | |
409 USAGE | |
410 exit(1); | |
411 } |