0
|
1 #!/usr/bin/perl -w
|
|
2 #==========================================================================================
|
|
3 # Date:
|
|
4 # Title:
|
|
5 # Comment: Program to plot gene structure
|
|
6 # Input: 1.
|
|
7 # 2.
|
|
8 # 3.
|
|
9 # Output: output file of gene structure graph by html or svg formt
|
|
10 # Test Usage:
|
|
11 #========================================================================================
|
|
12 #use strict;
|
|
13 my $version=1.00;
|
|
14 use SVG;
|
|
15 use Getopt::Long;
|
|
16 my %opt;
|
|
17 GetOptions(\%opt,"span=s","c=s","o=s","out=s","l=s","cen:s","n=s","mark=s","h");
|
|
18 if (!( defined $opt{o}) || defined $opt{h}) {
|
|
19 &usage;
|
|
20 }
|
|
21 my $span=$opt{span};
|
|
22 my $sample_cloumn=$opt{n};
|
|
23 my $mark=$opt{mark};
|
|
24 #===============================Define Attribute==========================================
|
|
25 my %attribute=(
|
|
26 canvas=>{
|
|
27 'width'=>1500,
|
|
28 'height'=>1800
|
|
29 },
|
|
30 text=>{
|
|
31 'stroke'=>"#000000",
|
|
32 'fill'=>"none",
|
|
33 'stroke-width'=>0.5
|
|
34 },
|
|
35 line=>{
|
|
36 'stroke'=>"black",
|
|
37 'stroke-width'=>1
|
|
38 },
|
|
39 csv=>{
|
|
40 'stroke'=>"red",
|
|
41 'stroke-width'=>0.5
|
|
42 },
|
|
43 exon=>{
|
|
44 'stroke'=>"black",
|
|
45 'stroke-width'=>1
|
|
46 },
|
|
47 intron=>{
|
|
48 'stroke'=>"black",
|
|
49 'stroke-width'=>1.5
|
|
50 },
|
|
51 font=>{
|
|
52 'fill'=>"#000000",
|
|
53 'font-size'=>12,
|
|
54 'font-size2'=>10,
|
|
55 #'font-weight'=>'bold',
|
|
56 'font-family'=>"Arial"
|
|
57 #'font-family'=>"ArialNarrow-bold"
|
|
58 },
|
|
59 rect=>{
|
|
60 'fill'=>"lightgreen",
|
|
61 'stroke'=>"black",
|
|
62 'stroke-width'=>0.5
|
|
63 },
|
|
64 readwidth=>0.5
|
|
65 );
|
|
66 #############################s#define start coordinate and scale
|
|
67 open(TXT,">$opt{out}");
|
|
68 open(LENGTH,"$opt{l}")||die"cannot open the file $opt{l}";
|
|
69 my %length;
|
|
70 while (my $aline=<LENGTH>) {
|
|
71 chomp $aline;
|
|
72 next if($aline=~/^\#/);
|
|
73 my @temp=split/\t/,$aline;
|
|
74 $temp[0]=~s/^c/C/;
|
|
75 $length{$temp[0]}=$temp[1];
|
|
76 }
|
|
77 close LENGTH;
|
|
78 #---------------------------------------------------------------
|
|
79 my %centromere;
|
|
80 if (defined($opt{cen})) {
|
|
81 open(CEN,"$opt{cen}")||die"cannot open the file $opt{cen}";
|
|
82 while (my $aline=<CEN>) {
|
|
83 chomp $aline;
|
|
84 next if($aline=~/^\#/);
|
|
85 my @temp=split/\t/,$aline;
|
|
86 $temp[0]=~s/^c/C/;
|
|
87 $centromere{$temp[0]}[0]=$temp[1];
|
|
88 $centromere{$temp[0]}[1]=$temp[2];
|
|
89 }
|
|
90 close CEN;
|
|
91 }
|
|
92
|
|
93 #---------------------------------------------------------------
|
|
94 my $max_length=0;
|
|
95 foreach my $chr (keys %length) {
|
|
96 if ($max_length<$length{$chr}) {
|
|
97 $max_length=$length{$chr};
|
|
98 }
|
|
99 print "$chr\n";
|
|
100 }
|
|
101 #====================================cluster data=======================================
|
|
102 open(CLUSTER,"$opt{c}")||die"cannot open the file $opt{c}";
|
|
103 my %cluster;
|
|
104 my %cluster_density;
|
|
105 #my @sample=qw(39B3 3PA3 3LC3);
|
|
106 my @cluster_non_add;
|
|
107 while (my $aline=<CLUSTER>) {
|
|
108 next if($aline=~/^\#/);
|
|
109 chomp $aline;##ID chr strand start end 19B1
|
|
110 my @temp=split/\t/,$aline;
|
|
111 my @ID=split/\:/,$temp[0];
|
|
112 my @posi=split/\-/,$ID[1];
|
|
113 push @{$cluster{$ID[0]}},[$temp[0],$posi[0],$posi[1],$temp[2+$sample_cloumn]];#ID start end rpkm(19B1,1PA1,1LC1);
|
|
114 }
|
|
115 close CLUSTER;
|
|
116 my %max_cluster;
|
|
117 foreach my $chr (sort keys %cluster) {
|
|
118 # for (my $i=0;$i<3 ;$i++) {
|
|
119 # $max_cluster{$chr}[$i]=0;
|
|
120 # }
|
|
121 $max_cluster{$chr}=0
|
|
122 }
|
|
123 foreach my $chr (sort keys %cluster) {
|
|
124 @{$cluster{$chr}}=sort{$a->[1] <=> $b->[1]}@{$cluster{$chr}};
|
|
125 #for (my $s=0;$s<3;$s++) {
|
|
126 for (my $i=0;$i<$#{$cluster{$chr}} ;$i++) {
|
|
127 if ($cluster{$chr}[$i][3]>$max_cluster{$chr}) {
|
|
128 $max_cluster{$chr}=$cluster{$chr}[$i][3];
|
|
129 }
|
|
130 }
|
|
131 #}
|
|
132
|
|
133 }
|
|
134 #---------------------------------------------------------------------------------------
|
|
135 foreach my $chr(keys %cluster) {
|
|
136 for(my $i=0;$i<$#{$cluster{$chr}};$i++) {
|
|
137 my $start=int($cluster{$chr}[$i][1]/$span);
|
|
138 my $end=int($cluster{$chr}[$i][2]/$span);
|
|
139 if ($start==$end) {
|
|
140 #for (my $j=0;$j<3 ;$j++) {
|
|
141 $cluster_density{$chr}[$start]+=$cluster{$chr}[$i][3];
|
|
142 #}
|
|
143
|
|
144 }
|
|
145 else{
|
|
146 for (my $m=$start;$m<=$end ;$m++) {
|
|
147 #for (my $j=0;$j<3 ;$j++) {
|
|
148 $cluster_density{$chr}[$m]+=$cluster{$chr}[$i][3];
|
|
149 #}
|
|
150 }
|
|
151 }
|
|
152 }
|
|
153 }
|
|
154 my %max_cluster_density;
|
|
155 foreach my $chr (sort keys %cluster) {#
|
|
156 #for (my $i=0;$i<3 ;$i++) {
|
|
157 for (my $i=0;$i<$#{$cluster{$chr}} ;$i++) {
|
|
158 $max_cluster_density{$chr}=0;
|
|
159 }
|
|
160 #}
|
|
161 }
|
|
162 foreach my $chr (sort keys %cluster) {
|
|
163 #for (my $i=0;$i<3;$i++) {
|
|
164 for (my $k=0;$k<$#{$cluster_density{$chr}} ;$k++) {
|
|
165 if (!(defined($cluster_density{$chr}[$k]))) {
|
|
166 $cluster_density{$chr}[$k]=0;
|
|
167 }
|
|
168 if ($cluster_density{$chr}[$k]>$max_cluster_density{$chr}) {
|
|
169 $max_cluster_density{$chr}=$cluster_density{$chr}[$k];
|
|
170 }
|
|
171 print TXT "$chr\t$k\t$cluster_density{$chr}[$k]\n";
|
|
172 }
|
|
173 #}
|
|
174 }
|
|
175 #--------------------------------------------------------------------
|
|
176 my $XOFFSET=50;
|
|
177 my $YOFFSET=60;
|
|
178 #my $length=$end-$start+1;
|
|
179 my $Xscale=600/$max_length;#定义X轴比例尺 1:1000 x轴的坐标长度都要按照此比例尺换算
|
|
180 #my $high_cov=$high_cov9B1=0.5;#定义峰图最高峰
|
|
181 #my $Yscale=1/$high_cov;#定义Y轴比例尺 1:60 y轴的坐标长度都要按照此比例尺换算
|
|
182 #========================================New canvas============================
|
|
183 #### Starting ####
|
|
184 #新建画布
|
|
185 my $svg=SVG->new();
|
|
186 #画图起始点
|
|
187 my $canvas_start_x=$XOFFSET;
|
|
188 my $canvas_end_x=$XOFFSET+$max_length*$Xscale;#按照比例尺 画线
|
|
189 my $canvas_start_y=$YOFFSET;
|
|
190 my $canvas_end_y=$YOFFSET;
|
|
191
|
|
192 my $chr_width=$YOFFSET;
|
|
193 my $chr_start_y;
|
|
194 my $chr_end_y;
|
|
195 my $Yscale=0.01;
|
|
196 foreach my $chr (sort keys %length) {
|
|
197 my $chr_start_x=$XOFFSET;
|
|
198 my $chr_end_x=$XOFFSET+$length{$chr}*$Xscale;
|
|
199 $chr_start_y+=$chr_width;
|
|
200 $chr_end_y+=$chr_width;
|
|
201 $svg->line(x1=>$chr_start_x,y1=>$chr_start_y,x2=>$chr_end_x,y2=>$chr_end_y,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
|
|
202 $svg->text('x',$XOFFSET-40,'y',$chr_start_y,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',$chr);
|
|
203 $svg->text('x',$chr_end_x+20,'y',$chr_start_y,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',$length{$chr});
|
|
204
|
|
205
|
|
206 if (defined($centromere{$chr}[0])) {
|
|
207 $svg->rect('x',$XOFFSET+$centromere{$chr}[0]*$Xscale,'y',$chr_start_y,'width',($centromere{$chr}[1]-$centromere{$chr}[0]+1)*$Xscale,'height',5,'stroke',"blue",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"blue");
|
|
208 }
|
|
209 for (my $i=0;$i<$#{$cluster_density{$chr}} ;$i++) {
|
|
210 if ($cluster_density{$chr}[$i]*$Yscale>40) {
|
|
211 $cluster_density{$chr}[$i]=40/$Yscale;
|
|
212 $svg->rect('x',$XOFFSET+$i*$span*$Xscale,'y',$chr_start_y-45,'width',$span*$Xscale,'height',5,'stroke',"green",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"green");
|
|
213 }
|
|
214 my $cluster_density_start_x=$XOFFSET+$i*$span*$Xscale;
|
|
215 my $cluster_density_end_x=$XOFFSET+($i+1)*$span*$Xscale;
|
|
216 my $cluster_density_start_y=$chr_start_y-$cluster_density{$chr}[$i]*$Yscale;
|
|
217 #my $cluster_density_end_y=$chr_start_y-$cluster_density{$chr}[$i+1][0]*$Yscale;
|
|
218 #$svg->line('x1',$cluster_density_start_x,'y1',$cluster_density_start_y,'x2',$cluster_density_end_x,'y2',$cluster_density_end_y,'stroke',"red",'stroke-width',$attribute{csv}{'stroke-width'});
|
|
219 $svg->rect('x',$cluster_density_start_x,'y',$chr_start_y-$cluster_density{$chr}[$i]*$Yscale,'width',$span*$Xscale,'height',$cluster_density{$chr}[$i]*$Yscale,'stroke',"red",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"red");
|
|
220 }
|
|
221
|
|
222 $chr_width=50;
|
|
223
|
|
224 #$svg->rect('x',$c_non_add_start_x,'y',$c_non_add_start_y,'width',$cluster_non_add_width,'height',$cluster_non_add_heigth,'stroke',"blue",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"blue");
|
|
225 }
|
|
226
|
|
227 my $span_k=$span/1000;
|
|
228 $svg->text('x',200,'y',$chr_start_y+20,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',"$mark sRNA rpmk \/ $span_k kb");
|
|
229
|
|
230 $svg->rect('x',600,'y',500,'width',10,'height',10,'stroke',"red",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"red");
|
|
231 $svg->text('x',620,'y',510,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',"sRNA rpkm");
|
|
232
|
|
233 if (defined($opt{cen})) {
|
|
234 $svg->rect('x',600,'y',520,'width',10,'height',10,'stroke',"blue",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"blue");
|
|
235 $svg->text('x',620,'y',530,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',"centromere");
|
|
236 }
|
|
237
|
|
238 close TXT;
|
|
239 open (OUT,">$opt{o}");
|
|
240 print OUT $svg->xmlify();
|
|
241
|
|
242 sub log2 {
|
|
243 my $n = shift;
|
|
244 return log($n)/log(2);
|
|
245 }
|
|
246
|
|
247 sub usage{
|
|
248 print <<"USAGE";
|
|
249 Version $version
|
|
250 Usage:
|
|
251 $0
|
|
252 options:
|
|
253 -span
|
|
254 -n sample cloumn
|
|
255 -mark sample name
|
|
256 -o output graph file name with html or svg extension
|
|
257 -c cluster file input
|
|
258 -out txt output
|
|
259 -l length of chr
|
|
260 -cen centromere
|
|
261 -h help
|
|
262 USAGE
|
|
263 exit(1);
|
|
264 } |