annotate Roary/contrib/roary2svg/roary2svg.pl @ 0:c47a5f61bc9f draft

Uploaded
author dereeper
date Fri, 14 May 2021 20:27:06 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
1 #!/usr/bin/env perl
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
2 # From Torsten Seemann commit f46312e9df539c56b058f0ef25479d7297ceec89
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
3 # https://raw.githubusercontent.com/tseemann/nullarbor/master/bin/roary2svg.pl
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
4 use warnings;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
5 use strict;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
6 use Data::Dumper;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
7 use List::Util qw(min max sum);
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
8 use List::MoreUtils qw(uniq all any);
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
9 use Text::CSV;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
10 use SVG;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
11
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
12 use constant FONT_ASPECT => 0.8;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
13
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
14 my(@Options, $verbose, $taxacol, $width, $height, $acconly,
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
15 $consensus, $border, $colour, $sepcolour);
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
16 setOptions();
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
17
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
18 # read gene_presence_absence.csv from stdin
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
19 # "Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","Genome Fragment","Order within Fragment","Accessory Fragment","Accessory Order with Fragment","QC","SRR2352235","SRR2352236","SRR2352237","SRR2352238","SRR2352239","SRR2352240","SRR2352241","SRR2352242","SRR2352243","SRR2352244","SRR2352245","SRR2352246","SRR2352247","SRR2352248","SRR2352249","SRR2352250","SRR2352251","SRR2352252"
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
20 my $csv = Text::CSV->new() or die $!;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
21 my $count=0;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
22 my @matrix;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
23 my @id;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
24 my $N;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
25 my $C=0;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
26 my @tally; # genes per taxon
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
27 my @is_core; # boolean for this cluster being core
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
28
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
29 while (my $row = $csv->getline(\*ARGV) ) {
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
30 if ($count == 0) {
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
31 @id = splice @$row, $taxacol;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
32 $N = scalar(@id);
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
33 print STDERR "Found $N taxa: @id\n";
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
34 }
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
35 else {
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
36 my @present = map { $row->[$taxacol+$_] ? 1 : 0 } (0 .. $N-1);
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
37 my $num_present = sum(@present);
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
38 $is_core[$count] = ($num_present == $N);
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
39 next if $acconly and $is_core[$count];
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
40 # next if $panonly and all { $_==1 } @present;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
41 push @{ $matrix[$_] }, $present[$_] for (0 .. $N-1);
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
42 $tally[$_] += $present[$_] for (0 .. $N-1);
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
43 $C++;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
44 }
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
45 $count++;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
46 }
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
47 print STDERR "Found $C clusters.\n";
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
48
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
49 my $real_height = $height*($N+1);
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
50 my $svg = SVG->new(width=>$width, height=>$real_height);
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
51 my $dy = $height;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
52 my $fontsize = 0.75 * $dy;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
53
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
54 my $lchars = max( map { length($_) } @id );
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
55 my $llen = $fontsize * (1 + $lchars) * FONT_ASPECT;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
56
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
57 my $rchars = max( map { length("$_") } @tally);
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
58 my $rlen = $fontsize * (1 + $rchars) * FONT_ASPECT;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
59
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
60 my $width2 = $width - $llen - $rlen;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
61 my $dx = $width2 / $C;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
62 my $font_style = { 'font-family'=>'sans-serif', 'fill'=>'black', 'font-size'=>$fontsize };
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
63
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
64 print STDERR "Box = $dx x $dy px\n";
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
65 print STDERR "Left label = $lchars chr x $fontsize px\n";
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
66 print STDERR "Right label = $rchars chr x $fontsize px\n";
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
67
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
68 for my $j (0 .. $N-1) {
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
69 for my $i (0 .. $C-1) {
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
70 # print STDERR "$j $i $matrix[$j][$i]\n";
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
71 if ($matrix[$j][$i]) {
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
72 # box for each present gene
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
73 $svg->rectangle(
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
74 'x' => $llen+$i*$dx, 'y' => $j*$dy, 'width' => $dx, 'height' => $dy-1,
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
75 'style' => { fill=>$colour, opacity=>($is_core[$i] ? 1 : 0.75) },
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
76 );
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
77 }
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
78 }
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
79 # taxon label for each row
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
80 $svg->text( x=>$fontsize, y=>($j+0.75)*$dy, -cdata=>$id[$j], style=>$font_style );
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
81 # number of genes for each row
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
82 $svg->text( x=>$llen+$width2+$fontsize, y=>($j+0.75)*$dy, -cdata=>$tally[$j], style=>$font_style );
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
83 # separator line
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
84 my $ypos = ($j+1)*$dy;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
85 $svg->line( x1=>0, y1=>$ypos, x2=>$width, y2=>$ypos, style=>{stroke=>$sepcolour});
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
86 }
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
87
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
88 # bottom label
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
89 my $bottom_text = "$N taxa, $C clusters";
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
90 $bottom_text .= $acconly ? " (accessory only)" : " (core + accessory)";
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
91 $svg->text( x=>$llen, y=>($N+0.75)*$dy, -cdata=>$bottom_text, style=>$font_style );
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
92
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
93 # border
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
94 if ($border) {
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
95 $svg->rectangle(
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
96 'x' => 0, 'y' => 0, 'width' => $width, 'height' => $real_height,
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
97 'style' => { stroke=>$sepcolour, fill=>'none' },
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
98 );
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
99 }
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
100
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
101 print STDERR "Writing SVG file\n";
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
102 print STDOUT $svg->xmlify;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
103
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
104 print STDERR "Done.\n";
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
105
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
106 #----------------------------------------------------------------------
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
107 # Option setting routines
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
108
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
109 sub setOptions {
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
110 use Getopt::Long;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
111
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
112 @Options = (
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
113 {OPT=>"help", VAR=>\&usage, DESC=>"This help"},
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
114 {OPT=>"verbose!", VAR=>\$verbose, DEFAULT=>0, DESC=>"Verbose output"},
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
115 {OPT=>"width=i", VAR=>\$width, DEFAULT=>1024, DESC=>"Canvas width"},
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
116 {OPT=>"height=i", VAR=>\$height, DEFAULT=>20, DESC=>"Row height"},
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
117 {OPT=>"taxacolumn=i", VAR=>\$taxacol, DEFAULT=>14, DESC=>"Column in gpa.csv where taxa begin"},
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
118 {OPT=>"colour=s", VAR=>\$colour, DEFAULT=>'DimGray', DESC=>"Colour of core cells"},
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
119 {OPT=>"sepcolour=s", VAR=>\$sepcolour, DEFAULT=>'LightGray', DESC=>"Colour of horizontal separators/borders"},
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
120 {OPT=>"acconly!", VAR=>\$acconly, DEFAULT=>0, DESC=>"Only draw accessory (non-core) genes"},
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
121 # {OPT=>"consensus!", VAR=>\$consensus, DEFAULT=>0, DESC=>"Add consensus row"},
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
122 {OPT=>"border!", VAR=>\$border, DEFAULT=>0, DESC=>"Add outline border"},
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
123 );
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
124
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
125 (!@ARGV) && (usage());
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
126
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
127 &GetOptions(map {$_->{OPT}, $_->{VAR}} @Options) || usage();
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
128
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
129 # Now setup default values.
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
130 foreach (@Options) {
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
131 if (defined($_->{DEFAULT}) && !defined(${$_->{VAR}})) {
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
132 ${$_->{VAR}} = $_->{DEFAULT};
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
133 }
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
134 }
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
135 }
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
136
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
137 sub usage {
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
138 print "Usage: $0 [options] gene_presence_absence.csv > pan_genome.svg\n";
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
139 foreach (@Options) {
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
140 printf " --%-13s %s%s.\n",$_->{OPT},$_->{DESC},
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
141 defined($_->{DEFAULT}) ? " (default '$_->{DEFAULT}')" : "";
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
142 }
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
143 exit(1);
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
144 }
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
145
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
146 #----------------------------------------------------------------------
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
147