diff Roary/contrib/roary2svg/roary2svg.pl @ 0:c47a5f61bc9f draft

Uploaded
author dereeper
date Fri, 14 May 2021 20:27:06 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Roary/contrib/roary2svg/roary2svg.pl	Fri May 14 20:27:06 2021 +0000
@@ -0,0 +1,147 @@
+#!/usr/bin/env perl
+# From Torsten Seemann commit f46312e9df539c56b058f0ef25479d7297ceec89
+# https://raw.githubusercontent.com/tseemann/nullarbor/master/bin/roary2svg.pl
+use warnings;
+use strict;
+use Data::Dumper;
+use List::Util qw(min max sum);
+use List::MoreUtils qw(uniq all any);
+use Text::CSV;
+use SVG;
+
+use constant FONT_ASPECT => 0.8;
+
+my(@Options, $verbose, $taxacol, $width, $height, $acconly, 
+             $consensus, $border, $colour, $sepcolour);
+setOptions();
+
+# read gene_presence_absence.csv from stdin
+# "Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","Genome Fragment","Order within Fragment","Accessory Fragment","Accessory Order with Fragment","QC","SRR2352235","SRR2352236","SRR2352237","SRR2352238","SRR2352239","SRR2352240","SRR2352241","SRR2352242","SRR2352243","SRR2352244","SRR2352245","SRR2352246","SRR2352247","SRR2352248","SRR2352249","SRR2352250","SRR2352251","SRR2352252"
+my $csv = Text::CSV->new() or die $!;
+my $count=0;
+my @matrix;
+my @id;
+my $N;
+my $C=0;
+my @tally;  # genes per taxon
+my @is_core; # boolean for this cluster being core
+
+while (my $row = $csv->getline(\*ARGV) ) {
+  if ($count == 0) {
+    @id = splice @$row, $taxacol;
+    $N = scalar(@id);
+    print STDERR "Found $N taxa: @id\n";    
+  }
+  else {
+    my @present = map { $row->[$taxacol+$_] ? 1 : 0 } (0 .. $N-1);
+    my $num_present = sum(@present);
+    $is_core[$count] = ($num_present == $N);
+    next if $acconly and $is_core[$count];
+#    next if $panonly and all { $_==1 } @present;
+    push @{ $matrix[$_] }, $present[$_] for (0 .. $N-1);
+    $tally[$_] += $present[$_] for (0 .. $N-1);
+    $C++;
+  }
+  $count++;
+}
+print STDERR "Found $C clusters.\n";
+
+my $real_height = $height*($N+1);
+my $svg = SVG->new(width=>$width, height=>$real_height);
+my $dy = $height;
+my $fontsize = 0.75 * $dy;
+
+my $lchars = max( map { length($_) } @id );
+my $llen =  $fontsize * (1 + $lchars) * FONT_ASPECT;
+
+my $rchars = max( map { length("$_") } @tally);
+my $rlen = $fontsize * (1 + $rchars) * FONT_ASPECT;
+
+my $width2 = $width - $llen - $rlen;
+my $dx = $width2 / $C;
+my $font_style = { 'font-family'=>'sans-serif', 'fill'=>'black', 'font-size'=>$fontsize };
+
+print STDERR "Box = $dx x $dy px\n";
+print STDERR "Left label = $lchars chr x $fontsize px\n";
+print STDERR "Right label = $rchars chr x $fontsize px\n";
+
+for my $j (0 .. $N-1) {
+  for my $i (0 .. $C-1) {
+#    print STDERR "$j $i $matrix[$j][$i]\n";
+    if ($matrix[$j][$i]) {
+      # box for each present gene
+      $svg->rectangle( 
+          'x' => $llen+$i*$dx, 'y' => $j*$dy, 'width' => $dx, 'height' => $dy-1, 
+          'style' => { fill=>$colour, opacity=>($is_core[$i] ? 1 : 0.75) },
+      );      
+    }
+  }
+  # taxon label for each row
+  $svg->text( x=>$fontsize, y=>($j+0.75)*$dy, -cdata=>$id[$j], style=>$font_style );
+  # number of genes for each row
+  $svg->text( x=>$llen+$width2+$fontsize, y=>($j+0.75)*$dy, -cdata=>$tally[$j], style=>$font_style );
+  # separator line
+  my $ypos = ($j+1)*$dy;
+  $svg->line( x1=>0, y1=>$ypos, x2=>$width, y2=>$ypos, style=>{stroke=>$sepcolour});
+}
+
+# bottom label
+my $bottom_text = "$N taxa, $C clusters";
+$bottom_text .= $acconly ? " (accessory only)" : " (core + accessory)";
+$svg->text( x=>$llen, y=>($N+0.75)*$dy, -cdata=>$bottom_text, style=>$font_style );
+
+# border
+if ($border) {
+  $svg->rectangle( 
+    'x' => 0, 'y' => 0, 'width' => $width, 'height' => $real_height, 
+    'style' => { stroke=>$sepcolour, fill=>'none' },
+  );      
+}
+
+print STDERR "Writing SVG file\n";
+print STDOUT $svg->xmlify;
+
+print STDERR "Done.\n";
+
+#----------------------------------------------------------------------
+# Option setting routines
+
+sub setOptions {
+  use Getopt::Long;
+
+  @Options = (
+    {OPT=>"help",    VAR=>\&usage,             DESC=>"This help"},
+    {OPT=>"verbose!",  VAR=>\$verbose, DEFAULT=>0, DESC=>"Verbose output"},
+    {OPT=>"width=i",  VAR=>\$width, DEFAULT=>1024, DESC=>"Canvas width"},
+    {OPT=>"height=i",  VAR=>\$height, DEFAULT=>20, DESC=>"Row height"},
+    {OPT=>"taxacolumn=i",  VAR=>\$taxacol, DEFAULT=>14, DESC=>"Column in gpa.csv where taxa begin"},
+    {OPT=>"colour=s",  VAR=>\$colour, DEFAULT=>'DimGray', DESC=>"Colour of core cells"},
+    {OPT=>"sepcolour=s",  VAR=>\$sepcolour, DEFAULT=>'LightGray', DESC=>"Colour of horizontal separators/borders"},
+    {OPT=>"acconly!",  VAR=>\$acconly, DEFAULT=>0, DESC=>"Only draw accessory (non-core) genes"},
+#    {OPT=>"consensus!",  VAR=>\$consensus, DEFAULT=>0, DESC=>"Add consensus row"},
+    {OPT=>"border!",  VAR=>\$border, DEFAULT=>0, DESC=>"Add outline border"},
+  );
+
+  (!@ARGV) && (usage());
+
+  &GetOptions(map {$_->{OPT}, $_->{VAR}} @Options) || usage();
+
+  # Now setup default values.
+  foreach (@Options) {
+    if (defined($_->{DEFAULT}) && !defined(${$_->{VAR}})) {
+      ${$_->{VAR}} = $_->{DEFAULT};
+    }
+  }
+}
+
+sub usage {
+  print "Usage: $0 [options] gene_presence_absence.csv > pan_genome.svg\n";
+  foreach (@Options) {
+    printf "  --%-13s %s%s.\n",$_->{OPT},$_->{DESC},
+           defined($_->{DEFAULT}) ? " (default '$_->{DEFAULT}')" : "";
+  }
+  exit(1);
+}
+ 
+#----------------------------------------------------------------------
+