Mercurial > repos > dereeper > pangenome_explorer
comparison Perl/reformatHeatmapSVG.pl @ 3:e42d30da7a74 draft
Uploaded
| author | dereeper |
|---|---|
| date | Thu, 30 May 2024 11:52:25 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 2:97e4e3e818b6 | 3:e42d30da7a74 |
|---|---|
| 1 #!/usr/bin/perl | |
| 2 | |
| 3 use strict; | |
| 4 | |
| 5 my $filein = $ARGV[0]; | |
| 6 my $fileout = $ARGV[1]; | |
| 7 my $matrix = $ARGV[2]; | |
| 8 | |
| 9 | |
| 10 | |
| 11 | |
| 12 my $min_x = 100000; | |
| 13 my $max_x = 0; | |
| 14 my $min_y = 100000; | |
| 15 my $max_y = 0; | |
| 16 my $n=0; | |
| 17 open(OUT,">$fileout"); | |
| 18 open(F,$filein); | |
| 19 while(<F>){ | |
| 20 if (/<path style=" stroke:none;fill-rule:nonzero;fill/){ | |
| 21 | |
| 22 if (/d=\"M ([\s\d\.]+) L/){ | |
| 23 my @infos = split(/ /,$1); | |
| 24 my $x = $infos[0]; | |
| 25 if ($x < $min_x){$min_x = $x;} | |
| 26 if ($x > $max_x){$max_x = $x;} | |
| 27 } | |
| 28 if (/L ([\s\d\.]+) L/){ | |
| 29 my @infos = split(/ /,$1); | |
| 30 my $y = $infos[1]; | |
| 31 if ($y < $min_y){$min_y = $y;} | |
| 32 if ($y > $max_y){$max_y = $y;} | |
| 33 } | |
| 34 if (/L ([\s\d\.]+) Z/){ | |
| 35 my @infos = split(/ /,$1); | |
| 36 my $y = $infos[1]; | |
| 37 if ($y < $min_y){$min_y = $y;} | |
| 38 if ($y > $max_y){$max_y = $y;} | |
| 39 } | |
| 40 $n++; | |
| 41 } | |
| 42 else{ | |
| 43 if (!/\<\/svg\>/){ | |
| 44 print OUT $_; | |
| 45 } | |
| 46 } | |
| 47 } | |
| 48 close(F); | |
| 49 | |
| 50 my $nb_dispensable_clusters = `grep -P -c '\t0' $matrix`; | |
| 51 my $nb_samples = `awk {'print NF-1'} $matrix | head -1`; | |
| 52 | |
| 53 my $global_width = $max_x - $min_x; | |
| 54 my $width_of_one_block = $global_width / $nb_dispensable_clusters; | |
| 55 | |
| 56 my $global_height = $max_y - $min_y; | |
| 57 my $height_of_one_block = $global_height / $nb_samples; | |
| 58 | |
| 59 ########################################################### | |
| 60 # get distinct pattern of presence/absence | |
| 61 ########################################################### | |
| 62 my %patterns; | |
| 63 my $pattern_order = 0; | |
| 64 my %pattern_orders; | |
| 65 open(M,$matrix); | |
| 66 <M>; | |
| 67 while(<M>){ | |
| 68 my $line = $_; | |
| 69 $line =~s/\n//g;$line =~s/\r//g; | |
| 70 my @infos = split(/\t/,$line); | |
| 71 | |
| 72 my $pattern = ""; | |
| 73 for (my $k=1;$k<=$#infos;$k++){ | |
| 74 $pattern.=$infos[$k]; | |
| 75 } | |
| 76 | |
| 77 # print only dispensable (at least one absence) | |
| 78 if ($pattern =~/0/){ | |
| 79 if (!$patterns{$pattern}){ | |
| 80 $pattern_order++; | |
| 81 } | |
| 82 $patterns{$pattern}++; | |
| 83 $pattern_orders{$pattern_order} = $pattern; | |
| 84 } | |
| 85 } | |
| 86 close(M); | |
| 87 | |
| 88 print "Number of distinct patterns:"; | |
| 89 print scalar keys(%patterns)."\n"; | |
| 90 | |
| 91 my @colors = ("orange","green","red","blue","black","pink","yellow","brown","grey","purple","darkred"); | |
| 92 | |
| 93 my $cumul_x = 0; | |
| 94 foreach my $pattern_order(sort {$a<=>$b} keys(%pattern_orders)){ | |
| 95 my $pattern = $pattern_orders{$pattern_order}; | |
| 96 my $size = $patterns{$pattern}; | |
| 97 my $width = $size * $width_of_one_block; | |
| 98 my $x = $max_x - $cumul_x - $width; | |
| 99 | |
| 100 my $modulo = $pattern_order % 2; | |
| 101 print "$pattern_order $pattern $size $modulo\n"; | |
| 102 | |
| 103 #my $color = $colors[$pattern_order-1]; | |
| 104 my $color = $colors[$modulo]; | |
| 105 | |
| 106 my $pattern_y = $min_y-15; | |
| 107 print OUT "<rect y='$pattern_y' x='$x' width='$width' height='10' style=\"fill:$color;stroke-width:3;$color;\"/>"; | |
| 108 | |
| 109 $cumul_x += $width; | |
| 110 my @values = split(//,$pattern); | |
| 111 my $cumul_y = 0; | |
| 112 foreach my $val(@values){ | |
| 113 my $y = $max_y - $cumul_y - $height_of_one_block; | |
| 114 if ($val){ | |
| 115 print OUT "<rect y='$y' x='$x' width='$width' height='$height_of_one_block' style=\"fill:purple;stroke:purple;\"/>"; | |
| 116 } | |
| 117 $cumul_y += $height_of_one_block; | |
| 118 } | |
| 119 } | |
| 120 | |
| 121 | |
| 122 print OUT "</svg>\n"; | |
| 123 close(OUT); | |
| 124 | |
| 125 print "Min x : $min_x\n"; | |
| 126 print "Max x : $max_x\n"; | |
| 127 print "Min y : $min_y\n"; | |
| 128 print "Max y : $max_y\n"; | |
| 129 print "$n\n"; |
