view GALAXY_FILES/tools/EMBER/Make_Logo_Galaxy.pl @ 3:037c3edda16e

Uploaded
author mmaiensc
date Thu, 22 Mar 2012 13:49:52 -0400
parents 003f802d4c7d
children
line wrap: on
line source

#!/usr/bin/perl
# make a logo-ish gnu-plottable representation of a score matrix

use Getopt::Long;

#
# command line arguments
#
# Usage text; it is appended to every option-related error message.
$options = <<'END_OF_USAGE';
Usage: ./Make_Logo.pl <OPTIONS>
	-m  .model file from EMBER (required)
	-c  comparison list (used to make labels for the x-axis)
	-o  output (required)
	-a  include comparisons not used by EMBER (preceded by # in the .model file: y or n, default n)
	-f  image file format (default 1)
	     1 - eps
	     2 - cgm
	-k  key on/off (y or n, default y)
	-lx x labels on/off (y or n, default y)
	-ly y labels on/off (y or n, default y)
	-yr set yrange manually (enter <min>,<max>, e.g. -1.5,3.5; default chosen automatically)

END_OF_USAGE

# Defaults, used for any option not given on the command line.
# The -yr placeholder is a sentinel meaning "no range was supplied".
($m, $c, $o) = ("", "", "");
$a  = "n";
$f  = 1;
($k, $lx, $ly) = ("y", "y", "y");
$yr = "zzzzzzzzzzz";

# Parse @ARGV; -f must be an integer, every other option takes a string.
my $parsed_ok = GetOptions(
	'm=s'  => \$m,
	'c=s'  => \$c,
	'o=s'  => \$o,
	'a=s'  => \$a,
	'f=i'  => \$f,
	'k=s'  => \$k,
	'lx=s' => \$lx,
	'ly=s' => \$ly,
	'yr=s' => \$yr,
);
die "\n$options" unless $parsed_ok;

#
# validate the parsed options
#
# Every failure is reported with die, i.e. on STDERR and with a non-zero exit
# status, the same way a GetOptions failure is reported.  Previously these
# messages were printed to STDOUT and followed by a bare exit (status 0), so
# a rejected invocation looked like a successful run to the caller.
# All messages end in a newline, so die does not append a file/line suffix.

# required arguments
die "Error: set a value for -m\n\n$options" if $m eq "";
die "Error: set a value for -o\n\n$options" if $o eq "";

# image format selector
die "Error: set -f to be 1 or 2\n\n$options" if $f != 1 && $f != 2;

# y/n switches, checked in the order -a, -k, -lx, -ly
foreach my $switch ( [ 'a', $a ], [ 'k', $k ], [ 'lx', $lx ], [ 'ly', $ly ] ) {
	my ($flag, $value) = @{$switch};
	die "Error: set -$flag to be y or n\n\n$options" if $value ne "y" && $value ne "n";
}

# a user-supplied -yr (anything other than the sentinel default) must contain a comma
if( $yr ne "zzzzzzzzzzz" && $yr !~ /,/ ){
	die "Error: if setting -yr, choose a range delimited by a comma\n\n$options";
}

# The data file written for gnuplot is named after the output path
# ("$name.logo").  The model file name used to be split on '.' here, but the
# result was never used, so that split has been dropped.
$name = $o;

#
# read in matrix
#
# Fills three parallel structures from the .model file:
#   @coloffs  one flag per data line: 1 if the line was kept, 0 if skipped
#   @re       for each kept line, the value after the comma in its first field
#   @mat      for each kept line, an array ref of the remaining fields
#
# Three-argument open with a lexical handle: the file name is passed through
# verbatim and can never be interpreted as an open mode or a pipe.
open(my $model_fh, '<', $m) or die "Error: can't open file $m\n";
@re = ();
@mat = ();
@coloffs = ();

# burn 2 lines
my $skipped = <$model_fh>;
$skipped = <$model_fh>;

while( my $row = <$model_fh> ){
	chomp($row);
	my @fields = split(' ', $row);

	# lines whose first field contains '#' are only kept when -a is y
	my $keep = ($a eq "y") || ($a eq "n" && $fields[0] !~ /#/);
	if( !$keep ){
		push(@coloffs, 0);
		next;
	}
	push(@coloffs, 1);

	# first field is "<index>,<value>"; a "nan" value counts as 0
	my (undef, $reval) = split(',', $fields[0]);
	$reval = 0.0 if $reval eq "nan";
	push(@re, $reval);

	# remaining fields are the matrix entries; "NA" counts as 0
	push(@mat, [ map { $_ eq "NA" ? 0.0 : $_ } @fields[1 .. $#fields] ]);
}
close($model_fh);

#
# rescale matrix so it's between 0 and 1, then multiply by re
#
# Works on the first five entries of every row: they are min-max scaled and
# then weighted by the row's value in @re.  A row whose five entries are all
# equal has no spread to scale by and is set to zero instead.
foreach my $r (0 .. $#mat) {
	my $row = $mat[$r];

	# smallest and largest of the five entries
	my ($lo, $hi) = ($row->[0], $row->[0]);
	foreach my $col (1 .. 4) {
		$lo = $row->[$col] if $lo > $row->[$col];
		$hi = $row->[$col] if $hi < $row->[$col];
	}

	foreach my $col (0 .. 4) {
		$row->[$col] = ($hi != $lo)
			? $re[$r]*(($row->[$col]-$lo)/($hi-$lo))
			: 0;
	}
}
	

#
# print out in stacked format
#
# Writes "$name.logo", one line per matrix row, with 11 space-separated
# columns:
#   1-3   minus half of entry 2, minus entry 3, minus entry 4
#   4-6   plus half of entry 2, entry 1, entry 0
#   7-11  constant zeros
# Entry 2 is split evenly between the negative and the positive stack.
#
# The open is now checked (an unwritable target used to make every printf
# fail silently), and so is the close, since buffered write errors only show
# up there.  The old $posn value was computed per row but never used and has
# been removed.
my $logo_file = "$name.logo";
open(my $logo_fh, '>', $logo_file) or die "Error: can't open file $logo_file for writing\n";
foreach my $row (@mat) {
	printf {$logo_fh} ("%f %f %f %f %f %f 0 0 0 0 0\n",
		-0.5*$row->[2], -$row->[3], -$row->[4],
		0.5*$row->[2], $row->[1], $row->[0]);
}
close($logo_fh) or die "Error: can't finish writing file $logo_file\n";

#
# make gnu-plotable file
#

# possibly read in conditions to define xtics
#
# With -c given, each line of the comparison list must hold exactly two
# whitespace-separated names.  Line number i is turned into the label
# "<first> vs <second>" only if $coloffs[i] is 1; the labels are placed at
# x = 0, 1, 2, ... in that order.  Without -c, $tics stays empty.
$tics = "";
if( $c ne "" ){
	# three-argument open on a lexical handle; the handle is closed below
	# (the old bareword handle was left open)
	open(my $comp_fh, '<', $c) or die "Error: can't open file $c\n";
	my @comps = ();
	my $line_no = 0;
	while( my $entry = <$comp_fh> ){
		chomp($entry);
		my @pair = split(' ', $entry);
		if( @pair != 2 ){
			# reported via die (STDERR, non-zero exit) instead of print + exit 0
			die "Error: comparisons list does not have 2 columns\n";
		}
		if( $coloffs[$line_no] == 1 ){
			push(@comps, sprintf("%s vs %s", $pair[0], $pair[1]));
		}
		$line_no++;
	}
	close($comp_fh);

	# One '"label" position' pair per label.  Position 0 is always emitted,
	# with an empty label if no comparison was kept, exactly as before.
	my $last = $#comps > 0 ? $#comps : 0;
	my @entries = map { sprintf("\"%s\" %i", $comps[$_], $_) } 0 .. $last;
	$tics = sprintf("set xtics(%s)\nset xtics rotate by -45", join(", ", @entries));
}

# choose format
# -f 1 selects the eps terminal, -f 2 the cgm terminal; $suff is the matching
# file suffix.
if( $f == 1 ){
	($format, $suff) = ('postscript eps color 24', 'eps');
}
elsif( $f == 2 ){
	($format, $suff) = ('cgm "Helvetica" 14', 'cgm');
}

# set xrange min and max
# half a unit of margin before index 0 and after the last matrix row
($min, $max) = (-0.5, $#mat+0.5);

# possibly turn off key, xlabel, ylabel
# each toggle is a gnuplot command when the option is "n", otherwise empty
$keytoggle = ($k  eq "n") ? "unset key"    : "";
$xtoggle   = ($lx eq "n") ? "unset xtics"  : "";
$ytoggle   = ($ly eq "n") ? "unset ylabel" : "";

# possibly set yrange
# $yrange stays empty unless -yr was given (i.e. $yr differs from its sentinel
# default); otherwise it becomes a gnuplot "set yrange" command built from
# the two comma-separated values.
$yrange = "";
if( $yr ne "zzzzzzzzzzz" ){
	my @bounds = split(',', $yr);
	if( @bounds != 2 ){
		# The old message claimed "more than two fields" although this
		# branch is also taken for fewer than two (e.g. "3,"), and it was
		# printed to STDOUT before an exit with status 0.  Report it
		# accurately, on STDERR, with a non-zero exit status.
		die "Error: -yr needs exactly two comma-separated fields (<min>,<max>)\n";
	}
	$yrange = sprintf("set yrange [%f:%f]", $bounds[0], $bounds[1]);
}

# gnuplot commands, one per entry; options that are switched off contribute
# an empty entry, which gnuplot reads as a blank line
@lines = (  "reset",
            "set style fill solid 1.00 noborder",
            "set style histogram rowstacked",
            "set style data histograms",
            "set ylabel \"Bits x direction of regulation\"",
            "set key outside",
            "set xrange [$min: $max]",
            "$tics",
            "$keytoggle",
            "$xtoggle",
            "$ytoggle",
            "$yrange",
            "set terminal $format",
            "set output \'$o\'",
            "plot \'$name.logo\' u 1 notitle lc rgbcolor \"#000000\", \'\' u 2 notitle lc rgbcolor \"#008800\", \'\' u 3 notitle lc rgbcolor \"#00FF00\", \'\' u 7 t \"++\" lc rgbcolor \"#FF0000\", \'\' u 8 t \"+\" lc rgbcolor \"#880000\", \'\' u 9 t \"0\" lc rgbcolor \"#000000\", \'\' u 10 t \"-\" lc rgbcolor \"#008800\", \'\' u 11 t \"--\" lc rgbcolor \"#00FF00\", \'\' u 4 notitle lc rgbcolor \"#000000\", \'\' u 5 notitle lc rgbcolor \"#880000\", \'\' u 6 notitle lc rgbcolor \"#FF0000\""
	 );

$command = join("\n", @lines) . "\n";

# Feed the commands straight to gnuplot's standard input.
#
# They used to be wrapped in an unquoted "gnuplot << EOF" here-document and
# run through the shell, which expands '$' and backquotes inside such a
# document; an output path or tic label containing those characters was
# therefore rewritten (or executed) by the shell before gnuplot saw it.  The
# result of system() was also ignored, so a missing or failing gnuplot went
# unnoticed.
{
	# if gnuplot exits early, get a failed print/close instead of being
	# killed by SIGPIPE
	local $SIG{PIPE} = 'IGNORE';

	open(my $gnuplot, '|-', 'gnuplot') or die "Error: can't run gnuplot: $!\n";
	print {$gnuplot} $command;

	# closing a pipe waits for the child; a false return with $! unset means
	# gnuplot itself exited with a non-zero status (available in $?)
	if( !close($gnuplot) ){
		die( $! ? "Error: can't close pipe to gnuplot: $!\n"
		        : "Error: gnuplot exited with status " . ($? >> 8) . "\n" );
	}
}