Mercurial > repos > md-anderson-bioinformatics > matrix_manipulation
view bar_chart_plot.py @ 1:f1bcd79cd923 draft default tip
Uploaded
author | insilico-bob |
---|---|
date | Tue, 27 Nov 2018 14:20:40 -0500 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env python
"""
histogram_gnuplot.py <datafile> <xtic column> <column_list> <title> <ylabel> <yrange_min> <yrange_max> <graph_file> <img_size>

a generic histogram builder based on gnuplot backend

   data_file    - tab delimited file with data
   xtic_column  - column containing labels for x ticks [integer, 0 means no ticks]
   column_list  - comma separated list of columns to plot
   title        - title for the entire histogram
   ylabel       - y axis label
   yrange_min   - minimal value at the y axis (integer)
   yrange_max   - maximal value at the y_axis (integer)
                  to set yrange to autoscaling assign 0 to yrange_min and yrange_max
   graph_file   - file to write histogram image to
   img_size     - as X,Y pair in pixels (e.g., 800,600 or 600,800 etc.)

   This tool required gnuplot and gnuplot.py

anton nekrutenko | anton@bx.psu.edu
"""

import string  # noqa: F401 -- no longer used below; kept so the import block is unchanged
import sys
import tempfile

import Gnuplot
import Gnuplot.funcutils

assert sys.version_info[:2] >= (2, 4)


def stop_err(msg):
    """Write *msg* to stderr and terminate the process with exit status 1."""
    sys.stderr.write(msg)
    # A bare sys.exit() reports success (status 0) even though this is the
    # error path; exit non-zero so callers that check the status see the failure.
    sys.exit(1)


def _write_plot_data(in_file, out_file, col_list, xtic):
    """Copy the plottable rows of *in_file* to *out_file* as tab-separated text.

    Each written row holds the selected columns converted to floats, followed
    by one trailing column: the x tick label (``xtic > 0``) or the 0-based
    input line index (``xtic == 0``).  Blank lines, lines starting with '#',
    and lines whose selected columns (or x tick label column) are missing or
    non-numeric are skipped.

    Returns a ``(valid_row_count, skipped_line_indexes)`` tuple, where the
    indexes are 0-based positions in *in_file*.
    """
    valid_rows = 0
    skipped_indexes = []
    for i, line in enumerate(in_file):
        line = line.rstrip('\r\n')
        if not line or line.startswith('#'):
            skipped_indexes.append(i)
            continue
        fields = line.split('\t')
        try:
            row = [str(float(fields[int(col) - 1])) for col in col_list]
            if xtic > 0:
                row.append(fields[xtic - 1])
            elif xtic == 0:
                row.append(str(i))
        except (IndexError, ValueError):
            skipped_indexes.append(i)
            continue
        out_file.write('\t'.join(row))
        out_file.write('\n')
        valid_rows += 1
    return valid_rows, skipped_indexes


def _build_plot_command(data_file, col_list, xtic):
    """Return the argument of the gnuplot ``plot`` statement for *col_list*.

    The n-th selected column is stored as column n of *data_file*; the x tick
    labels, when requested, live in the column right after the last one.
    """
    if xtic > 0:
        # Use the known layout of the data file rather than the length of
        # whichever row happened to be parsed last.
        clauses = ["'%s' using 1:xticlabels(%s) ti 'Column %s'" % (data_file, len(col_list) + 1, col_list[0])]
    else:
        clauses = ["'%s' using 1 ti 'Column %s'" % (data_file, col_list[0])]
    for position in range(2, len(col_list) + 1):
        clauses.append("'%s' using %s t 'Column %s'" % (data_file, position, col_list[position - 1]))
    return ', '.join(clauses)


def _gnuplot_setup_commands(xtic, title, ylabel, ymin, ymax, img_file, img_size):
    """Return, in order, the gnuplot commands issued before the plot statement.

    *title* and *ylabel* are complete ``set title`` / ``set ylabel`` commands;
    *ymin* and *ymax* are the raw command-line strings, and the y range is
    only set when they differ.
    """
    commands = [
        'reset',
        'set boxwidth 0.9 absolute',
        'set style fill solid 1.00 border -1',
        'set style histogram clustered gap 5 title offset character 0, 0, 0',
        'set xtics border in scale 1,0.5 nomirror rotate by 90 offset character 0, 0, 0',
        'set key invert reverse Left outside',
    ]
    if xtic == 0:
        commands.append('unset xtics')
    commands.append(title)
    commands.append(ylabel)
    commands.append('set terminal png tiny size ' + img_size)
    if ymin != ymax:
        commands.append('set yrange [' + ymin + ":" + ymax + ']')
    commands.append('set output "' + img_file + '"')
    commands.append('set style data histograms')
    return commands


def main(tmpFileName):
    """Build a clustered histogram image from the file named on the command line.

    Reads its settings from ``sys.argv[1:10]`` (see the module docstring),
    writes the plottable rows to *tmpFileName*, and asks gnuplot to render
    them to the requested image file.  Exits through ``stop_err`` when the
    arguments are unusable or gnuplot fails; writes a message to stderr when
    no row could be plotted, and a warning to stdout when lines were skipped.
    """
    # NOTE(review): title, ylabel, img_file and img_size are interpolated into
    # gnuplot commands unescaped; a double quote in any of them breaks the
    # command. Confirm the caller sanitizes these values.
    try:
        xtic = int(sys.argv[2])
        col_list = sys.argv[3].split(",")
        title = 'set title "' + sys.argv[4] + '"'
        ylabel = 'set ylabel "' + sys.argv[5] + '"'
        ymin = sys.argv[6]
        ymax = sys.argv[7]
        img_file = sys.argv[8]
        img_size = sys.argv[9]
        in_file = open(sys.argv[1], 'r')
    except (IndexError, ValueError, IOError):
        stop_err("Check arguments\n")

    # try/finally rather than `with` keeps the script parseable on the
    # Python 2.4 floor asserted at import time.
    try:
        try:
            int(col_list[0])
        except ValueError:
            stop_err('You forgot to set columns for plotting\n')

        gf = open(tmpFileName, 'w')
        try:
            valid_rows, skipped_lines_index = _write_plot_data(in_file, gf, col_list, xtic)
        finally:
            # Close (and thereby flush) the data file before gnuplot is told
            # to read it; otherwise gnuplot may see a truncated file.
            gf.close()
    finally:
        in_file.close()

    skipped_lines_count = len(skipped_lines_index)

    # Plot whenever at least one row is usable.  Counting valid rows directly
    # also covers an empty input file, where no loop index is ever bound.
    if valid_rows > 0:
        g_plot_command = _build_plot_command(tmpFileName, col_list, xtic)
        try:
            g = Gnuplot.Gnuplot()
            for command in _gnuplot_setup_commands(xtic, title, ylabel, ymin, ymax, img_file, img_size):
                g(command)
            g.plot(g_plot_command)
        except Exception:
            stop_err("Gnuplot error: Data cannot be plotted")
    else:
        sys.stderr.write('Column(s) %s of your dataset do not contain valid numeric data' % sys.argv[3])

    if skipped_lines_count > 0:
        sys.stdout.write('\nWARNING. You dataset contain(s) %d invalid lines starting with line #%d.  These lines were skipped while building the graph.\n' % (skipped_lines_count, skipped_lines_index[0] + 1))


if __name__ == "__main__":
    # The tempfile initialization is here because while inside the main() it seems to create a condition
    # when the file is removed before gnuplot has a chance of accessing it
    gp_data_file = tempfile.NamedTemporaryFile('w')
    Gnuplot.gp.GnuplotOpts.default_term = 'png'
    main(gp_data_file.name)