Mercurial > repos > md-anderson-bioinformatics > matrix_manipulation
diff bar_chart_plot.py @ 1:f1bcd79cd923 draft default tip
Uploaded
author | insilico-bob |
---|---|
date | Tue, 27 Nov 2018 14:20:40 -0500 |
parents | |
children |
line wrap: on
line diff
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bar_chart_plot.py Tue Nov 27 14:20:40 2018 -0500 @@ -0,0 +1,140 @@
#!/usr/bin/env python
"""
bar_chart_plot.py <datafile> <xtic column> <column_list> <title> <ylabel> <yrange_min> <yrange_max> <graph_file> <img_size>
a generic histogram builder based on gnuplot backend

   data_file   - tab delimited file with data
   xtic_column - column containing labels for x ticks [integer, 0 means no ticks]
   column_list - comma separated list of columns to plot
   title       - title for the entire histogram
   ylabel      - y axis label
   yrange_min  - minimal value at the y axis (integer)
   yrange_max  - maximal value at the y axis (integer)
                 to set yrange to autoscaling assign 0 to yrange_min and yrange_max
   graph_file  - file to write histogram image to
   img_size    - as X,Y pair in pixels (e.g., 800,600 or 600,800 etc.)


   This tool requires gnuplot and gnuplot.py

anton nekrutenko | anton@bx.psu.edu
"""

import string
import sys
import tempfile

try:
    import Gnuplot
    import Gnuplot.funcutils
except ImportError:
    # Deferred: a missing backend is reported through stop_err() at start-up
    # (see the __main__ guard) instead of as a raw import traceback.
    Gnuplot = None

assert sys.version_info[:2] >= (2, 4)


def stop_err(msg):
    """Write *msg* to stderr and terminate the script with exit status 1."""
    sys.stderr.write(msg)
    sys.exit(1)


def write_plot_rows(lines, col_list, xtic, out):
    """Convert input lines into gnuplot data rows written to *out*.

    lines    - iterable of text lines (tab delimited)
    col_list - list of 1-based column numbers (as strings) to plot
    xtic     - 1-based column holding the x tick label; 0 writes the
               0-based line index instead; negative writes no extra column
    out      - writable file-like object receiving one tab-joined row per
               valid input line

    Blank lines, lines starting with '#', and lines whose requested columns
    are missing or non-numeric are skipped.

    Returns (valid_count, skipped_lines_index) where skipped_lines_index holds
    the 0-based indices of the skipped lines in input order.
    """
    valid_count = 0
    skipped_lines_index = []
    for i, line in enumerate(lines):
        line = line.rstrip('\r\n')
        if not line or line.startswith('#'):
            skipped_lines_index.append(i)
            continue
        fields = line.split('\t')
        try:
            row = [str(float(fields[int(col) - 1])) for col in col_list]
            if xtic > 0:
                # A line too short to carry its tick label is skipped like
                # any other malformed line rather than aborting the run.
                row.append(fields[xtic - 1])
            elif xtic == 0:
                row.append(str(i))
        except (ValueError, IndexError):
            skipped_lines_index.append(i)
            continue
        out.write('\t'.join(row))
        out.write('\n')
        valid_count += 1
    return valid_count, skipped_lines_index


def build_plot_command(data_file, col_list, xtic):
    """Return the argument string for gnuplot's plot command.

    data_file - path of the file produced by write_plot_rows()
    col_list  - list of original column numbers (used for the legend titles)
    xtic      - when > 0 the first series takes its tick labels from the
                column that follows the numeric ones
    """
    if xtic > 0:
        # write_plot_rows() appends the label after the len(col_list) numeric
        # columns, so its position does not depend on any particular row.
        parts = ["'%s' using 1:xticlabels(%s) ti 'Column %s'" % (data_file, str(len(col_list) + 1), col_list[0])]
    else:
        parts = ["'%s' using 1 ti 'Column %s'" % (data_file, col_list[0])]

    for pos in range(1, len(col_list)):
        parts.append("'%s' using %s t 'Column %s'" % (data_file, str(pos + 1), col_list[pos]))

    return ', '.join(parts)


def main(tmpFileName):
    """Read the command line, build the gnuplot data file and render the chart.

    tmpFileName - path of a scratch file that receives the cleaned-up data
                  handed to gnuplot

    Arguments are taken from sys.argv as described in the module docstring.
    Calls stop_err() (exit status 1) on bad arguments or a gnuplot failure.
    """
    in_file = None
    try:
        in_file = open(sys.argv[1], 'r')
        xtic = int(sys.argv[2])
        col_list = sys.argv[3].split(",")
        # NOTE(review): title, ylabel, img_file and img_size are interpolated
        # into gnuplot commands unescaped; the caller is presumably expected
        # to sanitize them - confirm.
        title = 'set title "' + sys.argv[4] + '"'
        ylabel = 'set ylabel "' + sys.argv[5] + '"'
        ymin = sys.argv[6]
        ymax = sys.argv[7]
        img_file = sys.argv[8]
        img_size = sys.argv[9]
    except (IndexError, ValueError, IOError, OSError):
        if in_file is not None:
            in_file.close()
        stop_err("Check arguments\n")

    try:
        int(col_list[0])
    except ValueError:
        in_file.close()
        stop_err('You forgot to set columns for plotting\n')

    gf = open(tmpFileName, 'w')
    try:
        valid_count, skipped_lines_index = write_plot_rows(in_file, col_list, xtic, gf)
    finally:
        # Close (and thereby flush) the data file before gnuplot is asked to
        # read it by name.
        gf.close()
        in_file.close()
    skipped_lines_count = len(skipped_lines_index)

    if valid_count > 0:
        g_plot_command = build_plot_command(tmpFileName, col_list, xtic)

        yrange = 'set yrange [' + ymin + ":" + ymax + ']'

        try:
            g = Gnuplot.Gnuplot()
            g('reset')
            g('set boxwidth 0.9 absolute')
            g('set style fill solid 1.00 border -1')
            g('set style histogram clustered gap 5 title offset character 0, 0, 0')
            g('set xtics border in scale 1,0.5 nomirror rotate by 90 offset character 0, 0, 0')
            g('set key invert reverse Left outside')
            if xtic == 0:
                g('unset xtics')
            g(title)
            g(ylabel)
            g_term = 'set terminal png tiny size ' + img_size
            g(g_term)
            g_out = 'set output "' + img_file + '"'
            # Equal bounds (e.g. 0 and 0) leave gnuplot on autoscaling.
            if ymin != ymax:
                g(yrange)
            g(g_out)
            g('set style data histograms')
            g.plot(g_plot_command)
        except Exception:
            stop_err("Gnuplot error: Data cannot be plotted")
    else:
        sys.stderr.write('Column(s) %s of your dataset do not contain valid numeric data' % sys.argv[3])

    if skipped_lines_count > 0:
        sys.stdout.write('\nWARNING. You dataset contain(s) %d invalid lines starting with line #%d.  These lines were skipped while building the graph.\n' % (skipped_lines_count, skipped_lines_index[0] + 1))


if __name__ == "__main__":
    if Gnuplot is None:
        stop_err("This tool requires gnuplot and gnuplot.py\n")
    # The tempfile initialization is here because while inside the main() it seems to create a condition
    # when the file is removed before gnuplot has a chance of accessing it
    gp_data_file = tempfile.NamedTemporaryFile('w')
    Gnuplot.gp.GnuplotOpts.default_term = 'png'
    main(gp_data_file.name)