Mercurial > repos > md-anderson-bioinformatics > matrix_manipulation
comparison bar_chart_plot.py @ 1:f1bcd79cd923 draft default tip
Uploaded
| author | insilico-bob |
|---|---|
| date | Tue, 27 Nov 2018 14:20:40 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:7f12c81e2083 | 1:f1bcd79cd923 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 """ | |
| 3 histogram_gnuplot.py <datafile> <xtic column> <column_list> <title> <ylabel> <yrange_min> <yrange_max> <graph_file> <img_size> | |
| 4 a generic histogram builder based on gnuplot backend | |
| 5 | |
| 6 data_file - tab delimited file with data | |
| 7 xtic_column - column containing labels for x ticks [integer, 0 means no ticks] | |
| 8 column_list - comma separated list of columns to plot | |
| 9 title - title for the entire histogram | |
| 10 ylabel - y axis label | |
| 11 yrange_min - minimal value at the y axis (integer) | |
| 12 yrange_max - maximal value at the y_axis (integer) | |
| 13 to set yrange to autoscaling assign 0 to yrange_min and yrange_max | |
| 14 graph_file - file to write histogram image to | |
| 15 img_size - as X,Y pair in pixels (e.g., 800,600 or 600,800 etc.) | |
| 16 | |
| 17 | |
| 18 This tool requires gnuplot and gnuplot.py | |
| 19 | |
| 20 anton nekrutenko | anton@bx.psu.edu | |
| 21 """ | |
| 22 | |
| 23 import string | |
| 24 import sys | |
| 25 import tempfile | |
| 26 | |
| 27 import Gnuplot | |
| 28 import Gnuplot.funcutils | |
| 29 | |
| 30 assert sys.version_info[:2] >= (2, 4) | |
| 31 | |
| 32 | |
def stop_err(msg):
    """Write *msg* to standard error and terminate the process.

    The message is written verbatim (no newline is appended).  The process
    exits with status 1 so that callers inspecting the exit code can tell
    the run failed; a bare ``sys.exit()`` would report success (status 0).

    Raises:
        SystemExit: always.
    """
    sys.stderr.write(msg)
    sys.exit(1)
| 36 | |
| 37 | |
def main(tmpFileName):
    """Plot selected numeric columns of a tab-delimited file as a histogram.

    Configuration is read from ``sys.argv``:

        1  path of the tab-delimited input file
        2  1-based column holding x tick labels (0 disables tick labels)
        3  comma separated list of 1-based columns to plot
        4  graph title
        5  y axis label
        6  y range minimum
        7  y range maximum (when equal to the minimum, no yrange is set)
        8  output image path
        9  image size passed to ``set terminal png tiny size``

    Numeric rows are written to ``tmpFileName`` (one tab-separated row per
    valid input line) and gnuplot is then asked to plot that file.  Blank
    lines, lines starting with ``#`` and lines whose selected columns are
    missing or non-numeric are skipped; a warning about them is written to
    standard output.

    Errors in the arguments or in gnuplot are reported through
    ``stop_err``, which terminates the process.
    """
    try:
        in_file = open(sys.argv[1], 'r')
        xtic = int(sys.argv[2])
        # str.split instead of string.split: the latter no longer exists in
        # Python 3, where it made every invocation fail with "Check arguments".
        col_list = sys.argv[3].split(",")
        # NOTE(review): title, ylabel, img_file and img_size are interpolated
        # into gnuplot commands unescaped; presumably the caller sanitises
        # them - confirm.
        title = 'set title "' + sys.argv[4] + '"'
        ylabel = 'set ylabel "' + sys.argv[5] + '"'
        ymin = sys.argv[6]
        ymax = sys.argv[7]
        img_file = sys.argv[8]
        img_size = sys.argv[9]
    except (IndexError, ValueError, IOError):
        stop_err("Check arguments\n")

    try:
        int(col_list[0])
    except ValueError:
        stop_err('You forgot to set columns for plotting\n')

    skipped_lines_count = 0
    skipped_lines_index = []
    valid_lines_count = 0

    gf = open(tmpFileName, 'w')
    try:
        for i, line in enumerate(in_file):
            line = line.rstrip('\r\n')
            row = None
            if line and not line.startswith('#'):
                try:
                    fields = line.split('\t')
                    row = [str(float(fields[int(col) - 1])) for col in col_list]
                    # The label lookup is inside the try so that a line too
                    # short to hold the xtic column is skipped, not fatal.
                    if xtic > 0:
                        row.append(fields[xtic - 1])
                    elif xtic == 0:
                        row.append(str(i))
                except (ValueError, IndexError):
                    row = None

            if row is None:
                skipped_lines_count += 1
                skipped_lines_index.append(i)
                continue

            gf.write('\t'.join(row))
            gf.write('\n')
            valid_lines_count += 1
    finally:
        # Close (and thereby flush) the data file before gnuplot reads it.
        gf.close()
        in_file.close()

    # Counting valid rows directly (rather than comparing the skip count with
    # the last line index) handles empty input and single-row input correctly.
    if valid_lines_count > 0:
        # Prepare 'using' clauses of the plot statement, first column first.
        if xtic > 0:
            # Tick labels are appended after the data columns, so they always
            # live in column len(col_list) + 1 of the data file.
            clauses = ["'%s' using 1:xticlabels(%s) ti 'Column %s'" % (tmpFileName, str(len(col_list) + 1), col_list[0])]
        else:
            clauses = ["'%s' using 1 ti 'Column %s'" % (tmpFileName, col_list[0])]

        # Subsequent columns: data-file column numbers are 1-based.
        for pos, col in enumerate(col_list):
            if pos == 0:
                continue
            clauses.append("'%s' using %s t 'Column %s'" % (tmpFileName, str(pos + 1), col))

        g_plot_command = ', '.join(clauses)

        yrange = 'set yrange [' + ymin + ":" + ymax + ']'

        try:
            g = Gnuplot.Gnuplot()
            g('reset')
            g('set boxwidth 0.9 absolute')
            g('set style fill solid 1.00 border -1')
            g('set style histogram clustered gap 5 title offset character 0, 0, 0')
            g('set xtics border in scale 1,0.5 nomirror rotate by 90 offset character 0, 0, 0')
            g('set key invert reverse Left outside')
            if xtic == 0:
                g('unset xtics')
            g(title)
            g(ylabel)
            g_term = 'set terminal png tiny size ' + img_size
            g(g_term)
            g_out = 'set output "' + img_file + '"'
            # Equal bounds mean "autoscale": leave yrange unset.
            if ymin != ymax:
                g(yrange)
            g(g_out)
            g('set style data histograms')
            g.plot(g_plot_command)
        except Exception:
            stop_err("Gnuplot error: Data cannot be plotted")
    else:
        sys.stderr.write('Column(s) %s of your dataset do not contain valid numeric data' % sys.argv[3])

    if skipped_lines_count > 0:
        sys.stdout.write('\nWARNING. You dataset contain(s) %d invalid lines starting with line #%d. These lines were skipped while building the graph.\n' % (skipped_lines_count, skipped_lines_index[0] + 1))
| 134 | |
if __name__ == "__main__":
    # Make PNG the default gnuplot terminal for this run.
    Gnuplot.gp.GnuplotOpts.default_term = 'png'
    # The scratch data file is created here, not inside main(): creating it
    # within main() seemed to let the file be removed before gnuplot had a
    # chance to access it.
    scratch_file = tempfile.NamedTemporaryFile('w')
    main(scratch_file.name)
