Mercurial > repos > md-anderson-bioinformatics > matrix_manipulation
comparison bar_chart_plot.py @ 1:f1bcd79cd923 draft default tip
Uploaded
author | insilico-bob |
---|---|
date | Tue, 27 Nov 2018 14:20:40 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:7f12c81e2083 | 1:f1bcd79cd923 |
---|---|
1 #!/usr/bin/env python | |
2 """ | |
3 histogram_gnuplot.py <datafile> <xtic column> <column_list> <title> <ylabel> <yrange_min> <yrange_max> <graph_file> <img_size> | |
4 a generic histogram builder based on gnuplot backend | |
5 | |
6 data_file - tab delimited file with data | |
7 xtic_column - column containing labels for x ticks [integer, 0 means no ticks] | |
8 column_list - comma separated list of columns to plot | |
9 title - title for the entire histogram | |
10 ylabel - y axis label | |
11 yrange_min - minimal value at the y axis (integer) | |
12 yrange_max - maximal value at the y_axis (integer) | |
13 to set yrange to autoscaling assign 0 to yrange_min and yrange_max | |
14 graph_file - file to write histogram image to | |
15 img_size - as X,Y pair in pixels (e.g., 800,600 or 600,800 etc.) | |
16 | |
17 | |
18 This tool requires gnuplot and gnuplot.py | |
19 | |
20 anton nekrutenko | anton@bx.psu.edu | |
21 """ | |
22 | |
23 import string | |
24 import sys | |
25 import tempfile | |
26 | |
27 import Gnuplot | |
28 import Gnuplot.funcutils | |
29 | |
# Explicit check instead of ``assert`` so the guard is not stripped when the
# interpreter runs with -O.
if sys.version_info[:2] < (2, 4):
    sys.exit("This script requires Python 2.4 or newer\n")
31 | |
32 | |
def stop_err(msg):
    """Write *msg* to stderr and terminate the script with a failure status.

    msg - text written to stderr verbatim (no newline is appended).

    Raises SystemExit with exit code 1, so the calling process can tell the
    run failed even when it only inspects the exit status.
    """
    sys.stderr.write(msg)
    # A bare sys.exit() would report success (status 0) after an error.
    sys.exit(1)
36 | |
37 | |
def _write_plot_data(in_file, out_file, col_list, xtic):
    """Copy the plottable rows of in_file to out_file.

    Each valid input line becomes one tab-separated output row holding the
    float value of every column in col_list, followed by the x tick label
    (column xtic when xtic > 0, the 0-based line index when xtic == 0).

    Blank lines, lines starting with '#', and lines where a requested column
    (or the tick label column) is missing or non-numeric are skipped.

    Returns (valid_count, skipped_indexes) where skipped_indexes lists the
    0-based indexes of the skipped lines in input order.
    """
    valid_count = 0
    skipped_indexes = []
    for i, line in enumerate(in_file):
        line = line.rstrip('\r\n')
        row = None
        if line and not line.startswith('#'):
            fields = line.split('\t')
            try:
                row = [str(float(fields[int(col) - 1])) for col in col_list]
                if xtic > 0:
                    # Inside the try so a short line is skipped, not fatal.
                    row.append(fields[xtic - 1])
                elif xtic == 0:
                    row.append(str(i))
            except (ValueError, IndexError):
                row = None
        if row is None:
            skipped_indexes.append(i)
        else:
            out_file.write('\t'.join(row) + '\n')
            valid_count += 1
    return valid_count, skipped_indexes


def _build_plot_command(tmpFileName, col_list, xtic):
    """Return the comma separated 'using' clauses for the gnuplot plot call."""
    if xtic > 0:
        # The tick label is always written right after the numeric columns,
        # so its position does not depend on whichever row was parsed last.
        first = "'%s' using 1:xticlabels(%s) ti 'Column %s'" % (
            tmpFileName, len(col_list) + 1, col_list[0])
    else:
        first = "'%s' using 1 ti 'Column %s'" % (tmpFileName, col_list[0])
    clauses = [first]
    for pos, col in enumerate(col_list[1:]):
        # Data columns are 1-based; the first one was handled above.
        clauses.append("'%s' using %s t 'Column %s'" % (tmpFileName, pos + 2, col))
    return ', '.join(clauses)


def main(tmpFileName):
    """Plot selected columns of a tab-delimited file as a clustered histogram.

    All settings are read from sys.argv: [1] data file, [2] xtic column,
    [3] comma separated column list, [4] title, [5] y label, [6] y min,
    [7] y max, [8] output image path, [9] image size.  The numeric values of
    the requested columns are written to tmpFileName, which is then passed to
    gnuplot as the data source.

    tmpFileName - path of the scratch data file; it is truncated and
                  rewritten here.

    Missing or malformed arguments, an unreadable data file, and gnuplot
    failures end the script through stop_err().  When no line holds valid
    numeric data a message is written to stderr and nothing is plotted.
    Skipped input lines are reported on stdout.
    """
    try:
        xtic = int(sys.argv[2])
        # str.split works on Python 2 and 3; string.split is Python 2 only.
        col_list = sys.argv[3].split(",")
        title = 'set title "' + sys.argv[4] + '"'
        ylabel = 'set ylabel "' + sys.argv[5] + '"'
        ymin = sys.argv[6]
        ymax = sys.argv[7]
        img_file = sys.argv[8]
        img_size = sys.argv[9]
        in_file = open(sys.argv[1], 'r')
    except (IndexError, ValueError, IOError, OSError):
        stop_err("Check arguments\n")

    try:
        int(col_list[0])
    except ValueError:
        in_file.close()
        stop_err('You forgot to set columns for plotting\n')

    gf = open(tmpFileName, 'w')
    try:
        valid_lines_count, skipped_lines_index = _write_plot_data(in_file, gf, col_list, xtic)
    finally:
        # Closing flushes the data to disk before gnuplot is asked to read it.
        gf.close()
        in_file.close()

    # Plot as soon as there is at least one valid row; an empty input file
    # simply falls through to the "no valid numeric data" message.
    if valid_lines_count > 0:
        g_plot_command = _build_plot_command(tmpFileName, col_list, xtic)
        yrange = 'set yrange [' + ymin + ":" + ymax + ']'

        try:
            g = Gnuplot.Gnuplot()
            g('reset')
            g('set boxwidth 0.9 absolute')
            g('set style fill solid 1.00 border -1')
            g('set style histogram clustered gap 5 title offset character 0, 0, 0')
            g('set xtics border in scale 1,0.5 nomirror rotate by 90 offset character 0, 0, 0')
            g('set key invert reverse Left outside')
            if xtic == 0:
                g('unset xtics')
            g(title)
            g(ylabel)
            g_term = 'set terminal png tiny size ' + img_size
            g(g_term)
            g_out = 'set output "' + img_file + '"'
            # Equal bounds (e.g. 0 and 0) mean "leave the y range on autoscale".
            if ymin != ymax:
                g(yrange)
            g(g_out)
            g('set style data histograms')
            g.plot(g_plot_command)
        except Exception:
            stop_err("Gnuplot error: Data cannot be plotted")
    else:
        sys.stderr.write('Column(s) %s of your dataset do not contain valid numeric data' % sys.argv[3])

    if skipped_lines_index:
        sys.stdout.write('\nWARNING. You dataset contain(s) %d invalid lines starting with line #%d. These lines were skipped while building the graph.\n' % (len(skipped_lines_index), skipped_lines_index[0] + 1))
133 | |
134 | |
if __name__ == "__main__":
    Gnuplot.gp.GnuplotOpts.default_term = 'png'
    # The temporary file is created here rather than inside main(): creating
    # it there seemed to get the file removed before gnuplot had a chance to
    # access it.  Keeping the object bound to a name keeps the file in place
    # while main() runs.
    gp_data_file = tempfile.NamedTemporaryFile('w')
    main(gp_data_file.name)