annotate bar_chart_plot.py @ 1:f1bcd79cd923 draft default tip

Uploaded
author insilico-bob
date Tue, 27 Nov 2018 14:20:40 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
1 #!/usr/bin/env python
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
2 """
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
3 histogram_gnuplot.py <datafile> <xtic column> <column_list> <title> <ylabel> <yrange_min> <yrange_max> <graph_file> <img_size>
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
4 a generic histogram builder based on gnuplot backend
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
5
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
6 data_file - tab delimited file with data
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
7 xtic_column - column containing labels for x ticks [integer, 0 means no ticks]
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
8 column_list - comma separated list of columns to plot
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
9 title - title for the entire histogram
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
10 ylabel - y axis label
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
11 yrange_min - minimal value at the y axis (integer)
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
12 yrange_max - maximal value at the y_axis (integer)
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
13 to set yrange to autoscaling assign 0 to yrange_min and yrange_max
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
14 graph_file - file to write histogram image to
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
15 img_size - as X,Y pair in pixels (e.g., 800,600 or 600,800 etc.)
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
16
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
17
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
18 This tool requires gnuplot and gnuplot.py
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
19
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
20 anton nekrutenko | anton@bx.psu.edu
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
21 """
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
22
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
23 import string
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
24 import sys
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
25 import tempfile
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
26
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
27 import Gnuplot
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
28 import Gnuplot.funcutils
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
29
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
# Refuse to run on interpreters older than Python 2.4. An explicit check is
# used instead of a bare ``assert`` because assert statements are removed
# when Python runs with -O; AssertionError is kept so the failure type is
# unchanged.
if sys.version_info[:2] < (2, 4):
    raise AssertionError("Python 2.4 or newer is required")
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
31
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
32
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
def stop_err(msg):
    """Write *msg* to stderr and terminate the script with a failure status.

    msg - text to report; it is written verbatim, so the caller supplies
          any trailing newline.

    Raises SystemExit with exit status 1 so that the calling process can
    detect the failure from the return code as well as from stderr.
    """
    sys.stderr.write(msg)
    sys.exit(1)
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
36
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
37
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
def _parse_data_line(line, col_list, xtic, line_index):
    """Turn one input line into the fields written to the gnuplot data file.

    line       - input line with its line terminator already removed
    col_list   - 1-based column numbers (as strings) holding numeric data
    xtic       - 1-based column holding the x tick label; 0 means "use the
                 line index as the label"; a negative value adds no label
    line_index - 0-based index of the line within the input file

    Returns a list of strings (the numeric columns, followed by the label
    when one applies), or None when the line is blank, starts with '#',
    lacks a requested column, or holds a non-numeric value.
    """
    if not line or line.startswith('#'):
        return None

    fields = line.split('\t')
    try:
        row = [str(float(fields[int(col) - 1])) for col in col_list]
        if xtic > 0:
            row.append(fields[xtic - 1])
        elif xtic == 0:
            row.append(str(line_index))
    except (ValueError, IndexError):
        return None
    return row


def main(tmpFileName):
    """Build a clustered bar chart from a tab-delimited file using gnuplot.

    tmpFileName - path of the scratch file that receives the cleaned data
                  and is then handed to gnuplot.

    Everything else is read from sys.argv:
        1 data file, 2 xtic column, 3 comma separated column list,
        4 title, 5 y axis label, 6 y range minimum, 7 y range maximum,
        8 output image file, 9 image size as "X,Y".

    Unusable lines (blank, comments, missing or non-numeric columns) are
    skipped; a summary of skipped lines is written to stdout. If no line
    is usable, a message is written to stderr and nothing is plotted.
    Bad arguments and gnuplot failures are reported through stop_err().
    """
    try:
        data_path = sys.argv[1]
        xtic = int(sys.argv[2])
        col_list = sys.argv[3].split(",")
        # NOTE(review): title and ylabel are inserted verbatim into gnuplot
        # commands; a double quote in either one ends the gnuplot string early.
        title = 'set title "' + sys.argv[4] + '"'
        ylabel = 'set ylabel "' + sys.argv[5] + '"'
        ymin = sys.argv[6]
        ymax = sys.argv[7]
        img_file = sys.argv[8]
        img_size = sys.argv[9]
        in_file = open(data_path, 'r')
    except (IndexError, ValueError, IOError):
        stop_err("Check arguments\n")

    try:
        int(col_list[0])
    except ValueError:
        stop_err('You forgot to set columns for plotting\n')

    valid_rows = 0
    skipped_lines_count = 0
    first_skipped_index = None

    # Both files are closed when these blocks end, so the data is fully on
    # disk before gnuplot is asked to read it.
    with in_file:
        with open(tmpFileName, 'w') as gf:
            for line_index, line in enumerate(in_file):
                row = _parse_data_line(line.rstrip('\r\n'), col_list, xtic, line_index)
                if row is None:
                    skipped_lines_count += 1
                    if first_skipped_index is None:
                        first_skipped_index = line_index
                    continue
                gf.write('\t'.join(row))
                gf.write('\n')
                valid_rows += 1

    if valid_rows > 0:
        # Prepare the 'using' clauses of the plot statement, one per column.
        if xtic > 0:
            # The tick label is written after the numeric columns, so its
            # position in the data file is always len(col_list) + 1.
            clauses = ["'%s' using 1:xticlabels(%s) ti 'Column %s'" % (tmpFileName, len(col_list) + 1, col_list[0])]
        else:
            clauses = ["'%s' using 1 ti 'Column %s'" % (tmpFileName, col_list[0])]

        for position, col in enumerate(col_list[1:], 2):
            clauses.append("'%s' using %s t 'Column %s'" % (tmpFileName, position, col))

        g_plot_command = ', '.join(clauses)
        yrange = 'set yrange [' + ymin + ":" + ymax + ']'

        try:
            g = Gnuplot.Gnuplot()
            g('reset')
            g('set boxwidth 0.9 absolute')
            g('set style fill solid 1.00 border -1')
            g('set style histogram clustered gap 5 title offset character 0, 0, 0')
            g('set xtics border in scale 1,0.5 nomirror rotate by 90 offset character 0, 0, 0')
            g('set key invert reverse Left outside')
            if xtic == 0:
                g('unset xtics')
            g(title)
            g(ylabel)
            g('set terminal png tiny size ' + img_size)
            # Equal bounds (e.g. both 0) leave the y range on autoscale.
            if ymin != ymax:
                g(yrange)
            g('set output "' + img_file + '"')
            g('set style data histograms')
            g.plot(g_plot_command)
        except Exception:
            stop_err("Gnuplot error: Data cannot be plotted")
    else:
        sys.stderr.write('Column(s) %s of your dataset do not contain valid numeric data' % sys.argv[3])

    if skipped_lines_count > 0:
        sys.stdout.write('\nWARNING. You dataset contain(s) %d invalid lines starting with line #%d. These lines were skipped while building the graph.\n' % (skipped_lines_count, first_skipped_index + 1))
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
133
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
134
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
if __name__ == "__main__":
    Gnuplot.gp.GnuplotOpts.default_term = 'png'
    # The scratch file is created here rather than inside main(): creating
    # it inside main() seemed to let the file be removed before gnuplot had
    # a chance to read it.
    gp_data_file = tempfile.NamedTemporaryFile('w')
    main(gp_data_file.name)