comparison tools/plotting/bar_chart.py @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:9071e359b9a3
1 #!/usr/bin/env python
2
3
4 """
5 histogram_gnuplot.py <datafile> <xtic column> <column_list> <title> <ylabel> <yrange_min> <yrange_max> <graph_file> <img_size>
6 a generic histogram builder based on gnuplot backend
7
8 data_file - tab delimited file with data
9 xtic_column - column containing labels for x ticks [integer, 0 means no ticks]
10 column_list - comma separated list of columns to plot
11 title - title for the entire histogram
12 ylabel - y axis label
13 yrange_min - minimal value at the y axis (integer)
14 yrange_max - maximal value at the y axis (integer)
15 to set yrange to autoscaling assign 0 to yrange_min and yrange_max
16 graph_file - file to write histogram image to
17 img_size - as X,Y pair in pixels (e.g., 800,600 or 600,800 etc.)
18
19
20 This tool requires gnuplot and gnuplot.py
21
22 anton nekrutenko | anton@bx.psu.edu
23
24 """
25
26 import Gnuplot, Gnuplot.funcutils
27 import sys, string, tempfile, os
28
# Refuse to start on interpreters older than Python 2.4.
_MINIMUM_PYTHON = (2, 4)
assert sys.version_info[:2] >= _MINIMUM_PYTHON
30
def stop_err(msg):
    """Report a fatal error and terminate the script.

    msg -- text written verbatim to standard error (no newline is appended).

    Raises SystemExit with a non-zero status so that the calling shell or
    job runner can tell the run failed; a bare sys.exit() would report
    success (status 0) even though an error message was printed.
    """
    sys.stderr.write(msg)
    sys.exit(1)
34
def main(tmpFileName):
    """Plot selected columns of a tab-delimited file as a clustered histogram.

    The run is configured through sys.argv:
      1 - data file (tab delimited)
      2 - xtic column (integer; 0 means no tick labels)
      3 - comma separated list of columns to plot (1-based)
      4 - plot title
      5 - y axis label
      6 - yrange minimum
      7 - yrange maximum (equal to the minimum => gnuplot autoscaling)
      8 - output image file
      9 - image size as "X,Y" in pixels

    tmpFileName -- path of a scratch file. The numeric rows extracted from
                   the data file are written there and gnuplot is pointed
                   at it.

    Blank lines, lines starting with '#', and lines whose requested columns
    are missing or not numeric are skipped; a warning with their count is
    printed to stdout.  Argument problems and gnuplot failures terminate
    the script through stop_err().
    """
    # NOTE(review): title, ylabel, img_file and img_size are pasted into
    # gnuplot commands without escaping; confirm the caller sanitizes them.
    try:
        data_path = sys.argv[1]
        xtic = int(sys.argv[2])
        col_list = sys.argv[3].split(",")
        title = 'set title "' + sys.argv[4] + '"'
        ylabel = 'set ylabel "' + sys.argv[5] + '"'
        ymin = sys.argv[6]
        ymax = sys.argv[7]
        img_file = sys.argv[8]
        img_size = sys.argv[9]
        in_file = open(data_path, 'r')
    except (IndexError, ValueError, IOError):
        stop_err("Check arguments\n")

    try:
        int(col_list[0])
    except ValueError:
        in_file.close()
        stop_err('You forgot to set columns for plotting\n')

    skipped_lines_count = 0
    first_skipped_index = None
    total_lines = 0

    gf = open(tmpFileName, 'w')
    try:
        for i, line in enumerate(in_file):
            total_lines = i + 1
            line = line.rstrip('\r\n')
            row = None
            if line and not line.startswith('#'):
                try:
                    fields = line.split('\t')
                    row = [str(float(fields[int(col) - 1])) for col in col_list]
                    # The tick label (or the line index when ticks are
                    # disabled) always goes into the last output column.
                    if xtic > 0:
                        row.append(fields[xtic - 1])
                    elif xtic == 0:
                        row.append(str(i))
                except (ValueError, IndexError):
                    row = None

            if row is None:
                skipped_lines_count += 1
                if first_skipped_index is None:
                    first_skipped_index = i
                continue

            gf.write('\t'.join(row))
            gf.write('\n')
    finally:
        # Close (and thereby flush) the scratch file before gnuplot is asked
        # to read it; otherwise buffered rows may not be on disk yet.
        gf.close()
        in_file.close()

    # Compare against the number of lines read, not the last 0-based index,
    # so that a file with a single valid line is still plotted and an empty
    # file does not hit an undefined loop variable.
    if skipped_lines_count < total_lines:
        # Prepare the 'using' clauses of the plot statement.  Written rows
        # hold one value per requested column followed by the label, so the
        # label column is len(col_list) + 1 regardless of which input line
        # happened to be processed last.
        if xtic > 0:
            label_column = len(col_list) + 1
            clauses = ["'%s' using 1:xticlabels(%s) ti 'Column %s'" % (tmpFileName, str(label_column), col_list[0])]
        else:
            clauses = ["'%s' using 1 ti 'Column %s'" % (tmpFileName, col_list[0])]

        for position in range(1, len(col_list)):
            clauses.append("'%s' using %s t 'Column %s'" % (tmpFileName, str(position + 1), col_list[position]))

        g_plot_command = ', '.join(clauses)

        yrange = 'set yrange [' + ymin + ":" + ymax + ']'

        try:
            g = Gnuplot.Gnuplot()
            g('reset')
            g('set boxwidth 0.9 absolute')
            g('set style fill solid 1.00 border -1')
            g('set style histogram clustered gap 5 title offset character 0, 0, 0')
            g('set xtics border in scale 1,0.5 nomirror rotate by 90 offset character 0, 0, 0')
            g('set key invert reverse Left outside')
            if xtic == 0:
                g('unset xtics')
            g(title)
            g(ylabel)
            g('set terminal png tiny size ' + img_size)
            # Identical min and max are the documented way to request
            # autoscaling, so the explicit range is only sent otherwise.
            if ymin != ymax:
                g(yrange)
            g('set output "' + img_file + '"')
            g('set style data histograms')
            g.plot(g_plot_command)
        except Exception:
            stop_err("Gnuplot error: Data cannot be plotted")
    else:
        sys.stderr.write('Column(s) %s of your dataset do not contain valid numeric data' % sys.argv[3])

    if skipped_lines_count > 0:
        sys.stdout.write('\nWARNING. You dataset contain(s) %d invalid lines starting with line #%d. These lines were skipped while building the graph.\n' % (skipped_lines_count, first_skipped_index + 1))
136
137
if __name__ == "__main__":
    # The scratch file is created here rather than inside main(): creating it
    # there seemed to lead to the file being removed before gnuplot had a
    # chance to access it.
    scratch_file = tempfile.NamedTemporaryFile(mode='w')
    Gnuplot.gp.GnuplotOpts.default_term = 'png'
    main(scratch_file.name)
144