0
|
1 #!/usr/bin/env python
|
|
2
|
|
3 # usage: python histogram.py input_file column bins output_file style
|
|
4 import sys, os
|
|
5 import matplotlib; matplotlib.use('Agg')
|
|
6
|
|
7 from pylab import *
|
|
8
|
|
# Refuse to run on interpreters older than Python 2.4.
assert not sys.version_info[:2] < (2, 4)
|
|
10
|
|
def stop_err(msg):
    """Write *msg* to standard error and terminate the process.

    The message is written exactly as given (no newline is appended).
    The process exits with status 1 so that callers inspecting the exit
    code see the failure; a bare ``sys.exit()`` would report success (0).

    Raises:
        SystemExit: always, with exit code 1.
    """
    sys.stderr.write(msg)
    sys.exit(1)
|
|
14
|
|
if __name__ == '__main__':
    # Command line:
    #   histogram.py input_file column bins_or_style output_file mode
    #
    # mode == 'hist' draws a histogram and the third argument is the number
    # of bins.  Any other mode plots the values in file order and the third
    # argument selects the plot style: 'P' -> points, 'LP' -> points joined
    # by lines, anything else -> plain line.
    #
    # NOTE: only constructs that are valid on both Python 2.4 and Python 3
    # are used here (no `with`, no `except ... as`), matching the version
    # guard at the top of the file.

    if len(sys.argv) != 6:
        # stop_err() terminates the process, nothing after it would run.
        stop_err('Usage: python histogram.py input_file column bins output_file style')

    inp_file = sys.argv[1]
    out_file = sys.argv[4]
    mode = sys.argv[5]
    HIST = mode == 'hist'

    # parse the numeric arguments
    num_bins = None
    try:
        # int(float(...)) accepts values such as "3.0" as well as "3".
        col = int(float(sys.argv[2]))
        if HIST:
            num_bins = int(float(sys.argv[3]))
    except (ValueError, OverflowError):
        # Report the arguments that were actually parsed (column and bins).
        stop_err('Parameter were not numbers %s, %s' % (sys.argv[2], sys.argv[3]))

    # validate arguments
    if col < 1:
        # Columns are 1-based; 0 or a negative value would otherwise index
        # silently from the end of each row.
        stop_err('Column must be a positive integer, got %s' % sys.argv[2])
    if HIST and num_bins < 1:
        stop_err('Number of bins must be a positive integer, got %s' % sys.argv[3])

    if HIST:
        style = None
        print("Histogram on column %s (%s bins)" % (col, num_bins))
    else:
        # hack, this parameter is the plotting style for scatter plots
        style = {'P': 'o', 'LP': 'o-'}.get(sys.argv[3], '-')
        print("Scatterplot on column %s" % (col))

    # read the file: one value per non-blank, non-comment line
    xcol = col - 1
    values = []
    count = 0
    try:
        infile = open(inp_file)
        try:
            for line in infile:
                count += 1
                line = line.strip()
                if line and line[0] != '#':
                    values.append(float(line.split()[xcol]))
        finally:
            infile.close()
    except IOError:
        # sys.exc_info() keeps the syntax valid on old and new interpreters.
        stop_err('%s' % sys.exc_info()[1])
    except (ValueError, IndexError):
        # ValueError: the field is not a number; IndexError: the row has
        # fewer than `col` fields.  Include the position so the user can
        # find the offending line.
        stop_err("Non numerical data at line %d, column %d: %s"
                 % (count, col, sys.exc_info()[1]))

    # plot the data
    if HIST:
        # Unnormalised counts are the default; the explicit `normed`
        # keyword is omitted because newer matplotlib releases removed it.
        hist(values, bins=num_bins)
    else:
        plot(values, style)

    xlabel('values')
    ylabel('counts')

    if HIST:
        # Use the requested bin count; the edge array returned by hist()
        # can hold one more entry than there are bins.
        title('Histogram of values over column %s (%s bins)' % (col, num_bins))
    else:
        title('Scatterplot over column %s' % col)
    grid(True)

    # the plotter detects types by file extension
    png_out = out_file + '.png'  # force it to png
    savefig(png_out)

    # shuffle it back and clean up; the temporary file is removed even if
    # copying it to the real output path fails
    try:
        src = open(png_out, 'rb')
        try:
            data = src.read()
        finally:
            src.close()

        dst = open(out_file, 'wb')
        try:
            dst.write(data)
        finally:
            dst.close()
    finally:
        os.remove(png_out)
|