annotate home/ubuntu/lefse_to_export/plot_res.py @ 2:a31c10fe09c8 draft default tip

Fixed bug due to numerical approximation after normalization affecting root-level clades (e.g. "Bacteria" or "Archaea")
author george-weingart
date Tue, 07 Jul 2015 13:52:29 -0400
parents db64b6287cd6
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
1 #!/usr/bin/env python
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
2
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
3 import os,sys
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
4 import matplotlib
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
5 matplotlib.use('Agg')
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
6 from pylab import *
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
7
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
8 from lefse import *
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
9 import argparse
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
10
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
11 colors = ['r','g','b','m','c','y','k','w']
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
12
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
def read_params(args):
    """Parse the command-line options of the LDA plotting script.

    ``args`` is accepted for interface compatibility only: the parser is
    invoked without arguments, so argparse reads ``sys.argv[1:]`` itself
    regardless of what is passed in.

    Returns a plain dict mapping each option's ``dest`` name (for example
    ``'input_file'``, ``'ls'``, ``'rs'``, ``'n_scl'``) to its parsed value.
    """
    parser = argparse.ArgumentParser(description='Plot results')
    parser.add_argument('input_file', metavar='INPUT_FILE', type=str, help="tab delimited input file")
    parser.add_argument('output_file', metavar='OUTPUT_FILE', type=str, help="the file for the output image")
    # The help text used to be a copy of the OUTPUT_FILE description.
    parser.add_argument('--feature_font_size', dest="feature_font_size", type=int, default=7, help="the font size for the feature labels")
    parser.add_argument('--format', dest="format", choices=["png","svg","pdf"], default='png', type=str, help="the format for the output file")
    parser.add_argument('--dpi',dest="dpi", type=int, default=72)
    parser.add_argument('--title',dest="title", type=str, default="")
    # Both font sizes below are deliberately kept as strings, as in the
    # original interface; they are handed to matplotlib unchanged.
    parser.add_argument('--title_font_size',dest="title_font_size", type=str, default="12")
    parser.add_argument('--class_legend_font_size',dest="class_legend_font_size", type=str, default="10")
    parser.add_argument('--width',dest="width", type=float, default=7.0 )
    parser.add_argument('--height',dest="height", type=float, default=4.0, help="only for vertical histograms")
    parser.add_argument('--left_space',dest="ls", type=float, default=0.2 )
    parser.add_argument('--right_space',dest="rs", type=float, default=0.1 )
    parser.add_argument('--orientation',dest="orientation", type=str, choices=["h","v"], default="h" )
    parser.add_argument('--autoscale',dest="autoscale", type=int, choices=[0,1], default=1 )
    parser.add_argument('--background_color',dest="back_color", type=str, choices=["k","w"], default="w", help="set the color of the background")
    parser.add_argument('--subclades', dest="n_scl", type=int, default=1, help="number of label levels to be displayed (starting from the leaves, -1 means all the levels, 1 is default )")
    parser.add_argument('--max_feature_len', dest="max_feature_len", type=int, default=60, help="Maximum length of feature strings (def 60)")
    parser.add_argument('--all_feats', dest="all_feats", type=str, default="")
    args = parser.parse_args()
    return vars(args)
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
35
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
def read_data(input_file,output_file):
    """Load the feature rows to plot from a whitespace-delimited result file.

    Only lines with more than three whitespace-separated fields are kept,
    and the last field of every kept line is dropped.  The set of distinct
    values found in the third field of the kept rows is the list of classes.

    If no class is found, a message is printed, ``output_file`` is created
    (or its timestamp refreshed) as an empty placeholder and the process
    exits through ``sys.exit()``.

    Returns a dict with ``'rows'`` (list of field lists) and ``'cls'``
    (list of class names, in no particular order).
    """
    rows = []
    with open(input_file, 'r') as inp:
        for line in inp:
            fields = line.strip().split()
            if len(fields) > 3:
                rows.append(fields[:-1])
    classes = list(set([v[2] for v in rows if len(v)>2]))
    if len(classes) < 1:
        print("No differentially abundant features found in "+input_file)
        # Equivalent of `touch`, done without a shell so that paths holding
        # spaces or shell metacharacters are handled correctly.
        open(output_file, 'a').close()
        os.utime(output_file, None)
        sys.exit()
    data = {}
    data['rows'] = rows
    data['cls'] = classes
    return data
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
48
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
def plot_histo_hor(path,params,data,bcl):
    """Draw the scores in ``data['rows']`` as horizontal bars and save to ``path``.

    ``params`` is the option dict; the keys read here are 'all_feats',
    'width', 'back_color', 'fore_color', 'ls', 'rs', 'n_scl',
    'max_feature_len', 'feature_font_size', 'title', 'title_font_size',
    'autoscale', 'class_legend_font_size', 'format' and 'dpi'.

    ``data`` holds 'rows' (per-feature field lists; field 0 is the feature
    name, field 2 the class, field 3 the plotted score) and 'cls' (class
    names).  ``data['rows']`` is sorted in place.

    ``bcl`` selects the two-sided layout: bar values are signed by class
    index so the two classes extend in opposite directions.  Otherwise all
    bars are positive and rows are grouped by class.
    """
    rows = data['rows']
    cls2 = []
    if params['all_feats'] != "":
        cls2 = sorted(params['all_feats'].split(":"))
    cls = sorted(data['cls'])
    if bcl:
        # Class index 0 maps to -1 and index 1 to +1, giving signed sort keys.
        rows.sort(key=lambda ab: fabs(float(ab[3]))*(cls.index(ab[2])*2-1))
    else:
        # Plain indexing instead of zip(...)[3]: zip objects cannot be
        # subscripted on Python 3.  A zero maximum falls back to 1.0 so the
        # normalised sort key stays finite.
        mmax = max([fabs(float(row[3])) for row in rows]) or 1.0
        # score/mmax lies in [0,1], so adding (class index + 1) keeps the
        # rows of one class contiguous, ordered by score inside the class.
        rows.sort(key=lambda ab: fabs(float(ab[3]))/mmax+(cls.index(ab[2])+1))
    pos = arange(len(rows))
    head = 0.75
    tail = 0.5
    ht = head + tail
    ints = max(len(pos)*0.2,1.5)
    # Fraction of the figure height taken by the head and tail margins.
    margin = 1.0-ints/(ints+ht)
    fig = plt.figure(figsize=(params['width'], ints + ht), edgecolor=params['back_color'],facecolor=params['back_color'])
    ax = fig.add_subplot(111,frame_on=False,axis_bgcolor=params['back_color'])
    ls, rs = params['ls'], 1.0-params['rs']
    plt.subplots_adjust(left=ls,right=rs,top=1-head*margin, bottom=tail*margin)

    fig.canvas.set_window_title('LDA results')

    l_align = {'horizontalalignment':'left', 'verticalalignment':'baseline'}
    r_align = {'horizontalalignment':'right', 'verticalalignment':'baseline'}
    added = []
    m = 1 if rows[0][2] == cls[0] else -1
    for i,v in enumerate(rows):
        indcl = cls.index(v[2])
        # Only the first bar of each class carries a legend label.
        lab = str(v[2]) if str(v[2]) not in added else ""
        added.append(str(v[2]))
        col = colors[indcl%len(colors)]
        if len(cls2) > 0:
            # With --all_feats the colour index comes from that class list.
            col = colors[cls2.index(v[2])%len(colors)]
        vv = fabs(float(v[3])) * (m*(indcl*2-1)) if bcl else fabs(float(v[3]))
        ax.barh(pos[i],vv, align='center', color=col, label=lab, height=0.8, edgecolor=params['fore_color'])
    mv = max([abs(float(v[3])) for v in rows])
    max_len = params['max_feature_len']
    for i,r in enumerate(rows):
        indcl = cls.index(r[2])
        if params['n_scl'] < 0:
            rr = r[0]
        else:
            rr = r[0].split(".")[-min(r[0].count("."),params['n_scl'])]
        if len(rr) > max_len:
            # Floor division keeps the Python 2 slice bounds and stays an
            # integer on Python 3 (unary minus binds before //).
            rr = rr[:max_len//2-2]+" [..]"+rr[-max_len//2+2:]
        # Labels go on the opposite side of the zero line from their bar.
        if m*(indcl*2-1) < 0 and bcl:
            ax.text(mv/40.0,float(i)-0.3,rr, l_align, size=params['feature_font_size'],color=params['fore_color'])
        else:
            ax.text(-mv/40.0,float(i)-0.3,rr, r_align, size=params['feature_font_size'],color=params['fore_color'])
    ax.set_title(params['title'],size=params['title_font_size'],y=1.0+head*margin*0.8,color=params['fore_color'])

    ax.set_yticks([])
    ax.set_xlabel("LDA SCORE (log 10)")
    ax.xaxis.grid(True)
    xlim = ax.get_xlim()
    if params['autoscale']:
        # Tick step: one tenth of the summed absolute axis limits, rounded
        # to two decimals.
        step = round(round((abs(xlim[0])+abs(xlim[1]))/10,4)*100,0)/100
        ran = arange(0.0001,step)
        if len(ran) > 1 and len(ran) < 100:
            ax.set_xticks(arange(xlim[0],xlim[1]+0.0001,min(xlim[1]+0.0001,step)))
    ax.set_ylim((pos[0]-1,pos[-1]+1))
    leg = ax.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=5, borderaxespad=0., frameon=False,prop={'size':params['class_legend_font_size']})

    def get_col_attr(x):
        # Matches artists that have set_color but not set_facecolor.
        return hasattr(x, 'set_color') and not hasattr(x, 'set_facecolor')
    for o in leg.findobj(get_col_attr):
        o.set_color(params['fore_color'])
    for o in ax.findobj(get_col_attr):
        o.set_color(params['fore_color'])

    plt.savefig(path,format=params['format'],facecolor=params['back_color'],edgecolor=params['fore_color'],dpi=params['dpi'])
    plt.close()
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
114
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
def plot_histo_ver(path,params,data):
    """Draw the scores in ``data['rows']`` as vertical bars and save to ``path``.

    ``params`` is the option dict; the keys read here are 'n_scl',
    'back_color', 'fore_color', 'width', 'height', 'ls', 'rs',
    'feature_font_size', 'title', 'title_font_size', 'format' and 'dpi'.

    ``data`` holds 'rows' (per-feature field lists; field 0 is the feature
    name, field 2 the class, field 3 the plotted score) and 'cls' (class
    names).  ``data['rows']`` is sorted in place.

    ``params['rs']`` is treated as the margin left free on the right-hand
    side (the right edge of the axes is ``1.0 - rs``), the same convention
    plot_histo_hor uses.
    """
    rows = data['rows']
    cls = data['cls']
    # Normalise by the largest value of the same column the sort key uses
    # (field 3), so score/mmax stays within [0,1] and never spills into the
    # next class's range.  Indexing rows directly also avoids subscripting
    # zip(), which fails on Python 3.
    mmax = max([fabs(float(row[3])) for row in rows]) or 1.0
    rows.sort(key=lambda ab: fabs(float(ab[3]))/mmax+(cls.index(ab[2])+1))
    pos = arange(len(rows))
    if params['n_scl'] < 0:
        nam = [d[0] for d in rows]
    else:
        nam = [d[0].split(".")[-min(d[0].count("."),params['n_scl'])] for d in rows]
    fig = plt.figure(edgecolor=params['back_color'],facecolor=params['back_color'],figsize=(params['width'], params['height']))
    ax = fig.add_subplot(111,axis_bgcolor=params['back_color'])
    # Passing rs directly as `right` put the right edge at 0.1 with the
    # default options, i.e. left of the left edge (0.2), which matplotlib
    # rejects.
    plt.subplots_adjust(top=0.9, left=params['ls'], right=1.0-params['rs'], bottom=0.3)
    fig.canvas.set_window_title('LDA results')
    added = []
    for i,v in enumerate(rows):
        indcl = cls.index(v[2])
        # Only the first bar of each class carries a legend label.
        lab = str(v[2]) if str(v[2]) not in added else ""
        added.append(str(v[2]))
        col = colors[indcl%len(colors)]
        vv = fabs(float(v[3]))
        ax.bar(pos[i],vv, align='center', color=col, label=lab)
    xticks(pos,nam,rotation=-20, ha = 'left',size=params['feature_font_size'])
    ax.set_title(params['title'],size=params['title_font_size'])
    ax.set_ylabel("LDA SCORE (log 10)")
    ax.yaxis.grid(True)
    a,b = ax.get_xlim()
    dx = float(len(pos))/float((b-a))
    ax.set_xlim((0-dx,max(pos)+dx))
    plt.savefig(path,format=params['format'],facecolor=params['back_color'],edgecolor=params['fore_color'],dpi=params['dpi'])
    plt.close()
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
145
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
if __name__ == '__main__':
    # Script entry point: parse the options, derive the foreground colour
    # as the opposite of the background, load the rows and draw the chart.
    params = read_params(sys.argv)
    if params['back_color'] == 'k':
        params['fore_color'] = 'w'
    else:
        params['fore_color'] = 'k'
    data = read_data(params['input_file'],params['output_file'])
    if params['orientation'] == 'v':
        plot_histo_ver(params['output_file'],params,data)
    else:
        # The horizontal plot uses its two-sided layout only when exactly
        # two classes are present.
        two_classes = len(data['cls']) == 2
        plot_histo_hor(params['output_file'],params,data,two_classes)
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
152
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
153