annotate home/ubuntu/lefse_to_export/plot_cladogram.py @ 1:db64b6287cd6 draft

Modified datatypes
author george-weingart
date Wed, 20 Aug 2014 16:56:51 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
1 #!/usr/bin/env python
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
2
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
3 import os,sys,matplotlib,argparse,string
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
4 matplotlib.use('Agg')
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
5 from pylab import *
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
6 from lefse import *
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
7 import numpy as np
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
8
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
# Palette used to colour nodes by class; callers index it modulo its length.
# Entries are either matplotlib colour letters or [r, g, b] triples.
colors = [
    'r',
    'g',
    'b',
    'm',
    'c',
    [1.0, 0.5, 0.0],
    [0.0, 1.0, 0.0],
    [0.33, 0.125, 0.0],
    [0.75, 0.75, 0.75],
    'k',
]

# Label colours: entry i is the text colour looked up for colors[i].
dark_colors = [
    [0.4, 0.0, 0.0],
    [0.0, 0.2, 0.0],
    [0.0, 0.0, 0.4],
    'm',
    'c',
    [1.0, 0.5, 0.0],
    [0.0, 1.0, 0.0],
    [0.33, 0.125, 0.0],
    [0.75, 0.75, 0.75],
    'k',
]
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
11
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
class CladeNode:
    """One node (clade) of the cladogram tree.

    ``name`` is a dot-separated path; it is kept whole in ``id`` and split
    into its components in ``name``.  The last component is what ``repr``
    returns and is the key under which a node is stored in its parent's
    ``children`` mapping.
    """

    def __init__(self, name, abundance, viz = True):
        path = name.split(".")
        self.id = name
        self.name = path
        self.last_name = path[-1]
        self.abundance = abundance
        self.viz = viz
        # Placeholder values until a position / colour is assigned.
        self.pos = (-1.0, -1.0)
        self.color = 'y'
        # A node is a leaf until add_child() is called on it.
        self.children = {}
        self.isleaf = True
        # -1 stands for "no neighbouring leaf linked".
        self.prev_leaf = -1
        self.next_leaf = -1

    def __repr__(self):
        return self.last_name

    def add_child(self, node):
        """Store ``node`` under its repr and mark this node as non-leaf.

        A child whose repr equals that of an existing child replaces it.
        """
        self.isleaf = False
        key = node.__repr__()
        self.children[key] = node

    def get_children(self):
        """Return the children as a new list, ordered by their keys."""
        kids = self.children
        return [kids[key] for key in sorted(kids)]

    def get_color(self):
        """Return the node colour ('y' unless set_color was called)."""
        return self.color

    def set_color(self, c):
        """Set the node colour."""
        self.color = c

    def set_pos(self, pos):
        """Set the node position."""
        self.pos = pos
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
39
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
def read_params(args):
    """Parse the cladogram plotting options from the command line.

    NOTE: ``args`` is accepted but never used; ``parse_args()`` is called
    without arguments, so argparse reads ``sys.argv[1:]`` itself.

    Returns a dict mapping each option's ``dest`` to its parsed value.
    """
    parser = argparse.ArgumentParser(description='Cladoplot')

    positionals = (
        ('input_file', 'INPUT_FILE', "tab delimited input file"),
        ('output_file', 'OUTPUT_FILE', "the file for the output image"),
    )
    for name, meta, text in positionals:
        parser.add_argument(name, metavar=meta, type=str, help=text)

    # (flag, add_argument keyword arguments), kept in declaration order so
    # that the generated --help output lists them in the same order.
    options = (
        ('--clade_sep', dict(dest="clade_sep", type=float, default=1.5)),
        ('--max_lev', dict(dest="max_lev", type=int, default=-1)),
        ('--max_point_size', dict(dest="max_point_size", type=float, default=6.0)),
        ('--min_point_size', dict(dest="min_point_size", type=float, default=1)),
        ('--point_edge_width', dict(dest="markeredgewidth", type=float, default=.25)),
        ('--siblings_connector_width', dict(dest="siblings_connector_width", type=float, default=2)),
        ('--parents_connector_width', dict(dest="parents_connector_width", type=float, default=0.75)),
        ('--radial_start_lev', dict(dest="radial_start_lev", type=int, default=1)),
        ('--labeled_start_lev', dict(dest="labeled_start_lev", type=int, default=2)),
        ('--labeled_stop_lev', dict(dest="labeled_stop_lev", type=int, default=5)),
        ('--abrv_start_lev', dict(dest="abrv_start_lev", type=int, default=3)),
        ('--abrv_stop_lev', dict(dest="abrv_stop_lev", type=int, default=5)),
        ('--expand_void_lev', dict(dest="expand_void_lev", type=int, default=1)),
        ('--class_legend_vis', dict(dest="class_legend_vis", type=int, default=1)),
        ('--colored_connector', dict(dest="colored_connectors", type=int, default=1)),
        ('--alpha', dict(dest="alpha", type=float, default=0.2)),
        ('--title', dict(dest="title", type=str, default="Cladogram")),
        ('--sub_clade', dict(dest="sub_clade", type=str, default="")),
        ('--title_font_size', dict(dest="title_font_size", type=str, default="14")),
        ('--right_space_prop', dict(dest="r_prop", type=float, default=0.1)),
        ('--left_space_prop', dict(dest="l_prop", type=float, default=0.1)),
        ('--label_font_size', dict(dest="label_font_size", type=str, default="6")),
        ('--background_color', dict(dest="back_color", type=str, choices=["k","w"], default="w",
                                    help="set the color of the background")),
        ('--colored_labels', dict(dest="col_lab", type=int, choices=[0,1], default=1,
                                  help="draw the label with class color (1) or in black (0)")),
        ('--class_legend_font_size', dict(dest="class_legend_font_size", type=str, default="10")),
        ('--dpi', dict(dest="dpi", type=int, default=72)),
        ('--format', dict(dest="format", choices=["png","svg","pdf"], default="svg", type=str,
                          help="the format for the output file")),
        ('--all_feats', dict(dest="all_feats", type=str, default="")),
    )
    for flag, spec in options:
        parser.add_argument(flag, **spec)

    return vars(parser.parse_args())
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
74
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
def cmp_names(la,lb):
    """Return True when ``la`` and ``lb`` have the same length and equal items.

    Items are compared position by position with ``!=``; the first mismatch
    (or a length difference) yields False.  Two empty sequences compare True.
    """
    if len(la) != len(lb):
        return False
    # zip pairs the items directly; the previous nested enumerate built the
    # full cross product and filtered on i == j, which is quadratic.
    return not any(a != b for a, b in zip(la, lb))
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
80
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
def build_tree(father,all_nodes,l,depth,viz):
    """Recursively attach to ``father`` the nodes of ``all_nodes`` below it.

    father    -- node whose subtree is being built
    all_nodes -- candidate nodes; only those whose name starts with the
                 father's name components are considered
    l         -- current recursion level (the caller starts at 0)
    depth     -- total number of levels the tree should reach
    viz       -- visibility flag given to placeholder nodes

    When ``father`` has no direct child and the deepest level has not been
    reached, a placeholder child repeating the father's last name component
    (abundance 1.0) is inserted so that every branch reaches full depth.
    """
    prefix = father.name
    plen = len(prefix)
    descendants = [node for node in all_nodes
                   if len(node.name) > plen and cmp_names(prefix, node.name[:plen])]
    direct = [node for node in descendants if len(node.name) == plen + 1]

    if len(direct) == 0 and l < depth - 1:
        filler = CladeNode(father.id + "." + father.id.split(".")[-1], 1.0, viz)
        father.add_child(filler)
        direct.append(filler)

    for kid in direct:
        # Only the father's descendants can be descendants of the kid.
        build_tree(kid, descendants, l + 1, depth, viz)
        father.add_child(kid)
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
91
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
def get_all_nodes(father):
    """Return ``father`` and all of its descendants in pre-order.

    Children are visited in the order ``get_children()`` returns them.
    """
    collected = []
    pending = [father]
    while pending:
        node = pending.pop()
        collected.append(node)
        # Push in reverse so the first child is popped (visited) first.
        pending.extend(reversed(node.get_children()))
    return collected
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
98
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
def read_data(input_file,params):
    """Read the input table and build the clade tree to plot.

    Each line is split on whitespace and its last column is dropped.  Of the
    remaining columns, the first is the dot-separated clade name, the second
    its abundance (parsed as float) and, when present, the third a class
    label used to colour the node.

    params keys read here:
      'sub_clade'       -- if non-empty, keep only lines starting with
                           ``sub_clade + "."`` and strip that prefix
      'max_lev'         -- if >= 1, drop lines whose first field has that
                           many dots or more
      'all_feats'       -- optional ':'-separated list of class labels that
                           fixes the label -> colour assignment
      'expand_void_lev' -- placeholder nodes are visible only when this is 1

    Returns a dict with keys 'classes' (sorted class labels), 'root' (the
    root CladeNode), 'max_abs' / 'min_abs' (abundance range over the input
    rows) and 'nlev' (number of nodes at each level, root level first).

    Raises ValueError when no input row survives the filters.
    """
    sub_clade = params['sub_clade']
    max_lev = params['max_lev']
    prefix = sub_clade + "."

    rows = []
    with open(input_file, 'r') as inp:
        for line in inp:
            if max_lev >= 1 and line.split()[0].count(".") >= max_lev:
                continue
            if sub_clade == "":
                rows.append(line.strip().split()[:-1])
            elif line.startswith(prefix):
                # Strip only the leading prefix; splitting on it would also
                # cut the name at any later occurrence of the same text.
                rows.append(line[len(prefix):].strip().split()[:-1])

    if not rows:
        raise ValueError("no usable rows found in %s" % input_file)

    # The former `zip(*rows)[1] if v >= 0.0` compared strings with a float:
    # always true on Python 2, a TypeError on Python 3.  Every abundance is
    # kept, which is what the code effectively did.
    abundances = [float(row[1]) for row in rows]

    tree = {}
    tree['classes'] = sorted(set(row[2] for row in rows if len(row) > 2))
    all_nodes = [CladeNode("root." + row[0], float(row[1])) for row in rows]

    # Depth is taken from the input rows, before ancestors are added.
    depth = max(len(node.name) for node in all_nodes)

    # Create every missing ancestor, inheriting the abundance of the node it
    # was derived from.  Iterating a snapshot is enough: ancestors appended
    # here already have their own ancestors created by the same walk.
    known = set("_".join(node.name) for node in all_nodes)
    for node in list(all_nodes):
        cur = node
        while "_".join(cur.name[:-1]) not in known and len(cur.name) > 1:
            cur = CladeNode(".".join(cur.name[:-1]), cur.abundance)
            all_nodes.append(cur)
            known.add("_".join(cur.name))

    feat_classes = []
    if params['all_feats'] != "":
        feat_classes = sorted(params['all_feats'].split(":"))

    # The first len(rows) entries of all_nodes correspond to rows one-to-one.
    for node, row in zip(all_nodes, rows):
        if len(row) <= 2:
            continue
        label = row[2]
        if len(feat_classes) > 0:
            node.set_color(colors[feat_classes.index(label) % len(colors)])
        elif label.count('rgbcol') > 0:
            # Explicit colour: the '_'-separated fields after the first one
            # are parsed as float colour components.
            node.set_color([float(tt) for tt in label.split('_')[1:]])
        else:
            node.set_color(colors[tree['classes'].index(label) % len(colors)])

    root = CladeNode("root", -1.0)
    root.set_pos((0.0, 0.0))

    build_tree(root, all_nodes, 0, depth, params['expand_void_lev'] == 1)

    all_nodes = get_all_nodes(root)

    tree['root'] = root
    tree['max_abs'] = max(abundances)
    tree['min_abs'] = min(abundances)
    tree['nlev'] = [sum(1 for node in all_nodes if len(node.name) == lev + 1)
                    for lev in range(depth)]
    return tree
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
149
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
def add_all_pos(father,n,distn,seps,tsep,mlev,last_leaf=-1,nc=1):
    """Assign a position to every node below ``father``.

    father    -- node whose children are positioned (must have children)
    n         -- running count of leaves placed so far
    distn     -- spacing between consecutive leaves
    seps      -- extra separation per level, indexed by len(father.name)-1
    tsep      -- separation accumulated so far
    mlev      -- number of levels; the second coordinate of a child is
                 len(father.name) / (mlev - 1)
    last_leaf -- previously placed leaf, or -1 if none yet
    nc        -- unused

    Leaves are chained through ``prev_leaf`` / ``next_leaf`` in placement
    order.  Returns the updated ``(n, tsep, last_leaf)``.
    """
    kids = father.get_children()
    kids_are_leaves = True if kids[0].isleaf else False
    level = len(father.name)

    if kids_are_leaves:
        # An only child is shifted back by half a slot.
        shift = 0.5 if len(kids) == 1 else 0.0
        for leaf in kids:
            n += 1.0
            leaf.set_pos((n*distn - shift*float(distn) + tsep, level/float(mlev-1)))
            if last_leaf != -1:
                leaf.prev_leaf = last_leaf
                last_leaf.next_leaf = leaf
            last_leaf = leaf
    else:
        for kid in kids:
            n_before, tsep_before = n, tsep
            n, tsep, last_leaf = add_all_pos(kid, n, distn, seps, tsep, mlev, last_leaf, len(kids))
            # Place the internal node midway over the leaves it spans; when
            # it spans exactly one leaf, use the separation it started with.
            if n - n_before == 1:
                offset = tsep_before
            else:
                offset = (tsep_before + tsep)*0.5
            centre = (n_before + n)*0.5*distn
            kid.set_pos((centre + offset, level/float(mlev-1)))

    tsep += seps[level - 1]
    return n, tsep, last_leaf
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
173
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
def plot_points(father,params,pt_scale,ax):
    """Draw the marker of ``father`` and, recursively, of its descendants.

    Descendants are drawn before ``father``.  Nodes with ``viz`` false get no
    marker, but their descendants are still visited.

    params   -- reads 'markeredgewidth' and 'fore_color'
    pt_scale -- pair (base, divisor); the marker size is
                base + abundance/divisor + base
    ax       -- object whose ``plot`` method receives the marker

    Returns the position ``(pos[0], pos[1])`` of ``father``.
    """
    # Uncoloured ('y') children go first, largest abundance first among them.
    kids = sorted(father.get_children(),
                  key=lambda node: -int(node.get_color() == 'y')*node.abundance)
    x, r = father.pos[0], father.pos[1]
    for kid in kids:
        plot_points(kid, params, pt_scale, ax)

    if not father.viz:
        return x, r

    size = pt_scale[0] + father.abundance/pt_scale[1] + pt_scale[0]
    col = father.get_color()
    # Coloured nodes get a three times thicker marker edge.
    edge = params['markeredgewidth'] if col == 'y' else params['markeredgewidth']*3.0
    if x == 0 and r == 0:
        # The node at the origin is drawn with a hairline edge instead.
        edge = 0.01
    ax.plot(x, r, 'o', markersize=size, color=col, markeredgewidth=edge,
            markeredgecolor=params['fore_color'])
    return x, r
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
187
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
def plot_lines(father,params,depth,ax,xf):
    """Draw the connectors between ``father`` and its subtree.

    params -- reads 'radial_start_lev', 'fore_color',
              'parents_connector_width', 'siblings_connector_width' and
              'colored_connectors'
    depth  -- number of levels of the tree
    ax     -- object whose ``plot`` method receives the line segments
    xf     -- unused (the recursion passes the father's first coordinate)

    Fathers at level >= depth - radial_start_lev are joined to each visible
    child by a straight segment.  Shallower fathers get one segment of their
    own plus, above level 1, a line spanning their children.  A connector is
    drawn in the class colour (thicker, over a foreground-coloured underlay)
    when father and child(ren) share a non-default colour and
    'colored_connectors' is set.

    Returns the position ``(pos[0], pos[1])`` of ``father``.
    """
    kids = father.get_children()
    x, r = father.pos[0], father.pos[1]
    level = len(father.name)

    for idx, kid in enumerate(kids):
        xc, rc = plot_lines(kid, params, depth, ax, x)
        if idx == 0:
            x_first = xc
        if level < depth - params['radial_start_lev']:
            continue
        col = params['fore_color']
        lw = params['parents_connector_width']
        if not kid.viz:
            continue
        same_class = father.get_color() != 'y' and father.get_color() == kid.get_color()
        if same_class and params['colored_connectors']:
            col = kid.get_color()
            lw *= 2.5
        if col != params['fore_color']:
            # Foreground-coloured underlay, 1.5x wider, acts as an outline.
            ax.plot([x, xc], [r, rc], "-", color=params['fore_color'], lw=lw*1.5)
        ax.plot([x, xc], [r, rc], "-", color=col, lw=lw)

    if not father.viz or (len(kids) == 1 and not kids[0].viz):
        return x, r
    if not (level < depth - params['radial_start_lev']):
        return x, r

    col = params['fore_color']
    lw = params['parents_connector_width']
    if father.get_color() != 'y':
        differs = any(kid.get_color() != father.get_color() or not params['colored_connectors']
                      for kid in kids)
        if not differs:
            col = father.get_color()
            lw *= 2.5

    if not (x == 0 and r == 0):
        if len(kids) == 0:
            rc = r
            x_end = x
        elif len(kids) == 1:
            # Single child: the segment ends at the child's first coordinate.
            x_end = xc
        else:
            x_end = x
        if col != params['fore_color']:
            ax.plot([x, x_end], [r, rc], "-", color=params['fore_color'], lw=lw*1.5)
        ax.plot([x, x_end], [r, rc], "-", color=col, lw=lw)

    if len(kids) > 0 and level > 1:
        # Line from the first to the last child, sampled every 0.01.
        xs = arange(x_first, xc, 0.01)
        ys = [rc] * len(xs)
        ax.plot(xs, ys, "-", color=col, lw=params['siblings_connector_width'],
                markeredgecolor=params['fore_color'])
    return x, r
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
230
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
def uniqueid():
    """Yield an endless stream of short identifiers.

    Order: 'a'..'z', then 'a0'..'a9', 'b0'.. up to 'z9', then the decimal
    integers '0', '1', '2', ... without end.
    """
    # ascii_lowercase instead of string.lowercase: the latter depends on the
    # locale on Python 2 and no longer exists on Python 3.
    letters = string.ascii_lowercase
    for letter in letters:
        yield letter
    for letter in letters:
        for digit in range(10):
            yield letter + str(digit)
    counter = 0
    while True:
        yield str(counter)
        counter += 1
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
240
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
def plot_names(father,params,depth,ax,u_i,seps):
    """Draw the shaded sector and text label of ``father`` and its subtree.

    father -- node to process; descendants are processed first
    params -- reads 'labeled_start_lev', 'labeled_stop_lev',
              'abrv_start_lev', 'abrv_stop_lev', 'alpha', 'fore_color',
              'col_lab' and 'label_font_size'
    depth  -- number of levels of the tree
    ax     -- object providing ``bar`` and ``text``
    u_i    -- iterator supplying the short ids used for abbreviated labels
    seps   -- not used here, only forwarded to the recursive calls

    Only nodes with a non-default colour whose level ``l`` satisfies
    labeled_start_lev < l <= labeled_stop_lev + 1 are drawn.  For levels in
    (abrv_start_lev, abrv_stop_lev + 1] the text is a short id and the full
    "id: name" string is attached as the label of a zero-size bar.

    Returns ``(fr_0, fr_1)``, the extent covered by the subtree along the
    first coordinate.
    """
    children = father.get_children()
    l = len(father.name)

    if len(children) == 0:
        if father.prev_leaf == -1 or father.next_leaf == -1:
            # First or last leaf of the chain: zero-width extent.
            fr_0, fr_1 = father.pos[0], father.pos[0]
        else:
            # Extent runs to the midpoints towards the neighbouring leaves.
            fr_0 = (father.pos[0] + father.prev_leaf.pos[0])*0.5
            fr_1 = (father.pos[0] + father.next_leaf.pos[0])*0.5

    for i, child in enumerate(children):
        fr, to = plot_names(child, params, depth, ax, u_i, seps)
        if i == 0:
            fr_0 = fr
        fr_1 = to

    if father.get_color() != 'y' and params['labeled_start_lev'] < l <= params['labeled_stop_lev']+1:
        col = father.get_color()
        dd = params['labeled_stop_lev'] - params['labeled_start_lev'] + 1
        de = depth - 1
        dim = 1.0/float(de)
        perc_ext = 0.65 if dim > 0.1 else 1.0
        # Height of the shaded bar above the node's own level.
        clto = (de-l+1)*dim + dim*(dd-(l-params['labeled_start_lev'])+1)*perc_ext
        # Rotation in degrees taken from the middle of the extent.
        des = float(180.0*(fr_0+fr_1)/np.pi)*0.5 - 90
        lab = ""
        txt = father.last_name
        if params['abrv_start_lev'] < l <= params['abrv_stop_lev'] + 1:
            # next() works on Python 2.6+ and 3, unlike the .next() method.
            ide = next(u_i)
            lab = str(ide) + ": " + father.last_name
            txt = str(ide)
        ax.bar(fr_0, clto, width = fr_1-fr_0, bottom = float(l-1)/float(de), alpha = params['alpha'], color=col, edgecolor=col)
        # Zero-size bar whose only purpose is to carry ``lab`` as its label.
        ax.bar(0.0, 0.0, width = 0.0, bottom = 0.0, alpha = 1.0, color=col, edgecolor=params['fore_color'], label=lab)
        if l <= params['abrv_stop_lev'] + 1:
            if not params['col_lab']:
                col = params['fore_color']
            elif col not in colors:
                col = params['fore_color']
            else:
                col = dark_colors[colors.index(col) % len(dark_colors)]
            ax.text((fr_0+fr_1)*0.5, clto+float(l-1)/float(de)-dim*perc_ext/2.0, txt, size = params['label_font_size'], rotation=des, ha ="center", va="center", color=col)
    return fr_0, fr_1
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
277
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
def draw_tree(out_file,tree,params):
    """Draw the cladogram described by `tree` on a polar axis and save it.

    Arguments:
      out_file -- destination passed straight to plt.savefig.
      tree     -- dict; the keys read here are 'nlev', 'max_abs', 'min_abs',
                  'root' and 'classes'.
      params   -- dict of plotting options; the keys read here are
                  'min_point_size', 'max_point_size', 'clade_sep',
                  'back_color', 'fore_color', 'r_prop', 'l_prop',
                  'label_font_size', 'all_feats', 'title', 'title_font_size',
                  'class_legend_vis', 'class_legend_font_size', 'format'
                  and 'dpi'.

    Side effects: params['clade_sep'] is lowered in place (and a message is
    printed) while the summed separations exceed pi; the figure is written
    to out_file and then closed.
    """
    nlev = tree['nlev']
    depth = len(nlev)
    level_ids = range(1,depth+1)

    # (offset, divisor) pair handed to plot_points; the divisor never drops below 1.0.
    pt_scale = (params['min_point_size'],max(1.0,((tree['max_abs']-tree['min_abs']))/(params['max_point_size']-params['min_point_size'])))

    # Angular slot per entry of the last level, and the per-level separations.
    sep = (2.0*np.pi)/float(nlev[-1])
    seps = [params['clade_sep']*sep/float(depth-i+1) for i in level_ids]
    totseps = sum(s*nlev[i] for i,s in enumerate(seps[:-1]))

    # Shrink clade_sep until the separations take at most half of the circle.
    clade_sep_err = totseps > np.pi
    while totseps > np.pi:
        params['clade_sep'] *= 0.75
        # NOTE(review): unlike the initial computation above, this one has an
        # extra 0.25 factor in the denominator; kept as in the original.
        seps = [params['clade_sep']*sep/(float(depth-i+1)*0.25) for i in level_ids]
        totseps = sum(s*nlev[i] for i,s in enumerate(seps[:-1]))
    if clade_sep_err:
        # Single-argument print(...) emits the same text under Python 2 and 3.
        print('clade_sep parameter too large, lowered to ' + str(params['clade_sep']))

    fig = plt.figure(edgecolor=params['back_color'],facecolor=params['back_color'])
    # NOTE(review): axis_bgcolor is only accepted by older matplotlib releases
    # (newer ones use facecolor) -- confirm the target matplotlib version.
    ax = fig.add_subplot(111, polar=True, frame_on=False, axis_bgcolor=params['back_color'] )
    plt.subplots_adjust(right=1.0-params['r_prop'],left=params['l_prop'])
    ax.grid(False)
    xticks([])
    yticks([])

    # Angle left for each last-level entry once the separations are removed.
    ds = (2.0*np.pi-totseps)/float(nlev[-1])

    add_all_pos(tree['root'],0.0,ds,seps,0.0,depth)

    plot_lines(tree['root'],params,depth,ax,0)
    plot_points(tree['root'],params,pt_scale,ax)
    plot_names(tree['root'],params,depth,ax,uniqueid(),seps)

    def get_col_attr(x):
        # Select artists that can be recoloured via set_color but have no face colour.
        return hasattr(x, 'set_color') and not hasattr(x, 'set_facecolor')

    # Legend built from the labels already attached to the axis.
    labels = ax.get_legend_handles_labels()[1]
    if labels:
        leg = ax.legend(bbox_to_anchor=(1.05, 1), frameon=False, loc=2, borderaxespad=0.,prop={'size':params['label_font_size']})
        if leg is not None:
            # Re-add it as a plain artist so the class legend created below
            # does not replace it.
            gca().add_artist(leg)
            for o in leg.findobj(get_col_attr):
                o.set_color(params['fore_color'])

    # Class legend: colours are indexed by position in the full (sorted) class
    # list, so filtering by tree['classes'] happens after enumerate().
    cll = sorted(tree['classes']) if params['all_feats'] == "" else sorted(params['all_feats'].split(":"))
    nll = [ax.bar(0.0, 0.0, width = 0.0, bottom = 0.0, color=colors[i%len(colors)], label=c) for i,c in enumerate(cll) if c in tree['classes']]
    cl = [c for c in cll if c in tree['classes']]

    ax.set_title(params['title'],size=params['title_font_size'],color=params['fore_color'])

    if params['class_legend_vis']:
        l2 = legend(nll, cl, loc=2, prop={'size':params['class_legend_font_size']}, frameon=False)
        if l2 is not None:
            for o in l2.findobj(get_col_attr):
                o.set_color(params['fore_color'])

    plt.savefig(out_file,format=params['format'],facecolor=params['back_color'],edgecolor=params['fore_color'],dpi=params['dpi'])
    plt.close()
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
336
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
if __name__ == '__main__':
    # Script entry point: parse the command line, pick a foreground colour
    # that contrasts with the background, load the data and draw the plot.
    params = read_params(sys.argv)
    if params['back_color'] == 'k':
        params['fore_color'] = 'w'
    else:
        params['fore_color'] = 'k'
    clad_tree = read_data(params['input_file'], params)
    draw_tree(params['output_file'], clad_tree, params)
db64b6287cd6 Modified datatypes
george-weingart
parents:
diff changeset
342