annotate src/breadcrumbs/hclust/hclust.py @ 0:0de566f21448 draft default tip

v2
author sagun98
date Thu, 03 Jun 2021 18:13:32 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
sagun98
parents:
diff changeset
1 #!/usr/bin/env python
sagun98
parents:
diff changeset
2
sagun98
parents:
diff changeset
3 import sys
sagun98
parents:
diff changeset
4 import numpy as np
sagun98
parents:
diff changeset
5 import matplotlib
sagun98
parents:
diff changeset
6 matplotlib.use('Agg')
sagun98
parents:
diff changeset
7 import scipy
sagun98
parents:
diff changeset
8 import pylab
sagun98
parents:
diff changeset
9 import scipy.cluster.hierarchy as sch
sagun98
parents:
diff changeset
10 import scipy.spatial.distance as dis
sagun98
parents:
diff changeset
11 from scipy import stats
sagun98
parents:
diff changeset
12
sagun98
parents:
diff changeset
13 # User defined color maps (in addition to matplotlib ones)
sagun98
parents:
diff changeset
14 bbcyr = {'red': ( (0.0, 0.0, 0.0),
sagun98
parents:
diff changeset
15 (0.25, 0.0, 0.0),
sagun98
parents:
diff changeset
16 (0.50, 0.0, 0.0),
sagun98
parents:
diff changeset
17 (0.75, 1.0, 1.0),
sagun98
parents:
diff changeset
18 (1.0, 1.0, 1.0)),
sagun98
parents:
diff changeset
19 'green': ( (0.0, 0.0, 0.0),
sagun98
parents:
diff changeset
20 (0.25, 0.0, 0.0),
sagun98
parents:
diff changeset
21 (0.50, 1.0, 1.0),
sagun98
parents:
diff changeset
22 (0.75, 1.0, 1.0),
sagun98
parents:
diff changeset
23 (1.0, 0.0, 1.0)),
sagun98
parents:
diff changeset
24 'blue': ( (0.0, 0.0, 0.0),
sagun98
parents:
diff changeset
25 (0.25, 1.0, 1.0),
sagun98
parents:
diff changeset
26 (0.50, 1.0, 1.0),
sagun98
parents:
diff changeset
27 (0.75, 0.0, 0.0),
sagun98
parents:
diff changeset
28 (1.0, 0.0, 1.0))}
sagun98
parents:
diff changeset
29
sagun98
parents:
diff changeset
30 bbcry = {'red': ( (0.0, 0.0, 0.0),
sagun98
parents:
diff changeset
31 (0.25, 0.0, 0.0),
sagun98
parents:
diff changeset
32 (0.50, 0.0, 0.0),
sagun98
parents:
diff changeset
33 (0.75, 1.0, 1.0),
sagun98
parents:
diff changeset
34 (1.0, 1.0, 1.0)),
sagun98
parents:
diff changeset
35 'green': ( (0.0, 0.0, 0.0),
sagun98
parents:
diff changeset
36 (0.25, 0.0, 0.0),
sagun98
parents:
diff changeset
37 (0.50, 1.0, 1.0),
sagun98
parents:
diff changeset
38 (0.75, 0.0, 0.0),
sagun98
parents:
diff changeset
39 (1.0, 1.0, 1.0)),
sagun98
parents:
diff changeset
40 'blue': ( (0.0, 0.0, 0.0),
sagun98
parents:
diff changeset
41 (0.25, 1.0, 1.0),
sagun98
parents:
diff changeset
42 (0.50, 1.0, 1.0),
sagun98
parents:
diff changeset
43 (0.75, 0.0, 0.0),
sagun98
parents:
diff changeset
44 (1.0, 0.0, 1.0))}
sagun98
parents:
diff changeset
45 my_colormaps = [ ('bbcyr',bbcyr),
sagun98
parents:
diff changeset
46 ('bbcry',bbcry)]
sagun98
parents:
diff changeset
47
sagun98
parents:
diff changeset
48
sagun98
parents:
diff changeset
49
sagun98
parents:
diff changeset
50 def read_params(args):
sagun98
parents:
diff changeset
51 import argparse as ap
sagun98
parents:
diff changeset
52 import textwrap
sagun98
parents:
diff changeset
53
sagun98
parents:
diff changeset
54 p = ap.ArgumentParser( description= "TBA" )
sagun98
parents:
diff changeset
55
sagun98
parents:
diff changeset
56 p.add_argument( '--in', '--inp', metavar='INPUT_FILE', type=str,
sagun98
parents:
diff changeset
57 nargs='?', default=sys.stdin,
sagun98
parents:
diff changeset
58 help= "the input archive " )
sagun98
parents:
diff changeset
59
sagun98
parents:
diff changeset
60 p.add_argument( '--out', metavar='OUTPUT_FILE', type=str,
sagun98
parents:
diff changeset
61 nargs = '?', default=None,
sagun98
parents:
diff changeset
62 help= " the output file, image on screen"
sagun98
parents:
diff changeset
63 " if not specified. " )
sagun98
parents:
diff changeset
64
sagun98
parents:
diff changeset
65 p.add_argument( '-m', metavar='method', type=str,
sagun98
parents:
diff changeset
66 choices=[ "single","complete","average",
sagun98
parents:
diff changeset
67 "weighted","centroid","median",
sagun98
parents:
diff changeset
68 "ward" ],
sagun98
parents:
diff changeset
69 default="average" )
sagun98
parents:
diff changeset
70
sagun98
parents:
diff changeset
71 dist_funcs = [ "euclidean","minkowski","cityblock","seuclidean",
sagun98
parents:
diff changeset
72 "sqeuclidean","cosine","correlation","hamming",
sagun98
parents:
diff changeset
73 "jaccard","chebyshev","canberra","braycurtis",
sagun98
parents:
diff changeset
74 "mahalanobis","yule","matching","dice",
sagun98
parents:
diff changeset
75 "kulsinski","rogerstanimoto","russellrao","sokalmichener",
sagun98
parents:
diff changeset
76 "sokalsneath","wminkowski","ward"]
sagun98
parents:
diff changeset
77 p.add_argument( '-d', metavar='distance function', type=str,
sagun98
parents:
diff changeset
78 choices=dist_funcs,
sagun98
parents:
diff changeset
79 default="euclidean" )
sagun98
parents:
diff changeset
80 p.add_argument( '-f', metavar='distance function for features', type=str,
sagun98
parents:
diff changeset
81 choices=dist_funcs,
sagun98
parents:
diff changeset
82 default="d" )
sagun98
parents:
diff changeset
83
sagun98
parents:
diff changeset
84 p.add_argument( '--dmf', metavar='distance matrix for features', type=str,
sagun98
parents:
diff changeset
85 default = None )
sagun98
parents:
diff changeset
86 p.add_argument( '--dms', metavar='distance matrix for samples', type=str,
sagun98
parents:
diff changeset
87 default = None )
sagun98
parents:
diff changeset
88
sagun98
parents:
diff changeset
89
sagun98
parents:
diff changeset
90 p.add_argument( '-l', metavar='sample label', type=str,
sagun98
parents:
diff changeset
91 default = None )
sagun98
parents:
diff changeset
92
sagun98
parents:
diff changeset
93 p.add_argument( '-s', metavar='scale norm', type=str,
sagun98
parents:
diff changeset
94 default = 'lin', choices = ['log','lin'])
sagun98
parents:
diff changeset
95
sagun98
parents:
diff changeset
96 p.add_argument( '-x', metavar='x cell width', type=float,
sagun98
parents:
diff changeset
97 default = 0.1)
sagun98
parents:
diff changeset
98 p.add_argument( '-y', metavar='y cell width', type=float,
sagun98
parents:
diff changeset
99 default = 0.1 )
sagun98
parents:
diff changeset
100
sagun98
parents:
diff changeset
101 p.add_argument( '--minv', metavar='min value', type=float,
sagun98
parents:
diff changeset
102 default = 0.0 )
sagun98
parents:
diff changeset
103 p.add_argument( '--maxv', metavar='max value', type=float,
sagun98
parents:
diff changeset
104 default = None )
sagun98
parents:
diff changeset
105
sagun98
parents:
diff changeset
106 p.add_argument( '--xstart', metavar='x coordinate of the top left cell '
sagun98
parents:
diff changeset
107 'of the values',
sagun98
parents:
diff changeset
108 type=int, default=1 )
sagun98
parents:
diff changeset
109 p.add_argument( '--ystart', metavar='y coordinate of the top left cell '
sagun98
parents:
diff changeset
110 'of the values',
sagun98
parents:
diff changeset
111 type=int, default=1 )
sagun98
parents:
diff changeset
112 p.add_argument( '--xstop', metavar='x coordinate of the bottom right cell '
sagun98
parents:
diff changeset
113 'of the values (default None = last row)',
sagun98
parents:
diff changeset
114 type=int, default=None )
sagun98
parents:
diff changeset
115 p.add_argument( '--ystop', metavar='y coordinate of the bottom right cell '
sagun98
parents:
diff changeset
116 'of the values (default None = last column)',
sagun98
parents:
diff changeset
117 type=int, default=None )
sagun98
parents:
diff changeset
118
sagun98
parents:
diff changeset
119 p.add_argument( '--perc', metavar='percentile for ordering and rows selection', type=int, default=None )
sagun98
parents:
diff changeset
120 p.add_argument( '--top', metavar='selection of the top N rows', type=int, default=None )
sagun98
parents:
diff changeset
121 p.add_argument( '--norm', metavar='whether to normalize columns (default 0)', type=int, default=0 )
sagun98
parents:
diff changeset
122
sagun98
parents:
diff changeset
123 p.add_argument( '--sdend_h', metavar='height of the sample dendrogram', type=float,
sagun98
parents:
diff changeset
124 default = 0.1 )
sagun98
parents:
diff changeset
125 p.add_argument( '--fdend_w', metavar='width of the feature dendrogram', type=float,
sagun98
parents:
diff changeset
126 default = 0.1 )
sagun98
parents:
diff changeset
127 p.add_argument( '--cm_h', metavar='height of the colormap', type=float,
sagun98
parents:
diff changeset
128 default = 0.03 )
sagun98
parents:
diff changeset
129 p.add_argument( '--cm_ticks', metavar='label for ticks of the colormap', type=str,
sagun98
parents:
diff changeset
130 default = None )
sagun98
parents:
diff changeset
131
sagun98
parents:
diff changeset
132 p.add_argument( '--font_size', metavar='label_font_size', type=int,
sagun98
parents:
diff changeset
133 default = 7 )
sagun98
parents:
diff changeset
134 p.add_argument( '--feat_dend_col_th', metavar='Color threshold for feature dendrogram', type=float,
sagun98
parents:
diff changeset
135 default = None )
sagun98
parents:
diff changeset
136 p.add_argument( '--sample_dend_col_th', metavar='Color threshold for sample dendrogram', type=float,
sagun98
parents:
diff changeset
137 default = None )
sagun98
parents:
diff changeset
138 p.add_argument( '--clust_ncols', metavar='Number of colors for clusters', type=int,
sagun98
parents:
diff changeset
139 default = 7 )
sagun98
parents:
diff changeset
140 p.add_argument( '--clust_line_w', metavar='Cluster line width', type=float,
sagun98
parents:
diff changeset
141 default = 1.0 )
sagun98
parents:
diff changeset
142 p.add_argument( '--label_cols', metavar='Label colors', type=str,
sagun98
parents:
diff changeset
143 default = None )
sagun98
parents:
diff changeset
144 p.add_argument( '--label2cols', metavar='Label to colors mapping file', type=str,
sagun98
parents:
diff changeset
145 default = None )
sagun98
parents:
diff changeset
146 p.add_argument( '--sdend_out', metavar='File for storing the samples dendrogram in PhyloXML format', type=str,
sagun98
parents:
diff changeset
147 default = None )
sagun98
parents:
diff changeset
148 p.add_argument( '--fdend_out', metavar='File for storing the features dendrogram in PhyloXML format', type=str,
sagun98
parents:
diff changeset
149 default = None )
sagun98
parents:
diff changeset
150
sagun98
parents:
diff changeset
151
sagun98
parents:
diff changeset
152 p.add_argument( '--pad_inches', metavar='Proportion of figure to be left blank around the plot', type=float,
sagun98
parents:
diff changeset
153 default = 0.1 )
sagun98
parents:
diff changeset
154
sagun98
parents:
diff changeset
155
sagun98
parents:
diff changeset
156 p.add_argument( '--flabel', metavar='Whether to show the labels for the features', type=int,
sagun98
parents:
diff changeset
157 default = 0 )
sagun98
parents:
diff changeset
158 p.add_argument( '--slabel', metavar='Whether to show the labels for the samples', type=int,
sagun98
parents:
diff changeset
159 default = 0 )
sagun98
parents:
diff changeset
160
sagun98
parents:
diff changeset
161 p.add_argument( '--legend', metavar='Whether to show the samples to label legend', type=int,
sagun98
parents:
diff changeset
162 default = 0 )
sagun98
parents:
diff changeset
163 p.add_argument( '--legend_font_size', metavar='Legend font size', type=int,
sagun98
parents:
diff changeset
164 default = 7 )
sagun98
parents:
diff changeset
165 p.add_argument( '--legend_ncol', metavar='Number of columns for the legend', type=int,
sagun98
parents:
diff changeset
166 default = 3 )
sagun98
parents:
diff changeset
167 p.add_argument( '--grid', metavar='Whether to show the grid (only black for now)', type=int,
sagun98
parents:
diff changeset
168 default = 0 )
sagun98
parents:
diff changeset
169
sagun98
parents:
diff changeset
170 col_maps = ['Accent', 'Blues', 'BrBG', 'BuGn', 'BuPu', 'Dark2', 'GnBu',
sagun98
parents:
diff changeset
171 'Greens', 'Greys', 'OrRd', 'Oranges', 'PRGn', 'Paired',
sagun98
parents:
diff changeset
172 'Pastel1', 'Pastel2', 'PiYG', 'PuBu', 'PuBuGn', 'PuOr',
sagun98
parents:
diff changeset
173 'PuRd', 'Purples', 'RdBu', 'RdGy', 'RdPu', 'RdYlBu', 'RdYlGn',
sagun98
parents:
diff changeset
174 'Reds', 'Set1', 'Set2', 'Set3', 'Spectral', 'YlGn', 'YlGnBu',
sagun98
parents:
diff changeset
175 'YlOrBr', 'YlOrRd', 'afmhot', 'autumn', 'binary', 'bone',
sagun98
parents:
diff changeset
176 'brg', 'bwr', 'cool', 'copper', 'flag', 'gist_earth',
sagun98
parents:
diff changeset
177 'gist_gray', 'gist_heat', 'gist_ncar', 'gist_rainbow',
sagun98
parents:
diff changeset
178 'gist_stern', 'gist_yarg', 'gnuplot', 'gnuplot2', 'gray',
sagun98
parents:
diff changeset
179 'hot', 'hsv', 'jet', 'ocean', 'pink', 'prism', 'rainbow',
sagun98
parents:
diff changeset
180 'seismic', 'spectral', 'spring', 'summer', 'terrain', 'winter'] + [n for n,c in my_colormaps]
sagun98
parents:
diff changeset
181 p.add_argument( '-c', metavar='colormap', type=str,
sagun98
parents:
diff changeset
182 choices = col_maps, default = 'jet' )
sagun98
parents:
diff changeset
183
sagun98
parents:
diff changeset
184 return vars(p.parse_args())
sagun98
parents:
diff changeset
185
sagun98
parents:
diff changeset
186 # Predefined colors for dendrograms brances and class labels
sagun98
parents:
diff changeset
187 colors = [ "#B22222","#006400","#0000CD","#9400D3","#696969","#8B4513",
sagun98
parents:
diff changeset
188 "#FF1493","#FF8C00","#3CB371","#00Bfff","#CDC9C9","#FFD700",
sagun98
parents:
diff changeset
189 "#2F4F4F","#FF0000","#ADFF2F","#B03060" ]
sagun98
parents:
diff changeset
190
sagun98
parents:
diff changeset
191 def samples2classes_panel(fig, samples, s2l, idx1, idx2, height, xsize, cols, legendon, fontsize, label2cols, legend_ncol ):
sagun98
parents:
diff changeset
192 from matplotlib.patches import Rectangle
sagun98
parents:
diff changeset
193 samples2labels = dict([(l[0],l[1])
sagun98
parents:
diff changeset
194 for l in [ll.strip().split('\t')
sagun98
parents:
diff changeset
195 for ll in open(s2l)] if len(l) > 1])
sagun98
parents:
diff changeset
196
sagun98
parents:
diff changeset
197 if label2cols:
sagun98
parents:
diff changeset
198 labels2colors = dict([(l[0],l[1]) for l in [ll.strip().split('\t') for ll in open(label2cols)]])
sagun98
parents:
diff changeset
199 else:
sagun98
parents:
diff changeset
200 cs = cols if cols else colors
sagun98
parents:
diff changeset
201 labels2colors = dict([(l,cs[i%len(cs)]) for i,l in enumerate(set(samples2labels.values()))])
sagun98
parents:
diff changeset
202 ax1 = fig.add_axes([0.,1.0,1.0,height],frameon=False)
sagun98
parents:
diff changeset
203 ax1.set_xticks([])
sagun98
parents:
diff changeset
204 ax1.set_yticks([])
sagun98
parents:
diff changeset
205 ax1.set_ylim( [0.0, height] )
sagun98
parents:
diff changeset
206 ax1.set_xlim( [0.0, xsize] )
sagun98
parents:
diff changeset
207 step = xsize / float(len(samples))
sagun98
parents:
diff changeset
208 labels = set()
sagun98
parents:
diff changeset
209 added_labels = set()
sagun98
parents:
diff changeset
210 for i,ind in enumerate(idx2):
sagun98
parents:
diff changeset
211 if not samples[ind] in samples2labels or \
sagun98
parents:
diff changeset
212 not samples2labels[samples[ind]] in labels2colors:
sagun98
parents:
diff changeset
213 fc, ll = "k", None
sagun98
parents:
diff changeset
214 else:
sagun98
parents:
diff changeset
215 ll = samples2labels[samples[ind]]
sagun98
parents:
diff changeset
216 ll = None if ll in added_labels else ll
sagun98
parents:
diff changeset
217 added_labels.add( ll )
sagun98
parents:
diff changeset
218 fc = labels2colors[samples2labels[samples[ind]]]
sagun98
parents:
diff changeset
219
sagun98
parents:
diff changeset
220 rect = Rectangle( [float(i)*step, 0.0], step, height,
sagun98
parents:
diff changeset
221 facecolor = fc,
sagun98
parents:
diff changeset
222 label = ll,
sagun98
parents:
diff changeset
223 edgecolor='b', lw = 0.0)
sagun98
parents:
diff changeset
224 labels.add( ll )
sagun98
parents:
diff changeset
225 ax1.add_patch(rect)
sagun98
parents:
diff changeset
226 ax1.autoscale_view()
sagun98
parents:
diff changeset
227
sagun98
parents:
diff changeset
228 if legendon:
sagun98
parents:
diff changeset
229 ax1.legend( loc = 2, ncol = legend_ncol, bbox_to_anchor=(1.01, 3.),
sagun98
parents:
diff changeset
230 borderpad = 0.0, labelspacing = 0.0,
sagun98
parents:
diff changeset
231 handlelength = 0.5, handletextpad = 0.3,
sagun98
parents:
diff changeset
232 borderaxespad = 0.0, columnspacing = 0.3,
sagun98
parents:
diff changeset
233 prop = {'size':fontsize}, frameon = False)
sagun98
parents:
diff changeset
234
sagun98
parents:
diff changeset
235 def samples_dend_panel( fig, Z, Z2, ystart, ylen, lw ):
sagun98
parents:
diff changeset
236 ax2 = fig.add_axes([0.0,1.0+ystart,1.0,ylen], frameon=False)
sagun98
parents:
diff changeset
237 Z2['color_list'] = [c.replace('b','k') for c in Z2['color_list']]
sagun98
parents:
diff changeset
238 mh = max(Z[:,2])
sagun98
parents:
diff changeset
239 sch._plot_dendrogram( Z2['icoord'], Z2['dcoord'], Z2['ivl'],
sagun98
parents:
diff changeset
240 Z.shape[0] + 1, Z.shape[0] + 1,
sagun98
parents:
diff changeset
241 mh, 'top', no_labels=True,
sagun98
parents:
diff changeset
242 color_list=Z2['color_list'] )
sagun98
parents:
diff changeset
243 for coll in ax2.collections:
sagun98
parents:
diff changeset
244 coll._linewidths = (lw,)
sagun98
parents:
diff changeset
245 ax2.set_xticks([])
sagun98
parents:
diff changeset
246 ax2.set_yticks([])
sagun98
parents:
diff changeset
247 ax2.set_xticklabels([])
sagun98
parents:
diff changeset
248
sagun98
parents:
diff changeset
249 def features_dend_panel( fig, Z, Z2, width, lw ):
sagun98
parents:
diff changeset
250 ax1 = fig.add_axes([-width,0.0,width,1.0], frameon=False)
sagun98
parents:
diff changeset
251 Z2['color_list'] = [c.replace('b','k').replace('x','b') for c in Z2['color_list']]
sagun98
parents:
diff changeset
252 mh = max(Z[:,2])
sagun98
parents:
diff changeset
253 sch._plot_dendrogram(Z2['icoord'], Z2['dcoord'], Z2['ivl'], Z.shape[0] + 1, Z.shape[0] + 1, mh, 'right', no_labels=True, color_list=Z2['color_list'])
sagun98
parents:
diff changeset
254 for coll in ax1.collections:
sagun98
parents:
diff changeset
255 coll._linewidths = (lw,)
sagun98
parents:
diff changeset
256 ax1.set_xticks([])
sagun98
parents:
diff changeset
257 ax1.set_yticks([])
sagun98
parents:
diff changeset
258 ax1.set_xticklabels([])
sagun98
parents:
diff changeset
259
sagun98
parents:
diff changeset
260
sagun98
parents:
diff changeset
261 def add_cmap( cmapdict, name ):
sagun98
parents:
diff changeset
262 my_cmap = matplotlib.colors.LinearSegmentedColormap(name,cmapdict,256)
sagun98
parents:
diff changeset
263 pylab.register_cmap(name=name,cmap=my_cmap)
sagun98
parents:
diff changeset
264
sagun98
parents:
diff changeset
265 def init_fig(xsize,ysize,ncol):
sagun98
parents:
diff changeset
266 fig = pylab.figure(figsize=(xsize,ysize))
sagun98
parents:
diff changeset
267 sch._link_line_colors = colors[:ncol]
sagun98
parents:
diff changeset
268 return fig
sagun98
parents:
diff changeset
269
sagun98
parents:
diff changeset
270 def heatmap_panel( fig, D, minv, maxv, idx1, idx2, cm_name, scale, cols, rows, label_font_size, cb_offset, cb_l, flabelson, slabelson, cm_ticks, gridon, bar_offset ):
sagun98
parents:
diff changeset
271 cm = pylab.get_cmap(cm_name)
sagun98
parents:
diff changeset
272 bottom_col = [ cm._segmentdata['red'][0][1],
sagun98
parents:
diff changeset
273 cm._segmentdata['green'][0][1],
sagun98
parents:
diff changeset
274 cm._segmentdata['blue'][0][1] ]
sagun98
parents:
diff changeset
275 axmatrix = fig.add_axes( [0.0,0.0,1.0,1.0],
sagun98
parents:
diff changeset
276 axisbg=bottom_col)
sagun98
parents:
diff changeset
277 if any([c < 0.95 for c in bottom_col]):
sagun98
parents:
diff changeset
278 axmatrix.spines['right'].set_color('none')
sagun98
parents:
diff changeset
279 axmatrix.spines['left'].set_color('none')
sagun98
parents:
diff changeset
280 axmatrix.spines['top'].set_color('none')
sagun98
parents:
diff changeset
281 axmatrix.spines['bottom'].set_color('none')
sagun98
parents:
diff changeset
282 norm_f = matplotlib.colors.LogNorm if scale == 'log' else matplotlib.colors.Normalize
sagun98
parents:
diff changeset
283 im = axmatrix.matshow( D, norm = norm_f( vmin=minv if minv > 0.0 else None,
sagun98
parents:
diff changeset
284 vmax=maxv),
sagun98
parents:
diff changeset
285 aspect='auto', origin='lower', cmap=cm, vmax=maxv)
sagun98
parents:
diff changeset
286
sagun98
parents:
diff changeset
287 axmatrix2 = axmatrix.twinx()
sagun98
parents:
diff changeset
288 axmatrix3 = axmatrix.twiny()
sagun98
parents:
diff changeset
289
sagun98
parents:
diff changeset
290 axmatrix.set_xticks([])
sagun98
parents:
diff changeset
291 axmatrix2.set_xticks([])
sagun98
parents:
diff changeset
292 axmatrix3.set_xticks([])
sagun98
parents:
diff changeset
293 axmatrix.set_yticks([])
sagun98
parents:
diff changeset
294 axmatrix2.set_yticks([])
sagun98
parents:
diff changeset
295 axmatrix3.set_yticks([])
sagun98
parents:
diff changeset
296
sagun98
parents:
diff changeset
297 axmatrix.set_xticklabels([])
sagun98
parents:
diff changeset
298 axmatrix2.set_xticklabels([])
sagun98
parents:
diff changeset
299 axmatrix3.set_xticklabels([])
sagun98
parents:
diff changeset
300 axmatrix.set_yticklabels([])
sagun98
parents:
diff changeset
301 axmatrix2.set_yticklabels([])
sagun98
parents:
diff changeset
302 axmatrix3.set_yticklabels([])
sagun98
parents:
diff changeset
303
sagun98
parents:
diff changeset
304 if any([c < 0.95 for c in bottom_col]):
sagun98
parents:
diff changeset
305 axmatrix2.spines['right'].set_color('none')
sagun98
parents:
diff changeset
306 axmatrix2.spines['left'].set_color('none')
sagun98
parents:
diff changeset
307 axmatrix2.spines['top'].set_color('none')
sagun98
parents:
diff changeset
308 axmatrix2.spines['bottom'].set_color('none')
sagun98
parents:
diff changeset
309 if any([c < 0.95 for c in bottom_col]):
sagun98
parents:
diff changeset
310 axmatrix3.spines['right'].set_color('none')
sagun98
parents:
diff changeset
311 axmatrix3.spines['left'].set_color('none')
sagun98
parents:
diff changeset
312 axmatrix3.spines['top'].set_color('none')
sagun98
parents:
diff changeset
313 axmatrix3.spines['bottom'].set_color('none')
sagun98
parents:
diff changeset
314 if flabelson:
sagun98
parents:
diff changeset
315 axmatrix2.set_yticks(np.arange(len(rows))+0.5)
sagun98
parents:
diff changeset
316 axmatrix2.set_yticklabels([rows[r] for r in idx1],size=label_font_size,va='center')
sagun98
parents:
diff changeset
317 if slabelson:
sagun98
parents:
diff changeset
318 axmatrix.set_xticks(np.arange(len(cols)))
sagun98
parents:
diff changeset
319 axmatrix.set_xticklabels([cols[r] for r in idx2],size=label_font_size,rotation=90,va='top',ha='center')
sagun98
parents:
diff changeset
320 axmatrix.tick_params(length=0)
sagun98
parents:
diff changeset
321 axmatrix2.tick_params(length=0)
sagun98
parents:
diff changeset
322 axmatrix3.tick_params(length=0)
sagun98
parents:
diff changeset
323 axmatrix2.set_ylim(0,len(rows))
sagun98
parents:
diff changeset
324
sagun98
parents:
diff changeset
325 if gridon:
sagun98
parents:
diff changeset
326 axmatrix.set_yticks(np.arange(len(idx1)-1)+0.5)
sagun98
parents:
diff changeset
327 axmatrix.set_xticks(np.arange(len(idx2))+0.5)
sagun98
parents:
diff changeset
328 axmatrix.grid( True )
sagun98
parents:
diff changeset
329 ticklines = axmatrix.get_xticklines()
sagun98
parents:
diff changeset
330 ticklines.extend( axmatrix.get_yticklines() )
sagun98
parents:
diff changeset
331 #gridlines = axmatrix.get_xgridlines()
sagun98
parents:
diff changeset
332 #gridlines.extend( axmatrix.get_ygridlines() )
sagun98
parents:
diff changeset
333
sagun98
parents:
diff changeset
334 for line in ticklines:
sagun98
parents:
diff changeset
335 line.set_linewidth(3)
sagun98
parents:
diff changeset
336
sagun98
parents:
diff changeset
337 if cb_l > 0.0:
sagun98
parents:
diff changeset
338 axcolor = fig.add_axes([0.0,1.0+bar_offset*1.25,1.0,cb_l])
sagun98
parents:
diff changeset
339 cbar = fig.colorbar(im, cax=axcolor, orientation='horizontal')
sagun98
parents:
diff changeset
340 cbar.ax.tick_params(labelsize=label_font_size)
sagun98
parents:
diff changeset
341 if cm_ticks:
sagun98
parents:
diff changeset
342 cbar.ax.set_xticklabels( cm_ticks.split(":") )
sagun98
parents:
diff changeset
343
sagun98
parents:
diff changeset
344
sagun98
parents:
diff changeset
345 def read_table( fin, xstart,xstop,ystart,ystop, percentile = None, top = None, norm = False ):
sagun98
parents:
diff changeset
346 mat = [l.rstrip().split('\t') for l in open( fin )]
sagun98
parents:
diff changeset
347
sagun98
parents:
diff changeset
348 if fin.endswith(".biom"):
sagun98
parents:
diff changeset
349 sample_labels = mat[1][1:-1]
sagun98
parents:
diff changeset
350 m = [(mm[-1]+"; OTU"+mm[0],np.array([float(f) for f in mm[1:-1]])) for mm in mat[2:]]
sagun98
parents:
diff changeset
351 #feat_labels = [m[-1].replace(";","_").replace(" ","")+m[0] for m in mat[2:]]
sagun98
parents:
diff changeset
352 #print len(feat_labels)
sagun98
parents:
diff changeset
353 #print len(sample_labels)
sagun98
parents:
diff changeset
354 else:
sagun98
parents:
diff changeset
355 sample_labels = mat[0][xstart:xstop]
sagun98
parents:
diff changeset
356 m = [(mm[xstart-1],np.array([float(f) for f in mm[xstart:xstop]])) for mm in mat[ystart:ystop]]
sagun98
parents:
diff changeset
357
sagun98
parents:
diff changeset
358 if norm:
sagun98
parents:
diff changeset
359 msums = [0.0 for l in m[0][1]]
sagun98
parents:
diff changeset
360 for mm in m:
sagun98
parents:
diff changeset
361 for i,v in enumerate(mm[1]):
sagun98
parents:
diff changeset
362 msums[i] += v
sagun98
parents:
diff changeset
363
sagun98
parents:
diff changeset
364 if top and not percentile:
sagun98
parents:
diff changeset
365 percentile = 90
sagun98
parents:
diff changeset
366
sagun98
parents:
diff changeset
367 if percentile:
sagun98
parents:
diff changeset
368 m = sorted(m,key=lambda x:-stats.scoreatpercentile(x[1],percentile))
sagun98
parents:
diff changeset
369 if top:
sagun98
parents:
diff changeset
370 if fin.endswith(".biom"):
sagun98
parents:
diff changeset
371 #feat_labels = [mm[-1].replace(";","_").replace(" ","")+mm[0] for mm in m[:top]]
sagun98
parents:
diff changeset
372 feat_labels = [mm[0] for mm in m[:top]]
sagun98
parents:
diff changeset
373 else:
sagun98
parents:
diff changeset
374 feat_labels = [mm[0] for mm in m[:top]]
sagun98
parents:
diff changeset
375 if norm:
sagun98
parents:
diff changeset
376 m = [np.array([n/v for n,v in zip(mm[1],msums)]) for mm in m[:top]]
sagun98
parents:
diff changeset
377 else:
sagun98
parents:
diff changeset
378 m = [mm[1] for mm in m[:top]]
sagun98
parents:
diff changeset
379 else:
sagun98
parents:
diff changeset
380 if fin.endswith(".biom"):
sagun98
parents:
diff changeset
381 feat_labels = [mm[0] for mm in m]
sagun98
parents:
diff changeset
382 else:
sagun98
parents:
diff changeset
383 feat_labels = [mm[0] for mm in m]
sagun98
parents:
diff changeset
384 if norm:
sagun98
parents:
diff changeset
385 m = [np.array([n/v for n,v in zip(mm[1],msums)]) for mm in m]
sagun98
parents:
diff changeset
386 else:
sagun98
parents:
diff changeset
387 m = [mm[1] for mm in m]
sagun98
parents:
diff changeset
388 #m = [mm[1] for mm in m]
sagun98
parents:
diff changeset
389
sagun98
parents:
diff changeset
390 D = np.matrix( np.array( m ) )
sagun98
parents:
diff changeset
391
sagun98
parents:
diff changeset
392 return D, feat_labels, sample_labels
sagun98
parents:
diff changeset
393
sagun98
parents:
diff changeset
394 def read_dm( fin, n ):
sagun98
parents:
diff changeset
395 mat = [[float(f) for f in l.strip().split('\t')] for l in open( fin )]
sagun98
parents:
diff changeset
396 nc = sum([len(r) for r in mat])
sagun98
parents:
diff changeset
397
sagun98
parents:
diff changeset
398 if nc == n*n:
sagun98
parents:
diff changeset
399 dm = []
sagun98
parents:
diff changeset
400 for i in range(n):
sagun98
parents:
diff changeset
401 dm += mat[i][i+1:]
sagun98
parents:
diff changeset
402 return np.array(dm)
sagun98
parents:
diff changeset
403 if nc == (n*n-n)/2:
sagun98
parents:
diff changeset
404 dm = []
sagun98
parents:
diff changeset
405 for i in range(n):
sagun98
parents:
diff changeset
406 dm += mat[i]
sagun98
parents:
diff changeset
407 return np.array(dm)
sagun98
parents:
diff changeset
408 sys.stderr.write( "Error in reading the distance matrix\n" )
sagun98
parents:
diff changeset
409 sys.exit()
sagun98
parents:
diff changeset
410
sagun98
parents:
diff changeset
411
sagun98
parents:
diff changeset
412 def exp_newick( inp, labels, outfile, tree_format = 'phyloxml' ):
sagun98
parents:
diff changeset
413 n_leaves = int(inp[-1][-1])
sagun98
parents:
diff changeset
414 from Bio import Phylo
sagun98
parents:
diff changeset
415 import collections
sagun98
parents:
diff changeset
416 from Bio.Phylo.BaseTree import Tree as BTree
sagun98
parents:
diff changeset
417 from Bio.Phylo.BaseTree import Clade as BClade
sagun98
parents:
diff changeset
418 tree = BTree()
sagun98
parents:
diff changeset
419 tree.root = BClade()
sagun98
parents:
diff changeset
420
sagun98
parents:
diff changeset
421 subclades = {}
sagun98
parents:
diff changeset
422 sb_cbl = {}
sagun98
parents:
diff changeset
423
sagun98
parents:
diff changeset
424 for i,(fr,to,bl,nsub) in enumerate( inp ):
sagun98
parents:
diff changeset
425 if fr < n_leaves:
sagun98
parents:
diff changeset
426 fr_c = BClade(branch_length=-1.0,name=labels[int(fr)])
sagun98
parents:
diff changeset
427 subclades[fr] = fr_c
sagun98
parents:
diff changeset
428 sb_cbl[fr] = bl
sagun98
parents:
diff changeset
429 if to < n_leaves:
sagun98
parents:
diff changeset
430 to_c = BClade(branch_length=-1.0,name=labels[int(to)])
sagun98
parents:
diff changeset
431 subclades[to] = to_c
sagun98
parents:
diff changeset
432 sb_cbl[to] = bl
sagun98
parents:
diff changeset
433 for i,(fr,to,bl,nsub) in enumerate( inp ):
sagun98
parents:
diff changeset
434 fr_c = subclades[fr]
sagun98
parents:
diff changeset
435 to_c = subclades[to]
sagun98
parents:
diff changeset
436 cur_c = BClade(branch_length=bl)
sagun98
parents:
diff changeset
437 cur_c.clades.append( fr_c )
sagun98
parents:
diff changeset
438 cur_c.clades.append( to_c )
sagun98
parents:
diff changeset
439 subclades[i+n_leaves] = cur_c
sagun98
parents:
diff changeset
440
sagun98
parents:
diff changeset
441 def reset_rec( clade, fath_bl ):
sagun98
parents:
diff changeset
442 if clade.branch_length < 0:
sagun98
parents:
diff changeset
443 clade.branch_length = fath_bl
sagun98
parents:
diff changeset
444 return
sagun98
parents:
diff changeset
445 for c in clade.clades:
sagun98
parents:
diff changeset
446 reset_rec( c, clade.branch_length )
sagun98
parents:
diff changeset
447 clade.branch_length = fath_bl-clade.branch_length
sagun98
parents:
diff changeset
448
sagun98
parents:
diff changeset
449 tree.root = cur_c
sagun98
parents:
diff changeset
450 reset_rec( tree.root, 0.0 )
sagun98
parents:
diff changeset
451 tree.root.branch_length = 0.0
sagun98
parents:
diff changeset
452 Phylo.write(tree, outfile, tree_format )
sagun98
parents:
diff changeset
453
sagun98
parents:
diff changeset
454 def hclust( fin, fout,
sagun98
parents:
diff changeset
455 method = "average",
sagun98
parents:
diff changeset
456 dist_func = "euclidean",
sagun98
parents:
diff changeset
457 feat_dist_func = "d",
sagun98
parents:
diff changeset
458 xcw = 0.1,
sagun98
parents:
diff changeset
459 ycw = 0.1,
sagun98
parents:
diff changeset
460 scale = 'lin',
sagun98
parents:
diff changeset
461 minv = 0.0,
sagun98
parents:
diff changeset
462 maxv = None,
sagun98
parents:
diff changeset
463 xstart = 1,
sagun98
parents:
diff changeset
464 ystart = 1,
sagun98
parents:
diff changeset
465 xstop = None,
sagun98
parents:
diff changeset
466 ystop = None,
sagun98
parents:
diff changeset
467 percentile = None,
sagun98
parents:
diff changeset
468 top = None,
sagun98
parents:
diff changeset
469 norm = False,
sagun98
parents:
diff changeset
470 cm_name = 'jet',
sagun98
parents:
diff changeset
471 s2l = None,
sagun98
parents:
diff changeset
472 label_font_size = 7,
sagun98
parents:
diff changeset
473 feat_dend_col_th = None,
sagun98
parents:
diff changeset
474 sample_dend_col_th = None,
sagun98
parents:
diff changeset
475 clust_ncols = 7,
sagun98
parents:
diff changeset
476 clust_line_w = 1.0,
sagun98
parents:
diff changeset
477 label_cols = None,
sagun98
parents:
diff changeset
478 sdend_h = 0.1,
sagun98
parents:
diff changeset
479 fdend_w = 0.1,
sagun98
parents:
diff changeset
480 cm_h = 0.03,
sagun98
parents:
diff changeset
481 dmf = None,
sagun98
parents:
diff changeset
482 dms = None,
sagun98
parents:
diff changeset
483 legendon = False,
sagun98
parents:
diff changeset
484 label2cols = None,
sagun98
parents:
diff changeset
485 flabelon = True,
sagun98
parents:
diff changeset
486 slabelon = True,
sagun98
parents:
diff changeset
487 cm_ticks = None,
sagun98
parents:
diff changeset
488 legend_ncol = 3,
sagun98
parents:
diff changeset
489 pad_inches = None,
sagun98
parents:
diff changeset
490 legend_font_size = 7,
sagun98
parents:
diff changeset
491 gridon = 0,
sagun98
parents:
diff changeset
492 sdend_out = None,
sagun98
parents:
diff changeset
493 fdend_out= None):
sagun98
parents:
diff changeset
494
sagun98
parents:
diff changeset
495 if label_cols and label_cols.count("-"):
sagun98
parents:
diff changeset
496 label_cols = label_cols.split("-")
sagun98
parents:
diff changeset
497
sagun98
parents:
diff changeset
498 for n,c in my_colormaps:
sagun98
parents:
diff changeset
499 add_cmap( c, n )
sagun98
parents:
diff changeset
500
sagun98
parents:
diff changeset
501 if feat_dist_func == 'd':
sagun98
parents:
diff changeset
502 feat_dist_func = dist_func
sagun98
parents:
diff changeset
503
sagun98
parents:
diff changeset
504 D, feat_labels, sample_labels = read_table(fin,xstart,xstop,ystart,ystop,percentile,top,norm)
sagun98
parents:
diff changeset
505
sagun98
parents:
diff changeset
506 ylen,xlen = D[:].shape
sagun98
parents:
diff changeset
507 Dt = D.transpose()
sagun98
parents:
diff changeset
508
sagun98
parents:
diff changeset
509 size_cx, size_cy = xcw, ycw
sagun98
parents:
diff changeset
510
sagun98
parents:
diff changeset
511 xsize, ysize = max(xlen*size_cx,2.0), max(ylen*size_cy,2.0)
sagun98
parents:
diff changeset
512 ydend_offset = 0.025*8.0/ysize if s2l else 0.0
sagun98
parents:
diff changeset
513
sagun98
parents:
diff changeset
514 fig = init_fig(xsize,ysize,clust_ncols)
sagun98
parents:
diff changeset
515
sagun98
parents:
diff changeset
516 nfeats, nsamples = len(D), len(Dt)
sagun98
parents:
diff changeset
517
sagun98
parents:
diff changeset
518 if dmf:
sagun98
parents:
diff changeset
519 p1 = read_dm( dmf, nfeats )
sagun98
parents:
diff changeset
520 Y1 = sch.linkage( p1, method=method )
sagun98
parents:
diff changeset
521 else:
sagun98
parents:
diff changeset
522 p1 = dis.pdist( D, feat_dist_func )
sagun98
parents:
diff changeset
523 Y1 = sch.linkage( p1, method=method ) # , metric=feat_dist_func )
sagun98
parents:
diff changeset
524 #Y1 = sch.linkage( D, method=method, metric=feat_dist_func )
sagun98
parents:
diff changeset
525 Z1 = sch.dendrogram(Y1, no_plot=True, color_threshold=feat_dend_col_th)
sagun98
parents:
diff changeset
526
sagun98
parents:
diff changeset
527 if fdend_out:
sagun98
parents:
diff changeset
528 exp_newick( Y1, feat_labels, fdend_out )
sagun98
parents:
diff changeset
529
sagun98
parents:
diff changeset
530 if dms:
sagun98
parents:
diff changeset
531 p2 = read_dm( dms, nsamples )
sagun98
parents:
diff changeset
532 Y2 = sch.linkage( p2, method=method )
sagun98
parents:
diff changeset
533 else:
sagun98
parents:
diff changeset
534 p2 = dis.pdist( Dt, dist_func )
sagun98
parents:
diff changeset
535 Y2 = sch.linkage( p2, method=method ) # , metric=dist_func )
sagun98
parents:
diff changeset
536 #Y2 = sch.linkage( Dt, method=method, metric=dist_func )
sagun98
parents:
diff changeset
537 Z2 = sch.dendrogram(Y2, no_plot=True, color_threshold=sample_dend_col_th)
sagun98
parents:
diff changeset
538
sagun98
parents:
diff changeset
539 if sdend_out:
sagun98
parents:
diff changeset
540 exp_newick( Y2, sample_labels, sdend_out )
sagun98
parents:
diff changeset
541
sagun98
parents:
diff changeset
542 if fdend_w > 0.0:
sagun98
parents:
diff changeset
543 features_dend_panel(fig, Y1, Z1, fdend_w*8.0/xsize, clust_line_w )
sagun98
parents:
diff changeset
544 if sdend_h > 0.0:
sagun98
parents:
diff changeset
545 samples_dend_panel(fig, Y2, Z2, ydend_offset, sdend_h*8.0/ysize, clust_line_w)
sagun98
parents:
diff changeset
546
sagun98
parents:
diff changeset
547 idx1, idx2 = Z1['leaves'], Z2['leaves']
sagun98
parents:
diff changeset
548 D = D[idx1,:][:,idx2]
sagun98
parents:
diff changeset
549
sagun98
parents:
diff changeset
550 if s2l:
sagun98
parents:
diff changeset
551 samples2classes_panel( fig, sample_labels, s2l, idx1, idx2, 0.025*8.0/ysize, xsize, label_cols, legendon, legend_font_size, label2cols, legend_ncol )
sagun98
parents:
diff changeset
552 heatmap_panel( fig, D, minv, maxv, idx1, idx2, cm_name, scale, sample_labels, feat_labels, label_font_size, -cm_h*8.0/ysize, cm_h*0.8*8.0/ysize, flabelon, slabelon, cm_ticks, gridon, ydend_offset+sdend_h*8.0/ysize )
sagun98
parents:
diff changeset
553
sagun98
parents:
diff changeset
554 fig.savefig( fout, bbox_inches='tight',
sagun98
parents:
diff changeset
555 pad_inches = pad_inches,
sagun98
parents:
diff changeset
556 dpi=300) if fout else pylab.show()
sagun98
parents:
diff changeset
557
sagun98
parents:
diff changeset
558 if __name__ == '__main__':
sagun98
parents:
diff changeset
559 pars = read_params( sys.argv )
sagun98
parents:
diff changeset
560
sagun98
parents:
diff changeset
561 hclust( fin = pars['in'],
sagun98
parents:
diff changeset
562 fout = pars['out'],
sagun98
parents:
diff changeset
563 method = pars['m'],
sagun98
parents:
diff changeset
564 dist_func = pars['d'],
sagun98
parents:
diff changeset
565 feat_dist_func = pars['f'],
sagun98
parents:
diff changeset
566 xcw = pars['x'],
sagun98
parents:
diff changeset
567 ycw = pars['y'],
sagun98
parents:
diff changeset
568 scale = pars['s'],
sagun98
parents:
diff changeset
569 minv = pars['minv'],
sagun98
parents:
diff changeset
570 maxv = pars['maxv'],
sagun98
parents:
diff changeset
571 xstart = pars['xstart'],
sagun98
parents:
diff changeset
572 ystart = pars['ystart'],
sagun98
parents:
diff changeset
573 xstop = pars['xstop'],
sagun98
parents:
diff changeset
574 ystop = pars['ystop'],
sagun98
parents:
diff changeset
575 percentile = pars['perc'],
sagun98
parents:
diff changeset
576 top = pars['top'],
sagun98
parents:
diff changeset
577 norm = pars['norm'],
sagun98
parents:
diff changeset
578 cm_name = pars['c'],
sagun98
parents:
diff changeset
579 s2l = pars['l'],
sagun98
parents:
diff changeset
580 label_font_size = pars['font_size'],
sagun98
parents:
diff changeset
581 feat_dend_col_th = pars['feat_dend_col_th'],
sagun98
parents:
diff changeset
582 sample_dend_col_th = pars['sample_dend_col_th'],
sagun98
parents:
diff changeset
583 clust_ncols = pars['clust_ncols'],
sagun98
parents:
diff changeset
584 clust_line_w = pars['clust_line_w'],
sagun98
parents:
diff changeset
585 label_cols = pars['label_cols'],
sagun98
parents:
diff changeset
586 sdend_h = pars['sdend_h'],
sagun98
parents:
diff changeset
587 fdend_w = pars['fdend_w'],
sagun98
parents:
diff changeset
588 cm_h = pars['cm_h'],
sagun98
parents:
diff changeset
589 dmf = pars['dmf'],
sagun98
parents:
diff changeset
590 dms = pars['dms'],
sagun98
parents:
diff changeset
591 legendon = pars['legend'],
sagun98
parents:
diff changeset
592 label2cols = pars['label2cols'],
sagun98
parents:
diff changeset
593 flabelon = pars['flabel'],
sagun98
parents:
diff changeset
594 slabelon = pars['slabel'],
sagun98
parents:
diff changeset
595 cm_ticks = pars['cm_ticks'],
sagun98
parents:
diff changeset
596 legend_ncol = pars['legend_ncol'],
sagun98
parents:
diff changeset
597 pad_inches = pars['pad_inches'],
sagun98
parents:
diff changeset
598 legend_font_size = pars['legend_font_size'],
sagun98
parents:
diff changeset
599 gridon = pars['grid'],
sagun98
parents:
diff changeset
600 sdend_out = pars['sdend_out'],
sagun98
parents:
diff changeset
601 fdend_out = pars['fdend_out'],
sagun98
parents:
diff changeset
602 )
sagun98
parents:
diff changeset
603