Mercurial > repos > sagun98 > micropita_v2
comparison src/breadcrumbs/hclust/hclust.py @ 0:0de566f21448 draft default tip
v2
| author | sagun98 |
|---|---|
| date | Thu, 03 Jun 2021 18:13:32 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:0de566f21448 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 import sys | |
| 4 import numpy as np | |
| 5 import matplotlib | |
| 6 matplotlib.use('Agg') | |
| 7 import scipy | |
| 8 import pylab | |
| 9 import scipy.cluster.hierarchy as sch | |
| 10 import scipy.spatial.distance as dis | |
| 11 from scipy import stats | |
| 12 | |
| 13 # User defined color maps (in addition to matplotlib ones) | |
| 14 bbcyr = {'red': ( (0.0, 0.0, 0.0), | |
| 15 (0.25, 0.0, 0.0), | |
| 16 (0.50, 0.0, 0.0), | |
| 17 (0.75, 1.0, 1.0), | |
| 18 (1.0, 1.0, 1.0)), | |
| 19 'green': ( (0.0, 0.0, 0.0), | |
| 20 (0.25, 0.0, 0.0), | |
| 21 (0.50, 1.0, 1.0), | |
| 22 (0.75, 1.0, 1.0), | |
| 23 (1.0, 0.0, 1.0)), | |
| 24 'blue': ( (0.0, 0.0, 0.0), | |
| 25 (0.25, 1.0, 1.0), | |
| 26 (0.50, 1.0, 1.0), | |
| 27 (0.75, 0.0, 0.0), | |
| 28 (1.0, 0.0, 1.0))} | |
| 29 | |
| 30 bbcry = {'red': ( (0.0, 0.0, 0.0), | |
| 31 (0.25, 0.0, 0.0), | |
| 32 (0.50, 0.0, 0.0), | |
| 33 (0.75, 1.0, 1.0), | |
| 34 (1.0, 1.0, 1.0)), | |
| 35 'green': ( (0.0, 0.0, 0.0), | |
| 36 (0.25, 0.0, 0.0), | |
| 37 (0.50, 1.0, 1.0), | |
| 38 (0.75, 0.0, 0.0), | |
| 39 (1.0, 1.0, 1.0)), | |
| 40 'blue': ( (0.0, 0.0, 0.0), | |
| 41 (0.25, 1.0, 1.0), | |
| 42 (0.50, 1.0, 1.0), | |
| 43 (0.75, 0.0, 0.0), | |
| 44 (1.0, 0.0, 1.0))} | |
| 45 my_colormaps = [ ('bbcyr',bbcyr), | |
| 46 ('bbcry',bbcry)] | |
| 47 | |
| 48 | |
| 49 | |
| 50 def read_params(args): | |
| 51 import argparse as ap | |
| 52 import textwrap | |
| 53 | |
| 54 p = ap.ArgumentParser( description= "TBA" ) | |
| 55 | |
| 56 p.add_argument( '--in', '--inp', metavar='INPUT_FILE', type=str, | |
| 57 nargs='?', default=sys.stdin, | |
| 58 help= "the input archive " ) | |
| 59 | |
| 60 p.add_argument( '--out', metavar='OUTPUT_FILE', type=str, | |
| 61 nargs = '?', default=None, | |
| 62 help= " the output file, image on screen" | |
| 63 " if not specified. " ) | |
| 64 | |
| 65 p.add_argument( '-m', metavar='method', type=str, | |
| 66 choices=[ "single","complete","average", | |
| 67 "weighted","centroid","median", | |
| 68 "ward" ], | |
| 69 default="average" ) | |
| 70 | |
| 71 dist_funcs = [ "euclidean","minkowski","cityblock","seuclidean", | |
| 72 "sqeuclidean","cosine","correlation","hamming", | |
| 73 "jaccard","chebyshev","canberra","braycurtis", | |
| 74 "mahalanobis","yule","matching","dice", | |
| 75 "kulsinski","rogerstanimoto","russellrao","sokalmichener", | |
| 76 "sokalsneath","wminkowski","ward"] | |
| 77 p.add_argument( '-d', metavar='distance function', type=str, | |
| 78 choices=dist_funcs, | |
| 79 default="euclidean" ) | |
| 80 p.add_argument( '-f', metavar='distance function for features', type=str, | |
| 81 choices=dist_funcs, | |
| 82 default="d" ) | |
| 83 | |
| 84 p.add_argument( '--dmf', metavar='distance matrix for features', type=str, | |
| 85 default = None ) | |
| 86 p.add_argument( '--dms', metavar='distance matrix for samples', type=str, | |
| 87 default = None ) | |
| 88 | |
| 89 | |
| 90 p.add_argument( '-l', metavar='sample label', type=str, | |
| 91 default = None ) | |
| 92 | |
| 93 p.add_argument( '-s', metavar='scale norm', type=str, | |
| 94 default = 'lin', choices = ['log','lin']) | |
| 95 | |
| 96 p.add_argument( '-x', metavar='x cell width', type=float, | |
| 97 default = 0.1) | |
| 98 p.add_argument( '-y', metavar='y cell width', type=float, | |
| 99 default = 0.1 ) | |
| 100 | |
| 101 p.add_argument( '--minv', metavar='min value', type=float, | |
| 102 default = 0.0 ) | |
| 103 p.add_argument( '--maxv', metavar='max value', type=float, | |
| 104 default = None ) | |
| 105 | |
| 106 p.add_argument( '--xstart', metavar='x coordinate of the top left cell ' | |
| 107 'of the values', | |
| 108 type=int, default=1 ) | |
| 109 p.add_argument( '--ystart', metavar='y coordinate of the top left cell ' | |
| 110 'of the values', | |
| 111 type=int, default=1 ) | |
| 112 p.add_argument( '--xstop', metavar='x coordinate of the bottom right cell ' | |
| 113 'of the values (default None = last row)', | |
| 114 type=int, default=None ) | |
| 115 p.add_argument( '--ystop', metavar='y coordinate of the bottom right cell ' | |
| 116 'of the values (default None = last column)', | |
| 117 type=int, default=None ) | |
| 118 | |
| 119 p.add_argument( '--perc', metavar='percentile for ordering and rows selection', type=int, default=None ) | |
| 120 p.add_argument( '--top', metavar='selection of the top N rows', type=int, default=None ) | |
| 121 p.add_argument( '--norm', metavar='whether to normalize columns (default 0)', type=int, default=0 ) | |
| 122 | |
| 123 p.add_argument( '--sdend_h', metavar='height of the sample dendrogram', type=float, | |
| 124 default = 0.1 ) | |
| 125 p.add_argument( '--fdend_w', metavar='width of the feature dendrogram', type=float, | |
| 126 default = 0.1 ) | |
| 127 p.add_argument( '--cm_h', metavar='height of the colormap', type=float, | |
| 128 default = 0.03 ) | |
| 129 p.add_argument( '--cm_ticks', metavar='label for ticks of the colormap', type=str, | |
| 130 default = None ) | |
| 131 | |
| 132 p.add_argument( '--font_size', metavar='label_font_size', type=int, | |
| 133 default = 7 ) | |
| 134 p.add_argument( '--feat_dend_col_th', metavar='Color threshold for feature dendrogram', type=float, | |
| 135 default = None ) | |
| 136 p.add_argument( '--sample_dend_col_th', metavar='Color threshold for sample dendrogram', type=float, | |
| 137 default = None ) | |
| 138 p.add_argument( '--clust_ncols', metavar='Number of colors for clusters', type=int, | |
| 139 default = 7 ) | |
| 140 p.add_argument( '--clust_line_w', metavar='Cluster line width', type=float, | |
| 141 default = 1.0 ) | |
| 142 p.add_argument( '--label_cols', metavar='Label colors', type=str, | |
| 143 default = None ) | |
| 144 p.add_argument( '--label2cols', metavar='Label to colors mapping file', type=str, | |
| 145 default = None ) | |
| 146 p.add_argument( '--sdend_out', metavar='File for storing the samples dendrogram in PhyloXML format', type=str, | |
| 147 default = None ) | |
| 148 p.add_argument( '--fdend_out', metavar='File for storing the features dendrogram in PhyloXML format', type=str, | |
| 149 default = None ) | |
| 150 | |
| 151 | |
| 152 p.add_argument( '--pad_inches', metavar='Proportion of figure to be left blank around the plot', type=float, | |
| 153 default = 0.1 ) | |
| 154 | |
| 155 | |
| 156 p.add_argument( '--flabel', metavar='Whether to show the labels for the features', type=int, | |
| 157 default = 0 ) | |
| 158 p.add_argument( '--slabel', metavar='Whether to show the labels for the samples', type=int, | |
| 159 default = 0 ) | |
| 160 | |
| 161 p.add_argument( '--legend', metavar='Whether to show the samples to label legend', type=int, | |
| 162 default = 0 ) | |
| 163 p.add_argument( '--legend_font_size', metavar='Legend font size', type=int, | |
| 164 default = 7 ) | |
| 165 p.add_argument( '--legend_ncol', metavar='Number of columns for the legend', type=int, | |
| 166 default = 3 ) | |
| 167 p.add_argument( '--grid', metavar='Whether to show the grid (only black for now)', type=int, | |
| 168 default = 0 ) | |
| 169 | |
| 170 col_maps = ['Accent', 'Blues', 'BrBG', 'BuGn', 'BuPu', 'Dark2', 'GnBu', | |
| 171 'Greens', 'Greys', 'OrRd', 'Oranges', 'PRGn', 'Paired', | |
| 172 'Pastel1', 'Pastel2', 'PiYG', 'PuBu', 'PuBuGn', 'PuOr', | |
| 173 'PuRd', 'Purples', 'RdBu', 'RdGy', 'RdPu', 'RdYlBu', 'RdYlGn', | |
| 174 'Reds', 'Set1', 'Set2', 'Set3', 'Spectral', 'YlGn', 'YlGnBu', | |
| 175 'YlOrBr', 'YlOrRd', 'afmhot', 'autumn', 'binary', 'bone', | |
| 176 'brg', 'bwr', 'cool', 'copper', 'flag', 'gist_earth', | |
| 177 'gist_gray', 'gist_heat', 'gist_ncar', 'gist_rainbow', | |
| 178 'gist_stern', 'gist_yarg', 'gnuplot', 'gnuplot2', 'gray', | |
| 179 'hot', 'hsv', 'jet', 'ocean', 'pink', 'prism', 'rainbow', | |
| 180 'seismic', 'spectral', 'spring', 'summer', 'terrain', 'winter'] + [n for n,c in my_colormaps] | |
| 181 p.add_argument( '-c', metavar='colormap', type=str, | |
| 182 choices = col_maps, default = 'jet' ) | |
| 183 | |
| 184 return vars(p.parse_args()) | |
| 185 | |
| 186 # Predefined colors for dendrograms brances and class labels | |
| 187 colors = [ "#B22222","#006400","#0000CD","#9400D3","#696969","#8B4513", | |
| 188 "#FF1493","#FF8C00","#3CB371","#00Bfff","#CDC9C9","#FFD700", | |
| 189 "#2F4F4F","#FF0000","#ADFF2F","#B03060" ] | |
| 190 | |
| 191 def samples2classes_panel(fig, samples, s2l, idx1, idx2, height, xsize, cols, legendon, fontsize, label2cols, legend_ncol ): | |
| 192 from matplotlib.patches import Rectangle | |
| 193 samples2labels = dict([(l[0],l[1]) | |
| 194 for l in [ll.strip().split('\t') | |
| 195 for ll in open(s2l)] if len(l) > 1]) | |
| 196 | |
| 197 if label2cols: | |
| 198 labels2colors = dict([(l[0],l[1]) for l in [ll.strip().split('\t') for ll in open(label2cols)]]) | |
| 199 else: | |
| 200 cs = cols if cols else colors | |
| 201 labels2colors = dict([(l,cs[i%len(cs)]) for i,l in enumerate(set(samples2labels.values()))]) | |
| 202 ax1 = fig.add_axes([0.,1.0,1.0,height],frameon=False) | |
| 203 ax1.set_xticks([]) | |
| 204 ax1.set_yticks([]) | |
| 205 ax1.set_ylim( [0.0, height] ) | |
| 206 ax1.set_xlim( [0.0, xsize] ) | |
| 207 step = xsize / float(len(samples)) | |
| 208 labels = set() | |
| 209 added_labels = set() | |
| 210 for i,ind in enumerate(idx2): | |
| 211 if not samples[ind] in samples2labels or \ | |
| 212 not samples2labels[samples[ind]] in labels2colors: | |
| 213 fc, ll = "k", None | |
| 214 else: | |
| 215 ll = samples2labels[samples[ind]] | |
| 216 ll = None if ll in added_labels else ll | |
| 217 added_labels.add( ll ) | |
| 218 fc = labels2colors[samples2labels[samples[ind]]] | |
| 219 | |
| 220 rect = Rectangle( [float(i)*step, 0.0], step, height, | |
| 221 facecolor = fc, | |
| 222 label = ll, | |
| 223 edgecolor='b', lw = 0.0) | |
| 224 labels.add( ll ) | |
| 225 ax1.add_patch(rect) | |
| 226 ax1.autoscale_view() | |
| 227 | |
| 228 if legendon: | |
| 229 ax1.legend( loc = 2, ncol = legend_ncol, bbox_to_anchor=(1.01, 3.), | |
| 230 borderpad = 0.0, labelspacing = 0.0, | |
| 231 handlelength = 0.5, handletextpad = 0.3, | |
| 232 borderaxespad = 0.0, columnspacing = 0.3, | |
| 233 prop = {'size':fontsize}, frameon = False) | |
| 234 | |
| 235 def samples_dend_panel( fig, Z, Z2, ystart, ylen, lw ): | |
| 236 ax2 = fig.add_axes([0.0,1.0+ystart,1.0,ylen], frameon=False) | |
| 237 Z2['color_list'] = [c.replace('b','k') for c in Z2['color_list']] | |
| 238 mh = max(Z[:,2]) | |
| 239 sch._plot_dendrogram( Z2['icoord'], Z2['dcoord'], Z2['ivl'], | |
| 240 Z.shape[0] + 1, Z.shape[0] + 1, | |
| 241 mh, 'top', no_labels=True, | |
| 242 color_list=Z2['color_list'] ) | |
| 243 for coll in ax2.collections: | |
| 244 coll._linewidths = (lw,) | |
| 245 ax2.set_xticks([]) | |
| 246 ax2.set_yticks([]) | |
| 247 ax2.set_xticklabels([]) | |
| 248 | |
| 249 def features_dend_panel( fig, Z, Z2, width, lw ): | |
| 250 ax1 = fig.add_axes([-width,0.0,width,1.0], frameon=False) | |
| 251 Z2['color_list'] = [c.replace('b','k').replace('x','b') for c in Z2['color_list']] | |
| 252 mh = max(Z[:,2]) | |
| 253 sch._plot_dendrogram(Z2['icoord'], Z2['dcoord'], Z2['ivl'], Z.shape[0] + 1, Z.shape[0] + 1, mh, 'right', no_labels=True, color_list=Z2['color_list']) | |
| 254 for coll in ax1.collections: | |
| 255 coll._linewidths = (lw,) | |
| 256 ax1.set_xticks([]) | |
| 257 ax1.set_yticks([]) | |
| 258 ax1.set_xticklabels([]) | |
| 259 | |
| 260 | |
| 261 def add_cmap( cmapdict, name ): | |
| 262 my_cmap = matplotlib.colors.LinearSegmentedColormap(name,cmapdict,256) | |
| 263 pylab.register_cmap(name=name,cmap=my_cmap) | |
| 264 | |
| 265 def init_fig(xsize,ysize,ncol): | |
| 266 fig = pylab.figure(figsize=(xsize,ysize)) | |
| 267 sch._link_line_colors = colors[:ncol] | |
| 268 return fig | |
| 269 | |
| 270 def heatmap_panel( fig, D, minv, maxv, idx1, idx2, cm_name, scale, cols, rows, label_font_size, cb_offset, cb_l, flabelson, slabelson, cm_ticks, gridon, bar_offset ): | |
| 271 cm = pylab.get_cmap(cm_name) | |
| 272 bottom_col = [ cm._segmentdata['red'][0][1], | |
| 273 cm._segmentdata['green'][0][1], | |
| 274 cm._segmentdata['blue'][0][1] ] | |
| 275 axmatrix = fig.add_axes( [0.0,0.0,1.0,1.0], | |
| 276 axisbg=bottom_col) | |
| 277 if any([c < 0.95 for c in bottom_col]): | |
| 278 axmatrix.spines['right'].set_color('none') | |
| 279 axmatrix.spines['left'].set_color('none') | |
| 280 axmatrix.spines['top'].set_color('none') | |
| 281 axmatrix.spines['bottom'].set_color('none') | |
| 282 norm_f = matplotlib.colors.LogNorm if scale == 'log' else matplotlib.colors.Normalize | |
| 283 im = axmatrix.matshow( D, norm = norm_f( vmin=minv if minv > 0.0 else None, | |
| 284 vmax=maxv), | |
| 285 aspect='auto', origin='lower', cmap=cm, vmax=maxv) | |
| 286 | |
| 287 axmatrix2 = axmatrix.twinx() | |
| 288 axmatrix3 = axmatrix.twiny() | |
| 289 | |
| 290 axmatrix.set_xticks([]) | |
| 291 axmatrix2.set_xticks([]) | |
| 292 axmatrix3.set_xticks([]) | |
| 293 axmatrix.set_yticks([]) | |
| 294 axmatrix2.set_yticks([]) | |
| 295 axmatrix3.set_yticks([]) | |
| 296 | |
| 297 axmatrix.set_xticklabels([]) | |
| 298 axmatrix2.set_xticklabels([]) | |
| 299 axmatrix3.set_xticklabels([]) | |
| 300 axmatrix.set_yticklabels([]) | |
| 301 axmatrix2.set_yticklabels([]) | |
| 302 axmatrix3.set_yticklabels([]) | |
| 303 | |
| 304 if any([c < 0.95 for c in bottom_col]): | |
| 305 axmatrix2.spines['right'].set_color('none') | |
| 306 axmatrix2.spines['left'].set_color('none') | |
| 307 axmatrix2.spines['top'].set_color('none') | |
| 308 axmatrix2.spines['bottom'].set_color('none') | |
| 309 if any([c < 0.95 for c in bottom_col]): | |
| 310 axmatrix3.spines['right'].set_color('none') | |
| 311 axmatrix3.spines['left'].set_color('none') | |
| 312 axmatrix3.spines['top'].set_color('none') | |
| 313 axmatrix3.spines['bottom'].set_color('none') | |
| 314 if flabelson: | |
| 315 axmatrix2.set_yticks(np.arange(len(rows))+0.5) | |
| 316 axmatrix2.set_yticklabels([rows[r] for r in idx1],size=label_font_size,va='center') | |
| 317 if slabelson: | |
| 318 axmatrix.set_xticks(np.arange(len(cols))) | |
| 319 axmatrix.set_xticklabels([cols[r] for r in idx2],size=label_font_size,rotation=90,va='top',ha='center') | |
| 320 axmatrix.tick_params(length=0) | |
| 321 axmatrix2.tick_params(length=0) | |
| 322 axmatrix3.tick_params(length=0) | |
| 323 axmatrix2.set_ylim(0,len(rows)) | |
| 324 | |
| 325 if gridon: | |
| 326 axmatrix.set_yticks(np.arange(len(idx1)-1)+0.5) | |
| 327 axmatrix.set_xticks(np.arange(len(idx2))+0.5) | |
| 328 axmatrix.grid( True ) | |
| 329 ticklines = axmatrix.get_xticklines() | |
| 330 ticklines.extend( axmatrix.get_yticklines() ) | |
| 331 #gridlines = axmatrix.get_xgridlines() | |
| 332 #gridlines.extend( axmatrix.get_ygridlines() ) | |
| 333 | |
| 334 for line in ticklines: | |
| 335 line.set_linewidth(3) | |
| 336 | |
| 337 if cb_l > 0.0: | |
| 338 axcolor = fig.add_axes([0.0,1.0+bar_offset*1.25,1.0,cb_l]) | |
| 339 cbar = fig.colorbar(im, cax=axcolor, orientation='horizontal') | |
| 340 cbar.ax.tick_params(labelsize=label_font_size) | |
| 341 if cm_ticks: | |
| 342 cbar.ax.set_xticklabels( cm_ticks.split(":") ) | |
| 343 | |
| 344 | |
| 345 def read_table( fin, xstart,xstop,ystart,ystop, percentile = None, top = None, norm = False ): | |
| 346 mat = [l.rstrip().split('\t') for l in open( fin )] | |
| 347 | |
| 348 if fin.endswith(".biom"): | |
| 349 sample_labels = mat[1][1:-1] | |
| 350 m = [(mm[-1]+"; OTU"+mm[0],np.array([float(f) for f in mm[1:-1]])) for mm in mat[2:]] | |
| 351 #feat_labels = [m[-1].replace(";","_").replace(" ","")+m[0] for m in mat[2:]] | |
| 352 #print len(feat_labels) | |
| 353 #print len(sample_labels) | |
| 354 else: | |
| 355 sample_labels = mat[0][xstart:xstop] | |
| 356 m = [(mm[xstart-1],np.array([float(f) for f in mm[xstart:xstop]])) for mm in mat[ystart:ystop]] | |
| 357 | |
| 358 if norm: | |
| 359 msums = [0.0 for l in m[0][1]] | |
| 360 for mm in m: | |
| 361 for i,v in enumerate(mm[1]): | |
| 362 msums[i] += v | |
| 363 | |
| 364 if top and not percentile: | |
| 365 percentile = 90 | |
| 366 | |
| 367 if percentile: | |
| 368 m = sorted(m,key=lambda x:-stats.scoreatpercentile(x[1],percentile)) | |
| 369 if top: | |
| 370 if fin.endswith(".biom"): | |
| 371 #feat_labels = [mm[-1].replace(";","_").replace(" ","")+mm[0] for mm in m[:top]] | |
| 372 feat_labels = [mm[0] for mm in m[:top]] | |
| 373 else: | |
| 374 feat_labels = [mm[0] for mm in m[:top]] | |
| 375 if norm: | |
| 376 m = [np.array([n/v for n,v in zip(mm[1],msums)]) for mm in m[:top]] | |
| 377 else: | |
| 378 m = [mm[1] for mm in m[:top]] | |
| 379 else: | |
| 380 if fin.endswith(".biom"): | |
| 381 feat_labels = [mm[0] for mm in m] | |
| 382 else: | |
| 383 feat_labels = [mm[0] for mm in m] | |
| 384 if norm: | |
| 385 m = [np.array([n/v for n,v in zip(mm[1],msums)]) for mm in m] | |
| 386 else: | |
| 387 m = [mm[1] for mm in m] | |
| 388 #m = [mm[1] for mm in m] | |
| 389 | |
| 390 D = np.matrix( np.array( m ) ) | |
| 391 | |
| 392 return D, feat_labels, sample_labels | |
| 393 | |
| 394 def read_dm( fin, n ): | |
| 395 mat = [[float(f) for f in l.strip().split('\t')] for l in open( fin )] | |
| 396 nc = sum([len(r) for r in mat]) | |
| 397 | |
| 398 if nc == n*n: | |
| 399 dm = [] | |
| 400 for i in range(n): | |
| 401 dm += mat[i][i+1:] | |
| 402 return np.array(dm) | |
| 403 if nc == (n*n-n)/2: | |
| 404 dm = [] | |
| 405 for i in range(n): | |
| 406 dm += mat[i] | |
| 407 return np.array(dm) | |
| 408 sys.stderr.write( "Error in reading the distance matrix\n" ) | |
| 409 sys.exit() | |
| 410 | |
| 411 | |
| 412 def exp_newick( inp, labels, outfile, tree_format = 'phyloxml' ): | |
| 413 n_leaves = int(inp[-1][-1]) | |
| 414 from Bio import Phylo | |
| 415 import collections | |
| 416 from Bio.Phylo.BaseTree import Tree as BTree | |
| 417 from Bio.Phylo.BaseTree import Clade as BClade | |
| 418 tree = BTree() | |
| 419 tree.root = BClade() | |
| 420 | |
| 421 subclades = {} | |
| 422 sb_cbl = {} | |
| 423 | |
| 424 for i,(fr,to,bl,nsub) in enumerate( inp ): | |
| 425 if fr < n_leaves: | |
| 426 fr_c = BClade(branch_length=-1.0,name=labels[int(fr)]) | |
| 427 subclades[fr] = fr_c | |
| 428 sb_cbl[fr] = bl | |
| 429 if to < n_leaves: | |
| 430 to_c = BClade(branch_length=-1.0,name=labels[int(to)]) | |
| 431 subclades[to] = to_c | |
| 432 sb_cbl[to] = bl | |
| 433 for i,(fr,to,bl,nsub) in enumerate( inp ): | |
| 434 fr_c = subclades[fr] | |
| 435 to_c = subclades[to] | |
| 436 cur_c = BClade(branch_length=bl) | |
| 437 cur_c.clades.append( fr_c ) | |
| 438 cur_c.clades.append( to_c ) | |
| 439 subclades[i+n_leaves] = cur_c | |
| 440 | |
| 441 def reset_rec( clade, fath_bl ): | |
| 442 if clade.branch_length < 0: | |
| 443 clade.branch_length = fath_bl | |
| 444 return | |
| 445 for c in clade.clades: | |
| 446 reset_rec( c, clade.branch_length ) | |
| 447 clade.branch_length = fath_bl-clade.branch_length | |
| 448 | |
| 449 tree.root = cur_c | |
| 450 reset_rec( tree.root, 0.0 ) | |
| 451 tree.root.branch_length = 0.0 | |
| 452 Phylo.write(tree, outfile, tree_format ) | |
| 453 | |
| 454 def hclust( fin, fout, | |
| 455 method = "average", | |
| 456 dist_func = "euclidean", | |
| 457 feat_dist_func = "d", | |
| 458 xcw = 0.1, | |
| 459 ycw = 0.1, | |
| 460 scale = 'lin', | |
| 461 minv = 0.0, | |
| 462 maxv = None, | |
| 463 xstart = 1, | |
| 464 ystart = 1, | |
| 465 xstop = None, | |
| 466 ystop = None, | |
| 467 percentile = None, | |
| 468 top = None, | |
| 469 norm = False, | |
| 470 cm_name = 'jet', | |
| 471 s2l = None, | |
| 472 label_font_size = 7, | |
| 473 feat_dend_col_th = None, | |
| 474 sample_dend_col_th = None, | |
| 475 clust_ncols = 7, | |
| 476 clust_line_w = 1.0, | |
| 477 label_cols = None, | |
| 478 sdend_h = 0.1, | |
| 479 fdend_w = 0.1, | |
| 480 cm_h = 0.03, | |
| 481 dmf = None, | |
| 482 dms = None, | |
| 483 legendon = False, | |
| 484 label2cols = None, | |
| 485 flabelon = True, | |
| 486 slabelon = True, | |
| 487 cm_ticks = None, | |
| 488 legend_ncol = 3, | |
| 489 pad_inches = None, | |
| 490 legend_font_size = 7, | |
| 491 gridon = 0, | |
| 492 sdend_out = None, | |
| 493 fdend_out= None): | |
| 494 | |
| 495 if label_cols and label_cols.count("-"): | |
| 496 label_cols = label_cols.split("-") | |
| 497 | |
| 498 for n,c in my_colormaps: | |
| 499 add_cmap( c, n ) | |
| 500 | |
| 501 if feat_dist_func == 'd': | |
| 502 feat_dist_func = dist_func | |
| 503 | |
| 504 D, feat_labels, sample_labels = read_table(fin,xstart,xstop,ystart,ystop,percentile,top,norm) | |
| 505 | |
| 506 ylen,xlen = D[:].shape | |
| 507 Dt = D.transpose() | |
| 508 | |
| 509 size_cx, size_cy = xcw, ycw | |
| 510 | |
| 511 xsize, ysize = max(xlen*size_cx,2.0), max(ylen*size_cy,2.0) | |
| 512 ydend_offset = 0.025*8.0/ysize if s2l else 0.0 | |
| 513 | |
| 514 fig = init_fig(xsize,ysize,clust_ncols) | |
| 515 | |
| 516 nfeats, nsamples = len(D), len(Dt) | |
| 517 | |
| 518 if dmf: | |
| 519 p1 = read_dm( dmf, nfeats ) | |
| 520 Y1 = sch.linkage( p1, method=method ) | |
| 521 else: | |
| 522 p1 = dis.pdist( D, feat_dist_func ) | |
| 523 Y1 = sch.linkage( p1, method=method ) # , metric=feat_dist_func ) | |
| 524 #Y1 = sch.linkage( D, method=method, metric=feat_dist_func ) | |
| 525 Z1 = sch.dendrogram(Y1, no_plot=True, color_threshold=feat_dend_col_th) | |
| 526 | |
| 527 if fdend_out: | |
| 528 exp_newick( Y1, feat_labels, fdend_out ) | |
| 529 | |
| 530 if dms: | |
| 531 p2 = read_dm( dms, nsamples ) | |
| 532 Y2 = sch.linkage( p2, method=method ) | |
| 533 else: | |
| 534 p2 = dis.pdist( Dt, dist_func ) | |
| 535 Y2 = sch.linkage( p2, method=method ) # , metric=dist_func ) | |
| 536 #Y2 = sch.linkage( Dt, method=method, metric=dist_func ) | |
| 537 Z2 = sch.dendrogram(Y2, no_plot=True, color_threshold=sample_dend_col_th) | |
| 538 | |
| 539 if sdend_out: | |
| 540 exp_newick( Y2, sample_labels, sdend_out ) | |
| 541 | |
| 542 if fdend_w > 0.0: | |
| 543 features_dend_panel(fig, Y1, Z1, fdend_w*8.0/xsize, clust_line_w ) | |
| 544 if sdend_h > 0.0: | |
| 545 samples_dend_panel(fig, Y2, Z2, ydend_offset, sdend_h*8.0/ysize, clust_line_w) | |
| 546 | |
| 547 idx1, idx2 = Z1['leaves'], Z2['leaves'] | |
| 548 D = D[idx1,:][:,idx2] | |
| 549 | |
| 550 if s2l: | |
| 551 samples2classes_panel( fig, sample_labels, s2l, idx1, idx2, 0.025*8.0/ysize, xsize, label_cols, legendon, legend_font_size, label2cols, legend_ncol ) | |
| 552 heatmap_panel( fig, D, minv, maxv, idx1, idx2, cm_name, scale, sample_labels, feat_labels, label_font_size, -cm_h*8.0/ysize, cm_h*0.8*8.0/ysize, flabelon, slabelon, cm_ticks, gridon, ydend_offset+sdend_h*8.0/ysize ) | |
| 553 | |
| 554 fig.savefig( fout, bbox_inches='tight', | |
| 555 pad_inches = pad_inches, | |
| 556 dpi=300) if fout else pylab.show() | |
| 557 | |
| 558 if __name__ == '__main__': | |
| 559 pars = read_params( sys.argv ) | |
| 560 | |
| 561 hclust( fin = pars['in'], | |
| 562 fout = pars['out'], | |
| 563 method = pars['m'], | |
| 564 dist_func = pars['d'], | |
| 565 feat_dist_func = pars['f'], | |
| 566 xcw = pars['x'], | |
| 567 ycw = pars['y'], | |
| 568 scale = pars['s'], | |
| 569 minv = pars['minv'], | |
| 570 maxv = pars['maxv'], | |
| 571 xstart = pars['xstart'], | |
| 572 ystart = pars['ystart'], | |
| 573 xstop = pars['xstop'], | |
| 574 ystop = pars['ystop'], | |
| 575 percentile = pars['perc'], | |
| 576 top = pars['top'], | |
| 577 norm = pars['norm'], | |
| 578 cm_name = pars['c'], | |
| 579 s2l = pars['l'], | |
| 580 label_font_size = pars['font_size'], | |
| 581 feat_dend_col_th = pars['feat_dend_col_th'], | |
| 582 sample_dend_col_th = pars['sample_dend_col_th'], | |
| 583 clust_ncols = pars['clust_ncols'], | |
| 584 clust_line_w = pars['clust_line_w'], | |
| 585 label_cols = pars['label_cols'], | |
| 586 sdend_h = pars['sdend_h'], | |
| 587 fdend_w = pars['fdend_w'], | |
| 588 cm_h = pars['cm_h'], | |
| 589 dmf = pars['dmf'], | |
| 590 dms = pars['dms'], | |
| 591 legendon = pars['legend'], | |
| 592 label2cols = pars['label2cols'], | |
| 593 flabelon = pars['flabel'], | |
| 594 slabelon = pars['slabel'], | |
| 595 cm_ticks = pars['cm_ticks'], | |
| 596 legend_ncol = pars['legend_ncol'], | |
| 597 pad_inches = pars['pad_inches'], | |
| 598 legend_font_size = pars['legend_font_size'], | |
| 599 gridon = pars['grid'], | |
| 600 sdend_out = pars['sdend_out'], | |
| 601 fdend_out = pars['fdend_out'], | |
| 602 ) | |
| 603 |
