comparison dpmix_plot.py @ 12:4b6590dd7250

Uploaded
author miller-lab
date Wed, 12 Sep 2012 17:10:26 -0400
parents
children 8997f2ca8c7a
comparison
equal deleted inserted replaced
11:d4ec09e8079f 12:4b6590dd7250
1 #!/usr/bin/env python
2
3 import os
4 import sys
5 import math
6 import matplotlib as mpl
7 mpl.use('PDF')
8 import matplotlib.pyplot as plt
9 from matplotlib.path import Path
10 import matplotlib.patches as patches
11
12 ################################################################################
13
14 def build_chrom_len_dict(dbkey, galaxy_data_index_dir):
15 chrom_len_root = os.path.join(galaxy_data_index_dir, 'shared/ucsc/chrom')
16 chrom_len_file = '{0}.len'.format(dbkey)
17 chrom_len_path = os.path.join(chrom_len_root, chrom_len_file)
18
19 chrom_len = {}
20
21 try:
22 with open(chrom_len_path) as fh:
23 for line in fh:
24 line = line.rstrip('\r\n')
25 elems = line.split()
26 if len(elems) == 2:
27 chrom = elems[0]
28 length = int(elems[1])
29 chrom_len[chrom] = length
30 except:
31 pass
32
33 return chrom_len
34
35 def parse_input_file(input_file):
36 chroms = []
37 individuals = []
38 data = {}
39 chrom_len = {}
40
41 with open(input_file) as fh:
42 for line in fh:
43 line = line.strip()
44 if line:
45 elems = line.split()
46 chrom = elems[0]
47 p1, p2, state = map(int, elems[1:4])
48 id = elems[4]
49
50 if chrom not in chroms:
51 chroms.append(chrom)
52
53 if id not in individuals:
54 individuals.append(id)
55
56 data.setdefault(chrom, {})
57 data[chrom].setdefault(id, [])
58 data[chrom][id].append((p1, p2, state))
59
60 if p2 > chrom_len.setdefault(chrom, 0):
61 chrom_len[chrom] = p2
62
63 return chroms, individuals, data, chrom_len
64
65 def check_chroms(chroms, chrom_len, dbkey):
66 error = 0
67 for chrom in chroms:
68 if chrom not in chrom_len:
69 print >> sys.stderr, "Can't find length for {0} chromosome {1}".format(dbkey, chrom)
70 error = 1
71 if error:
72 sys.exit(1)
73
74 def check_data(data, chrom_len, dbkey):
75 error = 0
76 for chrom in data:
77 chrom_beg = 0
78 chrom_end = chrom_len[chrom]
79 for individual in data[chrom]:
80 for p1, p2, state in data[chrom][individual]:
81 if p1 >= p2:
82 print >> sys.stderr, "Bad data line: begin >= end: {0} {1} {2} {3}".format(chrom, p1, p2, state, individual)
83 error = 1
84 if p1 < chrom_beg or p2 > chrom_end:
85 print >> sys.stderr, "Bad data line: outside {0} boundaries[{1} - {2}]: {3} {4} {5} {6}".format(dbkey, chrom_beg, chrom_end, chrom, p1, p2, state, individual)
86 error = 1
87 if error:
88 sys.exit(1)
89
90 def make_rectangle(p1, p2, color, bottom=0.0, top=1.0):
91 verts = [
92 (p1, bottom), # left, bottom
93 (p1, top), # left, top
94 (p2, top), # right, top
95 (p2, bottom), # right, bottom
96 (0.0, 0.0) # ignored
97 ]
98
99 codes = [
100 Path.MOVETO,
101 Path.LINETO,
102 Path.LINETO,
103 Path.LINETO,
104 Path.CLOSEPOLY
105 ]
106
107 path = Path(verts, codes)
108 return patches.PathPatch(path, facecolor=color, lw=0)
109
110 def make_split_rectangle(p1, p2, top_color, bottom_color):
111 patch1 = make_rectangle(p1, p2, bottom_color, top=0.5)
112 patch2 = make_rectangle(p1, p2, top_color, bottom=0.5)
113 return [patch1, patch2]
114
115 def make_state_rectangle(p1, p2, state, chrom, individual):
116 if state == 0:
117 return [ make_rectangle(p1, p2, 'r') ]
118 elif state == 1:
119 return make_split_rectangle(p1, p2, 'r', 'g')
120 elif state == 2:
121 return [ make_rectangle(p1, p2, 'g') ]
122 elif state == 3:
123 return [ make_rectangle(p1, p2, '#c7c7c7') ]
124 else:
125 print >> sys.stderr, "Unknown state: {0}: {1} {2} {3} {4}".format(state, chrom, p1, p2, state, individual)
126 sys.exit(1)
127
128 def nicenum(num, round=False):
129 if num == 0:
130 return 0.0
131
132 exp = int(math.floor(math.log10(num)))
133 f = num / math.pow(10, exp)
134
135 if round:
136 if f < 1.5:
137 nf = 1.0
138 elif f < 3.0:
139 nf = 2.0
140 elif f < 7.0:
141 nf = 5.0
142 else:
143 nf = 10.0
144 else:
145 if f <= 1.0:
146 nf = 1.0
147 elif f <= 2.0:
148 nf = 2.0
149 elif f <= 5.0:
150 nf = 5.0
151 else:
152 nf = 10.0
153
154 return nf * pow(10, exp)
155
156 def tick_foo(beg, end, loose=False):
157 ntick = 10
158
159 range = nicenum(end - beg, round=False)
160 d = nicenum(range/(ntick - 1), round=True)
161 digits = int(math.floor(math.log10(d)))
162
163 if loose:
164 graph_min = math.floor(beg/d) * d
165 graph_max = math.ceil(end/d) * d
166 else:
167 graph_min = beg
168 graph_max = end
169
170 nfrac = max([-1 * digits, 0])
171 vals = []
172
173 stop = graph_max
174 if loose:
175 stop = graph_max + (0.5 * d)
176
177 x = graph_min
178 while x <= stop:
179 vals.append(int(x))
180 x += d
181
182 vals = vals[1:]
183
184 # if not loose:
185 # if vals[-1] < graph_max:
186 # vals.append(int(graph_max))
187
188 labels = []
189 for val in vals:
190 labels.append('{0}'.format(int(val/math.pow(10, digits))))
191
192 # labels.append('{0:.1f}'.format(vals[-1]/math.pow(10, digits)))
193
194 return vals, labels
195
196 ################################################################################
197
198 def make_dpmix_plot(input_dbkey, input_file, output_file, galaxy_data_index_dir):
199 fs_chrom_len = build_chrom_len_dict(input_dbkey, galaxy_data_index_dir)
200 chroms, individuals, data, chrom_len = parse_input_file(input_file)
201
202 for chrom in chrom_len.keys():
203 if chrom in fs_chrom_len:
204 chrom_len[chrom] = fs_chrom_len[chrom]
205
206 #check_chroms(chroms, chrom_len, input_dbkey)
207 check_data(data, chrom_len, input_dbkey)
208
209 ## units below are inches
210 top_space = 0.10
211 chrom_space = 0.25
212 chrom_height = 0.25
213 ind_space = 0.10
214 ind_height = 0.25
215
216 total_height = 0.0
217 at_top = True
218 for chrom in chroms:
219 if at_top:
220 total_height += (top_space + chrom_height)
221 at_top = False
222 else:
223 total_height += (top_space + chrom_space + chrom_height)
224
225 individual_count = 0
226 for individual in individuals:
227 if individual in data[chrom]:
228 individual_count += 1
229 total_height += individual_count * (ind_space + ind_height)
230
231 width = 7.5
232 height = math.ceil(total_height)
233
234 bottom = 1.0
235
236 fig = plt.figure(figsize=(width, height))
237
238 at_top = True
239 for_webb = False
240
241 for chrom in chroms:
242 length = chrom_len[chrom]
243 vals, labels = tick_foo(0, length)
244
245 if at_top:
246 bottom -= (top_space + chrom_height)/height
247 at_top = False
248 else:
249 bottom -= (top_space + chrom_space + chrom_height)/height
250
251 if not for_webb:
252 ax = fig.add_axes([0.0, bottom, 1.0, chrom_height/height])
253 plt.axis('off')
254 plt.text(0.5, 0.5, chrom, fontsize=14, ha='center')
255
256 individual_count = 0
257 for individual in individuals:
258 if individual in data[chrom]:
259 individual_count += 1
260
261 i = 0
262 for individual in individuals:
263 if individual in data[chrom]:
264 i += 1
265
266 bottom -= (ind_space + ind_height)/height
267 if not for_webb:
268 # [left, bottom, width, height]
269 ax1 = fig.add_axes([0.0, bottom, 0.09, ind_height/height])
270 plt.axis('off')
271 plt.text(1.0, 0.5, individual, fontsize=10, ha='right', va='center')
272 # [left, bottom, width, height]
273 ax2 = fig.add_axes([0.10, bottom, 0.88, ind_height/height], frame_on=False)
274 ax2.set_xlim(0, length)
275 ax2.set_ylim(0, 1)
276 if i != individual_count:
277 plt.axis('off')
278 else:
279 if not for_webb:
280 ax2.tick_params(top=False, left=False, right=False, labelleft=False)
281 ax2.set_xticks(vals)
282 ax2.set_xticklabels(labels)
283 else:
284 plt.axis('off')
285 for p1, p2, state in sorted(data[chrom][individual]):
286 for patch in make_state_rectangle(p1, p2, state, chrom, individual):
287 ax2.add_patch(patch)
288
289 plt.savefig(output_file)
290
291 ################################################################################
292
293 if __name__ == '__main__':
294 input_dbkey, input_file, output_file, galaxy_data_index_dir = sys.argv[1:5]
295 make_dpmix_plot(input_dbkey, input_file, output_file, galaxy_data_index_dir)
296 sys.exit(0)
297