Mercurial > repos > miller-lab > genome_diversity
comparison dpmix_plot.py @ 12:4b6590dd7250
Uploaded
author | miller-lab |
---|---|
date | Wed, 12 Sep 2012 17:10:26 -0400 |
parents | |
children | 8997f2ca8c7a |
comparison
equal
deleted
inserted
replaced
11:d4ec09e8079f | 12:4b6590dd7250 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import os | |
4 import sys | |
5 import math | |
6 import matplotlib as mpl | |
7 mpl.use('PDF') | |
8 import matplotlib.pyplot as plt | |
9 from matplotlib.path import Path | |
10 import matplotlib.patches as patches | |
11 | |
12 ################################################################################ | |
13 | |
14 def build_chrom_len_dict(dbkey, galaxy_data_index_dir): | |
15 chrom_len_root = os.path.join(galaxy_data_index_dir, 'shared/ucsc/chrom') | |
16 chrom_len_file = '{0}.len'.format(dbkey) | |
17 chrom_len_path = os.path.join(chrom_len_root, chrom_len_file) | |
18 | |
19 chrom_len = {} | |
20 | |
21 try: | |
22 with open(chrom_len_path) as fh: | |
23 for line in fh: | |
24 line = line.rstrip('\r\n') | |
25 elems = line.split() | |
26 if len(elems) == 2: | |
27 chrom = elems[0] | |
28 length = int(elems[1]) | |
29 chrom_len[chrom] = length | |
30 except: | |
31 pass | |
32 | |
33 return chrom_len | |
34 | |
35 def parse_input_file(input_file): | |
36 chroms = [] | |
37 individuals = [] | |
38 data = {} | |
39 chrom_len = {} | |
40 | |
41 with open(input_file) as fh: | |
42 for line in fh: | |
43 line = line.strip() | |
44 if line: | |
45 elems = line.split() | |
46 chrom = elems[0] | |
47 p1, p2, state = map(int, elems[1:4]) | |
48 id = elems[4] | |
49 | |
50 if chrom not in chroms: | |
51 chroms.append(chrom) | |
52 | |
53 if id not in individuals: | |
54 individuals.append(id) | |
55 | |
56 data.setdefault(chrom, {}) | |
57 data[chrom].setdefault(id, []) | |
58 data[chrom][id].append((p1, p2, state)) | |
59 | |
60 if p2 > chrom_len.setdefault(chrom, 0): | |
61 chrom_len[chrom] = p2 | |
62 | |
63 return chroms, individuals, data, chrom_len | |
64 | |
65 def check_chroms(chroms, chrom_len, dbkey): | |
66 error = 0 | |
67 for chrom in chroms: | |
68 if chrom not in chrom_len: | |
69 print >> sys.stderr, "Can't find length for {0} chromosome {1}".format(dbkey, chrom) | |
70 error = 1 | |
71 if error: | |
72 sys.exit(1) | |
73 | |
74 def check_data(data, chrom_len, dbkey): | |
75 error = 0 | |
76 for chrom in data: | |
77 chrom_beg = 0 | |
78 chrom_end = chrom_len[chrom] | |
79 for individual in data[chrom]: | |
80 for p1, p2, state in data[chrom][individual]: | |
81 if p1 >= p2: | |
82 print >> sys.stderr, "Bad data line: begin >= end: {0} {1} {2} {3}".format(chrom, p1, p2, state, individual) | |
83 error = 1 | |
84 if p1 < chrom_beg or p2 > chrom_end: | |
85 print >> sys.stderr, "Bad data line: outside {0} boundaries[{1} - {2}]: {3} {4} {5} {6}".format(dbkey, chrom_beg, chrom_end, chrom, p1, p2, state, individual) | |
86 error = 1 | |
87 if error: | |
88 sys.exit(1) | |
89 | |
90 def make_rectangle(p1, p2, color, bottom=0.0, top=1.0): | |
91 verts = [ | |
92 (p1, bottom), # left, bottom | |
93 (p1, top), # left, top | |
94 (p2, top), # right, top | |
95 (p2, bottom), # right, bottom | |
96 (0.0, 0.0) # ignored | |
97 ] | |
98 | |
99 codes = [ | |
100 Path.MOVETO, | |
101 Path.LINETO, | |
102 Path.LINETO, | |
103 Path.LINETO, | |
104 Path.CLOSEPOLY | |
105 ] | |
106 | |
107 path = Path(verts, codes) | |
108 return patches.PathPatch(path, facecolor=color, lw=0) | |
109 | |
110 def make_split_rectangle(p1, p2, top_color, bottom_color): | |
111 patch1 = make_rectangle(p1, p2, bottom_color, top=0.5) | |
112 patch2 = make_rectangle(p1, p2, top_color, bottom=0.5) | |
113 return [patch1, patch2] | |
114 | |
115 def make_state_rectangle(p1, p2, state, chrom, individual): | |
116 if state == 0: | |
117 return [ make_rectangle(p1, p2, 'r') ] | |
118 elif state == 1: | |
119 return make_split_rectangle(p1, p2, 'r', 'g') | |
120 elif state == 2: | |
121 return [ make_rectangle(p1, p2, 'g') ] | |
122 elif state == 3: | |
123 return [ make_rectangle(p1, p2, '#c7c7c7') ] | |
124 else: | |
125 print >> sys.stderr, "Unknown state: {0}: {1} {2} {3} {4}".format(state, chrom, p1, p2, state, individual) | |
126 sys.exit(1) | |
127 | |
128 def nicenum(num, round=False): | |
129 if num == 0: | |
130 return 0.0 | |
131 | |
132 exp = int(math.floor(math.log10(num))) | |
133 f = num / math.pow(10, exp) | |
134 | |
135 if round: | |
136 if f < 1.5: | |
137 nf = 1.0 | |
138 elif f < 3.0: | |
139 nf = 2.0 | |
140 elif f < 7.0: | |
141 nf = 5.0 | |
142 else: | |
143 nf = 10.0 | |
144 else: | |
145 if f <= 1.0: | |
146 nf = 1.0 | |
147 elif f <= 2.0: | |
148 nf = 2.0 | |
149 elif f <= 5.0: | |
150 nf = 5.0 | |
151 else: | |
152 nf = 10.0 | |
153 | |
154 return nf * pow(10, exp) | |
155 | |
156 def tick_foo(beg, end, loose=False): | |
157 ntick = 10 | |
158 | |
159 range = nicenum(end - beg, round=False) | |
160 d = nicenum(range/(ntick - 1), round=True) | |
161 digits = int(math.floor(math.log10(d))) | |
162 | |
163 if loose: | |
164 graph_min = math.floor(beg/d) * d | |
165 graph_max = math.ceil(end/d) * d | |
166 else: | |
167 graph_min = beg | |
168 graph_max = end | |
169 | |
170 nfrac = max([-1 * digits, 0]) | |
171 vals = [] | |
172 | |
173 stop = graph_max | |
174 if loose: | |
175 stop = graph_max + (0.5 * d) | |
176 | |
177 x = graph_min | |
178 while x <= stop: | |
179 vals.append(int(x)) | |
180 x += d | |
181 | |
182 vals = vals[1:] | |
183 | |
184 # if not loose: | |
185 # if vals[-1] < graph_max: | |
186 # vals.append(int(graph_max)) | |
187 | |
188 labels = [] | |
189 for val in vals: | |
190 labels.append('{0}'.format(int(val/math.pow(10, digits)))) | |
191 | |
192 # labels.append('{0:.1f}'.format(vals[-1]/math.pow(10, digits))) | |
193 | |
194 return vals, labels | |
195 | |
196 ################################################################################ | |
197 | |
198 def make_dpmix_plot(input_dbkey, input_file, output_file, galaxy_data_index_dir): | |
199 fs_chrom_len = build_chrom_len_dict(input_dbkey, galaxy_data_index_dir) | |
200 chroms, individuals, data, chrom_len = parse_input_file(input_file) | |
201 | |
202 for chrom in chrom_len.keys(): | |
203 if chrom in fs_chrom_len: | |
204 chrom_len[chrom] = fs_chrom_len[chrom] | |
205 | |
206 #check_chroms(chroms, chrom_len, input_dbkey) | |
207 check_data(data, chrom_len, input_dbkey) | |
208 | |
209 ## units below are inches | |
210 top_space = 0.10 | |
211 chrom_space = 0.25 | |
212 chrom_height = 0.25 | |
213 ind_space = 0.10 | |
214 ind_height = 0.25 | |
215 | |
216 total_height = 0.0 | |
217 at_top = True | |
218 for chrom in chroms: | |
219 if at_top: | |
220 total_height += (top_space + chrom_height) | |
221 at_top = False | |
222 else: | |
223 total_height += (top_space + chrom_space + chrom_height) | |
224 | |
225 individual_count = 0 | |
226 for individual in individuals: | |
227 if individual in data[chrom]: | |
228 individual_count += 1 | |
229 total_height += individual_count * (ind_space + ind_height) | |
230 | |
231 width = 7.5 | |
232 height = math.ceil(total_height) | |
233 | |
234 bottom = 1.0 | |
235 | |
236 fig = plt.figure(figsize=(width, height)) | |
237 | |
238 at_top = True | |
239 for_webb = False | |
240 | |
241 for chrom in chroms: | |
242 length = chrom_len[chrom] | |
243 vals, labels = tick_foo(0, length) | |
244 | |
245 if at_top: | |
246 bottom -= (top_space + chrom_height)/height | |
247 at_top = False | |
248 else: | |
249 bottom -= (top_space + chrom_space + chrom_height)/height | |
250 | |
251 if not for_webb: | |
252 ax = fig.add_axes([0.0, bottom, 1.0, chrom_height/height]) | |
253 plt.axis('off') | |
254 plt.text(0.5, 0.5, chrom, fontsize=14, ha='center') | |
255 | |
256 individual_count = 0 | |
257 for individual in individuals: | |
258 if individual in data[chrom]: | |
259 individual_count += 1 | |
260 | |
261 i = 0 | |
262 for individual in individuals: | |
263 if individual in data[chrom]: | |
264 i += 1 | |
265 | |
266 bottom -= (ind_space + ind_height)/height | |
267 if not for_webb: | |
268 # [left, bottom, width, height] | |
269 ax1 = fig.add_axes([0.0, bottom, 0.09, ind_height/height]) | |
270 plt.axis('off') | |
271 plt.text(1.0, 0.5, individual, fontsize=10, ha='right', va='center') | |
272 # [left, bottom, width, height] | |
273 ax2 = fig.add_axes([0.10, bottom, 0.88, ind_height/height], frame_on=False) | |
274 ax2.set_xlim(0, length) | |
275 ax2.set_ylim(0, 1) | |
276 if i != individual_count: | |
277 plt.axis('off') | |
278 else: | |
279 if not for_webb: | |
280 ax2.tick_params(top=False, left=False, right=False, labelleft=False) | |
281 ax2.set_xticks(vals) | |
282 ax2.set_xticklabels(labels) | |
283 else: | |
284 plt.axis('off') | |
285 for p1, p2, state in sorted(data[chrom][individual]): | |
286 for patch in make_state_rectangle(p1, p2, state, chrom, individual): | |
287 ax2.add_patch(patch) | |
288 | |
289 plt.savefig(output_file) | |
290 | |
291 ################################################################################ | |
292 | |
293 if __name__ == '__main__': | |
294 input_dbkey, input_file, output_file, galaxy_data_index_dir = sys.argv[1:5] | |
295 make_dpmix_plot(input_dbkey, input_file, output_file, galaxy_data_index_dir) | |
296 sys.exit(0) | |
297 |