12
|
1 #!/usr/bin/env python
|
|
2
|
|
3 import os
|
|
4 import sys
|
|
5 import math
|
|
6 import matplotlib as mpl
|
|
7 mpl.use('PDF')
|
|
8 import matplotlib.pyplot as plt
|
|
9 from matplotlib.path import Path
|
|
10 import matplotlib.patches as patches
|
|
11
|
|
12 ################################################################################
|
|
13
|
|
14 def build_chrom_len_dict(dbkey, galaxy_data_index_dir):
|
|
15 chrom_len_root = os.path.join(galaxy_data_index_dir, 'shared/ucsc/chrom')
|
|
16 chrom_len_file = '{0}.len'.format(dbkey)
|
|
17 chrom_len_path = os.path.join(chrom_len_root, chrom_len_file)
|
|
18
|
|
19 chrom_len = {}
|
|
20
|
|
21 try:
|
|
22 with open(chrom_len_path) as fh:
|
|
23 for line in fh:
|
|
24 line = line.rstrip('\r\n')
|
|
25 elems = line.split()
|
|
26 if len(elems) == 2:
|
|
27 chrom = elems[0]
|
|
28 length = int(elems[1])
|
|
29 chrom_len[chrom] = length
|
|
30 except:
|
|
31 pass
|
|
32
|
|
33 return chrom_len
|
|
34
|
|
35 def parse_input_file(input_file):
|
|
36 chroms = []
|
|
37 individuals = []
|
|
38 data = {}
|
|
39 chrom_len = {}
|
|
40
|
|
41 with open(input_file) as fh:
|
|
42 for line in fh:
|
|
43 line = line.strip()
|
|
44 if line:
|
|
45 elems = line.split()
|
|
46 chrom = elems[0]
|
|
47 p1, p2, state = map(int, elems[1:4])
|
|
48 id = elems[4]
|
|
49
|
|
50 if chrom not in chroms:
|
|
51 chroms.append(chrom)
|
|
52
|
|
53 if id not in individuals:
|
|
54 individuals.append(id)
|
|
55
|
|
56 data.setdefault(chrom, {})
|
|
57 data[chrom].setdefault(id, [])
|
|
58 data[chrom][id].append((p1, p2, state))
|
|
59
|
|
60 if p2 > chrom_len.setdefault(chrom, 0):
|
|
61 chrom_len[chrom] = p2
|
|
62
|
|
63 return chroms, individuals, data, chrom_len
|
|
64
|
|
65 def check_chroms(chroms, chrom_len, dbkey):
|
|
66 error = 0
|
|
67 for chrom in chroms:
|
|
68 if chrom not in chrom_len:
|
|
69 print >> sys.stderr, "Can't find length for {0} chromosome {1}".format(dbkey, chrom)
|
|
70 error = 1
|
|
71 if error:
|
|
72 sys.exit(1)
|
|
73
|
|
74 def check_data(data, chrom_len, dbkey):
|
|
75 error = 0
|
|
76 for chrom in data:
|
|
77 chrom_beg = 0
|
|
78 chrom_end = chrom_len[chrom]
|
|
79 for individual in data[chrom]:
|
|
80 for p1, p2, state in data[chrom][individual]:
|
|
81 if p1 >= p2:
|
|
82 print >> sys.stderr, "Bad data line: begin >= end: {0} {1} {2} {3}".format(chrom, p1, p2, state, individual)
|
|
83 error = 1
|
|
84 if p1 < chrom_beg or p2 > chrom_end:
|
|
85 print >> sys.stderr, "Bad data line: outside {0} boundaries[{1} - {2}]: {3} {4} {5} {6}".format(dbkey, chrom_beg, chrom_end, chrom, p1, p2, state, individual)
|
|
86 error = 1
|
|
87 if error:
|
|
88 sys.exit(1)
|
|
89
|
|
90 def make_rectangle(p1, p2, color, bottom=0.0, top=1.0):
|
|
91 verts = [
|
|
92 (p1, bottom), # left, bottom
|
|
93 (p1, top), # left, top
|
|
94 (p2, top), # right, top
|
|
95 (p2, bottom), # right, bottom
|
|
96 (0.0, 0.0) # ignored
|
|
97 ]
|
|
98
|
|
99 codes = [
|
|
100 Path.MOVETO,
|
|
101 Path.LINETO,
|
|
102 Path.LINETO,
|
|
103 Path.LINETO,
|
|
104 Path.CLOSEPOLY
|
|
105 ]
|
|
106
|
|
107 path = Path(verts, codes)
|
|
108 return patches.PathPatch(path, facecolor=color, lw=0)
|
|
109
|
|
110 def make_split_rectangle(p1, p2, top_color, bottom_color):
|
|
111 patch1 = make_rectangle(p1, p2, bottom_color, top=0.5)
|
|
112 patch2 = make_rectangle(p1, p2, top_color, bottom=0.5)
|
|
113 return [patch1, patch2]
|
|
114
|
|
115 def make_state_rectangle(p1, p2, state, chrom, individual):
|
|
116 if state == 0:
|
|
117 return [ make_rectangle(p1, p2, 'r') ]
|
|
118 elif state == 1:
|
|
119 return make_split_rectangle(p1, p2, 'r', 'g')
|
|
120 elif state == 2:
|
|
121 return [ make_rectangle(p1, p2, 'g') ]
|
|
122 elif state == 3:
|
|
123 return [ make_rectangle(p1, p2, '#c7c7c7') ]
|
|
124 else:
|
|
125 print >> sys.stderr, "Unknown state: {0}: {1} {2} {3} {4}".format(state, chrom, p1, p2, state, individual)
|
|
126 sys.exit(1)
|
|
127
|
|
128 def nicenum(num, round=False):
|
|
129 if num == 0:
|
|
130 return 0.0
|
|
131
|
|
132 exp = int(math.floor(math.log10(num)))
|
|
133 f = num / math.pow(10, exp)
|
|
134
|
|
135 if round:
|
|
136 if f < 1.5:
|
|
137 nf = 1.0
|
|
138 elif f < 3.0:
|
|
139 nf = 2.0
|
|
140 elif f < 7.0:
|
|
141 nf = 5.0
|
|
142 else:
|
|
143 nf = 10.0
|
|
144 else:
|
|
145 if f <= 1.0:
|
|
146 nf = 1.0
|
|
147 elif f <= 2.0:
|
|
148 nf = 2.0
|
|
149 elif f <= 5.0:
|
|
150 nf = 5.0
|
|
151 else:
|
|
152 nf = 10.0
|
|
153
|
|
154 return nf * pow(10, exp)
|
|
155
|
|
156 def tick_foo(beg, end, loose=False):
|
|
157 ntick = 10
|
|
158
|
|
159 range = nicenum(end - beg, round=False)
|
|
160 d = nicenum(range/(ntick - 1), round=True)
|
|
161 digits = int(math.floor(math.log10(d)))
|
|
162
|
|
163 if loose:
|
|
164 graph_min = math.floor(beg/d) * d
|
|
165 graph_max = math.ceil(end/d) * d
|
|
166 else:
|
|
167 graph_min = beg
|
|
168 graph_max = end
|
|
169
|
|
170 nfrac = max([-1 * digits, 0])
|
|
171 vals = []
|
|
172
|
|
173 stop = graph_max
|
|
174 if loose:
|
|
175 stop = graph_max + (0.5 * d)
|
|
176
|
|
177 x = graph_min
|
|
178 while x <= stop:
|
|
179 vals.append(int(x))
|
|
180 x += d
|
|
181
|
|
182 vals = vals[1:]
|
|
183
|
|
184 # if not loose:
|
|
185 # if vals[-1] < graph_max:
|
|
186 # vals.append(int(graph_max))
|
|
187
|
|
188 labels = []
|
|
189 for val in vals:
|
|
190 labels.append('{0}'.format(int(val/math.pow(10, digits))))
|
|
191
|
|
192 # labels.append('{0:.1f}'.format(vals[-1]/math.pow(10, digits)))
|
|
193
|
|
194 return vals, labels
|
|
195
|
|
196 ################################################################################
|
|
197
|
|
198 def make_dpmix_plot(input_dbkey, input_file, output_file, galaxy_data_index_dir):
|
|
199 fs_chrom_len = build_chrom_len_dict(input_dbkey, galaxy_data_index_dir)
|
|
200 chroms, individuals, data, chrom_len = parse_input_file(input_file)
|
|
201
|
|
202 for chrom in chrom_len.keys():
|
|
203 if chrom in fs_chrom_len:
|
|
204 chrom_len[chrom] = fs_chrom_len[chrom]
|
|
205
|
|
206 #check_chroms(chroms, chrom_len, input_dbkey)
|
|
207 check_data(data, chrom_len, input_dbkey)
|
|
208
|
|
209 ## units below are inches
|
|
210 top_space = 0.10
|
|
211 chrom_space = 0.25
|
|
212 chrom_height = 0.25
|
|
213 ind_space = 0.10
|
|
214 ind_height = 0.25
|
|
215
|
|
216 total_height = 0.0
|
|
217 at_top = True
|
|
218 for chrom in chroms:
|
|
219 if at_top:
|
|
220 total_height += (top_space + chrom_height)
|
|
221 at_top = False
|
|
222 else:
|
|
223 total_height += (top_space + chrom_space + chrom_height)
|
|
224
|
|
225 individual_count = 0
|
|
226 for individual in individuals:
|
|
227 if individual in data[chrom]:
|
|
228 individual_count += 1
|
|
229 total_height += individual_count * (ind_space + ind_height)
|
|
230
|
|
231 width = 7.5
|
|
232 height = math.ceil(total_height)
|
|
233
|
|
234 bottom = 1.0
|
|
235
|
|
236 fig = plt.figure(figsize=(width, height))
|
|
237
|
|
238 at_top = True
|
|
239 for_webb = False
|
|
240
|
|
241 for chrom in chroms:
|
|
242 length = chrom_len[chrom]
|
|
243 vals, labels = tick_foo(0, length)
|
|
244
|
|
245 if at_top:
|
|
246 bottom -= (top_space + chrom_height)/height
|
|
247 at_top = False
|
|
248 else:
|
|
249 bottom -= (top_space + chrom_space + chrom_height)/height
|
|
250
|
|
251 if not for_webb:
|
|
252 ax = fig.add_axes([0.0, bottom, 1.0, chrom_height/height])
|
|
253 plt.axis('off')
|
|
254 plt.text(0.5, 0.5, chrom, fontsize=14, ha='center')
|
|
255
|
|
256 individual_count = 0
|
|
257 for individual in individuals:
|
|
258 if individual in data[chrom]:
|
|
259 individual_count += 1
|
|
260
|
|
261 i = 0
|
|
262 for individual in individuals:
|
|
263 if individual in data[chrom]:
|
|
264 i += 1
|
|
265
|
|
266 bottom -= (ind_space + ind_height)/height
|
|
267 if not for_webb:
|
|
268 # [left, bottom, width, height]
|
|
269 ax1 = fig.add_axes([0.0, bottom, 0.09, ind_height/height])
|
|
270 plt.axis('off')
|
|
271 plt.text(1.0, 0.5, individual, fontsize=10, ha='right', va='center')
|
|
272 # [left, bottom, width, height]
|
|
273 ax2 = fig.add_axes([0.10, bottom, 0.88, ind_height/height], frame_on=False)
|
|
274 ax2.set_xlim(0, length)
|
|
275 ax2.set_ylim(0, 1)
|
|
276 if i != individual_count:
|
|
277 plt.axis('off')
|
|
278 else:
|
|
279 if not for_webb:
|
|
280 ax2.tick_params(top=False, left=False, right=False, labelleft=False)
|
|
281 ax2.set_xticks(vals)
|
|
282 ax2.set_xticklabels(labels)
|
|
283 else:
|
|
284 plt.axis('off')
|
|
285 for p1, p2, state in sorted(data[chrom][individual]):
|
|
286 for patch in make_state_rectangle(p1, p2, state, chrom, individual):
|
|
287 ax2.add_patch(patch)
|
|
288
|
|
289 plt.savefig(output_file)
|
|
290
|
|
291 ################################################################################
|
|
292
|
|
293 if __name__ == '__main__':
|
|
294 input_dbkey, input_file, output_file, galaxy_data_index_dir = sys.argv[1:5]
|
|
295 make_dpmix_plot(input_dbkey, input_file, output_file, galaxy_data_index_dir)
|
|
296 sys.exit(0)
|
|
297
|