Mercurial > repos > iuc > jbrowse
comparison jbrowse.py @ 3:7342f467507b draft
Uploaded v0.4 of JBrowse
author | iuc |
---|---|
date | Thu, 31 Dec 2015 13:58:43 -0500 |
parents | 497c6bb3b717 |
children | ae9382cfb6ac |
comparison
equal
deleted
inserted
replaced
2:b6a0e126dbee | 3:7342f467507b |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 from string import Template | |
3 import os | 2 import os |
3 import copy | |
4 import argparse | 4 import argparse |
5 import subprocess | 5 import subprocess |
6 import hashlib | 6 import hashlib |
7 import struct | |
7 import tempfile | 8 import tempfile |
9 import shutil | |
8 import json | 10 import json |
9 import yaml | 11 from Bio.Data import CodonTable |
12 import xml.etree.ElementTree as ET | |
10 import logging | 13 import logging |
11 logging.basicConfig() | 14 from collections import defaultdict |
12 log = logging.getLogger(__name__) | 15 logging.basicConfig(level=logging.INFO) |
13 | 16 log = logging.getLogger('jbrowse') |
14 COLOR_FUNCTION_TEMPLATE = Template(""" | 17 |
15 function(feature, variableName, glyphObject, track) { | 18 |
16 var score = ${score}; | 19 class ColorScaling(object): |
17 ${opacity} | 20 |
18 return 'rgba(${red}, ${green}, ${blue}, ' + opacity + ')'; | 21 COLOR_FUNCTION_TEMPLATE = """ |
19 } | 22 function(feature, variableName, glyphObject, track) {{ |
20 """) | 23 var score = {score}; |
21 | 24 {opacity} |
22 BLAST_OPACITY_MATH = """ | 25 return 'rgba({red}, {green}, {blue}, ' + opacity + ')'; |
23 var opacity = 0; | 26 }} |
24 if(score == 0.0) { | 27 """ |
25 opacity = 1; | 28 |
26 } else{ | 29 COLOR_FUNCTION_TEMPLATE_QUAL = """ |
27 opacity = (20 - Math.log10(score)) / 180; | 30 function(feature, variableName, glyphObject, track) {{ |
28 } | 31 var search_up = function self(sf, attr){{ |
29 """ | 32 if(sf.get(attr) !== undefined){{ |
30 | 33 return sf.get(attr); |
31 BREWER_COLOUR_IDX = 0 | 34 }} |
32 BREWER_COLOUR_SCHEMES = [ | 35 if(sf.parent() === undefined) {{ |
33 (228, 26, 28), | 36 return; |
34 (55, 126, 184), | 37 }}else{{ |
35 (77, 175, 74), | 38 return self(sf.parent(), attr); |
36 (152, 78, 163), | 39 }} |
37 (255, 127, 0), | 40 }}; |
38 ] | 41 |
39 | 42 var search_down = function self(sf, attr){{ |
40 | 43 if(sf.get(attr) !== undefined){{ |
41 # score comes from feature._parent.get('score') or feature.get('score') | 44 return sf.get(attr); |
42 # Opacity math | 45 }} |
43 | 46 if(sf.children() === undefined) {{ |
44 TN_TABLE = { | 47 return; |
45 'gff3': '--gff', | 48 }}else{{ |
46 'gff': '--gff', | 49 var kids = sf.children(); |
47 'bed': '--bed', | 50 for(var child_idx in kids){{ |
48 # 'genbank': '--gbk', | 51 var x = self(kids[child_idx], attr); |
49 } | 52 if(x !== undefined){{ |
50 | 53 return x; |
51 INSTALLED_TO = os.path.dirname(os.path.realpath(__file__)) | 54 }} |
52 | 55 }} |
53 | 56 return; |
54 class JbrowseConnector(object): | 57 }} |
55 | 58 }}; |
56 def __init__(self, jbrowse, jbrowse_dir, outdir, genome): | 59 |
57 self.jbrowse = jbrowse | 60 var color = ({user_spec_color} || search_up(feature, 'color') || search_down(feature, 'color') || {auto_gen_color}); |
58 self.jbrowse_dir = jbrowse_dir | 61 var score = (search_up(feature, 'score') || search_down(feature, 'score')); |
59 self.outdir = outdir | 62 {opacity} |
60 self.genome_path = genome | 63 var result = /^#?([a-f\d]{{2}})([a-f\d]{{2}})([a-f\d]{{2}})$/i.exec(color); |
64 var red = parseInt(result[1], 16); | |
65 var green = parseInt(result[2], 16); | |
66 var blue = parseInt(result[3], 16); | |
67 if(isNaN(opacity) || opacity < 0){{ opacity = 0; }} | |
68 return 'rgba(' + red + ',' + green + ',' + blue + ',' + opacity + ')'; | |
69 }} | |
70 """ | |
71 | |
72 OPACITY_MATH = { | |
73 'linear': """ | |
74 var opacity = (score - ({min})) / (({max}) - ({min})); | |
75 """, | |
76 'logarithmic': """ | |
77 var opacity = (score - ({min})) / (({max}) - ({min})); | |
78 opacity = Math.log10(opacity) + Math.log10({max}); | |
79 """, | |
80 'blast': """ | |
81 var opacity = 0; | |
82 if(score == 0.0) { | |
83 opacity = 1; | |
84 } else{ | |
85 opacity = (20 - Math.log10(score)) / 180; | |
86 } | |
87 """ | |
88 } | |
89 | |
90 BREWER_COLOUR_IDX = 0 | |
91 BREWER_COLOUR_SCHEMES = [ | |
92 (166, 206, 227), | |
93 (31, 120, 180), | |
94 (178, 223, 138), | |
95 (51, 160, 44), | |
96 (251, 154, 153), | |
97 (227, 26, 28), | |
98 (253, 191, 111), | |
99 (255, 127, 0), | |
100 (202, 178, 214), | |
101 (106, 61, 154), | |
102 (255, 255, 153), | |
103 (177, 89, 40), | |
104 (228, 26, 28), | |
105 (55, 126, 184), | |
106 (77, 175, 74), | |
107 (152, 78, 163), | |
108 (255, 127, 0), | |
109 ] | |
110 | |
111 BREWER_DIVERGING_PALLETES = { | |
112 'BrBg': ("#543005", "#003c30"), | |
113 'PiYg': ("#8e0152", "#276419"), | |
114 'PRGn': ("#40004b", "#00441b"), | |
115 'PuOr': ("#7f3b08", "#2d004b"), | |
116 'RdBu': ("#67001f", "#053061"), | |
117 'RdGy': ("#67001f", "#1a1a1a"), | |
118 'RdYlBu': ("#a50026", "#313695"), | |
119 'RdYlGn': ("#a50026", "#006837"), | |
120 'Spectral': ("#9e0142", "#5e4fa2"), | |
121 } | |
122 | |
123 def __init__(self): | |
61 self.brewer_colour_idx = 0 | 124 self.brewer_colour_idx = 0 |
62 | 125 |
63 self.clone_jbrowse(self.jbrowse, self.outdir) | 126 def rgb_from_hex(self, hexstr): |
64 self.process_genome() | 127 # http://stackoverflow.com/questions/4296249/how-do-i-convert-a-hex-triplet-to-an-rgb-tuple-and-back |
65 | 128 return struct.unpack('BBB',hexstr.decode('hex')) |
66 def subprocess_check_call(self, command): | 129 |
67 log.debug('cd %s && %s', self.jbrowse_dir, ' '.join(command)) | 130 def min_max_gff(self, gff_file): |
68 subprocess.check_call(command, cwd=self.jbrowse_dir) | |
69 | |
70 def _get_colours(self): | |
71 r, g, b = BREWER_COLOUR_SCHEMES[self.brewer_colour_idx] | |
72 self.brewer_colour_idx += 1 | |
73 return r, g, b | |
74 | |
75 def process_genome(self): | |
76 self.subprocess_check_call(['perl', 'bin/prepare-refseqs.pl', | |
77 '--fasta', self.genome_path]) | |
78 | |
79 def _add_json(self, json_data): | |
80 if len(json_data.keys()) == 0: | |
81 return | |
82 | |
83 tmp = tempfile.NamedTemporaryFile(delete=False) | |
84 tmp.write(json.dumps(json_data)) | |
85 tmp.close() | |
86 cmd = ['perl', 'bin/add-track-json.pl', tmp.name, | |
87 os.path.join('data', 'trackList.json')] | |
88 self.subprocess_check_call(cmd) | |
89 os.unlink(tmp.name) | |
90 | |
91 def add_blastxml(self, data, key, **kwargs): | |
92 gff3_unrebased = tempfile.NamedTemporaryFile(delete=False) | |
93 cmd = ['python', os.path.join(INSTALLED_TO, 'blastxml_to_gapped_gff3.py'), | |
94 '--trim_end', '--min_gap', str(kwargs['min_gap']), data] | |
95 subprocess.check_call(cmd, cwd=self.jbrowse_dir, stdout=gff3_unrebased) | |
96 gff3_unrebased.close() | |
97 | |
98 gff3_rebased = tempfile.NamedTemporaryFile(delete=False) | |
99 cmd = ['python', os.path.join(INSTALLED_TO, 'gff3_rebase.py')] | |
100 if kwargs['protein']: | |
101 cmd.append('--protein2dna') | |
102 cmd.extend([kwargs['parent'], gff3_unrebased.name]) | |
103 subprocess.check_call(cmd, cwd=self.jbrowse_dir, stdout=gff3_rebased) | |
104 gff3_rebased.close() | |
105 | |
106 label = hashlib.md5(data).hexdigest() | |
107 | |
108 red, green, blue = self._get_colours() | |
109 color_function = COLOR_FUNCTION_TEMPLATE.substitute({ | |
110 'score': "feature._parent.get('score')", | |
111 'opacity': BLAST_OPACITY_MATH, | |
112 'red': red, | |
113 'green': green, | |
114 'blue': blue, | |
115 }) | |
116 | |
117 clientConfig = { | |
118 'label': 'description', | |
119 'color': color_function.replace('\n', ''), | |
120 'description': 'Hit_titles', | |
121 } | |
122 config = {'glyph': 'JBrowse/View/FeatureGlyph/Segments'} | |
123 if 'category' in kwargs: | |
124 config['category'] = kwargs['category'] | |
125 | |
126 cmd = ['perl', 'bin/flatfile-to-json.pl', | |
127 '--gff', gff3_rebased.name, | |
128 '--trackLabel', label, | |
129 '--key', key, | |
130 '--clientConfig', json.dumps(clientConfig), | |
131 '--config', json.dumps(config), | |
132 '--trackType', 'JBrowse/View/Track/CanvasFeatures' | |
133 ] | |
134 | |
135 self.subprocess_check_call(cmd) | |
136 os.unlink(gff3_rebased.name) | |
137 os.unlink(gff3_unrebased.name) | |
138 | |
139 def _min_max_gff(self, gff_file): | |
140 min_val = None | 131 min_val = None |
141 max_val = None | 132 max_val = None |
142 with open(gff_file, 'r') as handle: | 133 with open(gff_file, 'r') as handle: |
143 for line in handle: | 134 for line in handle: |
144 try: | 135 try: |
153 max_val = value | 144 max_val = value |
154 except Exception: | 145 except Exception: |
155 pass | 146 pass |
156 return min_val, max_val | 147 return min_val, max_val |
157 | 148 |
158 def add_bigwig(self, data, key, **kwargs): | 149 def hex_from_rgb(self, r, g, b): |
159 label = hashlib.md5(data).hexdigest() | 150 return '#%02x%02x%02x' % (r, g, b) |
160 dest = os.path.join('data', 'raw', os.path.basename(data)) | 151 |
161 cmd = ['ln', '-s', data, dest] | 152 def _get_colours(self): |
162 self.subprocess_check_call(cmd) | 153 r, g, b = self.BREWER_COLOUR_SCHEMES[self.brewer_colour_idx] |
163 | 154 self.brewer_colour_idx += 1 |
164 track_data = { | 155 return r, g, b |
165 "label": label, | 156 |
157 def parse_colours(self, track, trackFormat, gff3=None): | |
158 # Wiggle tracks have a bicolor pallete | |
159 trackConfig = {'style': {}} | |
160 if trackFormat == 'wiggle': | |
161 | |
162 trackConfig['style']['pos_color'] = track['wiggle']['color_pos'] | |
163 trackConfig['style']['neg_color'] = track['wiggle']['color_neg'] | |
164 | |
165 if trackConfig['style']['pos_color'] == '__auto__': | |
166 trackConfig['style']['neg_color'] = self.hex_from_rgb(*self._get_colours()) | |
167 trackConfig['style']['pos_color'] = self.hex_from_rgb(*self._get_colours()) | |
168 | |
169 | |
170 # Wiggle tracks can change colour at a specified place | |
171 bc_pivot = track['wiggle']['bicolor_pivot'] | |
172 if bc_pivot not in ('mean', 'zero'): | |
173 # The values are either one of those two strings | |
174 # or a number | |
175 bc_pivot = float(bc_pivot) | |
176 trackConfig['bicolor_pivot'] = bc_pivot | |
177 elif 'scaling' in track: | |
178 if track['scaling']['method'] == 'ignore': | |
179 if track['scaling']['scheme']['color'] != '__auto__': | |
180 trackConfig['style']['color'] = track['scaling']['scheme']['color'] | |
181 else: | |
182 trackConfig['style']['color'] = self.hex_from_rgb(*self._get_colours()) | |
183 else: | |
184 # Scored method | |
185 algo = track['scaling']['algo'] | |
186 # linear, logarithmic, blast | |
187 scales = track['scaling']['scales'] | |
188 # type __auto__, manual (min, max) | |
189 scheme = track['scaling']['scheme'] | |
190 # scheme -> (type (opacity), color) | |
191 # ================================== | |
192 # GENE CALLS OR BLAST | |
193 # ================================== | |
194 if trackFormat == 'blast': | |
195 red, green, blue = self._get_colours() | |
196 color_function = self.COLOR_FUNCTION_TEMPLATE.format(**{ | |
197 'score': "feature._parent.get('score')", | |
198 'opacity': self.OPACITY_MATH['blast'], | |
199 'red': red, | |
200 'green': green, | |
201 'blue': blue, | |
202 }) | |
203 trackConfig['style']['color'] = color_function.replace('\n', '') | |
204 elif trackFormat == 'gene_calls': | |
205 # Default values, based on GFF3 spec | |
206 min_val = 0 | |
207 max_val = 1000 | |
208 # Get min/max and build a scoring function since JBrowse doesn't | |
209 if scales['type'] == 'automatic': | |
210 min_val, max_val = self.min_max_gff(gff3) | |
211 else: | |
212 min_val = scales['min'] | |
213 max_val = scales['max'] | |
214 | |
215 if scheme['color'] == '__auto__': | |
216 user_color = 'undefined' | |
217 auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours()) | |
218 elif scheme['color'].startswith('#'): | |
219 user_color = "'%s'" % self.hex_from_rgb(*self.rgb_from_hex(scheme['color'][1:])) | |
220 auto_color = 'undefined' | |
221 else: | |
222 user_color = 'undefined' | |
223 auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours()) | |
224 | |
225 color_function = self.COLOR_FUNCTION_TEMPLATE_QUAL.format(**{ | |
226 'opacity': self.OPACITY_MATH[algo].format(**{'max': max_val,'min': min_val}), | |
227 'user_spec_color': user_color, | |
228 'auto_gen_color': auto_color, | |
229 }) | |
230 | |
231 trackConfig['style']['color'] = color_function.replace('\n', '') | |
232 return trackConfig | |
233 | |
234 | |
235 def etree_to_dict(t): | |
236 d = {t.tag: {} if t.attrib else None} | |
237 children = list(t) | |
238 if children: | |
239 dd = defaultdict(list) | |
240 for dc in map(etree_to_dict, children): | |
241 for k, v in dc.iteritems(): | |
242 dd[k].append(v) | |
243 d = {t.tag: {k:v[0] if len(v) == 1 else v for k, v in dd.iteritems()}} | |
244 if t.attrib: | |
245 d[t.tag].update(('@' + k, v) for k, v in t.attrib.iteritems()) | |
246 if t.text: | |
247 text = t.text.strip() | |
248 if children or t.attrib: | |
249 if text: | |
250 d[t.tag]['#text'] = text | |
251 else: | |
252 d[t.tag] = text | |
253 return d | |
254 | |
255 | |
256 # score comes from feature._parent.get('score') or feature.get('score') | |
257 | |
258 INSTALLED_TO = os.path.dirname(os.path.realpath(__file__)) | |
259 | |
260 | |
261 class JbrowseConnector(object): | |
262 | |
263 def __init__(self, jbrowse, outdir, genomes, standalone=False, gencode=1): | |
264 self.TN_TABLE = { | |
265 'gff3': '--gff', | |
266 'gff': '--gff', | |
267 'bed': '--bed', | |
268 'genbank': '--gbk', | |
269 } | |
270 | |
271 self.cs = ColorScaling() | |
272 self.jbrowse = jbrowse | |
273 self.outdir = outdir | |
274 self.genome_paths = genomes | |
275 self.standalone = standalone | |
276 self.gencode = gencode | |
277 | |
278 if standalone: | |
279 self.clone_jbrowse(self.jbrowse, self.outdir) | |
280 else: | |
281 try: | |
282 os.makedirs(self.outdir) | |
283 except OSError: | |
284 # Ignore if the folder exists | |
285 pass | |
286 | |
287 self.process_genomes() | |
288 self.update_gencode() | |
289 | |
290 def update_gencode(self): | |
291 table = CodonTable.unambiguous_dna_by_id[int(self.gencode)] | |
292 trackList = os.path.join(self.outdir, 'data', 'trackList.json') | |
293 with open(trackList, 'r') as handle: | |
294 trackListData = json.load(handle) | |
295 | |
296 trackListData['tracks'][0].update({ | |
297 'codonStarts': table.start_codons, | |
298 'codonStops': table.stop_codons, | |
299 'codonTable': table.forward_table, | |
300 }) | |
301 | |
302 with open(trackList, 'w') as handle: | |
303 json.dump(trackListData, handle, indent=2) | |
304 | |
305 | |
306 def subprocess_check_call(self, command): | |
307 log.debug('cd %s && %s', self.outdir, ' '.join(command)) | |
308 subprocess.check_call(command, cwd=self.outdir) | |
309 | |
310 def _jbrowse_bin(self, command): | |
311 return os.path.realpath(os.path.join(self.jbrowse, 'bin', command)) | |
312 | |
313 def process_genomes(self): | |
314 for genome_path in self.genome_paths: | |
315 self.subprocess_check_call([ | |
316 'perl', self._jbrowse_bin('prepare-refseqs.pl'), | |
317 '--fasta', genome_path]) | |
318 | |
319 def _add_json(self, json_data): | |
320 if len(json_data.keys()) == 0: | |
321 return | |
322 | |
323 tmp = tempfile.NamedTemporaryFile(delete=False) | |
324 tmp.write(json.dumps(json_data)) | |
325 tmp.close() | |
326 cmd = ['perl', self._jbrowse_bin('add-track-json.pl'), tmp.name, | |
327 os.path.join('data', 'trackList.json')] | |
328 self.subprocess_check_call(cmd) | |
329 os.unlink(tmp.name) | |
330 | |
331 def _add_track_json(self, json_data): | |
332 if len(json_data.keys()) == 0: | |
333 return | |
334 | |
335 tmp = tempfile.NamedTemporaryFile(delete=False) | |
336 tmp.write(json.dumps(json_data)) | |
337 tmp.close() | |
338 cmd = ['perl', self._jbrowse_bin('add-track-json.pl'), tmp.name, | |
339 os.path.join('data', 'trackList.json')] | |
340 self.subprocess_check_call(cmd) | |
341 os.unlink(tmp.name) | |
342 | |
343 | |
344 def _blastxml_to_gff3(self, xml, min_gap=10): | |
345 gff3_unrebased = tempfile.NamedTemporaryFile(delete=False) | |
346 cmd = ['python', os.path.join(INSTALLED_TO, 'blastxml_to_gapped_gff3.py'), | |
347 '--trim', '--trim_end', '--min_gap', str(min_gap), xml] | |
348 log.debug('cd %s && %s > %s', self.outdir, ' '.join(cmd), gff3_unrebased.name) | |
349 subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_unrebased) | |
350 gff3_unrebased.close() | |
351 return gff3_unrebased.name | |
352 | |
353 def add_blastxml(self, data, trackData, blastOpts, **kwargs): | |
354 gff3 = self._blastxml_to_gff3(data, min_gap=blastOpts['min_gap']) | |
355 | |
356 if 'parent' in blastOpts: | |
357 gff3_rebased = tempfile.NamedTemporaryFile(delete=False) | |
358 cmd = ['python', os.path.join(INSTALLED_TO, 'gff3_rebase.py')] | |
359 if blastOpts.get('protein', 'false') == 'true': | |
360 cmd.append('--protein2dna') | |
361 cmd.extend([os.path.realpath(blastOpts['parent']), gff3]) | |
362 log.debug('cd %s && %s > %s', self.outdir, ' '.join(cmd), gff3_rebased.name) | |
363 subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_rebased) | |
364 gff3_rebased.close() | |
365 | |
366 # Replace original gff3 file | |
367 shutil.copy(gff3_rebased.name, gff3) | |
368 os.unlink(gff3_rebased.name) | |
369 | |
370 config = { | |
371 'glyph': 'JBrowse/View/FeatureGlyph/Segments', | |
372 "category": trackData['category'], | |
373 } | |
374 | |
375 clientConfig = trackData['style'] | |
376 | |
377 cmd = ['perl', self._jbrowse_bin('flatfile-to-json.pl'), | |
378 '--gff', gff3, | |
379 '--trackLabel', trackData['label'], | |
380 '--key', trackData['key'], | |
381 '--clientConfig', json.dumps(clientConfig), | |
382 '--config', json.dumps(config), | |
383 '--trackType', 'JBrowse/View/Track/CanvasFeatures' | |
384 ] | |
385 | |
386 self.subprocess_check_call(cmd) | |
387 os.unlink(gff3) | |
388 | |
389 def add_bigwig(self, data, trackData, wiggleOpts, **kwargs): | |
390 dest = os.path.join('data', 'raw', trackData['label'] + '.bw') | |
391 cmd = ['ln', data, dest] | |
392 self.subprocess_check_call(cmd) | |
393 | |
394 trackData.update({ | |
166 "urlTemplate": os.path.join('..', dest), | 395 "urlTemplate": os.path.join('..', dest), |
167 "bicolor_pivot": "zero", | |
168 "storeClass": "JBrowse/Store/SeqFeature/BigWig", | 396 "storeClass": "JBrowse/Store/SeqFeature/BigWig", |
169 "type": "JBrowse/View/Track/Wiggle/Density", | 397 "type": "JBrowse/View/Track/Wiggle/Density", |
170 "key": key, | 398 }) |
171 } | 399 |
172 track_data.update(kwargs) | 400 trackData['type'] = wiggleOpts['type'] |
173 | 401 trackData['variance_band'] = True if wiggleOpts['variance_band'] == 'true' else False |
174 if 'min' not in track_data and 'max' not in track_data \ | 402 |
175 and 'autoscale' not in track_data: | 403 if 'min' in wiggleOpts and 'max' in wiggleOpts: |
176 track_data['autoscale'] = 'local' | 404 trackData['min_score'] = wiggleOpts['min'] |
177 | 405 trackData['max_score'] = wiggleOpts['max'] |
178 self._add_json(track_data) | 406 else: |
179 | 407 trackData['autoscale'] = wiggleOpts.get('autoscale', 'local') |
180 def add_bam(self, data, key, **kwargs): | 408 |
181 label = hashlib.md5(data).hexdigest() | 409 self._add_track_json(trackData) |
182 dest = os.path.join('data', 'raw', os.path.basename(data)) | 410 |
411 def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs): | |
412 dest = os.path.join('data', 'raw', trackData['label'] + '.bam') | |
413 cmd = ['ln', '-s', os.path.realpath(data), dest] | |
414 self.subprocess_check_call(cmd) | |
415 | |
416 cmd = ['ln', '-s', os.path.realpath(bam_index), dest + '.bai'] | |
417 self.subprocess_check_call(cmd) | |
418 | |
419 trackData.update({ | |
420 "urlTemplate": os.path.join('..', dest), | |
421 "type": "JBrowse/View/Track/Alignments2", | |
422 "storeClass": "JBrowse/Store/SeqFeature/BAM", | |
423 }) | |
424 | |
425 | |
426 self._add_track_json(trackData) | |
427 | |
428 if bamOpts.get('auto_snp', 'false') == 'true': | |
429 trackData2 = copy.copy(trackData) | |
430 trackData2.update({ | |
431 "type": "JBrowse/View/Track/SNPCoverage", | |
432 "key": trackData['key'] + " - SNPs/Coverage", | |
433 "label": trackData['label'] + "_autosnp", | |
434 }) | |
435 self._add_track_json(trackData2) | |
436 | |
437 def add_vcf(self, data, trackData, vcfOpts={}, **kwargs): | |
438 dest = os.path.join('data', 'raw', trackData['label'] + '.vcf') | |
183 # ln? | 439 # ln? |
184 cmd = ['ln', '-s', data, dest] | 440 cmd = ['ln', '-s', data, dest] |
185 self.subprocess_check_call(cmd) | 441 self.subprocess_check_call(cmd) |
186 | |
187 bai_source = kwargs['bam_index'] | |
188 cmd = ['ln', '-s', bai_source, dest + '.bai'] | |
189 self.subprocess_check_call(cmd) | |
190 | |
191 track_data = { | |
192 "urlTemplate": os.path.join('..', dest), | |
193 "key": key, | |
194 "label": label, | |
195 "type": "JBrowse/View/Track/Alignments2", | |
196 "storeClass": "JBrowse/Store/SeqFeature/BAM", | |
197 } | |
198 if 'category' in kwargs: | |
199 track_data['category'] = kwargs['category'] | |
200 self._add_json(track_data) | |
201 | |
202 if kwargs.get('auto_snp', False): | |
203 track_data = { | |
204 "storeClass": "JBrowse/Store/SeqFeature/BAM", | |
205 "urlTemplate": os.path.join('..', dest), | |
206 "type": "JBrowse/View/Track/SNPCoverage", | |
207 "key": key + " - SNPs/Coverage", | |
208 "label": label + "_autosnp", | |
209 } | |
210 if 'category' in kwargs: | |
211 track_data['category'] = kwargs['category'] | |
212 self._add_json(track_data) | |
213 | |
214 def add_vcf(self, data, key, **kwargs): | |
215 label = hashlib.md5(data).hexdigest() | |
216 dest = os.path.join('data', 'raw', os.path.basename(data)) | |
217 # ln? | |
218 cmd = ['ln', '-s', data, dest] | |
219 self.subprocess_check_call(cmd) | |
220 cmd = ['bgzip', dest] | 442 cmd = ['bgzip', dest] |
221 self.subprocess_check_call(cmd) | 443 self.subprocess_check_call(cmd) |
222 cmd = ['tabix', '-p', 'vcf', dest + '.gz'] | 444 cmd = ['tabix', '-p', 'vcf', dest + '.gz'] |
223 self.subprocess_check_call(cmd) | 445 self.subprocess_check_call(cmd) |
224 | 446 |
225 track_data = { | 447 trackData.update({ |
226 "key": key, | |
227 "label": label, | |
228 "urlTemplate": os.path.join('..', dest + '.gz'), | 448 "urlTemplate": os.path.join('..', dest + '.gz'), |
229 "type": "JBrowse/View/Track/HTMLVariants", | 449 "type": "JBrowse/View/Track/HTMLVariants", |
230 "storeClass": "JBrowse/Store/SeqFeature/VCFTabix", | 450 "storeClass": "JBrowse/Store/SeqFeature/VCFTabix", |
451 }) | |
452 self._add_track_json(trackData) | |
453 | |
454 def add_features(self, data, format, trackData, gffOpts, **kwargs): | |
455 cmd = [ | |
456 'perl', self._jbrowse_bin('flatfile-to-json.pl'), | |
457 self.TN_TABLE.get(format, 'gff'), | |
458 data, | |
459 '--trackLabel', trackData['label'], | |
460 '--trackType', 'JBrowse/View/Track/CanvasFeatures', | |
461 '--key', trackData['key'] | |
462 ] | |
463 | |
464 config = copy.copy(trackData) | |
465 clientConfig = trackData['style'] | |
466 del config['style'] | |
467 | |
468 if 'match' in gffOpts: | |
469 config['glyph'] = 'JBrowse/View/FeatureGlyph/Segments' | |
470 cmd += ['--type', gffOpts['match']] | |
471 | |
472 cmd += ['--clientConfig', json.dumps(clientConfig), | |
473 '--trackType', 'JBrowse/View/Track/CanvasFeatures' | |
474 ] | |
475 | |
476 cmd.extend(['--config', json.dumps(config)]) | |
477 | |
478 self.subprocess_check_call(cmd) | |
479 | |
480 | |
481 def process_annotations(self, track): | |
482 outputTrackConfig = { | |
483 'style': { | |
484 'label': track['style'].get('label', 'description'), | |
485 'className': track['style'].get('className', 'feature'), | |
486 'description': track['style'].get('description', ''), | |
487 }, | |
488 'category': track['category'], | |
231 } | 489 } |
232 track_data.update(kwargs) | 490 |
233 self._add_json(track_data) | 491 for i, (dataset_path, dataset_ext, track_human_label) in enumerate(track['trackfiles']): |
234 | 492 log.info('Processing %s / %s', track['category'], track_human_label) |
235 def add_features(self, data, key, format, **kwargs): | 493 outputTrackConfig['key'] = track_human_label |
236 label = hashlib.md5(data).hexdigest() | 494 hashData = [dataset_path, track_human_label, track['category']] |
237 cmd = ['perl', 'bin/flatfile-to-json.pl', | 495 outputTrackConfig['label'] = hashlib.md5('|'.join(hashData)).hexdigest() + '_%s' % i |
238 TN_TABLE.get(format, 'gff'), data, | 496 |
239 '--trackLabel', label, | 497 # Colour parsing is complex due to different track types having |
240 '--key', key] | 498 # different colour options. |
241 | 499 colourOptions = self.cs.parse_colours(track['conf']['options'], track['format'], gff3=dataset_path) |
242 config = {} | 500 # This used to be done with a dict.update() call, however that wiped out any previous style settings... |
243 if 'category' in kwargs: | 501 for key in colourOptions: |
244 config['category'] = kwargs['category'] | 502 if key == 'style': |
245 | 503 for subkey in colourOptions['style']: |
246 if kwargs.get('match', False): | 504 outputTrackConfig['style'][subkey] = colourOptions['style'][subkey] |
247 clientConfig = { | 505 else: |
248 'label': 'description', | 506 outputTrackConfig[key] = colourOptions[key] |
249 'description': 'Hit_titles', | 507 |
250 } | 508 if dataset_ext in ('gff', 'gff3', 'bed'): |
251 | 509 self.add_features(dataset_path, dataset_ext, outputTrackConfig, |
252 # Get min/max and build a scoring function since JBrowse doesn't | 510 track['conf']['options']['gff']) |
253 min_val, max_val = self._min_max_gff(data) | 511 elif dataset_ext == 'bigwig': |
254 | 512 self.add_bigwig(dataset_path, outputTrackConfig, |
255 if min_val is not None and max_val is not None: | 513 track['conf']['options']['wiggle']) |
256 MIN_MAX_OPACITY_MATH = Template(""" | 514 elif dataset_ext == 'bam': |
257 var opacity = (score - ${min}) * (1/(${max} - ${min})); | 515 real_indexes = track['conf']['options']['pileup']['bam_indices']['bam_index'] |
258 """).substitute({ | 516 if not isinstance(real_indexes, list): |
259 'max': max_val, | 517 # <bam_indices> |
260 'min': min_val, | 518 # <bam_index>/path/to/a.bam.bai</bam_index> |
261 }) | 519 # </bam_indices> |
262 | 520 # |
263 red, green, blue = self._get_colours() | 521 # The above will result in the 'bam_index' key containing a |
264 color_function = COLOR_FUNCTION_TEMPLATE.substitute({ | 522 # string. If there are two or more indices, the container |
265 'score': "feature.get('score')", | 523 # becomes a list. Fun! |
266 'opacity': MIN_MAX_OPACITY_MATH, | 524 real_indexes = [real_indexes] |
267 'red': red, | 525 |
268 'green': green, | 526 self.add_bam(dataset_path, outputTrackConfig, |
269 'blue': blue, | 527 track['conf']['options']['pileup'], |
270 }) | 528 bam_index=real_indexes[i]) |
271 | 529 elif dataset_ext == 'blastxml': |
272 clientConfig['color'] = color_function.replace('\n', '') | 530 self.add_blastxml(dataset_path, outputTrackConfig, track['conf']['options']['blast']) |
273 | 531 elif dataset_ext == 'vcf': |
274 config['glyph'] = 'JBrowse/View/FeatureGlyph/Segments' | 532 self.add_vcf(dataset_path, outputTrackConfig) |
275 | |
276 cmd += ['--clientConfig', json.dumps(clientConfig), | |
277 '--trackType', 'JBrowse/View/Track/CanvasFeatures' | |
278 ] | |
279 | |
280 cmd.extend(['--config', json.dumps(config)]) | |
281 | |
282 self.subprocess_check_call(cmd) | |
283 | |
284 def process_annotations(self, data, key, format, **kwargs): | |
285 if format in ('gff', 'gff3', 'bed'): | |
286 self.add_features(data, key, format, **kwargs) | |
287 elif format == 'bigwig': | |
288 self.add_bigwig(data, key, **kwargs) | |
289 elif format == 'bam': | |
290 self.add_bam(data, key, **kwargs) | |
291 elif format == 'blastxml': | |
292 self.add_blastxml(data, key, **kwargs) | |
293 elif format == 'vcf': | |
294 self.add_vcf(data, key, **kwargs) | |
295 | 533 |
296 def clone_jbrowse(self, jbrowse_dir, destination): | 534 def clone_jbrowse(self, jbrowse_dir, destination): |
535 """Clone a JBrowse directory into a destination directory. | |
536 """ | |
297 # JBrowse seems to have included some bad symlinks, cp ignores bad symlinks | 537 # JBrowse seems to have included some bad symlinks, cp ignores bad symlinks |
298 # unlike copytree | 538 # unlike copytree |
299 cmd = ['mkdir', '-p', destination] | 539 cmd = ['cp', '-r', os.path.join(jbrowse_dir, '.'), destination] |
540 log.debug(' '.join(cmd)) | |
300 subprocess.check_call(cmd) | 541 subprocess.check_call(cmd) |
301 cmd = ['cp', '-r', jbrowse_dir, destination] | 542 cmd = ['mkdir', '-p', os.path.join(destination, 'data', 'raw')] |
302 subprocess.check_call(cmd) | 543 log.debug(' '.join(cmd)) |
303 cmd = ['mkdir', '-p', os.path.join(destination, 'JBrowse-1.11.6', | |
304 'data', 'raw')] | |
305 subprocess.check_call(cmd) | 544 subprocess.check_call(cmd) |
306 | 545 |
307 # http://unix.stackexchange.com/a/38691/22785 | 546 # http://unix.stackexchange.com/a/38691/22785 |
308 # JBrowse releases come with some broken symlinks | 547 # JBrowse releases come with some broken symlinks |
309 cmd = ['find', destination, '-type', 'l', '-xtype', 'l', '-exec', 'rm', "'{}'", '+'] | 548 cmd = ['find', destination, '-type', 'l', '-xtype', 'l', '-exec', 'rm', "'{}'", '+'] |
549 log.debug(' '.join(cmd)) | |
310 subprocess.check_call(cmd) | 550 subprocess.check_call(cmd) |
311 | 551 |
312 | 552 |
313 if __name__ == '__main__': | 553 if __name__ == '__main__': |
314 parser = argparse.ArgumentParser(description="", epilog="") | 554 parser = argparse.ArgumentParser(description="", epilog="") |
315 parser.add_argument('genome', type=file, help='Input genome file') | 555 parser.add_argument('xml', type=file, help='Track Configuration') |
316 parser.add_argument('yaml', type=file, help='Track Configuration') | |
317 | 556 |
318 parser.add_argument('--jbrowse', help='Folder containing a jbrowse release') | 557 parser.add_argument('--jbrowse', help='Folder containing a jbrowse release') |
319 parser.add_argument('--outdir', help='Output directory', default='out') | 558 parser.add_argument('--outdir', help='Output directory', default='out') |
559 parser.add_argument('--standalone', help='Standalone mode includes a copy of JBrowse', action='store_true') | |
320 args = parser.parse_args() | 560 args = parser.parse_args() |
561 | |
562 tree = ET.parse(args.xml.name) | |
563 root = tree.getroot() | |
321 | 564 |
322 jc = JbrowseConnector( | 565 jc = JbrowseConnector( |
323 jbrowse=args.jbrowse, | 566 jbrowse=args.jbrowse, |
324 jbrowse_dir=os.path.join(args.outdir, 'JBrowse-1.11.6'), | |
325 outdir=args.outdir, | 567 outdir=args.outdir, |
326 genome=os.path.realpath(args.genome.name), | 568 genomes=[os.path.realpath(x.text) for x in root.findall('metadata/genomes/genome')], |
569 standalone=args.standalone, | |
570 gencode=root.find('metadata/gencode').text | |
327 ) | 571 ) |
328 | 572 |
329 track_data = yaml.load(args.yaml) | 573 for track in root.findall('tracks/track'): |
330 for track in track_data: | 574 track_conf = {} |
331 path = os.path.realpath(track['file']) | 575 track_conf['trackfiles'] = [ |
332 extra = track.get('options', {}) | 576 (os.path.realpath(x.attrib['path']), x.attrib['ext'], x.attrib['label']) |
333 if '__unused__' in extra: | 577 for x in track.findall('files/trackFile') |
334 del extra['__unused__'] | 578 ] |
335 | 579 |
336 for possible_partial_path in ('bam_index', 'parent'): | 580 track_conf['category'] = track.attrib['cat'] |
337 if possible_partial_path in extra: | 581 track_conf['format'] = track.attrib['format'] |
338 extra[possible_partial_path] = os.path.realpath( | 582 try: |
339 extra[possible_partial_path]) | 583 # Only pertains to gff3 + blastxml. TODO? |
340 extra['category'] = track.get('category', 'Default') | 584 track_conf['style'] = {t.tag: t.text for t in track.find('options/style')} |
341 | 585 except TypeError, te: |
342 jc.process_annotations(path, track['label'], track['ext'], **extra) | 586 track_conf['style'] = {} |
343 | 587 pass |
344 print """ | 588 track_conf['conf'] = etree_to_dict(track.find('options')) |
345 <html> | 589 jc.process_annotations(track_conf) |
346 <body> | |
347 <script type="text/javascript"> | |
348 window.location=JBrowse-1.11.6/index.html | |
349 </script> | |
350 <a href="JBrowse-1.11.6/index.html">Go to JBrowse</a> | |
351 <p>Please note that JBrowse functions best on production Galaxy | |
352 instances. The paste server used in development instances has issues | |
353 serving the volumes of data regularly involved in JBrowse</p> | |
354 </body> | |
355 </html> | |
356 """ |