comparison jbrowse.py @ 3:7342f467507b draft

Uploaded v0.4 of JBrowse
author iuc
date Thu, 31 Dec 2015 13:58:43 -0500
parents 497c6bb3b717
children ae9382cfb6ac
comparison
equal deleted inserted replaced
2:b6a0e126dbee 3:7342f467507b
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 from string import Template
3 import os 2 import os
3 import copy
4 import argparse 4 import argparse
5 import subprocess 5 import subprocess
6 import hashlib 6 import hashlib
7 import struct
7 import tempfile 8 import tempfile
9 import shutil
8 import json 10 import json
9 import yaml 11 from Bio.Data import CodonTable
12 import xml.etree.ElementTree as ET
10 import logging 13 import logging
11 logging.basicConfig() 14 from collections import defaultdict
12 log = logging.getLogger(__name__) 15 logging.basicConfig(level=logging.INFO)
13 16 log = logging.getLogger('jbrowse')
14 COLOR_FUNCTION_TEMPLATE = Template(""" 17
15 function(feature, variableName, glyphObject, track) { 18
16 var score = ${score}; 19 class ColorScaling(object):
17 ${opacity} 20
18 return 'rgba(${red}, ${green}, ${blue}, ' + opacity + ')'; 21 COLOR_FUNCTION_TEMPLATE = """
19 } 22 function(feature, variableName, glyphObject, track) {{
20 """) 23 var score = {score};
21 24 {opacity}
22 BLAST_OPACITY_MATH = """ 25 return 'rgba({red}, {green}, {blue}, ' + opacity + ')';
23 var opacity = 0; 26 }}
24 if(score == 0.0) { 27 """
25 opacity = 1; 28
26 } else{ 29 COLOR_FUNCTION_TEMPLATE_QUAL = """
27 opacity = (20 - Math.log10(score)) / 180; 30 function(feature, variableName, glyphObject, track) {{
28 } 31 var search_up = function self(sf, attr){{
29 """ 32 if(sf.get(attr) !== undefined){{
30 33 return sf.get(attr);
31 BREWER_COLOUR_IDX = 0 34 }}
32 BREWER_COLOUR_SCHEMES = [ 35 if(sf.parent() === undefined) {{
33 (228, 26, 28), 36 return;
34 (55, 126, 184), 37 }}else{{
35 (77, 175, 74), 38 return self(sf.parent(), attr);
36 (152, 78, 163), 39 }}
37 (255, 127, 0), 40 }};
38 ] 41
39 42 var search_down = function self(sf, attr){{
40 43 if(sf.get(attr) !== undefined){{
41 # score comes from feature._parent.get('score') or feature.get('score') 44 return sf.get(attr);
42 # Opacity math 45 }}
43 46 if(sf.children() === undefined) {{
44 TN_TABLE = { 47 return;
45 'gff3': '--gff', 48 }}else{{
46 'gff': '--gff', 49 var kids = sf.children();
47 'bed': '--bed', 50 for(var child_idx in kids){{
48 # 'genbank': '--gbk', 51 var x = self(kids[child_idx], attr);
49 } 52 if(x !== undefined){{
50 53 return x;
51 INSTALLED_TO = os.path.dirname(os.path.realpath(__file__)) 54 }}
52 55 }}
53 56 return;
54 class JbrowseConnector(object): 57 }}
55 58 }};
56 def __init__(self, jbrowse, jbrowse_dir, outdir, genome): 59
57 self.jbrowse = jbrowse 60 var color = ({user_spec_color} || search_up(feature, 'color') || search_down(feature, 'color') || {auto_gen_color});
58 self.jbrowse_dir = jbrowse_dir 61 var score = (search_up(feature, 'score') || search_down(feature, 'score'));
59 self.outdir = outdir 62 {opacity}
60 self.genome_path = genome 63 var result = /^#?([a-f\d]{{2}})([a-f\d]{{2}})([a-f\d]{{2}})$/i.exec(color);
64 var red = parseInt(result[1], 16);
65 var green = parseInt(result[2], 16);
66 var blue = parseInt(result[3], 16);
67 if(isNaN(opacity) || opacity < 0){{ opacity = 0; }}
68 return 'rgba(' + red + ',' + green + ',' + blue + ',' + opacity + ')';
69 }}
70 """
71
72 OPACITY_MATH = {
73 'linear': """
74 var opacity = (score - ({min})) / (({max}) - ({min}));
75 """,
76 'logarithmic': """
77 var opacity = (score - ({min})) / (({max}) - ({min}));
78 opacity = Math.log10(opacity) + Math.log10({max});
79 """,
80 'blast': """
81 var opacity = 0;
82 if(score == 0.0) {
83 opacity = 1;
84 } else{
85 opacity = (20 - Math.log10(score)) / 180;
86 }
87 """
88 }
89
90 BREWER_COLOUR_IDX = 0
91 BREWER_COLOUR_SCHEMES = [
92 (166, 206, 227),
93 (31, 120, 180),
94 (178, 223, 138),
95 (51, 160, 44),
96 (251, 154, 153),
97 (227, 26, 28),
98 (253, 191, 111),
99 (255, 127, 0),
100 (202, 178, 214),
101 (106, 61, 154),
102 (255, 255, 153),
103 (177, 89, 40),
104 (228, 26, 28),
105 (55, 126, 184),
106 (77, 175, 74),
107 (152, 78, 163),
108 (255, 127, 0),
109 ]
110
111 BREWER_DIVERGING_PALLETES = {
112 'BrBg': ("#543005", "#003c30"),
113 'PiYg': ("#8e0152", "#276419"),
114 'PRGn': ("#40004b", "#00441b"),
115 'PuOr': ("#7f3b08", "#2d004b"),
116 'RdBu': ("#67001f", "#053061"),
117 'RdGy': ("#67001f", "#1a1a1a"),
118 'RdYlBu': ("#a50026", "#313695"),
119 'RdYlGn': ("#a50026", "#006837"),
120 'Spectral': ("#9e0142", "#5e4fa2"),
121 }
122
123 def __init__(self):
61 self.brewer_colour_idx = 0 124 self.brewer_colour_idx = 0
62 125
63 self.clone_jbrowse(self.jbrowse, self.outdir) 126 def rgb_from_hex(self, hexstr):
64 self.process_genome() 127 # http://stackoverflow.com/questions/4296249/how-do-i-convert-a-hex-triplet-to-an-rgb-tuple-and-back
65 128 return struct.unpack('BBB',hexstr.decode('hex'))
66 def subprocess_check_call(self, command): 129
67 log.debug('cd %s && %s', self.jbrowse_dir, ' '.join(command)) 130 def min_max_gff(self, gff_file):
68 subprocess.check_call(command, cwd=self.jbrowse_dir)
69
70 def _get_colours(self):
71 r, g, b = BREWER_COLOUR_SCHEMES[self.brewer_colour_idx]
72 self.brewer_colour_idx += 1
73 return r, g, b
74
75 def process_genome(self):
76 self.subprocess_check_call(['perl', 'bin/prepare-refseqs.pl',
77 '--fasta', self.genome_path])
78
79 def _add_json(self, json_data):
80 if len(json_data.keys()) == 0:
81 return
82
83 tmp = tempfile.NamedTemporaryFile(delete=False)
84 tmp.write(json.dumps(json_data))
85 tmp.close()
86 cmd = ['perl', 'bin/add-track-json.pl', tmp.name,
87 os.path.join('data', 'trackList.json')]
88 self.subprocess_check_call(cmd)
89 os.unlink(tmp.name)
90
91 def add_blastxml(self, data, key, **kwargs):
92 gff3_unrebased = tempfile.NamedTemporaryFile(delete=False)
93 cmd = ['python', os.path.join(INSTALLED_TO, 'blastxml_to_gapped_gff3.py'),
94 '--trim_end', '--min_gap', str(kwargs['min_gap']), data]
95 subprocess.check_call(cmd, cwd=self.jbrowse_dir, stdout=gff3_unrebased)
96 gff3_unrebased.close()
97
98 gff3_rebased = tempfile.NamedTemporaryFile(delete=False)
99 cmd = ['python', os.path.join(INSTALLED_TO, 'gff3_rebase.py')]
100 if kwargs['protein']:
101 cmd.append('--protein2dna')
102 cmd.extend([kwargs['parent'], gff3_unrebased.name])
103 subprocess.check_call(cmd, cwd=self.jbrowse_dir, stdout=gff3_rebased)
104 gff3_rebased.close()
105
106 label = hashlib.md5(data).hexdigest()
107
108 red, green, blue = self._get_colours()
109 color_function = COLOR_FUNCTION_TEMPLATE.substitute({
110 'score': "feature._parent.get('score')",
111 'opacity': BLAST_OPACITY_MATH,
112 'red': red,
113 'green': green,
114 'blue': blue,
115 })
116
117 clientConfig = {
118 'label': 'description',
119 'color': color_function.replace('\n', ''),
120 'description': 'Hit_titles',
121 }
122 config = {'glyph': 'JBrowse/View/FeatureGlyph/Segments'}
123 if 'category' in kwargs:
124 config['category'] = kwargs['category']
125
126 cmd = ['perl', 'bin/flatfile-to-json.pl',
127 '--gff', gff3_rebased.name,
128 '--trackLabel', label,
129 '--key', key,
130 '--clientConfig', json.dumps(clientConfig),
131 '--config', json.dumps(config),
132 '--trackType', 'JBrowse/View/Track/CanvasFeatures'
133 ]
134
135 self.subprocess_check_call(cmd)
136 os.unlink(gff3_rebased.name)
137 os.unlink(gff3_unrebased.name)
138
139 def _min_max_gff(self, gff_file):
140 min_val = None 131 min_val = None
141 max_val = None 132 max_val = None
142 with open(gff_file, 'r') as handle: 133 with open(gff_file, 'r') as handle:
143 for line in handle: 134 for line in handle:
144 try: 135 try:
153 max_val = value 144 max_val = value
154 except Exception: 145 except Exception:
155 pass 146 pass
156 return min_val, max_val 147 return min_val, max_val
157 148
158 def add_bigwig(self, data, key, **kwargs): 149 def hex_from_rgb(self, r, g, b):
159 label = hashlib.md5(data).hexdigest() 150 return '#%02x%02x%02x' % (r, g, b)
160 dest = os.path.join('data', 'raw', os.path.basename(data)) 151
161 cmd = ['ln', '-s', data, dest] 152 def _get_colours(self):
162 self.subprocess_check_call(cmd) 153 r, g, b = self.BREWER_COLOUR_SCHEMES[self.brewer_colour_idx]
163 154 self.brewer_colour_idx += 1
164 track_data = { 155 return r, g, b
165 "label": label, 156
157 def parse_colours(self, track, trackFormat, gff3=None):
158 # Wiggle tracks have a bicolor pallete
159 trackConfig = {'style': {}}
160 if trackFormat == 'wiggle':
161
162 trackConfig['style']['pos_color'] = track['wiggle']['color_pos']
163 trackConfig['style']['neg_color'] = track['wiggle']['color_neg']
164
165 if trackConfig['style']['pos_color'] == '__auto__':
166 trackConfig['style']['neg_color'] = self.hex_from_rgb(*self._get_colours())
167 trackConfig['style']['pos_color'] = self.hex_from_rgb(*self._get_colours())
168
169
170 # Wiggle tracks can change colour at a specified place
171 bc_pivot = track['wiggle']['bicolor_pivot']
172 if bc_pivot not in ('mean', 'zero'):
173 # The values are either one of those two strings
174 # or a number
175 bc_pivot = float(bc_pivot)
176 trackConfig['bicolor_pivot'] = bc_pivot
177 elif 'scaling' in track:
178 if track['scaling']['method'] == 'ignore':
179 if track['scaling']['scheme']['color'] != '__auto__':
180 trackConfig['style']['color'] = track['scaling']['scheme']['color']
181 else:
182 trackConfig['style']['color'] = self.hex_from_rgb(*self._get_colours())
183 else:
184 # Scored method
185 algo = track['scaling']['algo']
186 # linear, logarithmic, blast
187 scales = track['scaling']['scales']
188 # type __auto__, manual (min, max)
189 scheme = track['scaling']['scheme']
190 # scheme -> (type (opacity), color)
191 # ==================================
192 # GENE CALLS OR BLAST
193 # ==================================
194 if trackFormat == 'blast':
195 red, green, blue = self._get_colours()
196 color_function = self.COLOR_FUNCTION_TEMPLATE.format(**{
197 'score': "feature._parent.get('score')",
198 'opacity': self.OPACITY_MATH['blast'],
199 'red': red,
200 'green': green,
201 'blue': blue,
202 })
203 trackConfig['style']['color'] = color_function.replace('\n', '')
204 elif trackFormat == 'gene_calls':
205 # Default values, based on GFF3 spec
206 min_val = 0
207 max_val = 1000
208 # Get min/max and build a scoring function since JBrowse doesn't
209 if scales['type'] == 'automatic':
210 min_val, max_val = self.min_max_gff(gff3)
211 else:
212 min_val = scales['min']
213 max_val = scales['max']
214
215 if scheme['color'] == '__auto__':
216 user_color = 'undefined'
217 auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours())
218 elif scheme['color'].startswith('#'):
219 user_color = "'%s'" % self.hex_from_rgb(*self.rgb_from_hex(scheme['color'][1:]))
220 auto_color = 'undefined'
221 else:
222 user_color = 'undefined'
223 auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours())
224
225 color_function = self.COLOR_FUNCTION_TEMPLATE_QUAL.format(**{
226 'opacity': self.OPACITY_MATH[algo].format(**{'max': max_val,'min': min_val}),
227 'user_spec_color': user_color,
228 'auto_gen_color': auto_color,
229 })
230
231 trackConfig['style']['color'] = color_function.replace('\n', '')
232 return trackConfig
233
234
235 def etree_to_dict(t):
236 d = {t.tag: {} if t.attrib else None}
237 children = list(t)
238 if children:
239 dd = defaultdict(list)
240 for dc in map(etree_to_dict, children):
241 for k, v in dc.iteritems():
242 dd[k].append(v)
243 d = {t.tag: {k:v[0] if len(v) == 1 else v for k, v in dd.iteritems()}}
244 if t.attrib:
245 d[t.tag].update(('@' + k, v) for k, v in t.attrib.iteritems())
246 if t.text:
247 text = t.text.strip()
248 if children or t.attrib:
249 if text:
250 d[t.tag]['#text'] = text
251 else:
252 d[t.tag] = text
253 return d
254
255
256 # score comes from feature._parent.get('score') or feature.get('score')
257
258 INSTALLED_TO = os.path.dirname(os.path.realpath(__file__))
259
260
261 class JbrowseConnector(object):
262
263 def __init__(self, jbrowse, outdir, genomes, standalone=False, gencode=1):
264 self.TN_TABLE = {
265 'gff3': '--gff',
266 'gff': '--gff',
267 'bed': '--bed',
268 'genbank': '--gbk',
269 }
270
271 self.cs = ColorScaling()
272 self.jbrowse = jbrowse
273 self.outdir = outdir
274 self.genome_paths = genomes
275 self.standalone = standalone
276 self.gencode = gencode
277
278 if standalone:
279 self.clone_jbrowse(self.jbrowse, self.outdir)
280 else:
281 try:
282 os.makedirs(self.outdir)
283 except OSError:
284 # Ignore if the folder exists
285 pass
286
287 self.process_genomes()
288 self.update_gencode()
289
290 def update_gencode(self):
291 table = CodonTable.unambiguous_dna_by_id[int(self.gencode)]
292 trackList = os.path.join(self.outdir, 'data', 'trackList.json')
293 with open(trackList, 'r') as handle:
294 trackListData = json.load(handle)
295
296 trackListData['tracks'][0].update({
297 'codonStarts': table.start_codons,
298 'codonStops': table.stop_codons,
299 'codonTable': table.forward_table,
300 })
301
302 with open(trackList, 'w') as handle:
303 json.dump(trackListData, handle, indent=2)
304
305
306 def subprocess_check_call(self, command):
307 log.debug('cd %s && %s', self.outdir, ' '.join(command))
308 subprocess.check_call(command, cwd=self.outdir)
309
310 def _jbrowse_bin(self, command):
311 return os.path.realpath(os.path.join(self.jbrowse, 'bin', command))
312
313 def process_genomes(self):
314 for genome_path in self.genome_paths:
315 self.subprocess_check_call([
316 'perl', self._jbrowse_bin('prepare-refseqs.pl'),
317 '--fasta', genome_path])
318
319 def _add_json(self, json_data):
320 if len(json_data.keys()) == 0:
321 return
322
323 tmp = tempfile.NamedTemporaryFile(delete=False)
324 tmp.write(json.dumps(json_data))
325 tmp.close()
326 cmd = ['perl', self._jbrowse_bin('add-track-json.pl'), tmp.name,
327 os.path.join('data', 'trackList.json')]
328 self.subprocess_check_call(cmd)
329 os.unlink(tmp.name)
330
331 def _add_track_json(self, json_data):
332 if len(json_data.keys()) == 0:
333 return
334
335 tmp = tempfile.NamedTemporaryFile(delete=False)
336 tmp.write(json.dumps(json_data))
337 tmp.close()
338 cmd = ['perl', self._jbrowse_bin('add-track-json.pl'), tmp.name,
339 os.path.join('data', 'trackList.json')]
340 self.subprocess_check_call(cmd)
341 os.unlink(tmp.name)
342
343
344 def _blastxml_to_gff3(self, xml, min_gap=10):
345 gff3_unrebased = tempfile.NamedTemporaryFile(delete=False)
346 cmd = ['python', os.path.join(INSTALLED_TO, 'blastxml_to_gapped_gff3.py'),
347 '--trim', '--trim_end', '--min_gap', str(min_gap), xml]
348 log.debug('cd %s && %s > %s', self.outdir, ' '.join(cmd), gff3_unrebased.name)
349 subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_unrebased)
350 gff3_unrebased.close()
351 return gff3_unrebased.name
352
353 def add_blastxml(self, data, trackData, blastOpts, **kwargs):
354 gff3 = self._blastxml_to_gff3(data, min_gap=blastOpts['min_gap'])
355
356 if 'parent' in blastOpts:
357 gff3_rebased = tempfile.NamedTemporaryFile(delete=False)
358 cmd = ['python', os.path.join(INSTALLED_TO, 'gff3_rebase.py')]
359 if blastOpts.get('protein', 'false') == 'true':
360 cmd.append('--protein2dna')
361 cmd.extend([os.path.realpath(blastOpts['parent']), gff3])
362 log.debug('cd %s && %s > %s', self.outdir, ' '.join(cmd), gff3_rebased.name)
363 subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_rebased)
364 gff3_rebased.close()
365
366 # Replace original gff3 file
367 shutil.copy(gff3_rebased.name, gff3)
368 os.unlink(gff3_rebased.name)
369
370 config = {
371 'glyph': 'JBrowse/View/FeatureGlyph/Segments',
372 "category": trackData['category'],
373 }
374
375 clientConfig = trackData['style']
376
377 cmd = ['perl', self._jbrowse_bin('flatfile-to-json.pl'),
378 '--gff', gff3,
379 '--trackLabel', trackData['label'],
380 '--key', trackData['key'],
381 '--clientConfig', json.dumps(clientConfig),
382 '--config', json.dumps(config),
383 '--trackType', 'JBrowse/View/Track/CanvasFeatures'
384 ]
385
386 self.subprocess_check_call(cmd)
387 os.unlink(gff3)
388
389 def add_bigwig(self, data, trackData, wiggleOpts, **kwargs):
390 dest = os.path.join('data', 'raw', trackData['label'] + '.bw')
391 cmd = ['ln', data, dest]
392 self.subprocess_check_call(cmd)
393
394 trackData.update({
166 "urlTemplate": os.path.join('..', dest), 395 "urlTemplate": os.path.join('..', dest),
167 "bicolor_pivot": "zero",
168 "storeClass": "JBrowse/Store/SeqFeature/BigWig", 396 "storeClass": "JBrowse/Store/SeqFeature/BigWig",
169 "type": "JBrowse/View/Track/Wiggle/Density", 397 "type": "JBrowse/View/Track/Wiggle/Density",
170 "key": key, 398 })
171 } 399
172 track_data.update(kwargs) 400 trackData['type'] = wiggleOpts['type']
173 401 trackData['variance_band'] = True if wiggleOpts['variance_band'] == 'true' else False
174 if 'min' not in track_data and 'max' not in track_data \ 402
175 and 'autoscale' not in track_data: 403 if 'min' in wiggleOpts and 'max' in wiggleOpts:
176 track_data['autoscale'] = 'local' 404 trackData['min_score'] = wiggleOpts['min']
177 405 trackData['max_score'] = wiggleOpts['max']
178 self._add_json(track_data) 406 else:
179 407 trackData['autoscale'] = wiggleOpts.get('autoscale', 'local')
180 def add_bam(self, data, key, **kwargs): 408
181 label = hashlib.md5(data).hexdigest() 409 self._add_track_json(trackData)
182 dest = os.path.join('data', 'raw', os.path.basename(data)) 410
411 def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs):
412 dest = os.path.join('data', 'raw', trackData['label'] + '.bam')
413 cmd = ['ln', '-s', os.path.realpath(data), dest]
414 self.subprocess_check_call(cmd)
415
416 cmd = ['ln', '-s', os.path.realpath(bam_index), dest + '.bai']
417 self.subprocess_check_call(cmd)
418
419 trackData.update({
420 "urlTemplate": os.path.join('..', dest),
421 "type": "JBrowse/View/Track/Alignments2",
422 "storeClass": "JBrowse/Store/SeqFeature/BAM",
423 })
424
425
426 self._add_track_json(trackData)
427
428 if bamOpts.get('auto_snp', 'false') == 'true':
429 trackData2 = copy.copy(trackData)
430 trackData2.update({
431 "type": "JBrowse/View/Track/SNPCoverage",
432 "key": trackData['key'] + " - SNPs/Coverage",
433 "label": trackData['label'] + "_autosnp",
434 })
435 self._add_track_json(trackData2)
436
437 def add_vcf(self, data, trackData, vcfOpts={}, **kwargs):
438 dest = os.path.join('data', 'raw', trackData['label'] + '.vcf')
183 # ln? 439 # ln?
184 cmd = ['ln', '-s', data, dest] 440 cmd = ['ln', '-s', data, dest]
185 self.subprocess_check_call(cmd) 441 self.subprocess_check_call(cmd)
186
187 bai_source = kwargs['bam_index']
188 cmd = ['ln', '-s', bai_source, dest + '.bai']
189 self.subprocess_check_call(cmd)
190
191 track_data = {
192 "urlTemplate": os.path.join('..', dest),
193 "key": key,
194 "label": label,
195 "type": "JBrowse/View/Track/Alignments2",
196 "storeClass": "JBrowse/Store/SeqFeature/BAM",
197 }
198 if 'category' in kwargs:
199 track_data['category'] = kwargs['category']
200 self._add_json(track_data)
201
202 if kwargs.get('auto_snp', False):
203 track_data = {
204 "storeClass": "JBrowse/Store/SeqFeature/BAM",
205 "urlTemplate": os.path.join('..', dest),
206 "type": "JBrowse/View/Track/SNPCoverage",
207 "key": key + " - SNPs/Coverage",
208 "label": label + "_autosnp",
209 }
210 if 'category' in kwargs:
211 track_data['category'] = kwargs['category']
212 self._add_json(track_data)
213
214 def add_vcf(self, data, key, **kwargs):
215 label = hashlib.md5(data).hexdigest()
216 dest = os.path.join('data', 'raw', os.path.basename(data))
217 # ln?
218 cmd = ['ln', '-s', data, dest]
219 self.subprocess_check_call(cmd)
220 cmd = ['bgzip', dest] 442 cmd = ['bgzip', dest]
221 self.subprocess_check_call(cmd) 443 self.subprocess_check_call(cmd)
222 cmd = ['tabix', '-p', 'vcf', dest + '.gz'] 444 cmd = ['tabix', '-p', 'vcf', dest + '.gz']
223 self.subprocess_check_call(cmd) 445 self.subprocess_check_call(cmd)
224 446
225 track_data = { 447 trackData.update({
226 "key": key,
227 "label": label,
228 "urlTemplate": os.path.join('..', dest + '.gz'), 448 "urlTemplate": os.path.join('..', dest + '.gz'),
229 "type": "JBrowse/View/Track/HTMLVariants", 449 "type": "JBrowse/View/Track/HTMLVariants",
230 "storeClass": "JBrowse/Store/SeqFeature/VCFTabix", 450 "storeClass": "JBrowse/Store/SeqFeature/VCFTabix",
451 })
452 self._add_track_json(trackData)
453
454 def add_features(self, data, format, trackData, gffOpts, **kwargs):
455 cmd = [
456 'perl', self._jbrowse_bin('flatfile-to-json.pl'),
457 self.TN_TABLE.get(format, 'gff'),
458 data,
459 '--trackLabel', trackData['label'],
460 '--trackType', 'JBrowse/View/Track/CanvasFeatures',
461 '--key', trackData['key']
462 ]
463
464 config = copy.copy(trackData)
465 clientConfig = trackData['style']
466 del config['style']
467
468 if 'match' in gffOpts:
469 config['glyph'] = 'JBrowse/View/FeatureGlyph/Segments'
470 cmd += ['--type', gffOpts['match']]
471
472 cmd += ['--clientConfig', json.dumps(clientConfig),
473 '--trackType', 'JBrowse/View/Track/CanvasFeatures'
474 ]
475
476 cmd.extend(['--config', json.dumps(config)])
477
478 self.subprocess_check_call(cmd)
479
480
481 def process_annotations(self, track):
482 outputTrackConfig = {
483 'style': {
484 'label': track['style'].get('label', 'description'),
485 'className': track['style'].get('className', 'feature'),
486 'description': track['style'].get('description', ''),
487 },
488 'category': track['category'],
231 } 489 }
232 track_data.update(kwargs) 490
233 self._add_json(track_data) 491 for i, (dataset_path, dataset_ext, track_human_label) in enumerate(track['trackfiles']):
234 492 log.info('Processing %s / %s', track['category'], track_human_label)
235 def add_features(self, data, key, format, **kwargs): 493 outputTrackConfig['key'] = track_human_label
236 label = hashlib.md5(data).hexdigest() 494 hashData = [dataset_path, track_human_label, track['category']]
237 cmd = ['perl', 'bin/flatfile-to-json.pl', 495 outputTrackConfig['label'] = hashlib.md5('|'.join(hashData)).hexdigest() + '_%s' % i
238 TN_TABLE.get(format, 'gff'), data, 496
239 '--trackLabel', label, 497 # Colour parsing is complex due to different track types having
240 '--key', key] 498 # different colour options.
241 499 colourOptions = self.cs.parse_colours(track['conf']['options'], track['format'], gff3=dataset_path)
242 config = {} 500 # This used to be done with a dict.update() call, however that wiped out any previous style settings...
243 if 'category' in kwargs: 501 for key in colourOptions:
244 config['category'] = kwargs['category'] 502 if key == 'style':
245 503 for subkey in colourOptions['style']:
246 if kwargs.get('match', False): 504 outputTrackConfig['style'][subkey] = colourOptions['style'][subkey]
247 clientConfig = { 505 else:
248 'label': 'description', 506 outputTrackConfig[key] = colourOptions[key]
249 'description': 'Hit_titles', 507
250 } 508 if dataset_ext in ('gff', 'gff3', 'bed'):
251 509 self.add_features(dataset_path, dataset_ext, outputTrackConfig,
252 # Get min/max and build a scoring function since JBrowse doesn't 510 track['conf']['options']['gff'])
253 min_val, max_val = self._min_max_gff(data) 511 elif dataset_ext == 'bigwig':
254 512 self.add_bigwig(dataset_path, outputTrackConfig,
255 if min_val is not None and max_val is not None: 513 track['conf']['options']['wiggle'])
256 MIN_MAX_OPACITY_MATH = Template(""" 514 elif dataset_ext == 'bam':
257 var opacity = (score - ${min}) * (1/(${max} - ${min})); 515 real_indexes = track['conf']['options']['pileup']['bam_indices']['bam_index']
258 """).substitute({ 516 if not isinstance(real_indexes, list):
259 'max': max_val, 517 # <bam_indices>
260 'min': min_val, 518 # <bam_index>/path/to/a.bam.bai</bam_index>
261 }) 519 # </bam_indices>
262 520 #
263 red, green, blue = self._get_colours() 521 # The above will result in the 'bam_index' key containing a
264 color_function = COLOR_FUNCTION_TEMPLATE.substitute({ 522 # string. If there are two or more indices, the container
265 'score': "feature.get('score')", 523 # becomes a list. Fun!
266 'opacity': MIN_MAX_OPACITY_MATH, 524 real_indexes = [real_indexes]
267 'red': red, 525
268 'green': green, 526 self.add_bam(dataset_path, outputTrackConfig,
269 'blue': blue, 527 track['conf']['options']['pileup'],
270 }) 528 bam_index=real_indexes[i])
271 529 elif dataset_ext == 'blastxml':
272 clientConfig['color'] = color_function.replace('\n', '') 530 self.add_blastxml(dataset_path, outputTrackConfig, track['conf']['options']['blast'])
273 531 elif dataset_ext == 'vcf':
274 config['glyph'] = 'JBrowse/View/FeatureGlyph/Segments' 532 self.add_vcf(dataset_path, outputTrackConfig)
275
276 cmd += ['--clientConfig', json.dumps(clientConfig),
277 '--trackType', 'JBrowse/View/Track/CanvasFeatures'
278 ]
279
280 cmd.extend(['--config', json.dumps(config)])
281
282 self.subprocess_check_call(cmd)
283
284 def process_annotations(self, data, key, format, **kwargs):
285 if format in ('gff', 'gff3', 'bed'):
286 self.add_features(data, key, format, **kwargs)
287 elif format == 'bigwig':
288 self.add_bigwig(data, key, **kwargs)
289 elif format == 'bam':
290 self.add_bam(data, key, **kwargs)
291 elif format == 'blastxml':
292 self.add_blastxml(data, key, **kwargs)
293 elif format == 'vcf':
294 self.add_vcf(data, key, **kwargs)
295 533
296 def clone_jbrowse(self, jbrowse_dir, destination): 534 def clone_jbrowse(self, jbrowse_dir, destination):
535 """Clone a JBrowse directory into a destination directory.
536 """
297 # JBrowse seems to have included some bad symlinks, cp ignores bad symlinks 537 # JBrowse seems to have included some bad symlinks, cp ignores bad symlinks
298 # unlike copytree 538 # unlike copytree
299 cmd = ['mkdir', '-p', destination] 539 cmd = ['cp', '-r', os.path.join(jbrowse_dir, '.'), destination]
540 log.debug(' '.join(cmd))
300 subprocess.check_call(cmd) 541 subprocess.check_call(cmd)
301 cmd = ['cp', '-r', jbrowse_dir, destination] 542 cmd = ['mkdir', '-p', os.path.join(destination, 'data', 'raw')]
302 subprocess.check_call(cmd) 543 log.debug(' '.join(cmd))
303 cmd = ['mkdir', '-p', os.path.join(destination, 'JBrowse-1.11.6',
304 'data', 'raw')]
305 subprocess.check_call(cmd) 544 subprocess.check_call(cmd)
306 545
307 # http://unix.stackexchange.com/a/38691/22785 546 # http://unix.stackexchange.com/a/38691/22785
308 # JBrowse releases come with some broken symlinks 547 # JBrowse releases come with some broken symlinks
309 cmd = ['find', destination, '-type', 'l', '-xtype', 'l', '-exec', 'rm', "'{}'", '+'] 548 cmd = ['find', destination, '-type', 'l', '-xtype', 'l', '-exec', 'rm', "'{}'", '+']
549 log.debug(' '.join(cmd))
310 subprocess.check_call(cmd) 550 subprocess.check_call(cmd)
311 551
312 552
313 if __name__ == '__main__': 553 if __name__ == '__main__':
314 parser = argparse.ArgumentParser(description="", epilog="") 554 parser = argparse.ArgumentParser(description="", epilog="")
315 parser.add_argument('genome', type=file, help='Input genome file') 555 parser.add_argument('xml', type=file, help='Track Configuration')
316 parser.add_argument('yaml', type=file, help='Track Configuration')
317 556
318 parser.add_argument('--jbrowse', help='Folder containing a jbrowse release') 557 parser.add_argument('--jbrowse', help='Folder containing a jbrowse release')
319 parser.add_argument('--outdir', help='Output directory', default='out') 558 parser.add_argument('--outdir', help='Output directory', default='out')
559 parser.add_argument('--standalone', help='Standalone mode includes a copy of JBrowse', action='store_true')
320 args = parser.parse_args() 560 args = parser.parse_args()
561
562 tree = ET.parse(args.xml.name)
563 root = tree.getroot()
321 564
322 jc = JbrowseConnector( 565 jc = JbrowseConnector(
323 jbrowse=args.jbrowse, 566 jbrowse=args.jbrowse,
324 jbrowse_dir=os.path.join(args.outdir, 'JBrowse-1.11.6'),
325 outdir=args.outdir, 567 outdir=args.outdir,
326 genome=os.path.realpath(args.genome.name), 568 genomes=[os.path.realpath(x.text) for x in root.findall('metadata/genomes/genome')],
569 standalone=args.standalone,
570 gencode=root.find('metadata/gencode').text
327 ) 571 )
328 572
329 track_data = yaml.load(args.yaml) 573 for track in root.findall('tracks/track'):
330 for track in track_data: 574 track_conf = {}
331 path = os.path.realpath(track['file']) 575 track_conf['trackfiles'] = [
332 extra = track.get('options', {}) 576 (os.path.realpath(x.attrib['path']), x.attrib['ext'], x.attrib['label'])
333 if '__unused__' in extra: 577 for x in track.findall('files/trackFile')
334 del extra['__unused__'] 578 ]
335 579
336 for possible_partial_path in ('bam_index', 'parent'): 580 track_conf['category'] = track.attrib['cat']
337 if possible_partial_path in extra: 581 track_conf['format'] = track.attrib['format']
338 extra[possible_partial_path] = os.path.realpath( 582 try:
339 extra[possible_partial_path]) 583 # Only pertains to gff3 + blastxml. TODO?
340 extra['category'] = track.get('category', 'Default') 584 track_conf['style'] = {t.tag: t.text for t in track.find('options/style')}
341 585 except TypeError, te:
342 jc.process_annotations(path, track['label'], track['ext'], **extra) 586 track_conf['style'] = {}
343 587 pass
344 print """ 588 track_conf['conf'] = etree_to_dict(track.find('options'))
345 <html> 589 jc.process_annotations(track_conf)
346 <body>
347 <script type="text/javascript">
348 window.location=JBrowse-1.11.6/index.html
349 </script>
350 <a href="JBrowse-1.11.6/index.html">Go to JBrowse</a>
351 <p>Please note that JBrowse functions best on production Galaxy
352 instances. The paste server used in development instances has issues
353 serving the volumes of data regularly involved in JBrowse</p>
354 </body>
355 </html>
356 """