comparison abjbrowse2.py @ 13:1d86925dbb4c draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 873a12803692b0a84814a6dc08331d772d0e5492-dirty
author fubar
date Mon, 22 Jan 2024 11:52:19 +0000
parents
children 7c2e28e144f3
comparison
equal deleted inserted replaced
12:247e17ce504b 13:1d86925dbb4c
1 #!/usr/bin/env python
2 import argparse
3 import binascii
4 import datetime
5 import hashlib
6 import json
7 import logging
8 import os
9 import re
10 import shutil
11 import struct
12 import subprocess
13 import tempfile
14 import xml.etree.ElementTree as ET
15 from collections import defaultdict
16
# Root logger configured once at import time; the Galaxy wrapper captures
# this tool's stdout/stderr, so INFO-level output is safe here.
logging.basicConfig(level=logging.INFO)
log = logging.getLogger('jbrowse')
# Used to substitute the '__pd__date__pd__' placeholder in track categories
# (see JbrowseConnector.process_annotations).
TODAY = datetime.datetime.now().strftime("%Y-%m-%d")
# Base URL of the Galaxy instance, interpolated into metadata links.
# Presumably assigned by the CLI entry point elsewhere in this file; it
# remains None within this chunk -- TODO confirm.
GALAXY_INFRASTRUCTURE_URL = None
21
22
class ColorScaling(object):
    """Generates JBrowse track colour configuration.

    Provides JS colour-callback templates, a rotating Brewer palette for
    '__auto__' colour requests, and helpers to translate the tool's XML
    colour/scaling options into JBrowse track config dicts.
    """

    # JS callback: fixed RGB, opacity computed from the feature score.
    COLOR_FUNCTION_TEMPLATE = """
    function(feature, variableName, glyphObject, track) {{
        var score = {score};
        {opacity}
        return 'rgba({red}, {green}, {blue}, ' + opacity + ')';
    }}
    """

    # JS callback: colour looked up on the feature (or its parents/children),
    # opacity computed from the score found the same way.
    COLOR_FUNCTION_TEMPLATE_QUAL = r"""
    function(feature, variableName, glyphObject, track) {{
        var search_up = function self(sf, attr){{
            if(sf.get(attr) !== undefined){{
                return sf.get(attr);
            }}
            if(sf.parent() === undefined) {{
                return;
            }}else{{
                return self(sf.parent(), attr);
            }}
        }};

        var search_down = function self(sf, attr){{
            if(sf.get(attr) !== undefined){{
                return sf.get(attr);
            }}
            if(sf.children() === undefined) {{
                return;
            }}else{{
                var kids = sf.children();
                for(var child_idx in kids){{
                    var x = self(kids[child_idx], attr);
                    if(x !== undefined){{
                        return x;
                    }}
                }}
                return;
            }}
        }};

        var color = ({user_spec_color} || search_up(feature, 'color') || search_down(feature, 'color') || {auto_gen_color});
        var score = (search_up(feature, 'score') || search_down(feature, 'score'));
        {opacity}
        if(score === undefined){{ opacity = 1; }}
        var result = /^#?([a-f\d]{{2}})([a-f\d]{{2}})([a-f\d]{{2}})$/i.exec(color);
        var red = parseInt(result[1], 16);
        var green = parseInt(result[2], 16);
        var blue = parseInt(result[3], 16);
        if(isNaN(opacity) || opacity < 0){{ opacity = 0; }}
        return 'rgba(' + red + ',' + green + ',' + blue + ',' + opacity + ')';
    }}
    """

    # Score -> opacity JS snippets; braces are doubled because these strings
    # go through str.format() before being embedded in the callbacks above.
    OPACITY_MATH = {
        'linear': """
            var opacity = (score - ({min})) / (({max}) - ({min}));
        """,
        'logarithmic': """
            var opacity = Math.log10(score - ({min})) / Math.log10(({max}) - ({min}));
        """,
        'blast': """
            var opacity = 0;
            if(score == 0.0) {{
                opacity = 1;
            }} else {{
                opacity = (20 - Math.log10(score)) / 180;
            }}
        """
    }

    BREWER_COLOUR_IDX = 0
    # Paired + Set1 Brewer colours, cycled through for '__auto__' requests.
    BREWER_COLOUR_SCHEMES = [
        (166, 206, 227),
        (31, 120, 180),
        (178, 223, 138),
        (51, 160, 44),
        (251, 154, 153),
        (227, 26, 28),
        (253, 191, 111),
        (255, 127, 0),
        (202, 178, 214),
        (106, 61, 154),
        (255, 255, 153),
        (177, 89, 40),
        (228, 26, 28),
        (55, 126, 184),
        (77, 175, 74),
        (152, 78, 163),
        (255, 127, 0),
    ]

    # (dark end, light end) hex pairs for Brewer diverging palettes.
    BREWER_DIVERGING_PALLETES = {
        'BrBg': ("#543005", "#003c30"),
        'PiYg': ("#8e0152", "#276419"),
        'PRGn': ("#40004b", "#00441b"),
        'PuOr': ("#7f3b08", "#2d004b"),
        'RdBu': ("#67001f", "#053061"),
        'RdGy': ("#67001f", "#1a1a1a"),
        'RdYlBu': ("#a50026", "#313695"),
        'RdYlGn': ("#a50026", "#006837"),
        'Spectral': ("#9e0142", "#5e4fa2"),
    }

    def __init__(self):
        # Per-instance cursor into BREWER_COLOUR_SCHEMES.
        self.brewer_colour_idx = 0

    def rgb_from_hex(self, hexstr):
        """Return an (r, g, b) tuple for a 6-digit hex string (no leading '#')."""
        # http://stackoverflow.com/questions/4296249/how-do-i-convert-a-hex-triplet-to-an-rgb-tuple-and-back
        return struct.unpack('BBB', binascii.unhexlify(hexstr))

    def min_max_gff(self, gff_file):
        """Scan *gff_file* and return (min, max) of the numeric score column.

        Lines whose 6th tab-separated field is missing or non-numeric are
        skipped.  Returns (None, None) if no line has a numeric score.
        """
        min_val = None
        max_val = None
        with open(gff_file, 'r') as handle:
            for line in handle:
                try:
                    value = float(line.split('\t')[5])
                except (ValueError, IndexError):
                    # Header/comment/malformed lines carry no usable score.
                    continue
                # BUGFIX: the previous `min(value, min_val or value)` idiom
                # silently discarded a legitimate 0.0 minimum (0.0 is falsy).
                if min_val is None or value < min_val:
                    min_val = value
                if max_val is None or value > max_val:
                    max_val = value
        return min_val, max_val

    def hex_from_rgb(self, r, g, b):
        """Return '#rrggbb' for the given 0-255 channel values."""
        return '#%02x%02x%02x' % (r, g, b)

    def _get_colours(self):
        """Return the next Brewer (r, g, b) tuple, cycling through the palette."""
        r, g, b = self.BREWER_COLOUR_SCHEMES[self.brewer_colour_idx % len(self.BREWER_COLOUR_SCHEMES)]
        self.brewer_colour_idx += 1
        return r, g, b

    def parse_menus(self, track):
        """Build a JBrowse 'menuTemplate' config from the track's <menus> options.

        The four leading empty dicts preserve JBrowse's default menu entries.
        """
        trackConfig = {'menuTemplate': [{}, {}, {}, {}]}

        if 'menu' in track['menus']:
            # A single <menu> arrives as a dict; normalise to a list.
            menu_list = [track['menus']['menu']]
            if isinstance(track['menus']['menu'], list):
                menu_list = track['menus']['menu']

            for m in menu_list:
                tpl = {
                    'action': m['action'],
                    'label': m.get('label', '{name}'),
                    'iconClass': m.get('iconClass', 'dijitIconBookmark'),
                }
                if 'url' in m:
                    tpl['url'] = m['url']
                if 'content' in m:
                    tpl['content'] = m['content']
                if 'title' in m:
                    tpl['title'] = m['title']

                trackConfig['menuTemplate'].append(tpl)

        return trackConfig

    def parse_colours(self, track, trackFormat, gff3=None):
        """Translate the track's colour/scaling XML options into track config.

        track       -- parsed track options (dict)
        trackFormat -- one of 'wiggle', 'blast', 'gene_calls', ...
        gff3        -- path to a GFF3 file, used to derive automatic min/max
        """
        # Wiggle tracks have a bicolor pallete
        trackConfig = {'style': {}}
        if trackFormat == 'wiggle':

            trackConfig['style']['pos_color'] = track['wiggle']['color_pos']
            trackConfig['style']['neg_color'] = track['wiggle']['color_neg']

            if trackConfig['style']['pos_color'] == '__auto__':
                trackConfig['style']['neg_color'] = self.hex_from_rgb(*self._get_colours())
                trackConfig['style']['pos_color'] = self.hex_from_rgb(*self._get_colours())

            # Wiggle tracks can change colour at a specified place
            bc_pivot = track['wiggle']['bicolor_pivot']
            if bc_pivot not in ('mean', 'zero'):
                # The values are either one of those two strings
                # or a number
                bc_pivot = float(bc_pivot)
            trackConfig['bicolor_pivot'] = bc_pivot
        elif 'scaling' in track:
            if track['scaling']['method'] == 'ignore':
                if track['scaling']['scheme']['color'] != '__auto__':
                    trackConfig['style']['color'] = track['scaling']['scheme']['color']
                else:
                    trackConfig['style']['color'] = self.hex_from_rgb(*self._get_colours())
            else:
                # Scored method
                algo = track['scaling']['algo']
                # linear, logarithmic, blast
                scales = track['scaling']['scales']
                # type __auto__, manual (min, max)
                scheme = track['scaling']['scheme']
                # scheme -> (type (opacity), color)
                # ==================================
                # GENE CALLS OR BLAST
                # ==================================
                if trackFormat == 'blast':
                    red, green, blue = self._get_colours()
                    color_function = self.COLOR_FUNCTION_TEMPLATE.format(**{
                        'score': "feature._parent.get('score')",
                        # BUGFIX: .format() unescapes the doubled {{ }} braces;
                        # previously the raw template leaked '{{' into the JS.
                        'opacity': self.OPACITY_MATH['blast'].format(),
                        'red': red,
                        'green': green,
                        'blue': blue,
                    })
                    trackConfig['style']['color'] = color_function.replace('\n', '')
                elif trackFormat == 'gene_calls':
                    # Default values, based on GFF3 spec
                    min_val = 0
                    max_val = 1000
                    # Get min/max and build a scoring function since JBrowse doesn't
                    if scales['type'] == 'automatic' or scales['type'] == '__auto__':
                        min_val, max_val = self.min_max_gff(gff3)
                    else:
                        min_val = scales.get('min', 0)
                        max_val = scales.get('max', 1000)

                    if scheme['color'] == '__auto__':
                        user_color = 'undefined'
                        auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours())
                    elif scheme['color'].startswith('#'):
                        user_color = "'%s'" % self.hex_from_rgb(*self.rgb_from_hex(scheme['color'][1:]))
                        auto_color = 'undefined'
                    else:
                        user_color = 'undefined'
                        auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours())

                    color_function = self.COLOR_FUNCTION_TEMPLATE_QUAL.format(**{
                        'opacity': self.OPACITY_MATH[algo].format(**{'max': max_val, 'min': min_val}),
                        'user_spec_color': user_color,
                        'auto_gen_color': auto_color,
                    })

                    trackConfig['style']['color'] = color_function.replace('\n', '')
        return trackConfig
261
262
def etree_to_dict(t):
    """Recursively convert an ElementTree element into a nested dict.

    Child elements become keys (repeated tags collapse into lists),
    attributes are prefixed with '@', and text content either becomes the
    value itself (leaf nodes) or a '#text' entry (mixed nodes).
    Returns {} for None.
    """
    if t is None:
        return {}

    node = {t.tag: {} if t.attrib else None}
    kids = list(t)

    if kids:
        grouped = defaultdict(list)
        for child in kids:
            for key, val in etree_to_dict(child).items():
                grouped[key].append(val)
        # Single occurrences stay scalar; repeats become lists.
        node = {t.tag: {key: vals[0] if len(vals) == 1 else vals
                        for key, vals in grouped.items()}}

    for key, val in t.attrib.items():
        node[t.tag]['@' + key] = val

    if t.text:
        stripped = t.text.strip()
        if kids or t.attrib:
            if stripped:
                node[t.tag]['#text'] = stripped
        else:
            node[t.tag] = stripped
    return node
285
286
# score comes from feature._parent.get('score') or feature.get('score')

# Absolute directory containing this script; used to locate the helper
# scripts (blastxml_to_gapped_gff3.py, gff3_rebase.py) shipped alongside it.
INSTALLED_TO = os.path.dirname(os.path.realpath(__file__))
290
291
def metadata_from_node(node):
    """Flatten a Galaxy metadata XML node into a prefixed key/value dict.

    Reads the attributes of the single <dataset>, <history>, <metadata> and
    <tool> children, prefixing each key with its section name, then rewrites
    a few entries as HTML links back into the Galaxy instance.  Returns an
    empty dict when the node lacks exactly one <dataset> child.
    """
    metadata = {}
    try:
        if len(node.findall('dataset')) != 1:
            # exit early
            return metadata
    except Exception:
        return {}

    # Collect '<section>_<attr>' entries from each expected child element.
    for section in ('dataset', 'history', 'metadata', 'tool'):
        for (key, value) in node.findall(section)[0].attrib.items():
            metadata['%s_%s' % (section, key)] = value

    # Additional Mappings applied:
    metadata['dataset_edam_format'] = '<a target="_blank" href="http://edamontology.org/{0}">{1}</a>'.format(metadata['dataset_edam_format'], metadata['dataset_file_ext'])
    metadata['history_user_email'] = '<a href="mailto:{0}">{0}</a>'.format(metadata['history_user_email'])
    metadata['history_display_name'] = '<a target="_blank" href="{galaxy}/history/view/{encoded_hist_id}">{hist_name}</a>'.format(
        galaxy=GALAXY_INFRASTRUCTURE_URL,
        encoded_hist_id=metadata['history_id'],
        hist_name=metadata['history_display_name']
    )
    metadata['tool_tool'] = '<a target="_blank" href="{galaxy}/datasets/{encoded_id}/show_params">{tool_id}</a>'.format(
        galaxy=GALAXY_INFRASTRUCTURE_URL,
        encoded_id=metadata['dataset_id'],
        tool_id=metadata['tool_tool_id'],
        # tool_version=metadata['tool_tool_version'],
    )
    return metadata
328
329
class JbrowseConnector(object):
    """Builds a JBrowse2 instance inside *outdir*.

    Clones the JBrowse2 static distribution, registers assemblies and
    tracks through the ``jbrowse`` CLI, and post-processes the generated
    ``data/config.json`` (default session, theming).
    """

    def __init__(self, jbrowse, outdir, genomes):
        """
        jbrowse -- path of the JBrowse2 distribution to clone
        outdir  -- directory where the instance is built
        genomes -- iterable of {'path': ..., 'label': ...} genome descriptors
        """
        self.cs = ColorScaling()
        self.jbrowse = jbrowse
        self.outdir = outdir
        self.genome_paths = genomes
        # Track ids accumulated for a later `jbrowse text-index` run.
        self.tracksToIndex = []

        # Maps assembly label -> name of a default scaffold (or None).
        self.assembly_ids = {}
        # Label of the assembly that new tracks are attached to.
        # NOTE(review): initialised as a list but add_assembly() later
        # assigns a string label; the list value is never used as a list.
        self.current_assembly_id = []

        # If upgrading, look at the existing data
        self.check_existing(self.outdir)

        self.clone_jbrowse(self.jbrowse, self.outdir)

        self.process_genomes()

    def subprocess_check_call(self, command, output=None):
        """Run *command* (argv list) in self.outdir, optionally redirecting stdout."""
        if output:
            log.debug('cd %s && %s > %s', self.outdir, ' '.join(command), output)
            subprocess.check_call(command, cwd=self.outdir, stdout=output)
        else:
            log.debug('cd %s && %s', self.outdir, ' '.join(command))
            subprocess.check_call(command, cwd=self.outdir)

    def subprocess_popen(self, command):
        """Run *command* through a shell; raise RuntimeError on non-zero exit."""
        log.debug('cd %s && %s', self.outdir, command)
        p = subprocess.Popen(command, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        output, err = p.communicate()
        retcode = p.returncode
        if retcode != 0:
            log.error('cd %s && %s', self.outdir, command)
            log.error(output)
            log.error(err)
            raise RuntimeError("Command failed with exit code %s" % (retcode))

    def subprocess_check_output(self, command):
        """Run *command* (argv list) in self.outdir and return its stdout."""
        log.debug('cd %s && %s', self.outdir, ' '.join(command))
        return subprocess.check_output(command, cwd=self.outdir)

    def symlink_or_copy(self, src, dest):
        """Symlink *src* to *dest* when GALAXY_JBROWSE_SYMLINKS is set, else copy."""
        if 'GALAXY_JBROWSE_SYMLINKS' in os.environ and bool(os.environ['GALAXY_JBROWSE_SYMLINKS']):
            cmd = ['ln', '-s', src, dest]
        else:
            cmd = ['cp', src, dest]

        return self.subprocess_check_call(cmd)

    def symlink_or_copy_load_action(self):
        """Return the `jbrowse --load` mode matching symlink_or_copy()."""
        if 'GALAXY_JBROWSE_SYMLINKS' in os.environ and bool(os.environ['GALAXY_JBROWSE_SYMLINKS']):
            return 'symlink'
        else:
            return 'copy'

    def check_existing(self, destination):
        """Record assembly names from a pre-existing config.json (upgrade case)."""
        existing = os.path.join(destination, 'data', "config.json")
        if os.path.exists(existing):
            with open(existing, 'r') as existing_conf:
                conf = json.load(existing_conf)
                if 'assemblies' in conf:
                    for assembly in conf['assemblies']:
                        if 'name' in assembly:
                            self.assembly_ids[assembly['name']] = None

    def process_genomes(self):
        """Register every input genome as an assembly."""
        for genome_node in self.genome_paths:
            # We only expect one input genome per run. This for loop is just
            # easier to write than the alternative / catches any possible
            # issues.
            self.add_assembly(genome_node['path'], genome_node['label'])

    def add_assembly(self, path, label, default=True):
        """bgzip + faidx a FASTA and register it with `jbrowse add-assembly`.

        path    -- FASTA file to install
        label   -- requested assembly label (deduplicated if already taken)
        default -- when True, make this the current assembly for new tracks
        Returns the (possibly deduplicated) assembly label actually used.
        """
        # Find a non-existing filename for the new genome
        # (to avoid colision when upgrading an existing instance)
        rel_seq_path = os.path.join('data', 'assembly')
        seq_path = os.path.join(self.outdir, rel_seq_path)
        fn_try = 1
        while (os.path.exists(seq_path + '.fasta') or os.path.exists(seq_path + '.fasta.gz')
               or os.path.exists(seq_path + '.fasta.gz.fai') or os.path.exists(seq_path + '.fasta.gz.gzi')):
            rel_seq_path = os.path.join('data', 'assembly%s' % fn_try)
            seq_path = os.path.join(self.outdir, rel_seq_path)
            fn_try += 1

        # Find a non-existing label for the new genome
        # (to avoid colision when upgrading an existing instance)
        lab_try = 1
        uniq_label = label
        while uniq_label in self.assembly_ids:
            uniq_label = label + str(lab_try)
            lab_try += 1

        # Find a default scaffold to display
        # TODO this may not be necessary in the future, see https://github.com/GMOD/jbrowse-components/issues/2708
        with open(path, 'r') as fa_handle:
            fa_header = fa_handle.readline()[1:].strip().split(' ')[0]

        self.assembly_ids[uniq_label] = fa_header
        if default:
            self.current_assembly_id = uniq_label

        copied_genome = seq_path + '.fasta'
        shutil.copy(path, copied_genome)

        # Compress with bgzip
        cmd = ['bgzip', copied_genome]
        self.subprocess_check_call(cmd)

        # FAI Index
        cmd = ['samtools', 'faidx', copied_genome + '.gz']
        self.subprocess_check_call(cmd)

        self.subprocess_check_call([
            'jbrowse', 'add-assembly',
            '--load', 'inPlace',
            '--name', uniq_label,
            '--type', 'bgzipFasta',
            '--target', os.path.join(self.outdir, 'data'),
            '--skipCheck',
            rel_seq_path + '.fasta.gz'])

        return uniq_label

    def text_index(self):
        """Run `jbrowse text-index` over the tracks collected so far."""
        # Index tracks
        args = [
            'jbrowse', 'text-index',
            '--target', os.path.join(self.outdir, 'data'),
            '--assemblies', self.current_assembly_id,
        ]

        tracks = ','.join(self.tracksToIndex)
        if tracks:
            args += ['--tracks', tracks]

        self.subprocess_check_call(args)

    def _blastxml_to_gff3(self, xml, min_gap=10):
        """Convert a BLAST XML file to gapped GFF3; return the temp file path."""
        gff3_unrebased = tempfile.NamedTemporaryFile(delete=False)
        cmd = ['python', os.path.join(INSTALLED_TO, 'blastxml_to_gapped_gff3.py'),
               '--trim', '--trim_end', '--include_seq', '--min_gap', str(min_gap), xml]
        log.debug('cd %s && %s > %s', self.outdir, ' '.join(cmd), gff3_unrebased.name)
        subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_unrebased)
        gff3_unrebased.close()
        return gff3_unrebased.name

    def _prepare_track_style(self, xml_conf):
        """Build the per-track 'displays' config from the parsed <style> options.

        Consumes the optional 'display' key (display type) and copies the
        remaining style entries verbatim.
        """
        style_data = {
            "type": "LinearBasicDisplay"
        }

        if 'display' in xml_conf['style']:
            style_data['type'] = xml_conf['style']['display']
            del xml_conf['style']['display']

        style_data['displayId'] = "%s_%s" % (xml_conf['label'], style_data['type'])

        style_data.update(xml_conf['style'])

        return {'displays': [style_data]}

    def add_blastxml(self, data, trackData, blastOpts, **kwargs):
        """Add a BLAST XML track: convert to GFF3, optionally rebase, sort, load."""
        gff3 = self._blastxml_to_gff3(data, min_gap=blastOpts['min_gap'])

        if 'parent' in blastOpts and blastOpts['parent'] != 'None':
            gff3_rebased = tempfile.NamedTemporaryFile(delete=False)
            cmd = ['python', os.path.join(INSTALLED_TO, 'gff3_rebase.py')]
            if blastOpts.get('protein', 'false') == 'true':
                cmd.append('--protein2dna')
            cmd.extend([os.path.realpath(blastOpts['parent']), gff3])
            log.debug('cd %s && %s > %s', self.outdir, ' '.join(cmd), gff3_rebased.name)
            subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_rebased)
            gff3_rebased.close()

            # Replace original gff3 file
            shutil.copy(gff3_rebased.name, gff3)
            os.unlink(gff3_rebased.name)

        rel_dest = os.path.join('data', trackData['label'] + '.gff')
        dest = os.path.join(self.outdir, rel_dest)

        self._sort_gff(gff3, dest)
        os.unlink(gff3)

        style_json = self._prepare_track_style(trackData)

        self._add_track(trackData['label'], trackData['key'], trackData['category'], rel_dest + '.gz', config=style_json)

    def add_bigwig(self, data, trackData, wiggleOpts, **kwargs):
        """Add a BigWig coverage track."""
        rel_dest = os.path.join('data', trackData['label'] + '.bw')
        dest = os.path.join(self.outdir, rel_dest)
        self.symlink_or_copy(os.path.realpath(data), dest)

        style_json = self._prepare_track_style(trackData)

        self._add_track(trackData['label'], trackData['key'], trackData['category'], rel_dest, config=style_json)

    # Anything ending in "am" (Bam or Cram)
    def add_xam(self, data, trackData, xamOpts, index=None, ext="bam", **kwargs):
        """Add a BAM/CRAM track, reusing or locating the matching index file."""
        index_ext = "bai"
        if ext == "cram":
            index_ext = "crai"

        rel_dest = os.path.join('data', trackData['label'] + '.%s' % ext)
        dest = os.path.join(self.outdir, rel_dest)

        self.symlink_or_copy(os.path.realpath(data), dest)

        if index is not None and os.path.exists(os.path.realpath(index)):
            # xai most probably made by galaxy and stored in galaxy dirs, need to copy it to dest
            self.subprocess_check_call(['cp', os.path.realpath(index), dest + '.%s' % index_ext])
        else:
            # Can happen in exotic condition
            # e.g. if bam imported as symlink with datatype=unsorted.bam, then datatype changed to bam
            #      => no index generated by galaxy, but there might be one next to the symlink target
            #      this trick allows to skip the bam sorting made by galaxy if already done outside
            if os.path.exists(os.path.realpath(data) + '.%s' % index_ext):
                self.symlink_or_copy(os.path.realpath(data) + '.%s' % index_ext, dest + '.%s' % index_ext)
            else:
                # BUGFIX: the args were previously passed as one tuple, which
                # breaks %-style logging formatting; also warn() is deprecated.
                log.warning('Could not find a bam index (.%s file) for %s', index_ext, data)

        style_json = self._prepare_track_style(trackData)

        self._add_track(trackData['label'], trackData['key'], trackData['category'], rel_dest, config=style_json)

    def add_vcf(self, data, trackData, vcfOpts=None, zipped=False, **kwargs):
        """Add a VCF track, bgzipping and tabix-indexing it when not pre-zipped."""
        # Mutable default ({}) replaced by None; vcfOpts is currently unused.
        if zipped:
            rel_dest = os.path.join('data', trackData['label'] + '.vcf.gz')
            dest = os.path.join(self.outdir, rel_dest)
            shutil.copy(os.path.realpath(data), dest)
            # NOTE(review): no tabix index is produced for the pre-zipped
            # case -- verify whether the input is expected to ship its own.
        else:
            rel_dest = os.path.join('data', trackData['label'] + '.vcf')
            dest = os.path.join(self.outdir, rel_dest)
            shutil.copy(os.path.realpath(data), dest)

            cmd = ['bgzip', dest]
            self.subprocess_check_call(cmd)
            cmd = ['tabix', dest + '.gz']
            self.subprocess_check_call(cmd)

            rel_dest = os.path.join('data', trackData['label'] + '.vcf.gz')

        style_json = self._prepare_track_style(trackData)

        self._add_track(trackData['label'], trackData['key'], trackData['category'], rel_dest, config=style_json)

    def add_gff(self, data, format, trackData, gffOpts, **kwargs):
        """Add a GFF/GFF3 track (sorted, bgzipped, tabix-indexed)."""
        rel_dest = os.path.join('data', trackData['label'] + '.gff')
        dest = os.path.join(self.outdir, rel_dest)

        self._sort_gff(data, dest)

        style_json = self._prepare_track_style(trackData)

        self._add_track(trackData['label'], trackData['key'], trackData['category'], rel_dest + '.gz', config=style_json)

    def add_bed(self, data, format, trackData, gffOpts, **kwargs):
        """Add a BED track (sorted, bgzipped, tabix-indexed)."""
        rel_dest = os.path.join('data', trackData['label'] + '.bed')
        dest = os.path.join(self.outdir, rel_dest)

        self._sort_bed(data, dest)

        style_json = self._prepare_track_style(trackData)

        self._add_track(trackData['label'], trackData['key'], trackData['category'], rel_dest + '.gz', config=style_json)

    def add_paf(self, data, trackData, pafOpts, **kwargs):
        """Add a PAF synteny track; registers the second genome as an assembly."""
        rel_dest = os.path.join('data', trackData['label'] + '.paf')
        dest = os.path.join(self.outdir, rel_dest)

        self.symlink_or_copy(os.path.realpath(data), dest)

        added_assembly = self.add_assembly(pafOpts['genome'], pafOpts['genome_label'], default=False)

        style_json = self._prepare_track_style(trackData)

        self._add_track(trackData['label'], trackData['key'], trackData['category'], rel_dest, assemblies=[self.current_assembly_id, added_assembly], config=style_json)

    def add_hic(self, data, trackData, hicOpts, **kwargs):
        """Add a Hi-C contact-matrix track."""
        rel_dest = os.path.join('data', trackData['label'] + '.hic')
        dest = os.path.join(self.outdir, rel_dest)

        self.symlink_or_copy(os.path.realpath(data), dest)

        style_json = self._prepare_track_style(trackData)

        self._add_track(trackData['label'], trackData['key'], trackData['category'], rel_dest, config=style_json)

    def add_sparql(self, url, query, query_refnames, trackData):
        """Register a SPARQL endpoint as a FeatureTrack via `add-track-json`.

        As of jbrowse CLI 1.6.4 `add-track` does not support sparql tracks,
        hence the raw JSON route.
        """
        json_track_data = {
            "type": "FeatureTrack",
            # BUGFIX: this previously emitted the Python builtin `id`, which
            # is not JSON-serialisable; use the track label as the id.
            "trackId": trackData['label'],
            "name": trackData['label'],
            "adapter": {
                "type": "SPARQLAdapter",
                "endpoint": {
                    "uri": url,
                    "locationType": "UriLocation"
                },
                "queryTemplate": query
            },
            "category": [
                trackData['category']
            ],
            "assemblyNames": [
                self.current_assembly_id
            ]
        }

        if query_refnames:
            # BUGFIX: was written with ':' (a no-op annotation-like statement)
            # and never actually assigned the key.
            json_track_data['adapter']['refNamesQueryTemplate'] = query_refnames

        self.subprocess_check_call([
            'jbrowse', 'add-track-json',
            '--target', os.path.join(self.outdir, 'data'),
            # BUGFIX: subprocess argv entries must be strings, not dicts.
            json.dumps(json_track_data)])

    def _add_track(self, id, label, category, path, assemblies=None, config=None):
        """Register an on-disk track file with `jbrowse add-track`.

        assemblies -- optional list of assembly names the track spans;
                      defaults to the current assembly.
        config     -- optional dict serialized into `--config`.
        """
        # Mutable default ([]) replaced by None to avoid cross-call sharing.
        assemblies_opt = self.current_assembly_id
        if assemblies:
            assemblies_opt = ','.join(assemblies)

        cmd = [
            'jbrowse', 'add-track',
            '--load', 'inPlace',
            '--name', label,
            '--category', category,
            '--target', os.path.join(self.outdir, 'data'),
            '--trackId', id,
            '--assemblyNames', assemblies_opt
        ]

        if config:
            cmd.append('--config')
            cmd.append(json.dumps(config))

        cmd.append(path)

        self.subprocess_check_call(cmd)

    def _sort_gff(self, data, dest):
        """Sort a GFF into *dest* (skipping blank lines), then bgzip + tabix it."""
        # Only index if not already done
        if not os.path.exists(dest):
            cmd = "gff3sort.pl --precise '%s' | grep -v \"^$\" > '%s'" % (data, dest)
            self.subprocess_popen(cmd)

            self.subprocess_check_call(['bgzip', '-f', dest])
            self.subprocess_check_call(['tabix', '-f', '-p', 'gff', dest + '.gz'])

    def _sort_bed(self, data, dest):
        """Sort a BED by chrom/start into *dest*, then bgzip + tabix it."""
        # Only index if not already done
        if not os.path.exists(dest):
            cmd = ['sort', '-k1,1', '-k2,2n', data]
            with open(dest, 'w') as handle:
                self.subprocess_check_call(cmd, output=handle)

            self.subprocess_check_call(['bgzip', '-f', dest])
            self.subprocess_check_call(['tabix', '-f', '-p', 'bed', dest + '.gz'])

    def process_annotations(self, track):
        """Dispatch each file of *track* to the matching add_* method.

        Yields the generated (hash-based) label for every processed file.
        """
        category = track['category'].replace('__pd__date__pd__', TODAY)
        outputTrackConfig = {
            'category': category,
        }

        # Reverse of Galaxy's element-identifier sanitisation.
        mapped_chars = {
            '>': '__gt__',
            '<': '__lt__',
            "'": '__sq__',
            '"': '__dq__',
            '[': '__ob__',
            ']': '__cb__',
            '{': '__oc__',
            '}': '__cc__',
            '@': '__at__',
            '#': '__pd__',
            "": '__cn__'
        }

        for i, (dataset_path, dataset_ext, track_human_label, extra_metadata) in enumerate(track['trackfiles']):
            # Unsanitize labels (element_identifiers are always sanitized by Galaxy)
            for key, value in mapped_chars.items():
                track_human_label = track_human_label.replace(value, key)

            log.info('Processing track %s / %s (%s)', category, track_human_label, dataset_ext)
            outputTrackConfig['key'] = track_human_label
            # We add extra data to hash for the case of REST + SPARQL.
            if 'conf' in track and 'options' in track['conf'] and 'url' in track['conf']['options']:
                rest_url = track['conf']['options']['url']
            else:
                rest_url = ''

            # I chose to use track['category'] instead of 'category' here. This
            # is intentional. This way re-running the tool on a different date
            # will not generate different hashes and make comparison of outputs
            # much simpler.
            hashData = [str(dataset_path), track_human_label, track['category'], rest_url, self.current_assembly_id]
            hashData = '|'.join(hashData).encode('utf-8')
            outputTrackConfig['label'] = hashlib.md5(hashData).hexdigest() + '_%s' % i
            outputTrackConfig['metadata'] = extra_metadata

            outputTrackConfig['style'] = track['style']

            if 'menus' in track['conf']['options']:
                menus = self.cs.parse_menus(track['conf']['options'])
                outputTrackConfig.update(menus)

            if dataset_ext in ('gff', 'gff3'):
                self.add_gff(dataset_path, dataset_ext, outputTrackConfig,
                             track['conf']['options']['gff'])
            elif dataset_ext == 'bed':
                self.add_bed(dataset_path, dataset_ext, outputTrackConfig,
                             track['conf']['options']['gff'])
            elif dataset_ext == 'bigwig':
                self.add_bigwig(dataset_path, outputTrackConfig,
                                track['conf']['options']['wiggle'])
            elif dataset_ext == 'bam':
                real_indexes = track['conf']['options']['pileup']['bam_indices']['bam_index']
                if not isinstance(real_indexes, list):
                    # <bam_indices>
                    #  <bam_index>/path/to/a.bam.bai</bam_index>
                    # </bam_indices>
                    #
                    # The above will result in the 'bam_index' key containing a
                    # string. If there are two or more indices, the container
                    # becomes a list. Fun!
                    real_indexes = [real_indexes]

                self.add_xam(dataset_path, outputTrackConfig,
                             track['conf']['options']['pileup'],
                             index=real_indexes[i], ext="bam")
            elif dataset_ext == 'cram':
                real_indexes = track['conf']['options']['cram']['cram_indices']['cram_index']
                if not isinstance(real_indexes, list):
                    # Same single-element-vs-list quirk as for bam above.
                    real_indexes = [real_indexes]

                self.add_xam(dataset_path, outputTrackConfig,
                             track['conf']['options']['cram'],
                             index=real_indexes[i], ext="cram")
            elif dataset_ext == 'blastxml':
                self.add_blastxml(dataset_path, outputTrackConfig, track['conf']['options']['blast'])
            elif dataset_ext == 'vcf':
                self.add_vcf(dataset_path, outputTrackConfig)
            elif dataset_ext == 'vcf_bgzip':
                self.add_vcf(dataset_path, outputTrackConfig, zipped=True)
            elif dataset_ext == 'rest':
                # NOTE(review): add_rest is not defined in this file view --
                # confirm it exists elsewhere before relying on this branch.
                self.add_rest(track['conf']['options']['rest']['url'], outputTrackConfig)
            elif dataset_ext == 'synteny':
                self.add_paf(dataset_path, outputTrackConfig,
                             track['conf']['options']['synteny'])
            elif dataset_ext == 'hic':
                self.add_hic(dataset_path, outputTrackConfig,
                             track['conf']['options']['hic'])
            elif dataset_ext == 'sparql':
                sparql_query = track['conf']['options']['sparql']['query']
                for key, value in mapped_chars.items():
                    sparql_query = sparql_query.replace(value, key)
                sparql_query_refnames = track['conf']['options']['sparql']['query_refnames']
                for key, value in mapped_chars.items():
                    sparql_query_refnames = sparql_query_refnames.replace(value, key)
                self.add_sparql(track['conf']['options']['sparql']['url'], sparql_query, sparql_query_refnames, outputTrackConfig)
            else:
                log.warning('Do not know how to handle %s', dataset_ext)

            # Return non-human label for use in other fields
            yield outputTrackConfig['label']

    def add_default_session(self, data):
        """
        Add some default session settings: set some assemblies/tracks on/off
        """
        tracks_data = []

        # TODO using the default session for now, but check out session specs in the future https://github.com/GMOD/jbrowse-components/issues/2708

        # We need to know the track type from the config.json generated just before
        config_path = os.path.join(self.outdir, 'data', 'config.json')
        track_types = {}
        with open(config_path, 'r') as config_file:
            config_json = json.load(config_file)

        for track_conf in config_json['tracks']:
            track_types[track_conf['trackId']] = track_conf['type']

        for on_track in data['visibility']['default_on']:
            # TODO several problems with this currently
            # - we are forced to copy the same kind of style config as the per track config from _prepare_track_style (not exactly the same though)
            # - we get an error when refreshing the page
            # - this could be solved by session specs, see https://github.com/GMOD/jbrowse-components/issues/2708
            style_data = {
                "type": "LinearBasicDisplay",
                "height": 100
            }

            if on_track in data['style']:
                if 'display' in data['style'][on_track]:
                    style_data['type'] = data['style'][on_track]['display']
                    del data['style'][on_track]['display']

                style_data.update(data['style'][on_track])

            if on_track in data['style_labels']:
                # TODO fix this: it should probably go in a renderer block (SvgFeatureRenderer) but still does not work
                # TODO move this to per track displays?
                style_data['labels'] = data['style_labels'][on_track]

            tracks_data.append({
                "type": track_types[on_track],
                "configuration": on_track,
                "displays": [
                    style_data
                ]
            })

        # The view for the assembly we're adding
        view_json = {
            "type": "LinearGenomeView",
            "tracks": tracks_data
        }

        refName = None
        if data.get('defaultLocation', ''):
            loc_match = re.search(r'^(\w+):(\d+)\.+(\d+)$', data['defaultLocation'])
            if loc_match:
                refName = loc_match.group(1)
                start = int(loc_match.group(2))
                end = int(loc_match.group(3))
        elif self.assembly_ids[self.current_assembly_id] is not None:
            refName = self.assembly_ids[self.current_assembly_id]
            start = 0
            end = 1000000  # Booh, hard coded! waiting for https://github.com/GMOD/jbrowse-components/issues/2708

        if refName is not None:
            # TODO displayedRegions is not just zooming to the region, it hides the rest of the chromosome
            view_json['displayedRegions'] = [{
                "refName": refName,
                "start": start,
                "end": end,
                "reversed": False,
                "assemblyName": self.current_assembly_id
            }]

        session_name = data.get('session_name', "New session")
        if not session_name:
            session_name = "New session"

        # Merge with possibly existing defaultSession (if upgrading a jbrowse instance)
        session_json = {}
        if 'defaultSession' in config_json:
            session_json = config_json['defaultSession']

        session_json["name"] = session_name

        if 'views' not in session_json:
            session_json['views'] = []

        session_json['views'].append(view_json)

        config_json['defaultSession'] = session_json

        with open(config_path, 'w') as config_file:
            json.dump(config_json, config_file, indent=2)

    def add_general_configuration(self, data):
        """
        Add some general configuration to the config.json file
        """
        config_path = os.path.join(self.outdir, 'data', 'config.json')
        with open(config_path, 'r') as config_file:
            config_json = json.load(config_file)

        config_data = {}

        config_data['disableAnalytics'] = data.get('analytics', 'false') == 'true'

        config_data['theme'] = {
            "palette": {
                "primary": {
                    "main": data.get('primary_color', '#0D233F')
                },
                "secondary": {
                    "main": data.get('secondary_color', '#721E63')
                },
                "tertiary": {
                    "main": data.get('tertiary_color', '#135560')
                },
                "quaternary": {
                    "main": data.get('quaternary_color', '#FFB11D')
                },
            },
            "typography": {
                "fontSize": int(data.get('font_size', 10))
            },
        }

        config_json['configuration'].update(config_data)

        with open(config_path, 'w') as config_file:
            json.dump(config_json, config_file, indent=2)

    def clone_jbrowse(self, jbrowse_dir, destination):
        """Clone a JBrowse directory into a destination directory.
        """
        copytree(jbrowse_dir, destination)

        try:
            shutil.rmtree(os.path.join(destination, 'test_data'))
        except OSError as e:
            log.error("Error: %s - %s." % (e.filename, e.strerror))

        if not os.path.exists(os.path.join(destination, 'data')):
            # It can already exist if upgrading an instance
            os.makedirs(os.path.join(destination, 'data'))
            log.info("makedir %s" % (os.path.join(destination, 'data')))

        os.symlink('./data/config.json', os.path.join(destination, 'config.json'))
975
976
def copytree(src, dst, symlinks=False, ignore=None):
    """Recursively copy the *contents* of src into dst.

    Unlike shutil.copytree, dst is expected to already exist, and existing
    destination subdirectories are merged into instead of raising
    FileExistsError — needed when upgrading a previous JBrowse instance
    in place.

    :param src: source directory (must exist)
    :param dst: destination directory (must exist)
    :param symlinks: passed through to shutil.copytree for new subtrees
    :param ignore: passed through to shutil.copytree for new subtrees
    """
    for item in os.listdir(src):
        s = os.path.join(src, item)
        d = os.path.join(dst, item)
        if os.path.isdir(s):
            if os.path.isdir(d):
                # Destination subdir already present: merge recursively
                # instead of letting shutil.copytree fail.
                copytree(s, d, symlinks, ignore)
            else:
                shutil.copytree(s, d, symlinks, ignore)
        else:
            shutil.copy2(s, d)
985
986
def parse_style_conf(item):
    """Convert a style XML node into a Python value.

    The node's optional 'type' attribute selects the conversion:
    'boolean' maps the text to True/False, 'integer' parses it as int,
    anything else (or no attribute) returns the raw text.
    """
    declared = item.attrib.get('type')
    if declared == 'boolean':
        return item.text in ("yes", "true", "True")
    if declared == 'integer':
        return int(item.text)
    return item.text
995
996
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="", epilog="")
    parser.add_argument('xml', type=argparse.FileType('r'), help='Track Configuration')

    parser.add_argument('--jbrowse', help='Folder containing a jbrowse release')
    parser.add_argument('--outdir', help='Output directory', default='out')
    parser.add_argument('--version', '-V', action='version', version="%(prog)s 0.8.0")
    args = parser.parse_args()

    tree = ET.parse(args.xml.name)
    root = tree.getroot()

    # This should be done ASAP
    GALAXY_INFRASTRUCTURE_URL = root.find('metadata/galaxyUrl').text
    # Sometimes this comes as `localhost` without a protocol
    if not GALAXY_INFRASTRUCTURE_URL.startswith('http'):
        # so we'll prepend `http://` and hope for the best. Requests *should*
        # be GET and not POST so it should redirect OK
        GALAXY_INFRASTRUCTURE_URL = 'http://' + GALAXY_INFRASTRUCTURE_URL

    jc = JbrowseConnector(
        jbrowse=args.jbrowse,
        outdir=args.outdir,
        genomes=[
            {
                'path': os.path.realpath(x.attrib['path']),
                'meta': metadata_from_node(x.find('metadata')),
                'label': x.attrib['label']
            }
            for x in root.findall('metadata/genomes/genome')
        ]
    )

    default_session_data = {
        'visibility': {
            'default_on': [],
            'default_off': [],
        },
        'style': {},
        'style_labels': {}
    }

    # TODO add metadata to tracks
    for track in root.findall('tracks/track'):
        track_conf = {}
        track_conf['trackfiles'] = []

        trackfiles = track.findall('files/trackFile')
        if trackfiles:
            for x in trackfiles:
                metadata = metadata_from_node(x.find('metadata'))

                track_conf['trackfiles'].append((
                    os.path.realpath(x.attrib['path']),
                    x.attrib['ext'],
                    x.attrib['label'],
                    metadata
                ))
        else:
            # For tracks without files (rest, sparql)
            track_conf['trackfiles'].append((
                '',  # N/A, no path for rest or sparql
                track.attrib['format'],
                track.find('options/label').text,
                {}
            ))

        track_conf['category'] = track.attrib['cat']
        track_conf['format'] = track.attrib['format']
        # Parse the per-track style blocks once each (this used to be
        # computed twice identically for 'style').
        track_conf['style'] = {item.tag: parse_style_conf(item) for item in track.find('options/style')}
        track_conf['style_labels'] = {item.tag: parse_style_conf(item) for item in track.find('options/style_labels')}

        track_conf['conf'] = etree_to_dict(track.find('options'))
        keys = jc.process_annotations(track_conf)

        for key in keys:
            default_session_data['visibility'][track.attrib.get('visibility', 'default_off')].append(key)

            default_session_data['style'][key] = track_conf['style']  # TODO do we need this anymore?
            default_session_data['style_labels'][key] = track_conf['style_labels']

    default_session_data['defaultLocation'] = root.find('metadata/general/defaultLocation').text
    default_session_data['session_name'] = root.find('metadata/general/session_name').text

    general_data = {
        'analytics': root.find('metadata/general/analytics').text,
        'primary_color': root.find('metadata/general/primary_color').text,
        'secondary_color': root.find('metadata/general/secondary_color').text,
        'tertiary_color': root.find('metadata/general/tertiary_color').text,
        'quaternary_color': root.find('metadata/general/quaternary_color').text,
        'font_size': root.find('metadata/general/font_size').text,
    }

    jc.add_default_session(default_session_data)
    jc.add_general_configuration(general_data)
    jc.text_index()
1097