Mercurial > repos > fubar > jbrowse2
comparison abjbrowse2.py @ 13:1d86925dbb4c draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 873a12803692b0a84814a6dc08331d772d0e5492-dirty
| author | fubar |
|---|---|
| date | Mon, 22 Jan 2024 11:52:19 +0000 |
| parents | |
| children | 7c2e28e144f3 |
comparison
equal
deleted
inserted
replaced
| 12:247e17ce504b | 13:1d86925dbb4c |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 import argparse | |
| 3 import binascii | |
| 4 import datetime | |
| 5 import hashlib | |
| 6 import json | |
| 7 import logging | |
| 8 import os | |
| 9 import re | |
| 10 import shutil | |
| 11 import struct | |
| 12 import subprocess | |
| 13 import tempfile | |
| 14 import xml.etree.ElementTree as ET | |
| 15 from collections import defaultdict | |
| 16 | |
| 17 logging.basicConfig(level=logging.INFO) | |
| 18 log = logging.getLogger('jbrowse') | |
| 19 TODAY = datetime.datetime.now().strftime("%Y-%m-%d") | |
| 20 GALAXY_INFRASTRUCTURE_URL = None | |
| 21 | |
| 22 | |
class ColorScaling(object):
    """Generate JBrowse client-side colour callback functions.

    Produces JavaScript snippets (as Python strings) that JBrowse
    evaluates in the browser to colour features, optionally scaling
    opacity by the feature score, plus helpers for cycling through a
    Brewer colour palette.
    """

    # Fixed-colour callback: RGB is baked in, opacity computed from {score}.
    COLOR_FUNCTION_TEMPLATE = """
    function(feature, variableName, glyphObject, track) {{
        var score = {score};
        {opacity}
        return 'rgba({red}, {green}, {blue}, ' + opacity + ')';
    }}
    """

    # Qualitative callback: colour is looked up on the feature itself
    # (walking up to parents / down to children), opacity from the score.
    COLOR_FUNCTION_TEMPLATE_QUAL = r"""
    function(feature, variableName, glyphObject, track) {{
        var search_up = function self(sf, attr){{
            if(sf.get(attr) !== undefined){{
                return sf.get(attr);
            }}
            if(sf.parent() === undefined) {{
                return;
            }}else{{
                return self(sf.parent(), attr);
            }}
        }};

        var search_down = function self(sf, attr){{
            if(sf.get(attr) !== undefined){{
                return sf.get(attr);
            }}
            if(sf.children() === undefined) {{
                return;
            }}else{{
                var kids = sf.children();
                for(var child_idx in kids){{
                    var x = self(kids[child_idx], attr);
                    if(x !== undefined){{
                        return x;
                    }}
                }}
                return;
            }}
        }};

        var color = ({user_spec_color} || search_up(feature, 'color') || search_down(feature, 'color') || {auto_gen_color});
        var score = (search_up(feature, 'score') || search_down(feature, 'score'));
        {opacity}
        if(score === undefined){{ opacity = 1; }}
        var result = /^#?([a-f\d]{{2}})([a-f\d]{{2}})([a-f\d]{{2}})$/i.exec(color);
        var red = parseInt(result[1], 16);
        var green = parseInt(result[2], 16);
        var blue = parseInt(result[3], 16);
        if(isNaN(opacity) || opacity < 0){{ opacity = 0; }}
        return 'rgba(' + red + ',' + green + ',' + blue + ',' + opacity + ')';
    }}
    """

    # JS fragments computing `opacity` from `score`; {min}/{max} are filled
    # in for the linear/logarithmic variants before use.
    OPACITY_MATH = {
        'linear': """
            var opacity = (score - ({min})) / (({max}) - ({min}));
        """,
        'logarithmic': """
            var opacity = Math.log10(score - ({min})) / Math.log10(({max}) - ({min}));
        """,
        'blast': """
            var opacity = 0;
            if(score == 0.0) {{
                opacity = 1;
            }} else {{
                opacity = (20 - Math.log10(score)) / 180;
            }}
        """
    }

    BREWER_COLOUR_IDX = 0
    # Paired qualitative Brewer colours, cycled through by _get_colours().
    BREWER_COLOUR_SCHEMES = [
        (166, 206, 227),
        (31, 120, 180),
        (178, 223, 138),
        (51, 160, 44),
        (251, 154, 153),
        (227, 26, 28),
        (253, 191, 111),
        (255, 127, 0),
        (202, 178, 214),
        (106, 61, 154),
        (255, 255, 153),
        (177, 89, 40),
        (228, 26, 28),
        (55, 126, 184),
        (77, 175, 74),
        (152, 78, 163),
        (255, 127, 0),
    ]

    # Diverging palettes: (dark end, light end) hex pairs.
    BREWER_DIVERGING_PALLETES = {
        'BrBg': ("#543005", "#003c30"),
        'PiYg': ("#8e0152", "#276419"),
        'PRGn': ("#40004b", "#00441b"),
        'PuOr': ("#7f3b08", "#2d004b"),
        'RdBu': ("#67001f", "#053061"),
        'RdGy': ("#67001f", "#1a1a1a"),
        'RdYlBu': ("#a50026", "#313695"),
        'RdYlGn': ("#a50026", "#006837"),
        'Spectral': ("#9e0142", "#5e4fa2"),
    }

    def __init__(self):
        # Per-instance cursor into BREWER_COLOUR_SCHEMES.
        self.brewer_colour_idx = 0

    def rgb_from_hex(self, hexstr):
        """Return the (r, g, b) tuple for a 6-digit hex string (no '#')."""
        # http://stackoverflow.com/questions/4296249/how-do-i-convert-a-hex-triplet-to-an-rgb-tuple-and-back
        return struct.unpack('BBB', binascii.unhexlify(hexstr))

    def min_max_gff(self, gff_file):
        """Scan a GFF file and return (min, max) of the score column (col 6).

        Lines without a parseable numeric score are skipped. Returns
        (None, None) when no line carries a numeric score.
        """
        min_val = None
        max_val = None
        with open(gff_file, 'r') as handle:
            for line in handle:
                try:
                    value = float(line.split('\t')[5])
                except (ValueError, IndexError):
                    # Headers, comments, '.' scores, short lines.
                    continue
                # Explicit None checks: the previous `min_val or value`
                # idiom silently discarded a legitimate 0.0 minimum
                # because 0.0 is falsy.
                if min_val is None or value < min_val:
                    min_val = value
                if max_val is None or value > max_val:
                    max_val = value
        return min_val, max_val

    def hex_from_rgb(self, r, g, b):
        """Return the '#rrggbb' hex string for an (r, g, b) triple."""
        return '#%02x%02x%02x' % (r, g, b)

    def _get_colours(self):
        """Return the next Brewer (r, g, b) triple, cycling the palette."""
        r, g, b = self.BREWER_COLOUR_SCHEMES[self.brewer_colour_idx % len(self.BREWER_COLOUR_SCHEMES)]
        self.brewer_colour_idx += 1
        return r, g, b

    def parse_menus(self, track):
        """Build a JBrowse menuTemplate config from the track's <menus> conf.

        The four leading empty dicts preserve JBrowse's default menu
        entries; user-defined entries are appended after them.
        """
        trackConfig = {'menuTemplate': [{}, {}, {}, {}]}

        if 'menu' in track['menus']:
            # XML → dict flattening yields a bare dict for a single <menu>;
            # normalise to a list either way.
            menu_list = [track['menus']['menu']]
            if isinstance(track['menus']['menu'], list):
                menu_list = track['menus']['menu']

            for m in menu_list:
                tpl = {
                    'action': m['action'],
                    'label': m.get('label', '{name}'),
                    'iconClass': m.get('iconClass', 'dijitIconBookmark'),
                }
                if 'url' in m:
                    tpl['url'] = m['url']
                if 'content' in m:
                    tpl['content'] = m['content']
                if 'title' in m:
                    tpl['title'] = m['title']

                trackConfig['menuTemplate'].append(tpl)

        return trackConfig

    def parse_colours(self, track, trackFormat, gff3=None):
        """Build the 'style' colour config for a track.

        track       -- parsed track options (wiggle/scaling sub-dicts)
        trackFormat -- 'wiggle', 'blast', 'gene_calls', ...
        gff3        -- path to the GFF3 file, used to derive score bounds
                       when automatic scaling is requested
        """
        # Wiggle tracks have a bicolor pallete
        trackConfig = {'style': {}}
        if trackFormat == 'wiggle':

            trackConfig['style']['pos_color'] = track['wiggle']['color_pos']
            trackConfig['style']['neg_color'] = track['wiggle']['color_neg']

            if trackConfig['style']['pos_color'] == '__auto__':
                trackConfig['style']['neg_color'] = self.hex_from_rgb(*self._get_colours())
                trackConfig['style']['pos_color'] = self.hex_from_rgb(*self._get_colours())

            # Wiggle tracks can change colour at a specified place
            bc_pivot = track['wiggle']['bicolor_pivot']
            if bc_pivot not in ('mean', 'zero'):
                # The values are either one of those two strings
                # or a number
                bc_pivot = float(bc_pivot)
            trackConfig['bicolor_pivot'] = bc_pivot
        elif 'scaling' in track:
            if track['scaling']['method'] == 'ignore':
                if track['scaling']['scheme']['color'] != '__auto__':
                    trackConfig['style']['color'] = track['scaling']['scheme']['color']
                else:
                    trackConfig['style']['color'] = self.hex_from_rgb(*self._get_colours())
            else:
                # Scored method
                algo = track['scaling']['algo']
                # linear, logarithmic, blast
                scales = track['scaling']['scales']
                # type __auto__, manual (min, max)
                scheme = track['scaling']['scheme']
                # scheme -> (type (opacity), color)
                # ==================================
                # GENE CALLS OR BLAST
                # ==================================
                if trackFormat == 'blast':
                    red, green, blue = self._get_colours()
                    color_function = self.COLOR_FUNCTION_TEMPLATE.format(**{
                        'score': "feature._parent.get('score')",
                        'opacity': self.OPACITY_MATH['blast'],
                        'red': red,
                        'green': green,
                        'blue': blue,
                    })
                    trackConfig['style']['color'] = color_function.replace('\n', '')
                elif trackFormat == 'gene_calls':
                    # Default values, based on GFF3 spec
                    min_val = 0
                    max_val = 1000
                    # Get min/max and build a scoring function since JBrowse doesn't
                    if scales['type'] == 'automatic' or scales['type'] == '__auto__':
                        min_val, max_val = self.min_max_gff(gff3)
                    else:
                        min_val = scales.get('min', 0)
                        max_val = scales.get('max', 1000)

                    if scheme['color'] == '__auto__':
                        user_color = 'undefined'
                        auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours())
                    elif scheme['color'].startswith('#'):
                        user_color = "'%s'" % self.hex_from_rgb(*self.rgb_from_hex(scheme['color'][1:]))
                        auto_color = 'undefined'
                    else:
                        user_color = 'undefined'
                        auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours())

                    color_function = self.COLOR_FUNCTION_TEMPLATE_QUAL.format(**{
                        'opacity': self.OPACITY_MATH[algo].format(**{'max': max_val, 'min': min_val}),
                        'user_spec_color': user_color,
                        'auto_gen_color': auto_color,
                    })

                    trackConfig['style']['color'] = color_function.replace('\n', '')
        return trackConfig
| 261 | |
| 262 | |
def etree_to_dict(t):
    """Recursively convert an ElementTree element into nested dicts.

    Attributes become '@'-prefixed keys, element text becomes '#text'
    (or the value itself for childless, attribute-less elements), and
    repeated child tags collapse into lists. A None input yields {}.
    """
    if t is None:
        return {}

    node = {t.tag: {} if t.attrib else None}

    kids = list(t)
    if kids:
        # Group converted children by tag; single occurrences stay scalar.
        grouped = {}
        for child in kids:
            for tag, value in etree_to_dict(child).items():
                grouped.setdefault(tag, []).append(value)
        node = {t.tag: {tag: vals[0] if len(vals) == 1 else vals
                        for tag, vals in grouped.items()}}

    if t.attrib:
        for attr, value in t.attrib.items():
            node[t.tag]['@' + attr] = value

    if t.text:
        stripped = t.text.strip()
        if kids or t.attrib:
            if stripped:
                node[t.tag]['#text'] = stripped
        else:
            node[t.tag] = stripped

    return node
| 285 | |
| 286 | |
# score comes from feature._parent.get('score') or feature.get('score')

# Directory holding this script; used to locate the helper scripts shipped
# next to it (blastxml_to_gapped_gff3.py, gff3_rebase.py).
INSTALLED_TO = os.path.dirname(os.path.realpath(__file__))
| 290 | |
| 291 | |
def metadata_from_node(node):
    """Flatten a Galaxy metadata XML node into a single-level dict.

    Expects exactly one <dataset> child; bails out with an empty dict
    otherwise. Attribute keys are prefixed with their section name
    (dataset_/history_/metadata_/tool_), and a few fields are rewritten
    as HTML links for display inside JBrowse.
    """
    metadata = {}
    try:
        if len(node.findall('dataset')) != 1:
            # exit early
            return metadata
    except Exception:
        return {}

    # Prefix every section's attributes with the section name.
    for section in ('dataset', 'history', 'metadata', 'tool'):
        for (key, value) in node.findall(section)[0].attrib.items():
            metadata['%s_%s' % (section, key)] = value

    # Additional Mappings applied:
    metadata['dataset_edam_format'] = '<a target="_blank" href="http://edamontology.org/{0}">{1}</a>'.format(metadata['dataset_edam_format'], metadata['dataset_file_ext'])
    metadata['history_user_email'] = '<a href="mailto:{0}">{0}</a>'.format(metadata['history_user_email'])
    metadata['history_display_name'] = '<a target="_blank" href="{galaxy}/history/view/{encoded_hist_id}">{hist_name}</a>'.format(
        galaxy=GALAXY_INFRASTRUCTURE_URL,
        encoded_hist_id=metadata['history_id'],
        hist_name=metadata['history_display_name']
    )
    metadata['tool_tool'] = '<a target="_blank" href="{galaxy}/datasets/{encoded_id}/show_params">{tool_id}</a>'.format(
        galaxy=GALAXY_INFRASTRUCTURE_URL,
        encoded_id=metadata['dataset_id'],
        tool_id=metadata['tool_tool_id'],
        # tool_version=metadata['tool_tool_version'],
    )
    return metadata
| 328 | |
| 329 | |
| 330 class JbrowseConnector(object): | |
| 331 | |
    def __init__(self, jbrowse, outdir, genomes):
        """Build a fresh (or upgraded) JBrowse2 instance in *outdir*.

        jbrowse -- path of the JBrowse distribution to clone from
        outdir  -- output directory for the built instance
        genomes -- list of {'path': ..., 'label': ...} genome descriptors
        """
        self.cs = ColorScaling()
        self.jbrowse = jbrowse
        self.outdir = outdir
        self.genome_paths = genomes
        # Track ids queued for `jbrowse text-index` at the end of the run.
        self.tracksToIndex = []

        # This is the id of the current assembly
        self.assembly_ids = {}
        self.current_assembly_id = []

        # If upgrading, look at the existing data
        self.check_existing(self.outdir)

        self.clone_jbrowse(self.jbrowse, self.outdir)

        self.process_genomes()
| 349 | |
| 350 def subprocess_check_call(self, command, output=None): | |
| 351 if output: | |
| 352 log.debug('cd %s && %s > %s', self.outdir, ' '.join(command), output) | |
| 353 subprocess.check_call(command, cwd=self.outdir, stdout=output) | |
| 354 else: | |
| 355 log.debug('cd %s && %s', self.outdir, ' '.join(command)) | |
| 356 subprocess.check_call(command, cwd=self.outdir) | |
| 357 | |
| 358 def subprocess_popen(self, command): | |
| 359 log.debug('cd %s && %s', self.outdir, command) | |
| 360 p = subprocess.Popen(command, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) | |
| 361 output, err = p.communicate() | |
| 362 retcode = p.returncode | |
| 363 if retcode != 0: | |
| 364 log.error('cd %s && %s', self.outdir, command) | |
| 365 log.error(output) | |
| 366 log.error(err) | |
| 367 raise RuntimeError("Command failed with exit code %s" % (retcode)) | |
| 368 | |
    def subprocess_check_output(self, command):
        """Run *command* (argv list) in self.outdir and return its stdout (bytes)."""
        log.debug('cd %s && %s', self.outdir, ' '.join(command))
        return subprocess.check_output(command, cwd=self.outdir)
| 372 | |
| 373 def symlink_or_copy(self, src, dest): | |
| 374 if 'GALAXY_JBROWSE_SYMLINKS' in os.environ and bool(os.environ['GALAXY_JBROWSE_SYMLINKS']): | |
| 375 cmd = ['ln', '-s', src, dest] | |
| 376 else: | |
| 377 cmd = ['cp', src, dest] | |
| 378 | |
| 379 return self.subprocess_check_call(cmd) | |
| 380 | |
| 381 def symlink_or_copy_load_action(self): | |
| 382 if 'GALAXY_JBROWSE_SYMLINKS' in os.environ and bool(os.environ['GALAXY_JBROWSE_SYMLINKS']): | |
| 383 return 'symlink' | |
| 384 else: | |
| 385 return 'copy' | |
| 386 | |
| 387 def check_existing(self, destination): | |
| 388 existing = os.path.join(destination, 'data', "config.json") | |
| 389 if os.path.exists(existing): | |
| 390 with open(existing, 'r') as existing_conf: | |
| 391 conf = json.load(existing_conf) | |
| 392 if 'assemblies' in conf: | |
| 393 for assembly in conf['assemblies']: | |
| 394 if 'name' in assembly: | |
| 395 self.assembly_ids[assembly['name']] = None | |
| 396 | |
| 397 def process_genomes(self): | |
| 398 for genome_node in self.genome_paths: | |
| 399 # We only expect one input genome per run. This for loop is just | |
| 400 # easier to write than the alternative / catches any possible | |
| 401 # issues. | |
| 402 self.add_assembly(genome_node['path'], genome_node['label']) | |
| 403 | |
    def add_assembly(self, path, label, default=True):
        """Copy, bgzip-compress and faidx-index a FASTA genome, then register
        it with `jbrowse add-assembly`.

        path    -- input FASTA file
        label   -- user-facing assembly name
        default -- when True, this assembly becomes current_assembly_id

        Returns the (possibly de-duplicated) label actually used.
        """
        # Find a non-existing filename for the new genome
        # (to avoid colision when upgrading an existing instance)
        rel_seq_path = os.path.join('data', 'assembly')
        seq_path = os.path.join(self.outdir, rel_seq_path)
        fn_try = 1
        # Check every artefact a previous run may have left behind
        # (.fasta before bgzip; .gz/.fai/.gzi afterwards).
        while (os.path.exists(seq_path + '.fasta') or os.path.exists(seq_path + '.fasta.gz')
               or os.path.exists(seq_path + '.fasta.gz.fai') or os.path.exists(seq_path + '.fasta.gz.gzi')):
            rel_seq_path = os.path.join('data', 'assembly%s' % fn_try)
            seq_path = os.path.join(self.outdir, rel_seq_path)
            fn_try += 1

        # Find a non-existing label for the new genome
        # (to avoid colision when upgrading an existing instance)
        lab_try = 1
        uniq_label = label
        while uniq_label in self.assembly_ids:
            uniq_label = label + str(lab_try)
            lab_try += 1

        # Find a default scaffold to display
        # TODO this may not be necessary in the future, see https://github.com/GMOD/jbrowse-components/issues/2708
        with open(path, 'r') as fa_handle:
            # First word of the first FASTA header line (without '>').
            fa_header = fa_handle.readline()[1:].strip().split(' ')[0]

        self.assembly_ids[uniq_label] = fa_header
        if default:
            self.current_assembly_id = uniq_label

        copied_genome = seq_path + '.fasta'
        shutil.copy(path, copied_genome)

        # Compress with bgzip
        cmd = ['bgzip', copied_genome]
        self.subprocess_check_call(cmd)

        # FAI Index
        cmd = ['samtools', 'faidx', copied_genome + '.gz']
        self.subprocess_check_call(cmd)

        self.subprocess_check_call([
            'jbrowse', 'add-assembly',
            '--load', 'inPlace',
            '--name', uniq_label,
            '--type', 'bgzipFasta',
            '--target', os.path.join(self.outdir, 'data'),
            '--skipCheck',
            rel_seq_path + '.fasta.gz'])

        return uniq_label
| 454 | |
| 455 def text_index(self): | |
| 456 # Index tracks | |
| 457 args = [ | |
| 458 'jbrowse', 'text-index', | |
| 459 '--target', os.path.join(self.outdir, 'data'), | |
| 460 '--assemblies', self.current_assembly_id, | |
| 461 ] | |
| 462 | |
| 463 tracks = ','.join(self.tracksToIndex) | |
| 464 if tracks: | |
| 465 args += ['--tracks', tracks] | |
| 466 | |
| 467 self.subprocess_check_call(args) | |
| 468 | |
    def _blastxml_to_gff3(self, xml, min_gap=10):
        """Convert Blast XML to gapped GFF3 via the bundled helper script.

        Returns the path of a temporary GFF3 file; the caller is
        responsible for deleting it.
        """
        gff3_unrebased = tempfile.NamedTemporaryFile(delete=False)
        cmd = ['python', os.path.join(INSTALLED_TO, 'blastxml_to_gapped_gff3.py'),
               '--trim', '--trim_end', '--include_seq', '--min_gap', str(min_gap), xml]
        log.debug('cd %s && %s > %s', self.outdir, ' '.join(cmd), gff3_unrebased.name)
        subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_unrebased)
        gff3_unrebased.close()
        return gff3_unrebased.name
| 477 | |
| 478 def _prepare_track_style(self, xml_conf): | |
| 479 | |
| 480 style_data = { | |
| 481 "type": "LinearBasicDisplay" | |
| 482 } | |
| 483 | |
| 484 if 'display' in xml_conf['style']: | |
| 485 style_data['type'] = xml_conf['style']['display'] | |
| 486 del xml_conf['style']['display'] | |
| 487 | |
| 488 style_data['displayId'] = "%s_%s" % (xml_conf['label'], style_data['type']) | |
| 489 | |
| 490 style_data.update(xml_conf['style']) | |
| 491 | |
| 492 return {'displays': [style_data]} | |
| 493 | |
    def add_blastxml(self, data, trackData, blastOpts, **kwargs):
        """Convert Blast XML to GFF3, optionally rebase it onto a parent GFF,
        then sort/index and register the result as a track.
        """
        gff3 = self._blastxml_to_gff3(data, min_gap=blastOpts['min_gap'])

        if 'parent' in blastOpts and blastOpts['parent'] != 'None':
            # Rebase blast hits onto the coordinate system of the parent
            # GFF3 using the bundled gff3_rebase.py helper.
            gff3_rebased = tempfile.NamedTemporaryFile(delete=False)
            cmd = ['python', os.path.join(INSTALLED_TO, 'gff3_rebase.py')]
            if blastOpts.get('protein', 'false') == 'true':
                cmd.append('--protein2dna')
            cmd.extend([os.path.realpath(blastOpts['parent']), gff3])
            log.debug('cd %s && %s > %s', self.outdir, ' '.join(cmd), gff3_rebased.name)
            subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_rebased)
            gff3_rebased.close()

            # Replace original gff3 file
            shutil.copy(gff3_rebased.name, gff3)
            os.unlink(gff3_rebased.name)

        rel_dest = os.path.join('data', trackData['label'] + '.gff')
        dest = os.path.join(self.outdir, rel_dest)

        self._sort_gff(gff3, dest)
        os.unlink(gff3)

        style_json = self._prepare_track_style(trackData)

        self._add_track(trackData['label'], trackData['key'], trackData['category'], rel_dest + '.gz', config=style_json)
| 520 | |
| 521 def add_bigwig(self, data, trackData, wiggleOpts, **kwargs): | |
| 522 | |
| 523 rel_dest = os.path.join('data', trackData['label'] + '.bw') | |
| 524 dest = os.path.join(self.outdir, rel_dest) | |
| 525 self.symlink_or_copy(os.path.realpath(data), dest) | |
| 526 | |
| 527 style_json = self._prepare_track_style(trackData) | |
| 528 | |
| 529 self._add_track(trackData['label'], trackData['key'], trackData['category'], rel_dest, config=style_json) | |
| 530 | |
| 531 # Anything ending in "am" (Bam or Cram) | |
| 532 def add_xam(self, data, trackData, xamOpts, index=None, ext="bam", **kwargs): | |
| 533 | |
| 534 index_ext = "bai" | |
| 535 if ext == "cram": | |
| 536 index_ext = "crai" | |
| 537 | |
| 538 rel_dest = os.path.join('data', trackData['label'] + '.%s' % ext) | |
| 539 dest = os.path.join(self.outdir, rel_dest) | |
| 540 | |
| 541 self.symlink_or_copy(os.path.realpath(data), dest) | |
| 542 | |
| 543 if index is not None and os.path.exists(os.path.realpath(index)): | |
| 544 # xai most probably made by galaxy and stored in galaxy dirs, need to copy it to dest | |
| 545 self.subprocess_check_call(['cp', os.path.realpath(index), dest + '.%s' % index_ext]) | |
| 546 else: | |
| 547 # Can happen in exotic condition | |
| 548 # e.g. if bam imported as symlink with datatype=unsorted.bam, then datatype changed to bam | |
| 549 # => no index generated by galaxy, but there might be one next to the symlink target | |
| 550 # this trick allows to skip the bam sorting made by galaxy if already done outside | |
| 551 if os.path.exists(os.path.realpath(data) + '.%s' % index_ext): | |
| 552 self.symlink_or_copy(os.path.realpath(data) + '.%s' % index_ext, dest + '.%s' % index_ext) | |
| 553 else: | |
| 554 log.warn('Could not find a bam index (.%s file) for %s', (index_ext, data)) | |
| 555 | |
| 556 style_json = self._prepare_track_style(trackData) | |
| 557 | |
| 558 self._add_track(trackData['label'], trackData['key'], trackData['category'], rel_dest, config=style_json) | |
| 559 | |
| 560 def add_vcf(self, data, trackData, vcfOpts={}, zipped=False, **kwargs): | |
| 561 | |
| 562 if zipped: | |
| 563 rel_dest = os.path.join('data', trackData['label'] + '.vcf.gz') | |
| 564 dest = os.path.join(self.outdir, rel_dest) | |
| 565 shutil.copy(os.path.realpath(data), dest) | |
| 566 else: | |
| 567 rel_dest = os.path.join('data', trackData['label'] + '.vcf') | |
| 568 dest = os.path.join(self.outdir, rel_dest) | |
| 569 shutil.copy(os.path.realpath(data), dest) | |
| 570 | |
| 571 cmd = ['bgzip', dest] | |
| 572 self.subprocess_check_call(cmd) | |
| 573 cmd = ['tabix', dest + '.gz'] | |
| 574 self.subprocess_check_call(cmd) | |
| 575 | |
| 576 rel_dest = os.path.join('data', trackData['label'] + '.vcf.gz') | |
| 577 | |
| 578 style_json = self._prepare_track_style(trackData) | |
| 579 | |
| 580 self._add_track(trackData['label'], trackData['key'], trackData['category'], rel_dest, config=style_json) | |
| 581 | |
| 582 def add_gff(self, data, format, trackData, gffOpts, **kwargs): | |
| 583 rel_dest = os.path.join('data', trackData['label'] + '.gff') | |
| 584 dest = os.path.join(self.outdir, rel_dest) | |
| 585 | |
| 586 self._sort_gff(data, dest) | |
| 587 | |
| 588 style_json = self._prepare_track_style(trackData) | |
| 589 | |
| 590 self._add_track(trackData['label'], trackData['key'], trackData['category'], rel_dest + '.gz', config=style_json) | |
| 591 | |
| 592 def add_bed(self, data, format, trackData, gffOpts, **kwargs): | |
| 593 rel_dest = os.path.join('data', trackData['label'] + '.bed') | |
| 594 dest = os.path.join(self.outdir, rel_dest) | |
| 595 | |
| 596 self._sort_bed(data, dest) | |
| 597 | |
| 598 style_json = self._prepare_track_style(trackData) | |
| 599 | |
| 600 self._add_track(trackData['label'], trackData['key'], trackData['category'], rel_dest + '.gz', config=style_json) | |
| 601 | |
    def add_paf(self, data, trackData, pafOpts, **kwargs):
        """Register a PAF synteny track spanning two assemblies.

        The comparison genome named in pafOpts is added as a second
        (non-default) assembly, and the track is attached to both it and
        the current assembly.
        """
        rel_dest = os.path.join('data', trackData['label'] + '.paf')
        dest = os.path.join(self.outdir, rel_dest)

        self.symlink_or_copy(os.path.realpath(data), dest)

        added_assembly = self.add_assembly(pafOpts['genome'], pafOpts['genome_label'], default=False)

        style_json = self._prepare_track_style(trackData)

        self._add_track(trackData['label'], trackData['key'], trackData['category'], rel_dest, assemblies=[self.current_assembly_id, added_assembly], config=style_json)
| 613 | |
| 614 def add_hic(self, data, trackData, hicOpts, **kwargs): | |
| 615 rel_dest = os.path.join('data', trackData['label'] + '.hic') | |
| 616 dest = os.path.join(self.outdir, rel_dest) | |
| 617 | |
| 618 self.symlink_or_copy(os.path.realpath(data), dest) | |
| 619 | |
| 620 style_json = self._prepare_track_style(trackData) | |
| 621 | |
| 622 self._add_track(trackData['label'], trackData['key'], trackData['category'], rel_dest, config=style_json) | |
| 623 | |
| 624 def add_sparql(self, url, query, query_refnames, trackData): | |
| 625 | |
| 626 json_track_data = { | |
| 627 "type": "FeatureTrack", | |
| 628 "trackId": id, | |
| 629 "name": trackData['label'], | |
| 630 "adapter": { | |
| 631 "type": "SPARQLAdapter", | |
| 632 "endpoint": { | |
| 633 "uri": url, | |
| 634 "locationType": "UriLocation" | |
| 635 }, | |
| 636 "queryTemplate": query | |
| 637 }, | |
| 638 "category": [ | |
| 639 trackData['category'] | |
| 640 ], | |
| 641 "assemblyNames": [ | |
| 642 self.current_assembly_id | |
| 643 ] | |
| 644 } | |
| 645 | |
| 646 if query_refnames: | |
| 647 json_track_data['adapter']['refNamesQueryTemplate']: query_refnames | |
| 648 | |
| 649 self.subprocess_check_call([ | |
| 650 'jbrowse', 'add-track-json', | |
| 651 '--target', os.path.join(self.outdir, 'data'), | |
| 652 json_track_data]) | |
| 653 | |
| 654 # Doesn't work as of 1.6.4, might work in the future | |
| 655 # self.subprocess_check_call([ | |
| 656 # 'jbrowse', 'add-track', | |
| 657 # '--trackType', 'sparql', | |
| 658 # '--name', trackData['label'], | |
| 659 # '--category', trackData['category'], | |
| 660 # '--target', os.path.join(self.outdir, 'data'), | |
| 661 # '--trackId', id, | |
| 662 # '--config', '{"queryTemplate": "%s"}' % query, | |
| 663 # url]) | |
| 664 | |
| 665 def _add_track(self, id, label, category, path, assemblies=[], config=None): | |
| 666 | |
| 667 assemblies_opt = self.current_assembly_id | |
| 668 if assemblies: | |
| 669 assemblies_opt = ','.join(assemblies) | |
| 670 | |
| 671 cmd = [ | |
| 672 'jbrowse', 'add-track', | |
| 673 '--load', 'inPlace', | |
| 674 '--name', label, | |
| 675 '--category', category, | |
| 676 '--target', os.path.join(self.outdir, 'data'), | |
| 677 '--trackId', id, | |
| 678 '--assemblyNames', assemblies_opt | |
| 679 ] | |
| 680 | |
| 681 if config: | |
| 682 cmd.append('--config') | |
| 683 cmd.append(json.dumps(config)) | |
| 684 | |
| 685 cmd.append(path) | |
| 686 | |
| 687 self.subprocess_check_call(cmd) | |
| 688 | |
    def _sort_gff(self, data, dest):
        """Sort *data* with gff3sort.pl into *dest*, then bgzip + tabix it."""
        # Only index if not already done
        if not os.path.exists(dest):
            # gff3sort.pl orders features so tabix can index them; the grep
            # drops blank lines that would upset tabix.
            cmd = "gff3sort.pl --precise '%s' | grep -v \"^$\" > '%s'" % (data, dest)
            self.subprocess_popen(cmd)

            self.subprocess_check_call(['bgzip', '-f', dest])
            self.subprocess_check_call(['tabix', '-f', '-p', 'gff', dest + '.gz'])
| 697 | |
| 698 def _sort_bed(self, data, dest): | |
| 699 # Only index if not already done | |
| 700 if not os.path.exists(dest): | |
| 701 cmd = ['sort', '-k1,1', '-k2,2n', data] | |
| 702 with open(dest, 'w') as handle: | |
| 703 self.subprocess_check_call(cmd, output=handle) | |
| 704 | |
| 705 self.subprocess_check_call(['bgzip', '-f', dest]) | |
| 706 self.subprocess_check_call(['tabix', '-f', '-p', 'bed', dest + '.gz']) | |
| 707 | |
    def process_annotations(self, track):
        """Dispatch every dataset of a track config to the right add_* method.

        track -- parsed track configuration ('trackfiles', 'category',
                 'style', 'conf' with format-specific options)

        Yields the generated (hash-based) track label for each dataset.
        """
        category = track['category'].replace('__pd__date__pd__', TODAY)
        outputTrackConfig = {
            'category': category,
        }

        # Galaxy sanitizes element identifiers; this maps the sanitized
        # placeholders back to the original characters.
        mapped_chars = {
            '>': '__gt__',
            '<': '__lt__',
            "'": '__sq__',
            '"': '__dq__',
            '[': '__ob__',
            ']': '__cb__',
            '{': '__oc__',
            '}': '__cc__',
            '@': '__at__',
            '#': '__pd__',
            "": '__cn__'
        }

        for i, (dataset_path, dataset_ext, track_human_label, extra_metadata) in enumerate(track['trackfiles']):
            # Unsanitize labels (element_identifiers are always sanitized by Galaxy)
            for key, value in mapped_chars.items():
                track_human_label = track_human_label.replace(value, key)

            log.info('Processing track %s / %s (%s)', category, track_human_label, dataset_ext)
            outputTrackConfig['key'] = track_human_label
            # We add extra data to hash for the case of REST + SPARQL.
            if 'conf' in track and 'options' in track['conf'] and 'url' in track['conf']['options']:
                rest_url = track['conf']['options']['url']
            else:
                rest_url = ''

            # I chose to use track['category'] instead of 'category' here. This
            # is intentional. This way re-running the tool on a different date
            # will not generate different hashes and make comparison of outputs
            # much simpler.
            hashData = [str(dataset_path), track_human_label, track['category'], rest_url, self.current_assembly_id]
            hashData = '|'.join(hashData).encode('utf-8')
            outputTrackConfig['label'] = hashlib.md5(hashData).hexdigest() + '_%s' % i
            outputTrackConfig['metadata'] = extra_metadata

            # NOTE(review): this assigns the shared style dict by reference;
            # any in-place mutation downstream would leak between datasets.
            outputTrackConfig['style'] = track['style']

            if 'menus' in track['conf']['options']:
                menus = self.cs.parse_menus(track['conf']['options'])
                outputTrackConfig.update(menus)

            # Dispatch on the Galaxy datatype extension.
            if dataset_ext in ('gff', 'gff3'):
                self.add_gff(dataset_path, dataset_ext, outputTrackConfig,
                             track['conf']['options']['gff'])
            elif dataset_ext == 'bed':
                self.add_bed(dataset_path, dataset_ext, outputTrackConfig,
                             track['conf']['options']['gff'])
            elif dataset_ext == 'bigwig':
                self.add_bigwig(dataset_path, outputTrackConfig,
                                track['conf']['options']['wiggle'])
            elif dataset_ext == 'bam':
                real_indexes = track['conf']['options']['pileup']['bam_indices']['bam_index']
                if not isinstance(real_indexes, list):
                    # <bam_indices>
                    #  <bam_index>/path/to/a.bam.bai</bam_index>
                    # </bam_indices>
                    #
                    # The above will result in the 'bam_index' key containing a
                    # string. If there are two or more indices, the container
                    # becomes a list. Fun!
                    real_indexes = [real_indexes]

                self.add_xam(dataset_path, outputTrackConfig,
                             track['conf']['options']['pileup'],
                             index=real_indexes[i], ext="bam")
            elif dataset_ext == 'cram':
                real_indexes = track['conf']['options']['cram']['cram_indices']['cram_index']
                if not isinstance(real_indexes, list):
                    # <bam_indices>
                    #  <bam_index>/path/to/a.bam.bai</bam_index>
                    # </bam_indices>
                    #
                    # The above will result in the 'bam_index' key containing a
                    # string. If there are two or more indices, the container
                    # becomes a list. Fun!
                    real_indexes = [real_indexes]

                self.add_xam(dataset_path, outputTrackConfig,
                             track['conf']['options']['cram'],
                             index=real_indexes[i], ext="cram")
            elif dataset_ext == 'blastxml':
                self.add_blastxml(dataset_path, outputTrackConfig, track['conf']['options']['blast'])
            elif dataset_ext == 'vcf':
                self.add_vcf(dataset_path, outputTrackConfig)
            elif dataset_ext == 'vcf_bgzip':
                self.add_vcf(dataset_path, outputTrackConfig, zipped=True)
            elif dataset_ext == 'rest':
                self.add_rest(track['conf']['options']['rest']['url'], outputTrackConfig)
            elif dataset_ext == 'synteny':
                self.add_paf(dataset_path, outputTrackConfig,
                             track['conf']['options']['synteny'])
            elif dataset_ext == 'hic':
                self.add_hic(dataset_path, outputTrackConfig,
                             track['conf']['options']['hic'])
            elif dataset_ext == 'sparql':
                sparql_query = track['conf']['options']['sparql']['query']
                for key, value in mapped_chars.items():
                    sparql_query = sparql_query.replace(value, key)
                sparql_query_refnames = track['conf']['options']['sparql']['query_refnames']
                for key, value in mapped_chars.items():
                    sparql_query_refnames = sparql_query_refnames.replace(value, key)
                self.add_sparql(track['conf']['options']['sparql']['url'], sparql_query, sparql_query_refnames, outputTrackConfig)
            else:
                log.warn('Do not know how to handle %s', dataset_ext)

            # Return non-human label for use in other fields
            yield outputTrackConfig['label']
| 823 | |
| 824 def add_default_session(self, data): | |
| 825 """ | |
| 826 Add some default session settings: set some assemblies/tracks on/off | |
| 827 """ | |
| 828 tracks_data = [] | |
| 829 | |
| 830 # TODO using the default session for now, but check out session specs in the future https://github.com/GMOD/jbrowse-components/issues/2708 | |
| 831 | |
| 832 # We need to know the track type from the config.json generated just before | |
| 833 config_path = os.path.join(self.outdir, 'data', 'config.json') | |
| 834 track_types = {} | |
| 835 with open(config_path, 'r') as config_file: | |
| 836 config_json = json.load(config_file) | |
| 837 | |
| 838 for track_conf in config_json['tracks']: | |
| 839 track_types[track_conf['trackId']] = track_conf['type'] | |
| 840 | |
| 841 for on_track in data['visibility']['default_on']: | |
| 842 # TODO several problems with this currently | |
| 843 # - we are forced to copy the same kind of style config as the per track config from _prepare_track_style (not exactly the same though) | |
| 844 # - we get an error when refreshing the page | |
| 845 # - this could be solved by session specs, see https://github.com/GMOD/jbrowse-components/issues/2708 | |
| 846 style_data = { | |
| 847 "type": "LinearBasicDisplay", | |
| 848 "height": 100 | |
| 849 } | |
| 850 | |
| 851 if on_track in data['style']: | |
| 852 if 'display' in data['style'][on_track]: | |
| 853 style_data['type'] = data['style'][on_track]['display'] | |
| 854 del data['style'][on_track]['display'] | |
| 855 | |
| 856 style_data.update(data['style'][on_track]) | |
| 857 | |
| 858 if on_track in data['style_labels']: | |
| 859 # TODO fix this: it should probably go in a renderer block (SvgFeatureRenderer) but still does not work | |
| 860 # TODO move this to per track displays? | |
| 861 style_data['labels'] = data['style_labels'][on_track] | |
| 862 | |
| 863 tracks_data.append({ | |
| 864 "type": track_types[on_track], | |
| 865 "configuration": on_track, | |
| 866 "displays": [ | |
| 867 style_data | |
| 868 ] | |
| 869 }) | |
| 870 | |
| 871 # The view for the assembly we're adding | |
| 872 view_json = { | |
| 873 "type": "LinearGenomeView", | |
| 874 "tracks": tracks_data | |
| 875 } | |
| 876 | |
| 877 refName = None | |
| 878 if data.get('defaultLocation', ''): | |
| 879 loc_match = re.search(r'^(\w+):(\d+)\.+(\d+)$', data['defaultLocation']) | |
| 880 if loc_match: | |
| 881 refName = loc_match.group(1) | |
| 882 start = int(loc_match.group(2)) | |
| 883 end = int(loc_match.group(3)) | |
| 884 elif self.assembly_ids[self.current_assembly_id] is not None: | |
| 885 refName = self.assembly_ids[self.current_assembly_id] | |
| 886 start = 0 | |
| 887 end = 1000000 # Booh, hard coded! waiting for https://github.com/GMOD/jbrowse-components/issues/2708 | |
| 888 | |
| 889 if refName is not None: | |
| 890 # TODO displayedRegions is not just zooming to the region, it hides the rest of the chromosome | |
| 891 view_json['displayedRegions'] = [{ | |
| 892 "refName": refName, | |
| 893 "start": start, | |
| 894 "end": end, | |
| 895 "reversed": False, | |
| 896 "assemblyName": self.current_assembly_id | |
| 897 }] | |
| 898 | |
| 899 session_name = data.get('session_name', "New session") | |
| 900 if not session_name: | |
| 901 session_name = "New session" | |
| 902 | |
| 903 # Merge with possibly existing defaultSession (if upgrading a jbrowse instance) | |
| 904 session_json = {} | |
| 905 if 'defaultSession' in config_json: | |
| 906 session_json = config_json['defaultSession'] | |
| 907 | |
| 908 session_json["name"] = session_name | |
| 909 | |
| 910 if 'views' not in session_json: | |
| 911 session_json['views'] = [] | |
| 912 | |
| 913 session_json['views'].append(view_json) | |
| 914 | |
| 915 config_json['defaultSession'] = session_json | |
| 916 | |
| 917 with open(config_path, 'w') as config_file: | |
| 918 json.dump(config_json, config_file, indent=2) | |
| 919 | |
| 920 def add_general_configuration(self, data): | |
| 921 """ | |
| 922 Add some general configuration to the config.json file | |
| 923 """ | |
| 924 | |
| 925 config_path = os.path.join(self.outdir, 'data', 'config.json') | |
| 926 with open(config_path, 'r') as config_file: | |
| 927 config_json = json.load(config_file) | |
| 928 | |
| 929 config_data = {} | |
| 930 | |
| 931 config_data['disableAnalytics'] = data.get('analytics', 'false') == 'true' | |
| 932 | |
| 933 config_data['theme'] = { | |
| 934 "palette": { | |
| 935 "primary": { | |
| 936 "main": data.get('primary_color', '#0D233F') | |
| 937 }, | |
| 938 "secondary": { | |
| 939 "main": data.get('secondary_color', '#721E63') | |
| 940 }, | |
| 941 "tertiary": { | |
| 942 "main": data.get('tertiary_color', '#135560') | |
| 943 }, | |
| 944 "quaternary": { | |
| 945 "main": data.get('quaternary_color', '#FFB11D') | |
| 946 }, | |
| 947 }, | |
| 948 "typography": { | |
| 949 "fontSize": int(data.get('font_size', 10)) | |
| 950 }, | |
| 951 } | |
| 952 | |
| 953 config_json['configuration'].update(config_data) | |
| 954 | |
| 955 with open(config_path, 'w') as config_file: | |
| 956 json.dump(config_json, config_file, indent=2) | |
| 957 | |
| 958 def clone_jbrowse(self, jbrowse_dir, destination): | |
| 959 """Clone a JBrowse directory into a destination directory. | |
| 960 """ | |
| 961 | |
| 962 copytree(jbrowse_dir, destination) | |
| 963 | |
| 964 try: | |
| 965 shutil.rmtree(os.path.join(destination, 'test_data')) | |
| 966 except OSError as e: | |
| 967 log.error("Error: %s - %s." % (e.filename, e.strerror)) | |
| 968 | |
| 969 if not os.path.exists(os.path.join(destination, 'data')): | |
| 970 # It can already exist if upgrading an instance | |
| 971 os.makedirs(os.path.join(destination, 'data')) | |
| 972 log.info("makedir %s" % (os.path.join(destination, 'data'))) | |
| 973 | |
| 974 os.symlink('./data/config.json', os.path.join(destination, 'config.json')) | |
| 975 | |
| 976 | |
def copytree(src, dst, symlinks=False, ignore=None):
    """Recursively copy the contents of *src* into *dst*.

    Unlike shutil.copytree, *dst* is allowed to already exist: each
    top-level entry of *src* is copied into it individually.
    """
    for entry in os.listdir(src):
        src_path = os.path.join(src, entry)
        dst_path = os.path.join(dst, entry)
        if not os.path.isdir(src_path):
            shutil.copy2(src_path, dst_path)
        else:
            shutil.copytree(src_path, dst_path, symlinks, ignore)
| 985 | |
| 986 | |
def parse_style_conf(item):
    """Convert a style XML node into a Python value.

    Nodes declaring type="boolean" or type="integer" are converted
    accordingly; any other node is returned as its raw text.
    """
    declared_type = item.attrib.get('type')
    if declared_type == 'boolean':
        return item.text in ("yes", "true", "True")
    if declared_type == 'integer':
        return int(item.text)
    return item.text
| 995 | |
| 996 | |
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="", epilog="")
    parser.add_argument('xml', type=argparse.FileType('r'), help='Track Configuration')

    parser.add_argument('--jbrowse', help='Folder containing a jbrowse release')
    parser.add_argument('--outdir', help='Output directory', default='out')
    parser.add_argument('--version', '-V', action='version', version="%(prog)s 0.8.0")
    args = parser.parse_args()

    tree = ET.parse(args.xml.name)
    root = tree.getroot()

    # This should be done ASAP
    GALAXY_INFRASTRUCTURE_URL = root.find('metadata/galaxyUrl').text
    # Sometimes this comes as `localhost` without a protocol
    if not GALAXY_INFRASTRUCTURE_URL.startswith('http'):
        # so we'll prepend `http://` and hope for the best. Requests *should*
        # be GET and not POST so it should redirect OK
        GALAXY_INFRASTRUCTURE_URL = 'http://' + GALAXY_INFRASTRUCTURE_URL

    jc = JbrowseConnector(
        jbrowse=args.jbrowse,
        outdir=args.outdir,
        genomes=[
            {
                'path': os.path.realpath(x.attrib['path']),
                'meta': metadata_from_node(x.find('metadata')),
                'label': x.attrib['label']
            }
            for x in root.findall('metadata/genomes/genome')
        ]
    )

    default_session_data = {
        'visibility': {
            'default_on': [],
            'default_off': [],
        },
        'style': {},
        'style_labels': {}
    }

    # TODO add metadata to tracks
    for track in root.findall('tracks/track'):
        track_conf = {}
        track_conf['trackfiles'] = []

        trackfiles = track.findall('files/trackFile')
        if trackfiles:
            # Reuse the node list queried above instead of calling findall()
            # a second time; the old redundant inner `if trackfiles:` guard
            # (always true here) is gone too.
            for x in trackfiles:
                metadata = metadata_from_node(x.find('metadata'))

                track_conf['trackfiles'].append((
                    os.path.realpath(x.attrib['path']),
                    x.attrib['ext'],
                    x.attrib['label'],
                    metadata
                ))
        else:
            # For tracks without files (rest, sparql)
            track_conf['trackfiles'].append((
                '',  # N/A, no path for rest or sparql
                track.attrib['format'],
                track.find('options/label').text,
                {}
            ))

        track_conf['category'] = track.attrib['cat']
        track_conf['format'] = track.attrib['format']
        # Computed once (the original built this same dict twice in a row)
        track_conf['style'] = {item.tag: parse_style_conf(item) for item in track.find('options/style')}

        track_conf['style_labels'] = {item.tag: parse_style_conf(item) for item in track.find('options/style_labels')}

        track_conf['conf'] = etree_to_dict(track.find('options'))
        keys = jc.process_annotations(track_conf)

        for key in keys:
            default_session_data['visibility'][track.attrib.get('visibility', 'default_off')].append(key)

            default_session_data['style'][key] = track_conf['style']  # TODO do we need this anymore?
            default_session_data['style_labels'][key] = track_conf['style_labels']

    default_session_data['defaultLocation'] = root.find('metadata/general/defaultLocation').text
    default_session_data['session_name'] = root.find('metadata/general/session_name').text

    general_data = {
        'analytics': root.find('metadata/general/analytics').text,
        'primary_color': root.find('metadata/general/primary_color').text,
        'secondary_color': root.find('metadata/general/secondary_color').text,
        'tertiary_color': root.find('metadata/general/tertiary_color').text,
        'quaternary_color': root.find('metadata/general/quaternary_color').text,
        'font_size': root.find('metadata/general/font_size').text,
    }

    jc.add_default_session(default_session_data)
    jc.add_general_configuration(general_data)
    jc.text_index()
| 1097 |
