Mercurial > repos > fubar > jbrowse2
comparison jbrowse2.py @ 62:ab0d6782a95f draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 2b6d4a24585beb1ba5055e5d34aacb3b299b1943-dirty
author | fubar |
---|---|
date | Thu, 28 Mar 2024 04:51:06 +0000 |
parents | e7a6f7a7148d |
children | ac00dcfb5d1d |
comparison
equal
deleted
inserted
replaced
61:e7a6f7a7148d | 62:ab0d6782a95f |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 | 2 |
3 import argparse | 3 import argparse |
4 import binascii | 4 import binascii |
5 import datetime | 5 import datetime |
6 import json | 6 import json |
13 import tempfile | 13 import tempfile |
14 import urllib.request | 14 import urllib.request |
15 import xml.etree.ElementTree as ET | 15 import xml.etree.ElementTree as ET |
16 from collections import defaultdict | 16 from collections import defaultdict |
17 | 17 |
18 logging.basicConfig(level=logging.INFO) | 18 logging.basicConfig(level=logging.DEBUG) |
19 log = logging.getLogger("jbrowse") | 19 log = logging.getLogger("jbrowse") |
20 | 20 |
21 JB2VER = "v2.10.3" | 21 JB2VER = "v2.10.3" |
22 # version pinned for cloning | 22 # version pinned for cloning |
23 | 23 |
24 TODAY = datetime.datetime.now().strftime("%Y-%m-%d") | 24 TODAY = datetime.datetime.now().strftime("%Y-%m-%d") |
25 SELF_LOCATION = os.path.dirname(os.path.realpath(__file__)) | |
25 GALAXY_INFRASTRUCTURE_URL = None | 26 GALAXY_INFRASTRUCTURE_URL = None |
26 | |
27 mapped_chars = { | 27 mapped_chars = { |
28 ">": "__gt__", | 28 ">": "__gt__", |
29 "<": "__lt__", | 29 "<": "__lt__", |
30 "'": "__sq__", | 30 "'": "__sq__", |
31 '"': "__dq__", | 31 '"': "__dq__", |
339 metadata["history_%s" % key] = value | 339 metadata["history_%s" % key] = value |
340 | 340 |
341 if node.findall("metadata"): | 341 if node.findall("metadata"): |
342 for (key, value) in node.findall("metadata")[0].attrib.items(): | 342 for (key, value) in node.findall("metadata")[0].attrib.items(): |
343 metadata["metadata_%s" % key] = value | 343 metadata["metadata_%s" % key] = value |
344 # Additional Mappings applied: | 344 # Additional Mappings applied: |
345 metadata[ | 345 metadata[ |
346 "dataset_edam_format" | 346 "dataset_edam_format" |
347 ] = '<a target="_blank" href="http://edamontology.org/{0}">{1}</a>'.format( | 347 ] = '<a target="_blank" href="http://edamontology.org/{0}">{1}</a>'.format( |
348 metadata["dataset_edam_format"], metadata["dataset_file_ext"] | 348 metadata["dataset_edam_format"], metadata["dataset_file_ext"] |
349 ) | 349 ) |
350 metadata["history_user_email"] = '<a href="mailto:{0}">{0}</a>'.format( | 350 metadata["history_user_email"] = '<a href="mailto:{0}">{0}</a>'.format( |
351 metadata["history_user_email"] | 351 metadata["history_user_email"] |
352 ) | 352 ) |
353 metadata["hist_name"] = metadata["history_display_name"] | 353 metadata["hist_name"] = metadata["history_display_name"] |
354 metadata[ | 354 metadata[ |
355 "history_display_name" | 355 "history_display_name" |
356 ] = '<a target="_blank" href="{galaxy}/history/view/{encoded_hist_id}">{hist_name}</a>'.format( | 356 ] = '<a target="_blank" href="{galaxy}/history/view/{encoded_hist_id}">{hist_name}</a>'.format( |
357 galaxy=GALAXY_INFRASTRUCTURE_URL, | 357 galaxy=GALAXY_INFRASTRUCTURE_URL, |
358 encoded_hist_id=metadata["history_id"], | 358 encoded_hist_id=metadata.get("history_id", "not available"), |
359 hist_name=metadata["history_display_name"], | 359 hist_name=metadata.get("history_display_name", "not available"), |
360 ) | 360 ) |
361 if node.findall("tool"): | 361 if node.findall("tool"): |
362 for (key, value) in node.findall("tool")[0].attrib.items(): | 362 for (key, value) in node.findall("tool")[0].attrib.items(): |
363 metadata["tool_%s" % key] = value | 363 metadata["tool_%s" % key] = value |
364 metadata[ | 364 metadata[ |
365 "tool_tool" | 365 "tool_tool" |
371 ) | 371 ) |
372 return metadata | 372 return metadata |
373 | 373 |
374 | 374 |
375 class JbrowseConnector(object): | 375 class JbrowseConnector(object): |
376 def __init__(self, outdir, jbrowse2path, genomes): | 376 def __init__(self, outdir, jbrowse2path): |
377 self.assemblies = [] # these require more than a few line diff. | |
378 self.assmeta = {} | |
377 self.giURL = GALAXY_INFRASTRUCTURE_URL | 379 self.giURL = GALAXY_INFRASTRUCTURE_URL |
378 self.outdir = outdir | 380 self.outdir = outdir |
381 self.genome_firstcontig = None | |
379 self.jbrowse2path = jbrowse2path | 382 self.jbrowse2path = jbrowse2path |
380 os.makedirs(self.outdir, exist_ok=True) | 383 os.makedirs(self.outdir, exist_ok=True) |
381 self.genome_paths = genomes | |
382 self.genome_name = None | |
383 self.genome_names = [] | 384 self.genome_names = [] |
384 self.trackIdlist = [] | 385 self.trackIdlist = [] |
385 self.tracksToAdd = [] | 386 self.tracksToAdd = {} |
386 self.config_json = {} | 387 self.config_json = {} |
387 self.config_json_file = os.path.join(outdir, "config.json") | 388 self.config_json_file = os.path.join(outdir, "config.json") |
388 self.clone_jbrowse() | 389 self.clone_jbrowse() |
389 | 390 |
390 def subprocess_check_call(self, command, output=None): | 391 def get_cwd(self, cwd): |
392 if cwd: | |
393 return self.outdir | |
394 else: | |
395 return subprocess.check_output(["pwd"]).decode("utf-8").strip() | |
396 # return None | |
397 | |
398 def subprocess_check_call(self, command, output=None, cwd=True): | |
391 if output: | 399 if output: |
392 log.debug("cd %s && %s > %s", self.outdir, " ".join(command), output) | 400 log.debug("cd %s && %s > %s", self.get_cwd(cwd), " ".join(command), output) |
393 subprocess.check_call(command, cwd=self.outdir, stdout=output) | 401 subprocess.check_call(command, cwd=self.get_cwd(cwd), stdout=output) |
394 else: | 402 else: |
395 log.debug("cd %s && %s", self.outdir, " ".join(command)) | 403 log.debug("cd %s && %s", self.get_cwd(cwd), " ".join(command)) |
396 subprocess.check_call(command, cwd=self.outdir) | 404 subprocess.check_call(command, cwd=self.get_cwd(cwd)) |
397 | 405 |
398 def subprocess_popen(self, command): | 406 def subprocess_popen(self, command, cwd=True): |
399 log.debug(command) | 407 log.debug(command) |
400 p = subprocess.Popen( | 408 p = subprocess.Popen( |
401 command, | 409 command, |
402 cwd=self.outdir, | 410 cwd=self.get_cwd(cwd), |
403 shell=True, | 411 shell=True, |
404 stdin=subprocess.PIPE, | 412 stdin=subprocess.PIPE, |
405 stdout=subprocess.PIPE, | 413 stdout=subprocess.PIPE, |
406 stderr=subprocess.PIPE, | 414 stderr=subprocess.PIPE, |
407 ) | 415 ) |
442 style_data, | 450 style_data, |
443 ] | 451 ] |
444 } | 452 } |
445 return wstyle | 453 return wstyle |
446 | 454 |
447 def process_genomes(self): | 455 def process_genomes(self, genomes): |
448 assemblies = [] | 456 assembly = [] |
457 assmeta = [] | |
449 useuri = False | 458 useuri = False |
450 for i, genome_node in enumerate(self.genome_paths): | 459 genome_names = [] |
451 if genome_node["useuri"].strip().lower() == "yes": | 460 for i, genome_node in enumerate(genomes): |
461 this_genome = {} | |
462 if genome_node["useuri"] == "yes": | |
452 useuri = True | 463 useuri = True |
453 genome_name = genome_node["meta"]["dataset_dname"].strip() | 464 genome_name = genome_node["label"].strip() |
454 if len(genome_name.split()) > 1: | 465 if len(genome_name.split()) > 1: |
455 genome_name = genome_name.split()[0] | 466 genome_name = genome_name.split()[0] |
456 # spaces and cruft break scripts when substituted | 467 # spaces and cruft break scripts when substituted |
457 if genome_name not in self.genome_names: | 468 if genome_name not in genome_names: |
458 # pafs with shared references | 469 # pafs with shared references |
459 fapath = genome_node["path"] | 470 fapath = genome_node["path"] |
460 if not useuri: | 471 if not useuri: |
461 fapath = os.path.realpath(fapath) | 472 fapath = os.path.realpath(fapath) |
462 assem = self.make_assembly(fapath, genome_name, useuri) | 473 assem = self.make_assembly(fapath, genome_name, useuri) |
463 assemblies.append(assem) | 474 assembly.append(assem) |
464 self.genome_names.append(genome_name) | 475 if len(genome_names) == 0: |
465 if self.genome_name is None: | 476 this_genome["genome_name"] = genome_name # first one for all tracks |
466 self.genome_name = ( | 477 genome_names.append(genome_name) |
467 genome_name # first one for all tracks | 478 this_genome["genome_sequence_adapter"] = assem["sequence"][ |
468 ) | 479 "adapter" |
469 self.genome_sequence_adapter = assem["sequence"]["adapter"] | 480 ] |
470 self.genome_firstcontig = None | 481 this_genome["genome_firstcontig"] = None |
471 if not useuri: | 482 if not useuri: |
472 fl = open(fapath, "r").readline() | 483 fl = open(fapath, "r").readline() |
473 fls = fl.strip().split(">") | 484 fls = fl.strip().split(">") |
474 if len(fls) > 1: | 485 if len(fls) > 1: |
475 fl = fls[1] | 486 fl = fls[1] |
476 if len(fl.split()) > 1: | 487 if len(fl.split()) > 1: |
477 self.genome_firstcontig = fl.split()[0].strip() | 488 this_genome["genome_firstcontig"] = fl.split()[ |
489 0 | |
490 ].strip() | |
478 else: | 491 else: |
479 self.genome_firstcontig = fl | 492 this_genome["genome_firstcontig"] = fl |
480 else: | 493 else: |
481 try: | 494 try: |
482 fl = urllib.request.urlopen(fapath + ".fai").readline() | 495 fl = urllib.request.urlopen(fapath + ".fai").readline() |
483 except: | 496 except: |
484 fl = None | 497 fl = None |
485 if fl: # is first row of the text fai so the first contig name | 498 if fl: # is first row of the text fai so the first contig name |
486 self.genome_firstcontig = ( | 499 this_genome["genome_firstcontig"] = ( |
487 fl.decode("utf8").strip().split()[0] | 500 fl.decode("utf8").strip().split()[0] |
488 ) | 501 ) |
489 else: | 502 assmeta.append(this_genome) |
490 self.genome_firstcontig = None | 503 self.assemblies += assembly |
504 self.assmeta[genome_names[0]] = assmeta | |
505 self.tracksToAdd[genome_names[0]] = [] | |
491 if self.config_json.get("assemblies", None): | 506 if self.config_json.get("assemblies", None): |
492 self.config_json["assemblies"] += assemblies | 507 self.config_json["assemblies"] += assembly |
493 else: | 508 else: |
494 self.config_json["assemblies"] = assemblies | 509 self.config_json["assemblies"] = assembly |
510 self.genome_names += genome_names | |
511 return this_genome["genome_name"] | |
495 | 512 |
496 def make_assembly(self, fapath, gname, useuri): | 513 def make_assembly(self, fapath, gname, useuri): |
497 if useuri: | 514 if useuri: |
498 faname = fapath | 515 faname = fapath |
499 adapter = { | 516 adapter = { |
500 "type": "BgzipFastaAdapter", | 517 "type": "BgzipFastaAdapter", |
501 "fastaLocation": { | 518 "fastaLocation": {"uri": faname, "locationType": "UriLocation"}, |
502 "uri": faname, | 519 "faiLocation": {"uri": faname + ".fai", "locationType": "UriLocation"}, |
503 "locationType": "UriLocation" | 520 "gziLocation": {"uri": faname + ".gzi", "locationType": "UriLocation"}, |
504 }, | |
505 "faiLocation": { | |
506 "uri": faname + ".fai", | |
507 "locationType": "UriLocation" | |
508 }, | |
509 "gziLocation": { | |
510 "uri": faname + ".gzi", | |
511 "locationType": "UriLocation" | |
512 } | |
513 } | 521 } |
514 else: | 522 else: |
515 faname = gname + ".fa.gz" | 523 faname = gname + ".fa.gz" |
516 fadest = os.path.realpath(os.path.join(self.outdir, faname)) | 524 fadest = os.path.realpath(os.path.join(self.outdir, faname)) |
517 cmd = "bgzip -i -c %s -I %s.gzi > %s && samtools faidx %s" % ( | 525 cmd = "bgzip -i -c %s -I %s.gzi > %s && samtools faidx %s" % ( |
530 "faiLocation": { | 538 "faiLocation": { |
531 "uri": faname + ".fai", | 539 "uri": faname + ".fai", |
532 }, | 540 }, |
533 "gziLocation": { | 541 "gziLocation": { |
534 "uri": faname + ".gzi", | 542 "uri": faname + ".gzi", |
535 } | 543 }, |
536 } | 544 } |
537 | 545 |
538 trackDict = { | 546 trackDict = { |
539 "name": gname, | 547 "name": gname, |
540 "sequence": { | 548 "sequence": { |
549 }, | 557 }, |
550 { | 558 { |
551 "type": "LinearGCContentDisplay", | 559 "type": "LinearGCContentDisplay", |
552 "displayId": "%s-LinearGCContentDisplay" % gname, | 560 "displayId": "%s-LinearGCContentDisplay" % gname, |
553 }, | 561 }, |
554 ] | 562 ], |
555 } | 563 } |
556 return trackDict | 564 return trackDict |
557 | 565 |
558 def add_default_view(self): | 566 def add_default_view(self): |
559 cmd = [ | 567 cmd = [ |
580 # Index tracks | 588 # Index tracks |
581 args = [ | 589 args = [ |
582 "jbrowse", | 590 "jbrowse", |
583 "text-index", | 591 "text-index", |
584 "--target", | 592 "--target", |
585 os.path.join(self.outdir, "data"), | 593 self.outdir, |
586 "--assemblies", | 594 "--assemblies", |
587 self.genome_name, | 595 self.genome_name, |
588 ] | 596 ] |
589 | 597 |
590 tracks = ",".join(self.trackIdlist) | 598 tracks = ",".join(self.trackIdlist) |
626 self.subprocess_check_call(cmd) | 634 self.subprocess_check_call(cmd) |
627 categ = trackData["category"] | 635 categ = trackData["category"] |
628 trackDict = { | 636 trackDict = { |
629 "type": "HicTrack", | 637 "type": "HicTrack", |
630 "trackId": tId, | 638 "trackId": tId, |
631 "name": trackData["name"], | 639 "name": trackData["name"], |
632 "assemblyNames": [self.genome_name], | 640 "assemblyNames": [trackData["assemblyNames"]], |
633 "category": [ | 641 "category": [ |
634 categ, | 642 categ, |
635 ], | 643 ], |
636 "adapter": { | 644 "adapter": {"type": "HicAdapter", "hicLocation": {"uri": uri}}, |
637 "type": "HicAdapter", | |
638 "hicLocation": { "uri": uri } | |
639 } | |
640 } | 645 } |
641 self.tracksToAdd.append(trackDict) | 646 self.tracksToAdd[trackData["assemblyNames"]].append(trackDict) |
642 self.trackIdlist.append(tId) | 647 self.trackIdlist.append(tId) |
643 | 648 |
644 def add_maf(self, data, trackData): | 649 def add_maf(self, data, trackData): |
645 """ | 650 """ |
646 from https://github.com/cmdcolin/maf2bed | 651 from https://github.com/cmdcolin/maf2bed |
658 ] | 663 ] |
659 } | 664 } |
660 categ = trackData["category"] | 665 categ = trackData["category"] |
661 fname = "%s" % tId | 666 fname = "%s" % tId |
662 dest = "%s/%s" % (self.outdir, fname) | 667 dest = "%s/%s" % (self.outdir, fname) |
663 gname = self.genome_name | 668 gname = trackData["assemblyNames"] |
664 | 669 |
665 cmd = [ | 670 cmd = [ |
666 "bash", | 671 "bash", |
667 os.path.join(INSTALLED_TO, "convertMAF.sh"), | 672 os.path.join(INSTALLED_TO, "convertMAF.sh"), |
668 data, | 673 data, |
669 gname, | 674 gname, |
670 INSTALLED_TO, | 675 INSTALLED_TO, |
671 dest, | 676 dest, |
672 ] | 677 ] |
673 self.subprocess_check_call(cmd) | 678 self.subprocess_check_call(cmd) |
674 mafs = open(data,'r').readlines() | 679 mafs = open(data, "r").readlines() |
675 mafss = [x for x in mafs if (x.startswith('s\t') or x.startswith('s '))] | 680 mafss = [x for x in mafs if (x.startswith("s\t") or x.startswith("s "))] |
676 samp = [x.split()[1] for x in mafss if len(x.split()) > 0] | 681 samp = [x.split()[1] for x in mafss if len(x.split()) > 0] |
677 sampu = list(dict.fromkeys(samp)) | 682 sampu = list(dict.fromkeys(samp)) |
678 samples = [x.split('.')[0] for x in sampu] | 683 samples = [x.split(".")[0] for x in sampu] |
679 samples.sort() | 684 samples.sort() |
680 logging.warn("$$$$ cmd=%s, mafss=%s samp=%s samples=%s" % (' '.join(cmd), mafss, samp, samples)) | 685 logging.warn( |
686 "$$$$ cmd=%s, mafss=%s samp=%s samples=%s" | |
687 % (" ".join(cmd), mafss, samp, samples) | |
688 ) | |
681 trackDict = { | 689 trackDict = { |
682 "type": "MafTrack", | 690 "type": "MafTrack", |
683 "trackId": tId, | 691 "trackId": tId, |
684 "name": trackData["name"], | 692 "name": trackData["name"], |
685 "category": [ | 693 "category": [ |
695 "location": { | 703 "location": { |
696 "uri": fname + ".sorted.bed.gz.tbi", | 704 "uri": fname + ".sorted.bed.gz.tbi", |
697 }, | 705 }, |
698 }, | 706 }, |
699 }, | 707 }, |
700 "assemblyNames": [self.genome_name], | 708 "assemblyNames": [trackData["assemblyNames"]], |
701 "displays": [ | 709 "displays": [ |
702 { | 710 { |
703 "type": "LinearBasicDisplay", | 711 "type": "LinearBasicDisplay", |
704 "displayId": "%s-LinearBasicDisplay" % tId | 712 "displayId": "%s-LinearBasicDisplay" % tId, |
705 }, | 713 }, |
706 { | 714 {"type": "LinearArcDisplay", "displayId": "%s-LinearArcDisplay" % tId}, |
707 "type": "LinearArcDisplay", | 715 ], |
708 "displayId": "%s-LinearArcDisplay" % tId | |
709 }, | |
710 ] | |
711 } | 716 } |
712 style_json = self._prepare_track_style(trackDict) | 717 style_json = self._prepare_track_style(trackDict) |
713 trackDict["style"] = style_json | 718 trackDict["style"] = style_json |
714 self.tracksToAdd.append(trackDict) | 719 self.tracksToAdd[gname].append(trackDict) |
715 self.trackIdlist.append(tId) | 720 self.trackIdlist.append(tId) |
716 if self.config_json.get("plugins", None): | 721 if self.config_json.get("plugins", None): |
717 self.config_json["plugins"].append(mafPlugin[0]) | 722 self.config_json["plugins"].append(mafPlugin[0]) |
718 else: | 723 else: |
719 self.config_json.update(mafPlugin) | 724 self.config_json.update(mafPlugin) |
730 str(min_gap), | 735 str(min_gap), |
731 xml, | 736 xml, |
732 ] | 737 ] |
733 subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_unrebased) | 738 subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_unrebased) |
734 gff3_unrebased.close() | 739 gff3_unrebased.close() |
735 logging.warn("### blastxml to gff3 cmd = %s" % ' '.join(cmd)) | 740 logging.warn("### blastxml to gff3 cmd = %s" % " ".join(cmd)) |
736 return gff3_unrebased.name | 741 return gff3_unrebased.name |
737 | 742 |
738 def add_blastxml(self, data, trackData, blastOpts, **kwargs): | 743 def add_blastxml(self, data, trackData, blastOpts, **kwargs): |
739 gff3 = self._blastxml_to_gff3(data, min_gap=blastOpts["min_gap"]) | 744 gff3 = self._blastxml_to_gff3(data, min_gap=blastOpts["min_gap"]) |
740 if "parent" in blastOpts and blastOpts["parent"] != "None": | 745 if "parent" in blastOpts and blastOpts["parent"] != "None": |
742 cmd = ["python", os.path.join(INSTALLED_TO, "gff3_rebase.py")] | 747 cmd = ["python", os.path.join(INSTALLED_TO, "gff3_rebase.py")] |
743 if blastOpts.get("protein", "false") == "true": | 748 if blastOpts.get("protein", "false") == "true": |
744 cmd.append("--protein2dna") | 749 cmd.append("--protein2dna") |
745 cmd.extend([os.path.realpath(blastOpts["parent"]), gff3]) | 750 cmd.extend([os.path.realpath(blastOpts["parent"]), gff3]) |
746 subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_rebased) | 751 subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_rebased) |
747 logging.warn("### gff3rebase cmd = %s" % ' '.join(cmd)) | 752 logging.warn("### gff3rebase cmd = %s" % " ".join(cmd)) |
748 gff3_rebased.close() | 753 gff3_rebased.close() |
749 # Replace original gff3 file | 754 # Replace original gff3 file |
750 shutil.copy(gff3_rebased.name, gff3) | 755 shutil.copy(gff3_rebased.name, gff3) |
751 os.unlink(gff3_rebased.name) | 756 os.unlink(gff3_rebased.name) |
752 url = "%s.gff3.gz" % trackData["label"] | 757 url = "%s.gff3.gz" % trackData["label"] |
756 categ = trackData["category"] | 761 categ = trackData["category"] |
757 trackDict = { | 762 trackDict = { |
758 "type": "FeatureTrack", | 763 "type": "FeatureTrack", |
759 "trackId": tId, | 764 "trackId": tId, |
760 "name": trackData["name"], | 765 "name": trackData["name"], |
761 "assemblyNames": [self.genome_name], | 766 "assemblyNames": [trackData["assemblyNames"]], |
762 "category": [ | 767 "category": [ |
763 categ, | 768 categ, |
764 ], | 769 ], |
765 "adapter": { | 770 "adapter": { |
766 "type": "Gff3TabixAdapter", | 771 "type": "Gff3TabixAdapter", |
784 }, | 789 }, |
785 ], | 790 ], |
786 } | 791 } |
787 style_json = self._prepare_track_style(trackDict) | 792 style_json = self._prepare_track_style(trackDict) |
788 trackDict["style"] = style_json | 793 trackDict["style"] = style_json |
789 self.tracksToAdd.append(trackDict) | 794 self.tracksToAdd[trackData["assemblyNames"]].append(trackDict) |
790 self.trackIdlist.append(tId) | 795 self.trackIdlist.append(tId) |
791 os.unlink(gff3) | 796 os.unlink(gff3) |
792 | 797 |
793 def add_bigwig(self, data, trackData): | 798 def add_bigwig(self, data, trackData): |
794 useuri = trackData["useuri"].lower() == "yes" | 799 useuri = trackData["useuri"].lower() == "yes" |
808 "trackId": tId, | 813 "trackId": tId, |
809 "name": trackData["name"], | 814 "name": trackData["name"], |
810 "category": [ | 815 "category": [ |
811 categ, | 816 categ, |
812 ], | 817 ], |
813 "assemblyNames": [ | 818 "assemblyNames": [trackData["assemblyNames"]], |
814 self.genome_name, | |
815 ], | |
816 "adapter": { | 819 "adapter": { |
817 "type": "BigWigAdapter", | 820 "type": "BigWigAdapter", |
818 "bigWigLocation": bwloc, | 821 "bigWigLocation": bwloc, |
819 }, | 822 }, |
820 "displays": [ | 823 "displays": [ |
824 } | 827 } |
825 ], | 828 ], |
826 } | 829 } |
827 style_json = self._prepare_track_style(trackDict) | 830 style_json = self._prepare_track_style(trackDict) |
828 trackDict["style"] = style_json | 831 trackDict["style"] = style_json |
829 self.tracksToAdd.append(trackDict) | 832 self.tracksToAdd[trackData["assemblyNames"]].append(trackDict) |
830 self.trackIdlist.append(tId) | 833 self.trackIdlist.append(tId) |
831 | 834 |
832 def add_bam(self, data, trackData, bam_index=None, **kwargs): | 835 def add_bam(self, data, trackData, bam_index=None, **kwargs): |
833 tId = trackData["label"] | 836 tId = trackData["label"] |
834 useuri = trackData["useuri"].lower() == "yes" | 837 useuri = trackData["useuri"].lower() == "yes" |
860 "trackId": tId, | 863 "trackId": tId, |
861 "name": trackData["name"], | 864 "name": trackData["name"], |
862 "category": [ | 865 "category": [ |
863 categ, | 866 categ, |
864 ], | 867 ], |
865 "assemblyNames": [self.genome_name], | 868 "assemblyNames": [trackData["assemblyNames"]], |
866 "adapter": { | 869 "adapter": { |
867 "type": "BamAdapter", | 870 "type": "BamAdapter", |
868 "bamLocation": {"uri": url}, | 871 "bamLocation": {"uri": url}, |
869 "index": { | 872 "index": { |
870 "location": { | 873 "location": { |
879 }, | 882 }, |
880 ], | 883 ], |
881 } | 884 } |
882 style_json = self._prepare_track_style(trackDict) | 885 style_json = self._prepare_track_style(trackDict) |
883 trackDict["style"] = style_json | 886 trackDict["style"] = style_json |
884 self.tracksToAdd.append(trackDict) | 887 self.tracksToAdd[trackData["assemblyNames"]].append(trackDict) |
885 self.trackIdlist.append(tId) | 888 self.trackIdlist.append(tId) |
886 | 889 |
887 def add_cram(self, data, trackData, cram_index=None, **kwargs): | 890 def add_cram(self, data, trackData, cram_index=None, **kwargs): |
888 tId = trackData["label"] | 891 tId = trackData["label"] |
889 categ = trackData["category"] | 892 categ = trackData["category"] |
890 useuri = trackData["useuri"].lower() == "yes" | 893 useuri = trackData["useuri"].lower() == "yes" |
894 gsa = self.assmeta.get(trackData["assemblyNames"], None) | |
895 if gsa: | |
896 genseqad = gsa[0]["genome_sequence_adapter"] | |
897 else: | |
898 genseqad = "Not found" | |
899 logging.warn("No adapter found for cram %s in gsa=%s" % (tId, gsa)) | |
891 if useuri: | 900 if useuri: |
892 url = data | 901 url = data |
893 else: | 902 else: |
894 fname = "%s.cram" % trackData["label"] | 903 fname = "%s.cram" % trackData["label"] |
895 dest = "%s/%s" % (self.outdir, fname) | 904 dest = "%s/%s" % (self.outdir, fname) |
911 "trackId": tId, | 920 "trackId": tId, |
912 "name": trackData["name"], | 921 "name": trackData["name"], |
913 "category": [ | 922 "category": [ |
914 categ, | 923 categ, |
915 ], | 924 ], |
916 "assemblyNames": [self.genome_name], | 925 "assemblyNames": [trackData["assemblyNames"]], |
917 "adapter": { | 926 "adapter": { |
918 "type": "CramAdapter", | 927 "type": "CramAdapter", |
919 "cramLocation": {"uri": url}, | 928 "cramLocation": {"uri": url}, |
920 "craiLocation": { | 929 "craiLocation": { |
921 "uri": url + ".crai", | 930 "uri": url + ".crai", |
922 }, | 931 }, |
923 "sequenceAdapter": self.genome_sequence_adapter, | 932 "sequenceAdapter": genseqad, |
924 }, | 933 }, |
925 "displays": [ | 934 "displays": [ |
926 { | 935 { |
927 "type": "LinearAlignmentsDisplay", | 936 "type": "LinearAlignmentsDisplay", |
928 "displayId": "%s-LinearAlignmentsDisplay" % tId, | 937 "displayId": "%s-LinearAlignmentsDisplay" % tId, |
929 }, | 938 }, |
930 ], | 939 ], |
931 } | 940 } |
932 style_json = self._prepare_track_style(trackDict) | 941 style_json = self._prepare_track_style(trackDict) |
933 trackDict["style"] = style_json | 942 trackDict["style"] = style_json |
934 self.tracksToAdd.append(trackDict) | 943 self.tracksToAdd[trackData["assemblyNames"]].append(trackDict) |
935 self.trackIdlist.append(tId) | 944 self.trackIdlist.append(tId) |
936 | 945 |
937 def add_vcf(self, data, trackData): | 946 def add_vcf(self, data, trackData): |
938 tId = trackData["label"] | 947 tId = trackData["label"] |
939 # url = "%s/api/datasets/%s/display" % ( | 948 # url = "%s/api/datasets/%s/display" % ( |
953 self.subprocess_check_call(cmd) | 962 self.subprocess_check_call(cmd) |
954 trackDict = { | 963 trackDict = { |
955 "type": "VariantTrack", | 964 "type": "VariantTrack", |
956 "trackId": tId, | 965 "trackId": tId, |
957 "name": trackData["name"], | 966 "name": trackData["name"], |
958 "assemblyNames": [self.genome_name], | 967 "assemblyNames": [trackData["assemblyNames"]], |
959 "category": [ | 968 "category": [ |
960 categ, | 969 categ, |
961 ], | 970 ], |
962 "adapter": { | 971 "adapter": { |
963 "type": "VcfTabixAdapter", | 972 "type": "VcfTabixAdapter", |
983 }, | 992 }, |
984 ], | 993 ], |
985 } | 994 } |
986 style_json = self._prepare_track_style(trackDict) | 995 style_json = self._prepare_track_style(trackDict) |
987 trackDict["style"] = style_json | 996 trackDict["style"] = style_json |
988 self.tracksToAdd.append(trackDict) | 997 self.tracksToAdd[trackData["assemblyNames"]].append(trackDict) |
989 self.trackIdlist.append(tId) | 998 self.trackIdlist.append(tId) |
990 | 999 |
991 def _sort_gff(self, data, dest): | 1000 def _sort_gff(self, data, dest): |
992 # Only index if not already done | 1001 # Only index if not already done |
993 if not os.path.exists(dest): | 1002 if not os.path.exists(dest): |
1018 categ = trackData["category"] | 1027 categ = trackData["category"] |
1019 trackDict = { | 1028 trackDict = { |
1020 "type": "FeatureTrack", | 1029 "type": "FeatureTrack", |
1021 "trackId": tId, | 1030 "trackId": tId, |
1022 "name": trackData["name"], | 1031 "name": trackData["name"], |
1023 "assemblyNames": [self.genome_name], | 1032 "assemblyNames": [trackData["assemblyNames"]], |
1024 "category": [ | 1033 "category": [ |
1025 categ, | 1034 categ, |
1026 ], | 1035 ], |
1027 "adapter": { | 1036 "adapter": { |
1028 "type": "Gff3TabixAdapter", | 1037 "type": "Gff3TabixAdapter", |
1046 }, | 1055 }, |
1047 ], | 1056 ], |
1048 } | 1057 } |
1049 style_json = self._prepare_track_style(trackDict) | 1058 style_json = self._prepare_track_style(trackDict) |
1050 trackDict["style"] = style_json | 1059 trackDict["style"] = style_json |
1051 self.tracksToAdd.append(trackDict) | 1060 self.tracksToAdd[trackData["assemblyNames"]].append(trackDict) |
1052 self.trackIdlist.append(tId) | 1061 self.trackIdlist.append(tId) |
1053 | 1062 |
1054 def add_bed(self, data, ext, trackData): | 1063 def add_bed(self, data, ext, trackData): |
1055 tId = trackData["label"] | 1064 tId = trackData["label"] |
1056 categ = trackData["category"] | 1065 categ = trackData["category"] |
1063 self._sort_bed(data, dest) | 1072 self._sort_bed(data, dest) |
1064 trackDict = { | 1073 trackDict = { |
1065 "type": "FeatureTrack", | 1074 "type": "FeatureTrack", |
1066 "trackId": tId, | 1075 "trackId": tId, |
1067 "name": trackData["name"], | 1076 "name": trackData["name"], |
1068 "assemblyNames": [self.genome_name], | 1077 "assemblyNames": [trackData["assemblyNames"]], |
1069 "adapter": { | 1078 "adapter": { |
1070 "category": [ | 1079 "category": [ |
1071 categ, | 1080 categ, |
1072 ], | 1081 ], |
1073 "type": "BedTabixAdapter", | 1082 "type": "BedTabixAdapter", |
1095 }, | 1104 }, |
1096 ], | 1105 ], |
1097 } | 1106 } |
1098 style_json = self._prepare_track_style(trackDict) | 1107 style_json = self._prepare_track_style(trackDict) |
1099 trackDict["style"] = style_json | 1108 trackDict["style"] = style_json |
1100 self.tracksToAdd.append(trackDict) | 1109 self.tracksToAdd[trackData["assemblyNames"]].append(trackDict) |
1101 self.trackIdlist.append(tId) | 1110 self.trackIdlist.append(tId) |
1102 | 1111 |
1103 def add_paf(self, data, trackData, pafOpts, **kwargs): | 1112 def add_paf(self, data, trackData, pafOpts, **kwargs): |
1104 tname = trackData["name"] | 1113 tname = trackData["name"] |
1105 tId = trackData["label"] | 1114 tId = trackData["label"] |
1106 categ = trackData["category"] | 1115 categ = trackData["category"] |
1107 pgnames = [x.strip() for x in pafOpts["genome_label"].split(",") if len(x.strip()) > 0] | 1116 pgnames = [ |
1108 pgpaths = [x.strip() for x in pafOpts["genome"].split(",") if len(x.strip()) > 0] | 1117 x.strip() for x in pafOpts["genome_label"].split(",") if len(x.strip()) > 0 |
1109 passnames = [self.genome_name] # always first | 1118 ] |
1110 logging.debug("### add_paf got pafOpts=%s, pgnames=%s, pgpaths=%s for %s" % (pafOpts, pgnames, pgpaths, tId)) | 1119 pgpaths = [ |
1120 x.strip() for x in pafOpts["genome"].split(",") if len(x.strip()) > 0 | |
1121 ] | |
1122 passnames = [trackData["assemblyNames"]] # always first | |
1123 logging.debug( | |
1124 "### add_paf got pafOpts=%s, pgnames=%s, pgpaths=%s for %s" | |
1125 % (pafOpts, pgnames, pgpaths, tId) | |
1126 ) | |
1111 for i, gname in enumerate(pgnames): | 1127 for i, gname in enumerate(pgnames): |
1112 if len(gname.split()) > 1: | 1128 if len(gname.split()) > 1: |
1113 gname = gname.split()[0] | 1129 gname = gname.split()[0] |
1114 passnames.append(gname) | 1130 passnames.append(gname) |
1115 # trouble from spacey names in command lines avoidance | 1131 # trouble from spacey names in command lines avoidance |
1139 "name": tname, | 1155 "name": tname, |
1140 "adapter": { | 1156 "adapter": { |
1141 "type": "PAFAdapter", | 1157 "type": "PAFAdapter", |
1142 "pafLocation": {"uri": url}, | 1158 "pafLocation": {"uri": url}, |
1143 "assemblyNames": passnames, | 1159 "assemblyNames": passnames, |
1144 } | 1160 }, |
1145 } | 1161 } |
1146 style_json = { | 1162 style_json = { |
1147 "displays": [ | 1163 "displays": [ |
1148 { "type": "LinearBasicDisplay", | 1164 { |
1149 "displayId": "%s-LinearBasicyDisplay" % trackDict["trackId"] | 1165 "type": "LinearBasicDisplay", |
1150 } | 1166 "displayId": "%s-LinearBasicyDisplay" % trackDict["trackId"], |
1167 } | |
1151 ] | 1168 ] |
1152 } | 1169 } |
1153 trackDict["style"] = style_json | 1170 trackDict["style"] = style_json |
1154 self.tracksToAdd.append(trackDict) | 1171 self.tracksToAdd[trackData["assemblyNames"]].append(trackDict) |
1155 self.trackIdlist.append(tId) | 1172 self.trackIdlist.append(tId) |
1156 | 1173 |
1157 def process_annotations(self, track): | 1174 def process_annotations(self, track): |
1158 category = track["category"].replace("__pd__date__pd__", TODAY) | 1175 category = track["category"].replace("__pd__date__pd__", TODAY) |
1159 for i, ( | 1176 for i, ( |
1171 outputTrackConfig = { | 1188 outputTrackConfig = { |
1172 "category": category, | 1189 "category": category, |
1173 "style": {}, | 1190 "style": {}, |
1174 } | 1191 } |
1175 | 1192 |
1193 outputTrackConfig["assemblyNames"] = track["assemblyNames"] | |
1176 outputTrackConfig["key"] = track_human_label | 1194 outputTrackConfig["key"] = track_human_label |
1177 outputTrackConfig["useuri"] = useuri | 1195 outputTrackConfig["useuri"] = useuri |
1178 outputTrackConfig["path"] = dataset_path | 1196 outputTrackConfig["path"] = dataset_path |
1179 outputTrackConfig["ext"] = dataset_ext | 1197 outputTrackConfig["ext"] = dataset_ext |
1180 | 1198 |
1269 """ | 1287 """ |
1270 default session settings are hard and fragile. | 1288 default session settings are hard and fragile. |
1271 .add_default_view() and other configuration code adapted from | 1289 .add_default_view() and other configuration code adapted from |
1272 https://github.com/abretaud/tools-iuc/blob/jbrowse2/tools/jbrowse2/jbrowse2.py | 1290 https://github.com/abretaud/tools-iuc/blob/jbrowse2/tools/jbrowse2/jbrowse2.py |
1273 """ | 1291 """ |
1274 tracks_data = [] | |
1275 # TODO using the default session for now, but check out session specs in the future https://github.com/GMOD/jbrowse-components/issues/2708 | 1292 # TODO using the default session for now, but check out session specs in the future https://github.com/GMOD/jbrowse-components/issues/2708 |
1276 track_types = {} | 1293 track_types = {} |
1277 with open(self.config_json_file, "r") as config_file: | 1294 with open(self.config_json_file, "r") as config_file: |
1278 config_json = json.load(config_file) | 1295 config_json = json.load(config_file) |
1279 if self.config_json: | 1296 if self.config_json: |
1280 config_json.update(self.config_json) | 1297 config_json.update(self.config_json) |
1281 for track_conf in self.tracksToAdd: | 1298 if "defaultSession" in config_json: |
1282 tId = track_conf["trackId"] | 1299 session_json = config_json["defaultSession"] |
1283 track_types[tId] = track_conf["type"] | 1300 else: |
1284 style_data = default_data["style"].get(tId, None) | 1301 session_json = {} |
1285 if not style_data: | 1302 session_views = [] |
1286 logging.warn("### No style data in default data %s for %s" % (default_data, tId)) | 1303 for gnome in self.genome_names: |
1287 style_data = {"type": "LinearBasicDisplay"} | 1304 tracks_data = [] |
1288 if "displays" in track_conf: | 1305 for track_conf in self.tracksToAdd[gnome]: |
1289 disp = track_conf["displays"][0]["type"] | 1306 tId = track_conf["trackId"] |
1290 style_data["type"] = disp | 1307 track_types[tId] = track_conf["type"] |
1291 if track_conf.get("style_labels", None): | 1308 style_data = default_data["style"].get(tId, None) |
1292 # TODO fix this: it should probably go in a renderer block (SvgFeatureRenderer) but still does not work | 1309 if not style_data: |
1293 # TODO move this to per track displays? | 1310 logging.warn( |
1294 style_data["labels"] = track_conf["style_labels"] | 1311 "### No style data in default data %s for %s" |
1295 tracks_data.append( | 1312 % (default_data, tId) |
1296 { | 1313 ) |
1297 "type": track_types[tId], | 1314 style_data = {"type": "LinearBasicDisplay"} |
1298 "configuration": tId, | 1315 if "displays" in track_conf: |
1299 "displays": [style_data], | 1316 disp = track_conf["displays"][0]["type"] |
1300 } | 1317 style_data["type"] = disp |
1301 ) | 1318 if track_conf.get("style_labels", None): |
1302 # The view for the assembly we're adding | 1319 # TODO fix this: it should probably go in a renderer block (SvgFeatureRenderer) but still does not work |
1303 view_json = {"type": "LinearGenomeView", "tracks": tracks_data} | 1320 # TODO move this to per track displays? |
1304 refName = None | 1321 style_data["labels"] = track_conf["style_labels"] |
1305 drdict = { | 1322 tracks_data.append( |
1306 "reversed": False, | 1323 { |
1307 "assemblyName": self.genome_name, | 1324 "type": track_types[tId], |
1308 "start": 1, | 1325 "configuration": tId, |
1309 "end": 100000, | 1326 "displays": [style_data], |
1310 "refName": "x", | 1327 } |
1311 } | 1328 ) |
1312 | 1329 # The view for the assembly we're adding |
1313 if default_data.get("defaultLocation", ""): | 1330 view_json = {"type": "LinearGenomeView", "tracks": tracks_data} |
1314 ddl = default_data["defaultLocation"] | 1331 refName = None |
1315 loc_match = re.search(r"^([^:]+):([\d,]*)\.*([\d,]*)$", ddl) | 1332 drdict = { |
1316 # allow commas like 100,000 but ignore as integer | 1333 "reversed": False, |
1317 if loc_match: | 1334 "assemblyName": gnome, |
1318 refName = loc_match.group(1) | 1335 "start": 1, |
1319 drdict["refName"] = refName | 1336 "end": 100000, |
1320 if loc_match.group(2) > "": | 1337 "refName": "x", |
1321 drdict["start"] = int(loc_match.group(2).replace(",", "")) | 1338 } |
1322 if loc_match.group(3) > "": | 1339 |
1323 drdict["end"] = int(loc_match.group(3).replace(",", "")) | 1340 if default_data.get("defaultLocation", ""): |
1341 ddl = default_data["defaultLocation"] | |
1342 loc_match = re.search(r"^([^:]+):([\d,]*)\.*([\d,]*)$", ddl) | |
1343 # allow commas like 100,000 but ignore as integer | |
1344 if loc_match: | |
1345 refName = loc_match.group(1) | |
1346 drdict["refName"] = refName | |
1347 if loc_match.group(2) > "": | |
1348 drdict["start"] = int(loc_match.group(2).replace(",", "")) | |
1349 if loc_match.group(3) > "": | |
1350 drdict["end"] = int(loc_match.group(3).replace(",", "")) | |
1351 else: | |
1352 logging.info( | |
1353 "@@@ regexp could not match contig:start..end in the supplied location %s - please fix" | |
1354 % ddl | |
1355 ) | |
1356 else: | |
1357 drdict["refName"] = gnome | |
1358 if drdict.get("refName", None): | |
1359 # TODO displayedRegions is not just zooming to the region, it hides the rest of the chromosome | |
1360 view_json["displayedRegions"] = [ | |
1361 drdict, | |
1362 ] | |
1363 logging.info("@@@ defaultlocation %s for default session" % drdict) | |
1324 else: | 1364 else: |
1325 logging.info( | 1365 logging.info( |
1326 "@@@ regexp could not match contig:start..end in the supplied location %s - please fix" | 1366 "@@@ no contig name found for default session - please add one!" |
1327 % ddl | |
1328 ) | 1367 ) |
1329 else: | 1368 session_views.append(view_json) |
1330 drdict["refName"] = self.genome_firstcontig | |
1331 if drdict.get("refName", None): | |
1332 # TODO displayedRegions is not just zooming to the region, it hides the rest of the chromosome | |
1333 view_json["displayedRegions"] = [ | |
1334 drdict, | |
1335 ] | |
1336 | |
1337 logging.info("@@@ defaultlocation %s for default session" % drdict) | |
1338 else: | |
1339 logging.info( | |
1340 "@@@ no contig name found for default session - please add one!" | |
1341 ) | |
1342 session_name = default_data.get("session_name", "New session") | 1369 session_name = default_data.get("session_name", "New session") |
1343 for key, value in mapped_chars.items(): | 1370 for key, value in mapped_chars.items(): |
1344 session_name = session_name.replace(value, key) | 1371 session_name = session_name.replace(value, key) |
1345 # Merge with possibly existing defaultSession (if upgrading a jbrowse instance) | |
1346 session_json = {} | |
1347 if "defaultSession" in config_json: | |
1348 session_json = config_json["defaultSession"] | |
1349 | |
1350 session_json["name"] = session_name | 1372 session_json["name"] = session_name |
1351 | 1373 |
1352 if "views" not in session_json: | 1374 if "views" not in session_json: |
1353 session_json["views"] = [] | 1375 session_json["views"] = session_views |
1354 | 1376 else: |
1355 session_json["views"].append(view_json) | 1377 session_json["views"] += session_views |
1356 | 1378 |
1357 config_json["defaultSession"] = session_json | 1379 config_json["defaultSession"] = session_json |
1358 self.config_json.update(config_json) | 1380 self.config_json.update(config_json) |
1359 | 1381 |
1360 with open(self.config_json_file, "w") as config_file: | 1382 with open(self.config_json_file, "w") as config_file: |
1417 cmd = ["cp", os.path.join(INSTALLED_TO, "jb2_webserver.py"), dest] | 1439 cmd = ["cp", os.path.join(INSTALLED_TO, "jb2_webserver.py"), dest] |
1418 self.subprocess_check_call(cmd) | 1440 self.subprocess_check_call(cmd) |
1419 | 1441 |
1420 | 1442 |
def parse_style_conf(item):
    """Convert the text of a style option XML element to a Python value.

    Text spelling a boolean ("true"/"yes" or "false"/"no", in any letter
    case) is returned as the matching ``bool``. Any other text is returned
    unchanged as a string. An element without text yields ``None``.

    Args:
        item: an object exposing a ``text`` attribute, such as an
            ``xml.etree.ElementTree.Element``.

    Returns:
        ``True``/``False`` for boolean-like text, otherwise ``item.text``.
    """
    text = item.text
    if text is None:
        # Empty element: there is nothing to interpret as a flag.
        return None
    lowered = text.lower()
    if lowered in ("false", "true", "yes", "no"):
        # Membership is tested on the lowered *string*; testing the bound
        # ``lower`` method itself never matches and made every flag False.
        return lowered in ("yes", "true")
    return text
1426 | 1448 |
1427 | 1449 |
1428 if __name__ == "__main__": | 1450 if __name__ == "__main__": |
1430 parser.add_argument("--xml", help="Track Configuration") | 1452 parser.add_argument("--xml", help="Track Configuration") |
1431 parser.add_argument( | 1453 parser.add_argument( |
1432 "--jbrowse2path", help="Path to JBrowse2 directory in biocontainer or Conda" | 1454 "--jbrowse2path", help="Path to JBrowse2 directory in biocontainer or Conda" |
1433 ) | 1455 ) |
1434 parser.add_argument("--outdir", help="Output directory", default="out") | 1456 parser.add_argument("--outdir", help="Output directory", default="out") |
1435 parser.add_argument("--version", "-V", action="version", version="%(prog)s 2.0.1") | 1457 parser.add_argument("--version", "-V", action="version", version=JB2VER) |
1436 args = parser.parse_args() | 1458 args = parser.parse_args() |
1437 tree = ET.parse(args.xml) | 1459 tree = ET.parse(args.xml) |
1438 root = tree.getroot() | 1460 root = tree.getroot() |
1439 | 1461 |
1440 # This should be done ASAP | 1462 # This should be done ASAP |
1442 # Sometimes this comes as `localhost` without a protocol | 1464 # Sometimes this comes as `localhost` without a protocol |
1443 if not GALAXY_INFRASTRUCTURE_URL.startswith("http"): | 1465 if not GALAXY_INFRASTRUCTURE_URL.startswith("http"): |
1444 # so we'll prepend `http://` and hope for the best. Requests *should* | 1466 # so we'll prepend `http://` and hope for the best. Requests *should* |
1445 # be GET and not POST so it should redirect OK | 1467 # be GET and not POST so it should redirect OK |
1446 GALAXY_INFRASTRUCTURE_URL = "http://" + GALAXY_INFRASTRUCTURE_URL | 1468 GALAXY_INFRASTRUCTURE_URL = "http://" + GALAXY_INFRASTRUCTURE_URL |
1447 jc = JbrowseConnector( | 1469 |
1448 outdir=args.outdir, | 1470 jc = JbrowseConnector(outdir=args.outdir, jbrowse2path=args.jbrowse2path) |
1449 jbrowse2path=args.jbrowse2path, | |
1450 genomes=[ | |
1451 { | |
1452 "path": x.attrib["path"], | |
1453 "label": x.attrib["label"], | |
1454 "useuri": x.attrib["useuri"], | |
1455 "meta": metadata_from_node(x.find("metadata")), | |
1456 } | |
1457 for x in root.findall("metadata/genomes/genome") | |
1458 ], | |
1459 ) | |
1460 jc.process_genomes() | |
1461 | 1471 |
1462 default_session_data = { | 1472 default_session_data = { |
1463 "visibility": { | 1473 "visibility": { |
1464 "default_on": [], | 1474 "default_on": [], |
1465 "default_off": [], | 1475 "default_off": [], |
1466 }, | 1476 }, |
1467 "style": {}, | 1477 "style": {}, |
1468 "style_labels": {}, | 1478 "style_labels": {}, |
1469 } | 1479 } |
1470 | 1480 |
1471 for track in root.findall("tracks/track"): | 1481 for ass in root.findall("assembly"): |
1472 track_conf = {} | 1482 genomes = [ |
1473 track_conf["trackfiles"] = [] | 1483 { |
1474 | 1484 "path": x.attrib["path"], |
1475 is_multi_bigwig = False | 1485 "label": x.attrib["label"], |
1476 try: | 1486 "useuri": x.attrib["useuri"], |
1477 if track.find("options/wiggle/multibigwig") and ( | 1487 "meta": metadata_from_node(x.find("metadata")), |
1478 track.find("options/wiggle/multibigwig").text == "True" | 1488 } |
1479 ): | 1489 for x in ass.findall("metadata/genomes/genome") |
1480 is_multi_bigwig = True | 1490 ] |
1481 multi_bigwig_paths = [] | 1491 logging.warn("#!!! genomes=%s" % genomes) |
1482 except KeyError: | 1492 assref_name = jc.process_genomes(genomes) |
1483 pass | 1493 |
1484 | 1494 for track in ass.find("tracks"): |
1485 trackfiles = track.findall("files/trackFile") | 1495 track_conf = {} |
1486 if trackfiles: | 1496 track_conf["trackfiles"] = [] |
1487 for x in track.findall("files/trackFile"): | 1497 track_conf["assemblyNames"] = assref_name |
1488 track_conf["label"] = x.attrib["label"] | 1498 is_multi_bigwig = False |
1489 trackkey = track_conf["label"] | 1499 try: |
1490 track_conf["useuri"] = x.attrib["useuri"] | 1500 if track.find("options/wiggle/multibigwig") and ( |
1501 track.find("options/wiggle/multibigwig").text == "True" | |
1502 ): | |
1503 is_multi_bigwig = True | |
1504 multi_bigwig_paths = [] | |
1505 except KeyError: | |
1506 pass | |
1507 | |
1508 trackfiles = track.findall("files/trackFile") | |
1509 if trackfiles: | |
1510 for x in track.findall("files/trackFile"): | |
1511 track_conf["label"] = x.attrib["label"] | |
1512 trackkey = track_conf["label"] | |
1513 track_conf["useuri"] = x.attrib["useuri"] | |
1514 if is_multi_bigwig: | |
1515 multi_bigwig_paths.append( | |
1516 ( | |
1517 x.attrib["label"], | |
1518 x.attrib["useuri"], | |
1519 os.path.realpath(x.attrib["path"]), | |
1520 ) | |
1521 ) | |
1522 else: | |
1523 if trackfiles: | |
1524 metadata = metadata_from_node(x.find("metadata")) | |
1525 track_conf["dataset_id"] = metadata.get( | |
1526 "dataset_id", "None" | |
1527 ) | |
1528 if x.attrib["useuri"].lower() == "yes": | |
1529 tfa = ( | |
1530 x.attrib["path"], | |
1531 x.attrib["ext"], | |
1532 x.attrib["useuri"], | |
1533 x.attrib["label"], | |
1534 metadata, | |
1535 ) | |
1536 else: | |
1537 tfa = ( | |
1538 os.path.realpath(x.attrib["path"]), | |
1539 x.attrib["ext"], | |
1540 x.attrib["useuri"], | |
1541 x.attrib["label"], | |
1542 metadata, | |
1543 ) | |
1544 track_conf["trackfiles"].append(tfa) | |
1545 | |
1491 if is_multi_bigwig: | 1546 if is_multi_bigwig: |
1492 multi_bigwig_paths.append( | 1547 metadata = metadata_from_node(x.find("metadata")) |
1548 | |
1549 track_conf["trackfiles"].append( | |
1493 ( | 1550 ( |
1494 x.attrib["label"], | 1551 multi_bigwig_paths, # Passing an array of paths to represent as one track |
1495 x.attrib["useuri"], | 1552 "bigwig_multiple", |
1496 os.path.realpath(x.attrib["path"]), | 1553 "MultiBigWig", # Giving an hardcoded name for now |
1554 {}, # No metadata for multiple bigwig | |
1497 ) | 1555 ) |
1498 ) | 1556 ) |
1557 | |
1558 track_conf["category"] = track.attrib["cat"] | |
1559 track_conf["format"] = track.attrib["format"] | |
1560 track_conf["conf"] = etree_to_dict(track.find("options")) | |
1561 track_conf["category"] = track.attrib["cat"] | |
1562 track_conf["format"] = track.attrib["format"] | |
1563 keys = jc.process_annotations(track_conf) | |
1564 | |
1565 if keys: | |
1566 for key in keys: | |
1567 vis = track.attrib.get("visibility", "default_off") | |
1568 if not vis: | |
1569 vis = "default_off" | |
1570 default_session_data["visibility"][vis].append(key) | |
1571 if track.find("options/style"): | |
1572 default_session_data["style"][key] = { | |
1573 item.tag: parse_style_conf(item) | |
1574 for item in track.find("options/style") | |
1575 } | |
1499 else: | 1576 else: |
1500 if trackfiles: | 1577 default_session_data["style"][key] = {} |
1501 metadata = metadata_from_node(x.find("metadata")) | 1578 logging.warn("@@@@ no options/style found for %s" % (key)) |
1502 track_conf["dataset_id"] = metadata["dataset_id"] | 1579 |
1503 if x.attrib["useuri"].lower() == "yes": | 1580 if track.find("options/style_labels"): |
1504 tfa = ( | 1581 default_session_data["style_labels"][key] = { |
1505 x.attrib["path"], | 1582 item.tag: parse_style_conf(item) |
1506 x.attrib["ext"], | 1583 for item in track.find("options/style_labels") |
1507 x.attrib["useuri"], | 1584 } |
1508 x.attrib["label"], | |
1509 metadata, | |
1510 ) | |
1511 else: | |
1512 tfa = ( | |
1513 os.path.realpath(x.attrib["path"]), | |
1514 x.attrib["ext"], | |
1515 x.attrib["useuri"], | |
1516 x.attrib["label"], | |
1517 metadata, | |
1518 ) | |
1519 track_conf["trackfiles"].append(tfa) | |
1520 | |
1521 if is_multi_bigwig: | |
1522 metadata = metadata_from_node(x.find("metadata")) | |
1523 | |
1524 track_conf["trackfiles"].append( | |
1525 ( | |
1526 multi_bigwig_paths, # Passing an array of paths to represent as one track | |
1527 "bigwig_multiple", | |
1528 "MultiBigWig", # Giving an hardcoded name for now | |
1529 {}, # No metadata for multiple bigwig | |
1530 ) | |
1531 ) | |
1532 track_conf["category"] = track.attrib["cat"] | |
1533 track_conf["format"] = track.attrib["format"] | |
1534 track_conf["conf"] = etree_to_dict(track.find("options")) | |
1535 track_conf["category"] = track.attrib["cat"] | |
1536 track_conf["format"] = track.attrib["format"] | |
1537 keys = jc.process_annotations(track_conf) | |
1538 | |
1539 if keys: | |
1540 for key in keys: | |
1541 default_session_data["visibility"][ | |
1542 track.attrib.get("visibility", "default_off") | |
1543 ].append(key) | |
1544 if track.find("options/style"): | |
1545 default_session_data["style"][key] = { | |
1546 item.tag: parse_style_conf(item) for item in track.find("options/style") | |
1547 } | |
1548 else: | |
1549 default_session_data["style"][key] = {} | |
1550 logging.warn("@@@@ no options/style found for %s" % (key)) | |
1551 | |
1552 if track.find("options/style_labels"): | |
1553 default_session_data["style_labels"][key] = { | |
1554 item.tag: parse_style_conf(item) | |
1555 for item in track.find("options/style_labels") | |
1556 } | |
1557 default_session_data["defaultLocation"] = root.find( | 1585 default_session_data["defaultLocation"] = root.find( |
1558 "metadata/general/defaultLocation" | 1586 "metadata/general/defaultLocation" |
1559 ).text | 1587 ).text |
1560 default_session_data["session_name"] = root.find( | 1588 default_session_data["session_name"] = root.find( |
1561 "metadata/general/session_name" | 1589 "metadata/general/session_name" |
1569 "tertiary_color": root.find("metadata/general/tertiary_color").text, | 1597 "tertiary_color": root.find("metadata/general/tertiary_color").text, |
1570 "quaternary_color": root.find("metadata/general/quaternary_color").text, | 1598 "quaternary_color": root.find("metadata/general/quaternary_color").text, |
1571 "font_size": root.find("metadata/general/font_size").text, | 1599 "font_size": root.find("metadata/general/font_size").text, |
1572 } | 1600 } |
1573 jc.add_general_configuration(general_data) | 1601 jc.add_general_configuration(general_data) |
1574 trackconf = jc.config_json.get("tracks", None) | 1602 trackconf = jc.config_json.get("tracks", []) |
1575 if trackconf: | 1603 for gnome in jc.genome_names: |
1576 jc.config_json["tracks"].update(jc.tracksToAdd) | 1604 trackconf += jc.tracksToAdd[gnome] |
1577 else: | 1605 jc.config_json["tracks"] = trackconf |
1578 jc.config_json["tracks"] = jc.tracksToAdd | |
1579 jc.write_config() | 1606 jc.write_config() |
1580 jc.add_default_session(default_session_data) | 1607 jc.add_default_session(default_session_data) |
1581 # jc.text_index() not sure what broke here. | 1608 # jc.text_index() not sure what broke here. |