comparison jbrowse2.py @ 46:4181e97c70a7 draft

planemo upload for repository https://github.com/usegalaxy-eu/temporary-tools/tree/master/jbrowse2 commit 3a43e9e0ffce0966101203102e769d1ced28618a
author fubar
date Mon, 04 Mar 2024 09:47:19 +0000
parents bea0916e1080
children 3e53204c2419
comparing 45:0ec526d2d8c1 with 46:4181e97c70a7
--- jbrowse2.py (45:0ec526d2d8c1)
+++ jbrowse2.py (46:4181e97c70a7)
@@ -9,10 +9,11 @@
 import re
 import shutil
 import struct
 import subprocess
 import tempfile
+import urllib.request
 import xml.etree.ElementTree as ET
 from collections import defaultdict
 
 logging.basicConfig(level=logging.INFO)
 log = logging.getLogger("jbrowse")
@@ -444,61 +445,92 @@
             ]
         }
 
     def process_genomes(self):
         assemblies = []
+        useuri = False
         for i, genome_node in enumerate(self.genome_paths):
+            if genome_node["useuri"].strip().lower() == "yes":
+                useuri = True
             genome_name = genome_node["meta"]["dataset_dname"].strip()
             if len(genome_name.split()) > 1:
                 genome_name = genome_name.split()[0]
                 # spaces and cruft break scripts when substituted
             if genome_name not in self.genome_names:
                 # ignore dupes - can have multiple pafs with same references?
                 fapath = genome_node["path"]
-                assem = self.make_assembly(fapath, genome_name)
+                if not useuri:
+                    fapath = os.path.realpath(fapath)
+                assem = self.make_assembly(fapath, genome_name, useuri)
                 assemblies.append(assem)
                 self.genome_names.append(genome_name)
                 if self.genome_name is None:
                     self.genome_name = (
                         genome_name  # first one for all tracks - other than paf
                     )
                     self.genome_firstcontig = None
-                    fl = open(fapath, "r").readline().strip().split(">")
-                    if len(fl) > 1:
-                        fl = fl[1]
-                        if len(fl.split()) > 1:
-                            self.genome_firstcontig = fl.split()[0].strip()
-                        else:
-                            self.genome_firstcontig = fl
+                    if not useuri:
+                        # https://lazarus.name/jbrowse/fish/bigwig_0_coverage_bedgraph_cov_count_count_bw.bigwig
+                        # https://lazarus.name/jbrowse/fish/klBraLanc5.haps_combined.decontam.20230620.fasta.fa.gz
+                        fl = open(fapath, "r").readline()
+                        fls = fl.strip().split(">")
+                        if len(fls) > 1:
+                            fl = fls[1]
+                            if len(fl.split()) > 1:
+                                self.genome_firstcontig = fl.split()[0].strip()
+                            else:
+                                self.genome_firstcontig = fl
+                    else:
+                        fl = urllib.request.urlopen(fapath + ".fai").readline()
+                        if fl:  # is first row of the text fai so the first contig name
+                            self.genome_firstcontig = fl.decode("utf8").strip().split()[0]
         if self.config_json.get("assemblies", None):
             self.config_json["assemblies"] += assemblies
         else:
             self.config_json["assemblies"] = assemblies
 
-    def make_assembly(self, fapath, gname):
-
-        faname = gname + ".fa.gz"
-        fadest = os.path.realpath(os.path.join(self.outdir, faname))
-        cmd = "bgzip -i -c %s -I %s.gzi > %s && samtools faidx %s" % (
-            fapath,
-            fadest,
-            fadest,
-            fadest,
-        )
-        self.subprocess_popen(cmd)
-        adapter = {
-            "type": "BgzipFastaAdapter",
-            "fastaLocation": {
-                "uri": faname,
-            },
-            "faiLocation": {
-                "uri": faname + ".fai",
-            },
-            "gziLocation": {
-                "uri": faname + ".gzi",
-            },
-        }
+    def make_assembly(self, fapath, gname, useuri):
+        if useuri:
+            faname = fapath
+            adapter = {
+                "type": "BgzipFastaAdapter",
+                "fastaLocation": {
+                    "uri": faname,
+                    "locationType": "UriLocation",
+                },
+                "faiLocation": {
+                    "uri": faname + ".fai",
+                    "locationType": "UriLocation",
+                },
+                "gziLocation": {
+                    "uri": faname + ".gzi",
+                    "locationType": "UriLocation",
+                },
+            }
+        else:
+            faname = gname + ".fa.gz"
+            fadest = os.path.realpath(os.path.join(self.outdir, faname))
+            cmd = "bgzip -i -c %s -I %s.gzi > %s && samtools faidx %s" % (
                fapath,
+                fadest,
+                fadest,
+                fadest,
+            )
+            self.subprocess_popen(cmd)
+
+            adapter = {
+                "type": "BgzipFastaAdapter",
+                "fastaLocation": {
+                    "uri": faname,
+                },
+                "faiLocation": {
+                    "uri": faname + ".fai",
+                },
+                "gziLocation": {
+                    "uri": faname + ".gzi",
+                },
+            }
         self.genome_sequence_adapter = adapter
         trackDict = {
             "name": gname,
             "sequence": {
                 "type": "ReferenceSequenceTrack",
@@ -526,11 +558,11 @@
         ]
         self.subprocess_check_call(cmd)
 
     def write_config(self):
         with open(self.config_json_file, "w") as fp:
-            json.dump(self.config_json, fp)
+            json.dump(self.config_json, fp, indent=2)
 
     def text_index(self):
         # Index tracks
         args = [
             "jbrowse",
565 """ 597 """
566 tId = trackData["label"] 598 tId = trackData["label"]
567 # can be served - if public. 599 # can be served - if public.
568 # dsId = trackData["metadata"]["dataset_id"] 600 # dsId = trackData["metadata"]["dataset_id"]
569 # url = "%s/api/datasets/%s/display?to_ext=hic " % (self.giURL, dsId) 601 # url = "%s/api/datasets/%s/display?to_ext=hic " % (self.giURL, dsId)
570 hname = trackData["hic_url"] 602 useuri = trackData["useuri"].lower() == "yes"
571 floc = { 603 if useuri:
572 "uri": hname, 604 uri = data
573 } 605 else:
606 uri = trackData["hic_url"]
574 trackDict = { 607 trackDict = {
575 "type": "HicTrack", 608 "type": "HicTrack",
576 "trackId": tId, 609 "trackId": tId,
577 "name": hname, 610 "name": uri,
578 "assemblyNames": [self.genome_name], 611 "assemblyNames": [self.genome_name],
579 "adapter": { 612 "adapter": {
580 "type": "HicAdapter", 613 "type": "HicAdapter",
581 "hicLocation": floc, 614 "hicLocation": uri,
582 }, 615 },
583 "displays": [ 616 "displays": [
584 { 617 {
585 "type": "LinearHicDisplay", 618 "type": "LinearHicDisplay",
586 "displayId": "%s-LinearHicDisplay" % tId, 619 "displayId": "%s-LinearHicDisplay" % tId,
@@ -597,19 +630,20 @@
         from https://github.com/cmdcolin/maf2bed
         Note: Both formats start with a MAF as input, and note that your MAF file should contain the species name and chromosome name
         e.g. hg38.chr1 in the sequence identifiers.
         need the reference id - eg hg18, for maf2bed.pl as the first parameter
         """
+        tId = trackData["label"]
         mafPlugin = {
             "plugins": [
                 {
                     "name": "MafViewer",
                     "url": "https://unpkg.com/jbrowse-plugin-mafviewer/dist/jbrowse-plugin-mafviewer.umd.production.min.js",
                 }
             ]
         }
-        tId = trackData["label"]
+
         fname = "%s.bed" % tId
         dest = "%s/%s" % (self.outdir, fname)
         gname = self.genome_name
         cmd = [
             "bash",
742 "resolution": 1, 776 "resolution": 1,
743 "posColor": "rgb(228, 26, 28)", 777 "posColor": "rgb(228, 26, 28)",
744 "negColor": "rgb(255, 255, 51)", 778 "negColor": "rgb(255, 255, 51)",
745 "constraints": {} 779 "constraints": {}
746 """ 780 """
747 url = "%s.bigwig" % trackData["label"] 781 useuri = trackData["useuri"].lower() == "yes"
748 # slashes in names cause path trouble 782 if useuri:
749 dest = os.path.join(self.outdir, url) 783 url = data
750 cmd = ["cp", data, dest] 784 else:
751 self.subprocess_check_call(cmd) 785 url = "%s.bigwig" % trackData["label"]
786 # slashes in names cause path trouble
787 dest = os.path.join(self.outdir, url)
788 cmd = ["cp", data, dest]
789 self.subprocess_check_call(cmd)
752 bwloc = {"uri": url} 790 bwloc = {"uri": url}
753 tId = trackData["label"] 791 tId = trackData["label"]
754 trackDict = { 792 trackDict = {
755 "type": "QuantitativeTrack", 793 "type": "QuantitativeTrack",
756 "trackId": tId, 794 "trackId": tId,
@@ -772,42 +810,48 @@
         style_json = self._prepare_track_style(trackDict)
         trackDict["style"] = style_json
         self.tracksToAdd.append(trackDict)
         self.trackIdlist.append(tId)
 
-    def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs):
+    def add_bam(self, data, trackData, bam_index=None, **kwargs):
         tId = trackData["label"]
-        fname = "%s.bam" % trackData["label"]
-        dest = "%s/%s" % (self.outdir, fname)
-        url = fname
-        self.subprocess_check_call(["cp", data, dest])
-        bloc = {"uri": url}
-        if bam_index is not None and os.path.exists(os.path.realpath(bam_index)):
-            # bai most probably made by galaxy and stored in galaxy dirs, need to copy it to dest
-            self.subprocess_check_call(
-                ["cp", os.path.realpath(bam_index), dest + ".bai"]
-            )
-        else:
-            # Can happen in exotic condition
-            # e.g. if bam imported as symlink with datatype=unsorted.bam, then datatype changed to bam
-            # => no index generated by galaxy, but there might be one next to the symlink target
-            # this trick allows to skip the bam sorting made by galaxy if already done outside
-            if os.path.exists(os.path.realpath(data) + ".bai"):
-                self.symlink_or_copy(os.path.realpath(data) + ".bai", dest + ".bai")
-            else:
-                log.warn("Could not find a bam index (.bai file) for %s", data)
+        useuri = trackData["useuri"].lower() == "yes"
+        bindex = bam_index
+        if useuri:
+            url = data
+        else:
+            fname = "%s.bam" % trackData["label"]
+            dest = "%s/%s" % (self.outdir, fname)
+            url = fname
+            bindex = fname + ".bai"
+            self.subprocess_check_call(["cp", data, dest])
+            if bam_index is not None and os.path.exists(bam_index):
+                if not os.path.exists(bindex):
+                    # bai most probably made by galaxy and stored in galaxy dirs, need to copy it to dest
+                    self.subprocess_check_call(
+                        ["cp", bam_index, bindex]
+                    )
+            else:
+                # Can happen in exotic condition
+                # e.g. if bam imported as symlink with datatype=unsorted.bam, then datatype changed to bam
+                # => no index generated by galaxy, but there might be one next to the symlink target
+                # this trick allows to skip the bam sorting made by galaxy if already done outside
+                if os.path.exists(os.path.realpath(data) + ".bai"):
+                    self.symlink_or_copy(os.path.realpath(data) + ".bai", bindex)
+                else:
+                    log.warn("Could not find a bam index (.bai file) for %s", data)
         trackDict = {
             "type": "AlignmentsTrack",
             "trackId": tId,
             "name": trackData["name"],
             "assemblyNames": [self.genome_name],
             "adapter": {
                 "type": "BamAdapter",
-                "bamLocation": bloc,
+                "bamLocation": {"uri": url},
                 "index": {
                     "location": {
-                        "uri": fname + ".bai",
+                        "uri": bindex,
                     }
                 },
             },
             "displays": [
                 {
@@ -819,41 +863,50 @@
         style_json = self._prepare_track_style(trackDict)
         trackDict["style"] = style_json
         self.tracksToAdd.append(trackDict)
         self.trackIdlist.append(tId)
 
-    def add_cram(self, data, trackData, cramOpts, cram_index=None, **kwargs):
+    def add_cram(self, data, trackData, cram_index=None, **kwargs):
         tId = trackData["label"]
-        fname = "%s.cram" % trackData["label"]
-        dest = "%s/%s" % (self.outdir, fname)
-        url = fname
-        self.subprocess_check_call(["cp", data, dest])
-        bloc = {"uri": url}
-        if cram_index is not None and os.path.exists(os.path.realpath(cram_index)):
-            # most probably made by galaxy and stored in galaxy dirs, need to copy it to dest
-            self.subprocess_check_call(
-                ["cp", os.path.realpath(cram_index), dest + ".crai"]
-            )
-        else:
-            # Can happen in exotic condition
-            # e.g. if bam imported as symlink with datatype=unsorted.bam, then datatype changed to bam
-            # => no index generated by galaxy, but there might be one next to the symlink target
-            # this trick allows to skip the bam sorting made by galaxy if already done outside
-            if os.path.exists(os.path.realpath(data) + ".crai"):
-                self.symlink_or_copy(os.path.realpath(data) + ".crai", dest + ".crai")
-            else:
-                log.warn("Could not find a cram index (.crai file) for %s", data)
+        useuri = trackData["useuri"].lower() == "yes"
+        bindex = cram_index
+        if useuri:
+            url = data
+        else:
+            fname = "%s.cram" % trackData["label"]
+            dest = "%s/%s" % (self.outdir, fname)
+            bindex = fname + ".crai"
+            url = fname
+            self.subprocess_check_call(["cp", data, dest])
+
+            if bindex is not None and os.path.exists(bindex):
+                if not os.path.exists(dest + ".crai"):
+                    # most probably made by galaxy and stored in galaxy dirs, need to copy it to dest
+                    self.subprocess_check_call(
+                        ["cp", os.path.realpath(cram_index), dest + ".crai"]
+                    )
+            else:
+                # Can happen in exotic condition
+                # e.g. if bam imported as symlink with datatype=unsorted.bam, then datatype changed to bam
+                # => no index generated by galaxy, but there might be one next to the symlink target
+                # this trick allows to skip the bam sorting made by galaxy if already done outside
+                if os.path.exists(os.path.realpath(data) + ".crai"):
+                    self.symlink_or_copy(
+                        os.path.realpath(data) + ".crai", dest + ".crai"
+                    )
+                else:
+                    log.warn("Could not find a cram index (.crai file) for %s", data)
         trackDict = {
             "type": "AlignmentsTrack",
             "trackId": tId,
             "name": trackData["name"],
             "assemblyNames": [self.genome_name],
             "adapter": {
                 "type": "CramAdapter",
-                "cramLocation": bloc,
+                "cramLocation": {"uri": url},
                 "craiLocation": {
-                    "uri": fname + ".crai",
+                    "uri": bindex,
                 },
                 "sequenceAdapter": self.genome_sequence_adapter,
             },
             "displays": [
                 {
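For locally staged BAM and CRAM, the index fallback looks next to the symlink target of the dataset, which skips re-sorting when a pre-indexed file was imported into Galaxy by link. A minimal sketch of that check (the helper name is illustrative):

import os


def find_index_beside_target(data, ext):
    # ext is ".bai" or ".crai"; returns the neighbouring index if it exists.
    candidate = os.path.realpath(data) + ext
    return candidate if os.path.exists(candidate) else None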
@@ -871,25 +924,30 @@
         tId = trackData["label"]
         # url = "%s/api/datasets/%s/display" % (
         # self.giURL,
         # trackData["metadata"]["dataset_id"],
         # )
-        url = "%s.vcf.gz" % tId
-        dest = "%s/%s" % (self.outdir, url)
-        cmd = "bgzip -c %s > %s" % (data, dest)
-        self.subprocess_popen(cmd)
-        cmd = ["tabix", "-f", "-p", "vcf", dest]
-        self.subprocess_check_call(cmd)
+
+        useuri = trackData["useuri"].lower() == "yes"
+        if useuri:
+            url = data
+        else:
+            url = "%s.vcf.gz" % tId
+            dest = "%s/%s" % (self.outdir, url)
+            cmd = "bgzip -c %s > %s" % (data, dest)
+            self.subprocess_popen(cmd)
+            cmd = ["tabix", "-f", "-p", "vcf", dest]
+            self.subprocess_check_call(cmd)
         trackDict = {
             "type": "VariantTrack",
             "trackId": tId,
             "name": trackData["name"],
             "assemblyNames": [self.genome_name],
             "adapter": {
                 "type": "VcfTabixAdapter",
                 "vcfGzLocation": {
-                    "uri": url,
+                    "uri": url
                 },
                 "index": {
                     "location": {
                         "uri": url + ".tbi",
                     }
@@ -915,31 +973,34 @@
         self.tracksToAdd.append(trackDict)
         self.trackIdlist.append(tId)
 
     def _sort_gff(self, data, dest):
         # Only index if not already done
-        if not os.path.exists(dest + ".gz"):
-            cmd = "jbrowse sort-gff '%s' | bgzip -c > '%s.gz'" % (
+        if not os.path.exists(dest):
+            cmd = "jbrowse sort-gff '%s' | bgzip -c > '%s'" % (
                 data,
                 dest,
             )  # "gff3sort.pl --precise '%s' | grep -v \"^$\" > '%s'"
             self.subprocess_popen(cmd)
-            self.subprocess_check_call(["tabix", "-f", "-p", "gff", dest + ".gz"])
+            self.subprocess_check_call(["tabix", "-f", "-p", "gff", dest])
 
     def _sort_bed(self, data, dest):
         # Only index if not already done
         if not os.path.exists(dest):
             cmd = "sort -k1,1 -k2,2n '%s' | bgzip -c > '%s'" % (data, dest)
             self.subprocess_popen(cmd)
             cmd = ["tabix", "-f", "-p", "bed", dest]
             self.subprocess_check_call(cmd)
 
     def add_gff(self, data, ext, trackData):
-        url = "%s.%s" % (trackData["label"], ext)
-        dest = "%s/%s" % (self.outdir, url)
-        self._sort_gff(data, dest)
-        url = url + ".gz"
+        useuri = trackData["useuri"].lower() == "yes"
+        if useuri:
+            url = trackData["path"]
+        else:
+            url = "%s.%s.gz" % (trackData["label"], ext)
+            dest = "%s/%s" % (self.outdir, url)
+            self._sort_gff(data, dest)
         tId = trackData["label"]
         trackDict = {
             "type": "FeatureTrack",
             "trackId": tId,
             "name": trackData["name"],
@@ -970,15 +1031,18 @@
         trackDict["style"] = style_json
         self.tracksToAdd.append(trackDict)
         self.trackIdlist.append(tId)
 
     def add_bed(self, data, ext, trackData):
-        url = "%s.%s" % (trackData["label"], ext)
-        dest = "%s/%s.gz" % (self.outdir, url)
-        self._sort_bed(data, dest)
         tId = trackData["label"]
-        url = url + ".gz"
+        useuri = trackData["useuri"].lower() == "yes"
+        if useuri:
+            url = data
+        else:
+            url = "%s.%s.gz" % (trackData["label"], ext)
+            dest = "%s/%s" % (self.outdir, url)
+            self._sort_bed(data, dest)
         trackDict = {
             "type": "FeatureTrack",
             "trackId": tId,
             "name": trackData["name"],
             "assemblyNames": [self.genome_name],
@@ -1066,23 +1130,26 @@
     def process_annotations(self, track):
         category = track["category"].replace("__pd__date__pd__", TODAY)
         for i, (
             dataset_path,
             dataset_ext,
+            useuri,
             track_human_label,
             extra_metadata,
         ) in enumerate(track["trackfiles"]):
-            # Unsanitize labels (element_identifiers are always sanitized by Galaxy)
-            for key, value in mapped_chars.items():
-                track_human_label = track_human_label.replace(value, key)
-            track_human_label = track_human_label.replace(" ", "_")
+            if not dataset_path.strip().startswith("http"):
+                # Unsanitize labels (element_identifiers are always sanitized by Galaxy)
+                for key, value in mapped_chars.items():
+                    track_human_label = track_human_label.replace(value, key)
+                track_human_label = track_human_label.replace(" ", "_")
             outputTrackConfig = {
                 "category": category,
                 "style": {},
             }
 
             outputTrackConfig["key"] = track_human_label
+            outputTrackConfig["useuri"] = useuri
 
             outputTrackConfig["trackset"] = track.get("trackset", {})
             outputTrackConfig["label"] = "%s_%i_%s" % (
                 dataset_ext,
                 i,
@@ -1137,29 +1204,21 @@
                     dataset_path,
                     outputTrackConfig,
                 )
             elif dataset_ext == "bam":
                 real_indexes = track["conf"]["options"]["bam"]["bam_index"]
-                if not isinstance(real_indexes, list):
-                    real_indexes = [real_indexes]
-
                 self.add_bam(
                     dataset_path,
                     outputTrackConfig,
-                    track["conf"]["options"]["bam"],
-                    bam_index=real_indexes[i],
+                    bam_index=real_indexes,
                 )
             elif dataset_ext == "cram":
-                real_indexes = track["conf"]["options"]["cram"][ "cram_index"]
-                if not isinstance(real_indexes, list):
-                    real_indexes = [real_indexes]
-
+                real_indexes = track["conf"]["options"]["cram"]["cram_index"]
                 self.add_cram(
                     dataset_path,
                     outputTrackConfig,
-                    track["conf"]["options"]["cram"],
-                    cram_index=real_indexes[i],
+                    cram_index=real_indexes,
                 )
             elif dataset_ext == "blastxml":
                 self.add_blastxml(
                     dataset_path,
                     outputTrackConfig,
@@ -1219,10 +1278,11 @@
         drdict = {
             "reversed": False,
             "assemblyName": self.genome_name,
             "start": 0,
             "end": 100000,
+            "refName": "x",
         }
 
         if data.get("defaultLocation", ""):
             ddl = data["defaultLocation"]
             loc_match = re.search(r"^([^:]+):([\d,]*)\.*([\d,]*)$", ddl)
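The defaultLocation string is parsed with the regular expression shown above; a short illustration of the groups it yields (the comma stripping here is only part of this example, not taken from the script):

import re

loc_match = re.search(r"^([^:]+):([\d,]*)\.*([\d,]*)$", "chr1:10,000..20,000")
if loc_match:
    refname = loc_match.group(1)  # "chr1"
    start = int(loc_match.group(2).replace(",", ""))  # 10000
    end = int(loc_match.group(3).replace(",", ""))  # 20000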
@@ -1305,13 +1365,13 @@
         self.config_json.update(config_json)
         with open(config_path, "w") as config_file:
             json.dump(self.config_json, config_file, indent=2)
 
     def clone_jbrowse(self):
-        """Clone a JBrowse directory into a destination directory. This also works in Biocontainer testing now """
+        """Clone a JBrowse directory into a destination directory. This also works in Biocontainer testing now"""
         dest = self.outdir
-        #self.subprocess_check_call(['jbrowse', 'create', dest, '--tag', f"{JB_VER}"])
+        # self.subprocess_check_call(['jbrowse', 'create', dest, '--tag', f"{JB_VER}"])
         shutil.copytree(self.jbrowse2path, dest, dirs_exist_ok=True)
         for fn in [
             "asset-manifest.json",
             "favicon.ico",
             "robots.txt",
@@ -1339,11 +1399,13 @@
 
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="", epilog="")
     parser.add_argument("--xml", help="Track Configuration")
-    parser.add_argument("--jbrowse2path", help="Path to JBrowse2 directory in biocontainer or Conda")
+    parser.add_argument(
+        "--jbrowse2path", help="Path to JBrowse2 directory in biocontainer or Conda"
+    )
     parser.add_argument("--outdir", help="Output directory", default="out")
     parser.add_argument("--version", "-V", action="version", version="%(prog)s 2.0.1")
     args = parser.parse_args()
     tree = ET.parse(args.xml)
     root = tree.getroot()
@@ -1358,11 +1420,13 @@
     jc = JbrowseConnector(
         outdir=args.outdir,
         jbrowse2path=args.jbrowse2path,
         genomes=[
             {
-                "path": os.path.realpath(x.attrib["path"]),
+                "path": x.attrib["path"],
+                "label": x.attrib["label"],
+                "useuri": x.attrib["useuri"],
                 "meta": metadata_from_node(x.find("metadata")),
             }
             for x in root.findall("metadata/genomes/genome")
         ],
     )
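The genome list is read from genome elements carrying path, label and useuri attributes plus a nested metadata node. A self-contained illustration with made-up attribute values and a placeholder root tag:

import xml.etree.ElementTree as ET

snippet = """
<root>
  <metadata>
    <genomes>
      <genome path="https://example.org/ref.fa.gz" label="ref" useuri="yes">
        <metadata />
      </genome>
    </genomes>
  </metadata>
</root>
"""
root = ET.fromstring(snippet)
for x in root.findall("metadata/genomes/genome"):
    print(x.attrib["path"], x.attrib["label"], x.attrib["useuri"])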
@@ -1393,29 +1457,40 @@
             pass
 
         trackfiles = track.findall("files/trackFile")
         if trackfiles:
             for x in track.findall("files/trackFile"):
+                track_conf["useuri"] = x.attrib["useuri"]
                 if is_multi_bigwig:
                     multi_bigwig_paths.append(
                         (
                             x.attrib["label"],
+                            x.attrib["useuri"],
                             os.path.realpath(x.attrib["path"]),
                         )
                     )
                 else:
                     if trackfiles:
                         metadata = metadata_from_node(x.find("metadata"))
                         track_conf["dataset_id"] = metadata["dataset_id"]
-                        track_conf["trackfiles"].append(
-                            (
-                                os.path.realpath(x.attrib["path"]),
-                                x.attrib["ext"],
-                                x.attrib["label"],
-                                metadata,
-                            )
-                        )
+                        if x.attrib["useuri"].lower() == "yes":
+                            tfa = (
+                                x.attrib["path"],
+                                x.attrib["ext"],
+                                x.attrib["useuri"],
+                                x.attrib["label"],
+                                metadata,
+                            )
+                        else:
+                            tfa = (
+                                os.path.realpath(x.attrib["path"]),
+                                x.attrib["ext"],
+                                x.attrib["useuri"],
+                                x.attrib["label"],
+                                metadata,
+                            )
+                        track_conf["trackfiles"].append(tfa)
 
         if is_multi_bigwig:
             metadata = metadata_from_node(x.find("metadata"))
 
             track_conf["trackfiles"].append(
@@ -1445,11 +1520,10 @@
             # Only pertains to gff3 + blastxml. TODO?
             track_conf["style"] = {t.tag: t.text for t in track.find("options/style")}
         except TypeError:
             track_conf["style"] = {}
             pass
-        track_conf["conf"] = etree_to_dict(track.find("options"))
         keys = jc.process_annotations(track_conf)
 
         if keys:
             for key in keys:
                 default_session_data["visibility"][