diff jbrowse2.py @ 62:ab0d6782a95f draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 2b6d4a24585beb1ba5055e5d34aacb3b299b1943-dirty
| author | fubar |
| --- | --- |
| date | Thu, 28 Mar 2024 04:51:06 +0000 |
| parents | e7a6f7a7148d |
| children | ac00dcfb5d1d |
--- a/jbrowse2.py	Tue Mar 26 00:52:34 2024 +0000
+++ b/jbrowse2.py	Thu Mar 28 04:51:06 2024 +0000
@@ -1,4 +1,4 @@
- #!/usr/bin/env python
+#!/usr/bin/env python
 import argparse
 import binascii
@@ -15,15 +15,15 @@
 import xml.etree.ElementTree as ET
 from collections import defaultdict

-logging.basicConfig(level=logging.INFO)
+logging.basicConfig(level=logging.DEBUG)
 log = logging.getLogger("jbrowse")

 JB2VER = "v2.10.3"
 # version pinned for cloning

 TODAY = datetime.datetime.now().strftime("%Y-%m-%d")
+SELF_LOCATION = os.path.dirname(os.path.realpath(__file__))
 GALAXY_INFRASTRUCTURE_URL = None
-
 mapped_chars = {
     ">": "__gt__",
     "<": "__lt__",
@@ -341,23 +341,23 @@
     if node.findall("metadata"):
         for (key, value) in node.findall("metadata")[0].attrib.items():
             metadata["metadata_%s" % key] = value
-            # Additional Mappings applied:
-            metadata[
-                "dataset_edam_format"
-            ] = '<a target="_blank" href="http://edamontology.org/{0}">{1}</a>'.format(
-                metadata["dataset_edam_format"], metadata["dataset_file_ext"]
-            )
-            metadata["history_user_email"] = '<a href="mailto:{0}">{0}</a>'.format(
-                metadata["history_user_email"]
-            )
-            metadata["hist_name"] = metadata["history_display_name"]
-            metadata[
-                "history_display_name"
-            ] = '<a target="_blank" href="{galaxy}/history/view/{encoded_hist_id}">{hist_name}</a>'.format(
-                galaxy=GALAXY_INFRASTRUCTURE_URL,
-                encoded_hist_id=metadata["history_id"],
-                hist_name=metadata["history_display_name"],
-            )
+        # Additional Mappings applied:
+        metadata[
+            "dataset_edam_format"
+        ] = '<a target="_blank" href="http://edamontology.org/{0}">{1}</a>'.format(
+            metadata["dataset_edam_format"], metadata["dataset_file_ext"]
+        )
+        metadata["history_user_email"] = '<a href="mailto:{0}">{0}</a>'.format(
+            metadata["history_user_email"]
+        )
+        metadata["hist_name"] = metadata["history_display_name"]
+        metadata[
+            "history_display_name"
+        ] = '<a target="_blank" href="{galaxy}/history/view/{encoded_hist_id}">{hist_name}</a>'.format(
+            galaxy=GALAXY_INFRASTRUCTURE_URL,
+            encoded_hist_id=metadata.get("history_id", "not available"),
+            hist_name=metadata.get("history_display_name", "not available"),
+        )
     if node.findall("tool"):
         for (key, value) in node.findall("tool")[0].attrib.items():
             metadata["tool_%s" % key] = value
@@ -373,33 +373,41 @@


 class JbrowseConnector(object):
-    def __init__(self, outdir, jbrowse2path, genomes):
+    def __init__(self, outdir, jbrowse2path):
+        self.assemblies = []  # these require more than a few line diff.
+        self.assmeta = {}
         self.giURL = GALAXY_INFRASTRUCTURE_URL
         self.outdir = outdir
+        self.genome_firstcontig = None
         self.jbrowse2path = jbrowse2path
         os.makedirs(self.outdir, exist_ok=True)
-        self.genome_paths = genomes
-        self.genome_name = None
         self.genome_names = []
         self.trackIdlist = []
-        self.tracksToAdd = []
+        self.tracksToAdd = {}
         self.config_json = {}
         self.config_json_file = os.path.join(outdir, "config.json")
         self.clone_jbrowse()

-    def subprocess_check_call(self, command, output=None):
-        if output:
-            log.debug("cd %s && %s > %s", self.outdir, " ".join(command), output)
-            subprocess.check_call(command, cwd=self.outdir, stdout=output)
+    def get_cwd(self, cwd):
+        if cwd:
+            return self.outdir
         else:
-            log.debug("cd %s && %s", self.outdir, " ".join(command))
-            subprocess.check_call(command, cwd=self.outdir)
+            return subprocess.check_output(["pwd"]).decode("utf-8").strip()
+            # return None

-    def subprocess_popen(self, command):
+    def subprocess_check_call(self, command, output=None, cwd=True):
+        if output:
+            log.debug("cd %s && %s > %s", self.get_cwd(cwd), " ".join(command), output)
+            subprocess.check_call(command, cwd=self.get_cwd(cwd), stdout=output)
+        else:
+            log.debug("cd %s && %s", self.get_cwd(cwd), " ".join(command))
+            subprocess.check_call(command, cwd=self.get_cwd(cwd))
+
+    def subprocess_popen(self, command, cwd=True):
         log.debug(command)
         p = subprocess.Popen(
             command,
-            cwd=self.outdir,
+            cwd=self.get_cwd(cwd),
             shell=True,
             stdin=subprocess.PIPE,
             stdout=subprocess.PIPE,
@@ -444,72 +452,72 @@
         }
         return wstyle

-    def process_genomes(self):
-        assemblies = []
+    def process_genomes(self, genomes):
+        assembly = []
+        assmeta = []
         useuri = False
-        for i, genome_node in enumerate(self.genome_paths):
-            if genome_node["useuri"].strip().lower() == "yes":
+        genome_names = []
+        for i, genome_node in enumerate(genomes):
+            this_genome = {}
+            if genome_node["useuri"] == "yes":
                 useuri = True
-            genome_name = genome_node["meta"]["dataset_dname"].strip()
+            genome_name = genome_node["label"].strip()
             if len(genome_name.split()) > 1:
                 genome_name = genome_name.split()[0]
                 # spaces and cruft break scripts when substituted
-            if genome_name not in self.genome_names:
+            if genome_name not in genome_names:
                 # pafs with shared references
                 fapath = genome_node["path"]
                 if not useuri:
                     fapath = os.path.realpath(fapath)
                 assem = self.make_assembly(fapath, genome_name, useuri)
-                assemblies.append(assem)
-                self.genome_names.append(genome_name)
-                if self.genome_name is None:
-                    self.genome_name = (
-                        genome_name  # first one for all tracks
-                    )
-                    self.genome_sequence_adapter = assem["sequence"]["adapter"]
-                    self.genome_firstcontig = None
+                assembly.append(assem)
+                if len(genome_names) == 0:
+                    this_genome["genome_name"] = genome_name  # first one for all tracks
+                genome_names.append(genome_name)
+                this_genome["genome_sequence_adapter"] = assem["sequence"][
+                    "adapter"
+                ]
+                this_genome["genome_firstcontig"] = None
                 if not useuri:
                     fl = open(fapath, "r").readline()
                     fls = fl.strip().split(">")
                     if len(fls) > 1:
                         fl = fls[1]
                         if len(fl.split()) > 1:
-                            self.genome_firstcontig = fl.split()[0].strip()
+                            this_genome["genome_firstcontig"] = fl.split()[
+                                0
+                            ].strip()
                         else:
-                            self.genome_firstcontig = fl
+                            this_genome["genome_firstcontig"] = fl
                 else:
                     try:
                         fl = urllib.request.urlopen(fapath + ".fai").readline()
                     except:
                         fl = None
                     if fl:  # is first row of the text fai so the first contig name
-                        self.genome_firstcontig = (
+                        this_genome["genome_firstcontig"] = (
                             fl.decode("utf8").strip().split()[0]
                         )
-                    else:
-                        self.genome_firstcontig = None
+            assmeta.append(this_genome)
+        self.assemblies += assembly
+        self.assmeta[genome_names[0]] = assmeta
+        self.tracksToAdd[genome_names[0]] = []
         if self.config_json.get("assemblies", None):
-            self.config_json["assemblies"] += assemblies
+            self.config_json["assemblies"] += assembly
         else:
-            self.config_json["assemblies"] = assemblies
+            self.config_json["assemblies"] = assembly
+        self.genome_names += genome_names
+        return this_genome["genome_name"]

     def make_assembly(self, fapath, gname, useuri):
         if useuri:
             faname = fapath
             adapter = {
                 "type": "BgzipFastaAdapter",
-                "fastaLocation": {
-                    "uri": faname,
-                    "locationType": "UriLocation"
-                },
-                "faiLocation": {
-                    "uri": faname + ".fai",
-                    "locationType": "UriLocation"
-                },
-                "gziLocation": {
-                    "uri": faname + ".gzi",
-                    "locationType": "UriLocation"
-                }
+                "fastaLocation": {"uri": faname, "locationType": "UriLocation"},
+                "faiLocation": {"uri": faname + ".fai", "locationType": "UriLocation"},
+                "gziLocation": {"uri": faname + ".gzi", "locationType": "UriLocation"},
             }
         else:
             faname = gname + ".fa.gz"
@@ -532,7 +540,7 @@
                 },
                 "gziLocation": {
                     "uri": faname + ".gzi",
-                }
+                },
             }

         trackDict = {
@@ -551,7 +559,7 @@
                     "type": "LinearGCContentDisplay",
                     "displayId": "%s-LinearGCContentDisplay" % gname,
                 },
-            ]
+            ],
         }
         return trackDict

@@ -582,7 +590,7 @@
             "jbrowse",
             "text-index",
             "--target",
-            os.path.join(self.outdir, "data"),
+            self.outdir,
             "--assemblies",
             self.genome_name,
         ]
@@ -628,17 +636,14 @@
         trackDict = {
             "type": "HicTrack",
             "trackId": tId,
-            "name": trackData["name"],
-            "assemblyNames": [self.genome_name],
+            "name": trackData["name"],
+            "assemblyNames": [trackData["assemblyNames"]],
             "category": [
                 categ,
             ],
-            "adapter": {
-                "type": "HicAdapter",
-                "hicLocation": { "uri": uri }
-            }
+            "adapter": {"type": "HicAdapter", "hicLocation": {"uri": uri}},
         }
-        self.tracksToAdd.append(trackDict)
+        self.tracksToAdd[trackData["assemblyNames"]].append(trackDict)
         self.trackIdlist.append(tId)

     def add_maf(self, data, trackData):
@@ -660,7 +665,7 @@
         categ = trackData["category"]
         fname = "%s" % tId
         dest = "%s/%s" % (self.outdir, fname)
-        gname = self.genome_name
+        gname = trackData["assemblyNames"]

         cmd = [
             "bash",
@@ -671,13 +676,16 @@
             dest,
         ]
         self.subprocess_check_call(cmd)
-        mafs = open(data,'r').readlines()
-        mafss = [x for x in mafs if (x.startswith('s\t') or x.startswith('s '))]
+        mafs = open(data, "r").readlines()
+        mafss = [x for x in mafs if (x.startswith("s\t") or x.startswith("s "))]
         samp = [x.split()[1] for x in mafss if len(x.split()) > 0]
         sampu = list(dict.fromkeys(samp))
-        samples = [x.split('.')[0] for x in sampu]
+        samples = [x.split(".")[0] for x in sampu]
         samples.sort()
-        logging.warn("$$$$ cmd=%s, mafss=%s samp=%s samples=%s" % (' '.join(cmd), mafss, samp, samples))
+        logging.warn(
+            "$$$$ cmd=%s, mafss=%s samp=%s samples=%s"
+            % (" ".join(cmd), mafss, samp, samples)
+        )
         trackDict = {
             "type": "MafTrack",
             "trackId": tId,
@@ -697,21 +705,18 @@
                     },
                 },
             },
-            "assemblyNames": [self.genome_name],
+            "assemblyNames": [trackData["assemblyNames"]],
             "displays": [
                 {
                     "type": "LinearBasicDisplay",
-                    "displayId": "%s-LinearBasicDisplay" % tId
+                    "displayId": "%s-LinearBasicDisplay" % tId,
                 },
-                {
-                    "type": "LinearArcDisplay",
-                    "displayId": "%s-LinearArcDisplay" % tId
-                },
-            ]
+                {"type": "LinearArcDisplay", "displayId": "%s-LinearArcDisplay" % tId},
+            ],
         }
         style_json = self._prepare_track_style(trackDict)
         trackDict["style"] = style_json
-        self.tracksToAdd.append(trackDict)
+        self.tracksToAdd[gname].append(trackDict)
         self.trackIdlist.append(tId)
         if self.config_json.get("plugins", None):
             self.config_json["plugins"].append(mafPlugin[0])
@@ -732,7 +737,7 @@
         ]
         subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_unrebased)
         gff3_unrebased.close()
-        logging.warn("### blastxml to gff3 cmd = %s" % ' '.join(cmd))
+        logging.warn("### blastxml to gff3 cmd = %s" % " ".join(cmd))
         return gff3_unrebased.name

     def add_blastxml(self, data, trackData, blastOpts, **kwargs):
@@ -744,7 +749,7 @@
             cmd.append("--protein2dna")
         cmd.extend([os.path.realpath(blastOpts["parent"]), gff3])
         subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_rebased)
-        logging.warn("### gff3rebase cmd = %s" % ' '.join(cmd))
+        logging.warn("### gff3rebase cmd = %s" % " ".join(cmd))
         gff3_rebased.close()
         # Replace original gff3 file
         shutil.copy(gff3_rebased.name, gff3)
@@ -758,7 +763,7 @@
             "type": "FeatureTrack",
             "trackId": tId,
             "name": trackData["name"],
-            "assemblyNames": [self.genome_name],
+            "assemblyNames": [trackData["assemblyNames"]],
             "category": [
                 categ,
             ],
@@ -786,7 +791,7 @@
         }
         style_json = self._prepare_track_style(trackDict)
         trackDict["style"] = style_json
-        self.tracksToAdd.append(trackDict)
+        self.tracksToAdd[trackData["assemblyNames"]].append(trackDict)
         self.trackIdlist.append(tId)
         os.unlink(gff3)

@@ -810,9 +815,7 @@
             "category": [
                 categ,
             ],
-            "assemblyNames": [
-                self.genome_name,
-            ],
+            "assemblyNames": [trackData["assemblyNames"]],
             "adapter": {
                 "type": "BigWigAdapter",
                 "bigWigLocation": bwloc,
@@ -826,7 +829,7 @@
         }
         style_json = self._prepare_track_style(trackDict)
         trackDict["style"] = style_json
-        self.tracksToAdd.append(trackDict)
+        self.tracksToAdd[trackData["assemblyNames"]].append(trackDict)
         self.trackIdlist.append(tId)

     def add_bam(self, data, trackData, bam_index=None, **kwargs):
@@ -862,7 +865,7 @@
             "category": [
                 categ,
             ],
-            "assemblyNames": [self.genome_name],
+            "assemblyNames": [trackData["assemblyNames"]],
             "adapter": {
                 "type": "BamAdapter",
                 "bamLocation": {"uri": url},
@@ -881,13 +884,19 @@
         }
         style_json = self._prepare_track_style(trackDict)
         trackDict["style"] = style_json
-        self.tracksToAdd.append(trackDict)
+        self.tracksToAdd[trackData["assemblyNames"]].append(trackDict)
         self.trackIdlist.append(tId)

     def add_cram(self, data, trackData, cram_index=None, **kwargs):
         tId = trackData["label"]
         categ = trackData["category"]
         useuri = trackData["useuri"].lower() == "yes"
+        gsa = self.assmeta.get(trackData["assemblyNames"], None)
+        if gsa:
+            genseqad = gsa[0]["genome_sequence_adapter"]
+        else:
+            genseqad = "Not found"
+            logging.warn("No adapter found for cram %s in gsa=%s" % (tId, gsa))
         if useuri:
             url = data
         else:
@@ -913,14 +922,14 @@
             "category": [
                 categ,
             ],
-            "assemblyNames": [self.genome_name],
+            "assemblyNames": [trackData["assemblyNames"]],
             "adapter": {
                 "type": "CramAdapter",
                 "cramLocation": {"uri": url},
                 "craiLocation": {
                     "uri": url + ".crai",
                 },
-                "sequenceAdapter": self.genome_sequence_adapter,
+                "sequenceAdapter": genseqad,
             },
             "displays": [
                 {
@@ -931,7 +940,7 @@
         }
         style_json = self._prepare_track_style(trackDict)
         trackDict["style"] = style_json
-        self.tracksToAdd.append(trackDict)
+        self.tracksToAdd[trackData["assemblyNames"]].append(trackDict)
         self.trackIdlist.append(tId)

     def add_vcf(self, data, trackData):
@@ -955,7 +964,7 @@
             "type": "VariantTrack",
             "trackId": tId,
             "name": trackData["name"],
-            "assemblyNames": [self.genome_name],
+            "assemblyNames": [trackData["assemblyNames"]],
             "category": [
                 categ,
             ],
@@ -985,7 +994,7 @@
         }
         style_json = self._prepare_track_style(trackDict)
         trackDict["style"] = style_json
-        self.tracksToAdd.append(trackDict)
+        self.tracksToAdd[trackData["assemblyNames"]].append(trackDict)
         self.trackIdlist.append(tId)

     def _sort_gff(self, data, dest):
@@ -1020,7 +1029,7 @@
             "type": "FeatureTrack",
             "trackId": tId,
             "name": trackData["name"],
-            "assemblyNames": [self.genome_name],
+            "assemblyNames": [trackData["assemblyNames"]],
             "category": [
                 categ,
             ],
@@ -1048,7 +1057,7 @@
         }
         style_json = self._prepare_track_style(trackDict)
         trackDict["style"] = style_json
-        self.tracksToAdd.append(trackDict)
+        self.tracksToAdd[trackData["assemblyNames"]].append(trackDict)
         self.trackIdlist.append(tId)

     def add_bed(self, data, ext, trackData):
@@ -1065,7 +1074,7 @@
             "type": "FeatureTrack",
             "trackId": tId,
             "name": trackData["name"],
-            "assemblyNames": [self.genome_name],
+            "assemblyNames": [trackData["assemblyNames"]],
             "adapter": {
                 "category": [
                     categ,
@@ -1097,17 +1106,24 @@
         }
         style_json = self._prepare_track_style(trackDict)
         trackDict["style"] = style_json
-        self.tracksToAdd.append(trackDict)
+        self.tracksToAdd[trackData["assemblyNames"]].append(trackDict)
         self.trackIdlist.append(tId)

     def add_paf(self, data, trackData, pafOpts, **kwargs):
         tname = trackData["name"]
         tId = trackData["label"]
         categ = trackData["category"]
-        pgnames = [x.strip() for x in pafOpts["genome_label"].split(",") if len(x.strip()) > 0]
-        pgpaths = [x.strip() for x in pafOpts["genome"].split(",") if len(x.strip()) > 0]
-        passnames = [self.genome_name]  # always first
-        logging.debug("### add_paf got pafOpts=%s, pgnames=%s, pgpaths=%s for %s" % (pafOpts, pgnames, pgpaths, tId))
+        pgnames = [
+            x.strip() for x in pafOpts["genome_label"].split(",") if len(x.strip()) > 0
+        ]
+        pgpaths = [
+            x.strip() for x in pafOpts["genome"].split(",") if len(x.strip()) > 0
+        ]
+        passnames = [trackData["assemblyNames"]]  # always first
+        logging.debug(
+            "### add_paf got pafOpts=%s, pgnames=%s, pgpaths=%s for %s"
+            % (pafOpts, pgnames, pgpaths, tId)
+        )
         for i, gname in enumerate(pgnames):
             if len(gname.split()) > 1:
                 gname = gname.split()[0]
@@ -1141,17 +1157,18 @@
                 "type": "PAFAdapter",
                 "pafLocation": {"uri": url},
                 "assemblyNames": passnames,
-            }
+            },
         }
         style_json = {
-            "displays": [
-                { "type": "LinearBasicDisplay",
-                  "displayId": "%s-LinearBasicyDisplay" % trackDict["trackId"]
-                }
+            "displays": [
+                {
+                    "type": "LinearBasicDisplay",
+                    "displayId": "%s-LinearBasicyDisplay" % trackDict["trackId"],
+                }
             ]
         }
         trackDict["style"] = style_json
-        self.tracksToAdd.append(trackDict)
+        self.tracksToAdd[trackData["assemblyNames"]].append(trackDict)
         self.trackIdlist.append(tId)

     def process_annotations(self, track):
@@ -1173,6 +1190,7 @@
             "style": {},
         }

+        outputTrackConfig["assemblyNames"] = track["assemblyNames"]
         outputTrackConfig["key"] = track_human_label
         outputTrackConfig["useuri"] = useuri
         outputTrackConfig["path"] = dataset_path
@@ -1271,88 +1289,92 @@
         .add_default_view() and other configuration code adapted from
         https://github.com/abretaud/tools-iuc/blob/jbrowse2/tools/jbrowse2/jbrowse2.py
         """
-        tracks_data = []
         # TODO using the default session for now, but check out session specs in the future https://github.com/GMOD/jbrowse-components/issues/2708
         track_types = {}
         with open(self.config_json_file, "r") as config_file:
             config_json = json.load(config_file)
         if self.config_json:
             config_json.update(self.config_json)
-        for track_conf in self.tracksToAdd:
-            tId = track_conf["trackId"]
-            track_types[tId] = track_conf["type"]
-            style_data = default_data["style"].get(tId, None)
-            if not style_data:
-                logging.warn("### No style data in default data %s for %s" % (default_data, tId))
"LinearBasicDisplay"} - if "displays" in track_conf: - disp = track_conf["displays"][0]["type"] - style_data["type"] = disp - if track_conf.get("style_labels", None): - # TODO fix this: it should probably go in a renderer block (SvgFeatureRenderer) but still does not work - # TODO move this to per track displays? - style_data["labels"] = track_conf["style_labels"] - tracks_data.append( - { - "type": track_types[tId], - "configuration": tId, - "displays": [style_data], - } - ) - # The view for the assembly we're adding - view_json = {"type": "LinearGenomeView", "tracks": tracks_data} - refName = None - drdict = { - "reversed": False, - "assemblyName": self.genome_name, - "start": 1, - "end": 100000, - "refName": "x", - } + if "defaultSession" in config_json: + session_json = config_json["defaultSession"] + else: + session_json = {} + session_views = [] + for gnome in self.genome_names: + tracks_data = [] + for track_conf in self.tracksToAdd[gnome]: + tId = track_conf["trackId"] + track_types[tId] = track_conf["type"] + style_data = default_data["style"].get(tId, None) + if not style_data: + logging.warn( + "### No style data in default data %s for %s" + % (default_data, tId) + ) + style_data = {"type": "LinearBasicDisplay"} + if "displays" in track_conf: + disp = track_conf["displays"][0]["type"] + style_data["type"] = disp + if track_conf.get("style_labels", None): + # TODO fix this: it should probably go in a renderer block (SvgFeatureRenderer) but still does not work + # TODO move this to per track displays? + style_data["labels"] = track_conf["style_labels"] + tracks_data.append( + { + "type": track_types[tId], + "configuration": tId, + "displays": [style_data], + } + ) + # The view for the assembly we're adding + view_json = {"type": "LinearGenomeView", "tracks": tracks_data} + refName = None + drdict = { + "reversed": False, + "assemblyName": gnome, + "start": 1, + "end": 100000, + "refName": "x", + } - if default_data.get("defaultLocation", ""): - ddl = default_data["defaultLocation"] - loc_match = re.search(r"^([^:]+):([\d,]*)\.*([\d,]*)$", ddl) - # allow commas like 100,000 but ignore as integer - if loc_match: - refName = loc_match.group(1) - drdict["refName"] = refName - if loc_match.group(2) > "": - drdict["start"] = int(loc_match.group(2).replace(",", "")) - if loc_match.group(3) > "": - drdict["end"] = int(loc_match.group(3).replace(",", "")) + if default_data.get("defaultLocation", ""): + ddl = default_data["defaultLocation"] + loc_match = re.search(r"^([^:]+):([\d,]*)\.*([\d,]*)$", ddl) + # allow commas like 100,000 but ignore as integer + if loc_match: + refName = loc_match.group(1) + drdict["refName"] = refName + if loc_match.group(2) > "": + drdict["start"] = int(loc_match.group(2).replace(",", "")) + if loc_match.group(3) > "": + drdict["end"] = int(loc_match.group(3).replace(",", "")) + else: + logging.info( + "@@@ regexp could not match contig:start..end in the supplied location %s - please fix" + % ddl + ) + else: + drdict["refName"] = gnome + if drdict.get("refName", None): + # TODO displayedRegions is not just zooming to the region, it hides the rest of the chromosome + view_json["displayedRegions"] = [ + drdict, + ] + logging.info("@@@ defaultlocation %s for default session" % drdict) else: logging.info( - "@@@ regexp could not match contig:start..end in the supplied location %s - please fix" - % ddl + "@@@ no contig name found for default session - please add one!" 
                 )
-        else:
-            drdict["refName"] = self.genome_firstcontig
-        if drdict.get("refName", None):
-            # TODO displayedRegions is not just zooming to the region, it hides the rest of the chromosome
-            view_json["displayedRegions"] = [
-                drdict,
-            ]
-
-            logging.info("@@@ defaultlocation %s for default session" % drdict)
-        else:
-            logging.info(
-                "@@@ no contig name found for default session - please add one!"
-            )
+            session_views.append(view_json)
         session_name = default_data.get("session_name", "New session")
         for key, value in mapped_chars.items():
             session_name = session_name.replace(value, key)
-        # Merge with possibly existing defaultSession (if upgrading a jbrowse instance)
-        session_json = {}
-        if "defaultSession" in config_json:
-            session_json = config_json["defaultSession"]
-
         session_json["name"] = session_name

         if "views" not in session_json:
-            session_json["views"] = []
-
-        session_json["views"].append(view_json)
+            session_json["views"] = session_views
+        else:
+            session_json["views"] += session_views

         config_json["defaultSession"] = session_json
         self.config_json.update(config_json)
@@ -1419,8 +1441,8 @@


 def parse_style_conf(item):
-    if item.text.lower() in ['false','true','yes','no']:
-        return item.text.lower in ("yes", "true")
+    if item.text.lower() in ["false", "true", "yes", "no"]:
+        return item.text.lower in ("yes", "true")
     else:
         return item.text

@@ -1432,7 +1454,7 @@
         "--jbrowse2path", help="Path to JBrowse2 directory in biocontainer or Conda"
     )
     parser.add_argument("--outdir", help="Output directory", default="out")
-    parser.add_argument("--version", "-V", action="version", version="%(prog)s 2.0.1")
+    parser.add_argument("--version", "-V", action="version", version=JB2VER)
     args = parser.parse_args()
     tree = ET.parse(args.xml)
     root = tree.getroot()
@@ -1444,20 +1466,8 @@
         # so we'll prepend `http://` and hope for the best. Requests *should*
         # be GET and not POST so it should redirect OK
         GALAXY_INFRASTRUCTURE_URL = "http://" + GALAXY_INFRASTRUCTURE_URL
-    jc = JbrowseConnector(
-        outdir=args.outdir,
-        jbrowse2path=args.jbrowse2path,
-        genomes=[
-            {
-                "path": x.attrib["path"],
-                "label": x.attrib["label"],
-                "useuri": x.attrib["useuri"],
-                "meta": metadata_from_node(x.find("metadata")),
-            }
-            for x in root.findall("metadata/genomes/genome")
-        ],
-    )
-    jc.process_genomes()
+
+    jc = JbrowseConnector(outdir=args.outdir, jbrowse2path=args.jbrowse2path)

     default_session_data = {
         "visibility": {
@@ -1468,92 +1478,110 @@
         "style_labels": {},
     }

-    for track in root.findall("tracks/track"):
-        track_conf = {}
-        track_conf["trackfiles"] = []
+    for ass in root.findall("assembly"):
+        genomes = [
+            {
+                "path": x.attrib["path"],
+                "label": x.attrib["label"],
+                "useuri": x.attrib["useuri"],
+                "meta": metadata_from_node(x.find("metadata")),
+            }
+            for x in ass.findall("metadata/genomes/genome")
+        ]
genomes=%s" % genomes) + assref_name = jc.process_genomes(genomes) + + for track in ass.find("tracks"): + track_conf = {} + track_conf["trackfiles"] = [] + track_conf["assemblyNames"] = assref_name + is_multi_bigwig = False + try: + if track.find("options/wiggle/multibigwig") and ( + track.find("options/wiggle/multibigwig").text == "True" + ): + is_multi_bigwig = True + multi_bigwig_paths = [] + except KeyError: + pass - is_multi_bigwig = False - try: - if track.find("options/wiggle/multibigwig") and ( - track.find("options/wiggle/multibigwig").text == "True" - ): - is_multi_bigwig = True - multi_bigwig_paths = [] - except KeyError: - pass + trackfiles = track.findall("files/trackFile") + if trackfiles: + for x in track.findall("files/trackFile"): + track_conf["label"] = x.attrib["label"] + trackkey = track_conf["label"] + track_conf["useuri"] = x.attrib["useuri"] + if is_multi_bigwig: + multi_bigwig_paths.append( + ( + x.attrib["label"], + x.attrib["useuri"], + os.path.realpath(x.attrib["path"]), + ) + ) + else: + if trackfiles: + metadata = metadata_from_node(x.find("metadata")) + track_conf["dataset_id"] = metadata.get( + "dataset_id", "None" + ) + if x.attrib["useuri"].lower() == "yes": + tfa = ( + x.attrib["path"], + x.attrib["ext"], + x.attrib["useuri"], + x.attrib["label"], + metadata, + ) + else: + tfa = ( + os.path.realpath(x.attrib["path"]), + x.attrib["ext"], + x.attrib["useuri"], + x.attrib["label"], + metadata, + ) + track_conf["trackfiles"].append(tfa) - trackfiles = track.findall("files/trackFile") - if trackfiles: - for x in track.findall("files/trackFile"): - track_conf["label"] = x.attrib["label"] - trackkey = track_conf["label"] - track_conf["useuri"] = x.attrib["useuri"] if is_multi_bigwig: - multi_bigwig_paths.append( + metadata = metadata_from_node(x.find("metadata")) + + track_conf["trackfiles"].append( ( - x.attrib["label"], - x.attrib["useuri"], - os.path.realpath(x.attrib["path"]), + multi_bigwig_paths, # Passing an array of paths to represent as one track + "bigwig_multiple", + "MultiBigWig", # Giving an hardcoded name for now + {}, # No metadata for multiple bigwig ) ) - else: - if trackfiles: - metadata = metadata_from_node(x.find("metadata")) - track_conf["dataset_id"] = metadata["dataset_id"] - if x.attrib["useuri"].lower() == "yes": - tfa = ( - x.attrib["path"], - x.attrib["ext"], - x.attrib["useuri"], - x.attrib["label"], - metadata, - ) - else: - tfa = ( - os.path.realpath(x.attrib["path"]), - x.attrib["ext"], - x.attrib["useuri"], - x.attrib["label"], - metadata, - ) - track_conf["trackfiles"].append(tfa) - if is_multi_bigwig: - metadata = metadata_from_node(x.find("metadata")) + track_conf["category"] = track.attrib["cat"] + track_conf["format"] = track.attrib["format"] + track_conf["conf"] = etree_to_dict(track.find("options")) + track_conf["category"] = track.attrib["cat"] + track_conf["format"] = track.attrib["format"] + keys = jc.process_annotations(track_conf) - track_conf["trackfiles"].append( - ( - multi_bigwig_paths, # Passing an array of paths to represent as one track - "bigwig_multiple", - "MultiBigWig", # Giving an hardcoded name for now - {}, # No metadata for multiple bigwig - ) - ) - track_conf["category"] = track.attrib["cat"] - track_conf["format"] = track.attrib["format"] - track_conf["conf"] = etree_to_dict(track.find("options")) - track_conf["category"] = track.attrib["cat"] - track_conf["format"] = track.attrib["format"] - keys = jc.process_annotations(track_conf) + if keys: + for key in keys: + vis = track.attrib.get("visibility", 
"default_off") + if not vis: + vis = "default_off" + default_session_data["visibility"][vis].append(key) + if track.find("options/style"): + default_session_data["style"][key] = { + item.tag: parse_style_conf(item) + for item in track.find("options/style") + } + else: + default_session_data["style"][key] = {} + logging.warn("@@@@ no options/style found for %s" % (key)) - if keys: - for key in keys: - default_session_data["visibility"][ - track.attrib.get("visibility", "default_off") - ].append(key) - if track.find("options/style"): - default_session_data["style"][key] = { - item.tag: parse_style_conf(item) for item in track.find("options/style") - } - else: - default_session_data["style"][key] = {} - logging.warn("@@@@ no options/style found for %s" % (key)) - - if track.find("options/style_labels"): - default_session_data["style_labels"][key] = { - item.tag: parse_style_conf(item) - for item in track.find("options/style_labels") - } + if track.find("options/style_labels"): + default_session_data["style_labels"][key] = { + item.tag: parse_style_conf(item) + for item in track.find("options/style_labels") + } default_session_data["defaultLocation"] = root.find( "metadata/general/defaultLocation" ).text @@ -1571,11 +1599,10 @@ "font_size": root.find("metadata/general/font_size").text, } jc.add_general_configuration(general_data) - trackconf = jc.config_json.get("tracks", None) - if trackconf: - jc.config_json["tracks"].update(jc.tracksToAdd) - else: - jc.config_json["tracks"] = jc.tracksToAdd + trackconf = jc.config_json.get("tracks", []) + for gnome in jc.genome_names: + trackconf += jc.tracksToAdd[gnome] + jc.config_json["tracks"] = trackconf jc.write_config() jc.add_default_session(default_session_data) # jc.text_index() not sure what broke here.