Mercurial > repos > fubar > jbrowse2
diff jbrowse2.py @ 57:94264fe60478 draft
planemo upload for repository https://github.com/usegalaxy-eu/temporary-tools/tree/master/jbrowse2 commit 4b5df41484f6bdf316edaf95b53c92d328ec1674-dirty
author | fubar |
---|---|
date | Thu, 21 Mar 2024 08:01:42 +0000 |
parents | c0097a584a8a |
children | f807e219cec3 |
line wrap: on
line diff
--- a/jbrowse2.py Tue Mar 19 02:33:40 2024 +0000 +++ b/jbrowse2.py Thu Mar 21 08:01:42 2024 +0000 @@ -1,4 +1,4 @@ -#!/usr/bin/env python + #!/usr/bin/env python # change to accumulating all configuration for config.json based on the default from the clone import argparse import binascii @@ -10,7 +10,6 @@ import shutil import struct import subprocess -import sys import tempfile import urllib.request import xml.etree.ElementTree as ET @@ -480,9 +479,11 @@ else: self.genome_firstcontig = fl else: - fl = urllib.request.urlopen(fapath+".fai").readline() - if fl: # is first row of the text fai so the first contig name - self.genome_firstcontig = fl.decode('utf8').strip().split()[0] + fl = urllib.request.urlopen(fapath + ".fai").readline() + if fl: # is first row of the text fai so the first contig name + self.genome_firstcontig = ( + fl.decode("utf8").strip().split()[0] + ) if self.config_json.get("assemblies", None): self.config_json["assemblies"] += assemblies else: @@ -538,6 +539,16 @@ "adapter": adapter, }, "rendering": {"type": "DivSequenceRenderer"}, + "displays": [ + { + "type": "LinearReferenceSequenceDisplay", + "displayId": "%s-LinearReferenceSequenceDisplay" % gname, + }, + { + "type": "LinearGCContentDisplay", + "displayId": "%s-LinearGCContentDisplay" % gname, + }, + ], } return trackDict @@ -604,13 +615,15 @@ uri = data else: uri = trackData["hic_url"] - categ = trackData['category'] + categ = trackData["category"] trackDict = { "type": "HicTrack", "trackId": tId, "name": uri, "assemblyNames": [self.genome_name], - "category": [categ,], + "category": [ + categ, + ], "adapter": { "type": "HicAdapter", "hicLocation": uri, @@ -622,8 +635,6 @@ }, ], } - style_json = self._prepare_track_style(trackDict) - trackDict["style"] = style_json self.tracksToAdd.append(trackDict) self.trackIdlist.append(tId) @@ -643,7 +654,7 @@ } ] } - categ = trackData['category'] + categ = trackData["category"] fname = "%s.bed" % tId dest = "%s/%s" % (self.outdir, fname) gname = self.genome_name @@ -665,11 +676,14 @@ soutp = outp.split("\n") samp = [x.split("s ")[1] for x in soutp if x.startswith("s ")] samples = [x.split(".")[0] for x in samp] + logging.warn("### maf convert cmd = %s,\nsamples=%s" % (' '.join(cmd), samples)) trackDict = { "type": "MafTrack", "trackId": tId, "name": trackData["name"], - "category": [categ,], + "category": [ + categ, + ], "adapter": { "type": "MafTabixAdapter", "samples": samples, @@ -694,8 +708,6 @@ }, ], } - style_json = self._prepare_track_style(trackDict) - trackDict["style"] = style_json self.tracksToAdd.append(trackDict) self.trackIdlist.append(tId) if self.config_json.get("plugins", None): @@ -717,11 +729,11 @@ ] subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_unrebased) gff3_unrebased.close() + logging.warn("### blastxml to gff3 cmd = %s" % ' '.join(cmd)) return gff3_unrebased.name def add_blastxml(self, data, trackData, blastOpts, **kwargs): gff3 = self._blastxml_to_gff3(data, min_gap=blastOpts["min_gap"]) - if "parent" in blastOpts and blastOpts["parent"] != "None": gff3_rebased = tempfile.NamedTemporaryFile(delete=False) cmd = ["python", os.path.join(INSTALLED_TO, "gff3_rebase.py")] @@ -729,23 +741,24 @@ cmd.append("--protein2dna") cmd.extend([os.path.realpath(blastOpts["parent"]), gff3]) subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_rebased) + logging.warn("### gff3rebase cmd = %s" % ' '.join(cmd)) gff3_rebased.close() - # Replace original gff3 file shutil.copy(gff3_rebased.name, gff3) os.unlink(gff3_rebased.name) - url = "%s.gff3" % trackData["label"] + url = "%s.gff3.gz" % trackData["label"] dest = "%s/%s" % (self.outdir, url) self._sort_gff(gff3, dest) - url = url + ".gz" tId = trackData["label"] - categ = trackData['category'] + categ = trackData["category"] trackDict = { "type": "FeatureTrack", "trackId": tId, "name": trackData["name"], "assemblyNames": [self.genome_name], - "category": [categ,], + "category": [ + categ, + ], "adapter": { "type": "Gff3TabixAdapter", "gffGzLocation": { @@ -768,8 +781,6 @@ }, ], } - style_json = self._prepare_track_style(trackDict) - trackDict["style"] = style_json self.tracksToAdd.append(trackDict) self.trackIdlist.append(tId) os.unlink(gff3) @@ -794,12 +805,14 @@ self.subprocess_check_call(cmd) bwloc = {"uri": url} tId = trackData["label"] - categ = trackData['category'] + categ = trackData["category"] trackDict = { "type": "QuantitativeTrack", "trackId": tId, "name": trackData["name"], - "category": [categ,], + "category": [ + categ, + ], "assemblyNames": [ self.genome_name, ], @@ -814,8 +827,6 @@ } ], } - style_json = self._prepare_track_style(trackDict) - trackDict["style"] = style_json self.tracksToAdd.append(trackDict) self.trackIdlist.append(tId) @@ -823,21 +834,19 @@ tId = trackData["label"] useuri = trackData["useuri"].lower() == "yes" bindex = bam_index - categ = trackData['category'] + categ = trackData["category"] if useuri: url = data else: fname = "%s.bam" % trackData["label"] dest = "%s/%s" % (self.outdir, fname) url = fname - bindex = fname + '.bai' + bindex = fname + ".bai" self.subprocess_check_call(["cp", data, dest]) if bam_index is not None and os.path.exists(bam_index): if not os.path.exists(bindex): # bai most probably made by galaxy and stored in galaxy dirs, need to copy it to dest - self.subprocess_check_call( - ["cp", bam_index, bindex] - ) + self.subprocess_check_call(["cp", bam_index, bindex]) else: # Can happen in exotic condition # e.g. if bam imported as symlink with datatype=unsorted.bam, then datatype changed to bam @@ -851,7 +860,9 @@ "type": "AlignmentsTrack", "trackId": tId, "name": trackData["name"], - "category": [categ,], + "category": [ + categ, + ], "assemblyNames": [self.genome_name], "adapter": { "type": "BamAdapter", @@ -869,14 +880,12 @@ }, ], } - style_json = self._prepare_track_style(trackDict) - trackDict["style"] = style_json self.tracksToAdd.append(trackDict) self.trackIdlist.append(tId) def add_cram(self, data, trackData, cram_index=None, **kwargs): tId = trackData["label"] - categ = trackData['category'] + categ = trackData["category"] useuri = trackData["useuri"].lower() == "yes" if useuri: url = data @@ -886,27 +895,29 @@ url = fname self.subprocess_check_call(["cp", data, dest]) if cram_index is not None and os.path.exists(cram_index): - if not os.path.exists(dest+'.crai'): + if not os.path.exists(dest + ".crai"): # most probably made by galaxy and stored in galaxy dirs, need to copy it to dest self.subprocess_check_call( ["cp", os.path.realpath(cram_index), dest + ".crai"] ) else: - cpath = os.path.realpath(dest) + '.crai' + cpath = os.path.realpath(dest) + ".crai" cmd = ["samtools", "index", "-c", "-o", cpath, os.path.realpath(dest)] - logging.debug('executing cmd %s' % ' '.join(cmd)) + logging.debug("executing cmd %s" % " ".join(cmd)) self.subprocess_check_call(cmd) trackDict = { "type": "AlignmentsTrack", "trackId": tId, "name": trackData["name"], - "category": [categ,], + "category": [ + categ, + ], "assemblyNames": [self.genome_name], "adapter": { "type": "CramAdapter", "cramLocation": {"uri": url}, "craiLocation": { - "uri": url + '.crai', + "uri": url + ".crai", }, "sequenceAdapter": self.genome_sequence_adapter, }, @@ -917,8 +928,6 @@ }, ], } - style_json = self._prepare_track_style(trackDict) - trackDict["style"] = style_json self.tracksToAdd.append(trackDict) self.trackIdlist.append(tId) @@ -928,7 +937,7 @@ # self.giURL, # trackData["metadata"]["dataset_id"], # ) - categ = trackData['category'] + categ = trackData["category"] useuri = trackData["useuri"].lower() == "yes" if useuri: url = data @@ -944,12 +953,12 @@ "trackId": tId, "name": trackData["name"], "assemblyNames": [self.genome_name], - "category": [categ,], + "category": [ + categ, + ], "adapter": { "type": "VcfTabixAdapter", - "vcfGzLocation": { - "uri": url - }, + "vcfGzLocation": {"uri": url}, "index": { "location": { "uri": url + ".tbi", @@ -971,8 +980,6 @@ }, ], } - style_json = self._prepare_track_style(trackDict) - trackDict["style"] = style_json self.tracksToAdd.append(trackDict) self.trackIdlist.append(tId) @@ -1003,13 +1010,15 @@ dest = "%s/%s" % (self.outdir, url) self._sort_gff(data, dest) tId = trackData["label"] - categ = trackData['category'] + categ = trackData["category"] trackDict = { "type": "FeatureTrack", "trackId": tId, "name": trackData["name"], "assemblyNames": [self.genome_name], - "category": [categ,], + "category": [ + categ, + ], "adapter": { "type": "Gff3TabixAdapter", "gffGzLocation": { @@ -1032,14 +1041,12 @@ }, ], } - style_json = self._prepare_track_style(trackDict) - trackDict["style"] = style_json self.tracksToAdd.append(trackDict) self.trackIdlist.append(tId) def add_bed(self, data, ext, trackData): tId = trackData["label"] - categ = trackData['category'] + categ = trackData["category"] useuri = trackData["useuri"].lower() == "yes" if useuri: url = data @@ -1053,7 +1060,9 @@ "name": trackData["name"], "assemblyNames": [self.genome_name], "adapter": { - "category": [categ,], + "category": [ + categ, + ], "type": "BedTabixAdapter", "bedGzLocation": { "uri": url, @@ -1079,15 +1088,13 @@ }, ], } - style_json = self._prepare_track_style(trackDict) - trackDict["style"] = style_json self.tracksToAdd.append(trackDict) self.trackIdlist.append(tId) def add_paf(self, data, trackData, pafOpts, **kwargs): tname = trackData["name"] tId = trackData["label"] - categ = trackData['category'] + categ = trackData["category"] pgnames = [x.strip() for x in pafOpts["genome_label"].split(",")] pgpaths = [x.strip() for x in pafOpts["genome"].split(",")] passnames = [self.genome_name] # always first @@ -1098,7 +1105,9 @@ # trouble from spacey names in command lines avoidance if gname not in self.genome_names: # ignore if already there - eg for duplicates among pafs. - useuri = pgpaths[i].startswith('http://') or pgpaths[i].startswith('https://') + useuri = pgpaths[i].startswith("http://") or pgpaths[i].startswith( + "https://" + ) asstrack = self.make_assembly(pgpaths[i], gname, useuri) self.genome_names.append(gname) if self.config_json.get("assemblies", None): @@ -1114,7 +1123,9 @@ "type": "SyntenyTrack", "trackId": tId, "assemblyNames": passnames, - "category": [categ,], + "category": [ + categ, + ], "name": tname, "adapter": { "type": "PAFAdapter", @@ -1122,18 +1133,16 @@ "assemblyNames": passnames, }, "displays": [ - { - "type": "LinearSyntenyDisplay", - "displayId": "%s-LinearSyntenyDisplay" % tId, - }, - { - "type": "DotPlotDisplay", - "displayId": "%s-DotPlotDisplay" % tId, - }, + { + "type": "LinearSyntenyDisplay", + "displayId": "%s-LinearSyntenyDisplay" % tId, + }, + { + "type": "DotPlotDisplay", + "displayId": "%s-DotPlotDisplay" % tId, + }, ], } - style_json = self._prepare_track_style(trackDict) - trackDict["style"] = style_json self.tracksToAdd.append(trackDict) self.trackIdlist.append(tId) @@ -1249,7 +1258,7 @@ # Return non-human label for use in other fields yield outputTrackConfig["label"] - def add_default_session(self, data): + def add_default_session(self, default_data): """ Add some default session settings: set some assemblies/tracks on/off """ @@ -1267,21 +1276,26 @@ for track_conf in self.tracksToAdd: track_types[track_conf["trackId"]] = track_conf["type"] tId = track_conf["trackId"] - if tId in data["visibility"]["default_on"]: + #if tId in data["visibility"]["default_on"]: + style_data = default_data["style"].get(tId, None) + if not style_data: + logging.warn("### No style data in default data for %s" % tId) style_data = {"type": "LinearBasicDisplay"} - if "displays" in track_conf: - style_data["type"] = track_conf["displays"][0]["type"] - if track_conf.get("style_labels", None): - # TODO fix this: it should probably go in a renderer block (SvgFeatureRenderer) but still does not work - # TODO move this to per track displays? - style_data["labels"] = track_conf["style_labels"] - tracks_data.append( - { - "type": track_types[tId], - "configuration": tId, - "displays": [style_data], - } - ) + if "displays" in track_conf: + disp = track_conf["displays"][0]["type"] + style_data["type"] = disp + style_data["configuration"] = "%s-%s" % (tId, disp) + if track_conf.get("style_labels", None): + # TODO fix this: it should probably go in a renderer block (SvgFeatureRenderer) but still does not work + # TODO move this to per track displays? + style_data["labels"] = track_conf["style_labels"] + tracks_data.append( + { + "type": track_types[tId], + "configuration": tId, + "displays": [style_data], + } + ) # The view for the assembly we're adding view_json = {"type": "LinearGenomeView", "tracks": tracks_data} @@ -1290,13 +1304,13 @@ drdict = { "reversed": False, "assemblyName": self.genome_name, - "start": 2000, - "end": 200000, + "start": 1, + "end": 100000, "refName": "x", } - if data.get("defaultLocation", ""): - ddl = data["defaultLocation"] + if default_data.get("defaultLocation", ""): + ddl = default_data["defaultLocation"] loc_match = re.search(r"^([^:]+):([\d,]*)\.*([\d,]*)$", ddl) # allow commas like 100,000 but ignore as integer if loc_match: @@ -1324,7 +1338,7 @@ logging.info( "@@@ no contig name found for default session - please add one!" ) - session_name = data.get("session_name", "New session") + session_name = default_data.get("session_name", "New session") for key, value in mapped_chars.items(): session_name = session_name.replace(value, key) # Merge with possibly existing defaultSession (if upgrading a jbrowse instance) @@ -1382,7 +1396,9 @@ """Clone a JBrowse directory into a destination directory. This also works in Biocontainer testing now""" dest = self.outdir if realclone: - self.subprocess_check_call(['jbrowse', 'create', dest,"-f", '--tag', f"{JB2VER}"]) + self.subprocess_check_call( + ["jbrowse", "create", dest, "-f", "--tag", f"{JB2VER}"] + ) else: shutil.copytree(self.jbrowse2path, dest, dirs_exist_ok=True) for fn in [ @@ -1400,14 +1416,8 @@ def parse_style_conf(item): - if "type" in item.attrib and item.attrib["type"] in [ - "boolean", - "integer", - ]: - if item.attrib["type"] == "boolean": - return item.text in ("yes", "true", "True") - elif item.attrib["type"] == "integer": - return int(item.text) + if item.text.lower() in ['false','true','yes','no']: + return item.text.lower in ("yes", "true") else: return item.text @@ -1473,6 +1483,7 @@ trackfiles = track.findall("files/trackFile") if trackfiles: for x in track.findall("files/trackFile"): + track_conf["label"] = x.attrib["label"] track_conf["useuri"] = x.attrib["useuri"] if is_multi_bigwig: multi_bigwig_paths.append( @@ -1521,6 +1532,11 @@ track_conf["style"] = { item.tag: parse_style_conf(item) for item in track.find("options/style") } + else: + track_conf["style"] = {} + tst = track_conf["style"].get("type", None) + if tst: + track_conf["style"]["configuration"] = "%s-%s" % (track_conf["label"], tst) if track.find("options/style_labels"): track_conf["style_labels"] = { item.tag: parse_style_conf(item) @@ -1530,12 +1546,6 @@ track_conf["conf"] = etree_to_dict(track.find("options")) track_conf["category"] = track.attrib["cat"] track_conf["format"] = track.attrib["format"] - try: - # Only pertains to gff3 + blastxml. TODO? - track_conf["style"] = {t.tag: t.text for t in track.find("options/style")} - except TypeError: - track_conf["style"] = {} - pass keys = jc.process_annotations(track_conf) if keys: @@ -1544,9 +1554,7 @@ track.attrib.get("visibility", "default_off") ].append(key) if track_conf.get("style", None): - default_session_data["style"][key] = track_conf[ - "style" - ] # TODO do we need this anymore? + default_session_data["style"][key] = track_conf["style"] if track_conf.get("style_lables", None): default_session_data["style_labels"][key] = track_conf.get( "style_labels", None