Mercurial > repos > fubar > jbrowse2
diff jbrowse2.py @ 19:bde6b1d09f7d draft
planemo upload for repository https://github.com/usegalaxy-eu/temporary-tools/tree/master/jbrowse2 commit 1290bf486bc55c02fecd0327de10a28655a18e81-dirty
author | fubar |
---|---|
date | Tue, 30 Jan 2024 06:05:03 +0000 |
parents | 4c201a3d4755 |
children | 39b717d934a8 |
line wrap: on
line diff
--- a/jbrowse2.py Mon Jan 29 02:34:43 2024 +0000 +++ b/jbrowse2.py Tue Jan 30 06:05:03 2024 +0000 @@ -3,7 +3,6 @@ import argparse import binascii import datetime -import hashlib import json import logging import os @@ -23,7 +22,7 @@ TODAY = datetime.datetime.now().strftime("%Y-%m-%d") GALAXY_INFRASTRUCTURE_URL = None -JB2REL = "v2.10.0" +JB2REL = "v2.10.1" # version pinned for cloning mapped_chars = { @@ -232,7 +231,9 @@ elif "scaling" in track: if track["scaling"]["method"] == "ignore": if track["scaling"]["scheme"]["color"] != "__auto__": - trackConfig["style"]["color"] = track["scaling"]["scheme"]["color"] + trackConfig["style"]["color"] = track["scaling"]["scheme"][ + "color" + ] else: trackConfig["style"]["color"] = self.hex_from_rgb( *self._get_colours() @@ -259,13 +260,18 @@ "blue": blue, } ) - trackConfig["style"]["color"] = color_function.replace("\n", "") + trackConfig["style"]["color"] = color_function.replace( + "\n", "" + ) elif trackFormat == "gene_calls": # Default values, based on GFF3 spec min_val = 0 max_val = 1000 # Get min/max and build a scoring function since JBrowse doesn't - if scales["type"] == "automatic" or scales["type"] == "__auto__": + if ( + scales["type"] == "automatic" + or scales["type"] == "__auto__" + ): min_val, max_val = self.min_max_gff(gff3) else: min_val = scales.get("min", 0) @@ -273,7 +279,9 @@ if scheme["color"] == "__auto__": user_color = "undefined" - auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours()) + auto_color = "'%s'" % self.hex_from_rgb( + *self._get_colours() + ) elif scheme["color"].startswith("#"): user_color = "'%s'" % self.hex_from_rgb( *self.rgb_from_hex(scheme["color"][1:]) @@ -281,7 +289,9 @@ auto_color = "undefined" else: user_color = "undefined" - auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours()) + auto_color = "'%s'" % self.hex_from_rgb( + *self._get_colours() + ) color_function = self.COLOR_FUNCTION_TEMPLATE_QUAL.format( **{ @@ -293,7 +303,9 @@ } ) - trackConfig["style"]["color"] = color_function.replace("\n", "") + trackConfig["style"]["color"] = color_function.replace( + "\n", "" + ) return trackConfig @@ -336,40 +348,41 @@ for (key, value) in node.findall("dataset")[0].attrib.items(): metadata["dataset_%s" % key] = value - for (key, value) in node.findall("history")[0].attrib.items(): - metadata["history_%s" % key] = value - - for (key, value) in node.findall("metadata")[0].attrib.items(): - metadata["metadata_%s" % key] = value - - for (key, value) in node.findall("tool")[0].attrib.items(): - metadata["tool_%s" % key] = value + if node.findall("history"): + for (key, value) in node.findall("history")[0].attrib.items(): + metadata["history_%s" % key] = value - # Additional Mappings applied: - metadata[ - "dataset_edam_format" - ] = '<a target="_blank" href="http://edamontology.org/{0}">{1}</a>'.format( - metadata["dataset_edam_format"], metadata["dataset_file_ext"] - ) - metadata["history_user_email"] = '<a href="mailto:{0}">{0}</a>'.format( - metadata["history_user_email"] - ) - metadata["hist_name"] = metadata["history_display_name"] - metadata[ - "history_display_name" - ] = '<a target="_blank" href="{galaxy}/history/view/{encoded_hist_id}">{hist_name}</a>'.format( - galaxy=GALAXY_INFRASTRUCTURE_URL, - encoded_hist_id=metadata["history_id"], - hist_name=metadata["history_display_name"], - ) - metadata[ - "tool_tool" - ] = '<a target="_blank" href="{galaxy}/datasets/{encoded_id}/show_params">{tool_id}</a>'.format( - galaxy=GALAXY_INFRASTRUCTURE_URL, - encoded_id=metadata["dataset_id"], - tool_id=metadata["tool_tool_id"], - # tool_version=metadata['tool_tool_version'], - ) + if node.findall("metadata"): + for (key, value) in node.findall("metadata")[0].attrib.items(): + metadata["metadata_%s" % key] = value + # Additional Mappings applied: + metadata[ + "dataset_edam_format" + ] = '<a target="_blank" href="http://edamontology.org/{0}">{1}</a>'.format( + metadata["dataset_edam_format"], metadata["dataset_file_ext"] + ) + metadata["history_user_email"] = '<a href="mailto:{0}">{0}</a>'.format( + metadata["history_user_email"] + ) + metadata["hist_name"] = metadata["history_display_name"] + metadata[ + "history_display_name" + ] = '<a target="_blank" href="{galaxy}/history/view/{encoded_hist_id}">{hist_name}</a>'.format( + galaxy=GALAXY_INFRASTRUCTURE_URL, + encoded_hist_id=metadata["history_id"], + hist_name=metadata["history_display_name"], + ) + if node.findall("tool"): + for (key, value) in node.findall("tool")[0].attrib.items(): + metadata["tool_%s" % key] = value + metadata[ + "tool_tool" + ] = '<a target="_blank" href="{galaxy}/datasets/{encoded_id}/show_params">{tool_id}{tool_version}</a>'.format( + galaxy=GALAXY_INFRASTRUCTURE_URL, + encoded_id=metadata.get("dataset_id", ""), + tool_id=metadata.get("tool_tool_id", ""), + tool_version=metadata.get("tool_tool_version",""), + ) return metadata @@ -389,7 +402,9 @@ def subprocess_check_call(self, command, output=None): if output: - log.debug("cd %s && %s > %s", self.outdir, " ".join(command), output) + log.debug( + "cd %s && %s > %s", self.outdir, " ".join(command), output + ) subprocess.check_call(command, cwd=self.outdir, stdout=output) else: log.debug("cd %s && %s", self.outdir, " ".join(command)) @@ -468,13 +483,8 @@ self.config_json["assemblies"] = assemblies def make_assembly(self, fapath, gname): - hashData = [ - fapath, - gname, - ] - hashData = "|".join(hashData).encode("utf-8") - ghash = hashlib.md5(hashData).hexdigest() - faname = ghash + ".fa.gz" + + faname = gname + ".fa.gz" fadest = os.path.join(self.outdir, faname) cmd = "bgzip -i -c %s -I %s.gzi > %s && samtools faidx %s" % ( fapath, @@ -495,6 +505,7 @@ "uri": faname + ".gzi", }, } + self.genome_sequence_adapter = adapter trackDict = { "name": gname, "sequence": { @@ -604,7 +615,7 @@ "plugins": [ { "name": "MafViewer", - "url": "https://unpkg.com/browse/jbrowse-plugin-mafviewer@1.0.6/dist/jbrowse-plugin-mafviewer.umd.production.min.js", + "url": "https://unpkg.com/jbrowse-plugin-mafviewer/dist/jbrowse-plugin-mafviewer.umd.production.min.js" } ] } @@ -623,7 +634,9 @@ self.subprocess_check_call(cmd) # Construct samples list # We could get this from galaxy metadata, not sure how easily. - ps = subprocess.Popen(["grep", "^s [^ ]*", "-o", data], stdout=subprocess.PIPE) + ps = subprocess.Popen( + ["grep", "^s [^ ]*", "-o", data], stdout=subprocess.PIPE + ) output = subprocess.check_output(("sort", "-u"), stdin=ps.stdout) ps.wait() outp = output.decode("ascii") @@ -783,7 +796,9 @@ url = fname self.subprocess_check_call(["cp", data, dest]) bloc = {"uri": url} - if bam_index is not None and os.path.exists(os.path.realpath(bam_index)): + if bam_index is not None and os.path.exists( + os.path.realpath(bam_index) + ): # bai most probably made by galaxy and stored in galaxy dirs, need to copy it to dest self.subprocess_check_call( ["cp", os.path.realpath(bam_index), dest + ".bai"] @@ -794,7 +809,9 @@ # => no index generated by galaxy, but there might be one next to the symlink target # this trick allows to skip the bam sorting made by galaxy if already done outside if os.path.exists(os.path.realpath(data) + ".bai"): - self.symlink_or_copy(os.path.realpath(data) + ".bai", dest + ".bai") + self.symlink_or_copy( + os.path.realpath(data) + ".bai", dest + ".bai" + ) else: log.warn("Could not find a bam index (.bai file) for %s", data) trackDict = { @@ -823,12 +840,62 @@ self.tracksToAdd.append(trackDict) self.trackIdlist.append(tId) + def add_cram(self, data, trackData, cramOpts, cram_index=None, **kwargs): + tId = trackData["label"] + fname = "%s.cram" % trackData["label"] + dest = "%s/%s" % (self.outdir, fname) + url = fname + self.subprocess_check_call(["cp", data, dest]) + bloc = {"uri": url} + if cram_index is not None and os.path.exists( + os.path.realpath(cram_index) + ): + # most probably made by galaxy and stored in galaxy dirs, need to copy it to dest + self.subprocess_check_call( + ["cp", os.path.realpath(cram_index), dest + ".crai"] + ) + else: + # Can happen in exotic condition + # e.g. if bam imported as symlink with datatype=unsorted.bam, then datatype changed to bam + # => no index generated by galaxy, but there might be one next to the symlink target + # this trick allows to skip the bam sorting made by galaxy if already done outside + if os.path.exists(os.path.realpath(data) + ".crai"): + self.symlink_or_copy( + os.path.realpath(data) + ".crai", dest + ".crai" + ) + else: + log.warn( + "Could not find a cram index (.crai file) for %s", data + ) + trackDict = { + "type": "AlignmentsTrack", + "trackId": tId, + "name": trackData["name"], + "assemblyNames": [self.genome_name], + "adapter": { + "type": "CramAdapter", + "cramLocation": bloc, + "craiLocation": {"uri": fname + ".crai",}, + "sequenceAdapter": self.genome_sequence_adapter, + }, + "displays": [ + { + "type": "LinearAlignmentsDisplay", + "displayId": "%s-LinearAlignmentsDisplay" % tId, + }, + ], + } + style_json = self._prepare_track_style(trackDict) + trackDict["style"] = style_json + self.tracksToAdd.append(trackDict) + self.trackIdlist.append(tId) + def add_vcf(self, data, trackData): tId = trackData["label"] - url = "%s/api/datasets/%s/display" % ( - self.giURL, - trackData["metadata"]["dataset_id"], - ) + # url = "%s/api/datasets/%s/display" % ( + # self.giURL, + # trackData["metadata"]["dataset_id"], + # ) url = "%s.vcf.gz" % tId dest = "%s/%s" % (self.outdir, url) cmd = "bgzip -c %s > %s" % (data, dest) @@ -879,7 +946,9 @@ dest, ) # "gff3sort.pl --precise '%s' | grep -v \"^$\" > '%s'" self.subprocess_popen(cmd) - self.subprocess_check_call(["tabix", "-f", "-p", "gff", dest + ".gz"]) + self.subprocess_check_call( + ["tabix", "-f", "-p", "gff", dest + ".gz"] + ) def _sort_bed(self, data, dest): # Only index if not already done @@ -1085,28 +1154,12 @@ outputTrackConfig["key"] = track_human_label - # We add extra data to hash for the case of REST + SPARQL. - if ( - "conf" in track - and "options" in track["conf"] - and "url" in track["conf"]["options"] - ): - rest_url = track["conf"]["options"]["url"] - else: - rest_url = "" outputTrackConfig["trackset"] = track.get("trackset", {}) - # I chose to use track['category'] instead of 'category' here. This - # is intentional. This way re-running the tool on a different date - # will not generate different hashes and make comparison of outputs - # much simpler. - hashData = [ - str(dataset_path), + outputTrackConfig["label"] = "%s_%i_%s" % ( + dataset_ext, + i, track_human_label, - track["category"], - rest_url, - ] - hashData = "|".join(hashData).encode("utf-8") - outputTrackConfig["label"] = hashlib.md5(hashData).hexdigest() + "_%s" % i + ) outputTrackConfig["metadata"] = extra_metadata outputTrackConfig["name"] = track_human_label @@ -1138,17 +1191,10 @@ outputTrackConfig, ) elif dataset_ext == "bam": - real_indexes = track["conf"]["options"]["pileup"]["bam_indices"][ - "bam_index" - ] + real_indexes = track["conf"]["options"]["pileup"][ + "bam_indices" + ]["bam_index"] if not isinstance(real_indexes, list): - # <bam_indices> - # <bam_index>/path/to/a.bam.bai</bam_index> - # </bam_indices> - # - # The above will result in the 'bam_index' key containing a - # string. If there are two or more indices, the container - # becomes a list. Fun! real_indexes = [real_indexes] self.add_bam( @@ -1157,6 +1203,19 @@ track["conf"]["options"]["pileup"], bam_index=real_indexes[i], ) + elif dataset_ext == "cram": + real_indexes = track["conf"]["options"]["cram"][ + "cram_indices" + ]["cram_index"] + if not isinstance(real_indexes, list): + real_indexes = [real_indexes] + + self.add_cram( + dataset_path, + outputTrackConfig, + track["conf"]["options"]["cram"], + cram_index=real_indexes[i], + ) elif dataset_ext == "blastxml": self.add_blastxml( dataset_path, @@ -1290,14 +1349,18 @@ config_json.update(self.config_json) config_data = {} - config_data["disableAnalytics"] = data.get("analytics", "false") == "true" + config_data["disableAnalytics"] = ( + data.get("analytics", "false") == "true" + ) config_data["theme"] = { "palette": { "primary": {"main": data.get("primary_color", "#0D233F")}, "secondary": {"main": data.get("secondary_color", "#721E63")}, "tertiary": {"main": data.get("tertiary_color", "#135560")}, - "quaternary": {"main": data.get("quaternary_color", "#FFB11D")}, + "quaternary": { + "main": data.get("quaternary_color", "#FFB11D") + }, }, "typography": {"fontSize": int(data.get("font_size", 10))}, } @@ -1351,9 +1414,10 @@ parser = argparse.ArgumentParser(description="", epilog="") parser.add_argument("--xml", help="Track Configuration") parser.add_argument("--outdir", help="Output directory", default="out") - parser.add_argument("--version", "-V", action="version", version="%(prog)s 2.0.1") + parser.add_argument( + "--version", "-V", action="version", version="%(prog)s 2.0.1" + ) args = parser.parse_args() - tree = ET.parse(args.xml) root = tree.getroot() @@ -1448,7 +1512,8 @@ track_conf["format"] = track.attrib["format"] if track.find("options/style"): track_conf["style"] = { - item.tag: parse_style_conf(item) for item in track.find("options/style") + item.tag: parse_style_conf(item) + for item in track.find("options/style") } if track.find("options/style_labels"): track_conf["style_labels"] = { @@ -1461,7 +1526,9 @@ track_conf["format"] = track.attrib["format"] try: # Only pertains to gff3 + blastxml. TODO? - track_conf["style"] = {t.tag: t.text for t in track.find("options/style")} + track_conf["style"] = { + t.tag: t.text for t in track.find("options/style") + } except TypeError: track_conf["style"] = {} pass @@ -1492,7 +1559,9 @@ "primary_color": root.find("metadata/general/primary_color").text, "secondary_color": root.find("metadata/general/secondary_color").text, "tertiary_color": root.find("metadata/general/tertiary_color").text, - "quaternary_color": root.find("metadata/general/quaternary_color").text, + "quaternary_color": root.find( + "metadata/general/quaternary_color" + ).text, "font_size": root.find("metadata/general/font_size").text, } jc.add_general_configuration(general_data)