# HG changeset patch
# User fubar
# Date 1706168381 0
# Node ID cce8dacb240f63d145be81926828b8e9fd772db9
# Parent 7c2e28e144f32d17e2df3af7cb5dc822a41f1a93
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 1a20cf06627d429a211427753f223467188dbe7f-dirty
diff -r 7c2e28e144f3 -r cce8dacb240f Galaxy-History-jbrowse2samples.tar.gz
Binary file Galaxy-History-jbrowse2samples.tar.gz has changed
diff -r 7c2e28e144f3 -r cce8dacb240f abjbrowse2.py
--- a/abjbrowse2.py Mon Jan 22 12:05:09 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,1272 +0,0 @@
-#!/usr/bin/env python
-import argparse
-import binascii
-import datetime
-import hashlib
-import json
-import logging
-import os
-import re
-import shutil
-import struct
-import subprocess
-import tempfile
-import xml.etree.ElementTree as ET
-from collections import defaultdict
-
-logging.basicConfig(level=logging.INFO)
-log = logging.getLogger("jbrowse")
-TODAY = datetime.datetime.now().strftime("%Y-%m-%d")
-GALAXY_INFRASTRUCTURE_URL = None
-
-
-class ColorScaling(object):
-
- COLOR_FUNCTION_TEMPLATE = """
- function(feature, variableName, glyphObject, track) {{
- var score = {score};
- {opacity}
- return 'rgba({red}, {green}, {blue}, ' + opacity + ')';
- }}
- """
-
- COLOR_FUNCTION_TEMPLATE_QUAL = r"""
- function(feature, variableName, glyphObject, track) {{
- var search_up = function self(sf, attr){{
- if(sf.get(attr) !== undefined){{
- return sf.get(attr);
- }}
- if(sf.parent() === undefined) {{
- return;
- }}else{{
- return self(sf.parent(), attr);
- }}
- }};
-
- var search_down = function self(sf, attr){{
- if(sf.get(attr) !== undefined){{
- return sf.get(attr);
- }}
- if(sf.children() === undefined) {{
- return;
- }}else{{
- var kids = sf.children();
- for(var child_idx in kids){{
- var x = self(kids[child_idx], attr);
- if(x !== undefined){{
- return x;
- }}
- }}
- return;
- }}
- }};
-
- var color = ({user_spec_color} || search_up(feature, 'color') || search_down(feature, 'color') || {auto_gen_color});
- var score = (search_up(feature, 'score') || search_down(feature, 'score'));
- {opacity}
- if(score === undefined){{ opacity = 1; }}
- var result = /^#?([a-f\d]{{2}})([a-f\d]{{2}})([a-f\d]{{2}})$/i.exec(color);
- var red = parseInt(result[1], 16);
- var green = parseInt(result[2], 16);
- var blue = parseInt(result[3], 16);
- if(isNaN(opacity) || opacity < 0){{ opacity = 0; }}
- return 'rgba(' + red + ',' + green + ',' + blue + ',' + opacity + ')';
- }}
- """
-
- OPACITY_MATH = {
- "linear": """
- var opacity = (score - ({min})) / (({max}) - ({min}));
- """,
- "logarithmic": """
- var opacity = Math.log10(score - ({min})) / Math.log10(({max}) - ({min}));
- """,
- "blast": """
- var opacity = 0;
- if(score == 0.0) {{
- opacity = 1;
- }} else {{
- opacity = (20 - Math.log10(score)) / 180;
- }}
- """,
- }
-
- BREWER_COLOUR_IDX = 0
- BREWER_COLOUR_SCHEMES = [
- (166, 206, 227),
- (31, 120, 180),
- (178, 223, 138),
- (51, 160, 44),
- (251, 154, 153),
- (227, 26, 28),
- (253, 191, 111),
- (255, 127, 0),
- (202, 178, 214),
- (106, 61, 154),
- (255, 255, 153),
- (177, 89, 40),
- (228, 26, 28),
- (55, 126, 184),
- (77, 175, 74),
- (152, 78, 163),
- (255, 127, 0),
- ]
-
- BREWER_DIVERGING_PALLETES = {
- "BrBg": ("#543005", "#003c30"),
- "PiYg": ("#8e0152", "#276419"),
- "PRGn": ("#40004b", "#00441b"),
- "PuOr": ("#7f3b08", "#2d004b"),
- "RdBu": ("#67001f", "#053061"),
- "RdGy": ("#67001f", "#1a1a1a"),
- "RdYlBu": ("#a50026", "#313695"),
- "RdYlGn": ("#a50026", "#006837"),
- "Spectral": ("#9e0142", "#5e4fa2"),
- }
-
- def __init__(self):
- self.brewer_colour_idx = 0
-
- def rgb_from_hex(self, hexstr):
- # http://stackoverflow.com/questions/4296249/how-do-i-convert-a-hex-triplet-to-an-rgb-tuple-and-back
- return struct.unpack("BBB", binascii.unhexlify(hexstr))
-
- def min_max_gff(self, gff_file):
- min_val = None
- max_val = None
- with open(gff_file, "r") as handle:
- for line in handle:
- try:
- value = float(line.split("\t")[5])
- min_val = min(value, (min_val or value))
- max_val = max(value, (max_val or value))
- except Exception:
- pass
- return min_val, max_val
-
- def hex_from_rgb(self, r, g, b):
- return "#%02x%02x%02x" % (r, g, b)
-
- def _get_colours(self):
- r, g, b = self.BREWER_COLOUR_SCHEMES[
- self.brewer_colour_idx % len(self.BREWER_COLOUR_SCHEMES)
- ]
- self.brewer_colour_idx += 1
- return r, g, b
-
- def parse_menus(self, track):
- trackConfig = {"menuTemplate": [{}, {}, {}, {}]}
-
- if "menu" in track["menus"]:
- menu_list = [track["menus"]["menu"]]
- if isinstance(track["menus"]["menu"], list):
- menu_list = track["menus"]["menu"]
-
- for m in menu_list:
- tpl = {
- "action": m["action"],
- "label": m.get("label", "{name}"),
- "iconClass": m.get("iconClass", "dijitIconBookmark"),
- }
- if "url" in m:
- tpl["url"] = m["url"]
- if "content" in m:
- tpl["content"] = m["content"]
- if "title" in m:
- tpl["title"] = m["title"]
-
- trackConfig["menuTemplate"].append(tpl)
-
- return trackConfig
-
- def parse_colours(self, track, trackFormat, gff3=None):
-        # Wiggle tracks have a bicolor palette
- trackConfig = {"style": {}}
- if trackFormat == "wiggle":
-
- trackConfig["style"]["pos_color"] = track["wiggle"]["color_pos"]
- trackConfig["style"]["neg_color"] = track["wiggle"]["color_neg"]
-
- if trackConfig["style"]["pos_color"] == "__auto__":
- trackConfig["style"]["neg_color"] = self.hex_from_rgb(
- *self._get_colours()
- )
- trackConfig["style"]["pos_color"] = self.hex_from_rgb(
- *self._get_colours()
- )
-
- # Wiggle tracks can change colour at a specified place
- bc_pivot = track["wiggle"]["bicolor_pivot"]
- if bc_pivot not in ("mean", "zero"):
- # The values are either one of those two strings
- # or a number
- bc_pivot = float(bc_pivot)
- trackConfig["bicolor_pivot"] = bc_pivot
- elif "scaling" in track:
- if track["scaling"]["method"] == "ignore":
- if track["scaling"]["scheme"]["color"] != "__auto__":
- trackConfig["style"]["color"] = track["scaling"]["scheme"]["color"]
- else:
- trackConfig["style"]["color"] = self.hex_from_rgb(
- *self._get_colours()
- )
- else:
- # Scored method
- algo = track["scaling"]["algo"]
- # linear, logarithmic, blast
- scales = track["scaling"]["scales"]
- # type __auto__, manual (min, max)
- scheme = track["scaling"]["scheme"]
- # scheme -> (type (opacity), color)
- # ==================================
- # GENE CALLS OR BLAST
- # ==================================
- if trackFormat == "blast":
- red, green, blue = self._get_colours()
- color_function = self.COLOR_FUNCTION_TEMPLATE.format(
- **{
- "score": "feature._parent.get('score')",
- "opacity": self.OPACITY_MATH["blast"],
- "red": red,
- "green": green,
- "blue": blue,
- }
- )
- trackConfig["style"]["color"] = color_function.replace("\n", "")
- elif trackFormat == "gene_calls":
- # Default values, based on GFF3 spec
- min_val = 0
- max_val = 1000
- # Get min/max and build a scoring function since JBrowse doesn't
- if scales["type"] == "automatic" or scales["type"] == "__auto__":
- min_val, max_val = self.min_max_gff(gff3)
- else:
- min_val = scales.get("min", 0)
- max_val = scales.get("max", 1000)
-
- if scheme["color"] == "__auto__":
- user_color = "undefined"
- auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours())
- elif scheme["color"].startswith("#"):
- user_color = "'%s'" % self.hex_from_rgb(
- *self.rgb_from_hex(scheme["color"][1:])
- )
- auto_color = "undefined"
- else:
- user_color = "undefined"
- auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours())
-
- color_function = self.COLOR_FUNCTION_TEMPLATE_QUAL.format(
- **{
- "opacity": self.OPACITY_MATH[algo].format(
- **{"max": max_val, "min": min_val}
- ),
- "user_spec_color": user_color,
- "auto_gen_color": auto_color,
- }
- )
-
- trackConfig["style"]["color"] = color_function.replace("\n", "")
- return trackConfig
-
-
-def etree_to_dict(t):
- if t is None:
- return {}
-
- d = {t.tag: {} if t.attrib else None}
- children = list(t)
- if children:
- dd = defaultdict(list)
- for dc in map(etree_to_dict, children):
- for k, v in dc.items():
- dd[k].append(v)
- d = {t.tag: {k: v[0] if len(v) == 1 else v for k, v in dd.items()}}
- if t.attrib:
- d[t.tag].update(("@" + k, v) for k, v in t.attrib.items())
- if t.text:
- text = t.text.strip()
- if children or t.attrib:
- if text:
- d[t.tag]["#text"] = text
- else:
- d[t.tag] = text
- return d
-
-
-# score comes from feature._parent.get('score') or feature.get('score')
-
-INSTALLED_TO = os.path.dirname(os.path.realpath(__file__))
-
-
-def metadata_from_node(node):
- metadata = {}
- try:
- if len(node.findall("dataset")) != 1:
- # exit early
- return metadata
- except Exception:
- return {}
-
- for (key, value) in node.findall("dataset")[0].attrib.items():
- metadata["dataset_%s" % key] = value
-
- for (key, value) in node.findall("history")[0].attrib.items():
- metadata["history_%s" % key] = value
-
- for (key, value) in node.findall("metadata")[0].attrib.items():
- metadata["metadata_%s" % key] = value
-
- for (key, value) in node.findall("tool")[0].attrib.items():
- metadata["tool_%s" % key] = value
-
- # Additional Mappings applied:
-    metadata["dataset_edam_format"] = (
-        '<a target="_blank" href="http://edamontology.org/{0}">{1}</a>'.format(
-            metadata["dataset_edam_format"], metadata["dataset_file_ext"]
-        )
-    )
-    metadata["history_user_email"] = '<a href="mailto:{0}">{0}</a>'.format(
-        metadata["history_user_email"]
-    )
-    metadata["history_display_name"] = (
-        '<a target="_blank" href="{galaxy}/history/view/{encoded_hist_id}">{hist_name}</a>'.format(
-            galaxy=GALAXY_INFRASTRUCTURE_URL,
-            encoded_hist_id=metadata["history_id"],
-            hist_name=metadata["history_display_name"],
-        )
-    )
-    metadata["tool_tool"] = (
-        '<a target="_blank" href="{galaxy}/datasets/{encoded_id}/show_params">{tool_id}</a>'.format(
-            galaxy=GALAXY_INFRASTRUCTURE_URL,
-            encoded_id=metadata["dataset_id"],
-            tool_id=metadata["tool_tool_id"],
-            # tool_version=metadata['tool_tool_version'],
-        )
-    )
- return metadata
-
-
-class JbrowseConnector(object):
- def __init__(self, jbrowse, outdir, genomes):
- self.cs = ColorScaling()
- self.jbrowse = jbrowse
- self.outdir = outdir
- self.genome_paths = genomes
- self.tracksToIndex = []
-
- # This is the id of the current assembly
- self.assembly_ids = {}
- self.current_assembly_id = []
-
- # If upgrading, look at the existing data
- self.check_existing(self.outdir)
-
- self.clone_jbrowse(self.jbrowse, self.outdir)
-
- self.process_genomes()
-
- def subprocess_check_call(self, command, output=None):
- if output:
- log.debug("cd %s && %s > %s", self.outdir, " ".join(command), output)
- subprocess.check_call(command, cwd=self.outdir, stdout=output)
- else:
- log.debug("cd %s && %s", self.outdir, " ".join(command))
- subprocess.check_call(command, cwd=self.outdir)
-
- def subprocess_popen(self, command):
- log.debug("cd %s && %s", self.outdir, command)
- p = subprocess.Popen(
- command,
- shell=True,
- stdin=subprocess.PIPE,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE,
- )
- output, err = p.communicate()
- retcode = p.returncode
- if retcode != 0:
- log.error("cd %s && %s", self.outdir, command)
- log.error(output)
- log.error(err)
- raise RuntimeError("Command failed with exit code %s" % (retcode))
-
- def subprocess_check_output(self, command):
- log.debug("cd %s && %s", self.outdir, " ".join(command))
- return subprocess.check_output(command, cwd=self.outdir)
-
- def symlink_or_copy(self, src, dest):
- if "GALAXY_JBROWSE_SYMLINKS" in os.environ and bool(
- os.environ["GALAXY_JBROWSE_SYMLINKS"]
- ):
- cmd = ["ln", "-s", src, dest]
- else:
- cmd = ["cp", src, dest]
-
- return self.subprocess_check_call(cmd)
-
- def symlink_or_copy_load_action(self):
- if "GALAXY_JBROWSE_SYMLINKS" in os.environ and bool(
- os.environ["GALAXY_JBROWSE_SYMLINKS"]
- ):
- return "symlink"
- else:
- return "copy"
-
- def check_existing(self, destination):
- existing = os.path.join(destination, "data", "config.json")
- if os.path.exists(existing):
- with open(existing, "r") as existing_conf:
- conf = json.load(existing_conf)
- if "assemblies" in conf:
- for assembly in conf["assemblies"]:
- if "name" in assembly:
- self.assembly_ids[assembly["name"]] = None
-
- def process_genomes(self):
- for genome_node in self.genome_paths:
- # We only expect one input genome per run. This for loop is just
- # easier to write than the alternative / catches any possible
- # issues.
- self.add_assembly(genome_node["path"], genome_node["label"])
-
- def add_assembly(self, path, label, default=True):
- # Find a non-existing filename for the new genome
-        # (to avoid collision when upgrading an existing instance)
- rel_seq_path = os.path.join("data", "assembly")
- seq_path = os.path.join(self.outdir, rel_seq_path)
- fn_try = 1
- while (
- os.path.exists(seq_path + ".fasta")
- or os.path.exists(seq_path + ".fasta.gz")
- or os.path.exists(seq_path + ".fasta.gz.fai")
- or os.path.exists(seq_path + ".fasta.gz.gzi")
- ):
- rel_seq_path = os.path.join("data", "assembly%s" % fn_try)
- seq_path = os.path.join(self.outdir, rel_seq_path)
- fn_try += 1
-
- # Find a non-existing label for the new genome
-        # (to avoid collision when upgrading an existing instance)
- lab_try = 1
- uniq_label = label
- while uniq_label in self.assembly_ids:
- uniq_label = label + str(lab_try)
- lab_try += 1
-
- # Find a default scaffold to display
- # TODO this may not be necessary in the future, see https://github.com/GMOD/jbrowse-components/issues/2708
- with open(path, "r") as fa_handle:
- fa_header = fa_handle.readline()[1:].strip().split(" ")[0]
-
- self.assembly_ids[uniq_label] = fa_header
- if default:
- self.current_assembly_id = uniq_label
-
- copied_genome = seq_path + ".fasta"
- shutil.copy(path, copied_genome)
-
- # Compress with bgzip
- cmd = ["bgzip", copied_genome]
- self.subprocess_check_call(cmd)
-
- # FAI Index
- cmd = ["samtools", "faidx", copied_genome + ".gz"]
- self.subprocess_check_call(cmd)
-
- self.subprocess_check_call(
- [
- "jbrowse",
- "add-assembly",
- "--load",
- "inPlace",
- "--name",
- uniq_label,
- "--type",
- "bgzipFasta",
- "--target",
- os.path.join(self.outdir, "data"),
- "--skipCheck",
- rel_seq_path + ".fasta.gz",
- ]
- )
-
- return uniq_label
-
- def text_index(self):
- # Index tracks
- args = [
- "jbrowse",
- "text-index",
- "--target",
- os.path.join(self.outdir, "data"),
- "--assemblies",
- self.current_assembly_id,
- ]
-
- tracks = ",".join(self.tracksToIndex)
- if tracks:
- args += ["--tracks", tracks]
-
- self.subprocess_check_call(args)
-
- def _blastxml_to_gff3(self, xml, min_gap=10):
- gff3_unrebased = tempfile.NamedTemporaryFile(delete=False)
- cmd = [
- "python",
- os.path.join(INSTALLED_TO, "blastxml_to_gapped_gff3.py"),
- "--trim",
- "--trim_end",
- "--include_seq",
- "--min_gap",
- str(min_gap),
- xml,
- ]
- log.debug("cd %s && %s > %s", self.outdir, " ".join(cmd), gff3_unrebased.name)
- subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_unrebased)
- gff3_unrebased.close()
- return gff3_unrebased.name
-
- def _prepare_track_style(self, xml_conf):
-
- style_data = {"type": "LinearBasicDisplay"}
-
- if "display" in xml_conf["style"]:
- style_data["type"] = xml_conf["style"]["display"]
- del xml_conf["style"]["display"]
-
- style_data["displayId"] = "%s_%s" % (xml_conf["label"], style_data["type"])
-
- style_data.update(xml_conf["style"])
-
- return {"displays": [style_data]}
-
- def add_blastxml(self, data, trackData, blastOpts, **kwargs):
- gff3 = self._blastxml_to_gff3(data, min_gap=blastOpts["min_gap"])
-
- if "parent" in blastOpts and blastOpts["parent"] != "None":
- gff3_rebased = tempfile.NamedTemporaryFile(delete=False)
- cmd = ["python", os.path.join(INSTALLED_TO, "gff3_rebase.py")]
- if blastOpts.get("protein", "false") == "true":
- cmd.append("--protein2dna")
- cmd.extend([os.path.realpath(blastOpts["parent"]), gff3])
- log.debug("cd %s && %s > %s", self.outdir, " ".join(cmd), gff3_rebased.name)
- subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_rebased)
- gff3_rebased.close()
-
- # Replace original gff3 file
- shutil.copy(gff3_rebased.name, gff3)
- os.unlink(gff3_rebased.name)
-
- rel_dest = os.path.join("data", trackData["label"] + ".gff")
- dest = os.path.join(self.outdir, rel_dest)
-
- self._sort_gff(gff3, dest)
- os.unlink(gff3)
-
- style_json = self._prepare_track_style(trackData)
-
- self._add_track(
- trackData["label"],
- trackData["key"],
- trackData["category"],
- rel_dest + ".gz",
- config=style_json,
- )
-
- def add_bigwig(self, data, trackData, wiggleOpts, **kwargs):
-
- rel_dest = os.path.join("data", trackData["label"] + ".bw")
- dest = os.path.join(self.outdir, rel_dest)
- self.symlink_or_copy(os.path.realpath(data), dest)
-
- style_json = self._prepare_track_style(trackData)
-
- self._add_track(
- trackData["label"],
- trackData["key"],
- trackData["category"],
- rel_dest,
- config=style_json,
- )
-
- # Anything ending in "am" (Bam or Cram)
- def add_xam(self, data, trackData, xamOpts, index=None, ext="bam", **kwargs):
-
- index_ext = "bai"
- if ext == "cram":
- index_ext = "crai"
-
- rel_dest = os.path.join("data", trackData["label"] + ".%s" % ext)
- dest = os.path.join(self.outdir, rel_dest)
-
- self.symlink_or_copy(os.path.realpath(data), dest)
-
- if index is not None and os.path.exists(os.path.realpath(index)):
- # xai most probably made by galaxy and stored in galaxy dirs, need to copy it to dest
- self.subprocess_check_call(
- ["cp", os.path.realpath(index), dest + ".%s" % index_ext]
- )
- else:
- # Can happen in exotic condition
- # e.g. if bam imported as symlink with datatype=unsorted.bam, then datatype changed to bam
- # => no index generated by galaxy, but there might be one next to the symlink target
- # this trick allows to skip the bam sorting made by galaxy if already done outside
- if os.path.exists(os.path.realpath(data) + ".%s" % index_ext):
- self.symlink_or_copy(
- os.path.realpath(data) + ".%s" % index_ext, dest + ".%s" % index_ext
- )
- else:
- log.warn(
-                    "Could not find a bam index (.%s file) for %s", index_ext, data
- )
-
- style_json = self._prepare_track_style(trackData)
-
- self._add_track(
- trackData["label"],
- trackData["key"],
- trackData["category"],
- rel_dest,
- config=style_json,
- )
-
- def add_vcf(self, data, trackData, vcfOpts={}, zipped=False, **kwargs):
-
- if zipped:
- rel_dest = os.path.join("data", trackData["label"] + ".vcf.gz")
- dest = os.path.join(self.outdir, rel_dest)
- shutil.copy(os.path.realpath(data), dest)
- else:
- rel_dest = os.path.join("data", trackData["label"] + ".vcf")
- dest = os.path.join(self.outdir, rel_dest)
- shutil.copy(os.path.realpath(data), dest)
-
- cmd = ["bgzip", dest]
- self.subprocess_check_call(cmd)
- cmd = ["tabix", dest + ".gz"]
- self.subprocess_check_call(cmd)
-
- rel_dest = os.path.join("data", trackData["label"] + ".vcf.gz")
-
- style_json = self._prepare_track_style(trackData)
-
- self._add_track(
- trackData["label"],
- trackData["key"],
- trackData["category"],
- rel_dest,
- config=style_json,
- )
-
- def add_gff(self, data, format, trackData, gffOpts, **kwargs):
- rel_dest = os.path.join("data", trackData["label"] + ".gff")
- dest = os.path.join(self.outdir, rel_dest)
-
- self._sort_gff(data, dest)
-
- style_json = self._prepare_track_style(trackData)
-
- self._add_track(
- trackData["label"],
- trackData["key"],
- trackData["category"],
- rel_dest + ".gz",
- config=style_json,
- )
-
- def add_bed(self, data, format, trackData, gffOpts, **kwargs):
- rel_dest = os.path.join("data", trackData["label"] + ".bed")
- dest = os.path.join(self.outdir, rel_dest)
-
- self._sort_bed(data, dest)
-
- style_json = self._prepare_track_style(trackData)
-
- self._add_track(
- trackData["label"],
- trackData["key"],
- trackData["category"],
- rel_dest + ".gz",
- config=style_json,
- )
-
- def add_paf(self, data, trackData, pafOpts, **kwargs):
- rel_dest = os.path.join("data", trackData["label"] + ".paf")
- dest = os.path.join(self.outdir, rel_dest)
-
- self.symlink_or_copy(os.path.realpath(data), dest)
-
- added_assembly = self.add_assembly(
- pafOpts["genome"], pafOpts["genome_label"], default=False
- )
-
- style_json = self._prepare_track_style(trackData)
-
- self._add_track(
- trackData["label"],
- trackData["key"],
- trackData["category"],
- rel_dest,
- assemblies=[self.current_assembly_id, added_assembly],
- config=style_json,
- )
-
- def add_hic(self, data, trackData, hicOpts, **kwargs):
- rel_dest = os.path.join("data", trackData["label"] + ".hic")
- dest = os.path.join(self.outdir, rel_dest)
-
- self.symlink_or_copy(os.path.realpath(data), dest)
-
- style_json = self._prepare_track_style(trackData)
-
- self._add_track(
- trackData["label"],
- trackData["key"],
- trackData["category"],
- rel_dest,
- config=style_json,
- )
-
- def add_sparql(self, url, query, query_refnames, trackData):
-
- json_track_data = {
- "type": "FeatureTrack",
-            "trackId": trackData["label"],
- "name": trackData["label"],
- "adapter": {
- "type": "SPARQLAdapter",
- "endpoint": {"uri": url, "locationType": "UriLocation"},
- "queryTemplate": query,
- },
- "category": [trackData["category"]],
- "assemblyNames": [self.current_assembly_id],
- }
-
- if query_refnames:
-            json_track_data["adapter"]["refNamesQueryTemplate"] = query_refnames
-
- self.subprocess_check_call(
- [
- "jbrowse",
- "add-track-json",
- "--target",
- os.path.join(self.outdir, "data"),
-                json.dumps(json_track_data),
- ]
- )
-
- # Doesn't work as of 1.6.4, might work in the future
- # self.subprocess_check_call([
- # 'jbrowse', 'add-track',
- # '--trackType', 'sparql',
- # '--name', trackData['label'],
- # '--category', trackData['category'],
- # '--target', os.path.join(self.outdir, 'data'),
- # '--trackId', id,
- # '--config', '{"queryTemplate": "%s"}' % query,
- # url])
-
- def _add_track(self, id, label, category, path, assemblies=[], config=None):
-
- assemblies_opt = self.current_assembly_id
- if assemblies:
- assemblies_opt = ",".join(assemblies)
-
- cmd = [
- "jbrowse",
- "add-track",
- "--load",
- "inPlace",
- "--name",
- label,
- "--category",
- category,
- "--target",
- os.path.join(self.outdir, "data"),
- "--trackId",
- id,
- "--assemblyNames",
- assemblies_opt,
- ]
-
- if config:
- cmd.append("--config")
- cmd.append(json.dumps(config))
-
- cmd.append(path)
-
- self.subprocess_check_call(cmd)
-
- def _sort_gff(self, data, dest):
- # Only index if not already done
- if not os.path.exists(dest):
- cmd = "gff3sort.pl --precise '%s' | grep -v \"^$\" > '%s'" % (data, dest)
- self.subprocess_popen(cmd)
-
- self.subprocess_check_call(["bgzip", "-f", dest])
- self.subprocess_check_call(["tabix", "-f", "-p", "gff", dest + ".gz"])
-
- def _sort_bed(self, data, dest):
- # Only index if not already done
- if not os.path.exists(dest):
- cmd = ["sort", "-k1,1", "-k2,2n", data]
- with open(dest, "w") as handle:
- self.subprocess_check_call(cmd, output=handle)
-
- self.subprocess_check_call(["bgzip", "-f", dest])
- self.subprocess_check_call(["tabix", "-f", "-p", "bed", dest + ".gz"])
-
- def process_annotations(self, track):
-
- category = track["category"].replace("__pd__date__pd__", TODAY)
- outputTrackConfig = {
- "category": category,
- }
-
- mapped_chars = {
- ">": "__gt__",
- "<": "__lt__",
- "'": "__sq__",
- '"': "__dq__",
- "[": "__ob__",
- "]": "__cb__",
- "{": "__oc__",
- "}": "__cc__",
- "@": "__at__",
- "#": "__pd__",
- "": "__cn__",
- }
-
- for i, (
- dataset_path,
- dataset_ext,
- track_human_label,
- extra_metadata,
- ) in enumerate(track["trackfiles"]):
- # Unsanitize labels (element_identifiers are always sanitized by Galaxy)
- for key, value in mapped_chars.items():
- track_human_label = track_human_label.replace(value, key)
-
- log.info(
- "Processing track %s / %s (%s)",
- category,
- track_human_label,
- dataset_ext,
- )
- outputTrackConfig["key"] = track_human_label
- # We add extra data to hash for the case of REST + SPARQL.
- if (
- "conf" in track
- and "options" in track["conf"]
- and "url" in track["conf"]["options"]
- ):
- rest_url = track["conf"]["options"]["url"]
- else:
- rest_url = ""
-
- # I chose to use track['category'] instead of 'category' here. This
- # is intentional. This way re-running the tool on a different date
- # will not generate different hashes and make comparison of outputs
- # much simpler.
- hashData = [
- str(dataset_path),
- track_human_label,
- track["category"],
- rest_url,
- self.current_assembly_id,
- ]
- hashData = "|".join(hashData).encode("utf-8")
- outputTrackConfig["label"] = hashlib.md5(hashData).hexdigest() + "_%s" % i
- outputTrackConfig["metadata"] = extra_metadata
-
- outputTrackConfig["style"] = track["style"]
-
- if "menus" in track["conf"]["options"]:
- menus = self.cs.parse_menus(track["conf"]["options"])
- outputTrackConfig.update(menus)
-
- if dataset_ext in ("gff", "gff3"):
- self.add_gff(
- dataset_path,
- dataset_ext,
- outputTrackConfig,
- track["conf"]["options"]["gff"],
- )
- elif dataset_ext == "bed":
- self.add_bed(
- dataset_path,
- dataset_ext,
- outputTrackConfig,
- track["conf"]["options"]["gff"],
- )
- elif dataset_ext == "bigwig":
- self.add_bigwig(
- dataset_path, outputTrackConfig, track["conf"]["options"]["wiggle"]
- )
- elif dataset_ext == "bam":
- real_indexes = track["conf"]["options"]["pileup"]["bam_indices"][
- "bam_index"
- ]
- if not isinstance(real_indexes, list):
-                    # <bam_indices>
-                    #  <bam_index>/path/to/a.bam.bai</bam_index>
-                    # </bam_indices>
-                    #
- # The above will result in the 'bam_index' key containing a
- # string. If there are two or more indices, the container
- # becomes a list. Fun!
- real_indexes = [real_indexes]
-
- self.add_xam(
- dataset_path,
- outputTrackConfig,
- track["conf"]["options"]["pileup"],
- index=real_indexes[i],
- ext="bam",
- )
- elif dataset_ext == "cram":
- real_indexes = track["conf"]["options"]["cram"]["cram_indices"][
- "cram_index"
- ]
- if not isinstance(real_indexes, list):
-                    # <bam_indices>
-                    #  <bam_index>/path/to/a.bam.bai</bam_index>
-                    # </bam_indices>
-                    #
- # The above will result in the 'bam_index' key containing a
- # string. If there are two or more indices, the container
- # becomes a list. Fun!
- real_indexes = [real_indexes]
-
- self.add_xam(
- dataset_path,
- outputTrackConfig,
- track["conf"]["options"]["cram"],
- index=real_indexes[i],
- ext="cram",
- )
- elif dataset_ext == "blastxml":
- self.add_blastxml(
- dataset_path, outputTrackConfig, track["conf"]["options"]["blast"]
- )
- elif dataset_ext == "vcf":
- self.add_vcf(dataset_path, outputTrackConfig)
- elif dataset_ext == "vcf_bgzip":
- self.add_vcf(dataset_path, outputTrackConfig, zipped=True)
- elif dataset_ext == "rest":
- self.add_rest(
- track["conf"]["options"]["rest"]["url"], outputTrackConfig
- )
- elif dataset_ext == "synteny":
- self.add_paf(
- dataset_path, outputTrackConfig, track["conf"]["options"]["synteny"]
- )
- elif dataset_ext == "hic":
- self.add_hic(
- dataset_path, outputTrackConfig, track["conf"]["options"]["hic"]
- )
- elif dataset_ext == "sparql":
- sparql_query = track["conf"]["options"]["sparql"]["query"]
- for key, value in mapped_chars.items():
- sparql_query = sparql_query.replace(value, key)
- sparql_query_refnames = track["conf"]["options"]["sparql"][
- "query_refnames"
- ]
- for key, value in mapped_chars.items():
- sparql_query_refnames = sparql_query_refnames.replace(value, key)
- self.add_sparql(
- track["conf"]["options"]["sparql"]["url"],
- sparql_query,
- sparql_query_refnames,
- outputTrackConfig,
- )
- else:
- log.warn("Do not know how to handle %s", dataset_ext)
-
- # Return non-human label for use in other fields
- yield outputTrackConfig["label"]
-
- def add_default_session(self, data):
- """
- Add some default session settings: set some assemblies/tracks on/off
- """
- tracks_data = []
-
- # TODO using the default session for now, but check out session specs in the future https://github.com/GMOD/jbrowse-components/issues/2708
-
- # We need to know the track type from the config.json generated just before
- config_path = os.path.join(self.outdir, "data", "config.json")
- track_types = {}
- with open(config_path, "r") as config_file:
- config_json = json.load(config_file)
-
- for track_conf in config_json["tracks"]:
- track_types[track_conf["trackId"]] = track_conf["type"]
-
- for on_track in data["visibility"]["default_on"]:
- # TODO several problems with this currently
- # - we are forced to copy the same kind of style config as the per track config from _prepare_track_style (not exactly the same though)
- # - we get an error when refreshing the page
- # - this could be solved by session specs, see https://github.com/GMOD/jbrowse-components/issues/2708
- style_data = {"type": "LinearBasicDisplay", "height": 100}
-
- if on_track in data["style"]:
- if "display" in data["style"][on_track]:
- style_data["type"] = data["style"][on_track]["display"]
- del data["style"][on_track]["display"]
-
- style_data.update(data["style"][on_track])
-
- if on_track in data["style_labels"]:
- # TODO fix this: it should probably go in a renderer block (SvgFeatureRenderer) but still does not work
- # TODO move this to per track displays?
- style_data["labels"] = data["style_labels"][on_track]
-
- tracks_data.append(
- {
- "type": track_types[on_track],
- "configuration": on_track,
- "displays": [style_data],
- }
- )
-
- # The view for the assembly we're adding
- view_json = {"type": "LinearGenomeView", "tracks": tracks_data}
-
- refName = None
- if data.get("defaultLocation", ""):
- loc_match = re.search(r"^(\w+):(\d+)\.+(\d+)$", data["defaultLocation"])
- if loc_match:
- refName = loc_match.group(1)
- start = int(loc_match.group(2))
- end = int(loc_match.group(3))
- elif self.assembly_ids[self.current_assembly_id] is not None:
- refName = self.assembly_ids[self.current_assembly_id]
- start = 0
- end = 1000000 # Booh, hard coded! waiting for https://github.com/GMOD/jbrowse-components/issues/2708
-
- if refName is not None:
- # TODO displayedRegions is not just zooming to the region, it hides the rest of the chromosome
- view_json["displayedRegions"] = [
- {
- "refName": refName,
- "start": start,
- "end": end,
- "reversed": False,
- "assemblyName": self.current_assembly_id,
- }
- ]
-
- session_name = data.get("session_name", "New session")
- if not session_name:
- session_name = "New session"
-
- # Merge with possibly existing defaultSession (if upgrading a jbrowse instance)
- session_json = {}
- if "defaultSession" in config_json:
- session_json = config_json["defaultSession"]
-
- session_json["name"] = session_name
-
- if "views" not in session_json:
- session_json["views"] = []
-
- session_json["views"].append(view_json)
-
- config_json["defaultSession"] = session_json
-
- with open(config_path, "w") as config_file:
- json.dump(config_json, config_file, indent=2)
-
- def add_general_configuration(self, data):
- """
- Add some general configuration to the config.json file
- """
-
- config_path = os.path.join(self.outdir, "data", "config.json")
- with open(config_path, "r") as config_file:
- config_json = json.load(config_file)
-
- config_data = {}
-
- config_data["disableAnalytics"] = data.get("analytics", "false") == "true"
-
- config_data["theme"] = {
- "palette": {
- "primary": {"main": data.get("primary_color", "#0D233F")},
- "secondary": {"main": data.get("secondary_color", "#721E63")},
- "tertiary": {"main": data.get("tertiary_color", "#135560")},
- "quaternary": {"main": data.get("quaternary_color", "#FFB11D")},
- },
- "typography": {"fontSize": int(data.get("font_size", 10))},
- }
-
- config_json["configuration"].update(config_data)
-
- with open(config_path, "w") as config_file:
- json.dump(config_json, config_file, indent=2)
-
- def clone_jbrowse(self, jbrowse_dir, destination):
- """Clone a JBrowse directory into a destination directory."""
-
- copytree(jbrowse_dir, destination)
-
- try:
- shutil.rmtree(os.path.join(destination, "test_data"))
- except OSError as e:
- log.error("Error: %s - %s." % (e.filename, e.strerror))
-
- if not os.path.exists(os.path.join(destination, "data")):
- # It can already exist if upgrading an instance
- os.makedirs(os.path.join(destination, "data"))
- log.info("makedir %s" % (os.path.join(destination, "data")))
-
- os.symlink("./data/config.json", os.path.join(destination, "config.json"))
-
-
-def copytree(src, dst, symlinks=False, ignore=None):
- for item in os.listdir(src):
- s = os.path.join(src, item)
- d = os.path.join(dst, item)
- if os.path.isdir(s):
- shutil.copytree(s, d, symlinks, ignore)
- else:
- shutil.copy2(s, d)
-
-
-def parse_style_conf(item):
- if "type" in item.attrib and item.attrib["type"] in ["boolean", "integer"]:
- if item.attrib["type"] == "boolean":
- return item.text in ("yes", "true", "True")
- elif item.attrib["type"] == "integer":
- return int(item.text)
- else:
- return item.text
-
-
-if __name__ == "__main__":
- parser = argparse.ArgumentParser(description="", epilog="")
- parser.add_argument("xml", type=argparse.FileType("r"), help="Track Configuration")
-
- parser.add_argument("--jbrowse", help="Folder containing a jbrowse release")
- parser.add_argument("--outdir", help="Output directory", default="out")
- parser.add_argument("--version", "-V", action="version", version="%(prog)s 0.8.0")
- args = parser.parse_args()
-
- tree = ET.parse(args.xml.name)
- root = tree.getroot()
-
- # This should be done ASAP
- GALAXY_INFRASTRUCTURE_URL = root.find("metadata/galaxyUrl").text
- # Sometimes this comes as `localhost` without a protocol
- if not GALAXY_INFRASTRUCTURE_URL.startswith("http"):
- # so we'll prepend `http://` and hope for the best. Requests *should*
- # be GET and not POST so it should redirect OK
- GALAXY_INFRASTRUCTURE_URL = "http://" + GALAXY_INFRASTRUCTURE_URL
-
- jc = JbrowseConnector(
- jbrowse=args.jbrowse,
- outdir=args.outdir,
- genomes=[
- {
- "path": os.path.realpath(x.attrib["path"]),
- "meta": metadata_from_node(x.find("metadata")),
- "label": x.attrib["label"],
- }
- for x in root.findall("metadata/genomes/genome")
- ],
- )
-
- default_session_data = {
- "visibility": {
- "default_on": [],
- "default_off": [],
- },
- "style": {},
- "style_labels": {},
- }
-
- # TODO add metadata to tracks
- for track in root.findall("tracks/track"):
- track_conf = {}
- track_conf["trackfiles"] = []
-
- trackfiles = track.findall("files/trackFile")
- if trackfiles:
-            for x in trackfiles:
-                metadata = metadata_from_node(x.find("metadata"))
-
-                track_conf["trackfiles"].append(
-                    (
-                        os.path.realpath(x.attrib["path"]),
-                        x.attrib["ext"],
-                        x.attrib["label"],
-                        metadata,
-                    )
-                )
- else:
- # For tracks without files (rest, sparql)
- track_conf["trackfiles"].append(
- (
- "", # N/A, no path for rest or sparql
- track.attrib["format"],
- track.find("options/label").text,
- {},
- )
- )
-
- track_conf["category"] = track.attrib["cat"]
- track_conf["format"] = track.attrib["format"]
- track_conf["style"] = {
- item.tag: parse_style_conf(item) for item in track.find("options/style")
- }
-
- track_conf["style_labels"] = {
- item.tag: parse_style_conf(item)
- for item in track.find("options/style_labels")
- }
-
- track_conf["conf"] = etree_to_dict(track.find("options"))
- keys = jc.process_annotations(track_conf)
-
- for key in keys:
- default_session_data["visibility"][
- track.attrib.get("visibility", "default_off")
- ].append(key)
-
- default_session_data["style"][key] = track_conf[
- "style"
- ] # TODO do we need this anymore?
- default_session_data["style_labels"][key] = track_conf["style_labels"]
-
- default_session_data["defaultLocation"] = root.find(
- "metadata/general/defaultLocation"
- ).text
- default_session_data["session_name"] = root.find(
- "metadata/general/session_name"
- ).text
-
- general_data = {
- "analytics": root.find("metadata/general/analytics").text,
- "primary_color": root.find("metadata/general/primary_color").text,
- "secondary_color": root.find("metadata/general/secondary_color").text,
- "tertiary_color": root.find("metadata/general/tertiary_color").text,
- "quaternary_color": root.find("metadata/general/quaternary_color").text,
- "font_size": root.find("metadata/general/font_size").text,
- }
-
- jc.add_default_session(default_session_data)
- jc.add_general_configuration(general_data)
- jc.text_index()
diff -r 7c2e28e144f3 -r cce8dacb240f abjbrowse2.xml
--- a/abjbrowse2.xml Mon Jan 22 12:05:09 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,1295 +0,0 @@
-
- genome browser
-
- macros.xml
-
-
- topic_3307
- topic_0092
-
-
- operation_0573
- operation_0564
-
-
- jbrowse
-
-
- python '${__tool_directory__}/jbrowse2.py' --version
- to test the files I want to test. Hmph.
-#if str($uglyTestingHack) == "enabled":
- cp $trackxml $output
-#end if
- ]]>
-
-
-
-
-
- #if str($reference_genome.genome_type_select) == "indexed":
-
-
- s
- #else
-
-
-
-
- #else
- user_email="anonymous"
- user_id="-1"
- display_name="Unnamed History"/>
- #end if
-
-
-
-
- #end if
-
-
- ${jbgen.defaultLocation}
- ${jbgen.enableAnalytics}
- ${jbgen.primary_color}
- ${jbgen.secondary_color}
- ${jbgen.tertiary_color}
- ${jbgen.quaternary_color}
- ${jbgen.font_size}
- ${jbgen.session_name}
-
- ${__app__.config.galaxy_infrastructure_url}
-
-
- #for $tg in $track_groups:
- #for $track in $tg.data_tracks:
-
- #end for
- #end for
-
-
-]]>
-
- `__, who can help you
-with missing features or bugs in the tool.
-
-Options
--------
-
-The first option you encounter is the **Fasta Sequence(s)**. This option
-now accepts multiple fasta files, allowing you to build JBrowse
-instances that contain data for multiple genomes or chromosomes
-(generally known as "landmark features" in gff3 terminology). Up to 30
-will be shown in the dropdown selector within JBrowse; this is a known
-issue.
-
-**Genetic Code** is a new feature in v0.4 of JiG / v1.12.0 of JBrowse,
-which allows users to specify a non standard genetic code, and have
-JBrowse highlight the correct start and stop codons.
-
-**Track Groups** represent a set of tracks in a single category. These
-can be used to let your users understand relationships between large
-groups of tracks.
-
-.. image:: sections.png
-
-Annotation Tracks
------------------
-
-Within Track Groups, you have one or more **Annotation Tracks**. Each
-Annotation Track is a group of datasets which have similar styling.
-This allows you to rapidly build up JBrowse instances without having to
-configure tracks individually, which is a massive improvement over
-previous versions. For example, if you have five different GFF3 files from
-various gene callers that you wish to display, you can take advantage of
-this feature to style all of them similarly.
-
-There are a few different types of tracks supported, each with their own
-set of options:
-
-GFF3/BED
-~~~~~~~~
-
-These are your standard feature tracks. They usually highlight genes,
-mRNAs and other features of interest along a genomic region. The
-underlying tool and this help documentation focus primarily on GFF3
-data, and have not been tested extensively with other formats. Automatic
-min/max detection will fail with BED datasets.
-
-The data may be of a subclass we call **match/match part** data. This
-consists of top level ``match`` features, with a child ``match_part``
-feature, and is often used in displaying alignments. (See "Alignments"
-section on the `GFF3
-specification `__ for more
-information). If the data is match/match part, you will need to specify
-the top level match feature name, as it can be one of a few different SO
-terms, and JiG does not yet have the ability to understand SO terms.
-
-Next up is the **Styling Options** section, which lets you control a few
-properties of how the track is styled. Most of these you will not need
-to configure and can safely leave on defaults. Occasionally you will
-want to change what information is shown in the end product.
-
-.. image:: styling.png
-
-In the above image you can see some black text, and some blue text. The
-source of the black text is configured with the **style.label** option,
-and the source of the blue text is configured with the
-**style.description** option.
-
-Feature Score Scaling & Colouring Options
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-First, you need to choose between ignoring the score attribute of GFF3
-files, or using it. If you choose to ignore it, all features will be
-coloured with a solid colour. If you choose to use it, features will
-have slightly different colours based on their scores.
-
-.. image:: opacity.png
-
-If you choose **Ignore score**, you may choose between automatically
-choosing a colour, or manually specifying one. The automatically chosen
-colours vary along a brewer palette and generally look quite nice with
-no human intervention required. The manual colour choice is somewhat
-self-explanatory. Clicking on the small coloured square will bring up a
-colour palette.
-
-If you choose **Base on score**, you're faced with a dizzying array of
-options. First is the function to map the colour choices to colour
-values. JiG comes with a few functions built in such as linear scaling,
-logarithmic scaling, and blast scaling.
-
-The **linear scaling** method says "take these values, and they map
-directly to a range of output values". **Logarithmic scaling** says
-"please take the log of the score before mapping", and **Blast scaling**
-is further specialised to handle blast data more nicely. These are
-convenience functions to help transform the wide array of possible
-values in the GFF3 score attribute to more meaningful numbers. If you
-need more comprehensive score scaling, it is recommended that you
-pre-process your GFF3 files somehow.
-
-Once you've selected a scaling method, you can choose to manually
-specify the minimum and maximum expected values, or you can let JiG
-determine them for you automatically.
-
-Finally, opacity is the only mapping we currently provide. Future
-iterations will attempt to improve upon this and provide more colour
-scales. The Opacity option maps the highest scoring features to full
-opacity, and everything else to lower ones.
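-
-As a rough illustration only (this sketch is not part of the tool), the
-three mappings generated by ``OPACITY_MATH`` in the wrapper script behave
-approximately like the following Python, with ``min_val``/``max_val``
-standing in for the configured or auto-detected score bounds::
-
-    import math
-
-    def opacity(score, method, min_val=0, max_val=1000):
-        # Mirrors the JavaScript emitted into the JBrowse colour callback.
-        if method == "linear":
-            return (score - min_val) / (max_val - min_val)
-        if method == "logarithmic":
-            return math.log10(score - min_val) / math.log10(max_val - min_val)
-        if method == "blast":
-            # A score (e-value) of 0 is a perfect hit and stays fully opaque.
-            return 1 if score == 0.0 else (20 - math.log10(score)) / 180
-        raise ValueError("unknown scaling method: %s" % method)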
-
-BAM Pileups
-~~~~~~~~~~~
-
-We support BAM files and can automatically generate SNP tracks based on
-that bam data.
-
-.. image:: bam.png
-
-This is *strongly discouraged* for high coverage density datasets.
-Unfortunately there are no other configuration options exposed for bam
-files.
-
-BlastXML
-~~~~~~~~
-
-.. image:: blast.png
-
-JiG now supports both blastn and blastp datasets. JiG internally uses a
-blastXML to gapped GFF3 tool to convert your blastxml datasets into a
-format amenable to visualization in JBrowse. This tool is also
-available separately from the IUC on the toolshed.
-
-**Minimum Gap Size** reflects how long a gap must be before it becomes a
-real gap in the processed gff3 file. In the picture above, various sizes
-of gaps can be seen. If the minimum gap size was set much higher, say
-100nt, many of the smaller gaps would disappear, and the features on
-both sides would be merged into one, longer feature. This setting is
-inversely proportional to runtime and output file size. *Do not set this
-to a low value for large datasets*. By setting this number lower, you
-will have extremely large outputs and extremely long runtimes. The
-default was configured based off of the author's experience, but the
-author only works on small viruses. It is *strongly* recommended that
-you filter your blast results before display, e.g. picking out the top
-10 hits or so.
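-
-For orientation only, the wrapper drives that converter roughly as in the
-following sketch (the input file name is a placeholder; the real call
-lives in ``_blastxml_to_gff3`` in the wrapper script)::
-
-    import subprocess
-    import tempfile
-
-    gff3_out = tempfile.NamedTemporaryFile(delete=False)
-    subprocess.check_call(
-        [
-            "python", "blastxml_to_gapped_gff3.py",
-            "--trim", "--trim_end", "--include_seq",
-            "--min_gap", "10",  # the Minimum Gap Size option
-            "input_blast.xml",  # placeholder for your BlastXML dataset
-        ],
-        stdout=gff3_out,
-    )
-    gff3_out.close()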
-
-The **Protein blast search** option merely informs the underlying tools
-that they should adjust feature locations by 3x.
-
-Styling Options
-^^^^^^^^^^^^^^^
-
-Please see the styling options for GFF3 datasets, they are identical.
-
-Feature Score Scaling & Coloring Options
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Please see the score scaling and colouring options for GFF3 datasets,
-they are identical. Remember to set your score scaling to "blast" method
-if you do use it.
-
-Bigwig XY
-~~~~~~~~~
-
-.. image:: bigwig.png
-
-**XYPlot**
-
-BigWig tracks can be displayed as a "density" plot, which is a continuous
-line that varies in colour, or as an "XYPlot". XYPlots make it easier
-for users to visually identify specific features in a bigwig track;
-however, density tracks are more visually compact.
-
-**Variance Band** is an option available to XYPlots, and can be seen in
-the third and fourth tracks in the above picture. This overlays a mean
-line, and 1 and 2 standard deviation areas.
-
-**Track Scaling** is different from colour scaling; instead, it
-configures how the track behaves inside of JBrowse. **Autoscaling
-globally** means that JBrowse will determine the minimum and maximum for
-the track, and fix the bounds of the viewport to that. E.g. if your
-track ranges from 1-1000, and the region you're currently zoomed to only
-goes from 0-50, then the viewport range will still show 1-1000. This is
-good for global genomic context. However you may wish to consider
-**autoscaling locally** instead. In the example of a region which varies
-from 0-50, autoscaling locally would cause the individual track's
-viewport to re-adjust and show just the 0-50 region. If neither of these
-options is palatable, you may manually hardcode the minimum and
-maximum values for the track to scale to.
-
-Colour Options
-^^^^^^^^^^^^^^
-
-BigWig tracks have two colours in JBrowse, a positive and a negative
-colour.
-
-As always you may manually choose a colour, or let JiG choose for you.
-
-One of the more interesting options is the **Bicolor pivot**. This
-option allows you to control the point at which JBrowse switches from
-the positive colour to the negative. In the above graphic, you can see
-this has been configured to "mean" for the first two (orange and blue)
-tracks.
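-
-Internally these choices just fill in the wiggle style block of the
-generated track configuration, roughly as below (the hex values are
-illustrative, not defaults)::
-
-    trackConfig = {
-        "style": {
-            "pos_color": "#1f78b4",  # chosen manually or auto-picked from the palette
-            "neg_color": "#e31a1c",
-        },
-        "bicolor_pivot": "mean",  # or "zero", or a numeric pivot
-    }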
-
-VCFs/SNPs
-~~~~~~~~~
-
-These tracks do not support any special configuration.
-
-@ATTRIBUTION@
-]]>
-
-
-
diff -r 7c2e28e144f3 -r cce8dacb240f config.json.sample
--- a/config.json.sample Mon Jan 22 12:05:09 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,22 +0,0 @@
-{
- "configuration": {
- "rpc": {
- "defaultDriver": "WebWorkerRpcDriver",
- "drivers": {
- "MainThreadRpcDriver": {},
- "WebWorkerRpcDriver": {}
- }
- },
- "logoPath": {
- "locationType": "UriLocation",
- "uri": ""
- }
- },
- "plugins": [],
- "assemblies": [],
- "tracks": [],
- "internetAccounts": [],
- "aggregateTextSearchAdapters": [],
- "connections": [],
- "defaultSession": {}
-}
diff -r 7c2e28e144f3 -r cce8dacb240f jbrowse2.py
--- a/jbrowse2.py Mon Jan 22 12:05:09 2024 +0000
+++ b/jbrowse2.py Thu Jan 25 07:39:41 2024 +0000
@@ -18,7 +18,7 @@
logging.basicConfig(level=logging.INFO)
log = logging.getLogger("jbrowse")
-JB2VER = "v2.10.0"
+JB2VER = "v2.10.1"
# version pinned for cloning
TODAY = datetime.datetime.now().strftime("%Y-%m-%d")
@@ -458,10 +458,10 @@
self.genome_name = (
genome_name # first one for all tracks - other than paf
)
- if self.config_json.get("assemblies", None):
- self.config_json["assemblies"] += assemblies
- else:
- self.config_json["assemblies"] = assemblies
+ if self.config_json.get("assemblies", None):
+ self.config_json["assemblies"] += assemblies
+ else:
+ self.config_json["assemblies"] = assemblies
def make_assembly(self, fapath, gname):
hashData = [
@@ -727,6 +727,14 @@
os.unlink(gff3)
def add_bigwig(self, data, trackData):
+ """ "type": "LinearWiggleDisplay",
+ "configuration": {},
+ "selectedRendering": "",
+ "resolution": 1,
+ "posColor": "rgb(228, 26, 28)",
+ "negColor": "rgb(255, 255, 51)",
+ "constraints": {}
+ """
url = "%s.bigwig" % trackData["label"]
# slashes in names cause path trouble
dest = os.path.join(self.outdir, url)
@@ -756,7 +764,7 @@
trackDict["style"] = style_json
self.tracksToAdd.append(trackDict)
self.trackIdlist.append(tId)
- logging.debug("#### wig trackData=%s" % str(trackData))
+ logging.info("#### wig trackData=%s" % str(trackData))
def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs):
tId = trackData["label"]
@@ -949,11 +957,11 @@
tname = trackData["name"]
tId = trackData["label"]
pgname = pafOpts["genome_label"]
- if len(pgname.split() > 1):
+ if len(pgname.split()) > 1:
pgname = pgname.split()[
0
] # trouble from spacey names in command lines avoidance
- asstrack, gname = self.make_assembly(pafOpts["genome"], pgname)
+ asstrack = self.make_assembly(pafOpts["genome"], pgname)
self.genome_names.append(pgname)
if self.config_json.get("assemblies", None):
self.config_json["assemblies"].append(asstrack)
@@ -961,7 +969,6 @@
self.config_json["assemblies"] = [
asstrack,
]
-
url = "%s.paf" % (trackData["label"])
dest = "%s/%s" % (self.outdir, url)
self.symlink_or_copy(os.path.realpath(data), dest)
@@ -975,6 +982,16 @@
"pafLocation": {"uri": url},
"assemblyNames": [self.genome_name, pgname],
},
+ # "displays": [
+ # {
+ # "type": "LinearSyntenyDisplay",
+ # "displayId": "%s-LinearSyntenyDisplay" % tId,
+ # },
+ # {
+ # "type": "DotPlotDisplay",
+ # "displayId": "%s-DotPlotDisplay" % tId,
+ # },
+ # ],
}
style_json = self._prepare_track_style(trackDict)
trackDict["style"] = style_json
@@ -1130,6 +1147,10 @@
)
elif dataset_ext == "vcf":
self.add_vcf(dataset_path, outputTrackConfig)
+ elif dataset_ext == "paf":
+ self.add_paf(
+ dataset_path, outputTrackConfig, track["conf"]["options"]["synteny"]
+ )
else:
log.warn("Do not know how to handle %s", dataset_ext)
# Return non-human label for use in other fields
@@ -1177,7 +1198,7 @@
ddl = data["defaultLocation"]
loc_match = re.search(
r"^([^:]+):(\d+)\.+(\d+)$", ddl
- ) # was re.search(r"^(\w.+):(\d+)\.+(\d+)$"
+ )
if loc_match:
refName = loc_match.group(1)
start = int(loc_match.group(2))
@@ -1407,6 +1428,15 @@
}
track_conf["conf"] = etree_to_dict(track.find("options"))
+ track_conf["category"] = track.attrib["cat"]
+ track_conf["format"] = track.attrib["format"]
+ try:
+ # Only pertains to gff3 + blastxml. TODO?
+ track_conf["style"] = {t.tag: t.text for t in track.find("options/style")}
+ except TypeError:
+ track_conf["style"] = {}
+ pass
+ track_conf["conf"] = etree_to_dict(track.find("options"))
keys = jc.process_annotations(track_conf)
if keys:
@@ -1422,34 +1452,26 @@
default_session_data["style_labels"][key] = track_conf.get(
"style_labels", None
)
-
- default_session_data["defaultLocation"] = root.find(
- "metadata/general/defaultLocation"
- ).text
- default_session_data["session_name"] = root.find(
- "metadata/general/session_name"
- ).text
-
- general_data = {
- "analytics": root.find("metadata/general/analytics").text,
- "primary_color": root.find("metadata/general/primary_color").text,
- "secondary_color": root.find("metadata/general/secondary_color").text,
- "tertiary_color": root.find("metadata/general/tertiary_color").text,
- "quaternary_color": root.find("metadata/general/quaternary_color").text,
- "font_size": root.find("metadata/general/font_size").text,
- }
- track_conf["category"] = track.attrib["cat"]
- track_conf["format"] = track.attrib["format"]
- try:
- # Only pertains to gff3 + blastxml. TODO?
- track_conf["style"] = {t.tag: t.text for t in track.find("options/style")}
- except TypeError:
- track_conf["style"] = {}
- pass
- track_conf["conf"] = etree_to_dict(track.find("options"))
- jc.add_general_configuration(general_data)
- jc.config_json["tracks"] = jc.tracksToAdd
+ default_session_data["defaultLocation"] = root.find(
+ "metadata/general/defaultLocation"
+ ).text
+ default_session_data["session_name"] = root.find(
+ "metadata/general/session_name"
+ ).text
+ general_data = {
+ "analytics": root.find("metadata/general/analytics").text,
+ "primary_color": root.find("metadata/general/primary_color").text,
+ "secondary_color": root.find("metadata/general/secondary_color").text,
+ "tertiary_color": root.find("metadata/general/tertiary_color").text,
+ "quaternary_color": root.find("metadata/general/quaternary_color").text,
+ "font_size": root.find("metadata/general/font_size").text,
+ }
+ jc.add_general_configuration(general_data)
+ trackconf = jc.config_json.get("tracks", None)
+ if trackconf:
+        jc.config_json["tracks"] += jc.tracksToAdd
+ else:
+ jc.config_json["tracks"] = jc.tracksToAdd
jc.write_config()
jc.add_default_session(default_session_data)
-
# jc.text_index() not sure what broke here.
diff -r 7c2e28e144f3 -r cce8dacb240f jbrowse2.xml
--- a/jbrowse2.xml Mon Jan 22 12:05:09 2024 +0000
+++ b/jbrowse2.xml Thu Jan 25 07:39:41 2024 +0000
@@ -33,7 +33,7 @@
#if str($reference_genome.genome_type_select) == "indexed":
- s
+
#else
@@ -297,12 +297,12 @@
-
-
+
diff -r 7c2e28e144f3 -r cce8dacb240f macros.xml
--- a/macros.xml Mon Jan 22 12:05:09 2024 +0000
+++ b/macros.xml Thu Jan 25 07:39:41 2024 +0000
@@ -1,6 +1,6 @@
- 2.10.0
+ 2.10.1
topic_3307
diff -r 7c2e28e144f3 -r cce8dacb240f readme.rst
--- a/readme.rst Mon Jan 22 12:05:09 2024 +0000
+++ b/readme.rst Thu Jan 25 07:39:41 2024 +0000
@@ -38,11 +38,15 @@
- 2.10.0+galaxy2
- UPDATED existing JBrowse1.16.11 code to JBrowse 2.10.0
+ - was working well enough for VGP when the previous PR was discovered
+ - too late to backport all the fixes
+ - a working default session and some other ideas were copied over instead.
- seems to work well with defaults.
- need to document and implement track settings by running the browser locally.
- works well enough to be useful in workflows such as TreeValGal.
- JB2 seems to set defaults wisely.
- not yet ideal for users who need fine grained track control.
+ - synteny works.
Wrapper License (MIT/BSD Style)