diff abjbrowse2.py @ 13:1d86925dbb4c draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 873a12803692b0a84814a6dc08331d772d0e5492-dirty
author   | fubar
date     | Mon, 22 Jan 2024 11:52:19 +0000
parents  |
children | 7c2e28e144f3
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/abjbrowse2.py	Mon Jan 22 11:52:19 2024 +0000
@@ -0,0 +1,1097 @@
#!/usr/bin/env python
import argparse
import binascii
import datetime
import hashlib
import json
import logging
import os
import re
import shutil
import struct
import subprocess
import tempfile
import xml.etree.ElementTree as ET
from collections import defaultdict

logging.basicConfig(level=logging.INFO)
log = logging.getLogger('jbrowse')
TODAY = datetime.datetime.now().strftime("%Y-%m-%d")
GALAXY_INFRASTRUCTURE_URL = None


class ColorScaling(object):

    COLOR_FUNCTION_TEMPLATE = """
    function(feature, variableName, glyphObject, track) {{
        var score = {score};
        {opacity}
        return 'rgba({red}, {green}, {blue}, ' + opacity + ')';
    }}
    """

    COLOR_FUNCTION_TEMPLATE_QUAL = r"""
    function(feature, variableName, glyphObject, track) {{
        var search_up = function self(sf, attr){{
            if(sf.get(attr) !== undefined){{
                return sf.get(attr);
            }}
            if(sf.parent() === undefined) {{
                return;
            }}else{{
                return self(sf.parent(), attr);
            }}
        }};

        var search_down = function self(sf, attr){{
            if(sf.get(attr) !== undefined){{
                return sf.get(attr);
            }}
            if(sf.children() === undefined) {{
                return;
            }}else{{
                var kids = sf.children();
                for(var child_idx in kids){{
                    var x = self(kids[child_idx], attr);
                    if(x !== undefined){{
                        return x;
                    }}
                }}
                return;
            }}
        }};

        var color = ({user_spec_color} || search_up(feature, 'color') || search_down(feature, 'color') || {auto_gen_color});
        var score = (search_up(feature, 'score') || search_down(feature, 'score'));
        {opacity}
        if(score === undefined){{ opacity = 1; }}
        var result = /^#?([a-f\d]{{2}})([a-f\d]{{2}})([a-f\d]{{2}})$/i.exec(color);
        var red = parseInt(result[1], 16);
        var green = parseInt(result[2], 16);
        var blue = parseInt(result[3], 16);
        if(isNaN(opacity) || opacity < 0){{ opacity = 0; }}
        return 'rgba(' + red + ',' + green + ',' + blue + ',' + opacity + ')';
    }}
    """

    OPACITY_MATH = {
        'linear': """
            var opacity = (score - ({min})) / (({max}) - ({min}));
        """,
        'logarithmic': """
            var opacity = Math.log10(score - ({min})) / Math.log10(({max}) - ({min}));
        """,
        'blast': """
            var opacity = 0;
            if(score == 0.0) {{
                opacity = 1;
            }} else {{
                opacity = (20 - Math.log10(score)) / 180;
            }}
        """
    }

    BREWER_COLOUR_IDX = 0
    BREWER_COLOUR_SCHEMES = [
        (166, 206, 227),
        (31, 120, 180),
        (178, 223, 138),
        (51, 160, 44),
        (251, 154, 153),
        (227, 26, 28),
        (253, 191, 111),
        (255, 127, 0),
        (202, 178, 214),
        (106, 61, 154),
        (255, 255, 153),
        (177, 89, 40),
        (228, 26, 28),
        (55, 126, 184),
        (77, 175, 74),
        (152, 78, 163),
        (255, 127, 0),
    ]

    BREWER_DIVERGING_PALETTES = {
        'BrBg': ("#543005", "#003c30"),
        'PiYg': ("#8e0152", "#276419"),
        'PRGn': ("#40004b", "#00441b"),
        'PuOr': ("#7f3b08", "#2d004b"),
        'RdBu': ("#67001f", "#053061"),
        'RdGy': ("#67001f", "#1a1a1a"),
        'RdYlBu': ("#a50026", "#313695"),
        'RdYlGn': ("#a50026", "#006837"),
        'Spectral': ("#9e0142", "#5e4fa2"),
    }

    def __init__(self):
        self.brewer_colour_idx = 0

    def rgb_from_hex(self, hexstr):
        # http://stackoverflow.com/questions/4296249/how-do-i-convert-a-hex-triplet-to-an-rgb-tuple-and-back
        return struct.unpack('BBB', binascii.unhexlify(hexstr))

    def min_max_gff(self, gff_file):
        min_val = None
        max_val = None
        with open(gff_file, 'r') as handle:
            for line in handle:
                try:
                    value = float(line.split('\t')[5])
                    min_val = min(value, (min_val or value))
                    max_val = max(value, (max_val or value))
                except Exception:
                    pass
        return min_val, max_val
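
    # Illustration (not part of the tool, hypothetical data): min_max_gff()
    # reads column 6 of a GFF3 file as the score, so a file whose feature
    # lines score 3.5 and 12.0 yields (3.5, 12.0). Comment lines and rows
    # without a numeric score are silently skipped by the bare `except`.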

    def hex_from_rgb(self, r, g, b):
        return '#%02x%02x%02x' % (r, g, b)

    def _get_colours(self):
        r, g, b = self.BREWER_COLOUR_SCHEMES[self.brewer_colour_idx % len(self.BREWER_COLOUR_SCHEMES)]
        self.brewer_colour_idx += 1
        return r, g, b

    def parse_menus(self, track):
        trackConfig = {'menuTemplate': [{}, {}, {}, {}]}

        if 'menu' in track['menus']:
            menu_list = [track['menus']['menu']]
            if isinstance(track['menus']['menu'], list):
                menu_list = track['menus']['menu']

            for m in menu_list:
                tpl = {
                    'action': m['action'],
                    'label': m.get('label', '{name}'),
                    'iconClass': m.get('iconClass', 'dijitIconBookmark'),
                }
                if 'url' in m:
                    tpl['url'] = m['url']
                if 'content' in m:
                    tpl['content'] = m['content']
                if 'title' in m:
                    tpl['title'] = m['title']

                trackConfig['menuTemplate'].append(tpl)

        return trackConfig

    def parse_colours(self, track, trackFormat, gff3=None):
        # Wiggle tracks have a bicolor palette
        trackConfig = {'style': {}}
        if trackFormat == 'wiggle':

            trackConfig['style']['pos_color'] = track['wiggle']['color_pos']
            trackConfig['style']['neg_color'] = track['wiggle']['color_neg']

            if trackConfig['style']['pos_color'] == '__auto__':
                trackConfig['style']['neg_color'] = self.hex_from_rgb(*self._get_colours())
                trackConfig['style']['pos_color'] = self.hex_from_rgb(*self._get_colours())

            # Wiggle tracks can change colour at a specified place
            bc_pivot = track['wiggle']['bicolor_pivot']
            if bc_pivot not in ('mean', 'zero'):
                # The values are either one of those two strings or a number
                bc_pivot = float(bc_pivot)
            trackConfig['bicolor_pivot'] = bc_pivot
        elif 'scaling' in track:
            if track['scaling']['method'] == 'ignore':
                if track['scaling']['scheme']['color'] != '__auto__':
                    trackConfig['style']['color'] = track['scaling']['scheme']['color']
                else:
                    trackConfig['style']['color'] = self.hex_from_rgb(*self._get_colours())
            else:
                # Scored method
                algo = track['scaling']['algo']
                # linear, logarithmic, blast
                scales = track['scaling']['scales']
                # type __auto__, manual (min, max)
                scheme = track['scaling']['scheme']
                # scheme -> (type (opacity), color)
                # ==================================
                # GENE CALLS OR BLAST
                # ==================================
                if trackFormat == 'blast':
                    red, green, blue = self._get_colours()
                    color_function = self.COLOR_FUNCTION_TEMPLATE.format(**{
                        'score': "feature._parent.get('score')",
                        'opacity': self.OPACITY_MATH['blast'],
                        'red': red,
                        'green': green,
                        'blue': blue,
                    })
                    trackConfig['style']['color'] = color_function.replace('\n', '')
                elif trackFormat == 'gene_calls':
                    # Default values, based on GFF3 spec
                    min_val = 0
                    max_val = 1000
                    # Get min/max and build a scoring function since JBrowse doesn't
                    if scales['type'] == 'automatic' or scales['type'] == '__auto__':
                        min_val, max_val = self.min_max_gff(gff3)
                    else:
                        min_val = scales.get('min', 0)
                        max_val = scales.get('max', 1000)

                    if scheme['color'] == '__auto__':
                        user_color = 'undefined'
                        auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours())
                    elif scheme['color'].startswith('#'):
                        user_color = "'%s'" % self.hex_from_rgb(*self.rgb_from_hex(scheme['color'][1:]))
                        auto_color = 'undefined'
                    else:
                        user_color = 'undefined'
                        auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours())

                    color_function = self.COLOR_FUNCTION_TEMPLATE_QUAL.format(**{
                        'opacity': self.OPACITY_MATH[algo].format(**{'max': max_val, 'min': min_val}),
                        'user_spec_color': user_color,
                        'auto_gen_color': auto_color,
                    })

                    trackConfig['style']['color'] = color_function.replace('\n', '')
        return trackConfig
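
# Illustration (not part of the tool, hypothetical input): for a wiggle track
# whose color_pos/color_neg are both '__auto__' and whose bicolor_pivot is '0',
# a fresh ColorScaling().parse_colours(conf, 'wiggle') returns roughly
#   {'style': {'pos_color': '#1f78b4', 'neg_color': '#a6cee3'},
#    'bicolor_pivot': 0.0}
# with successive colours drawn from BREWER_COLOUR_SCHEMES.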


def etree_to_dict(t):
    if t is None:
        return {}

    d = {t.tag: {} if t.attrib else None}
    children = list(t)
    if children:
        dd = defaultdict(list)
        for dc in map(etree_to_dict, children):
            for k, v in dc.items():
                dd[k].append(v)
        d = {t.tag: {k: v[0] if len(v) == 1 else v for k, v in dd.items()}}
    if t.attrib:
        d[t.tag].update(('@' + k, v) for k, v in t.attrib.items())
    if t.text:
        text = t.text.strip()
        if children or t.attrib:
            if text:
                d[t.tag]['#text'] = text
        else:
            d[t.tag] = text
    return d
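
# Illustration (not part of the tool): etree_to_dict() folds attributes in
# under an '@' prefix and mixed text under '#text', e.g.
#   etree_to_dict(ET.fromstring('<a x="1"><b>t</b></a>'))
#   -> {'a': {'b': 't', '@x': '1'}}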

# score comes from feature._parent.get('score') or feature.get('score')

INSTALLED_TO = os.path.dirname(os.path.realpath(__file__))


def metadata_from_node(node):
    metadata = {}
    try:
        if len(node.findall('dataset')) != 1:
            # exit early
            return metadata
    except Exception:
        return {}

    for (key, value) in node.findall('dataset')[0].attrib.items():
        metadata['dataset_%s' % key] = value

    for (key, value) in node.findall('history')[0].attrib.items():
        metadata['history_%s' % key] = value

    for (key, value) in node.findall('metadata')[0].attrib.items():
        metadata['metadata_%s' % key] = value

    for (key, value) in node.findall('tool')[0].attrib.items():
        metadata['tool_%s' % key] = value

    # Additional Mappings applied:
    metadata['dataset_edam_format'] = '<a target="_blank" href="http://edamontology.org/{0}">{1}</a>'.format(metadata['dataset_edam_format'], metadata['dataset_file_ext'])
    metadata['history_user_email'] = '<a href="mailto:{0}">{0}</a>'.format(metadata['history_user_email'])
    metadata['history_display_name'] = '<a target="_blank" href="{galaxy}/history/view/{encoded_hist_id}">{hist_name}</a>'.format(
        galaxy=GALAXY_INFRASTRUCTURE_URL,
        encoded_hist_id=metadata['history_id'],
        hist_name=metadata['history_display_name']
    )
    metadata['tool_tool'] = '<a target="_blank" href="{galaxy}/datasets/{encoded_id}/show_params">{tool_id}</a>'.format(
        galaxy=GALAXY_INFRASTRUCTURE_URL,
        encoded_id=metadata['dataset_id'],
        tool_id=metadata['tool_tool_id'],
        # tool_version=metadata['tool_tool_version'],
    )
    return metadata
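
# Illustration (not part of the tool, hypothetical node): given a <metadata>
# element with single <dataset .../>, <history .../>, <metadata .../> and
# <tool .../> children, metadata_from_node() flattens each attribute into a
# prefixed key ('dataset_id', 'history_user_email', 'tool_tool_id', ...) and
# then rewrites a few of them as HTML links back to the Galaxy instance.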


class JbrowseConnector(object):

    def __init__(self, jbrowse, outdir, genomes):
        self.cs = ColorScaling()
        self.jbrowse = jbrowse
        self.outdir = outdir
        self.genome_paths = genomes
        self.tracksToIndex = []

        # This is the id of the current assembly
        self.assembly_ids = {}
        self.current_assembly_id = []

        # If upgrading, look at the existing data
        self.check_existing(self.outdir)

        self.clone_jbrowse(self.jbrowse, self.outdir)

        self.process_genomes()

    def subprocess_check_call(self, command, output=None):
        if output:
            log.debug('cd %s && %s > %s', self.outdir, ' '.join(command), output)
            subprocess.check_call(command, cwd=self.outdir, stdout=output)
        else:
            log.debug('cd %s && %s', self.outdir, ' '.join(command))
            subprocess.check_call(command, cwd=self.outdir)

    def subprocess_popen(self, command):
        log.debug('cd %s && %s', self.outdir, command)
        p = subprocess.Popen(command, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        output, err = p.communicate()
        retcode = p.returncode
        if retcode != 0:
            log.error('cd %s && %s', self.outdir, command)
            log.error(output)
            log.error(err)
            raise RuntimeError("Command failed with exit code %s" % (retcode))

    def subprocess_check_output(self, command):
        log.debug('cd %s && %s', self.outdir, ' '.join(command))
        return subprocess.check_output(command, cwd=self.outdir)

    def symlink_or_copy(self, src, dest):
        if 'GALAXY_JBROWSE_SYMLINKS' in os.environ and bool(os.environ['GALAXY_JBROWSE_SYMLINKS']):
            cmd = ['ln', '-s', src, dest]
        else:
            cmd = ['cp', src, dest]

        return self.subprocess_check_call(cmd)

    def symlink_or_copy_load_action(self):
        if 'GALAXY_JBROWSE_SYMLINKS' in os.environ and bool(os.environ['GALAXY_JBROWSE_SYMLINKS']):
            return 'symlink'
        else:
            return 'copy'

    def check_existing(self, destination):
        existing = os.path.join(destination, 'data', "config.json")
        if os.path.exists(existing):
            with open(existing, 'r') as existing_conf:
                conf = json.load(existing_conf)
                if 'assemblies' in conf:
                    for assembly in conf['assemblies']:
                        if 'name' in assembly:
                            self.assembly_ids[assembly['name']] = None

    def process_genomes(self):
        for genome_node in self.genome_paths:
            # We only expect one input genome per run. This for loop is just
            # easier to write than the alternative / catches any possible
            # issues.
            self.add_assembly(genome_node['path'], genome_node['label'])

    def add_assembly(self, path, label, default=True):
        # Find a non-existing filename for the new genome
        # (to avoid collision when upgrading an existing instance)
        rel_seq_path = os.path.join('data', 'assembly')
        seq_path = os.path.join(self.outdir, rel_seq_path)
        fn_try = 1
        while (os.path.exists(seq_path + '.fasta') or os.path.exists(seq_path + '.fasta.gz')
                or os.path.exists(seq_path + '.fasta.gz.fai') or os.path.exists(seq_path + '.fasta.gz.gzi')):
            rel_seq_path = os.path.join('data', 'assembly%s' % fn_try)
            seq_path = os.path.join(self.outdir, rel_seq_path)
            fn_try += 1

        # Find a non-existing label for the new genome
        # (to avoid collision when upgrading an existing instance)
        lab_try = 1
        uniq_label = label
        while uniq_label in self.assembly_ids:
            uniq_label = label + str(lab_try)
            lab_try += 1

        # Find a default scaffold to display
        # TODO this may not be necessary in the future, see https://github.com/GMOD/jbrowse-components/issues/2708
        with open(path, 'r') as fa_handle:
            fa_header = fa_handle.readline()[1:].strip().split(' ')[0]

        self.assembly_ids[uniq_label] = fa_header
        if default:
            self.current_assembly_id = uniq_label

        copied_genome = seq_path + '.fasta'
        shutil.copy(path, copied_genome)

        # Compress with bgzip
        cmd = ['bgzip', copied_genome]
        self.subprocess_check_call(cmd)

        # FAI index
        cmd = ['samtools', 'faidx', copied_genome + '.gz']
        self.subprocess_check_call(cmd)

        self.subprocess_check_call([
            'jbrowse', 'add-assembly',
            '--load', 'inPlace',
            '--name', uniq_label,
            '--type', 'bgzipFasta',
            '--target', os.path.join(self.outdir, 'data'),
            '--skipCheck',
            rel_seq_path + '.fasta.gz'])

        return uniq_label
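
    # Illustration (not part of the tool, hypothetical paths and label): for
    # a genome labelled 'hg38', add_assembly() runs roughly
    #   bgzip data/assembly.fasta
    #   samtools faidx data/assembly.fasta.gz
    #   jbrowse add-assembly --load inPlace --name hg38 --type bgzipFasta \
    #       --target <outdir>/data --skipCheck data/assembly.fasta.gz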

    def text_index(self):
        # Index tracks
        args = [
            'jbrowse', 'text-index',
            '--target', os.path.join(self.outdir, 'data'),
            '--assemblies', self.current_assembly_id,
        ]

        tracks = ','.join(self.tracksToIndex)
        if tracks:
            args += ['--tracks', tracks]

        self.subprocess_check_call(args)

    def _blastxml_to_gff3(self, xml, min_gap=10):
        gff3_unrebased = tempfile.NamedTemporaryFile(delete=False)
        cmd = ['python', os.path.join(INSTALLED_TO, 'blastxml_to_gapped_gff3.py'),
               '--trim', '--trim_end', '--include_seq', '--min_gap', str(min_gap), xml]
        log.debug('cd %s && %s > %s', self.outdir, ' '.join(cmd), gff3_unrebased.name)
        subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_unrebased)
        gff3_unrebased.close()
        return gff3_unrebased.name

    def _prepare_track_style(self, xml_conf):

        style_data = {
            "type": "LinearBasicDisplay"
        }

        if 'display' in xml_conf['style']:
            style_data['type'] = xml_conf['style']['display']
            del xml_conf['style']['display']

        style_data['displayId'] = "%s_%s" % (xml_conf['label'], style_data['type'])

        style_data.update(xml_conf['style'])

        return {'displays': [style_data]}

    def add_blastxml(self, data, trackData, blastOpts, **kwargs):
        gff3 = self._blastxml_to_gff3(data, min_gap=blastOpts['min_gap'])

        if 'parent' in blastOpts and blastOpts['parent'] != 'None':
            gff3_rebased = tempfile.NamedTemporaryFile(delete=False)
            cmd = ['python', os.path.join(INSTALLED_TO, 'gff3_rebase.py')]
            if blastOpts.get('protein', 'false') == 'true':
                cmd.append('--protein2dna')
            cmd.extend([os.path.realpath(blastOpts['parent']), gff3])
            log.debug('cd %s && %s > %s', self.outdir, ' '.join(cmd), gff3_rebased.name)
            subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_rebased)
            gff3_rebased.close()

            # Replace original gff3 file
            shutil.copy(gff3_rebased.name, gff3)
            os.unlink(gff3_rebased.name)

        rel_dest = os.path.join('data', trackData['label'] + '.gff')
        dest = os.path.join(self.outdir, rel_dest)

        self._sort_gff(gff3, dest)
        os.unlink(gff3)

        style_json = self._prepare_track_style(trackData)

        self._add_track(trackData['label'], trackData['key'], trackData['category'], rel_dest + '.gz', config=style_json)

    def add_bigwig(self, data, trackData, wiggleOpts, **kwargs):

        rel_dest = os.path.join('data', trackData['label'] + '.bw')
        dest = os.path.join(self.outdir, rel_dest)
        self.symlink_or_copy(os.path.realpath(data), dest)

        style_json = self._prepare_track_style(trackData)

        self._add_track(trackData['label'], trackData['key'], trackData['category'], rel_dest, config=style_json)

    # Anything ending in "am" (Bam or Cram)
    def add_xam(self, data, trackData, xamOpts, index=None, ext="bam", **kwargs):

        index_ext = "bai"
        if ext == "cram":
            index_ext = "crai"

        rel_dest = os.path.join('data', trackData['label'] + '.%s' % ext)
        dest = os.path.join(self.outdir, rel_dest)

        self.symlink_or_copy(os.path.realpath(data), dest)

        if index is not None and os.path.exists(os.path.realpath(index)):
            # xai most probably made by galaxy and stored in galaxy dirs, need to copy it to dest
            self.subprocess_check_call(['cp', os.path.realpath(index), dest + '.%s' % index_ext])
        else:
            # Can happen in exotic conditions,
            # e.g. if a bam is imported as a symlink with datatype=unsorted.bam, then the datatype is changed to bam
            #      => no index is generated by galaxy, but there might be one next to the symlink target.
            #      This trick allows skipping the bam sorting made by galaxy if already done outside.
            if os.path.exists(os.path.realpath(data) + '.%s' % index_ext):
                self.symlink_or_copy(os.path.realpath(data) + '.%s' % index_ext, dest + '.%s' % index_ext)
            else:
                log.warning('Could not find a bam index (.%s file) for %s', index_ext, data)

        style_json = self._prepare_track_style(trackData)

        self._add_track(trackData['label'], trackData['key'], trackData['category'], rel_dest, config=style_json)

    def add_vcf(self, data, trackData, vcfOpts={}, zipped=False, **kwargs):

        if zipped:
            rel_dest = os.path.join('data', trackData['label'] + '.vcf.gz')
            dest = os.path.join(self.outdir, rel_dest)
            shutil.copy(os.path.realpath(data), dest)
        else:
            rel_dest = os.path.join('data', trackData['label'] + '.vcf')
            dest = os.path.join(self.outdir, rel_dest)
            shutil.copy(os.path.realpath(data), dest)

            cmd = ['bgzip', dest]
            self.subprocess_check_call(cmd)
            cmd = ['tabix', dest + '.gz']
            self.subprocess_check_call(cmd)

            rel_dest = os.path.join('data', trackData['label'] + '.vcf.gz')

        style_json = self._prepare_track_style(trackData)

        self._add_track(trackData['label'], trackData['key'], trackData['category'], rel_dest, config=style_json)
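
    # Illustration (not part of the tool, hypothetical input):
    # _prepare_track_style() maps a parsed <style> block such as
    #   {'label': 'mytrack', 'style': {'display': 'LinearArcDisplay', 'height': 100}}
    # to the JBrowse 2 display config
    #   {'displays': [{'type': 'LinearArcDisplay',
    #                  'displayId': 'mytrack_LinearArcDisplay', 'height': 100}]}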

    def add_gff(self, data, format, trackData, gffOpts, **kwargs):
        rel_dest = os.path.join('data', trackData['label'] + '.gff')
        dest = os.path.join(self.outdir, rel_dest)

        self._sort_gff(data, dest)

        style_json = self._prepare_track_style(trackData)

        self._add_track(trackData['label'], trackData['key'], trackData['category'], rel_dest + '.gz', config=style_json)

    def add_bed(self, data, format, trackData, gffOpts, **kwargs):
        rel_dest = os.path.join('data', trackData['label'] + '.bed')
        dest = os.path.join(self.outdir, rel_dest)

        self._sort_bed(data, dest)

        style_json = self._prepare_track_style(trackData)

        self._add_track(trackData['label'], trackData['key'], trackData['category'], rel_dest + '.gz', config=style_json)

    def add_paf(self, data, trackData, pafOpts, **kwargs):
        rel_dest = os.path.join('data', trackData['label'] + '.paf')
        dest = os.path.join(self.outdir, rel_dest)

        self.symlink_or_copy(os.path.realpath(data), dest)

        added_assembly = self.add_assembly(pafOpts['genome'], pafOpts['genome_label'], default=False)

        style_json = self._prepare_track_style(trackData)

        self._add_track(trackData['label'], trackData['key'], trackData['category'], rel_dest, assemblies=[self.current_assembly_id, added_assembly], config=style_json)

    def add_hic(self, data, trackData, hicOpts, **kwargs):
        rel_dest = os.path.join('data', trackData['label'] + '.hic')
        dest = os.path.join(self.outdir, rel_dest)

        self.symlink_or_copy(os.path.realpath(data), dest)

        style_json = self._prepare_track_style(trackData)

        self._add_track(trackData['label'], trackData['key'], trackData['category'], rel_dest, config=style_json)

    def add_sparql(self, url, query, query_refnames, trackData):

        json_track_data = {
            "type": "FeatureTrack",
            "trackId": trackData['label'],
            "name": trackData['label'],
            "adapter": {
                "type": "SPARQLAdapter",
                "endpoint": {
                    "uri": url,
                    "locationType": "UriLocation"
                },
                "queryTemplate": query
            },
            "category": [
                trackData['category']
            ],
            "assemblyNames": [
                self.current_assembly_id
            ]
        }

        if query_refnames:
            json_track_data['adapter']['refNamesQueryTemplate'] = query_refnames

        self.subprocess_check_call([
            'jbrowse', 'add-track-json',
            '--target', os.path.join(self.outdir, 'data'),
            json.dumps(json_track_data)])

        # Doesn't work as of 1.6.4, might work in the future
        # self.subprocess_check_call([
        #     'jbrowse', 'add-track',
        #     '--trackType', 'sparql',
        #     '--name', trackData['label'],
        #     '--category', trackData['category'],
        #     '--target', os.path.join(self.outdir, 'data'),
        #     '--trackId', id,
        #     '--config', '{"queryTemplate": "%s"}' % query,
        #     url])

    def _add_track(self, id, label, category, path, assemblies=[], config=None):

        assemblies_opt = self.current_assembly_id
        if assemblies:
            assemblies_opt = ','.join(assemblies)

        cmd = [
            'jbrowse', 'add-track',
            '--load', 'inPlace',
            '--name', label,
            '--category', category,
            '--target', os.path.join(self.outdir, 'data'),
            '--trackId', id,
            '--assemblyNames', assemblies_opt
        ]

        if config:
            cmd.append('--config')
            cmd.append(json.dumps(config))

        cmd.append(path)

        self.subprocess_check_call(cmd)
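
    # Illustration (not part of the tool, hypothetical values): _add_track()
    # shells out to the JBrowse 2 CLI roughly as
    #   jbrowse add-track --load inPlace --name 'Genes' --category 'Default' \
    #       --target <outdir>/data --trackId <md5>_0 --assemblyNames hg38 \
    #       --config '{"displays": [...]}' data/<md5>_0.gff.gz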

    def _sort_gff(self, data, dest):
        # Only index if not already done
        if not os.path.exists(dest):
            cmd = "gff3sort.pl --precise '%s' | grep -v \"^$\" > '%s'" % (data, dest)
            self.subprocess_popen(cmd)

            self.subprocess_check_call(['bgzip', '-f', dest])
            self.subprocess_check_call(['tabix', '-f', '-p', 'gff', dest + '.gz'])

    def _sort_bed(self, data, dest):
        # Only index if not already done
        if not os.path.exists(dest):
            cmd = ['sort', '-k1,1', '-k2,2n', data]
            with open(dest, 'w') as handle:
                self.subprocess_check_call(cmd, output=handle)

            self.subprocess_check_call(['bgzip', '-f', dest])
            self.subprocess_check_call(['tabix', '-f', '-p', 'bed', dest + '.gz'])

    def process_annotations(self, track):

        category = track['category'].replace('__pd__date__pd__', TODAY)
        outputTrackConfig = {
            'category': category,
        }

        mapped_chars = {
            '>': '__gt__',
            '<': '__lt__',
            "'": '__sq__',
            '"': '__dq__',
            '[': '__ob__',
            ']': '__cb__',
            '{': '__oc__',
            '}': '__cc__',
            '@': '__at__',
            '#': '__pd__',
            "": '__cn__'
        }

        for i, (dataset_path, dataset_ext, track_human_label, extra_metadata) in enumerate(track['trackfiles']):
            # Unsanitize labels (element_identifiers are always sanitized by Galaxy)
            for key, value in mapped_chars.items():
                track_human_label = track_human_label.replace(value, key)

            log.info('Processing track %s / %s (%s)', category, track_human_label, dataset_ext)
            outputTrackConfig['key'] = track_human_label
            # We add extra data to hash for the case of REST + SPARQL.
            if 'conf' in track and 'options' in track['conf'] and 'url' in track['conf']['options']:
                rest_url = track['conf']['options']['url']
            else:
                rest_url = ''

            # I chose to use track['category'] instead of 'category' here. This
            # is intentional. This way re-running the tool on a different date
            # will not generate different hashes and make comparison of outputs
            # much simpler.
            hashData = [str(dataset_path), track_human_label, track['category'], rest_url, self.current_assembly_id]
            hashData = '|'.join(hashData).encode('utf-8')
            outputTrackConfig['label'] = hashlib.md5(hashData).hexdigest() + '_%s' % i
            outputTrackConfig['metadata'] = extra_metadata

            outputTrackConfig['style'] = track['style']

            if 'menus' in track['conf']['options']:
                menus = self.cs.parse_menus(track['conf']['options'])
                outputTrackConfig.update(menus)

            if dataset_ext in ('gff', 'gff3'):
                self.add_gff(dataset_path, dataset_ext, outputTrackConfig,
                             track['conf']['options']['gff'])
            elif dataset_ext == 'bed':
                self.add_bed(dataset_path, dataset_ext, outputTrackConfig,
                             track['conf']['options']['gff'])
            elif dataset_ext == 'bigwig':
                self.add_bigwig(dataset_path, outputTrackConfig,
                                track['conf']['options']['wiggle'])
            elif dataset_ext == 'bam':
                real_indexes = track['conf']['options']['pileup']['bam_indices']['bam_index']
                if not isinstance(real_indexes, list):
                    # <bam_indices>
                    #     <bam_index>/path/to/a.bam.bai</bam_index>
                    # </bam_indices>
                    #
                    # The above will result in the 'bam_index' key containing a
                    # string. If there are two or more indices, the container
                    # becomes a list. Fun!
                    real_indexes = [real_indexes]

                self.add_xam(dataset_path, outputTrackConfig,
                             track['conf']['options']['pileup'],
                             index=real_indexes[i], ext="bam")
            elif dataset_ext == 'cram':
                real_indexes = track['conf']['options']['cram']['cram_indices']['cram_index']
                if not isinstance(real_indexes, list):
                    # Same quirk as for BAM above: a single index comes
                    # through as a string, multiple indices as a list.
                    real_indexes = [real_indexes]

                self.add_xam(dataset_path, outputTrackConfig,
                             track['conf']['options']['cram'],
                             index=real_indexes[i], ext="cram")
            elif dataset_ext == 'blastxml':
                self.add_blastxml(dataset_path, outputTrackConfig, track['conf']['options']['blast'])
            elif dataset_ext == 'vcf':
                self.add_vcf(dataset_path, outputTrackConfig)
            elif dataset_ext == 'vcf_bgzip':
                self.add_vcf(dataset_path, outputTrackConfig, zipped=True)
            elif dataset_ext == 'rest':
                self.add_rest(track['conf']['options']['rest']['url'], outputTrackConfig)
            elif dataset_ext == 'synteny':
                self.add_paf(dataset_path, outputTrackConfig,
                             track['conf']['options']['synteny'])
            elif dataset_ext == 'hic':
                self.add_hic(dataset_path, outputTrackConfig,
                             track['conf']['options']['hic'])
            elif dataset_ext == 'sparql':
                sparql_query = track['conf']['options']['sparql']['query']
                for key, value in mapped_chars.items():
                    sparql_query = sparql_query.replace(value, key)
                sparql_query_refnames = track['conf']['options']['sparql']['query_refnames']
                for key, value in mapped_chars.items():
                    sparql_query_refnames = sparql_query_refnames.replace(value, key)
                self.add_sparql(track['conf']['options']['sparql']['url'], sparql_query, sparql_query_refnames, outputTrackConfig)
            else:
                log.warning('Do not know how to handle %s', dataset_ext)

            # Return non-human label for use in other fields
            yield outputTrackConfig['label']
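
    # Illustration (not part of the tool): the opaque track id built in
    # process_annotations() is stable across reruns, e.g. (hypothetical values)
    #   hashlib.md5('|'.join([path, label, category, rest_url,
    #                         assembly]).encode('utf-8')).hexdigest() + '_0'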

    def add_default_session(self, data):
        """
        Add some default session settings: set some assemblies/tracks on/off
        """
        tracks_data = []

        # TODO using the default session for now, but check out session specs in the future https://github.com/GMOD/jbrowse-components/issues/2708

        # We need to know the track type from the config.json generated just before
        config_path = os.path.join(self.outdir, 'data', 'config.json')
        track_types = {}
        with open(config_path, 'r') as config_file:
            config_json = json.load(config_file)

        for track_conf in config_json['tracks']:
            track_types[track_conf['trackId']] = track_conf['type']

        for on_track in data['visibility']['default_on']:
            # TODO several problems with this currently
            # - we are forced to copy the same kind of style config as the per track config from _prepare_track_style (not exactly the same though)
            # - we get an error when refreshing the page
            # - this could be solved by session specs, see https://github.com/GMOD/jbrowse-components/issues/2708
            style_data = {
                "type": "LinearBasicDisplay",
                "height": 100
            }

            if on_track in data['style']:
                if 'display' in data['style'][on_track]:
                    style_data['type'] = data['style'][on_track]['display']
                    del data['style'][on_track]['display']

                style_data.update(data['style'][on_track])

            if on_track in data['style_labels']:
                # TODO fix this: it should probably go in a renderer block (SvgFeatureRenderer) but still does not work
                # TODO move this to per track displays?
                style_data['labels'] = data['style_labels'][on_track]

            tracks_data.append({
                "type": track_types[on_track],
                "configuration": on_track,
                "displays": [
                    style_data
                ]
            })

        # The view for the assembly we're adding
        view_json = {
            "type": "LinearGenomeView",
            "tracks": tracks_data
        }

        refName = None
        if data.get('defaultLocation', ''):
            loc_match = re.search(r'^(\w+):(\d+)\.+(\d+)$', data['defaultLocation'])
            if loc_match:
                refName = loc_match.group(1)
                start = int(loc_match.group(2))
                end = int(loc_match.group(3))
        elif self.assembly_ids[self.current_assembly_id] is not None:
            refName = self.assembly_ids[self.current_assembly_id]
            start = 0
            end = 1000000  # Booh, hard coded! waiting for https://github.com/GMOD/jbrowse-components/issues/2708
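
        # Illustration (not part of the tool): a defaultLocation such as
        # 'chr1:100..5000' (hypothetical) matches the regex above and yields
        # refName='chr1', start=100, end=5000.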

        if refName is not None:
            # TODO displayedRegions is not just zooming to the region, it hides the rest of the chromosome
            view_json['displayedRegions'] = [{
                "refName": refName,
                "start": start,
                "end": end,
                "reversed": False,
                "assemblyName": self.current_assembly_id
            }]

        session_name = data.get('session_name', "New session")
        if not session_name:
            session_name = "New session"

        # Merge with possibly existing defaultSession (if upgrading a jbrowse instance)
        session_json = {}
        if 'defaultSession' in config_json:
            session_json = config_json['defaultSession']

        session_json["name"] = session_name

        if 'views' not in session_json:
            session_json['views'] = []

        session_json['views'].append(view_json)

        config_json['defaultSession'] = session_json

        with open(config_path, 'w') as config_file:
            json.dump(config_json, config_file, indent=2)

    def add_general_configuration(self, data):
        """
        Add some general configuration to the config.json file
        """

        config_path = os.path.join(self.outdir, 'data', 'config.json')
        with open(config_path, 'r') as config_file:
            config_json = json.load(config_file)

        config_data = {}

        config_data['disableAnalytics'] = data.get('analytics', 'false') == 'true'

        config_data['theme'] = {
            "palette": {
                "primary": {
                    "main": data.get('primary_color', '#0D233F')
                },
                "secondary": {
                    "main": data.get('secondary_color', '#721E63')
                },
                "tertiary": {
                    "main": data.get('tertiary_color', '#135560')
                },
                "quaternary": {
                    "main": data.get('quaternary_color', '#FFB11D')
                },
            },
            "typography": {
                "fontSize": int(data.get('font_size', 10))
            },
        }

        config_json['configuration'].update(config_data)

        with open(config_path, 'w') as config_file:
            json.dump(config_json, config_file, indent=2)

    def clone_jbrowse(self, jbrowse_dir, destination):
        """Clone a JBrowse directory into a destination directory.
        """

        copytree(jbrowse_dir, destination)

        try:
            shutil.rmtree(os.path.join(destination, 'test_data'))
        except OSError as e:
            log.error("Error: %s - %s." % (e.filename, e.strerror))

        if not os.path.exists(os.path.join(destination, 'data')):
            # It can already exist if upgrading an instance
            os.makedirs(os.path.join(destination, 'data'))
            log.info("makedir %s" % (os.path.join(destination, 'data')))

        os.symlink('./data/config.json', os.path.join(destination, 'config.json'))
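
# Illustration (not part of the tool): add_general_configuration() merges a
# block like the following into config.json (default colours shown):
#   "configuration": {
#     "disableAnalytics": false,
#     "theme": {
#       "palette": {"primary": {"main": "#0D233F"},
#                   "secondary": {"main": "#721E63"},
#                   "tertiary": {"main": "#135560"},
#                   "quaternary": {"main": "#FFB11D"}},
#       "typography": {"fontSize": 10}
#     }
#   }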


def copytree(src, dst, symlinks=False, ignore=None):
    for item in os.listdir(src):
        s = os.path.join(src, item)
        d = os.path.join(dst, item)
        if os.path.isdir(s):
            shutil.copytree(s, d, symlinks, ignore)
        else:
            shutil.copy2(s, d)


def parse_style_conf(item):
    if 'type' in item.attrib and item.attrib['type'] in ['boolean', 'integer']:
        if item.attrib['type'] == 'boolean':
            return item.text in ("yes", "true", "True")
        elif item.attrib['type'] == 'integer':
            return int(item.text)
    else:
        return item.text


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="", epilog="")
    parser.add_argument('xml', type=argparse.FileType('r'), help='Track Configuration')

    parser.add_argument('--jbrowse', help='Folder containing a jbrowse release')
    parser.add_argument('--outdir', help='Output directory', default='out')
    parser.add_argument('--version', '-V', action='version', version="%(prog)s 0.8.0")
    args = parser.parse_args()

    tree = ET.parse(args.xml.name)
    root = tree.getroot()

    # This should be done ASAP
    GALAXY_INFRASTRUCTURE_URL = root.find('metadata/galaxyUrl').text
    # Sometimes this comes as `localhost` without a protocol
    if not GALAXY_INFRASTRUCTURE_URL.startswith('http'):
        # so we'll prepend `http://` and hope for the best. Requests *should*
        # be GET and not POST so it should redirect OK
        GALAXY_INFRASTRUCTURE_URL = 'http://' + GALAXY_INFRASTRUCTURE_URL

    jc = JbrowseConnector(
        jbrowse=args.jbrowse,
        outdir=args.outdir,
        genomes=[
            {
                'path': os.path.realpath(x.attrib['path']),
                'meta': metadata_from_node(x.find('metadata')),
                'label': x.attrib['label']
            }
            for x in root.findall('metadata/genomes/genome')
        ]
    )

    default_session_data = {
        'visibility': {
            'default_on': [],
            'default_off': [],
        },
        'style': {},
        'style_labels': {}
    }

    # TODO add metadata to tracks
    for track in root.findall('tracks/track'):
        track_conf = {}
        track_conf['trackfiles'] = []

        trackfiles = track.findall('files/trackFile')
        if trackfiles:
            for x in trackfiles:
                metadata = metadata_from_node(x.find('metadata'))
                track_conf['trackfiles'].append((
                    os.path.realpath(x.attrib['path']),
                    x.attrib['ext'],
                    x.attrib['label'],
                    metadata
                ))
        else:
            # For tracks without files (rest, sparql)
            track_conf['trackfiles'].append((
                '',  # N/A, no path for rest or sparql
                track.attrib['format'],
                track.find('options/label').text,
                {}
            ))

        track_conf['category'] = track.attrib['cat']
        track_conf['format'] = track.attrib['format']
        track_conf['style'] = {item.tag: parse_style_conf(item) for item in track.find('options/style')}
        track_conf['style_labels'] = {item.tag: parse_style_conf(item) for item in track.find('options/style_labels')}

        track_conf['conf'] = etree_to_dict(track.find('options'))
        keys = jc.process_annotations(track_conf)

        for key in keys:
            default_session_data['visibility'][track.attrib.get('visibility', 'default_off')].append(key)
            default_session_data['style'][key] = track_conf['style']  # TODO do we need this anymore?
            default_session_data['style_labels'][key] = track_conf['style_labels']
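
    # Illustration (not part of the tool, hypothetical sketch): the XML
    # consumed above is the Galaxy-generated track configuration, shaped like
    #   <root>
    #     <metadata>
    #       <genomes><genome path="..." label="hg38">...</genome></genomes>
    #       <general>...</general>
    #       <galaxyUrl>...</galaxyUrl>
    #     </metadata>
    #     <tracks>
    #       <track cat="Default" format="gff" visibility="default_on">
    #         <files><trackFile path="..." ext="gff3" label="Genes">...</trackFile></files>
    #         <options><style>...</style><style_labels>...</style_labels></options>
    #       </track>
    #     </tracks>
    #   </root>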

    default_session_data['defaultLocation'] = root.find('metadata/general/defaultLocation').text
    default_session_data['session_name'] = root.find('metadata/general/session_name').text

    general_data = {
        'analytics': root.find('metadata/general/analytics').text,
        'primary_color': root.find('metadata/general/primary_color').text,
        'secondary_color': root.find('metadata/general/secondary_color').text,
        'tertiary_color': root.find('metadata/general/tertiary_color').text,
        'quaternary_color': root.find('metadata/general/quaternary_color').text,
        'font_size': root.find('metadata/general/font_size').text,
    }

    jc.add_default_session(default_session_data)
    jc.add_general_configuration(general_data)
    jc.text_index()
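
# Example invocation (hypothetical paths; in practice Galaxy renders the XML
# and calls this script itself):
#   python abjbrowse2.py --jbrowse /opt/jbrowse2 --outdir out track_conf.xml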