diff jbrowse.py @ 17:ff11d442feed draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
author iuc
date Wed, 15 Nov 2017 15:15:27 -0500
parents b5c5470d7c09
children 558d652cd681
line wrap: on
line diff
--- a/jbrowse.py	Wed Sep 13 13:07:20 2017 -0400
+++ b/jbrowse.py	Wed Nov 15 15:15:27 2017 -0500
@@ -1,7 +1,8 @@
 #!/usr/bin/env python
 import argparse
-import codecs
+import binascii
 import copy
+import datetime
 import hashlib
 import json
 import logging
@@ -14,9 +15,10 @@
 from collections import defaultdict
 
 from Bio.Data import CodonTable
-
 logging.basicConfig(level=logging.INFO)
 log = logging.getLogger('jbrowse')
+TODAY = datetime.datetime.now().strftime("%Y-%m-%d")
+GALAXY_INFRASTRUCTURE_URL = None
 
 
 class ColorScaling(object):
@@ -63,6 +65,7 @@
         var color = ({user_spec_color} || search_up(feature, 'color') || search_down(feature, 'color') || {auto_gen_color});
         var score = (search_up(feature, 'score') || search_down(feature, 'score'));
         {opacity}
+        if(score === undefined){{ opacity = 1; }}
         var result = /^#?([a-f\d]{{2}})([a-f\d]{{2}})([a-f\d]{{2}})$/i.exec(color);
         var red = parseInt(result[1], 16);
         var green = parseInt(result[2], 16);
@@ -82,11 +85,11 @@
         """,
         'blast': """
             var opacity = 0;
-            if(score == 0.0) {
+            if(score == 0.0) {{
                 opacity = 1;
-            } else{
+            }} else {{
                 opacity = (20 - Math.log10(score)) / 180;
-            }
+            }}
         """
     }
 
@@ -128,7 +131,7 @@
 
     def rgb_from_hex(self, hexstr):
         # http://stackoverflow.com/questions/4296249/how-do-i-convert-a-hex-triplet-to-an-rgb-tuple-and-back
-        return struct.unpack('BBB', codecs.decode(hexstr, 'hex'))
+        return struct.unpack('BBB', binascii.unhexlify(hexstr))
 
     def min_max_gff(self, gff_file):
         min_val = None
@@ -285,6 +288,44 @@
 INSTALLED_TO = os.path.dirname(os.path.realpath(__file__))
 
 
+def metadata_from_node(node):
+    """Flatten a Galaxy <metadata> XML node into a dict of track metadata.
+
+    Attributes of the dataset/history/metadata/tool children are stored under
+    '<child>_<attribute>' keys, then a few values are rewritten as HTML links
+    with their text escaped. Returns {} when `node` is None or does not hold
+    exactly one <dataset>; other missing children/attributes still raise.
+    """
+    from xml.sax.saxutils import escape
+
+    if node is None or len(node.findall('dataset')) != 1:
+        return {}
+
+    metadata = {}
+    for tag in ('dataset', 'history', 'metadata', 'tool'):
+        for (key, value) in node.findall(tag)[0].attrib.items():
+            metadata['%s_%s' % (tag, key)] = value
+
+    def esc(value):
+        # Escape text placed in HTML content or double-quoted attributes.
+        return escape(value, {'"': '&quot;'})
+
+    # Additional Mappings applied:
+    metadata['dataset_edam_format'] = '<a target="_blank" href="http://edamontology.org/{0}">{1}</a>'.format(esc(metadata['dataset_edam_format']), esc(metadata['dataset_file_ext']))
+    metadata['history_user_email'] = '<a href="mailto:{0}">{0}</a>'.format(esc(metadata['history_user_email']))
+    metadata['history_display_name'] = '<a target="_blank" href="{galaxy}/history/view/{encoded_hist_id}">{hist_name}</a>'.format(
+        galaxy=GALAXY_INFRASTRUCTURE_URL,
+        encoded_hist_id=esc(metadata['history_id']),
+        hist_name=esc(metadata['history_display_name'])
+    )
+    metadata['tool_tool'] = '<a target="_blank" href="{galaxy}/datasets/{encoded_id}/show_params">{tool_id}</a>'.format(
+        galaxy=GALAXY_INFRASTRUCTURE_URL,
+        encoded_id=esc(metadata['dataset_id']),
+        tool_id=esc(metadata['tool_tool_id']),
+    )
+    return metadata
+
+
 class JbrowseConnector(object):
 
     def __init__(self, jbrowse, outdir, genomes, standalone=False, gencode=1):
@@ -312,6 +353,12 @@
                 # Ignore if the folder exists
                 pass
 
+            try:
+                os.makedirs(os.path.join(self.outdir, 'data', 'raw'))
+            except OSError:
+                # Ignore if the folder exists
+                pass
+
         self.process_genomes()
         self.update_gencode()
 
@@ -338,21 +385,20 @@
         return os.path.realpath(os.path.join(self.jbrowse, 'bin', command))
 
     def process_genomes(self):
-        for genome_path in self.genome_paths:
+        for genome_node in self.genome_paths:
+            # TODO: Waiting on https://github.com/GMOD/jbrowse/pull/884
             self.subprocess_check_call([
                 'perl', self._jbrowse_bin('prepare-refseqs.pl'),
-                '--fasta', genome_path])
+                '--fasta', genome_node['path']])
 
     def generate_names(self):
         # Generate names
-
         args = [
             'perl', self._jbrowse_bin('generate-names.pl'),
             '--hashBits', '16'
         ]
 
         tracks = ','.join(self.tracksToIndex)
-
         if tracks:
             args += ['--tracks', tracks]
         else:
@@ -362,7 +408,6 @@
         self.subprocess_check_call(args)
 
     def _add_json(self, json_data):
-
         cmd = [
             'perl', self._jbrowse_bin('add-json.pl'),
             json.dumps(json_data),
@@ -421,7 +466,7 @@
                '--key', trackData['key'],
                '--clientConfig', json.dumps(clientConfig),
                '--config', json.dumps(config),
-               '--trackType', 'JBrowse/View/Track/CanvasFeatures'
+               '--trackType', 'BlastView/View/Track/CanvasFeatures'
                ]
 
         # className in --clientConfig is ignored, it needs to be set with --className
@@ -455,6 +500,8 @@
         else:
             trackData['autoscale'] = wiggleOpts.get('autoscale', 'local')
 
+        trackData['scale'] = wiggleOpts['scale']
+
         self._add_track_json(trackData)
 
     def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs):
@@ -506,7 +553,7 @@
         })
         self._add_track_json(trackData)
 
-    def add_features(self, data, format, trackData, gffOpts, **kwargs):
+    def add_features(self, data, format, trackData, gffOpts, metadata=None, **kwargs):
         cmd = [
             'perl', self._jbrowse_bin('flatfile-to-json.pl'),
             self.TN_TABLE.get(format, 'gff'),
@@ -549,6 +596,8 @@
             '--trackType', gffOpts['trackType']
         ]
 
+        if metadata:
+            config.update({'metadata': metadata})
         cmd.extend(['--config', json.dumps(config)])
 
         self.subprocess_check_call(cmd)
@@ -556,14 +605,32 @@
         if gffOpts.get('index', 'false') == 'true':
             self.tracksToIndex.append("%s" % trackData['label'])
 
+    def add_rest(self, url, trackData, metadata=None, **kwargs):
+        """Add a REST-store HTMLFeatures track served from `url`; `metadata` and
+        extra kwargs are accepted but unused, since process_annotations passes
+        `metadata=` to this method."""
+        self._add_track_json({
+            "label": trackData['label'],
+            "key": trackData['key'],
+            "category": trackData['category'],
+            "type": "JBrowse/View/Track/HTMLFeatures",
+            "storeClass": "JBrowse/Store/SeqFeature/REST",
+            "baseUrl": url,
+            "query": {"organism": "tyrannosaurus"},
+        })
+
     def process_annotations(self, track):
+        category = track['category'].replace('__pd__date__pd__', TODAY)
         outputTrackConfig = {
             'style': {
                 'label': track['style'].get('label', 'description'),
                 'className': track['style'].get('className', 'feature'),
                 'description': track['style'].get('description', ''),
             },
-            'category': track['category'],
+            'overridePlugins': track['style'].get('overridePlugins', False) == 'True',
+            'overrideDraggable': track['style'].get('overrideDraggable', False) == 'True',
+            'maxHeight': track['style'].get('maxHeight', '600'),
+            'category': category,
         }
 
         mapped_chars = {
@@ -579,15 +646,26 @@
             '#': '__pd__'
         }
 
-        for i, (dataset_path, dataset_ext, track_human_label) in enumerate(track['trackfiles']):
+        for i, (dataset_path, dataset_ext, track_human_label, extra_metadata) in enumerate(track['trackfiles']):
             # Unsanitize labels (element_identifiers are always sanitized by Galaxy)
             for key, value in mapped_chars.items():
                 track_human_label = track_human_label.replace(value, key)
 
-            log.info('Processing %s / %s', track['category'], track_human_label)
+            log.info('Processing %s / %s', category, track_human_label)
             outputTrackConfig['key'] = track_human_label
-            hashData = [dataset_path, track_human_label, track['category']]
-            outputTrackConfig['label'] = hashlib.md5('|'.join(hashData).encode('utf-8')).hexdigest() + '_%s' % i
+            # We add extra data to hash for the case of REST + SPARQL.
+            try:
+                rest_url = track['conf']['options']['url']
+            except KeyError:
+                rest_url = ''
+
+            # I chose to use track['category'] instead of 'category' here. This
+            # is intentional. This way re-running the tool on a different date
+            # will not generate different hashes and make comparison of outputs
+            # much simpler.
+            hashData = [dataset_path, track_human_label, track['category'], rest_url]
+            hashData = '|'.join(hashData).encode('utf-8')
+            outputTrackConfig['label'] = hashlib.md5(hashData).hexdigest() + '_%s' % i
 
             # Colour parsing is complex due to different track types having
             # different colour options.
@@ -608,10 +686,10 @@
             # import sys; sys.exit()
             if dataset_ext in ('gff', 'gff3', 'bed'):
                 self.add_features(dataset_path, dataset_ext, outputTrackConfig,
-                                  track['conf']['options']['gff'])
+                                  track['conf']['options']['gff'], metadata=extra_metadata)
             elif dataset_ext == 'bigwig':
                 self.add_bigwig(dataset_path, outputTrackConfig,
-                                track['conf']['options']['wiggle'])
+                                track['conf']['options']['wiggle'], metadata=extra_metadata)
             elif dataset_ext == 'bam':
                 real_indexes = track['conf']['options']['pileup']['bam_indices']['bam_index']
                 if not isinstance(real_indexes, list):
@@ -626,11 +704,15 @@
 
                 self.add_bam(dataset_path, outputTrackConfig,
                              track['conf']['options']['pileup'],
-                             bam_index=real_indexes[i])
+                             bam_index=real_indexes[i], metadata=extra_metadata)
             elif dataset_ext == 'blastxml':
-                self.add_blastxml(dataset_path, outputTrackConfig, track['conf']['options']['blast'])
+                self.add_blastxml(dataset_path, outputTrackConfig, track['conf']['options']['blast'], metadata=extra_metadata)
             elif dataset_ext == 'vcf':
-                self.add_vcf(dataset_path, outputTrackConfig)
+                self.add_vcf(dataset_path, outputTrackConfig, metadata=extra_metadata)
+            elif dataset_ext == 'rest':
+                self.add_rest(track['conf']['options']['url'], outputTrackConfig, metadata=extra_metadata)
+            else:
+                log.warn('Do not know how to handle %s', dataset_ext)
 
             # Return non-human label for use in other fields
             yield outputTrackConfig['label']
@@ -659,10 +741,65 @@
         generalData['show_overview'] = (data['general']['show_overview'] == 'true')
         generalData['show_menu'] = (data['general']['show_menu'] == 'true')
         generalData['hideGenomeOptions'] = (data['general']['hideGenomeOptions'] == 'true')
+        generalData['plugins'] = data['plugins']
 
         viz_data.update(generalData)
         self._add_json(viz_data)
 
+        if 'GCContent' in data['plugins_python']:
+            self._add_track_json({
+                "storeClass": "JBrowse/Store/SeqFeature/SequenceChunks",
+                "type": "GCContent/View/Track/GCContentXY",
+                "label": "GCContentXY",
+                "urlTemplate": "seq/{refseq_dirpath}/{refseq}-",
+                "bicolor_pivot": 0.5
+                # TODO: Expose params for everyone.
+            })
+
+        if 'ComboTrackSelector' in data['plugins_python']:
+            with open(os.path.join(self.outdir, 'data', 'trackList.json'), 'r') as handle:
+                trackListJson = json.load(handle)
+                trackListJson.update({
+                    "trackSelector": {
+                        "renameFacets": {
+                            "tool_tool": "Tool ID",
+                            "tool_tool_id": "Tool ID",
+                            "tool_tool_version": "Tool Version",
+                            "dataset_edam_format": "EDAM",
+                            "dataset_size": "Size",
+                            "history_display_name": "History Name",
+                            "history_user_email": "Owner",
+                            "metadata_dbkey": "Dbkey",
+                        },
+                        "displayColumns": [
+                            "key",
+                            "tool_tool",
+                            "tool_tool_version",
+                            "dataset_edam_format",
+                            "dataset_size",
+                            "history_display_name",
+                            "history_user_email",
+                            "metadata_dbkey",
+                        ],
+                        "type": "Faceted",
+                        "title": ["Galaxy Metadata"],
+                        "escapeHTMLInData": False
+                    },
+                    "trackMetadata": {
+                        "indexFacets": [
+                            "category",
+                            "key",
+                            "tool_tool_id",
+                            "tool_tool_version",
+                            "dataset_edam_format",
+                            "history_user_email",
+                            "history_display_name"
+                        ]
+                    }
+                })
+                with open(os.path.join(self.outdir, 'data', 'trackList2.json'), 'w') as handle:
+                    json.dump(trackListJson, handle)
+
     def clone_jbrowse(self, jbrowse_dir, destination):
         """Clone a JBrowse directory into a destination directory.
         """
@@ -677,9 +814,14 @@
 
         # http://unix.stackexchange.com/a/38691/22785
         # JBrowse releases come with some broken symlinks
-        cmd = ['find', destination, '-type', 'l', '-xtype', 'l', '-exec', 'rm', "'{}'", '+']
+        cmd = ['find', destination, '-type', 'l', '-xtype', 'l']
         log.debug(' '.join(cmd))
-        subprocess.check_call(cmd)
+        symlinks = subprocess.check_output(cmd)
+        for i in symlinks:
+            try:
+                os.unlink(i)
+            except OSError:
+                pass
 
 
 if __name__ == '__main__':
@@ -689,6 +831,7 @@
     parser.add_argument('--jbrowse', help='Folder containing a jbrowse release')
     parser.add_argument('--outdir', help='Output directory', default='out')
     parser.add_argument('--standalone', help='Standalone mode includes a copy of JBrowse', action='store_true')
+    parser.add_argument('--version', '-V', action='version', version="%(prog)s 0.7.0")
     args = parser.parse_args()
 
     tree = ET.parse(args.xml.name)
@@ -697,7 +840,13 @@
     jc = JbrowseConnector(
         jbrowse=args.jbrowse,
         outdir=args.outdir,
-        genomes=[os.path.realpath(x.text) for x in root.findall('metadata/genomes/genome')],
+        genomes=[
+            {
+                'path': os.path.realpath(x.attrib['path']),
+                'meta': metadata_from_node(x.find('metadata'))
+            }
+            for x in root.findall('metadata/genomes/genome')
+        ],
         standalone=args.standalone,
         gencode=root.find('metadata/gencode').text
     )
@@ -719,21 +868,74 @@
             'show_overview': root.find('metadata/general/show_overview').text,
             'show_menu': root.find('metadata/general/show_menu').text,
             'hideGenomeOptions': root.find('metadata/general/hideGenomeOptions').text,
-        }
+        },
+        'plugins': [{
+            'location': 'https://cdn.rawgit.com/TAMU-CPT/blastview/97572a21b7f011c2b4d9a0b5af40e292d694cbef/',
+            'name': 'BlastView'
+        }],
+        'plugins_python': ['BlastView'],
     }
+
+    plugins = root.find('plugins').attrib
+    if plugins['GCContent'] == 'True':
+        extra_data['plugins_python'].append('GCContent')
+        extra_data['plugins'].append({
+            'location': 'https://cdn.rawgit.com/elsiklab/gccontent/5c8b0582ecebf9edf684c76af8075fb3d30ec3fa/',
+            'name': 'GCContent'
+        })
+
+    if plugins['Bookmarks'] == 'True':
+        extra_data['plugins'].append({
+            'location': 'https://cdn.rawgit.com/TAMU-CPT/bookmarks-jbrowse/5242694120274c86e1ccd5cb0e5e943e78f82393/',
+            'name': 'Bookmarks'
+        })
+
+    if plugins['ComboTrackSelector'] == 'True':
+        extra_data['plugins_python'].append('ComboTrackSelector')
+        extra_data['plugins'].append({
+            'location': 'https://cdn.rawgit.com/Arabidopsis-Information-Portal/ComboTrackSelector/52403928d5ccbe2e3a86b0fa5eb8e61c0f2e2f57',
+            'icon': 'https://galaxyproject.org/images/logos/galaxy-icon-square.png',
+            'name': 'ComboTrackSelector'
+        })
+
+    if plugins['theme'] == 'Minimalist':
+        extra_data['plugins'].append({
+            'location': 'https://cdn.rawgit.com/erasche/jbrowse-minimalist-theme/d698718442da306cf87f033c72ddb745f3077775/',
+            'name': 'MinimalistTheme'
+        })
+    elif plugins['theme'] == 'Dark':
+        extra_data['plugins'].append({
+            'location': 'https://cdn.rawgit.com/erasche/jbrowse-dark-theme/689eceb7e33bbc1b9b15518d45a5a79b2e5d0a26/',
+            'name': 'DarkTheme'
+        })
+
+    GALAXY_INFRASTRUCTURE_URL = root.find('metadata/galaxyUrl').text
+    # Sometimes this comes as `localhost` without a protocol
+    if not GALAXY_INFRASTRUCTURE_URL.startswith('http'):
+        # so we'll prepend `http://` and hope for the best. Requests *should*
+        # be GET and not POST so it should redirect OK
+        GALAXY_INFRASTRUCTURE_URL = 'http://' + GALAXY_INFRASTRUCTURE_URL
+
     for track in root.findall('tracks/track'):
         track_conf = {}
-        track_conf['trackfiles'] = [
-            (os.path.realpath(x.attrib['path']), x.attrib['ext'], x.attrib['label'])
-            for x in track.findall('files/trackFile')
-        ]
+        track_conf['trackfiles'] = []
+
+        for x in track.findall('files/trackFile'):
+            metadata = metadata_from_node(x.find('metadata'))
+
+            track_conf['trackfiles'].append((
+                os.path.realpath(x.attrib['path']),
+                x.attrib['ext'],
+                x.attrib['label'],
+                metadata
+            ))
 
         track_conf['category'] = track.attrib['cat']
         track_conf['format'] = track.attrib['format']
         try:
             # Only pertains to gff3 + blastxml. TODO?
             track_conf['style'] = {t.tag: t.text for t in track.find('options/style')}
-        except TypeError:
+        except TypeError as te:
             track_conf['style'] = {}
             pass
         track_conf['conf'] = etree_to_dict(track.find('options'))
@@ -743,4 +945,3 @@
             extra_data['visibility'][track.attrib.get('visibility', 'default_off')].append(key)
 
     jc.add_final_data(extra_data)
-    jc.generate_names()