diff jbrowse.py @ 1:497c6bb3b717 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
author iuc
date Thu, 18 Jun 2015 12:10:51 -0400
parents 2c9e5136b416
children 7342f467507b
line wrap: on
line diff
--- a/jbrowse.py	Mon May 04 17:21:38 2015 -0400
+++ b/jbrowse.py	Thu Jun 18 12:10:51 2015 -0400
@@ -1,9 +1,45 @@
 #!/usr/bin/env python
+from string import Template
 import os
-import shutil
 import argparse
 import subprocess
 import hashlib
+import tempfile
+import json
+import yaml
+import logging
+logging.basicConfig()
+log = logging.getLogger(__name__)
+
+COLOR_FUNCTION_TEMPLATE = Template("""
+function(feature, variableName, glyphObject, track) {
+    var score = ${score};
+    ${opacity}
+    return 'rgba(${red}, ${green}, ${blue}, ' + opacity + ')';
+}
+""")
+
+BLAST_OPACITY_MATH = """
+var opacity = 0;
+if(score == 0.0) {
+    opacity = 1;
+} else{
+    opacity = (20 - Math.log10(score)) / 180;
+}
+"""
+
+BREWER_COLOUR_IDX = 0
+BREWER_COLOUR_SCHEMES = [
+    (228, 26, 28),
+    (55, 126, 184),
+    (77, 175, 74),
+    (152, 78, 163),
+    (255, 127, 0),
+]
+
+
+# score comes from feature._parent.get('score') or feature.get('score')
+# Opacity math
 
 TN_TABLE = {
     'gff3': '--gff',
@@ -12,40 +48,298 @@
     # 'genbank': '--gbk',
 }
 
-
-def process_genome(jbrowse_dir, genome):
-    subprocess.check_output(['perl', 'bin/prepare-refseqs.pl', '--fasta', genome], cwd=jbrowse_dir)
+INSTALLED_TO = os.path.dirname(os.path.realpath(__file__))
 
 
-def process_annotations(jbrowse_dir, annot_file, annot_label, annot_format,
-                        **kwargs):
-    key = hashlib.md5(annot_file).hexdigest()
+class JbrowseConnector(object):
+
+    def __init__(self, jbrowse, jbrowse_dir, outdir, genome):
+        self.jbrowse = jbrowse
+        self.jbrowse_dir = jbrowse_dir
+        self.outdir = outdir
+        self.genome_path = genome
+        self.brewer_colour_idx = 0
+
+        self.clone_jbrowse(self.jbrowse, self.outdir)
+        self.process_genome()
+
+    def subprocess_check_call(self, command):
+        log.debug('cd %s && %s', self.jbrowse_dir, ' '.join(command))
+        subprocess.check_call(command, cwd=self.jbrowse_dir)
+
+    def _get_colours(self):
+        r, g, b = BREWER_COLOUR_SCHEMES[self.brewer_colour_idx]
+        self.brewer_colour_idx += 1
+        return r, g, b
+
+    def process_genome(self):
+        self.subprocess_check_call(['perl', 'bin/prepare-refseqs.pl',
+                                    '--fasta', self.genome_path])
+
+    def _add_json(self, json_data):
+        if len(json_data.keys()) == 0:
+            return
+
+        tmp = tempfile.NamedTemporaryFile(delete=False)
+        tmp.write(json.dumps(json_data))
+        tmp.close()
+        cmd = ['perl', 'bin/add-track-json.pl', tmp.name,
+               os.path.join('data', 'trackList.json')]
+        self.subprocess_check_call(cmd)
+        os.unlink(tmp.name)
+
+    def add_blastxml(self, data, key, **kwargs):
+        gff3_unrebased = tempfile.NamedTemporaryFile(delete=False)
+        cmd = ['python', os.path.join(INSTALLED_TO, 'blastxml_to_gapped_gff3.py'),
+               '--trim_end', '--min_gap', str(kwargs['min_gap']), data]
+        subprocess.check_call(cmd, cwd=self.jbrowse_dir, stdout=gff3_unrebased)
+        gff3_unrebased.close()
+
+        gff3_rebased = tempfile.NamedTemporaryFile(delete=False)
+        cmd = ['python', os.path.join(INSTALLED_TO, 'gff3_rebase.py')]
+        if kwargs['protein']:
+            cmd.append('--protein2dna')
+        cmd.extend([kwargs['parent'], gff3_unrebased.name])
+        subprocess.check_call(cmd, cwd=self.jbrowse_dir, stdout=gff3_rebased)
+        gff3_rebased.close()
+
+        label = hashlib.md5(data).hexdigest()
+
+        red, green, blue = self._get_colours()
+        color_function = COLOR_FUNCTION_TEMPLATE.substitute({
+            'score': "feature._parent.get('score')",
+            'opacity': BLAST_OPACITY_MATH,
+            'red': red,
+            'green': green,
+            'blue': blue,
+        })
+
+        clientConfig = {
+            'label': 'description',
+            'color': color_function.replace('\n', ''),
+            'description': 'Hit_titles',
+        }
+        config = {'glyph': 'JBrowse/View/FeatureGlyph/Segments'}
+        if 'category' in kwargs:
+            config['category'] = kwargs['category']
+
+        cmd = ['perl', 'bin/flatfile-to-json.pl',
+               '--gff', gff3_rebased.name,
+               '--trackLabel', label,
+               '--key', key,
+               '--clientConfig', json.dumps(clientConfig),
+               '--config', json.dumps(config),
+               '--trackType', 'JBrowse/View/Track/CanvasFeatures'
+               ]
+
+        self.subprocess_check_call(cmd)
+        os.unlink(gff3_rebased.name)
+        os.unlink(gff3_unrebased.name)
+
+    def _min_max_gff(self, gff_file):
+        min_val = None
+        max_val = None
+        with open(gff_file, 'r') as handle:
+            for line in handle:
+                try:
+                    value = float(line.split('\t')[5])
+                    min_val = min(value, (min_val or value))
+                    max_val = max(value, (max_val or value))
+
+                    if value < min_val:
+                        min_val = value
+
+                    if value > max_val:
+                        max_val = value
+                except Exception:
+                    pass
+        return min_val, max_val
+
+    def add_bigwig(self, data, key, **kwargs):
+        label = hashlib.md5(data).hexdigest()
+        dest = os.path.join('data', 'raw', os.path.basename(data))
+        cmd = ['ln', '-s', data, dest]
+        self.subprocess_check_call(cmd)
+
+        track_data = {
+            "label": label,
+            "urlTemplate": os.path.join('..', dest),
+            "bicolor_pivot": "zero",
+            "storeClass": "JBrowse/Store/SeqFeature/BigWig",
+            "type": "JBrowse/View/Track/Wiggle/Density",
+            "key": key,
+        }
+        track_data.update(kwargs)
+
+        if 'min' not in track_data and 'max' not in track_data \
+                and 'autoscale' not in track_data:
+            track_data['autoscale'] = 'local'
+
+        self._add_json(track_data)
 
-    cmd = ['perl', 'bin/flatfile-to-json.pl', TN_TABLE.get(annot_format, 'gff'),
-           annot_file, '--trackLabel', key, '--key', annot_label]
-    subprocess.check_output(cmd, cwd=jbrowse_dir)
+    def add_bam(self, data, key, **kwargs):
+        label = hashlib.md5(data).hexdigest()
+        dest = os.path.join('data', 'raw', os.path.basename(data))
+        # ln?
+        cmd = ['ln', '-s', data, dest]
+        self.subprocess_check_call(cmd)
+
+        bai_source = kwargs['bam_index']
+        cmd = ['ln', '-s', bai_source, dest + '.bai']
+        self.subprocess_check_call(cmd)
+
+        track_data = {
+            "urlTemplate": os.path.join('..', dest),
+            "key": key,
+            "label": label,
+            "type": "JBrowse/View/Track/Alignments2",
+            "storeClass": "JBrowse/Store/SeqFeature/BAM",
+        }
+        if 'category' in kwargs:
+            track_data['category'] = kwargs['category']
+        self._add_json(track_data)
+
+        if kwargs.get('auto_snp', False):
+            track_data = {
+                "storeClass": "JBrowse/Store/SeqFeature/BAM",
+                "urlTemplate": os.path.join('..', dest),
+                "type": "JBrowse/View/Track/SNPCoverage",
+                "key": key + " - SNPs/Coverage",
+                "label": label + "_autosnp",
+            }
+            if 'category' in kwargs:
+                track_data['category'] = kwargs['category']
+            self._add_json(track_data)
+
+    def add_vcf(self, data, key, **kwargs):
+        label = hashlib.md5(data).hexdigest()
+        dest = os.path.join('data', 'raw', os.path.basename(data))
+        # ln?
+        cmd = ['ln', '-s', data, dest]
+        self.subprocess_check_call(cmd)
+        cmd = ['bgzip', dest]
+        self.subprocess_check_call(cmd)
+        cmd = ['tabix', '-p', 'vcf', dest + '.gz']
+        self.subprocess_check_call(cmd)
+
+        track_data = {
+            "key": key,
+            "label": label,
+            "urlTemplate": os.path.join('..', dest + '.gz'),
+            "type": "JBrowse/View/Track/HTMLVariants",
+            "storeClass": "JBrowse/Store/SeqFeature/VCFTabix",
+        }
+        track_data.update(kwargs)
+        self._add_json(track_data)
+
+    def add_features(self, data, key, format, **kwargs):
+        label = hashlib.md5(data).hexdigest()
+        cmd = ['perl', 'bin/flatfile-to-json.pl',
+               TN_TABLE.get(format, 'gff'), data,
+               '--trackLabel', label,
+               '--key', key]
+
+        config = {}
+        if 'category' in kwargs:
+            config['category'] = kwargs['category']
+
+        if kwargs.get('match', False):
+            clientConfig = {
+                'label': 'description',
+                'description': 'Hit_titles',
+            }
+
+            # Get min/max and build a scoring function since JBrowse doesn't
+            min_val, max_val = self._min_max_gff(data)
+
+            if min_val is not None and max_val is not None:
+                MIN_MAX_OPACITY_MATH = Template("""
+                var opacity = (score - ${min}) * (1/(${max} - ${min}));
+                """).substitute({
+                    'max': max_val,
+                    'min': min_val,
+                })
+
+                red, green, blue = self._get_colours()
+                color_function = COLOR_FUNCTION_TEMPLATE.substitute({
+                    'score': "feature.get('score')",
+                    'opacity': MIN_MAX_OPACITY_MATH,
+                    'red': red,
+                    'green': green,
+                    'blue': blue,
+                })
+
+                clientConfig['color'] = color_function.replace('\n', '')
+
+            config['glyph'] = 'JBrowse/View/FeatureGlyph/Segments'
+
+            cmd += ['--clientConfig', json.dumps(clientConfig),
+                    '--trackType', 'JBrowse/View/Track/CanvasFeatures'
+                    ]
+
+        cmd.extend(['--config', json.dumps(config)])
+
+        self.subprocess_check_call(cmd)
+
+    def process_annotations(self, data, key, format, **kwargs):
+        if format in ('gff', 'gff3', 'bed'):
+            self.add_features(data, key, format, **kwargs)
+        elif format == 'bigwig':
+            self.add_bigwig(data, key, **kwargs)
+        elif format == 'bam':
+            self.add_bam(data, key, **kwargs)
+        elif format == 'blastxml':
+            self.add_blastxml(data, key, **kwargs)
+        elif format == 'vcf':
+            self.add_vcf(data, key, **kwargs)
+
+    def clone_jbrowse(self, jbrowse_dir, destination):
+        # JBrowse seems to have included some bad symlinks, cp ignores bad symlinks
+        # unlike copytree
+        cmd = ['mkdir', '-p', destination]
+        subprocess.check_call(cmd)
+        cmd = ['cp', '-r', jbrowse_dir, destination]
+        subprocess.check_call(cmd)
+        cmd = ['mkdir', '-p', os.path.join(destination, 'JBrowse-1.11.6',
+                                           'data', 'raw')]
+        subprocess.check_call(cmd)
+
+        # http://unix.stackexchange.com/a/38691/22785
+        # JBrowse releases come with some broken symlinks
+        cmd = ['find', destination, '-type', 'l', '-xtype', 'l', '-exec', 'rm', "'{}'", '+']
+        subprocess.check_call(cmd)
 
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description="", epilog="")
     parser.add_argument('genome', type=file, help='Input genome file')
-
-    parser.add_argument('--gff3', type=file, nargs='*', help='GFF3/BED/GBK File')
-    parser.add_argument('--gff3_format', choices=['gff3', 'gff', 'bed', 'gbk'], nargs='*', help='GFF3/BED/GBK Format')
-    parser.add_argument('--gff3_label', type=str, nargs='*', help='GFF3/BED/GBK Label')
+    parser.add_argument('yaml', type=file, help='Track Configuration')
 
     parser.add_argument('--jbrowse', help='Folder containing a jbrowse release')
     parser.add_argument('--outdir', help='Output directory', default='out')
     args = parser.parse_args()
 
-    jbrowse_dir = os.path.join(args.outdir, 'JBrowse-1.11.6')
-    shutil.copytree(args.jbrowse, jbrowse_dir)
+    jc = JbrowseConnector(
+        jbrowse=args.jbrowse,
+        jbrowse_dir=os.path.join(args.outdir, 'JBrowse-1.11.6'),
+        outdir=args.outdir,
+        genome=os.path.realpath(args.genome.name),
+    )
 
-    process_genome(jbrowse_dir, os.path.realpath(args.genome.name))
+    track_data = yaml.load(args.yaml)
+    for track in track_data:
+        path = os.path.realpath(track['file'])
+        extra = track.get('options', {})
+        if '__unused__' in extra:
+            del extra['__unused__']
 
-    for gff3, gff3_format, gff3_label in zip(args.gff3, args.gff3_format, args.gff3_label):
-        gff3_path = os.path.realpath(gff3.name)
-        process_annotations(jbrowse_dir, gff3_path, gff3_label, gff3_format)
+        for possible_partial_path in ('bam_index', 'parent'):
+            if possible_partial_path in extra:
+                extra[possible_partial_path] = os.path.realpath(
+                    extra[possible_partial_path])
+        extra['category'] = track.get('category', 'Default')
+
+        jc.process_annotations(path, track['label'], track['ext'], **extra)
 
     print """
     <html>
@@ -54,6 +348,9 @@
             window.location=JBrowse-1.11.6/index.html
         </script>
         <a href="JBrowse-1.11.6/index.html">Go to JBrowse</a>
+        <p>Please note that JBrowse functions best on production Galaxy
+        instances. The paste server used in development instances has issues
+        serving the volumes of data regularly involved in JBrowse</p>
         </body>
     </html>
     """