Mercurial > repos > iuc > jbrowse
diff jbrowse.py @ 3:7342f467507b draft
Uploaded v0.4 of JBrowse
author | iuc |
---|---|
date | Thu, 31 Dec 2015 13:58:43 -0500 |
parents | 497c6bb3b717 |
children | ae9382cfb6ac |
line wrap: on
line diff
--- a/jbrowse.py Tue Jun 23 12:10:15 2015 -0400 +++ b/jbrowse.py Thu Dec 31 13:58:43 2015 -0500 @@ -1,142 +1,133 @@ #!/usr/bin/env python -from string import Template import os +import copy import argparse import subprocess import hashlib +import struct import tempfile +import shutil import json -import yaml +from Bio.Data import CodonTable +import xml.etree.ElementTree as ET import logging -logging.basicConfig() -log = logging.getLogger(__name__) - -COLOR_FUNCTION_TEMPLATE = Template(""" -function(feature, variableName, glyphObject, track) { - var score = ${score}; - ${opacity} - return 'rgba(${red}, ${green}, ${blue}, ' + opacity + ')'; -} -""") - -BLAST_OPACITY_MATH = """ -var opacity = 0; -if(score == 0.0) { - opacity = 1; -} else{ - opacity = (20 - Math.log10(score)) / 180; -} -""" - -BREWER_COLOUR_IDX = 0 -BREWER_COLOUR_SCHEMES = [ - (228, 26, 28), - (55, 126, 184), - (77, 175, 74), - (152, 78, 163), - (255, 127, 0), -] +from collections import defaultdict +logging.basicConfig(level=logging.INFO) +log = logging.getLogger('jbrowse') -# score comes from feature._parent.get('score') or feature.get('score') -# Opacity math +class ColorScaling(object): + + COLOR_FUNCTION_TEMPLATE = """ + function(feature, variableName, glyphObject, track) {{ + var score = {score}; + {opacity} + return 'rgba({red}, {green}, {blue}, ' + opacity + ')'; + }} + """ + + COLOR_FUNCTION_TEMPLATE_QUAL = """ + function(feature, variableName, glyphObject, track) {{ + var search_up = function self(sf, attr){{ + if(sf.get(attr) !== undefined){{ + return sf.get(attr); + }} + if(sf.parent() === undefined) {{ + return; + }}else{{ + return self(sf.parent(), attr); + }} + }}; -TN_TABLE = { - 'gff3': '--gff', - 'gff': '--gff', - 'bed': '--bed', - # 'genbank': '--gbk', -} + var search_down = function self(sf, attr){{ + if(sf.get(attr) !== undefined){{ + return sf.get(attr); + }} + if(sf.children() === undefined) {{ + return; + }}else{{ + var kids = sf.children(); + for(var child_idx in kids){{ + var x = self(kids[child_idx], attr); + if(x !== undefined){{ + return x; + }} + }} + return; + }} + }}; + + var color = ({user_spec_color} || search_up(feature, 'color') || search_down(feature, 'color') || {auto_gen_color}); + var score = (search_up(feature, 'score') || search_down(feature, 'score')); + {opacity} + var result = /^#?([a-f\d]{{2}})([a-f\d]{{2}})([a-f\d]{{2}})$/i.exec(color); + var red = parseInt(result[1], 16); + var green = parseInt(result[2], 16); + var blue = parseInt(result[3], 16); + if(isNaN(opacity) || opacity < 0){{ opacity = 0; }} + return 'rgba(' + red + ',' + green + ',' + blue + ',' + opacity + ')'; + }} + """ -INSTALLED_TO = os.path.dirname(os.path.realpath(__file__)) - - -class JbrowseConnector(object): + OPACITY_MATH = { + 'linear': """ + var opacity = (score - ({min})) / (({max}) - ({min})); + """, + 'logarithmic': """ + var opacity = (score - ({min})) / (({max}) - ({min})); + opacity = Math.log10(opacity) + Math.log10({max}); + """, + 'blast': """ + var opacity = 0; + if(score == 0.0) { + opacity = 1; + } else{ + opacity = (20 - Math.log10(score)) / 180; + } + """ + } - def __init__(self, jbrowse, jbrowse_dir, outdir, genome): - self.jbrowse = jbrowse - self.jbrowse_dir = jbrowse_dir - self.outdir = outdir - self.genome_path = genome + BREWER_COLOUR_IDX = 0 + BREWER_COLOUR_SCHEMES = [ + (166, 206, 227), + (31, 120, 180), + (178, 223, 138), + (51, 160, 44), + (251, 154, 153), + (227, 26, 28), + (253, 191, 111), + (255, 127, 0), + (202, 178, 214), + (106, 61, 154), + (255, 255, 153), + (177, 89, 40), + (228, 26, 28), + (55, 126, 184), + (77, 175, 74), + (152, 78, 163), + (255, 127, 0), + ] + + BREWER_DIVERGING_PALLETES = { + 'BrBg': ("#543005", "#003c30"), + 'PiYg': ("#8e0152", "#276419"), + 'PRGn': ("#40004b", "#00441b"), + 'PuOr': ("#7f3b08", "#2d004b"), + 'RdBu': ("#67001f", "#053061"), + 'RdGy': ("#67001f", "#1a1a1a"), + 'RdYlBu': ("#a50026", "#313695"), + 'RdYlGn': ("#a50026", "#006837"), + 'Spectral': ("#9e0142", "#5e4fa2"), + } + + def __init__(self): self.brewer_colour_idx = 0 - self.clone_jbrowse(self.jbrowse, self.outdir) - self.process_genome() - - def subprocess_check_call(self, command): - log.debug('cd %s && %s', self.jbrowse_dir, ' '.join(command)) - subprocess.check_call(command, cwd=self.jbrowse_dir) - - def _get_colours(self): - r, g, b = BREWER_COLOUR_SCHEMES[self.brewer_colour_idx] - self.brewer_colour_idx += 1 - return r, g, b - - def process_genome(self): - self.subprocess_check_call(['perl', 'bin/prepare-refseqs.pl', - '--fasta', self.genome_path]) - - def _add_json(self, json_data): - if len(json_data.keys()) == 0: - return - - tmp = tempfile.NamedTemporaryFile(delete=False) - tmp.write(json.dumps(json_data)) - tmp.close() - cmd = ['perl', 'bin/add-track-json.pl', tmp.name, - os.path.join('data', 'trackList.json')] - self.subprocess_check_call(cmd) - os.unlink(tmp.name) - - def add_blastxml(self, data, key, **kwargs): - gff3_unrebased = tempfile.NamedTemporaryFile(delete=False) - cmd = ['python', os.path.join(INSTALLED_TO, 'blastxml_to_gapped_gff3.py'), - '--trim_end', '--min_gap', str(kwargs['min_gap']), data] - subprocess.check_call(cmd, cwd=self.jbrowse_dir, stdout=gff3_unrebased) - gff3_unrebased.close() + def rgb_from_hex(self, hexstr): + # http://stackoverflow.com/questions/4296249/how-do-i-convert-a-hex-triplet-to-an-rgb-tuple-and-back + return struct.unpack('BBB',hexstr.decode('hex')) - gff3_rebased = tempfile.NamedTemporaryFile(delete=False) - cmd = ['python', os.path.join(INSTALLED_TO, 'gff3_rebase.py')] - if kwargs['protein']: - cmd.append('--protein2dna') - cmd.extend([kwargs['parent'], gff3_unrebased.name]) - subprocess.check_call(cmd, cwd=self.jbrowse_dir, stdout=gff3_rebased) - gff3_rebased.close() - - label = hashlib.md5(data).hexdigest() - - red, green, blue = self._get_colours() - color_function = COLOR_FUNCTION_TEMPLATE.substitute({ - 'score': "feature._parent.get('score')", - 'opacity': BLAST_OPACITY_MATH, - 'red': red, - 'green': green, - 'blue': blue, - }) - - clientConfig = { - 'label': 'description', - 'color': color_function.replace('\n', ''), - 'description': 'Hit_titles', - } - config = {'glyph': 'JBrowse/View/FeatureGlyph/Segments'} - if 'category' in kwargs: - config['category'] = kwargs['category'] - - cmd = ['perl', 'bin/flatfile-to-json.pl', - '--gff', gff3_rebased.name, - '--trackLabel', label, - '--key', key, - '--clientConfig', json.dumps(clientConfig), - '--config', json.dumps(config), - '--trackType', 'JBrowse/View/Track/CanvasFeatures' - ] - - self.subprocess_check_call(cmd) - os.unlink(gff3_rebased.name) - os.unlink(gff3_unrebased.name) - - def _min_max_gff(self, gff_file): + def min_max_gff(self, gff_file): min_val = None max_val = None with open(gff_file, 'r') as handle: @@ -155,65 +146,296 @@ pass return min_val, max_val - def add_bigwig(self, data, key, **kwargs): - label = hashlib.md5(data).hexdigest() - dest = os.path.join('data', 'raw', os.path.basename(data)) - cmd = ['ln', '-s', data, dest] - self.subprocess_check_call(cmd) + def hex_from_rgb(self, r, g, b): + return '#%02x%02x%02x' % (r, g, b) + + def _get_colours(self): + r, g, b = self.BREWER_COLOUR_SCHEMES[self.brewer_colour_idx] + self.brewer_colour_idx += 1 + return r, g, b + + def parse_colours(self, track, trackFormat, gff3=None): + # Wiggle tracks have a bicolor pallete + trackConfig = {'style': {}} + if trackFormat == 'wiggle': + + trackConfig['style']['pos_color'] = track['wiggle']['color_pos'] + trackConfig['style']['neg_color'] = track['wiggle']['color_neg'] + + if trackConfig['style']['pos_color'] == '__auto__': + trackConfig['style']['neg_color'] = self.hex_from_rgb(*self._get_colours()) + trackConfig['style']['pos_color'] = self.hex_from_rgb(*self._get_colours()) + - track_data = { - "label": label, - "urlTemplate": os.path.join('..', dest), - "bicolor_pivot": "zero", - "storeClass": "JBrowse/Store/SeqFeature/BigWig", - "type": "JBrowse/View/Track/Wiggle/Density", - "key": key, + # Wiggle tracks can change colour at a specified place + bc_pivot = track['wiggle']['bicolor_pivot'] + if bc_pivot not in ('mean', 'zero'): + # The values are either one of those two strings + # or a number + bc_pivot = float(bc_pivot) + trackConfig['bicolor_pivot'] = bc_pivot + elif 'scaling' in track: + if track['scaling']['method'] == 'ignore': + if track['scaling']['scheme']['color'] != '__auto__': + trackConfig['style']['color'] = track['scaling']['scheme']['color'] + else: + trackConfig['style']['color'] = self.hex_from_rgb(*self._get_colours()) + else: + # Scored method + algo = track['scaling']['algo'] + # linear, logarithmic, blast + scales = track['scaling']['scales'] + # type __auto__, manual (min, max) + scheme = track['scaling']['scheme'] + # scheme -> (type (opacity), color) + # ================================== + # GENE CALLS OR BLAST + # ================================== + if trackFormat == 'blast': + red, green, blue = self._get_colours() + color_function = self.COLOR_FUNCTION_TEMPLATE.format(**{ + 'score': "feature._parent.get('score')", + 'opacity': self.OPACITY_MATH['blast'], + 'red': red, + 'green': green, + 'blue': blue, + }) + trackConfig['style']['color'] = color_function.replace('\n', '') + elif trackFormat == 'gene_calls': + # Default values, based on GFF3 spec + min_val = 0 + max_val = 1000 + # Get min/max and build a scoring function since JBrowse doesn't + if scales['type'] == 'automatic': + min_val, max_val = self.min_max_gff(gff3) + else: + min_val = scales['min'] + max_val = scales['max'] + + if scheme['color'] == '__auto__': + user_color = 'undefined' + auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours()) + elif scheme['color'].startswith('#'): + user_color = "'%s'" % self.hex_from_rgb(*self.rgb_from_hex(scheme['color'][1:])) + auto_color = 'undefined' + else: + user_color = 'undefined' + auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours()) + + color_function = self.COLOR_FUNCTION_TEMPLATE_QUAL.format(**{ + 'opacity': self.OPACITY_MATH[algo].format(**{'max': max_val,'min': min_val}), + 'user_spec_color': user_color, + 'auto_gen_color': auto_color, + }) + + trackConfig['style']['color'] = color_function.replace('\n', '') + return trackConfig + + +def etree_to_dict(t): + d = {t.tag: {} if t.attrib else None} + children = list(t) + if children: + dd = defaultdict(list) + for dc in map(etree_to_dict, children): + for k, v in dc.iteritems(): + dd[k].append(v) + d = {t.tag: {k:v[0] if len(v) == 1 else v for k, v in dd.iteritems()}} + if t.attrib: + d[t.tag].update(('@' + k, v) for k, v in t.attrib.iteritems()) + if t.text: + text = t.text.strip() + if children or t.attrib: + if text: + d[t.tag]['#text'] = text + else: + d[t.tag] = text + return d + + +# score comes from feature._parent.get('score') or feature.get('score') + +INSTALLED_TO = os.path.dirname(os.path.realpath(__file__)) + + +class JbrowseConnector(object): + + def __init__(self, jbrowse, outdir, genomes, standalone=False, gencode=1): + self.TN_TABLE = { + 'gff3': '--gff', + 'gff': '--gff', + 'bed': '--bed', + 'genbank': '--gbk', } - track_data.update(kwargs) + + self.cs = ColorScaling() + self.jbrowse = jbrowse + self.outdir = outdir + self.genome_paths = genomes + self.standalone = standalone + self.gencode = gencode + + if standalone: + self.clone_jbrowse(self.jbrowse, self.outdir) + else: + try: + os.makedirs(self.outdir) + except OSError: + # Ignore if the folder exists + pass + + self.process_genomes() + self.update_gencode() + + def update_gencode(self): + table = CodonTable.unambiguous_dna_by_id[int(self.gencode)] + trackList = os.path.join(self.outdir, 'data', 'trackList.json') + with open(trackList, 'r') as handle: + trackListData = json.load(handle) - if 'min' not in track_data and 'max' not in track_data \ - and 'autoscale' not in track_data: - track_data['autoscale'] = 'local' + trackListData['tracks'][0].update({ + 'codonStarts': table.start_codons, + 'codonStops': table.stop_codons, + 'codonTable': table.forward_table, + }) + + with open(trackList, 'w') as handle: + json.dump(trackListData, handle, indent=2) + + + def subprocess_check_call(self, command): + log.debug('cd %s && %s', self.outdir, ' '.join(command)) + subprocess.check_call(command, cwd=self.outdir) + + def _jbrowse_bin(self, command): + return os.path.realpath(os.path.join(self.jbrowse, 'bin', command)) + + def process_genomes(self): + for genome_path in self.genome_paths: + self.subprocess_check_call([ + 'perl', self._jbrowse_bin('prepare-refseqs.pl'), + '--fasta', genome_path]) + + def _add_json(self, json_data): + if len(json_data.keys()) == 0: + return + + tmp = tempfile.NamedTemporaryFile(delete=False) + tmp.write(json.dumps(json_data)) + tmp.close() + cmd = ['perl', self._jbrowse_bin('add-track-json.pl'), tmp.name, + os.path.join('data', 'trackList.json')] + self.subprocess_check_call(cmd) + os.unlink(tmp.name) - self._add_json(track_data) + def _add_track_json(self, json_data): + if len(json_data.keys()) == 0: + return + + tmp = tempfile.NamedTemporaryFile(delete=False) + tmp.write(json.dumps(json_data)) + tmp.close() + cmd = ['perl', self._jbrowse_bin('add-track-json.pl'), tmp.name, + os.path.join('data', 'trackList.json')] + self.subprocess_check_call(cmd) + os.unlink(tmp.name) + + + def _blastxml_to_gff3(self, xml, min_gap=10): + gff3_unrebased = tempfile.NamedTemporaryFile(delete=False) + cmd = ['python', os.path.join(INSTALLED_TO, 'blastxml_to_gapped_gff3.py'), + '--trim', '--trim_end', '--min_gap', str(min_gap), xml] + log.debug('cd %s && %s > %s', self.outdir, ' '.join(cmd), gff3_unrebased.name) + subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_unrebased) + gff3_unrebased.close() + return gff3_unrebased.name + + def add_blastxml(self, data, trackData, blastOpts, **kwargs): + gff3 = self._blastxml_to_gff3(data, min_gap=blastOpts['min_gap']) - def add_bam(self, data, key, **kwargs): - label = hashlib.md5(data).hexdigest() - dest = os.path.join('data', 'raw', os.path.basename(data)) - # ln? - cmd = ['ln', '-s', data, dest] + if 'parent' in blastOpts: + gff3_rebased = tempfile.NamedTemporaryFile(delete=False) + cmd = ['python', os.path.join(INSTALLED_TO, 'gff3_rebase.py')] + if blastOpts.get('protein', 'false') == 'true': + cmd.append('--protein2dna') + cmd.extend([os.path.realpath(blastOpts['parent']), gff3]) + log.debug('cd %s && %s > %s', self.outdir, ' '.join(cmd), gff3_rebased.name) + subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_rebased) + gff3_rebased.close() + + # Replace original gff3 file + shutil.copy(gff3_rebased.name, gff3) + os.unlink(gff3_rebased.name) + + config = { + 'glyph': 'JBrowse/View/FeatureGlyph/Segments', + "category": trackData['category'], + } + + clientConfig = trackData['style'] + + cmd = ['perl', self._jbrowse_bin('flatfile-to-json.pl'), + '--gff', gff3, + '--trackLabel', trackData['label'], + '--key', trackData['key'], + '--clientConfig', json.dumps(clientConfig), + '--config', json.dumps(config), + '--trackType', 'JBrowse/View/Track/CanvasFeatures' + ] + + self.subprocess_check_call(cmd) + os.unlink(gff3) + + def add_bigwig(self, data, trackData, wiggleOpts, **kwargs): + dest = os.path.join('data', 'raw', trackData['label'] + '.bw') + cmd = ['ln', data, dest] self.subprocess_check_call(cmd) - bai_source = kwargs['bam_index'] - cmd = ['ln', '-s', bai_source, dest + '.bai'] + trackData.update({ + "urlTemplate": os.path.join('..', dest), + "storeClass": "JBrowse/Store/SeqFeature/BigWig", + "type": "JBrowse/View/Track/Wiggle/Density", + }) + + trackData['type'] = wiggleOpts['type'] + trackData['variance_band'] = True if wiggleOpts['variance_band'] == 'true' else False + + if 'min' in wiggleOpts and 'max' in wiggleOpts: + trackData['min_score'] = wiggleOpts['min'] + trackData['max_score'] = wiggleOpts['max'] + else: + trackData['autoscale'] = wiggleOpts.get('autoscale', 'local') + + self._add_track_json(trackData) + + def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs): + dest = os.path.join('data', 'raw', trackData['label'] + '.bam') + cmd = ['ln', '-s', os.path.realpath(data), dest] self.subprocess_check_call(cmd) - track_data = { + cmd = ['ln', '-s', os.path.realpath(bam_index), dest + '.bai'] + self.subprocess_check_call(cmd) + + trackData.update({ "urlTemplate": os.path.join('..', dest), - "key": key, - "label": label, "type": "JBrowse/View/Track/Alignments2", "storeClass": "JBrowse/Store/SeqFeature/BAM", - } - if 'category' in kwargs: - track_data['category'] = kwargs['category'] - self._add_json(track_data) + }) + - if kwargs.get('auto_snp', False): - track_data = { - "storeClass": "JBrowse/Store/SeqFeature/BAM", - "urlTemplate": os.path.join('..', dest), + self._add_track_json(trackData) + + if bamOpts.get('auto_snp', 'false') == 'true': + trackData2 = copy.copy(trackData) + trackData2.update({ "type": "JBrowse/View/Track/SNPCoverage", - "key": key + " - SNPs/Coverage", - "label": label + "_autosnp", - } - if 'category' in kwargs: - track_data['category'] = kwargs['category'] - self._add_json(track_data) + "key": trackData['key'] + " - SNPs/Coverage", + "label": trackData['label'] + "_autosnp", + }) + self._add_track_json(trackData2) - def add_vcf(self, data, key, **kwargs): - label = hashlib.md5(data).hexdigest() - dest = os.path.join('data', 'raw', os.path.basename(data)) + def add_vcf(self, data, trackData, vcfOpts={}, **kwargs): + dest = os.path.join('data', 'raw', trackData['label'] + '.vcf') # ln? cmd = ['ln', '-s', data, dest] self.subprocess_check_call(cmd) @@ -222,135 +444,146 @@ cmd = ['tabix', '-p', 'vcf', dest + '.gz'] self.subprocess_check_call(cmd) - track_data = { - "key": key, - "label": label, + trackData.update({ "urlTemplate": os.path.join('..', dest + '.gz'), "type": "JBrowse/View/Track/HTMLVariants", "storeClass": "JBrowse/Store/SeqFeature/VCFTabix", - } - track_data.update(kwargs) - self._add_json(track_data) - - def add_features(self, data, key, format, **kwargs): - label = hashlib.md5(data).hexdigest() - cmd = ['perl', 'bin/flatfile-to-json.pl', - TN_TABLE.get(format, 'gff'), data, - '--trackLabel', label, - '--key', key] - - config = {} - if 'category' in kwargs: - config['category'] = kwargs['category'] - - if kwargs.get('match', False): - clientConfig = { - 'label': 'description', - 'description': 'Hit_titles', - } - - # Get min/max and build a scoring function since JBrowse doesn't - min_val, max_val = self._min_max_gff(data) + }) + self._add_track_json(trackData) - if min_val is not None and max_val is not None: - MIN_MAX_OPACITY_MATH = Template(""" - var opacity = (score - ${min}) * (1/(${max} - ${min})); - """).substitute({ - 'max': max_val, - 'min': min_val, - }) + def add_features(self, data, format, trackData, gffOpts, **kwargs): + cmd = [ + 'perl', self._jbrowse_bin('flatfile-to-json.pl'), + self.TN_TABLE.get(format, 'gff'), + data, + '--trackLabel', trackData['label'], + '--trackType', 'JBrowse/View/Track/CanvasFeatures', + '--key', trackData['key'] + ] - red, green, blue = self._get_colours() - color_function = COLOR_FUNCTION_TEMPLATE.substitute({ - 'score': "feature.get('score')", - 'opacity': MIN_MAX_OPACITY_MATH, - 'red': red, - 'green': green, - 'blue': blue, - }) + config = copy.copy(trackData) + clientConfig = trackData['style'] + del config['style'] - clientConfig['color'] = color_function.replace('\n', '') - + if 'match' in gffOpts: config['glyph'] = 'JBrowse/View/FeatureGlyph/Segments' + cmd += ['--type', gffOpts['match']] - cmd += ['--clientConfig', json.dumps(clientConfig), - '--trackType', 'JBrowse/View/Track/CanvasFeatures' - ] + cmd += ['--clientConfig', json.dumps(clientConfig), + '--trackType', 'JBrowse/View/Track/CanvasFeatures' + ] cmd.extend(['--config', json.dumps(config)]) self.subprocess_check_call(cmd) - def process_annotations(self, data, key, format, **kwargs): - if format in ('gff', 'gff3', 'bed'): - self.add_features(data, key, format, **kwargs) - elif format == 'bigwig': - self.add_bigwig(data, key, **kwargs) - elif format == 'bam': - self.add_bam(data, key, **kwargs) - elif format == 'blastxml': - self.add_blastxml(data, key, **kwargs) - elif format == 'vcf': - self.add_vcf(data, key, **kwargs) + + def process_annotations(self, track): + outputTrackConfig = { + 'style': { + 'label': track['style'].get('label', 'description'), + 'className': track['style'].get('className', 'feature'), + 'description': track['style'].get('description', ''), + }, + 'category': track['category'], + } + + for i, (dataset_path, dataset_ext, track_human_label) in enumerate(track['trackfiles']): + log.info('Processing %s / %s', track['category'], track_human_label) + outputTrackConfig['key'] = track_human_label + hashData = [dataset_path, track_human_label, track['category']] + outputTrackConfig['label'] = hashlib.md5('|'.join(hashData)).hexdigest() + '_%s' % i + + # Colour parsing is complex due to different track types having + # different colour options. + colourOptions = self.cs.parse_colours(track['conf']['options'], track['format'], gff3=dataset_path) + # This used to be done with a dict.update() call, however that wiped out any previous style settings... + for key in colourOptions: + if key == 'style': + for subkey in colourOptions['style']: + outputTrackConfig['style'][subkey] = colourOptions['style'][subkey] + else: + outputTrackConfig[key] = colourOptions[key] + + if dataset_ext in ('gff', 'gff3', 'bed'): + self.add_features(dataset_path, dataset_ext, outputTrackConfig, + track['conf']['options']['gff']) + elif dataset_ext == 'bigwig': + self.add_bigwig(dataset_path, outputTrackConfig, + track['conf']['options']['wiggle']) + elif dataset_ext == 'bam': + real_indexes = track['conf']['options']['pileup']['bam_indices']['bam_index'] + if not isinstance(real_indexes, list): + # <bam_indices> + # <bam_index>/path/to/a.bam.bai</bam_index> + # </bam_indices> + # + # The above will result in the 'bam_index' key containing a + # string. If there are two or more indices, the container + # becomes a list. Fun! + real_indexes = [real_indexes] + + self.add_bam(dataset_path, outputTrackConfig, + track['conf']['options']['pileup'], + bam_index=real_indexes[i]) + elif dataset_ext == 'blastxml': + self.add_blastxml(dataset_path, outputTrackConfig, track['conf']['options']['blast']) + elif dataset_ext == 'vcf': + self.add_vcf(dataset_path, outputTrackConfig) def clone_jbrowse(self, jbrowse_dir, destination): + """Clone a JBrowse directory into a destination directory. + """ # JBrowse seems to have included some bad symlinks, cp ignores bad symlinks # unlike copytree - cmd = ['mkdir', '-p', destination] + cmd = ['cp', '-r', os.path.join(jbrowse_dir, '.'), destination] + log.debug(' '.join(cmd)) subprocess.check_call(cmd) - cmd = ['cp', '-r', jbrowse_dir, destination] - subprocess.check_call(cmd) - cmd = ['mkdir', '-p', os.path.join(destination, 'JBrowse-1.11.6', - 'data', 'raw')] + cmd = ['mkdir', '-p', os.path.join(destination, 'data', 'raw')] + log.debug(' '.join(cmd)) subprocess.check_call(cmd) # http://unix.stackexchange.com/a/38691/22785 # JBrowse releases come with some broken symlinks cmd = ['find', destination, '-type', 'l', '-xtype', 'l', '-exec', 'rm', "'{}'", '+'] + log.debug(' '.join(cmd)) subprocess.check_call(cmd) if __name__ == '__main__': parser = argparse.ArgumentParser(description="", epilog="") - parser.add_argument('genome', type=file, help='Input genome file') - parser.add_argument('yaml', type=file, help='Track Configuration') + parser.add_argument('xml', type=file, help='Track Configuration') parser.add_argument('--jbrowse', help='Folder containing a jbrowse release') parser.add_argument('--outdir', help='Output directory', default='out') + parser.add_argument('--standalone', help='Standalone mode includes a copy of JBrowse', action='store_true') args = parser.parse_args() + tree = ET.parse(args.xml.name) + root = tree.getroot() + jc = JbrowseConnector( jbrowse=args.jbrowse, - jbrowse_dir=os.path.join(args.outdir, 'JBrowse-1.11.6'), outdir=args.outdir, - genome=os.path.realpath(args.genome.name), + genomes=[os.path.realpath(x.text) for x in root.findall('metadata/genomes/genome')], + standalone=args.standalone, + gencode=root.find('metadata/gencode').text ) - track_data = yaml.load(args.yaml) - for track in track_data: - path = os.path.realpath(track['file']) - extra = track.get('options', {}) - if '__unused__' in extra: - del extra['__unused__'] - - for possible_partial_path in ('bam_index', 'parent'): - if possible_partial_path in extra: - extra[possible_partial_path] = os.path.realpath( - extra[possible_partial_path]) - extra['category'] = track.get('category', 'Default') + for track in root.findall('tracks/track'): + track_conf = {} + track_conf['trackfiles'] = [ + (os.path.realpath(x.attrib['path']), x.attrib['ext'], x.attrib['label']) + for x in track.findall('files/trackFile') + ] - jc.process_annotations(path, track['label'], track['ext'], **extra) - - print """ - <html> - <body> - <script type="text/javascript"> - window.location=JBrowse-1.11.6/index.html - </script> - <a href="JBrowse-1.11.6/index.html">Go to JBrowse</a> - <p>Please note that JBrowse functions best on production Galaxy - instances. The paste server used in development instances has issues - serving the volumes of data regularly involved in JBrowse</p> - </body> - </html> - """ + track_conf['category'] = track.attrib['cat'] + track_conf['format'] = track.attrib['format'] + try: + # Only pertains to gff3 + blastxml. TODO? + track_conf['style'] = {t.tag: t.text for t in track.find('options/style')} + except TypeError, te: + track_conf['style'] = {} + pass + track_conf['conf'] = etree_to_dict(track.find('options')) + jc.process_annotations(track_conf)