Mercurial > repos > iuc > jbrowse
comparison jbrowse.py @ 3:7342f467507b draft
Uploaded v0.4 of JBrowse
| author | iuc |
|---|---|
| date | Thu, 31 Dec 2015 13:58:43 -0500 |
| parents | 497c6bb3b717 |
| children | ae9382cfb6ac |
comparison
equal
deleted
inserted
replaced
| 2:b6a0e126dbee | 3:7342f467507b |
|---|---|
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 from string import Template | |
| 3 import os | 2 import os |
| 3 import copy | |
| 4 import argparse | 4 import argparse |
| 5 import subprocess | 5 import subprocess |
| 6 import hashlib | 6 import hashlib |
| 7 import struct | |
| 7 import tempfile | 8 import tempfile |
| 9 import shutil | |
| 8 import json | 10 import json |
| 9 import yaml | 11 from Bio.Data import CodonTable |
| 12 import xml.etree.ElementTree as ET | |
| 10 import logging | 13 import logging |
| 11 logging.basicConfig() | 14 from collections import defaultdict |
| 12 log = logging.getLogger(__name__) | 15 logging.basicConfig(level=logging.INFO) |
| 13 | 16 log = logging.getLogger('jbrowse') |
| 14 COLOR_FUNCTION_TEMPLATE = Template(""" | 17 |
| 15 function(feature, variableName, glyphObject, track) { | 18 |
| 16 var score = ${score}; | 19 class ColorScaling(object): |
| 17 ${opacity} | 20 |
| 18 return 'rgba(${red}, ${green}, ${blue}, ' + opacity + ')'; | 21 COLOR_FUNCTION_TEMPLATE = """ |
| 19 } | 22 function(feature, variableName, glyphObject, track) {{ |
| 20 """) | 23 var score = {score}; |
| 21 | 24 {opacity} |
| 22 BLAST_OPACITY_MATH = """ | 25 return 'rgba({red}, {green}, {blue}, ' + opacity + ')'; |
| 23 var opacity = 0; | 26 }} |
| 24 if(score == 0.0) { | 27 """ |
| 25 opacity = 1; | 28 |
| 26 } else{ | 29 COLOR_FUNCTION_TEMPLATE_QUAL = """ |
| 27 opacity = (20 - Math.log10(score)) / 180; | 30 function(feature, variableName, glyphObject, track) {{ |
| 28 } | 31 var search_up = function self(sf, attr){{ |
| 29 """ | 32 if(sf.get(attr) !== undefined){{ |
| 30 | 33 return sf.get(attr); |
| 31 BREWER_COLOUR_IDX = 0 | 34 }} |
| 32 BREWER_COLOUR_SCHEMES = [ | 35 if(sf.parent() === undefined) {{ |
| 33 (228, 26, 28), | 36 return; |
| 34 (55, 126, 184), | 37 }}else{{ |
| 35 (77, 175, 74), | 38 return self(sf.parent(), attr); |
| 36 (152, 78, 163), | 39 }} |
| 37 (255, 127, 0), | 40 }}; |
| 38 ] | 41 |
| 39 | 42 var search_down = function self(sf, attr){{ |
| 40 | 43 if(sf.get(attr) !== undefined){{ |
| 41 # score comes from feature._parent.get('score') or feature.get('score') | 44 return sf.get(attr); |
| 42 # Opacity math | 45 }} |
| 43 | 46 if(sf.children() === undefined) {{ |
| 44 TN_TABLE = { | 47 return; |
| 45 'gff3': '--gff', | 48 }}else{{ |
| 46 'gff': '--gff', | 49 var kids = sf.children(); |
| 47 'bed': '--bed', | 50 for(var child_idx in kids){{ |
| 48 # 'genbank': '--gbk', | 51 var x = self(kids[child_idx], attr); |
| 49 } | 52 if(x !== undefined){{ |
| 50 | 53 return x; |
| 51 INSTALLED_TO = os.path.dirname(os.path.realpath(__file__)) | 54 }} |
| 52 | 55 }} |
| 53 | 56 return; |
| 54 class JbrowseConnector(object): | 57 }} |
| 55 | 58 }}; |
| 56 def __init__(self, jbrowse, jbrowse_dir, outdir, genome): | 59 |
| 57 self.jbrowse = jbrowse | 60 var color = ({user_spec_color} || search_up(feature, 'color') || search_down(feature, 'color') || {auto_gen_color}); |
| 58 self.jbrowse_dir = jbrowse_dir | 61 var score = (search_up(feature, 'score') || search_down(feature, 'score')); |
| 59 self.outdir = outdir | 62 {opacity} |
| 60 self.genome_path = genome | 63 var result = /^#?([a-f\d]{{2}})([a-f\d]{{2}})([a-f\d]{{2}})$/i.exec(color); |
| 64 var red = parseInt(result[1], 16); | |
| 65 var green = parseInt(result[2], 16); | |
| 66 var blue = parseInt(result[3], 16); | |
| 67 if(isNaN(opacity) || opacity < 0){{ opacity = 0; }} | |
| 68 return 'rgba(' + red + ',' + green + ',' + blue + ',' + opacity + ')'; | |
| 69 }} | |
| 70 """ | |
| 71 | |
| 72 OPACITY_MATH = { | |
| 73 'linear': """ | |
| 74 var opacity = (score - ({min})) / (({max}) - ({min})); | |
| 75 """, | |
| 76 'logarithmic': """ | |
| 77 var opacity = (score - ({min})) / (({max}) - ({min})); | |
| 78 opacity = Math.log10(opacity) + Math.log10({max}); | |
| 79 """, | |
| 80 'blast': """ | |
| 81 var opacity = 0; | |
| 82 if(score == 0.0) { | |
| 83 opacity = 1; | |
| 84 } else{ | |
| 85 opacity = (20 - Math.log10(score)) / 180; | |
| 86 } | |
| 87 """ | |
| 88 } | |
| 89 | |
| 90 BREWER_COLOUR_IDX = 0 | |
| 91 BREWER_COLOUR_SCHEMES = [ | |
| 92 (166, 206, 227), | |
| 93 (31, 120, 180), | |
| 94 (178, 223, 138), | |
| 95 (51, 160, 44), | |
| 96 (251, 154, 153), | |
| 97 (227, 26, 28), | |
| 98 (253, 191, 111), | |
| 99 (255, 127, 0), | |
| 100 (202, 178, 214), | |
| 101 (106, 61, 154), | |
| 102 (255, 255, 153), | |
| 103 (177, 89, 40), | |
| 104 (228, 26, 28), | |
| 105 (55, 126, 184), | |
| 106 (77, 175, 74), | |
| 107 (152, 78, 163), | |
| 108 (255, 127, 0), | |
| 109 ] | |
| 110 | |
| 111 BREWER_DIVERGING_PALLETES = { | |
| 112 'BrBg': ("#543005", "#003c30"), | |
| 113 'PiYg': ("#8e0152", "#276419"), | |
| 114 'PRGn': ("#40004b", "#00441b"), | |
| 115 'PuOr': ("#7f3b08", "#2d004b"), | |
| 116 'RdBu': ("#67001f", "#053061"), | |
| 117 'RdGy': ("#67001f", "#1a1a1a"), | |
| 118 'RdYlBu': ("#a50026", "#313695"), | |
| 119 'RdYlGn': ("#a50026", "#006837"), | |
| 120 'Spectral': ("#9e0142", "#5e4fa2"), | |
| 121 } | |
| 122 | |
| 123 def __init__(self): | |
| 61 self.brewer_colour_idx = 0 | 124 self.brewer_colour_idx = 0 |
| 62 | 125 |
| 63 self.clone_jbrowse(self.jbrowse, self.outdir) | 126 def rgb_from_hex(self, hexstr): |
| 64 self.process_genome() | 127 # http://stackoverflow.com/questions/4296249/how-do-i-convert-a-hex-triplet-to-an-rgb-tuple-and-back |
| 65 | 128 return struct.unpack('BBB',hexstr.decode('hex')) |
| 66 def subprocess_check_call(self, command): | 129 |
| 67 log.debug('cd %s && %s', self.jbrowse_dir, ' '.join(command)) | 130 def min_max_gff(self, gff_file): |
| 68 subprocess.check_call(command, cwd=self.jbrowse_dir) | |
| 69 | |
| 70 def _get_colours(self): | |
| 71 r, g, b = BREWER_COLOUR_SCHEMES[self.brewer_colour_idx] | |
| 72 self.brewer_colour_idx += 1 | |
| 73 return r, g, b | |
| 74 | |
| 75 def process_genome(self): | |
| 76 self.subprocess_check_call(['perl', 'bin/prepare-refseqs.pl', | |
| 77 '--fasta', self.genome_path]) | |
| 78 | |
| 79 def _add_json(self, json_data): | |
| 80 if len(json_data.keys()) == 0: | |
| 81 return | |
| 82 | |
| 83 tmp = tempfile.NamedTemporaryFile(delete=False) | |
| 84 tmp.write(json.dumps(json_data)) | |
| 85 tmp.close() | |
| 86 cmd = ['perl', 'bin/add-track-json.pl', tmp.name, | |
| 87 os.path.join('data', 'trackList.json')] | |
| 88 self.subprocess_check_call(cmd) | |
| 89 os.unlink(tmp.name) | |
| 90 | |
| 91 def add_blastxml(self, data, key, **kwargs): | |
| 92 gff3_unrebased = tempfile.NamedTemporaryFile(delete=False) | |
| 93 cmd = ['python', os.path.join(INSTALLED_TO, 'blastxml_to_gapped_gff3.py'), | |
| 94 '--trim_end', '--min_gap', str(kwargs['min_gap']), data] | |
| 95 subprocess.check_call(cmd, cwd=self.jbrowse_dir, stdout=gff3_unrebased) | |
| 96 gff3_unrebased.close() | |
| 97 | |
| 98 gff3_rebased = tempfile.NamedTemporaryFile(delete=False) | |
| 99 cmd = ['python', os.path.join(INSTALLED_TO, 'gff3_rebase.py')] | |
| 100 if kwargs['protein']: | |
| 101 cmd.append('--protein2dna') | |
| 102 cmd.extend([kwargs['parent'], gff3_unrebased.name]) | |
| 103 subprocess.check_call(cmd, cwd=self.jbrowse_dir, stdout=gff3_rebased) | |
| 104 gff3_rebased.close() | |
| 105 | |
| 106 label = hashlib.md5(data).hexdigest() | |
| 107 | |
| 108 red, green, blue = self._get_colours() | |
| 109 color_function = COLOR_FUNCTION_TEMPLATE.substitute({ | |
| 110 'score': "feature._parent.get('score')", | |
| 111 'opacity': BLAST_OPACITY_MATH, | |
| 112 'red': red, | |
| 113 'green': green, | |
| 114 'blue': blue, | |
| 115 }) | |
| 116 | |
| 117 clientConfig = { | |
| 118 'label': 'description', | |
| 119 'color': color_function.replace('\n', ''), | |
| 120 'description': 'Hit_titles', | |
| 121 } | |
| 122 config = {'glyph': 'JBrowse/View/FeatureGlyph/Segments'} | |
| 123 if 'category' in kwargs: | |
| 124 config['category'] = kwargs['category'] | |
| 125 | |
| 126 cmd = ['perl', 'bin/flatfile-to-json.pl', | |
| 127 '--gff', gff3_rebased.name, | |
| 128 '--trackLabel', label, | |
| 129 '--key', key, | |
| 130 '--clientConfig', json.dumps(clientConfig), | |
| 131 '--config', json.dumps(config), | |
| 132 '--trackType', 'JBrowse/View/Track/CanvasFeatures' | |
| 133 ] | |
| 134 | |
| 135 self.subprocess_check_call(cmd) | |
| 136 os.unlink(gff3_rebased.name) | |
| 137 os.unlink(gff3_unrebased.name) | |
| 138 | |
| 139 def _min_max_gff(self, gff_file): | |
| 140 min_val = None | 131 min_val = None |
| 141 max_val = None | 132 max_val = None |
| 142 with open(gff_file, 'r') as handle: | 133 with open(gff_file, 'r') as handle: |
| 143 for line in handle: | 134 for line in handle: |
| 144 try: | 135 try: |
| 153 max_val = value | 144 max_val = value |
| 154 except Exception: | 145 except Exception: |
| 155 pass | 146 pass |
| 156 return min_val, max_val | 147 return min_val, max_val |
| 157 | 148 |
| 158 def add_bigwig(self, data, key, **kwargs): | 149 def hex_from_rgb(self, r, g, b): |
| 159 label = hashlib.md5(data).hexdigest() | 150 return '#%02x%02x%02x' % (r, g, b) |
| 160 dest = os.path.join('data', 'raw', os.path.basename(data)) | 151 |
| 161 cmd = ['ln', '-s', data, dest] | 152 def _get_colours(self): |
| 162 self.subprocess_check_call(cmd) | 153 r, g, b = self.BREWER_COLOUR_SCHEMES[self.brewer_colour_idx] |
| 163 | 154 self.brewer_colour_idx += 1 |
| 164 track_data = { | 155 return r, g, b |
| 165 "label": label, | 156 |
| 157 def parse_colours(self, track, trackFormat, gff3=None): | |
| 158 # Wiggle tracks have a bicolor pallete | |
| 159 trackConfig = {'style': {}} | |
| 160 if trackFormat == 'wiggle': | |
| 161 | |
| 162 trackConfig['style']['pos_color'] = track['wiggle']['color_pos'] | |
| 163 trackConfig['style']['neg_color'] = track['wiggle']['color_neg'] | |
| 164 | |
| 165 if trackConfig['style']['pos_color'] == '__auto__': | |
| 166 trackConfig['style']['neg_color'] = self.hex_from_rgb(*self._get_colours()) | |
| 167 trackConfig['style']['pos_color'] = self.hex_from_rgb(*self._get_colours()) | |
| 168 | |
| 169 | |
| 170 # Wiggle tracks can change colour at a specified place | |
| 171 bc_pivot = track['wiggle']['bicolor_pivot'] | |
| 172 if bc_pivot not in ('mean', 'zero'): | |
| 173 # The values are either one of those two strings | |
| 174 # or a number | |
| 175 bc_pivot = float(bc_pivot) | |
| 176 trackConfig['bicolor_pivot'] = bc_pivot | |
| 177 elif 'scaling' in track: | |
| 178 if track['scaling']['method'] == 'ignore': | |
| 179 if track['scaling']['scheme']['color'] != '__auto__': | |
| 180 trackConfig['style']['color'] = track['scaling']['scheme']['color'] | |
| 181 else: | |
| 182 trackConfig['style']['color'] = self.hex_from_rgb(*self._get_colours()) | |
| 183 else: | |
| 184 # Scored method | |
| 185 algo = track['scaling']['algo'] | |
| 186 # linear, logarithmic, blast | |
| 187 scales = track['scaling']['scales'] | |
| 188 # type __auto__, manual (min, max) | |
| 189 scheme = track['scaling']['scheme'] | |
| 190 # scheme -> (type (opacity), color) | |
| 191 # ================================== | |
| 192 # GENE CALLS OR BLAST | |
| 193 # ================================== | |
| 194 if trackFormat == 'blast': | |
| 195 red, green, blue = self._get_colours() | |
| 196 color_function = self.COLOR_FUNCTION_TEMPLATE.format(**{ | |
| 197 'score': "feature._parent.get('score')", | |
| 198 'opacity': self.OPACITY_MATH['blast'], | |
| 199 'red': red, | |
| 200 'green': green, | |
| 201 'blue': blue, | |
| 202 }) | |
| 203 trackConfig['style']['color'] = color_function.replace('\n', '') | |
| 204 elif trackFormat == 'gene_calls': | |
| 205 # Default values, based on GFF3 spec | |
| 206 min_val = 0 | |
| 207 max_val = 1000 | |
| 208 # Get min/max and build a scoring function since JBrowse doesn't | |
| 209 if scales['type'] == 'automatic': | |
| 210 min_val, max_val = self.min_max_gff(gff3) | |
| 211 else: | |
| 212 min_val = scales['min'] | |
| 213 max_val = scales['max'] | |
| 214 | |
| 215 if scheme['color'] == '__auto__': | |
| 216 user_color = 'undefined' | |
| 217 auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours()) | |
| 218 elif scheme['color'].startswith('#'): | |
| 219 user_color = "'%s'" % self.hex_from_rgb(*self.rgb_from_hex(scheme['color'][1:])) | |
| 220 auto_color = 'undefined' | |
| 221 else: | |
| 222 user_color = 'undefined' | |
| 223 auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours()) | |
| 224 | |
| 225 color_function = self.COLOR_FUNCTION_TEMPLATE_QUAL.format(**{ | |
| 226 'opacity': self.OPACITY_MATH[algo].format(**{'max': max_val,'min': min_val}), | |
| 227 'user_spec_color': user_color, | |
| 228 'auto_gen_color': auto_color, | |
| 229 }) | |
| 230 | |
| 231 trackConfig['style']['color'] = color_function.replace('\n', '') | |
| 232 return trackConfig | |
| 233 | |
| 234 | |
| 235 def etree_to_dict(t): | |
| 236 d = {t.tag: {} if t.attrib else None} | |
| 237 children = list(t) | |
| 238 if children: | |
| 239 dd = defaultdict(list) | |
| 240 for dc in map(etree_to_dict, children): | |
| 241 for k, v in dc.iteritems(): | |
| 242 dd[k].append(v) | |
| 243 d = {t.tag: {k:v[0] if len(v) == 1 else v for k, v in dd.iteritems()}} | |
| 244 if t.attrib: | |
| 245 d[t.tag].update(('@' + k, v) for k, v in t.attrib.iteritems()) | |
| 246 if t.text: | |
| 247 text = t.text.strip() | |
| 248 if children or t.attrib: | |
| 249 if text: | |
| 250 d[t.tag]['#text'] = text | |
| 251 else: | |
| 252 d[t.tag] = text | |
| 253 return d | |
| 254 | |
| 255 | |
| 256 # score comes from feature._parent.get('score') or feature.get('score') | |
| 257 | |
| 258 INSTALLED_TO = os.path.dirname(os.path.realpath(__file__)) | |
| 259 | |
| 260 | |
| 261 class JbrowseConnector(object): | |
| 262 | |
| 263 def __init__(self, jbrowse, outdir, genomes, standalone=False, gencode=1): | |
| 264 self.TN_TABLE = { | |
| 265 'gff3': '--gff', | |
| 266 'gff': '--gff', | |
| 267 'bed': '--bed', | |
| 268 'genbank': '--gbk', | |
| 269 } | |
| 270 | |
| 271 self.cs = ColorScaling() | |
| 272 self.jbrowse = jbrowse | |
| 273 self.outdir = outdir | |
| 274 self.genome_paths = genomes | |
| 275 self.standalone = standalone | |
| 276 self.gencode = gencode | |
| 277 | |
| 278 if standalone: | |
| 279 self.clone_jbrowse(self.jbrowse, self.outdir) | |
| 280 else: | |
| 281 try: | |
| 282 os.makedirs(self.outdir) | |
| 283 except OSError: | |
| 284 # Ignore if the folder exists | |
| 285 pass | |
| 286 | |
| 287 self.process_genomes() | |
| 288 self.update_gencode() | |
| 289 | |
| 290 def update_gencode(self): | |
| 291 table = CodonTable.unambiguous_dna_by_id[int(self.gencode)] | |
| 292 trackList = os.path.join(self.outdir, 'data', 'trackList.json') | |
| 293 with open(trackList, 'r') as handle: | |
| 294 trackListData = json.load(handle) | |
| 295 | |
| 296 trackListData['tracks'][0].update({ | |
| 297 'codonStarts': table.start_codons, | |
| 298 'codonStops': table.stop_codons, | |
| 299 'codonTable': table.forward_table, | |
| 300 }) | |
| 301 | |
| 302 with open(trackList, 'w') as handle: | |
| 303 json.dump(trackListData, handle, indent=2) | |
| 304 | |
| 305 | |
| 306 def subprocess_check_call(self, command): | |
| 307 log.debug('cd %s && %s', self.outdir, ' '.join(command)) | |
| 308 subprocess.check_call(command, cwd=self.outdir) | |
| 309 | |
| 310 def _jbrowse_bin(self, command): | |
| 311 return os.path.realpath(os.path.join(self.jbrowse, 'bin', command)) | |
| 312 | |
| 313 def process_genomes(self): | |
| 314 for genome_path in self.genome_paths: | |
| 315 self.subprocess_check_call([ | |
| 316 'perl', self._jbrowse_bin('prepare-refseqs.pl'), | |
| 317 '--fasta', genome_path]) | |
| 318 | |
| 319 def _add_json(self, json_data): | |
| 320 if len(json_data.keys()) == 0: | |
| 321 return | |
| 322 | |
| 323 tmp = tempfile.NamedTemporaryFile(delete=False) | |
| 324 tmp.write(json.dumps(json_data)) | |
| 325 tmp.close() | |
| 326 cmd = ['perl', self._jbrowse_bin('add-track-json.pl'), tmp.name, | |
| 327 os.path.join('data', 'trackList.json')] | |
| 328 self.subprocess_check_call(cmd) | |
| 329 os.unlink(tmp.name) | |
| 330 | |
| 331 def _add_track_json(self, json_data): | |
| 332 if len(json_data.keys()) == 0: | |
| 333 return | |
| 334 | |
| 335 tmp = tempfile.NamedTemporaryFile(delete=False) | |
| 336 tmp.write(json.dumps(json_data)) | |
| 337 tmp.close() | |
| 338 cmd = ['perl', self._jbrowse_bin('add-track-json.pl'), tmp.name, | |
| 339 os.path.join('data', 'trackList.json')] | |
| 340 self.subprocess_check_call(cmd) | |
| 341 os.unlink(tmp.name) | |
| 342 | |
| 343 | |
| 344 def _blastxml_to_gff3(self, xml, min_gap=10): | |
| 345 gff3_unrebased = tempfile.NamedTemporaryFile(delete=False) | |
| 346 cmd = ['python', os.path.join(INSTALLED_TO, 'blastxml_to_gapped_gff3.py'), | |
| 347 '--trim', '--trim_end', '--min_gap', str(min_gap), xml] | |
| 348 log.debug('cd %s && %s > %s', self.outdir, ' '.join(cmd), gff3_unrebased.name) | |
| 349 subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_unrebased) | |
| 350 gff3_unrebased.close() | |
| 351 return gff3_unrebased.name | |
| 352 | |
| 353 def add_blastxml(self, data, trackData, blastOpts, **kwargs): | |
| 354 gff3 = self._blastxml_to_gff3(data, min_gap=blastOpts['min_gap']) | |
| 355 | |
| 356 if 'parent' in blastOpts: | |
| 357 gff3_rebased = tempfile.NamedTemporaryFile(delete=False) | |
| 358 cmd = ['python', os.path.join(INSTALLED_TO, 'gff3_rebase.py')] | |
| 359 if blastOpts.get('protein', 'false') == 'true': | |
| 360 cmd.append('--protein2dna') | |
| 361 cmd.extend([os.path.realpath(blastOpts['parent']), gff3]) | |
| 362 log.debug('cd %s && %s > %s', self.outdir, ' '.join(cmd), gff3_rebased.name) | |
| 363 subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_rebased) | |
| 364 gff3_rebased.close() | |
| 365 | |
| 366 # Replace original gff3 file | |
| 367 shutil.copy(gff3_rebased.name, gff3) | |
| 368 os.unlink(gff3_rebased.name) | |
| 369 | |
| 370 config = { | |
| 371 'glyph': 'JBrowse/View/FeatureGlyph/Segments', | |
| 372 "category": trackData['category'], | |
| 373 } | |
| 374 | |
| 375 clientConfig = trackData['style'] | |
| 376 | |
| 377 cmd = ['perl', self._jbrowse_bin('flatfile-to-json.pl'), | |
| 378 '--gff', gff3, | |
| 379 '--trackLabel', trackData['label'], | |
| 380 '--key', trackData['key'], | |
| 381 '--clientConfig', json.dumps(clientConfig), | |
| 382 '--config', json.dumps(config), | |
| 383 '--trackType', 'JBrowse/View/Track/CanvasFeatures' | |
| 384 ] | |
| 385 | |
| 386 self.subprocess_check_call(cmd) | |
| 387 os.unlink(gff3) | |
| 388 | |
| 389 def add_bigwig(self, data, trackData, wiggleOpts, **kwargs): | |
| 390 dest = os.path.join('data', 'raw', trackData['label'] + '.bw') | |
| 391 cmd = ['ln', data, dest] | |
| 392 self.subprocess_check_call(cmd) | |
| 393 | |
| 394 trackData.update({ | |
| 166 "urlTemplate": os.path.join('..', dest), | 395 "urlTemplate": os.path.join('..', dest), |
| 167 "bicolor_pivot": "zero", | |
| 168 "storeClass": "JBrowse/Store/SeqFeature/BigWig", | 396 "storeClass": "JBrowse/Store/SeqFeature/BigWig", |
| 169 "type": "JBrowse/View/Track/Wiggle/Density", | 397 "type": "JBrowse/View/Track/Wiggle/Density", |
| 170 "key": key, | 398 }) |
| 171 } | 399 |
| 172 track_data.update(kwargs) | 400 trackData['type'] = wiggleOpts['type'] |
| 173 | 401 trackData['variance_band'] = True if wiggleOpts['variance_band'] == 'true' else False |
| 174 if 'min' not in track_data and 'max' not in track_data \ | 402 |
| 175 and 'autoscale' not in track_data: | 403 if 'min' in wiggleOpts and 'max' in wiggleOpts: |
| 176 track_data['autoscale'] = 'local' | 404 trackData['min_score'] = wiggleOpts['min'] |
| 177 | 405 trackData['max_score'] = wiggleOpts['max'] |
| 178 self._add_json(track_data) | 406 else: |
| 179 | 407 trackData['autoscale'] = wiggleOpts.get('autoscale', 'local') |
| 180 def add_bam(self, data, key, **kwargs): | 408 |
| 181 label = hashlib.md5(data).hexdigest() | 409 self._add_track_json(trackData) |
| 182 dest = os.path.join('data', 'raw', os.path.basename(data)) | 410 |
| 411 def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs): | |
| 412 dest = os.path.join('data', 'raw', trackData['label'] + '.bam') | |
| 413 cmd = ['ln', '-s', os.path.realpath(data), dest] | |
| 414 self.subprocess_check_call(cmd) | |
| 415 | |
| 416 cmd = ['ln', '-s', os.path.realpath(bam_index), dest + '.bai'] | |
| 417 self.subprocess_check_call(cmd) | |
| 418 | |
| 419 trackData.update({ | |
| 420 "urlTemplate": os.path.join('..', dest), | |
| 421 "type": "JBrowse/View/Track/Alignments2", | |
| 422 "storeClass": "JBrowse/Store/SeqFeature/BAM", | |
| 423 }) | |
| 424 | |
| 425 | |
| 426 self._add_track_json(trackData) | |
| 427 | |
| 428 if bamOpts.get('auto_snp', 'false') == 'true': | |
| 429 trackData2 = copy.copy(trackData) | |
| 430 trackData2.update({ | |
| 431 "type": "JBrowse/View/Track/SNPCoverage", | |
| 432 "key": trackData['key'] + " - SNPs/Coverage", | |
| 433 "label": trackData['label'] + "_autosnp", | |
| 434 }) | |
| 435 self._add_track_json(trackData2) | |
| 436 | |
| 437 def add_vcf(self, data, trackData, vcfOpts={}, **kwargs): | |
| 438 dest = os.path.join('data', 'raw', trackData['label'] + '.vcf') | |
| 183 # ln? | 439 # ln? |
| 184 cmd = ['ln', '-s', data, dest] | 440 cmd = ['ln', '-s', data, dest] |
| 185 self.subprocess_check_call(cmd) | 441 self.subprocess_check_call(cmd) |
| 186 | |
| 187 bai_source = kwargs['bam_index'] | |
| 188 cmd = ['ln', '-s', bai_source, dest + '.bai'] | |
| 189 self.subprocess_check_call(cmd) | |
| 190 | |
| 191 track_data = { | |
| 192 "urlTemplate": os.path.join('..', dest), | |
| 193 "key": key, | |
| 194 "label": label, | |
| 195 "type": "JBrowse/View/Track/Alignments2", | |
| 196 "storeClass": "JBrowse/Store/SeqFeature/BAM", | |
| 197 } | |
| 198 if 'category' in kwargs: | |
| 199 track_data['category'] = kwargs['category'] | |
| 200 self._add_json(track_data) | |
| 201 | |
| 202 if kwargs.get('auto_snp', False): | |
| 203 track_data = { | |
| 204 "storeClass": "JBrowse/Store/SeqFeature/BAM", | |
| 205 "urlTemplate": os.path.join('..', dest), | |
| 206 "type": "JBrowse/View/Track/SNPCoverage", | |
| 207 "key": key + " - SNPs/Coverage", | |
| 208 "label": label + "_autosnp", | |
| 209 } | |
| 210 if 'category' in kwargs: | |
| 211 track_data['category'] = kwargs['category'] | |
| 212 self._add_json(track_data) | |
| 213 | |
| 214 def add_vcf(self, data, key, **kwargs): | |
| 215 label = hashlib.md5(data).hexdigest() | |
| 216 dest = os.path.join('data', 'raw', os.path.basename(data)) | |
| 217 # ln? | |
| 218 cmd = ['ln', '-s', data, dest] | |
| 219 self.subprocess_check_call(cmd) | |
| 220 cmd = ['bgzip', dest] | 442 cmd = ['bgzip', dest] |
| 221 self.subprocess_check_call(cmd) | 443 self.subprocess_check_call(cmd) |
| 222 cmd = ['tabix', '-p', 'vcf', dest + '.gz'] | 444 cmd = ['tabix', '-p', 'vcf', dest + '.gz'] |
| 223 self.subprocess_check_call(cmd) | 445 self.subprocess_check_call(cmd) |
| 224 | 446 |
| 225 track_data = { | 447 trackData.update({ |
| 226 "key": key, | |
| 227 "label": label, | |
| 228 "urlTemplate": os.path.join('..', dest + '.gz'), | 448 "urlTemplate": os.path.join('..', dest + '.gz'), |
| 229 "type": "JBrowse/View/Track/HTMLVariants", | 449 "type": "JBrowse/View/Track/HTMLVariants", |
| 230 "storeClass": "JBrowse/Store/SeqFeature/VCFTabix", | 450 "storeClass": "JBrowse/Store/SeqFeature/VCFTabix", |
| 451 }) | |
| 452 self._add_track_json(trackData) | |
| 453 | |
| 454 def add_features(self, data, format, trackData, gffOpts, **kwargs): | |
| 455 cmd = [ | |
| 456 'perl', self._jbrowse_bin('flatfile-to-json.pl'), | |
| 457 self.TN_TABLE.get(format, 'gff'), | |
| 458 data, | |
| 459 '--trackLabel', trackData['label'], | |
| 460 '--trackType', 'JBrowse/View/Track/CanvasFeatures', | |
| 461 '--key', trackData['key'] | |
| 462 ] | |
| 463 | |
| 464 config = copy.copy(trackData) | |
| 465 clientConfig = trackData['style'] | |
| 466 del config['style'] | |
| 467 | |
| 468 if 'match' in gffOpts: | |
| 469 config['glyph'] = 'JBrowse/View/FeatureGlyph/Segments' | |
| 470 cmd += ['--type', gffOpts['match']] | |
| 471 | |
| 472 cmd += ['--clientConfig', json.dumps(clientConfig), | |
| 473 '--trackType', 'JBrowse/View/Track/CanvasFeatures' | |
| 474 ] | |
| 475 | |
| 476 cmd.extend(['--config', json.dumps(config)]) | |
| 477 | |
| 478 self.subprocess_check_call(cmd) | |
| 479 | |
| 480 | |
| 481 def process_annotations(self, track): | |
| 482 outputTrackConfig = { | |
| 483 'style': { | |
| 484 'label': track['style'].get('label', 'description'), | |
| 485 'className': track['style'].get('className', 'feature'), | |
| 486 'description': track['style'].get('description', ''), | |
| 487 }, | |
| 488 'category': track['category'], | |
| 231 } | 489 } |
| 232 track_data.update(kwargs) | 490 |
| 233 self._add_json(track_data) | 491 for i, (dataset_path, dataset_ext, track_human_label) in enumerate(track['trackfiles']): |
| 234 | 492 log.info('Processing %s / %s', track['category'], track_human_label) |
| 235 def add_features(self, data, key, format, **kwargs): | 493 outputTrackConfig['key'] = track_human_label |
| 236 label = hashlib.md5(data).hexdigest() | 494 hashData = [dataset_path, track_human_label, track['category']] |
| 237 cmd = ['perl', 'bin/flatfile-to-json.pl', | 495 outputTrackConfig['label'] = hashlib.md5('|'.join(hashData)).hexdigest() + '_%s' % i |
| 238 TN_TABLE.get(format, 'gff'), data, | 496 |
| 239 '--trackLabel', label, | 497 # Colour parsing is complex due to different track types having |
| 240 '--key', key] | 498 # different colour options. |
| 241 | 499 colourOptions = self.cs.parse_colours(track['conf']['options'], track['format'], gff3=dataset_path) |
| 242 config = {} | 500 # This used to be done with a dict.update() call, however that wiped out any previous style settings... |
| 243 if 'category' in kwargs: | 501 for key in colourOptions: |
| 244 config['category'] = kwargs['category'] | 502 if key == 'style': |
| 245 | 503 for subkey in colourOptions['style']: |
| 246 if kwargs.get('match', False): | 504 outputTrackConfig['style'][subkey] = colourOptions['style'][subkey] |
| 247 clientConfig = { | 505 else: |
| 248 'label': 'description', | 506 outputTrackConfig[key] = colourOptions[key] |
| 249 'description': 'Hit_titles', | 507 |
| 250 } | 508 if dataset_ext in ('gff', 'gff3', 'bed'): |
| 251 | 509 self.add_features(dataset_path, dataset_ext, outputTrackConfig, |
| 252 # Get min/max and build a scoring function since JBrowse doesn't | 510 track['conf']['options']['gff']) |
| 253 min_val, max_val = self._min_max_gff(data) | 511 elif dataset_ext == 'bigwig': |
| 254 | 512 self.add_bigwig(dataset_path, outputTrackConfig, |
| 255 if min_val is not None and max_val is not None: | 513 track['conf']['options']['wiggle']) |
| 256 MIN_MAX_OPACITY_MATH = Template(""" | 514 elif dataset_ext == 'bam': |
| 257 var opacity = (score - ${min}) * (1/(${max} - ${min})); | 515 real_indexes = track['conf']['options']['pileup']['bam_indices']['bam_index'] |
| 258 """).substitute({ | 516 if not isinstance(real_indexes, list): |
| 259 'max': max_val, | 517 # <bam_indices> |
| 260 'min': min_val, | 518 # <bam_index>/path/to/a.bam.bai</bam_index> |
| 261 }) | 519 # </bam_indices> |
| 262 | 520 # |
| 263 red, green, blue = self._get_colours() | 521 # The above will result in the 'bam_index' key containing a |
| 264 color_function = COLOR_FUNCTION_TEMPLATE.substitute({ | 522 # string. If there are two or more indices, the container |
| 265 'score': "feature.get('score')", | 523 # becomes a list. Fun! |
| 266 'opacity': MIN_MAX_OPACITY_MATH, | 524 real_indexes = [real_indexes] |
| 267 'red': red, | 525 |
| 268 'green': green, | 526 self.add_bam(dataset_path, outputTrackConfig, |
| 269 'blue': blue, | 527 track['conf']['options']['pileup'], |
| 270 }) | 528 bam_index=real_indexes[i]) |
| 271 | 529 elif dataset_ext == 'blastxml': |
| 272 clientConfig['color'] = color_function.replace('\n', '') | 530 self.add_blastxml(dataset_path, outputTrackConfig, track['conf']['options']['blast']) |
| 273 | 531 elif dataset_ext == 'vcf': |
| 274 config['glyph'] = 'JBrowse/View/FeatureGlyph/Segments' | 532 self.add_vcf(dataset_path, outputTrackConfig) |
| 275 | |
| 276 cmd += ['--clientConfig', json.dumps(clientConfig), | |
| 277 '--trackType', 'JBrowse/View/Track/CanvasFeatures' | |
| 278 ] | |
| 279 | |
| 280 cmd.extend(['--config', json.dumps(config)]) | |
| 281 | |
| 282 self.subprocess_check_call(cmd) | |
| 283 | |
| 284 def process_annotations(self, data, key, format, **kwargs): | |
| 285 if format in ('gff', 'gff3', 'bed'): | |
| 286 self.add_features(data, key, format, **kwargs) | |
| 287 elif format == 'bigwig': | |
| 288 self.add_bigwig(data, key, **kwargs) | |
| 289 elif format == 'bam': | |
| 290 self.add_bam(data, key, **kwargs) | |
| 291 elif format == 'blastxml': | |
| 292 self.add_blastxml(data, key, **kwargs) | |
| 293 elif format == 'vcf': | |
| 294 self.add_vcf(data, key, **kwargs) | |
| 295 | 533 |
| 296 def clone_jbrowse(self, jbrowse_dir, destination): | 534 def clone_jbrowse(self, jbrowse_dir, destination): |
| 535 """Clone a JBrowse directory into a destination directory. | |
| 536 """ | |
| 297 # JBrowse seems to have included some bad symlinks, cp ignores bad symlinks | 537 # JBrowse seems to have included some bad symlinks, cp ignores bad symlinks |
| 298 # unlike copytree | 538 # unlike copytree |
| 299 cmd = ['mkdir', '-p', destination] | 539 cmd = ['cp', '-r', os.path.join(jbrowse_dir, '.'), destination] |
| 540 log.debug(' '.join(cmd)) | |
| 300 subprocess.check_call(cmd) | 541 subprocess.check_call(cmd) |
| 301 cmd = ['cp', '-r', jbrowse_dir, destination] | 542 cmd = ['mkdir', '-p', os.path.join(destination, 'data', 'raw')] |
| 302 subprocess.check_call(cmd) | 543 log.debug(' '.join(cmd)) |
| 303 cmd = ['mkdir', '-p', os.path.join(destination, 'JBrowse-1.11.6', | |
| 304 'data', 'raw')] | |
| 305 subprocess.check_call(cmd) | 544 subprocess.check_call(cmd) |
| 306 | 545 |
| 307 # http://unix.stackexchange.com/a/38691/22785 | 546 # http://unix.stackexchange.com/a/38691/22785 |
| 308 # JBrowse releases come with some broken symlinks | 547 # JBrowse releases come with some broken symlinks |
| 309 cmd = ['find', destination, '-type', 'l', '-xtype', 'l', '-exec', 'rm', "'{}'", '+'] | 548 cmd = ['find', destination, '-type', 'l', '-xtype', 'l', '-exec', 'rm', "'{}'", '+'] |
| 549 log.debug(' '.join(cmd)) | |
| 310 subprocess.check_call(cmd) | 550 subprocess.check_call(cmd) |
| 311 | 551 |
| 312 | 552 |
| 313 if __name__ == '__main__': | 553 if __name__ == '__main__': |
| 314 parser = argparse.ArgumentParser(description="", epilog="") | 554 parser = argparse.ArgumentParser(description="", epilog="") |
| 315 parser.add_argument('genome', type=file, help='Input genome file') | 555 parser.add_argument('xml', type=file, help='Track Configuration') |
| 316 parser.add_argument('yaml', type=file, help='Track Configuration') | |
| 317 | 556 |
| 318 parser.add_argument('--jbrowse', help='Folder containing a jbrowse release') | 557 parser.add_argument('--jbrowse', help='Folder containing a jbrowse release') |
| 319 parser.add_argument('--outdir', help='Output directory', default='out') | 558 parser.add_argument('--outdir', help='Output directory', default='out') |
| 559 parser.add_argument('--standalone', help='Standalone mode includes a copy of JBrowse', action='store_true') | |
| 320 args = parser.parse_args() | 560 args = parser.parse_args() |
| 561 | |
| 562 tree = ET.parse(args.xml.name) | |
| 563 root = tree.getroot() | |
| 321 | 564 |
| 322 jc = JbrowseConnector( | 565 jc = JbrowseConnector( |
| 323 jbrowse=args.jbrowse, | 566 jbrowse=args.jbrowse, |
| 324 jbrowse_dir=os.path.join(args.outdir, 'JBrowse-1.11.6'), | |
| 325 outdir=args.outdir, | 567 outdir=args.outdir, |
| 326 genome=os.path.realpath(args.genome.name), | 568 genomes=[os.path.realpath(x.text) for x in root.findall('metadata/genomes/genome')], |
| 569 standalone=args.standalone, | |
| 570 gencode=root.find('metadata/gencode').text | |
| 327 ) | 571 ) |
| 328 | 572 |
| 329 track_data = yaml.load(args.yaml) | 573 for track in root.findall('tracks/track'): |
| 330 for track in track_data: | 574 track_conf = {} |
| 331 path = os.path.realpath(track['file']) | 575 track_conf['trackfiles'] = [ |
| 332 extra = track.get('options', {}) | 576 (os.path.realpath(x.attrib['path']), x.attrib['ext'], x.attrib['label']) |
| 333 if '__unused__' in extra: | 577 for x in track.findall('files/trackFile') |
| 334 del extra['__unused__'] | 578 ] |
| 335 | 579 |
| 336 for possible_partial_path in ('bam_index', 'parent'): | 580 track_conf['category'] = track.attrib['cat'] |
| 337 if possible_partial_path in extra: | 581 track_conf['format'] = track.attrib['format'] |
| 338 extra[possible_partial_path] = os.path.realpath( | 582 try: |
| 339 extra[possible_partial_path]) | 583 # Only pertains to gff3 + blastxml. TODO? |
| 340 extra['category'] = track.get('category', 'Default') | 584 track_conf['style'] = {t.tag: t.text for t in track.find('options/style')} |
| 341 | 585 except TypeError, te: |
| 342 jc.process_annotations(path, track['label'], track['ext'], **extra) | 586 track_conf['style'] = {} |
| 343 | 587 pass |
| 344 print """ | 588 track_conf['conf'] = etree_to_dict(track.find('options')) |
| 345 <html> | 589 jc.process_annotations(track_conf) |
| 346 <body> | |
| 347 <script type="text/javascript"> | |
| 348 window.location=JBrowse-1.11.6/index.html | |
| 349 </script> | |
| 350 <a href="JBrowse-1.11.6/index.html">Go to JBrowse</a> | |
| 351 <p>Please note that JBrowse functions best on production Galaxy | |
| 352 instances. The paste server used in development instances has issues | |
| 353 serving the volumes of data regularly involved in JBrowse</p> | |
| 354 </body> | |
| 355 </html> | |
| 356 """ |
