comparison jbrowse.py @ 8:ad4b9d7eae6a draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 9a243c616a4a3156347e38fdb5f35863ae5133f9
author iuc
date Tue, 29 Nov 2016 10:55:30 -0500
parents ae9382cfb6ac
children 1a6d882d340d
comparison
equal deleted inserted replaced
7:1e74f16adaa1 8:ad4b9d7eae6a
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 import argparse
3 import copy
4 import hashlib
5 import json
6 import logging
2 import os 7 import os
3 import copy 8 import shutil
4 import argparse 9 import struct
5 import subprocess 10 import subprocess
6 import hashlib
7 import struct
8 import tempfile 11 import tempfile
9 import shutil 12 import xml.etree.ElementTree as ET
10 import json 13 from collections import defaultdict
14
11 from Bio.Data import CodonTable 15 from Bio.Data import CodonTable
12 import xml.etree.ElementTree as ET 16
13 import logging
14 from collections import defaultdict
15 logging.basicConfig(level=logging.INFO) 17 logging.basicConfig(level=logging.INFO)
16 log = logging.getLogger('jbrowse') 18 log = logging.getLogger('jbrowse')
17 19
18 20
19 class ColorScaling(object): 21 class ColorScaling(object):
77 var opacity = (score - ({min})) / (({max}) - ({min})); 79 var opacity = (score - ({min})) / (({max}) - ({min}));
78 opacity = Math.log10(opacity) + Math.log10({max}); 80 opacity = Math.log10(opacity) + Math.log10({max});
79 """, 81 """,
80 'blast': """ 82 'blast': """
81 var opacity = 0; 83 var opacity = 0;
82 if(score == 0.0) {{ 84 if(score == 0.0) {
83 opacity = 1; 85 opacity = 1;
84 }} else {{ 86 } else{
85 opacity = (20 - Math.log10(score)) / 180; 87 opacity = (20 - Math.log10(score)) / 180;
86 }} 88 }
87 """ 89 """
88 } 90 }
89 91
90 BREWER_COLOUR_IDX = 0 92 BREWER_COLOUR_IDX = 0
91 BREWER_COLOUR_SCHEMES = [ 93 BREWER_COLOUR_SCHEMES = [
123 def __init__(self): 125 def __init__(self):
124 self.brewer_colour_idx = 0 126 self.brewer_colour_idx = 0
125 127
126 def rgb_from_hex(self, hexstr): 128 def rgb_from_hex(self, hexstr):
127 # http://stackoverflow.com/questions/4296249/how-do-i-convert-a-hex-triplet-to-an-rgb-tuple-and-back 129 # http://stackoverflow.com/questions/4296249/how-do-i-convert-a-hex-triplet-to-an-rgb-tuple-and-back
128 return struct.unpack('BBB',hexstr.decode('hex')) 130 return struct.unpack('BBB', hexstr.decode('hex'))
129 131
130 def min_max_gff(self, gff_file): 132 def min_max_gff(self, gff_file):
131 min_val = None 133 min_val = None
132 max_val = None 134 max_val = None
133 with open(gff_file, 'r') as handle: 135 with open(gff_file, 'r') as handle:
152 def _get_colours(self): 154 def _get_colours(self):
153 r, g, b = self.BREWER_COLOUR_SCHEMES[self.brewer_colour_idx % len(self.BREWER_COLOUR_SCHEMES)] 155 r, g, b = self.BREWER_COLOUR_SCHEMES[self.brewer_colour_idx % len(self.BREWER_COLOUR_SCHEMES)]
154 self.brewer_colour_idx += 1 156 self.brewer_colour_idx += 1
155 return r, g, b 157 return r, g, b
156 158
159 def parse_menus(self, track):
160 trackConfig = {'menuTemplate': [{}, {}, {}]}
161
162 if 'menu' in track['menus']:
163 menu_list = [track['menus']['menu']]
164 if isinstance(track['menus']['menu'], list):
165 menu_list = track['menus']['menu']
166
167 for m in menu_list:
168 tpl = {
169 'action': m['action'],
170 'label': m.get('label', '{name}'),
171 'iconClass': m.get('iconClass', 'dijitIconBookmark'),
172 }
173 if 'url' in m:
174 tpl['url'] = m['url']
175 if 'content' in m:
176 tpl['content'] = m['content']
177 if 'title' in m:
178 tpl['title'] = m['title']
179
180 trackConfig['menuTemplate'].append(tpl)
181
182 return trackConfig
183
157 def parse_colours(self, track, trackFormat, gff3=None): 184 def parse_colours(self, track, trackFormat, gff3=None):
158 # Wiggle tracks have a bicolor pallete 185 # Wiggle tracks have a bicolor pallete
159 trackConfig = {'style': {}} 186 trackConfig = {'style': {}}
160 if trackFormat == 'wiggle': 187 if trackFormat == 'wiggle':
161 188
163 trackConfig['style']['neg_color'] = track['wiggle']['color_neg'] 190 trackConfig['style']['neg_color'] = track['wiggle']['color_neg']
164 191
165 if trackConfig['style']['pos_color'] == '__auto__': 192 if trackConfig['style']['pos_color'] == '__auto__':
166 trackConfig['style']['neg_color'] = self.hex_from_rgb(*self._get_colours()) 193 trackConfig['style']['neg_color'] = self.hex_from_rgb(*self._get_colours())
167 trackConfig['style']['pos_color'] = self.hex_from_rgb(*self._get_colours()) 194 trackConfig['style']['pos_color'] = self.hex_from_rgb(*self._get_colours())
168
169 195
170 # Wiggle tracks can change colour at a specified place 196 # Wiggle tracks can change colour at a specified place
171 bc_pivot = track['wiggle']['bicolor_pivot'] 197 bc_pivot = track['wiggle']['bicolor_pivot']
172 if bc_pivot not in ('mean', 'zero'): 198 if bc_pivot not in ('mean', 'zero'):
173 # The values are either one of those two strings 199 # The values are either one of those two strings
221 else: 247 else:
222 user_color = 'undefined' 248 user_color = 'undefined'
223 auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours()) 249 auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours())
224 250
225 color_function = self.COLOR_FUNCTION_TEMPLATE_QUAL.format(**{ 251 color_function = self.COLOR_FUNCTION_TEMPLATE_QUAL.format(**{
226 'opacity': self.OPACITY_MATH[algo].format(**{'max': max_val,'min': min_val}), 252 'opacity': self.OPACITY_MATH[algo].format(**{'max': max_val, 'min': min_val}),
227 'user_spec_color': user_color, 253 'user_spec_color': user_color,
228 'auto_gen_color': auto_color, 254 'auto_gen_color': auto_color,
229 }) 255 })
230 256
231 trackConfig['style']['color'] = color_function.replace('\n', '') 257 trackConfig['style']['color'] = color_function.replace('\n', '')
238 if children: 264 if children:
239 dd = defaultdict(list) 265 dd = defaultdict(list)
240 for dc in map(etree_to_dict, children): 266 for dc in map(etree_to_dict, children):
241 for k, v in dc.iteritems(): 267 for k, v in dc.iteritems():
242 dd[k].append(v) 268 dd[k].append(v)
243 d = {t.tag: {k:v[0] if len(v) == 1 else v for k, v in dd.iteritems()}} 269 d = {t.tag: {k: v[0] if len(v) == 1 else v for k, v in dd.iteritems()}}
244 if t.attrib: 270 if t.attrib:
245 d[t.tag].update(('@' + k, v) for k, v in t.attrib.iteritems()) 271 d[t.tag].update(('@' + k, v) for k, v in t.attrib.iteritems())
246 if t.text: 272 if t.text:
247 text = t.text.strip() 273 text = t.text.strip()
248 if children or t.attrib: 274 if children or t.attrib:
249 if text: 275 if text:
250 d[t.tag]['#text'] = text 276 d[t.tag]['#text'] = text
251 else: 277 else:
252 d[t.tag] = text 278 d[t.tag] = text
253 return d 279 return d
254 280
255 281
272 self.jbrowse = jbrowse 298 self.jbrowse = jbrowse
273 self.outdir = outdir 299 self.outdir = outdir
274 self.genome_paths = genomes 300 self.genome_paths = genomes
275 self.standalone = standalone 301 self.standalone = standalone
276 self.gencode = gencode 302 self.gencode = gencode
303 self.tracksToIndex = []
277 304
278 if standalone: 305 if standalone:
279 self.clone_jbrowse(self.jbrowse, self.outdir) 306 self.clone_jbrowse(self.jbrowse, self.outdir)
280 else: 307 else:
281 try: 308 try:
300 }) 327 })
301 328
302 with open(trackList, 'w') as handle: 329 with open(trackList, 'w') as handle:
303 json.dump(trackListData, handle, indent=2) 330 json.dump(trackListData, handle, indent=2)
304 331
305
306 def subprocess_check_call(self, command): 332 def subprocess_check_call(self, command):
307 log.debug('cd %s && %s', self.outdir, ' '.join(command)) 333 log.debug('cd %s && %s', self.outdir, ' '.join(command))
308 subprocess.check_call(command, cwd=self.outdir) 334 subprocess.check_call(command, cwd=self.outdir)
309 335
310 def _jbrowse_bin(self, command): 336 def _jbrowse_bin(self, command):
314 for genome_path in self.genome_paths: 340 for genome_path in self.genome_paths:
315 self.subprocess_check_call([ 341 self.subprocess_check_call([
316 'perl', self._jbrowse_bin('prepare-refseqs.pl'), 342 'perl', self._jbrowse_bin('prepare-refseqs.pl'),
317 '--fasta', genome_path]) 343 '--fasta', genome_path])
318 344
319 # Generate name 345 def generate_names(self):
320 # self.subprocess_check_call([ 346 # Generate names
321 # 'perl', self._jbrowse_bin('generate-names.pl'), 347
322 # '--hashBits', '16' 348 args = [
323 # ]) 349 'perl', self._jbrowse_bin('generate-names.pl'),
350 '--hashBits', '16'
351 ]
352
353 tracks = ','.join(self.tracksToIndex)
354
355 if tracks:
356 args += ['--tracks', tracks]
357 else:
358 # No tracks to index, index only the refseq
359 args += ['--tracks', 'DNA']
360
361 self.subprocess_check_call(args)
324 362
325 def _add_json(self, json_data): 363 def _add_json(self, json_data):
326 364
327 cmd = [ 365 cmd = [
328 'perl', self._jbrowse_bin('add-json.pl'), 366 'perl', self._jbrowse_bin('add-json.pl'),
340 tmp.close() 378 tmp.close()
341 cmd = ['perl', self._jbrowse_bin('add-track-json.pl'), tmp.name, 379 cmd = ['perl', self._jbrowse_bin('add-track-json.pl'), tmp.name,
342 os.path.join('data', 'trackList.json')] 380 os.path.join('data', 'trackList.json')]
343 self.subprocess_check_call(cmd) 381 self.subprocess_check_call(cmd)
344 os.unlink(tmp.name) 382 os.unlink(tmp.name)
345
346 383
347 def _blastxml_to_gff3(self, xml, min_gap=10): 384 def _blastxml_to_gff3(self, xml, min_gap=10):
348 gff3_unrebased = tempfile.NamedTemporaryFile(delete=False) 385 gff3_unrebased = tempfile.NamedTemporaryFile(delete=False)
349 cmd = ['python', os.path.join(INSTALLED_TO, 'blastxml_to_gapped_gff3.py'), 386 cmd = ['python', os.path.join(INSTALLED_TO, 'blastxml_to_gapped_gff3.py'),
350 '--trim', '--trim_end', '--min_gap', str(min_gap), xml] 387 '--trim', '--trim_end', '--min_gap', str(min_gap), xml]
387 ] 424 ]
388 425
389 self.subprocess_check_call(cmd) 426 self.subprocess_check_call(cmd)
390 os.unlink(gff3) 427 os.unlink(gff3)
391 428
429 if blastOpts.get('index', 'false') == 'true':
430 self.tracksToIndex.append("%s" % trackData['label'])
431
392 def add_bigwig(self, data, trackData, wiggleOpts, **kwargs): 432 def add_bigwig(self, data, trackData, wiggleOpts, **kwargs):
393 dest = os.path.join('data', 'raw', trackData['label'] + '.bw') 433 dest = os.path.join('data', 'raw', trackData['label'] + '.bw')
394 cmd = ['ln', data, dest] 434 cmd = ['ln', data, dest]
395 self.subprocess_check_call(cmd) 435 self.subprocess_check_call(cmd)
396 436
423 "urlTemplate": os.path.join('..', dest), 463 "urlTemplate": os.path.join('..', dest),
424 "type": "JBrowse/View/Track/Alignments2", 464 "type": "JBrowse/View/Track/Alignments2",
425 "storeClass": "JBrowse/Store/SeqFeature/BAM", 465 "storeClass": "JBrowse/Store/SeqFeature/BAM",
426 }) 466 })
427 467
428
429 self._add_track_json(trackData) 468 self._add_track_json(trackData)
430 469
431 if bamOpts.get('auto_snp', 'false') == 'true': 470 if bamOpts.get('auto_snp', 'false') == 'true':
432 trackData2 = copy.copy(trackData) 471 trackData2 = copy.copy(trackData)
433 trackData2.update({ 472 trackData2.update({
434 "type": "JBrowse/View/Track/SNPCoverage", 473 "type": "JBrowse/View/Track/SNPCoverage",
435 "key": trackData['key'] + " - SNPs/Coverage", 474 "key": trackData['key'] + " - SNPs/Coverage",
436 "label": trackData['label'] + "_autosnp", 475 "label": trackData['label'] + "_autosnp",
437 }) 476 })
438 self._add_track_json(trackData2) 477 self._add_track_json(trackData2)
439 478
440 def add_vcf(self, data, trackData, vcfOpts={}, **kwargs): 479 def add_vcf(self, data, trackData, vcfOpts={}, **kwargs):
441 dest = os.path.join('data', 'raw', trackData['label'] + '.vcf') 480 dest = os.path.join('data', 'raw', trackData['label'] + '.vcf')
466 505
467 config = copy.copy(trackData) 506 config = copy.copy(trackData)
468 clientConfig = trackData['style'] 507 clientConfig = trackData['style']
469 del config['style'] 508 del config['style']
470 509
471 if 'match' in gffOpts: 510 if 'match' in gffOpts:
472 config['glyph'] = 'JBrowse/View/FeatureGlyph/Segments' 511 config['glyph'] = 'JBrowse/View/FeatureGlyph/Segments'
473 cmd += ['--type', gffOpts['match']] 512 cmd += ['--type', gffOpts['match']]
474 513
475 cmd += ['--clientConfig', json.dumps(clientConfig), 514 cmd += ['--clientConfig', json.dumps(clientConfig),
476 ] 515 ]
486 525
487 cmd.extend(['--config', json.dumps(config)]) 526 cmd.extend(['--config', json.dumps(config)])
488 527
489 self.subprocess_check_call(cmd) 528 self.subprocess_check_call(cmd)
490 529
530 if gffOpts.get('index', 'false') == 'true':
531 self.tracksToIndex.append("%s" % trackData['label'])
491 532
492 def process_annotations(self, track): 533 def process_annotations(self, track):
493 outputTrackConfig = { 534 outputTrackConfig = {
494 'style': { 535 'style': {
495 'label': track['style'].get('label', 'description'), 536 'label': track['style'].get('label', 'description'),
496 'className': track['style'].get('className', 'feature'), 537 'className': track['style'].get('className', 'feature'),
497 'description': track['style'].get('description', ''), 538 'description': track['style'].get('description', ''),
498 }, 539 },
499 'category': track['category'], 540 'category': track['category'],
500 } 541 }
501 542
514 for subkey in colourOptions['style']: 555 for subkey in colourOptions['style']:
515 outputTrackConfig['style'][subkey] = colourOptions['style'][subkey] 556 outputTrackConfig['style'][subkey] = colourOptions['style'][subkey]
516 else: 557 else:
517 outputTrackConfig[key] = colourOptions[key] 558 outputTrackConfig[key] = colourOptions[key]
518 559
560 menus = self.cs.parse_menus(track['conf']['options'])
561 outputTrackConfig.update(menus)
562
519 # import pprint; pprint.pprint(track) 563 # import pprint; pprint.pprint(track)
520 # import sys; sys.exit() 564 # import sys; sys.exit()
521 if dataset_ext in ('gff', 'gff3', 'bed'): 565 if dataset_ext in ('gff', 'gff3', 'bed'):
522 self.add_features(dataset_path, dataset_ext, outputTrackConfig, 566 self.add_features(dataset_path, dataset_ext, outputTrackConfig,
523 track['conf']['options']['gff']) 567 track['conf']['options']['gff'])
524 elif dataset_ext == 'bigwig': 568 elif dataset_ext == 'bigwig':
525 self.add_bigwig(dataset_path, outputTrackConfig, 569 self.add_bigwig(dataset_path, outputTrackConfig,
526 track['conf']['options']['wiggle']) 570 track['conf']['options']['wiggle'])
527 elif dataset_ext == 'bam': 571 elif dataset_ext == 'bam':
528 real_indexes = track['conf']['options']['pileup']['bam_indices']['bam_index'] 572 real_indexes = track['conf']['options']['pileup']['bam_indices']['bam_index']
643 track_conf['category'] = track.attrib['cat'] 687 track_conf['category'] = track.attrib['cat']
644 track_conf['format'] = track.attrib['format'] 688 track_conf['format'] = track.attrib['format']
645 try: 689 try:
646 # Only pertains to gff3 + blastxml. TODO? 690 # Only pertains to gff3 + blastxml. TODO?
647 track_conf['style'] = {t.tag: t.text for t in track.find('options/style')} 691 track_conf['style'] = {t.tag: t.text for t in track.find('options/style')}
648 except TypeError, te: 692 except TypeError:
649 track_conf['style'] = {} 693 track_conf['style'] = {}
650 pass 694 pass
651 track_conf['conf'] = etree_to_dict(track.find('options')) 695 track_conf['conf'] = etree_to_dict(track.find('options'))
652 keys = jc.process_annotations(track_conf) 696 keys = jc.process_annotations(track_conf)
653 697
654
655 for key in keys: 698 for key in keys:
656 extra_data['visibility'][track.attrib.get('visibility', 'default_off')].append(key) 699 extra_data['visibility'][track.attrib.get('visibility', 'default_off')].append(key)
657 700
658 jc.add_final_data(extra_data) 701 jc.add_final_data(extra_data)
702 jc.generate_names()