comparison jbrowse.py @ 33:0ae74c70b267 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 369a727966d697c56633b27ad2757db40fef0dc3"
author iuc
date Fri, 30 Aug 2019 03:36:39 -0400
parents 2bb2e07a7a21
children 9de82b4963e6
comparison
equal deleted inserted replaced
32:6b774e9d6387 33:0ae74c70b267
329 329
330 330
331 class JbrowseConnector(object): 331 class JbrowseConnector(object):
332 332
333 def __init__(self, jbrowse, outdir, genomes, standalone=False, gencode=1): 333 def __init__(self, jbrowse, outdir, genomes, standalone=False, gencode=1):
334 self.TN_TABLE = {
335 'gff3': '--gff',
336 'gff': '--gff',
337 'bed': '--bed',
338 'genbank': '--gbk',
339 }
340
341 self.cs = ColorScaling() 334 self.cs = ColorScaling()
342 self.jbrowse = jbrowse 335 self.jbrowse = jbrowse
343 self.outdir = outdir 336 self.outdir = outdir
344 self.genome_paths = genomes 337 self.genome_paths = genomes
345 self.standalone = standalone 338 self.standalone = standalone
377 }) 370 })
378 371
379 with open(trackList, 'w') as handle: 372 with open(trackList, 'w') as handle:
380 json.dump(trackListData, handle, indent=2) 373 json.dump(trackListData, handle, indent=2)
381 374
def subprocess_check_call(self, command, output=None):
    """Run ``command`` with ``self.outdir`` as working directory.

    :param command: argument list handed to ``subprocess.check_call``.
    :param output: optional destination for the command's stdout. Either
        an open file object / file descriptor, or a path. A path is opened
        for writing and, when relative, resolved against ``self.outdir``,
        i.e. it behaves like the ``cd outdir && cmd > output`` line that
        is logged. Falsy values mean "do not redirect".
    :raises subprocess.CalledProcessError: if the command exits non-zero.
    """
    if not output:
        log.debug('cd %s && %s', self.outdir, ' '.join(command))
        subprocess.check_call(command, cwd=self.outdir)
        return

    log.debug('cd %s && %s > %s', self.outdir, ' '.join(command), output)
    if hasattr(output, 'fileno') or isinstance(output, int):
        # Already something subprocess can write to directly.
        subprocess.check_call(command, cwd=self.outdir, stdout=output)
    else:
        # subprocess cannot redirect to a path string, so open it here.
        # os.path.join leaves absolute paths untouched.
        with open(os.path.join(self.outdir, output), 'wb') as handle:
            subprocess.check_call(command, cwd=self.outdir, stdout=handle)
385 382
386 def subprocess_popen(self, command): 383 def subprocess_popen(self, command):
387 log.debug('cd %s && %s', self.outdir, command) 384 log.debug('cd %s && %s', self.outdir, command)
388 p = subprocess.Popen(command, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 385 p = subprocess.Popen(command, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
389 output, err = p.communicate() 386 output, err = p.communicate()
556 553
557 trackData['scale'] = wiggleOpts['scale'] 554 trackData['scale'] = wiggleOpts['scale']
558 555
559 self._add_track_json(trackData) 556 self._add_track_json(trackData)
560 557
def add_maf(self, data, trackData, mafOpts, **kwargs):
    """Add a MAF alignment as a MAFViewer track.

    The MAF file is converted with the MAFViewer plugin's ``maf2bed.pl``,
    the result is handed to ``self._sort_bed`` for sorting and indexing,
    the sample names are collected from the MAF ``s`` lines, and the track
    definition is written through ``self._add_track_json``.

    :param data: path to the MAF file.
    :param trackData: track configuration dict; must contain ``label``.
        It is updated in place.
    :param mafOpts: MAF specific options (currently unused).
    """
    script = os.path.realpath(os.path.join(self.jbrowse, 'plugins', 'MAFViewer', 'bin', 'maf2bed.pl'))
    # NOTE(review): unlike add_bed/add_gff this path is not prefixed with
    # self.outdir; the subprocess helpers run with cwd=self.outdir, so it
    # is presumably meant to be relative to that directory -- confirm.
    dest = os.path.join('data', 'raw', trackData['label'] + '.txt')

    tmp1 = tempfile.NamedTemporaryFile(delete=False)
    try:
        # Process MAF to bed-like. The open handle is passed as stdout
        # target; a temporary file exposes its path as ``.name``.
        with tmp1:
            self.subprocess_check_call([script, data], output=tmp1)

        # Sort / Index it
        self._sort_bed(tmp1.name, dest)
    finally:
        # Cleanup, also when the conversion or the sorting failed
        try:
            os.remove(tmp1.name)
        except OSError:
            pass

    # Construct samples list
    # We could get this from galaxy metadata, not sure how easily.
    # Sequence lines look like "s <src> <start> <size> ..."; the second
    # whitespace separated field is the sample name.
    names = set()
    with open(data) as maf:
        for line in maf:
            fields = line.split()
            if len(fields) > 1 and fields[0] == 's':
                names.add(fields[1])
    samples = sorted(names)

    # NOTE(review): the indexed file is written below data/raw while the
    # urlTemplate has no 'raw/' prefix (add_bed uses one) -- confirm.
    trackData.update({
        "storeClass": "MAFViewer/Store/SeqFeature/MAFTabix",
        "type": "MAFViewer/View/Track/MAF",
        "urlTemplate": trackData['label'] + '.txt.gz',
        "samples": samples,
    })

    self._add_track_json(trackData)
592
561 def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs): 593 def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs):
562 dest = os.path.join('data', 'raw', trackData['label'] + '.bam') 594 dest = os.path.join('data', 'raw', trackData['label'] + '.bam')
563 cmd = ['ln', '-s', os.path.realpath(data), dest] 595 cmd = ['ln', '-s', os.path.realpath(data), dest]
564 self.subprocess_check_call(cmd) 596 self.subprocess_check_call(cmd)
565 597
608 "storeClass": "JBrowse/Store/SeqFeature/VCFTabix", 640 "storeClass": "JBrowse/Store/SeqFeature/VCFTabix",
609 }) 641 })
610 self._add_track_json(trackData) 642 self._add_track_json(trackData)
611 643
612 def _sort_gff(self, data, dest): 644 def _sort_gff(self, data, dest):
613 645 # Only index if not already done
614 if not os.path.exists(dest): 646 if not os.path.exists(dest):
615 # Only index if not already done
616 cmd = "gff3sort.pl --precise '%s' | grep -v \"^$\" > '%s'" % (data, dest) 647 cmd = "gff3sort.pl --precise '%s' | grep -v \"^$\" > '%s'" % (data, dest)
617 self.subprocess_popen(cmd) 648 self.subprocess_popen(cmd)
618 649
619 cmd = ['bgzip', '-f', dest] 650 self.subprocess_check_call(['bgzip', '-f', dest])
620 self.subprocess_popen(' '.join(cmd)) 651 self.subprocess_check_call(['tabix', '-f', '-p', 'gff', dest + '.gz'])
621 cmd = ['tabix', '-f', '-p', 'gff', dest + '.gz'] 652
622 self.subprocess_popen(' '.join(cmd)) 653 def _sort_bed(self, data, dest):
623 654 # Only index if not already done
624 def add_features(self, data, format, trackData, gffOpts, **kwargs): 655 if not os.path.exists(dest):
625 656 cmd = ['sort', '-k1,1', '-k2,2n', data]
657 self.subprocess_check_call(cmd, output=dest)
658
659 self.subprocess_check_call(['bgzip', '-f', dest])
660 self.subprocess_check_call(['tabix', '-f', '-p', 'bed', dest + '.gz'])
661
662 def add_gff(self, data, format, trackData, gffOpts, **kwargs):
626 dest = os.path.join(self.outdir, 'data', 'raw', trackData['label'] + '.gff') 663 dest = os.path.join(self.outdir, 'data', 'raw', trackData['label'] + '.gff')
627 664
628 self._sort_gff(data, dest) 665 self._sort_gff(data, dest)
629 666
630 url = os.path.join('raw', trackData['label'] + '.gff.gz') 667 url = os.path.join('raw', trackData['label'] + '.gff.gz')
652 elif trackType in ['JBrowse/View/Track/HTMLFeatures', 'NeatHTMLFeatures/View/Track/NeatFeatures']: 689 elif trackType in ['JBrowse/View/Track/HTMLFeatures', 'NeatHTMLFeatures/View/Track/NeatFeatures']:
653 if 'topLevelFeatures' in gffOpts and gffOpts['topLevelFeatures']: 690 if 'topLevelFeatures' in gffOpts and gffOpts['topLevelFeatures']:
654 trackData['topLevelFeatures'] = gffOpts['topLevelFeatures'] 691 trackData['topLevelFeatures'] = gffOpts['topLevelFeatures']
655 692
656 self._add_track_json(trackData) 693 self._add_track_json(trackData)
694
695 if gffOpts.get('index', 'false') == 'true':
696 self.tracksToIndex.append("%s" % trackData['label'])
697
def add_bed(self, data, format, trackData, gffOpts, **kwargs):
    """Add a BED file as a tabix-indexed feature track.

    The file is sorted and indexed by ``self._sort_bed`` into
    ``<outdir>/data/raw/<label>.bed``; the track definition is then
    completed from ``gffOpts`` and written via ``self._add_track_json``.

    :param data: path to the BED file.
    :param format: dataset extension passed by the caller (unused here).
    :param trackData: track configuration dict; must contain ``label``.
        It is updated in place.
    :param gffOpts: feature-track options (``match``, ``trackType``,
        ``transcriptType``, ``subParts``, ``impliedUTRs``,
        ``topLevelFeatures``, ``index``).
    """
    dest = os.path.join(self.outdir, 'data', 'raw', trackData['label'] + '.bed')

    self._sort_bed(data, dest)

    url = os.path.join('raw', trackData['label'] + '.bed.gz')
    trackData.update({
        "urlTemplate": url,
        "storeClass": "JBrowse/Store/SeqFeature/BEDTabix",
    })

    if 'match' in gffOpts:
        trackData['glyph'] = 'JBrowse/View/FeatureGlyph/Segments'

    trackType = gffOpts.get('trackType', 'JBrowse/View/Track/CanvasFeatures')
    trackData['type'] = trackType

    # Options that are copied verbatim into the track config, per track type.
    if trackType in ('JBrowse/View/Track/CanvasFeatures', 'NeatCanvasFeatures/View/Track/NeatFeatures'):
        forwarded = ('transcriptType', 'subParts', 'impliedUTRs')
    elif trackType in ('JBrowse/View/Track/HTMLFeatures', 'NeatHTMLFeatures/View/Track/NeatFeatures'):
        forwarded = ('topLevelFeatures', )
    else:
        forwarded = ()

    for option in forwarded:
        value = gffOpts.get(option)
        # Missing and empty options are both skipped.
        if value:
            trackData[option] = value

    self._add_track_json(trackData)

    if gffOpts.get('index', 'false') == 'true':
        self.tracksToIndex.append("%s" % trackData['label'])
730
def add_genbank(self, data, format, trackData, gffOpts, **kwargs):
    """Add a GenBank file as a feature track via ``flatfile-to-json.pl``.

    Builds the ``flatfile-to-json.pl`` command line from ``trackData`` and
    ``gffOpts`` and runs it through ``self.subprocess_check_call``.

    :param data: path to the GenBank file.
    :param format: dataset extension passed by the caller (unused here).
    :param trackData: track configuration dict; must contain ``label``,
        ``key`` and ``style``. It is not modified.
    :param gffOpts: feature-track options (``match``, ``trackType``,
        ``transcriptType``, ``subParts``, ``impliedUTRs``, ``index``).
    """
    cmd = [
        'perl', self._jbrowse_bin('flatfile-to-json.pl'),
        '--genbank', data,
        '--trackLabel', trackData['label'],
        '--key', trackData['key']
    ]

    # className in --clientConfig is ignored, it needs to be set with --className
    if 'className' in trackData['style']:
        cmd += ['--className', trackData['style']['className']]

    # The style goes to --clientConfig, everything else to --config.
    # Work on a shallow copy so the caller's dict keeps its 'style'.
    config = copy.copy(trackData)
    clientConfig = trackData['style']
    del config['style']

    if 'match' in gffOpts:
        config['glyph'] = 'JBrowse/View/FeatureGlyph/Segments'
        if bool(gffOpts['match']):
            # Can be empty for CanvasFeatures = will take all by default
            cmd += ['--type', gffOpts['match']]

    cmd += ['--clientConfig', json.dumps(clientConfig)]

    trackType = gffOpts.get('trackType', 'JBrowse/View/Track/CanvasFeatures')

    if trackType == 'JBrowse/View/Track/CanvasFeatures':
        for option in ('transcriptType', 'subParts', 'impliedUTRs'):
            value = gffOpts.get(option)
            if value:
                config[option] = value
    elif trackType == 'JBrowse/View/Track/HTMLFeatures':
        if gffOpts.get('transcriptType'):
            cmd += ['--type', gffOpts['transcriptType']]

    # Use the resolved value: gffOpts may not carry a 'trackType' key, in
    # which case the default chosen above applies.
    cmd += [
        '--trackType', trackType
    ]

    cmd.extend(['--config', json.dumps(config)])

    self.subprocess_check_call(cmd)

    if gffOpts.get('index', 'false') == 'true':
        self.tracksToIndex.append("%s" % trackData['label'])
660 780
661 def add_rest(self, url, trackData): 781 def add_rest(self, url, trackData):
780 if customTrackConfig: 900 if customTrackConfig:
781 self.set_custom_track_options(customTrackConfig, outputTrackConfig, mapped_chars) 901 self.set_custom_track_options(customTrackConfig, outputTrackConfig, mapped_chars)
782 902
783 # import pprint; pprint.pprint(track) 903 # import pprint; pprint.pprint(track)
784 # import sys; sys.exit() 904 # import sys; sys.exit()
785 if dataset_ext in ('gff', 'gff3', 'bed'): 905 if dataset_ext in ('gff', 'gff3'):
786 self.add_features(dataset_path, dataset_ext, outputTrackConfig, 906 self.add_gff(dataset_path, dataset_ext, outputTrackConfig,
787 track['conf']['options']['gff']) 907 track['conf']['options']['gff'])
908 elif dataset_ext in ('bed', ):
909 self.add_bed(dataset_path, dataset_ext, outputTrackConfig,
910 track['conf']['options']['gff'])
911 elif dataset_ext in ('genbank', ):
912 self.add_genbank(dataset_path, dataset_ext, outputTrackConfig,
913 track['conf']['options']['gff'])
788 elif dataset_ext == 'bigwig': 914 elif dataset_ext == 'bigwig':
789 self.add_bigwig(dataset_path, outputTrackConfig, 915 self.add_bigwig(dataset_path, outputTrackConfig,
790 track['conf']['options']['wiggle']) 916 track['conf']['options']['wiggle'])
791 elif dataset_ext == 'bigwig_multiple': 917 elif dataset_ext == 'bigwig_multiple':
792 self.add_bigwig_multiple(dataset_path, outputTrackConfig, 918 self.add_bigwig_multiple(dataset_path, outputTrackConfig,
793 track['conf']['options']['wiggle']) 919 track['conf']['options']['wiggle'])
920 elif dataset_ext == 'maf':
921 self.add_maf(dataset_path, outputTrackConfig,
922 track['conf']['options']['maf'])
794 elif dataset_ext == 'bam': 923 elif dataset_ext == 'bam':
795 real_indexes = track['conf']['options']['pileup']['bam_indices']['bam_index'] 924 real_indexes = track['conf']['options']['pileup']['bam_indices']['bam_index']
796 if not isinstance(real_indexes, list): 925 if not isinstance(real_indexes, list):
797 # <bam_indices> 926 # <bam_indices>
798 # <bam_index>/path/to/a.bam.bai</bam_index> 927 # <bam_index>/path/to/a.bam.bai</bam_index>