Mercurial > repos > iuc > jbrowse
diff jbrowse.py @ 33:0ae74c70b267 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 369a727966d697c56633b27ad2757db40fef0dc3"
author | iuc |
---|---|
date | Fri, 30 Aug 2019 03:36:39 -0400 |
parents | 2bb2e07a7a21 |
children | 9de82b4963e6 |
line wrap: on
line diff
--- a/jbrowse.py Tue Jul 16 05:08:10 2019 -0400 +++ b/jbrowse.py Fri Aug 30 03:36:39 2019 -0400 @@ -331,13 +331,6 @@ class JbrowseConnector(object): def __init__(self, jbrowse, outdir, genomes, standalone=False, gencode=1): - self.TN_TABLE = { - 'gff3': '--gff', - 'gff': '--gff', - 'bed': '--bed', - 'genbank': '--gbk', - } - self.cs = ColorScaling() self.jbrowse = jbrowse self.outdir = outdir @@ -379,9 +372,13 @@ with open(trackList, 'w') as handle: json.dump(trackListData, handle, indent=2) - def subprocess_check_call(self, command): - log.debug('cd %s && %s', self.outdir, ' '.join(command)) - subprocess.check_call(command, cwd=self.outdir) + def subprocess_check_call(self, command, output=None): + if output: + log.debug('cd %s && %s > %s', self.outdir, ' '.join(command), output) + subprocess.check_call(command, cwd=self.outdir, stdout=output) + else: + log.debug('cd %s && %s', self.outdir, ' '.join(command)) + subprocess.check_call(command, cwd=self.outdir) def subprocess_popen(self, command): log.debug('cd %s && %s', self.outdir, command) @@ -558,6 +555,41 @@ self._add_track_json(trackData) + def add_maf(self, data, trackData, mafOpts, **kwargs): + script = os.path.realpath(os.path.join(self.jbrowse, 'plugins', 'MAFViewer', 'bin', 'maf2bed.pl')) + dest = os.path.join('data', 'raw', trackData['label'] + '.txt') + + tmp1 = tempfile.NamedTemporaryFile(delete=False) + tmp1.close() + + # Process MAF to bed-like + cmd = [script, data] + self.subprocess_check_call(cmd, output=tmp1.path) + + # Sort / Index it + self._sort_bed(tmp1.path, dest) + # Cleanup + try: + os.remove(tmp1.path) + except OSError: + pass + + # Construct samples list + # We could get this from galaxy metadata, not sure how easily. + ps = subprocess.Popen(['grep', '^s [^ ]*', '-o', data], stdout=subprocess.PIPE) + output = subprocess.check_output(('sort', '-u'), stdin=ps.stdout) + ps.wait() + samples = [x[2:] for x in output] + + trackData.update({ + "storeClass": "MAFViewer/Store/SeqFeature/MAFTabix", + "type": "MAFViewer/View/Track/MAF", + "urlTemplate": trackData['label'] + '.txt.gz', + "samples": samples, + }) + + self._add_track_json(trackData) + def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs): dest = os.path.join('data', 'raw', trackData['label'] + '.bam') cmd = ['ln', '-s', os.path.realpath(data), dest] @@ -610,19 +642,24 @@ self._add_track_json(trackData) def _sort_gff(self, data, dest): - + # Only index if not already done if not os.path.exists(dest): - # Only index if not already done cmd = "gff3sort.pl --precise '%s' | grep -v \"^$\" > '%s'" % (data, dest) self.subprocess_popen(cmd) - cmd = ['bgzip', '-f', dest] - self.subprocess_popen(' '.join(cmd)) - cmd = ['tabix', '-f', '-p', 'gff', dest + '.gz'] - self.subprocess_popen(' '.join(cmd)) + self.subprocess_check_call(['bgzip', '-f', dest]) + self.subprocess_check_call(['tabix', '-f', '-p', 'gff', dest + '.gz']) - def add_features(self, data, format, trackData, gffOpts, **kwargs): + def _sort_bed(self, data, dest): + # Only index if not already done + if not os.path.exists(dest): + cmd = ['sort', '-k1,1', '-k2,2n', data] + self.subprocess_check_call(cmd, output=dest) + self.subprocess_check_call(['bgzip', '-f', dest]) + self.subprocess_check_call(['tabix', '-f', '-p', 'bed', dest + '.gz']) + + def add_gff(self, data, format, trackData, gffOpts, **kwargs): dest = os.path.join(self.outdir, 'data', 'raw', trackData['label'] + '.gff') self._sort_gff(data, dest) @@ -658,6 +695,89 @@ if gffOpts.get('index', 'false') == 'true': self.tracksToIndex.append("%s" % trackData['label']) + def add_bed(self, data, format, trackData, gffOpts, **kwargs): + dest = os.path.join(self.outdir, 'data', 'raw', trackData['label'] + '.bed') + + self._sort_bed(data, dest) + + url = os.path.join('raw', trackData['label'] + '.bed.gz') + trackData.update({ + "urlTemplate": url, + "storeClass": "JBrowse/Store/SeqFeature/BEDTabix", + }) + + if 'match' in gffOpts: + trackData['glyph'] = 'JBrowse/View/FeatureGlyph/Segments' + + trackType = gffOpts.get('trackType', 'JBrowse/View/Track/CanvasFeatures') + trackData['type'] = trackType + + if trackType in ['JBrowse/View/Track/CanvasFeatures', 'NeatCanvasFeatures/View/Track/NeatFeatures']: + if 'transcriptType' in gffOpts and gffOpts['transcriptType']: + trackData['transcriptType'] = gffOpts['transcriptType'] + if 'subParts' in gffOpts and gffOpts['subParts']: + trackData['subParts'] = gffOpts['subParts'] + if 'impliedUTRs' in gffOpts and gffOpts['impliedUTRs']: + trackData['impliedUTRs'] = gffOpts['impliedUTRs'] + elif trackType in ['JBrowse/View/Track/HTMLFeatures', 'NeatHTMLFeatures/View/Track/NeatFeatures']: + if 'topLevelFeatures' in gffOpts and gffOpts['topLevelFeatures']: + trackData['topLevelFeatures'] = gffOpts['topLevelFeatures'] + + self._add_track_json(trackData) + + if gffOpts.get('index', 'false') == 'true': + self.tracksToIndex.append("%s" % trackData['label']) + + def add_genbank(self, data, format, trackData, gffOpts, **kwargs): + cmd = [ + 'perl', self._jbrowse_bin('flatfile-to-json.pl'), + '--genbank', data, + '--trackLabel', trackData['label'], + '--key', trackData['key'] + ] + + # className in --clientConfig is ignored, it needs to be set with --className + if 'className' in trackData['style']: + cmd += ['--className', trackData['style']['className']] + + config = copy.copy(trackData) + clientConfig = trackData['style'] + del config['style'] + + if 'match' in gffOpts: + config['glyph'] = 'JBrowse/View/FeatureGlyph/Segments' + if bool(gffOpts['match']): + # Can be empty for CanvasFeatures = will take all by default + cmd += ['--type', gffOpts['match']] + + cmd += ['--clientConfig', json.dumps(clientConfig)] + + trackType = 'JBrowse/View/Track/CanvasFeatures' + if 'trackType' in gffOpts: + trackType = gffOpts['trackType'] + + if trackType == 'JBrowse/View/Track/CanvasFeatures': + if 'transcriptType' in gffOpts and gffOpts['transcriptType']: + config['transcriptType'] = gffOpts['transcriptType'] + if 'subParts' in gffOpts and gffOpts['subParts']: + config['subParts'] = gffOpts['subParts'] + if 'impliedUTRs' in gffOpts and gffOpts['impliedUTRs']: + config['impliedUTRs'] = gffOpts['impliedUTRs'] + elif trackType == 'JBrowse/View/Track/HTMLFeatures': + if 'transcriptType' in gffOpts and gffOpts['transcriptType']: + cmd += ['--type', gffOpts['transcriptType']] + + cmd += [ + '--trackType', gffOpts['trackType'] + ] + + cmd.extend(['--config', json.dumps(config)]) + + self.subprocess_check_call(cmd) + + if gffOpts.get('index', 'false') == 'true': + self.tracksToIndex.append("%s" % trackData['label']) + def add_rest(self, url, trackData): data = { "label": trackData['label'], @@ -782,15 +902,24 @@ # import pprint; pprint.pprint(track) # import sys; sys.exit() - if dataset_ext in ('gff', 'gff3', 'bed'): - self.add_features(dataset_path, dataset_ext, outputTrackConfig, - track['conf']['options']['gff']) + if dataset_ext in ('gff', 'gff3'): + self.add_gff(dataset_path, dataset_ext, outputTrackConfig, + track['conf']['options']['gff']) + elif dataset_ext in ('bed', ): + self.add_bed(dataset_path, dataset_ext, outputTrackConfig, + track['conf']['options']['gff']) + elif dataset_ext in ('genbank', ): + self.add_genbank(dataset_path, dataset_ext, outputTrackConfig, + track['conf']['options']['gff']) elif dataset_ext == 'bigwig': self.add_bigwig(dataset_path, outputTrackConfig, track['conf']['options']['wiggle']) elif dataset_ext == 'bigwig_multiple': self.add_bigwig_multiple(dataset_path, outputTrackConfig, track['conf']['options']['wiggle']) + elif dataset_ext == 'maf': + self.add_maf(dataset_path, outputTrackConfig, + track['conf']['options']['maf']) elif dataset_ext == 'bam': real_indexes = track['conf']['options']['pileup']['bam_indices']['bam_index'] if not isinstance(real_indexes, list):