diff jbrowse.py @ 33:0ae74c70b267 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 369a727966d697c56633b27ad2757db40fef0dc3"
author iuc
date Fri, 30 Aug 2019 03:36:39 -0400
parents 2bb2e07a7a21
children 9de82b4963e6
line wrap: on
line diff
--- a/jbrowse.py	Tue Jul 16 05:08:10 2019 -0400
+++ b/jbrowse.py	Fri Aug 30 03:36:39 2019 -0400
@@ -331,13 +331,6 @@
 class JbrowseConnector(object):
 
     def __init__(self, jbrowse, outdir, genomes, standalone=False, gencode=1):
-        self.TN_TABLE = {
-            'gff3': '--gff',
-            'gff': '--gff',
-            'bed': '--bed',
-            'genbank': '--gbk',
-        }
-
         self.cs = ColorScaling()
         self.jbrowse = jbrowse
         self.outdir = outdir
@@ -379,9 +372,13 @@
         with open(trackList, 'w') as handle:
             json.dump(trackListData, handle, indent=2)
 
-    def subprocess_check_call(self, command):
-        log.debug('cd %s && %s', self.outdir, ' '.join(command))
-        subprocess.check_call(command, cwd=self.outdir)
+    def subprocess_check_call(self, command, output=None):
+        """Run command from self.outdir; a truthy output is passed to the child as its stdout."""
+        if output:
+            log.debug('cd %s && %s >  %s', self.outdir, ' '.join(command), output)
+        else:
+            log.debug('cd %s && %s', self.outdir, ' '.join(command))
+        subprocess.check_call(command, cwd=self.outdir, stdout=output or None)
 
     def subprocess_popen(self, command):
         log.debug('cd %s && %s', self.outdir, command)
@@ -558,6 +555,41 @@
 
         self._add_track_json(trackData)
 
+    def add_maf(self, data, trackData, mafOpts, **kwargs):
+        """Convert a MAF file to a sorted, tabix-indexed BED-like file and add a MAFViewer track."""
+        script = os.path.realpath(os.path.join(self.jbrowse, 'plugins', 'MAFViewer', 'bin', 'maf2bed.pl'))
+        dest = os.path.join(self.outdir, 'data', 'raw', trackData['label'] + '.txt')
+
+        # Process MAF to bed-like; delete=False keeps the temp file around for sorting
+        tmp1 = tempfile.NamedTemporaryFile(delete=False)
+        with tmp1:
+            self.subprocess_check_call([script, data], output=tmp1)
+
+        # Sort / Index it
+        self._sort_bed(tmp1.name, dest)
+        # Cleanup
+        try:
+            os.remove(tmp1.name)
+        except OSError:
+            pass
+
+        # Construct samples list from the "s <name>" prefixes that grep extracts
+        # We could get this from galaxy metadata, not sure how easily.
+        ps = subprocess.Popen(['grep', '^s [^ ]*', '-o', data], stdout=subprocess.PIPE)
+        output = subprocess.check_output(('sort', '-u'), stdin=ps.stdout)
+        ps.stdout.close()
+        ps.wait()
+        samples = [x[2:] for x in output.decode().splitlines()]
+
+        trackData.update({
+            "storeClass": "MAFViewer/Store/SeqFeature/MAFTabix",
+            "type": "MAFViewer/View/Track/MAF",
+            "urlTemplate": os.path.join('raw', trackData['label'] + '.txt.gz'),
+            "samples": samples,
+        })
+
+        self._add_track_json(trackData)
+
     def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs):
         dest = os.path.join('data', 'raw', trackData['label'] + '.bam')
         cmd = ['ln', '-s', os.path.realpath(data), dest]
@@ -610,19 +642,24 @@
         self._add_track_json(trackData)
 
     def _sort_gff(self, data, dest):
-
+        # Only index if not already done
         if not os.path.exists(dest):
-            # Only index if not already done
             cmd = "gff3sort.pl --precise '%s' | grep -v \"^$\" > '%s'" % (data, dest)
             self.subprocess_popen(cmd)
 
-            cmd = ['bgzip', '-f', dest]
-            self.subprocess_popen(' '.join(cmd))
-            cmd = ['tabix', '-f', '-p', 'gff', dest + '.gz']
-            self.subprocess_popen(' '.join(cmd))
+            self.subprocess_check_call(['bgzip', '-f', dest])
+            self.subprocess_check_call(['tabix', '-f', '-p', 'gff', dest + '.gz'])
 
-    def add_features(self, data, format, trackData, gffOpts, **kwargs):
+    def _sort_bed(self, data, dest):
+        """Sort BED-like `data` into `dest`, then bgzip and tabix-index it (no-op if dest exists)."""
+        if not os.path.exists(dest):
+            with open(dest, 'w') as handle:
+                self.subprocess_check_call(['sort', '-k1,1', '-k2,2n', data], output=handle)
 
+            self.subprocess_check_call(['bgzip', '-f', dest])
+            self.subprocess_check_call(['tabix', '-f', '-p', 'bed', dest + '.gz'])
+
+    def add_gff(self, data, format, trackData, gffOpts, **kwargs):
         dest = os.path.join(self.outdir, 'data', 'raw', trackData['label'] + '.gff')
 
         self._sort_gff(data, dest)
@@ -658,6 +695,89 @@
         if gffOpts.get('index', 'false') == 'true':
             self.tracksToIndex.append("%s" % trackData['label'])
 
+    def add_bed(self, data, format, trackData, gffOpts, **kwargs):
+        """Sort and tabix-index a BED file, then register it as a BEDTabix track."""
+        label = trackData['label']
+        dest = os.path.join(self.outdir, 'data', 'raw', label + '.bed')
+        self._sort_bed(data, dest)
+
+        # urlTemplate is the relative 'raw/...' path, unlike dest which sits under self.outdir
+        trackData["urlTemplate"] = os.path.join('raw', label + '.bed.gz')
+        trackData["storeClass"] = "JBrowse/Store/SeqFeature/BEDTabix"
+
+        if 'match' in gffOpts:
+            trackData['glyph'] = 'JBrowse/View/FeatureGlyph/Segments'
+
+        trackType = gffOpts.get('trackType', 'JBrowse/View/Track/CanvasFeatures')
+        trackData['type'] = trackType
+
+        # Which optional gffOpts keys get copied depends on the track type family
+        canvas_types = ('JBrowse/View/Track/CanvasFeatures', 'NeatCanvasFeatures/View/Track/NeatFeatures')
+        html_types = ('JBrowse/View/Track/HTMLFeatures', 'NeatHTMLFeatures/View/Track/NeatFeatures')
+        if trackType in canvas_types:
+            copied = ('transcriptType', 'subParts', 'impliedUTRs')
+        elif trackType in html_types:
+            copied = ('topLevelFeatures', )
+        else:
+            copied = ()
+        for opt in copied:
+            if gffOpts.get(opt):
+                trackData[opt] = gffOpts[opt]
+
+        self._add_track_json(trackData)
+        if gffOpts.get('index', 'false') == 'true':
+            self.tracksToIndex.append("%s" % trackData['label'])
+
+    def add_genbank(self, data, format, trackData, gffOpts, **kwargs):
+        """Format a GenBank file with flatfile-to-json.pl, optionally queueing it for indexing."""
+        cmd = [
+            'perl', self._jbrowse_bin('flatfile-to-json.pl'),
+            '--genbank', data,
+            '--trackLabel', trackData['label'],
+            '--key', trackData['key']
+        ]
+
+        # className in --clientConfig is ignored, it needs to be set with --className
+        if 'className' in trackData['style']:
+            cmd += ['--className', trackData['style']['className']]
+
+        # 'style' goes out as --clientConfig; the rest of the track data as --config
+        config = copy.copy(trackData)
+        clientConfig = trackData['style']
+        del config['style']
+
+        if 'match' in gffOpts:
+            config['glyph'] = 'JBrowse/View/FeatureGlyph/Segments'
+            if gffOpts['match']:
+                # Can be empty for CanvasFeatures = will take all by default
+                cmd += ['--type', gffOpts['match']]
+
+        cmd += ['--clientConfig', json.dumps(clientConfig)]
+
+        # Resolve the track type once, defaulting to CanvasFeatures when none is given
+        trackType = gffOpts.get('trackType', 'JBrowse/View/Track/CanvasFeatures')
+
+        if trackType == 'JBrowse/View/Track/CanvasFeatures':
+            if gffOpts.get('transcriptType'):
+                config['transcriptType'] = gffOpts['transcriptType']
+            if gffOpts.get('subParts'):
+                config['subParts'] = gffOpts['subParts']
+            if gffOpts.get('impliedUTRs'):
+                config['impliedUTRs'] = gffOpts['impliedUTRs']
+        elif trackType == 'JBrowse/View/Track/HTMLFeatures':
+            if gffOpts.get('transcriptType'):
+                cmd += ['--type', gffOpts['transcriptType']]
+
+        # Pass the resolved value: gffOpts['trackType'] raises KeyError when the key is unset
+        cmd += ['--trackType', trackType]
+
+        cmd.extend(['--config', json.dumps(config)])
+
+        self.subprocess_check_call(cmd)
+
+        if gffOpts.get('index', 'false') == 'true':
+            self.tracksToIndex.append("%s" % trackData['label'])
+
     def add_rest(self, url, trackData):
         data = {
             "label": trackData['label'],
@@ -782,15 +902,24 @@
 
             # import pprint; pprint.pprint(track)
             # import sys; sys.exit()
-            if dataset_ext in ('gff', 'gff3', 'bed'):
-                self.add_features(dataset_path, dataset_ext, outputTrackConfig,
-                                  track['conf']['options']['gff'])
+            if dataset_ext in ('gff', 'gff3'):
+                self.add_gff(dataset_path, dataset_ext, outputTrackConfig,
+                             track['conf']['options']['gff'])
+            elif dataset_ext in ('bed', ):
+                self.add_bed(dataset_path, dataset_ext, outputTrackConfig,
+                             track['conf']['options']['gff'])
+            elif dataset_ext in ('genbank', ):
+                self.add_genbank(dataset_path, dataset_ext, outputTrackConfig,
+                                 track['conf']['options']['gff'])
             elif dataset_ext == 'bigwig':
                 self.add_bigwig(dataset_path, outputTrackConfig,
                                 track['conf']['options']['wiggle'])
             elif dataset_ext == 'bigwig_multiple':
                 self.add_bigwig_multiple(dataset_path, outputTrackConfig,
                                          track['conf']['options']['wiggle'])
+            elif dataset_ext == 'maf':
+                self.add_maf(dataset_path, outputTrackConfig,
+                             track['conf']['options']['maf'])
             elif dataset_ext == 'bam':
                 real_indexes = track['conf']['options']['pileup']['bam_indices']['bam_index']
                 if not isinstance(real_indexes, list):