diff jbrowse2.py @ 134:ed3a21033188 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit bb6736899ac3029f73455637a04a006fcd857fc2
author bgruening
date Sun, 20 Oct 2024 07:11:16 +0000
parents cec274db51c0
children 21bb464c1d53
line wrap: on
line diff
--- a/jbrowse2.py	Fri Oct 18 11:57:55 2024 +0000
+++ b/jbrowse2.py	Sun Oct 20 07:11:16 2024 +0000
@@ -13,7 +13,6 @@
 import string
 import struct
 import subprocess
-import tempfile
 import urllib.request
 import xml.etree.ElementTree as ET
 from collections import defaultdict
@@ -446,7 +445,7 @@
         p = subprocess.Popen(
             command,
             cwd=self.outdir,
-            shell=False,
+            shell=True,
             stdin=subprocess.PIPE,
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,
@@ -554,9 +553,8 @@
         else:
             faname = gname + ".fa.gz"
             fadest = os.path.realpath(os.path.join(self.outdir, faname))
-            cmd = ["bgzip", "-i", "-c", fapath, "-I", fadest + ".gzi"]
-            with open(fadest, "wb") as fout:
-                self.subprocess_check_call(cmd, output=fout)
+            cmd = "bgzip -k -i -c -I '%s.gzi' '%s' > '%s'" % (fadest, fapath, fadest)
+            subprocess.run(cmd, shell=True)
             cmd = ["samtools", "faidx", fadest]
             self.subprocess_check_call(cmd)
             contig = open(fadest + ".fai", "r").readline().strip()
@@ -753,38 +751,59 @@
         else:
             self.config_json.update(mafPlugin)
 
-    def _blastxml_to_gff3(self, xml, min_gap=10):
-        gff3_unrebased = tempfile.NamedTemporaryFile(delete=False)
-        cmd = [
-            "python",
-            os.path.join(INSTALLED_TO, "blastxml_to_gapped_gff3.py"),
-            "--trim",
-            "--trim_end",
-            "--include_seq",
-            "--min_gap",
-            str(min_gap),
-            xml,
-        ]
-        subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_unrebased)
-        gff3_unrebased.close()
-        logging.debug("### blastxml to gff3 cmd = %s" % " ".join(cmd))
-        return gff3_unrebased.name
+    def _sort_gff(self, data, dest):
+        # Sort GFF3 `data`, bgzip it to `dest` and tabix-index it; all of it is skipped when `dest` already exists
+        if not os.path.exists(dest):
+            e = dict(os.environ, SHELL="/bin/sh")  # private copy: leave the process-wide os.environ untouched
+            # single-quote both paths so that spaces in them survive the shell
+            cmd = "jbrowse sort-gff '%s' | bgzip -c > '%s'" % (data, dest)
+            subprocess.run(cmd, env=e, shell=True, check=True)  # raise if the pipeline exits non-zero
+            self.subprocess_check_call(["tabix", "-f", "-p", "gff", dest])
 
-    def add_blastxml(self, data, trackData, blastOpts, **kwargs):
-        gff3 = self._blastxml_to_gff3(data, min_gap=blastOpts["min_gap"])
-        if "parent" in blastOpts and blastOpts["parent"] != "None":
-            gff3_rebased = tempfile.NamedTemporaryFile(delete=False)
-            cmd = ["python", os.path.join(INSTALLED_TO, "gff3_rebase.py")]
-            if blastOpts.get("protein", "false") == "true":
-                cmd.append("--protein2dna")
-            cmd.extend([os.path.realpath(blastOpts["parent"]), gff3])
-            subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_rebased)
-            logging.debug("### gff3rebase cmd = %s" % " ".join(cmd))
-            gff3_rebased.close()
-            # Replace original gff3 file
-            shutil.copy(gff3_rebased.name, gff3)
-            os.unlink(gff3_rebased.name)
-        self.add_gff(gff3, trackData, **kwargs)
+    def add_gff(self, data, trackData):
+        """
+        Add a GFF3 FeatureTrack described by trackData to the pending tracks.
+
+        With useuri == "yes" the track points at trackData["path"]; otherwise
+        data is sorted, bgzipped and tabix-indexed into outdir as <label>.gz.
+        """
+        track_id = trackData["label"]
+        # Remote tracks keep their URI; local ones are staged into outdir.
+        if trackData["useuri"].lower() == "yes":
+            gff_uri = trackData["path"]
+        else:
+            gff_uri = track_id + ".gz"
+            self._sort_gff(data, os.path.join(self.outdir, gff_uri))
+        category = trackData["category"]
+        # The tabix index is always addressed as the data URI plus ".tbi".
+        adapter = {
+            "type": "Gff3TabixAdapter",
+            "gffGzLocation": {"uri": gff_uri},
+            "index": {"location": {"uri": gff_uri + ".tbi"}},
+        }
+        displays = [
+            {
+                "type": "LinearBasicDisplay",
+                "displayId": "%s-LinearBasicDisplay" % track_id,
+            },
+            {
+                "type": "LinearArcDisplay",
+                "displayId": "%s-LinearArcDisplay" % track_id,
+            },
+        ]
+        track = {
+            "type": "FeatureTrack",
+            "trackId": track_id,
+            "name": trackData["name"],
+            "assemblyNames": [trackData["assemblyNames"]],
+            "category": [category],
+            "adapter": adapter,
+            "displays": displays,
+        }
+        track["style"] = self._prepare_track_style(track)
+        # Queue a shallow copy under the assembly and remember the track id.
+        self.tracksToAdd[trackData["assemblyNames"]].append(copy.copy(track))
+        self.trackIdlist.append(track_id)
 
     def add_bigwig(self, data, trackData):
         tId = trackData["label"]
@@ -838,9 +857,9 @@
             bindex = fname + ".bai"
             bi = bam_indexes.split(",")
             bam_index = [
-                x.split(" ~ ")[1].strip()
+                x.split("~~~")[1].strip()
                 for x in bi
-                if " ~ " in x and x.split(" ~ ")[0].strip() == realFName
+                if "~~~" in x and x.split("~~~")[0].strip() == realFName
             ]
             logging.debug(
                 "===realFName=%s got %s as bam_indexes %s as bi, %s for bam_index"
@@ -900,9 +919,9 @@
             self.subprocess_check_call(["cp", data, dest])
             ci = cram_indexes.split(",")
             cram_index = [
-                x.split(" ~ ")[1].strip()
+                x.split("~~~")[1].strip()
                 for x in ci
-                if " ~ " in x and x.split(" ~ ")[0].strip() == realFName
+                if "~~~" in x and x.split("~~~")[0].strip() == realFName
             ]
             logging.debug(
                 "===realFName=%s got %s as cram_indexes %s as ci, %s for cram_index"
@@ -997,15 +1016,6 @@
         self.tracksToAdd[trackData["assemblyNames"]].append(copy.copy(trackDict))
         self.trackIdlist.append(tId)
 
-    def _sort_gff(self, data, dest):
-        # Only index if not already done
-        if not os.path.exists(dest):
-            e = os.environ
-            e['SHELL'] = '/bin/sh'
-            cmd = ['/bin/sh', '-c', "jbrowse sort-gff %s | bgzip -c > %s" % (data, dest)]
-            subprocess.run(cmd, env=e)
-            self.subprocess_check_call(["tabix", "-f", "-p", "gff", dest])
-
     def _sort_bed(self, data, dest):
         # Only index if not already done
         if not os.path.exists(dest):
@@ -1017,51 +1027,6 @@
             cmd = ["tabix", "-f", "-p", "bed", dest]
             self.subprocess_check_call(cmd)
 
-    def add_gff(self, data, trackData):
-        tId = trackData["label"]
-        useuri = trackData["useuri"].lower() == "yes"
-        if useuri:
-            url = trackData["path"]
-        else:
-            url = tId + ".gz"
-            dest = os.path.join(self.outdir, url)
-            self._sort_gff(data, dest)
-        categ = trackData["category"]
-        trackDict = {
-            "type": "FeatureTrack",
-            "trackId": tId,
-            "name": trackData["name"],
-            "assemblyNames": [trackData["assemblyNames"]],
-            "category": [
-                categ,
-            ],
-            "adapter": {
-                "type": "Gff3TabixAdapter",
-                "gffGzLocation": {
-                    "uri": url,
-                },
-                "index": {
-                    "location": {
-                        "uri": url + ".tbi",
-                    }
-                },
-            },
-            "displays": [
-                {
-                    "type": "LinearBasicDisplay",
-                    "displayId": "%s-LinearBasicDisplay" % tId,
-                },
-                {
-                    "type": "LinearArcDisplay",
-                    "displayId": "%s-LinearArcDisplay" % tId,
-                },
-            ],
-        }
-        style_json = self._prepare_track_style(trackDict)
-        trackDict["style"] = style_json
-        self.tracksToAdd[trackData["assemblyNames"]].append(copy.copy(trackDict))
-        self.trackIdlist.append(tId)
-
     def add_bed(self, data, ext, trackData):
         bedPlugin = {"name": "BedScorePlugin", "umdLoc": {"uri": "bedscoreplugin.js"}}
         tId = trackData["label"]
@@ -1139,7 +1104,7 @@
         useuri = data.startswith("http://") or data.startswith("https://")
         if not useuri:
             if canPIF:
-                fakeName = "%s.paf" % tId
+                fakeName = os.path.join(self.outdir, "%s.paf" % tId)
                 url = "%s.pif.gz" % tId
                 cmd = ["cp", data, fakeName]
                 self.subprocess_check_call(cmd)
@@ -1147,7 +1112,7 @@
                     "jbrowse",
                     "make-pif",
                     fakeName,
-                ]  # jbrowse pif input.paf --out output.pif.gz # specify output file, creates output.pif.gz.tbi also
+                ]
                 self.subprocess_check_call(cmd)
                 usePIF = True
             else:
@@ -1160,7 +1125,7 @@
         categ = trackData["category"]
         pg = pafOpts["genome"].split(",")
         pgc = [x.strip() for x in pg if x.strip() > ""]
-        gnomes = [x.split(" ~ ") for x in pgc]
+        gnomes = [x.split("~~~") for x in pgc]
         logging.debug("pg=%s, gnomes=%s" % (pg, gnomes))
         passnames = [trackData["assemblyNames"]]  # always first
         for i, (gpath, gname) in enumerate(gnomes):
@@ -1336,12 +1301,6 @@
                     outputTrackConfig,
                     cram_indexes=real_indexes,
                 )
-            elif dataset_ext == "blastxml":
-                self.add_blastxml(
-                    dataset_path,
-                    outputTrackConfig,
-                    track["conf"]["options"]["blast"],
-                )
             elif dataset_ext == "vcf":
                 self.add_vcf(dataset_path, outputTrackConfig)
             elif dataset_ext == "paf":