diff jbrowse2.py @ 127:fbabf7498471 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 116b1a4bbd62251ad552306df2dc8aa8f46c6721
author fubar
date Mon, 07 Oct 2024 02:11:55 +0000
parents 160f33c6ba85
children fce4ed3b1702
--- a/jbrowse2.py	Sun Oct 06 07:16:36 2024 +0000
+++ b/jbrowse2.py	Mon Oct 07 02:11:55 2024 +0000
@@ -361,43 +361,43 @@
     except Exception:
         return {}
 
-    for (key, value) in node.findall("dataset")[0].attrib.items():
+    for key, value in node.findall("dataset")[0].attrib.items():
         metadata["dataset_%s" % key] = value
 
     if node.findall("history"):
-        for (key, value) in node.findall("history")[0].attrib.items():
+        for key, value in node.findall("history")[0].attrib.items():
             metadata["history_%s" % key] = value
 
     if node.findall("metadata"):
-        for (key, value) in node.findall("metadata")[0].attrib.items():
+        for key, value in node.findall("metadata")[0].attrib.items():
             metadata["metadata_%s" % key] = value
         # Additional Mappings applied:
-        metadata[
-            "dataset_edam_format"
-        ] = '<a target="_blank" href="http://edamontology.org/{0}">{1}</a>'.format(
-            metadata["dataset_edam_format"], metadata["dataset_file_ext"]
+        metadata["dataset_edam_format"] = (
+            '<a target="_blank" href="http://edamontology.org/{0}">{1}</a>'.format(
+                metadata["dataset_edam_format"], metadata["dataset_file_ext"]
+            )
         )
         metadata["history_user_email"] = '<a href="mailto:{0}">{0}</a>'.format(
             metadata["history_user_email"]
         )
         metadata["hist_name"] = metadata["history_display_name"]
-        metadata[
-            "history_display_name"
-        ] = '<a target="_blank" href="{galaxy}/history/view/{encoded_hist_id}">{hist_name}</a>'.format(
-            galaxy=GALAXY_INFRASTRUCTURE_URL,
-            encoded_hist_id=metadata.get("history_id", "not available"),
-            hist_name=metadata.get("history_display_name", "not available"),
+        metadata["history_display_name"] = (
+            '<a target="_blank" href="{galaxy}/history/view/{encoded_hist_id}">{hist_name}</a>'.format(
+                galaxy=GALAXY_INFRASTRUCTURE_URL,
+                encoded_hist_id=metadata.get("history_id", "not available"),
+                hist_name=metadata.get("history_display_name", "not available"),
+            )
         )
     if node.findall("tool"):
-        for (key, value) in node.findall("tool")[0].attrib.items():
+        for key, value in node.findall("tool")[0].attrib.items():
             metadata["tool_%s" % key] = value
-        metadata[
-            "tool_tool"
-        ] = '<a target="_blank" href="{galaxy}/datasets/{encoded_id}/show_params">{tool_id}{tool_version}</a>'.format(
-            galaxy=GALAXY_INFRASTRUCTURE_URL,
-            encoded_id=metadata.get("dataset_id", ""),
-            tool_id=metadata.get("tool_tool_id", ""),
-            tool_version=metadata.get("tool_tool_version", ""),
+        metadata["tool_tool"] = (
+            '<a target="_blank" href="{galaxy}/datasets/{encoded_id}/show_params">{tool_id}{tool_version}</a>'.format(
+                galaxy=GALAXY_INFRASTRUCTURE_URL,
+                encoded_id=metadata.get("dataset_id", ""),
+                tool_id=metadata.get("tool_tool_id", ""),
+                tool_version=metadata.get("tool_tool_version", ""),
+            )
         )
     return metadata
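
For readers skimming the hunk above: the reformatting does not change the link markup. With illustrative attribute values (not taken from this changeset), the mapping produces, for example:

    # sketch only, hypothetical values
    metadata = {"dataset_edam_format": "format_3003", "dataset_file_ext": "bed"}
    metadata["dataset_edam_format"] = (
        '<a target="_blank" href="http://edamontology.org/{0}">{1}</a>'.format(
            metadata["dataset_edam_format"], metadata["dataset_file_ext"]
        )
    )
    # -> '<a target="_blank" href="http://edamontology.org/format_3003">bed</a>'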
 
@@ -619,10 +619,7 @@
 
     def text_index(self):
         # Index tracks
-        args = [
-            "jbrowse",
-            "text-index"
-        ]
+        args = ["jbrowse", "text-index"]
         self.subprocess_check_call(args)
 
     def add_hic(self, data, trackData):
@@ -676,7 +673,6 @@
                 categ,
             ],
             "adapter": {"type": "HicAdapter", "hicLocation": {"uri": uri}},
-
         }
         self.tracksToAdd[trackData["assemblyNames"]].append(copy.copy(trackDict))
         self.trackIdlist.append(tId)
@@ -1127,7 +1123,9 @@
         if self.config_json.get("plugins", None):
             self.config_json["plugins"].append(bedPlugin)
         else:
-            self.config_json["plugins"] = [bedPlugin,]
+            self.config_json["plugins"] = [
+                bedPlugin,
+            ]
         self.tracksToAdd[trackData["assemblyNames"]].append(copy.copy(trackDict))
         self.trackIdlist.append(tId)
 
@@ -1135,14 +1133,23 @@
         tname = trackData["name"]
         tId = trackData["label"]
         url = tId
+        usePIF = False  # much faster if indexed remotely or locally
         useuri = data.startswith("http://") or data.startswith("https://")
         if not useuri:
             dest = os.path.join(self.outdir, url)
             self.symlink_or_copy(os.path.realpath(data), dest)
+            cmd = ["jbrowse", "make-pif", dest]
+            self.subprocess_check_call(cmd)
+            usePIF = True
+            url = "%s.pif.gz" % tId
             nrow = self.getNrow(dest)
         else:
             url = data
-            nrow = self.getNrow(url)
+            if data.endswith(".pif.gz") or data.endswith(".paf.gz"):  # is tabix
+                usePIF = True
+                nrow = 1
+            else:
+                nrow = self.getNrow(url)
         categ = trackData["category"]
         pg = pafOpts["genome"].split(",")
         pgc = [x.strip() for x in pg if x.strip() > ""]
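
A minimal sketch of what the new local-PAF branch above does, using a hypothetical track label mypaf_0 (the script derives the real names from trackData): the copied PAF is converted to a bgzipped, tabix-indexed PIF whose URI and .tbi index the track config then points at.

    # sketch only: mirrors the make-pif step for a hypothetical copied PAF
    import subprocess
    dest = "outdir/mypaf_0"                      # local copy/symlink of the PAF
    subprocess.check_call(["jbrowse", "make-pif", dest])
    url = "mypaf_0.pif.gz"                       # track URI; the index is expected at mypaf_0.pif.gz.tbi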
@@ -1175,11 +1182,6 @@
                 categ,
             ],
             "name": tname,
-            "adapter": {
-                "type": "PAFAdapter",
-                "pafLocation": {"uri": url},
-                "assemblyNames": passnames,
-            },
             "displays": [
                 {
                     "type": "LGVSyntenyDisplay",
@@ -1199,7 +1201,24 @@
                 },
             ],
         }
-        if nrow > 10000:
+        if usePIF:
+            trackDict["adapter"] = {
+                "type": "PairwiseIndexedPAFAdapter",
+                "pifGzLocation": {"uri": url},
+                "assemblyNames": passnames,
+                "index": {
+                    "location": {
+                        "uri": url + ".tbi",
+                    }
+                },
+            }
+        else:
+            trackDict["adapter"] = {
+                "type": "PAFAdapter",
+                "pafLocation": {"uri": url},
+                "assemblyNames": passnames,
+            }
+        if (not usePIF) and (nrow > 10000):
             style_json = {
                 "type": "LGVSyntenyDisplay",
                 "displayId": "%s-LGVSyntenyDisplay" % tId,
@@ -1336,7 +1355,9 @@
          https://github.com/abretaud/tools-iuc/blob/jbrowse2/tools/jbrowse2/jbrowse2.py
         """
         #  TODO using the default session for now, but check out session specs in the future https://github.com/GMOD/jbrowse-components/issues/2708
-        bpPerPx = self.bpPerPx  # Browser window width is unknown and default session cannot be used to figure it out in JB2 code so could be 200-2000+ pixels.
+        bpPerPx = (
+            self.bpPerPx
+        )  # The browser window width is unknown and cannot be inferred from the default session in JB2 code, so it could be anywhere from 200 to 2000+ pixels.
         track_types = {}
         with open(self.config_json_file, "r") as config_file:
             config_json = json.load(config_file)
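
For context on the bpPerPx comment above, the unit is simply bases per horizontal pixel, so the right value depends on the client's window width, which this script cannot know at build time; a rough worked example:

    # illustrative arithmetic only: bpPerPx = displayed_region_bp / viewport_width_px
    bpPerPx = 1_000_000 / 1_000   # a 1 Mbp region in a ~1000 px wide view is ~1000 bp per pixel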
@@ -1361,17 +1382,16 @@
                             % (tId, default_data)
                         )
                     else:
-                        logging.debug(
-                            "style data for %s = %s"
-                            % (tId, style_data)
-                        )
-                    if style_data.get('type', None) is None:
+                        logging.debug("style data for %s = %s" % (tId, style_data))
+                    if style_data.get("type", None) is None:
                         style_data["type"] = "LinearBasicDisplay"
                     if "displays" in track_conf:
                         disp = track_conf["displays"][0]["type"]
                         style_data["type"] = disp
                     if track_conf.get("displays", None):
-                        style_data["configuration"] = track_conf["displays"][0]["displayId"]
+                        style_data["configuration"] = track_conf["displays"][0][
+                            "displayId"
+                        ]
                     else:
                         logging.debug("no display in track_conf for %s" % tId)
                     if track_conf.get("style_labels", None):
@@ -1418,7 +1438,7 @@
                 "offsetPx": 0,
                 "bpPerPx": bpPerPx,
                 "minimized": False,
-                "tracks": tracks_data
+                "tracks": tracks_data,
             }
             if drdict.get("refName", None):
                 # TODO displayedRegions is not just zooming to the region, it hides the rest of the chromosome
@@ -1539,11 +1559,11 @@
 
     def clone_jbrowse(self, realclone=False):
         """
-            Clone a JBrowse directory into a destination directory.
+        Clone a JBrowse directory into a destination directory.
 
-            `realclone=true` will use the `jbrowse create` command.
-            To allow running on internet-less compute and for reproducibility
-            use frozen code with `realclone=false
+        `realclone=true` will use the `jbrowse create` command.
+        To allow running on internet-less compute and for reproducibility,
+        use frozen code with `realclone=false`.
 
         """
         dest = self.outdir
@@ -1570,7 +1590,9 @@
             except OSError as e:
                 log.error("Error: %s - %s." % (e.filename, e.strerror))
         for neededfile in ["jb2_webserver.py", "bedscoreplugin.js"]:
-            shutil.copyfile(os.path.join(INSTALLED_TO, neededfile), os.path.join(dest, neededfile))
+            shutil.copyfile(
+                os.path.join(INSTALLED_TO, neededfile), os.path.join(dest, neededfile)
+            )
 
 
 def parse_style_conf(item):
@@ -1592,7 +1614,7 @@
     args = parser.parse_args()
     tree = ET.parse(args.xml)
     root = tree.getroot()
-    removeMe = string.punctuation.replace('.', ' ').replace('/', '').replace('-', '')
+    removeMe = string.punctuation.replace(".", " ").replace("/", "").replace("-", "")
     # first is a space because space needs to be added here for removal from labels as paths.
     nopunct = str.maketrans(dict.fromkeys(removeMe))
     # This should be done ASAP
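
A concrete illustration of the translation table built above (the label value is made up): swapping '.' for a space in the removal set means spaces and most punctuation are stripped from labels used as paths, while '.', '/' and '-' are kept.

    # sketch only, hypothetical label
    import string
    removeMe = string.punctuation.replace(".", " ").replace("/", "").replace("-", "")
    nopunct = str.maketrans(dict.fromkeys(removeMe))
    print("my label (v2).bed".translate(nopunct))   # -> mylabelv2.bed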
@@ -1646,7 +1668,7 @@
             if trackfiles:
                 for x in trackfiles:
                     isBed = False
-                    if x.attrib['ext'] == "bed":
+                    if x.attrib["ext"] == "bed":
                         isBed = True
                     track_conf["label"] = "%s_%d" % (
                         x.attrib["label"].translate(nopunct),
@@ -1664,9 +1686,7 @@
                         )
                     else:
                         metadata = metadata_from_node(x.find("metadata"))
-                        track_conf["dataset_id"] = metadata.get(
-                            "dataset_id", "None"
-                        )
+                        track_conf["dataset_id"] = metadata.get("dataset_id", "None")
                         if x.attrib["useuri"].lower() == "yes":
                             tfa = (
                                 x.attrib["path"],