diff COBRAxy/marea.py @ 456:a6e45049c1b9 draft default tip

Uploaded
author francesco_lapi
date Fri, 12 Sep 2025 17:28:45 +0000
parents f62b8625f6f1
children
line wrap: on
line diff
--- a/COBRAxy/marea.py	Fri Sep 12 15:05:54 2025 +0000
+++ b/COBRAxy/marea.py	Fri Sep 12 17:28:45 2025 +0000
@@ -1,3 +1,10 @@
+"""
+MAREA: Enrichment and map styling for RAS/RPS data.
+
+This module compares groups of samples using RAS (Reaction Activity Scores) and/or
+RPS (Reaction Propensity Scores), computes statistics (p-values, z-scores, fold change),
+and applies visual styling to an SVG metabolic map (with optional PDF/PNG export).
+"""
 from __future__ import division
 import csv
 from enum import Enum
@@ -26,13 +33,13 @@
 ARGS :argparse.Namespace
 def process_args(args:List[str] = None) -> argparse.Namespace:
     """
-    Interfaces the script of a module with its frontend, making the user's choices for various parameters available as values in code.
+    Parse command-line arguments exposed by the Galaxy frontend for this module.
 
     Args:
-        args : Always obtained (in file) from sys.argv
+    args: Optional list of arguments, defaults to sys.argv when None.
 
     Returns:
-        Namespace : An object containing the parsed arguments
+    Namespace: Parsed arguments.
     """
     parser = argparse.ArgumentParser(
         usage = "%(prog)s [options]",
@@ -265,8 +272,6 @@
         tmp.append('stroke-dasharray:' + dash)
     return ';'.join(tmp)
 
-# TODO: remove, there's applyRPS whatever
-# The type of d values is collapsed, losing precision, because the dict containst lists instead of tuples, please fix!
 def fix_map(d :Dict[str, List[Union[float, FoldChange]]], core_map :ET.ElementTree, threshold_P_V :float, threshold_F_C :float, max_z_score :float) -> ET.ElementTree:
     """
     Edits the selected SVG map based on the p-value and fold change data (d) and some significance thresholds also passed as inputs.
@@ -343,18 +348,15 @@
         f"//*[@id=\"{reactionId}\"]").map(
         lambda xPath : metabMap.xpath(xPath)[0]).mapErr(
         lambda _ : utils.Result.ResultErr(f"No elements with ID \"{reactionId}\" found in map"))
-        # ^^^ we shamelessly ignore the contents of the IndexError, it offers nothing to the user.
 
 def styleMapElement(element :ET.Element, styleStr :str) -> None:
+    """Append/override stroke-related styles on a given SVG element."""
     currentStyles :str = element.get("style", "")
     if re.search(r";stroke:[^;]+;stroke-width:[^;]+;stroke-dasharray:[^;]+$", currentStyles):
-        currentStyles = ';'.join(currentStyles.split(';')[:-3]) # TODO: why the last 3? Are we sure?
-
-    #TODO: this is attempting to solve the styling override problem, not sure it does tho
+        currentStyles = ';'.join(currentStyles.split(';')[:-3])
 
     element.set("style", currentStyles + styleStr)
 
-# TODO: maybe remove vvv
 class ReactionDirection(Enum):
     Unknown = ""
     Direct  = "_F"
@@ -372,6 +374,7 @@
         return ReactionDirection.fromDir(reactionId[-2:])
 
 def getArrowBodyElementId(reactionId :str) -> str:
+    """Return the SVG element id for a reaction arrow body, normalizing direction tags."""
     if reactionId.endswith("_RV"): reactionId = reactionId[:-3] #TODO: standardize _RV
     elif ReactionDirection.fromReactionId(reactionId) is not ReactionDirection.Unknown: reactionId = reactionId[:-2]
     return f"R_{reactionId}"
@@ -401,13 +404,9 @@
     UpRegulated   = "#ecac68" # orange, up-regulated reaction
     DownRegulated = "#6495ed" # lightblue, down-regulated reaction
 
-    UpRegulatedInv = "#FF0000"
-    # ^^^ bright red, up-regulated net value for a reversible reaction with
-    # conflicting enrichment in the two directions.
+    UpRegulatedInv = "#FF0000"  # bright red for reversible with conflicting directions
 
-    DownRegulatedInv = "#0000FF"
-    # ^^^ bright blue, down-regulated net value for a reversible reaction with
-    # conflicting enrichment in the two directions.
+    DownRegulatedInv = "#0000FF"  # bright blue for reversible with conflicting directions
 
     @classmethod
     def fromFoldChangeSign(cls, foldChange :float, *, useAltColor = False) -> "ArrowColor":
@@ -444,7 +443,7 @@
             ERRORS.append(reactionId)
 
     def styleReactionElements(self, metabMap :ET.ElementTree, reactionId :str, *, mindReactionDir = True) -> None:
-        # If We're dealing with RAS data or in general don't care about the direction of the reaction we only style the arrow body
+    # If direction is irrelevant (e.g., RAS), style only the arrow body
         if not mindReactionDir:
             return self.applyTo(getArrowBodyElementId(reactionId), metabMap, self.toStyleStr())
         
@@ -453,24 +452,6 @@
         self.applyTo(idOpt1, metabMap, self.toStyleStr(downSizedForTips = True))
         if idOpt2: self.applyTo(idOpt2, metabMap, self.toStyleStr(downSizedForTips = True))
     
-    # TODO: this seems to be unused, remove
-    def getMapReactionId(self, reactionId :str, mindReactionDir :bool) -> str:
-        """
-        Computes the reaction ID as encoded in the map for a given reaction ID from the dataset.
-
-        Args:
-            reactionId: the reaction ID, as encoded in the dataset.
-            mindReactionDir: if True forward (F_) and backward (B_) directions will be encoded in the result.
-    
-        Returns:
-            str : the ID of an arrow's body or tips in the map.
-        """
-        # we assume the reactionIds also don't encode reaction dir if they don't mind it when styling the map.
-        if not mindReactionDir: return "R_" + reactionId
-
-        #TODO: this is clearly something we need to make consistent in RPS
-        return (reactionId[:-3:-1] + reactionId[:-2]) if reactionId[:-2] in ["_F", "_B"] else f"F_{reactionId}" # "Pyr_F" --> "F_Pyr"
-
     def toStyleStr(self, *, downSizedForTips = False) -> str:
         """
         Collapses the styles of this Arrow into a str, ready to be applied as part of the "style" property on an svg element.
@@ -482,13 +463,11 @@
         if downSizedForTips: width *= 0.8
         return f";stroke:{self.col};stroke-width:{width};stroke-dasharray:{'5,5' if self.dash else 'none'}"
 
-# vvv These constants could be inside the class itself a static properties, but python
-# was built by brainless organisms so here we are!
+# Default arrows used for different significance states
 INVALID_ARROW       = Arrow(Arrow.MIN_W, ArrowColor.Invalid)
 INSIGNIFICANT_ARROW = Arrow(Arrow.MIN_W, ArrowColor.Invalid, isDashed = True)
 TRANSPARENT_ARROW   = Arrow(Arrow.MIN_W, ArrowColor.Transparent) # Who cares how big it is if it's transparent
 
-# TODO: A more general version of this can be used for RAS as well, we don't need "fix map" or whatever
 def applyRpsEnrichmentToMap(rpsEnrichmentRes :Dict[str, Union[Tuple[float, FoldChange], Tuple[float, FoldChange, float, float]]], metabMap :ET.ElementTree, maxNumericZScore :float) -> None:
     """
     Applies RPS enrichment results to the provided metabolic map.
@@ -581,7 +560,7 @@
         
         if l:
             class_pat[classe] = {
-                "values": list(map(list, zip(*l))),  # trasposta
+                "values": list(map(list, zip(*l))),  # transpose
                 "samples": sample_ids
             }
             continue
@@ -592,7 +571,7 @@
     return class_pat
 
 ############################ conversion ##############################################
-#conversion from svg to png 
+# Conversion from SVG to PNG
 def svg_to_png_with_background(svg_path :utils.FilePath, png_path :utils.FilePath, dpi :int = 72, scale :int = 1, size :Optional[float] = None) -> None:
     """
     Internal utility to convert an SVG to PNG (forced opaque) to aid in PDF conversion.
@@ -660,14 +639,8 @@
     Returns:
         utils.FilePath : _description_
     """
-    # This function returns a util data structure but is extremely specific to this module.
-    # RAS also uses collections as output and as such might benefit from a method like this, but I'd wait
-    # TODO: until a third tool with multiple outputs appears before porting this to utils.
     return utils.FilePath(
         f"{dataset1Name}_vs_{dataset2Name}" + (f" ({details})" if details else ""),
-        # ^^^ yes this string is built every time even if the form is the same for the same 2 datasets in
-        # all output files: I don't care, this was never the performance bottleneck of the tool and
-        # there is no other net gain in saving and re-using the built string.
         ext,
         prefix = ARGS.output_path)
 
@@ -683,10 +656,8 @@
             if sizeMismatch > 0: row.extend([FIELD_NOT_AVAILABLE] * sizeMismatch)
             writer.writerow({ field : data for field, data in zip(fieldNames, row) })
 
-OldEnrichedScores = Dict[str, List[Union[float, FoldChange]]] #TODO: try to use Tuple whenever possible
+OldEnrichedScores = Dict[str, List[Union[float, FoldChange]]]
 def temp_thingsInCommon(tmp :OldEnrichedScores, core_map :ET.ElementTree, max_z_score :float, dataset1Name :str, dataset2Name = "rest", ras_enrichment = True) -> None:
-    # this function compiles the things always in common between comparison modes after enrichment.
-    # TODO: organize, name better.
     suffix = "RAS" if ras_enrichment else "RPS"
     writeToCsv(
         [ [reactId] + values for reactId, values in tmp.items() ],
@@ -809,7 +780,6 @@
 # TODO: the net RPS computation should be done in the RPS module
 def compareDatasetPair(dataset1Data :List[List[float]], dataset2Data :List[List[float]], ids :List[str]) -> Tuple[Dict[str, List[Union[float, FoldChange]]], float, Dict[str, Tuple[np.ndarray, np.ndarray]]]:
 
-    #TODO: the following code still suffers from "dumbvarnames-osis"
     netRPS :Dict[str, Tuple[np.ndarray, np.ndarray]] = {}
     comparisonResult :Dict[str, List[Union[float, FoldChange]]] = {}
     count   = 0
@@ -818,7 +788,7 @@
     for l1, l2 in zip(dataset1Data, dataset2Data):
         reactId = ids[count]
         count += 1
-        if not reactId: continue # we skip ids that have already been processed
+        if not reactId: continue 
 
         try: #TODO: identify the source of these errors and minimize code in the try block
             reactDir = ReactionDirection.fromReactionId(reactId)
@@ -947,13 +917,11 @@
         except Exception as e:
             raise utils.DataErr(pdfPath.show(), f'Error generating PDF file: {e}')
 
-    if not ARGS.generate_svg: # This argument is useless, who cares if the user wants the svg or not
+    if not ARGS.generate_svg:
         os.remove(svgFilePath.show())
 
 ClassPat = Dict[str, List[List[float]]]
 def getClassesAndIdsFromDatasets(datasetsPaths :List[str], datasetPath :str, classPath :str, names :List[str]) -> Tuple[List[str], ClassPat, Dict[str, List[str]]]:
-    # TODO: I suggest creating dicts with ids as keys instead of keeping class_pat and ids separate,
-    # for the sake of everyone's sanity.
     columnNames :Dict[str, List[str]] = {} # { datasetName : [ columnName, ... ], ... }
     class_pat :ClassPat = {}
     if ARGS.option == 'datasets':
@@ -983,9 +951,7 @@
                 columnNames[clas] = ["Reactions", *values_and_samples_id["samples"]]
     
     return ids, class_pat, columnNames
-    #^^^ TODO: this could be a match statement over an enum, make it happen future marea dev with python 3.12! (it's why I kept the ifs)
 
-#TODO: create these damn args as FilePath objects
 def getDatasetValues(datasetPath :str, datasetName :str) -> Tuple[ClassPat, List[str]]:
     """
     Opens the dataset at the given path and extracts the values (expected nullable numerics) and the IDs.
@@ -1025,18 +991,16 @@
         ARGS.tool_dir,
         utils.FilePath.fromStrPath(ARGS.custom_map) if ARGS.custom_map else None)
     
-    # TODO: in the future keep the indices WITH the data and fix the code below.
-
     # Prepare enrichment results containers
     ras_results = []
     rps_results = []
 
     # Compute RAS enrichment if requested
-    if ARGS.using_RAS: #       vvv columnNames only matter with RPS data
+    if ARGS.using_RAS: 
         ids_ras, class_pat_ras, _ = getClassesAndIdsFromDatasets(
             ARGS.input_datas, ARGS.input_data, ARGS.input_class, ARGS.names)
         ras_results, _ = computeEnrichment(class_pat_ras, ids_ras, fromRAS=True)
-        #           ^^^ netRPS only matter with RPS data
+ 
 
     # Compute RPS enrichment if requested
     if ARGS.using_RPS:
@@ -1065,7 +1029,7 @@
         # Apply RPS styling to arrow heads
         if res.get('rps'):
             tmp_rps, max_z_rps = res['rps']
-            # applyRpsEnrichmentToMap styles only heads unless only RPS are used
+
             temp_thingsInCommon(tmp_rps, map_copy, max_z_rps, i, j, ras_enrichment=False)
 
         # Output both SVG and PDF/PNG as configured
@@ -1076,7 +1040,6 @@
         for datasetName, rows in netRPS.items():
             writeToCsv(
                 [[reactId, *netValues] for reactId, netValues in rows.items()],
-                # vvv In weird comparison modes the dataset names are not recorded properly..
                 columnNames.get(datasetName, ["Reactions"]),
                 utils.FilePath(
                     "Net_RPS_" + datasetName,