diff COBRAxy/marea.py @ 143:507efdc9d226 draft

Uploaded
author luca_milaz
date Tue, 05 Nov 2024 21:42:17 +0000
parents 41f35c2f0c7b
children a9a490ae198d
line wrap: on
line diff
--- a/COBRAxy/marea.py	Thu Oct 31 20:54:58 2024 +0000
+++ b/COBRAxy/marea.py	Tue Nov 05 21:42:17 2024 +0000
@@ -15,6 +15,7 @@
 import argparse
 import pyvips
 from typing import Tuple, Union, Optional, List, Dict
+import copy
 
 ERRORS = []
 ########################## argparse ##########################################
@@ -761,7 +762,7 @@
     
     return tmp, max_z_score
 
-def computeEnrichment(metabMap :ET.ElementTree, class_pat :Dict[str, List[List[float]]], ids :List[str], *, fromRAS = True) -> None:
+def computeEnrichment(metabMap: ET.ElementTree, class_pat: Dict[str, List[List[float]]], ids: List[str], *, fromRAS=True) -> List[Tuple[str, str, dict, float]]:
     """
     Compares clustered data based on a given comparison mode and applies enrichment-based styling on the
     provided metabolic map.
@@ -773,58 +774,52 @@
         fromRAS : whether the data to enrich consists of RAS scores.
 
     Returns:
-        None
-
+        List[Tuple[str, str, dict, float]]: one (first dataset name, second dataset name or "rest", comparison dictionary, max z-score) tuple per comparison performed.
+        
     Raises:
         sys.exit : if there are less than 2 classes for comparison
     
     Side effects:
-        metabMap : mut
-        ids : mut
+        metabMap : not modified by this function; the caller passes the returned results to temp_thingsInCommon on a copy of the map
     """
-    class_pat = { k.strip() : v for k, v in class_pat.items() }
-    #TODO: simplfy this stuff vvv and stop using sys.exit (raise the correct utils error)
-    if (not class_pat) or (len(class_pat.keys()) < 2): sys.exit('Execution aborted: classes provided for comparisons are less than two\n')
+    class_pat = {k.strip(): v for k, v in class_pat.items()}
+    if (not class_pat) or (len(class_pat.keys()) < 2):
+        sys.exit('Execution aborted: classes provided for comparisons are less than two\n')
+    
+    enrichment_results = []
 
     if ARGS.comparison == "manyvsmany":
         for i, j in it.combinations(class_pat.keys(), 2):
-            #TODO: these 2 functions are always called in pair and in this order and need common data,
-            # some clever refactoring would be appreciated.
             comparisonDict, max_z_score = compareDatasetPair(class_pat.get(i), class_pat.get(j), ids)
-            temp_thingsInCommon(comparisonDict, metabMap, max_z_score, i, j, fromRAS)
+            enrichment_results.append((i, j, comparisonDict, max_z_score))
     
     elif ARGS.comparison == "onevsrest":
         for single_cluster in class_pat.keys():
-            t :List[List[List[float]]] = []
-            for k in class_pat.keys():
-                if k != single_cluster:
-                   t.append(class_pat.get(k))
-            
-            rest :List[List[float]] = []
-            for i in t:
-                rest = rest + i
-            
+            rest = [item for k, v in class_pat.items() if k != single_cluster for item in v]
             comparisonDict, max_z_score = compareDatasetPair(class_pat.get(single_cluster), rest, ids)
-            temp_thingsInCommon(comparisonDict, metabMap, max_z_score, single_cluster, fromRAS)
+            enrichment_results.append((single_cluster, "rest", comparisonDict, max_z_score))
     
     elif ARGS.comparison == "onevsmany":
         controlItems = class_pat.get(ARGS.control)
         for otherDataset in class_pat.keys():
-            if otherDataset == ARGS.control: continue
-            
+            if otherDataset == ARGS.control:
+                continue
             comparisonDict, max_z_score = compareDatasetPair(controlItems, class_pat.get(otherDataset), ids)
-            temp_thingsInCommon(comparisonDict, metabMap, max_z_score, ARGS.control, otherDataset, fromRAS)
+            enrichment_results.append((ARGS.control, otherDataset, comparisonDict, max_z_score))
+    
+    return enrichment_results
 
-def createOutputMaps(dataset1Name :str, dataset2Name :str, core_map :ET.ElementTree) -> None:
-    svgFilePath = buildOutputPath(dataset1Name, dataset2Name, details = "SVG Map", ext = utils.FileFormat.SVG)
+def createOutputMaps(dataset1Name: str, dataset2Name: str, core_map: ET.ElementTree) -> None:
+    svgFilePath = buildOutputPath(dataset1Name, dataset2Name, details="SVG Map", ext=utils.FileFormat.SVG)
     utils.writeSvg(svgFilePath, core_map)
 
     if ARGS.generate_pdf:
-        pngPath = buildOutputPath(dataset1Name, dataset2Name, details = "PNG Map", ext = utils.FileFormat.PNG)
-        pdfPath = buildOutputPath(dataset1Name, dataset2Name, details = "PDF Map", ext = utils.FileFormat.PDF)
-        convert_to_pdf(svgFilePath, pngPath, pdfPath)                     
+        pngPath = buildOutputPath(dataset1Name, dataset2Name, details="PNG Map", ext=utils.FileFormat.PNG)
+        pdfPath = buildOutputPath(dataset1Name, dataset2Name, details="PDF Map", ext=utils.FileFormat.PDF)
+        convert_to_pdf(svgFilePath, pngPath, pdfPath)
 
-    if not ARGS.generate_svg: os.remove(svgFilePath.show())
+    if not ARGS.generate_svg:
+        os.remove(svgFilePath.show())  # os.remove needs str/PathLike; .show() gives the path string, as the replaced line did
 
 ClassPat = Dict[str, List[List[float]]]
 def getClassesAndIdsFromDatasets(datasetsPaths :List[str], datasetPath :str, classPath :str, names :List[str]) -> Tuple[List[str], ClassPat]:
@@ -880,46 +875,33 @@
     Raises:
         sys.exit : if a user-provided custom map is in the wrong format (ET.XMLSyntaxError, ET.XMLSchemaParseError)
     """
-
     global ARGS
     ARGS = process_args()
 
-    if os.path.isdir('result') == False: os.makedirs('result')
+    if not os.path.isdir('result'):
+        os.makedirs('result')
     
-    core_map :ET.ElementTree = ARGS.choice_map.getMap(
+    core_map: ET.ElementTree = ARGS.choice_map.getMap(
         ARGS.tool_dir,
         utils.FilePath.fromStrPath(ARGS.custom_map) if ARGS.custom_map else None)
-    # TODO: ^^^ ugly but fine for now, the argument is None if the model isn't custom because no file was given.
-    # getMap will None-check the customPath and panic when the model IS custom but there's no file (good). A cleaner
-    # solution can be derived from my comment in FilePath.fromStrPath
-
+    
     if ARGS.using_RAS:
         ids, class_pat = getClassesAndIdsFromDatasets(ARGS.input_datas, ARGS.input_data, ARGS.input_class, ARGS.names)
-        computeEnrichment(core_map, class_pat, ids)
+        enrichment_results = computeEnrichment(core_map, class_pat, ids)
+        for i, j, comparisonDict, max_z_score in enrichment_results:
+            map_copy = copy.deepcopy(core_map)
+            temp_thingsInCommon(comparisonDict, map_copy, max_z_score, i, j, fromRAS=True)
+            createOutputMaps(i, j, map_copy)
     
     if ARGS.using_RPS:
         ids, class_pat = getClassesAndIdsFromDatasets(ARGS.input_datas_rps, ARGS.input_data_rps, ARGS.input_class_rps, ARGS.names_rps)
-        computeEnrichment(core_map, class_pat, ids, fromRAS = False)
-    
-    # create output files: TODO: this is the same comparison happening in "maps", find a better way to organize this
-    if ARGS.comparison == "manyvsmany":
-        for i, j in it.combinations(class_pat.keys(), 2): createOutputMaps(i, j, core_map)
-        return
-    
-    if ARGS.comparison == "onevsrest":
-        for single_cluster in class_pat.keys(): createOutputMaps(single_cluster, "rest", core_map)
-        return
-    
-    for otherDataset in class_pat.keys():
-        if otherDataset != ARGS.control: createOutputMaps(i, j, core_map)
+        enrichment_results = computeEnrichment(core_map, class_pat, ids, fromRAS=False)
+        for i, j, comparisonDict, max_z_score in enrichment_results:
+            map_copy = copy.deepcopy(core_map)
+            temp_thingsInCommon(comparisonDict, map_copy, max_z_score, i, j, fromRAS=False)
+            createOutputMaps(i, j, map_copy)
 
-    if not ERRORS: return
-    utils.logWarning(
-        f"The following reaction IDs were mentioned in the dataset but weren't found in the map: {ERRORS}",
-        ARGS.out_log)
-    
-    print('Execution succeded')
-
+    print('Execution succeeded')
 ###############################################################################
 if __name__ == "__main__":
     main()
\ No newline at end of file