changeset 309:38c9a958ea78 draft

Uploaded
author francesco_lapi
date Thu, 22 May 2025 16:03:37 +0000
parents 797d0e002934
children c9647b137ab6
files COBRAxy/data_tutorial.zip COBRAxy/marea.py COBRAxy/marea_cluster.py COBRAxy/ras_generator.py
diffstat 4 files changed, 37 insertions(+), 26 deletions(-) [+]
line wrap: on
line diff
Binary file COBRAxy/data_tutorial.zip has changed
--- a/COBRAxy/marea.py	Tue May 20 16:01:21 2025 +0000
+++ b/COBRAxy/marea.py	Thu May 22 16:03:37 2025 +0000
@@ -568,16 +568,22 @@
         if pd.isnull(classe): continue
 
         l :List[List[float]] = []
+        sample_ids: List[str] = []
+
         for j in range(i, len(classes)):
             if classes.iloc[j, 1] == classe:
                 pat_id :str = classes.iloc[j, 0] # sample name
                 values = dataset_values.get(pat_id, None) # the column of values for that sample
                 if values != None:
                     l.append(values)
+                    sample_ids.append(pat_id)
                 classes.iloc[j, 1] = None # TODO: problems?
         
         if l:
-            class_pat[classe] = list(map(list, zip(*l)))
+            class_pat[classe] = {
+                "values": list(map(list, zip(*l))),  # transposed
+                "samples": sample_ids
+            }
             continue
         
         utils.logWarning(
@@ -957,8 +963,11 @@
 
         values, ids = getDatasetValues(datasetPath, "Dataset Class (not actual name)")
         if values != None:
-            # TODO: add the columnNames thing, I didn't because I don't understand the whole "dataset classes" thing
-            class_pat = split_class(classes, values)
+            class_pat_with_samples_id = split_class(classes, values)
+
+            for clas, values_and_samples_id in class_pat_with_samples_id.items():
+                class_pat[clas] = values_and_samples_id["values"]
+                columnNames[clas] = values_and_samples_id["samples"]
     
     return ids, class_pat, columnNames
     #^^^ TODO: this could be a match statement over an enum, make it happen future marea dev with python 3.12! (it's why I kept the ifs)
@@ -1064,4 +1073,4 @@
     print('Execution succeeded')
 ###############################################################################
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
--- a/COBRAxy/marea_cluster.py	Tue May 20 16:01:21 2025 +0000
+++ b/COBRAxy/marea_cluster.py	Thu May 22 16:03:37 2025 +0000
@@ -59,14 +59,14 @@
     
     parser.add_argument('-el', '--elbow', 
                         type = str,
-                        default = 'false',
-                        choices = ['true', 'false'],
+                        default = 'False',
+                        choices = ['True', 'False'],
                         help = 'choose if you want to generate an elbow plot for kmeans')
     
     parser.add_argument('-si', '--silhouette', 
                         type = str,
-                        default = 'false',
-                        choices = ['true', 'false'],
+                        default = 'False',
+                        choices = ['True', 'False'],
                         help = 'choose if you want silhouette plots')
     
     parser.add_argument('-td', '--tool_dir',
@@ -75,7 +75,7 @@
                         help = 'your tool directory')
                         
     parser.add_argument('-ms', '--min_samples',
-                        type = float,
+                        type = int,
                         help = 'min samples for dbscan (optional)')
                         
     parser.add_argument('-ep', '--eps',
@@ -106,7 +106,7 @@
     Returns:
       None
     """
-    args = process_args(sys.argv)
+
     with open(args.out_log, 'a') as log:
         log.write(s + "\n\n")
     print(s)
@@ -213,8 +213,8 @@
         k_min (int): The minimum number of clusters to consider.
         k_max (int): The maximum number of clusters to consider.
         dataset (pandas.DataFrame): The dataset to perform clustering on.
-        elbow (str): Whether to generate an elbow plot for kmeans ('true' or 'false').
-        silhouette (str): Whether to generate silhouette plots ('true' or 'false').
+        elbow (str): Whether to generate an elbow plot for kmeans ('True' or 'False').
+        silhouette (str): Whether to generate silhouette plots ('True' or 'False').
         best_cluster (str): The file path to save the output of the best cluster.
 
     Returns:
@@ -224,12 +224,12 @@
         os.makedirs(args.output_path)
     
         
-    if elbow == 'true':
+    if elbow == 'True':
         elbow = True
     else:
         elbow = False
         
-    if silhouette == 'true':
+    if silhouette == 'True':
         silhouette = True
     else:
         silhouette = False
@@ -443,7 +443,7 @@
         k_min (int): The minimum number of clusters to consider.
         k_max (int): The maximum number of clusters to consider.
         best_cluster (str): The file path to save the output of the best cluster.
-        silhouette (str): Whether to generate silhouette plots ('true' or 'false').
+        silhouette (str): Whether to generate silhouette plots ('True' or 'False').
 
     Returns:
         None
@@ -477,7 +477,7 @@
         prefix = ''
         if (i + k_min == best):
             prefix = '_BEST'
-        if silhouette == 'true':
+        if silhouette == 'True':
             silhouette_draw(dataset, labels[i], i + k_min, f'{args.output_path}/silhouette_with_' + str(i + k_min) + prefix + '_clusters.png')
      
     for i in range(len(labels)):
--- a/COBRAxy/ras_generator.py	Tue May 20 16:01:21 2025 +0000
+++ b/COBRAxy/ras_generator.py	Thu May 22 16:03:37 2025 +0000
@@ -8,6 +8,7 @@
 import utils.general_utils as utils
 import utils.rule_parsing as ruleUtils
 from typing import Union, Optional, List, Dict, Tuple, TypeVar
+import os
 
 ERRORS = []
 ########################## argparse ##########################################
@@ -212,7 +213,7 @@
     Returns:
         dict: A dictionary containing gene data with gene IDs as keys and corresponding values.
     """
-    args = process_args()    
+ 
     for i in range(len(gene)):
         tmp = gene.iloc[i, 0]
         gene.iloc[i, 0] = tmp.strip().split('.')[0]
@@ -227,16 +228,16 @@
     if gene_dup:
         if gene_custom == None:
 
-            if str(args.rules_selector) == 'HMRcore':
-                gene_in_rule = pk.load(open(args.tool_dir + '/local/pickle files/HMRcore_genes.p', 'rb'))
+            if str(ARGS.rules_selector) == 'HMRcore':
+                gene_in_rule = pk.load(open(ARGS.tool_dir + '/local/pickle files/HMRcore_genes.p', 'rb'))
             
-            elif str(args.rules_selector) == 'Recon':
-                gene_in_rule = pk.load(open(args.tool_dir + '/local/pickle files/Recon_genes.p', 'rb'))
+            elif str(ARGS.rules_selector) == 'Recon':
+                gene_in_rule = pk.load(open(ARGS.tool_dir + '/local/pickle files/Recon_genes.p', 'rb'))
             
-            elif str(args.rules_selector) == 'ENGRO2':
-                gene_in_rule = pk.load(open(args.tool_dir + '/local/pickle files/ENGRO2_genes.p', 'rb'))
+            elif str(ARGS.rules_selector) == 'ENGRO2':
+                gene_in_rule = pk.load(open(ARGS.tool_dir + '/local/pickle files/ENGRO2_genes.p', 'rb'))
 
-            utils.logWarning(f"{args.tool_dir}'/local/pickle files/ENGRO2_genes.p'", ARGS.out_log)
+            utils.logWarning(f"{ARGS.tool_dir}'/local/pickle files/ENGRO2_genes.p'", ARGS.out_log)
 
             gene_in_rule = gene_in_rule.get(type_gene)
         
@@ -662,7 +663,7 @@
     # get args from frontend (related xml)
     global ARGS
     ARGS = process_args(args)
-    print(ARGS.rules_selector)
+
     # read dataset
     dataset = read_dataset(ARGS.input, "dataset")
     dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)
@@ -672,6 +673,7 @@
 
     # handle custom models
     model :utils.Model = ARGS.rules_selector
+
     if model is utils.Model.Custom:
         rules = load_custom_rules()
         reactions = list(rules.keys())
@@ -703,4 +705,4 @@
 
 ###############################################################################
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()