changeset 428:8cd0c70b0084 draft

Uploaded
author francesco_lapi
date Wed, 10 Sep 2025 13:21:41 +0000
parents 4a385fdb9e58
children 0485c4b1943d
files COBRAxy/marea_cluster.py COBRAxy/ras_to_bounds_beta.py COBRAxy/ras_to_bounds_beta.xml
diffstat 3 files changed, 65 insertions(+), 68 deletions(-) [+]
line wrap: on
line diff
--- a/COBRAxy/marea_cluster.py	Wed Sep 10 11:38:08 2025 +0000
+++ b/COBRAxy/marea_cluster.py	Wed Sep 10 13:21:41 2025 +0000
@@ -47,6 +47,12 @@
                         default = 'kmeans',
                         help = 'choose clustering algorythm')
     
+    parser.add_argument('-sc', '--scaling',
+                        type = str,
+                        choices = ['true', 'false'],
+                        default = 'true',
+                        help = 'choose if you want to scaling the data')
+    
     parser.add_argument('-k1', '--k_min', 
                         type = int,
                         default = 2,
@@ -514,6 +520,21 @@
         if any(val is None or np.isnan(val) for val in X[i]):
             X = X.drop(columns=[i])
             
+    if args.scaling == 'true':  # argparse yields the str 'true'/'false' (type=str), never a bool
+        list_to_remove = []
+        toll_std=1e-8
+        for i in X.columns:
+            mean_i = X[i].mean()
+            std_i = X[i].std()
+            if std_i >toll_std:
+                # standardise the feature to mean 0 and std 1
+                X[i] = (X[i]-mean_i)/std_i
+            else:
+                # (near-)constant feature: cannot be divided by its std, so drop it before clustering
+                list_to_remove.append(i)
+        if len(list_to_remove)>0:
+            X = X.drop(columns=list_to_remove)
+
     if args.k_max != None:
        numero_classi = X.shape[0]
        while args.k_max >= numero_classi:
--- a/COBRAxy/ras_to_bounds_beta.py	Wed Sep 10 11:38:08 2025 +0000
+++ b/COBRAxy/ras_to_bounds_beta.py	Wed Sep 10 13:21:41 2025 +0000
@@ -14,7 +14,8 @@
 import utils.reaction_parsing as reactionUtils
 import utils.model_utils as modelUtils
 
-# , medium
+# NOTE: the --ras_selector option (formerly $cond_ras.ras_choice) was removed; RAS input is now mandatory.
+# 
 
 ################################# process args ###############################
 def process_args(args :List[str] = None) -> argparse.Namespace:
@@ -50,11 +51,6 @@
     parser.add_argument('-rn', '--name',
                 type=str,
                 help = 'ras class names')
-    
-    parser.add_argument('-rs', '--ras_selector',
-                        required = True,
-                        type=utils.Bool("using_RAS"),
-                        help = 'ras selector')
 
     parser.add_argument('-cc', '--cell_class',
                     type = str,
@@ -304,15 +300,7 @@
             save_models, save_models_path, save_models_format
         ) for cellName, ras_row in ras.iterrows())
     else:
-        bounds = pd.DataFrame([(rxn.lower_bound, rxn.upper_bound) for rxn in model.reactions], index=rxns_ids, columns=["lower_bound", "upper_bound"])
-        newBounds = apply_ras_bounds(bounds, pd.Series([1]*len(rxns_ids), index=rxns_ids))
-        newBounds.to_csv(output_folder + "bounds.csv", sep='\t', index=True)
-
-        # Save model if requested
-        if save_models:
-            modified_model = apply_bounds_to_model(model, newBounds)
-            save_model(modified_model, "model_with_bounds", save_models_path, save_models_format)
-    
+        raise ValueError("RAS DataFrame is None. Cannot generate bounds without RAS data.")
     pass
 
 ############################# main ###########################################
@@ -329,34 +317,34 @@
     global ARGS
     ARGS = process_args(args)
 
-    if(ARGS.ras_selector == True):
-        ras_file_list = ARGS.input_ras.split(",")
-        ras_file_names = ARGS.name.split(",")
-        if len(ras_file_names) != len(set(ras_file_names)):
-            error_message = "Duplicated file names in the uploaded RAS matrices."
-            warning(error_message)
-            raise ValueError(error_message)
-            pass
-        ras_class_names = []
-        for file in ras_file_names:
-            ras_class_names.append(file.rsplit(".", 1)[0])
-        ras_list = []
-        class_assignments = pd.DataFrame(columns=["Patient_ID", "Class"])
-        for ras_matrix, ras_class_name in zip(ras_file_list, ras_class_names):
-            ras = read_dataset(ras_matrix, "ras dataset")
-            ras.replace("None", None, inplace=True)
-            ras.set_index("Reactions", drop=True, inplace=True)
-            ras = ras.T
-            ras = ras.astype(float)
-            if(len(ras_file_list)>1):
-                #append class name to patient id (dataframe index)
-                ras.index = [f"{idx}_{ras_class_name}" for idx in ras.index]
-            else:
-                ras.index = [f"{idx}" for idx in ras.index]
-            ras_list.append(ras)
-            for patient_id in ras.index:
-                class_assignments.loc[class_assignments.shape[0]] = [patient_id, ras_class_name]
-        
+
+    ras_file_list = ARGS.input_ras.split(",")
+    ras_file_names = ARGS.name.split(",")
+    if len(ras_file_names) != len(set(ras_file_names)):
+        error_message = "Duplicated file names in the uploaded RAS matrices."
+        warning(error_message)
+        raise ValueError(error_message)
+        pass
+    ras_class_names = []
+    for file in ras_file_names:
+        ras_class_names.append(file.rsplit(".", 1)[0])
+    ras_list = []
+    class_assignments = pd.DataFrame(columns=["Patient_ID", "Class"])
+    for ras_matrix, ras_class_name in zip(ras_file_list, ras_class_names):
+        ras = read_dataset(ras_matrix, "ras dataset")
+        ras.replace("None", None, inplace=True)
+        ras.set_index("Reactions", drop=True, inplace=True)
+        ras = ras.T
+        ras = ras.astype(float)
+        if(len(ras_file_list)>1):
+            #append class name to patient id (dataframe index)
+            ras.index = [f"{idx}_{ras_class_name}" for idx in ras.index]
+        else:
+            ras.index = [f"{idx}" for idx in ras.index]
+        ras_list.append(ras)
+        for patient_id in ras.index:
+            class_assignments.loc[class_assignments.shape[0]] = [patient_id, ras_class_name]
+    
         
         # Concatenate all ras DataFrames into a single DataFrame
         ras_combined = pd.concat(ras_list, axis=0)
@@ -372,15 +360,12 @@
     for key, value in validation.items():
         print(f"{key}: {value}")
 
-    if(ARGS.ras_selector == True):
-        generate_bounds_model(model, ras=ras_combined, output_folder=ARGS.output_path,
-                       save_models=ARGS.save_models, save_models_path=ARGS.save_models_path,
-                       save_models_format=ARGS.save_models_format)
-        class_assignments.to_csv(ARGS.cell_class, sep='\t', index=False)
-    else:
-        generate_bounds_model(model, output_folder=ARGS.output_path,
-                       save_models=ARGS.save_models, save_models_path=ARGS.save_models_path,
-                       save_models_format=ARGS.save_models_format)
+
+    generate_bounds_model(model, ras=ras_combined, output_folder=ARGS.output_path,
+                    save_models=ARGS.save_models, save_models_path=ARGS.save_models_path,
+                    save_models_format=ARGS.save_models_format)
+    class_assignments.to_csv(ARGS.cell_class, sep='\t', index=False)
+
 
     pass
         
--- a/COBRAxy/ras_to_bounds_beta.xml	Wed Sep 10 11:38:08 2025 +0000
+++ b/COBRAxy/ras_to_bounds_beta.xml	Wed Sep 10 13:21:41 2025 +0000
@@ -18,14 +18,12 @@
         --tool_dir $__tool_directory__
         --cell_class $cell_class
         --model_upload $model_upload
-        --ras_selector $cond_ras.ras_choice
         #set $names = ""
-        #if $cond_ras.ras_choice == "True"
-            --input_ras "${",".join(map(str, $cond_ras.input_ras))}"
-            #for $input_temp in $cond_ras.input_ras:
-                #set $names = $names + $input_temp.element_identifier + ","
-            #end for
-        #end if
+        --input_ras "${",".join(map(str, $input_ras))}"
+        #for $input_temp in $input_ras:
+            #set $names = $names + $input_temp.element_identifier + ","
+        #end for
+
         --save_models $save_models
         --save_models_path saved_models/
         --name "$names"
@@ -37,15 +35,8 @@
         <param name="model_upload" argument="--model_upload" type="data" format="csv,tsv,tabular" 
                 label="Model rules file:" help="Upload a CSV/TSV file containing reaction rules generated by the Model Initialization tool." />
 
-        <conditional name="cond_ras">
-			<param name="ras_choice" argument="--ras_choice" type="select" label="Do want to use RAS?">
-                	<option value="True" selected="true">Yes</option>
-                	<option value="False">No</option>
-        	</param>
-            <when value="True">
-                <param name="input_ras" argument="--input_ras" multiple="true" type="data" format="tabular, csv, tsv" label="RAS matrix:" />
-            </when>
-        </conditional>  
+        <param name="input_ras" argument="--input_ras" multiple="true" type="data" format="tabular, csv, tsv" label="RAS matrix:" />
+
 
         <param name="save_models" argument="--save_models" type="select" label="Save models with applied bounds?">
             <option value="False" selected="true">No</option>