Mercurial > repos > bimib > cobraxy
changeset 428:8cd0c70b0084 draft
Uploaded
author | francesco_lapi |
---|---|
date | Wed, 10 Sep 2025 13:21:41 +0000 |
parents | 4a385fdb9e58 |
children | 0485c4b1943d |
files | COBRAxy/marea_cluster.py COBRAxy/ras_to_bounds_beta.py COBRAxy/ras_to_bounds_beta.xml |
diffstat | 3 files changed, 65 insertions(+), 68 deletions(-) [+] |
line wrap: on
line diff
--- a/COBRAxy/marea_cluster.py Wed Sep 10 11:38:08 2025 +0000
+++ b/COBRAxy/marea_cluster.py Wed Sep 10 13:21:41 2025 +0000
@@ -47,6 +47,12 @@
 default = 'kmeans',
 help = 'choose clustering algorythm')

+ parser.add_argument('-sc', '--scaling',
+ type = str,
+ choices = ['true', 'false'],
+ default = 'true',
+ help = 'choose if you want to scaling the data')
+
 parser.add_argument('-k1', '--k_min',
 type = int,
 default = 2,
@@ -514,6 +520,21 @@
 if any(val is None or np.isnan(val) for val in X[i]):
 X = X.drop(columns=[i])

+ if args.scaling == True:
+ list_to_remove = []
+ toll_std=1e-8
+ for i in X.columns:
+ mean_i = X[i].mean()
+ std_i = X[i].std()
+ if std_i >toll_std:
+ #scaling with mean 0 and std 1
+ X[i] = (X[i]-mean_i)/std_i
+ else:
+ #remove feature because std = 0 during clustering
+ list_to_remove.append(i)
+ if len(list_to_remove)>0:
+ X = X.drop(columns=list_to_remove)
+
 if args.k_max != None:
 numero_classi = X.shape[0]
 while args.k_max >= numero_classi:
--- a/COBRAxy/ras_to_bounds_beta.py Wed Sep 10 11:38:08 2025 +0000
+++ b/COBRAxy/ras_to_bounds_beta.py Wed Sep 10 13:21:41 2025 +0000
@@ -14,7 +14,8 @@
 import utils.reaction_parsing as reactionUtils
 import utils.model_utils as modelUtils

-# , medium
+# ras_selector $cond_ras.ras_choice
+#

 ################################# process args ###############################
 def process_args(args :List[str] = None) -> argparse.Namespace:
@@ -50,11 +51,6 @@
 parser.add_argument('-rn', '--name',
 type=str,
 help = 'ras class names')
-
- parser.add_argument('-rs', '--ras_selector',
- required = True,
- type=utils.Bool("using_RAS"),
- help = 'ras selector')

 parser.add_argument('-cc', '--cell_class',
 type = str,
@@ -304,15 +300,7 @@
 save_models, save_models_path, save_models_format
 ) for cellName, ras_row in ras.iterrows())
 else:
- bounds = pd.DataFrame([(rxn.lower_bound, rxn.upper_bound) for rxn in model.reactions], index=rxns_ids, columns=["lower_bound", "upper_bound"])
- newBounds = apply_ras_bounds(bounds, pd.Series([1]*len(rxns_ids), index=rxns_ids))
- newBounds.to_csv(output_folder + "bounds.csv", sep='\t', index=True)
-
- # Save model if requested
- if save_models:
- modified_model = apply_bounds_to_model(model, newBounds)
- save_model(modified_model, "model_with_bounds", save_models_path, save_models_format)
-
+ raise ValueError("RAS DataFrame is None. Cannot generate bounds without RAS data.")
 pass

 ############################# main ###########################################
@@ -329,34 +317,34 @@
 global ARGS
 ARGS = process_args(args)

- if(ARGS.ras_selector == True):
- ras_file_list = ARGS.input_ras.split(",")
- ras_file_names = ARGS.name.split(",")
- if len(ras_file_names) != len(set(ras_file_names)):
- error_message = "Duplicated file names in the uploaded RAS matrices."
- warning(error_message)
- raise ValueError(error_message)
- pass
- ras_class_names = []
- for file in ras_file_names:
- ras_class_names.append(file.rsplit(".", 1)[0])
- ras_list = []
- class_assignments = pd.DataFrame(columns=["Patient_ID", "Class"])
- for ras_matrix, ras_class_name in zip(ras_file_list, ras_class_names):
- ras = read_dataset(ras_matrix, "ras dataset")
- ras.replace("None", None, inplace=True)
- ras.set_index("Reactions", drop=True, inplace=True)
- ras = ras.T
- ras = ras.astype(float)
- if(len(ras_file_list)>1):
- #append class name to patient id (dataframe index)
- ras.index = [f"{idx}_{ras_class_name}" for idx in ras.index]
- else:
- ras.index = [f"{idx}" for idx in ras.index]
- ras_list.append(ras)
- for patient_id in ras.index:
- class_assignments.loc[class_assignments.shape[0]] = [patient_id, ras_class_name]
-
+
+ ras_file_list = ARGS.input_ras.split(",")
+ ras_file_names = ARGS.name.split(",")
+ if len(ras_file_names) != len(set(ras_file_names)):
+ error_message = "Duplicated file names in the uploaded RAS matrices."
+ warning(error_message)
+ raise ValueError(error_message)
+ pass
+ ras_class_names = []
+ for file in ras_file_names:
+ ras_class_names.append(file.rsplit(".", 1)[0])
+ ras_list = []
+ class_assignments = pd.DataFrame(columns=["Patient_ID", "Class"])
+ for ras_matrix, ras_class_name in zip(ras_file_list, ras_class_names):
+ ras = read_dataset(ras_matrix, "ras dataset")
+ ras.replace("None", None, inplace=True)
+ ras.set_index("Reactions", drop=True, inplace=True)
+ ras = ras.T
+ ras = ras.astype(float)
+ if(len(ras_file_list)>1):
+ #append class name to patient id (dataframe index)
+ ras.index = [f"{idx}_{ras_class_name}" for idx in ras.index]
+ else:
+ ras.index = [f"{idx}" for idx in ras.index]
+ ras_list.append(ras)
+ for patient_id in ras.index:
+ class_assignments.loc[class_assignments.shape[0]] = [patient_id, ras_class_name]
+
 # Concatenate all ras DataFrames into a single DataFrame
 ras_combined = pd.concat(ras_list, axis=0)

@@ -372,15 +360,12 @@
 for key, value in validation.items():
 print(f"{key}: {value}")

- if(ARGS.ras_selector == True):
- generate_bounds_model(model, ras=ras_combined, output_folder=ARGS.output_path,
- save_models=ARGS.save_models, save_models_path=ARGS.save_models_path,
- save_models_format=ARGS.save_models_format)
- class_assignments.to_csv(ARGS.cell_class, sep='\t', index=False)
- else:
- generate_bounds_model(model, output_folder=ARGS.output_path,
- save_models=ARGS.save_models, save_models_path=ARGS.save_models_path,
- save_models_format=ARGS.save_models_format)
+
+ generate_bounds_model(model, ras=ras_combined, output_folder=ARGS.output_path,
+ save_models=ARGS.save_models, save_models_path=ARGS.save_models_path,
+ save_models_format=ARGS.save_models_format)
+ class_assignments.to_csv(ARGS.cell_class, sep='\t', index=False)
+
 pass
--- a/COBRAxy/ras_to_bounds_beta.xml Wed Sep 10 11:38:08 2025 +0000
+++ b/COBRAxy/ras_to_bounds_beta.xml Wed Sep 10 13:21:41 2025 +0000
@@ -18,14 +18,12 @@
 --tool_dir $__tool_directory__
 --cell_class $cell_class
 --model_upload $model_upload
- --ras_selector $cond_ras.ras_choice
 #set $names = ""
- #if $cond_ras.ras_choice == "True"
- --input_ras "${",".join(map(str, $cond_ras.input_ras))}"
- #for $input_temp in $cond_ras.input_ras:
- #set $names = $names + $input_temp.element_identifier + ","
- #end for
- #end if
+ --input_ras "${",".join(map(str, $input_ras))}"
+ #for $input_temp in $input_ras:
+ #set $names = $names + $input_temp.element_identifier + ","
+ #end for
+
 --save_models $save_models
 --save_models_path saved_models/
 --name "$names"
@@ -37,15 +35,8 @@
 <param name="model_upload" argument="--model_upload" type="data" format="csv,tsv,tabular" label="Model rules file:" help="Upload a CSV/TSV file containing reaction rules generated by the Model Initialization tool." />

- <conditional name="cond_ras">
- <param name="ras_choice" argument="--ras_choice" type="select" label="Do want to use RAS?">
- <option value="True" selected="true">Yes</option>
- <option value="False">No</option>
- </param>
- <when value="True">
- <param name="input_ras" argument="--input_ras" multiple="true" type="data" format="tabular, csv, tsv" label="RAS matrix:" />
- </when>
- </conditional>
+ <param name="input_ras" argument="--input_ras" multiple="true" type="data" format="tabular, csv, tsv" label="RAS matrix:" />
+
 <param name="save_models" argument="--save_models" type="select" label="Save models with applied bounds?">
 <option value="False" selected="true">No</option>