comparison: COBRAxy/flux_simulation_beta.py @ 419:ed2c1f9e20ba (draft)
description: Uploaded
author:      francesco_lapi
date:        Tue, 09 Sep 2025 09:08:17 +0000
parents:     6b015d3184ab
children:    f9fe44c65772
--- COBRAxy/flux_simulation_beta.py (418:919b5b71a61c)
+++ COBRAxy/flux_simulation_beta.py (419:ed2c1f9e20ba)
@@ -7,11 +7,11 @@
 import cobra
 import utils.CBS_backend as CBS_backend
 from joblib import Parallel, delayed, cpu_count
 from cobra.sampling import OptGPSampler
 import sys
-import utils.general_utils as utils
+import utils.model_utils as model_utils


 ################################# process args ###############################
 def process_args(args :List[str] = None) -> argparse.Namespace:
     """
@@ -27,24 +27,30 @@
                                      description = 'process some value\'s')

     parser.add_argument("-mo", "--model_upload", type = str,
                         help = "path to input file with custom rules, if provided")

+    parser.add_argument("-mab", "--model_and_bounds", type = str,
+                        choices = ['True', 'False'],
+                        required = True,
+                        help = "upload mode: True for model+bounds, False for complete models")
+
+
     parser.add_argument('-ol', '--out_log',
                         help = "Output log")

     parser.add_argument('-td', '--tool_dir',
                         type = str,
                         required = True,
                         help = 'your tool directory')

     parser.add_argument('-in', '--input',
                         required = True,
                         type=str,
-                        help = 'inputs bounds')
+                        help = 'input bounds files or complete model files')

-    parser.add_argument('-ni', '--names',
+    parser.add_argument('-ni', '--name',
                         required = True,
                         type=str,
                         help = 'cell names')

     parser.add_argument('-a', '--algorithm',
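Aside (illustration only, not part of the changeset): the new --model_and_bounds flag is what later switches main() between the two upload modes, and it is compared as the string "True"/"False" rather than a boolean. A reduced sketch of just the options visible in this hunk shows the intended invocation shape; the real parser defines further options (algorithm, sampling and output settings) later in the file, and the file names below are placeholders.

    # Reduced sketch of the upload-mode flag; not the tool's full parser.
    import argparse

    demo = argparse.ArgumentParser(description="flux_simulation_beta upload modes (sketch)")
    demo.add_argument("-mo", "--model_upload", type=str, help="base model file (Mode 1)")
    demo.add_argument("-mab", "--model_and_bounds", type=str, choices=["True", "False"], required=True)
    demo.add_argument("-in", "--input", type=str, required=True, help="comma-separated files")
    demo.add_argument("-ni", "--name", type=str, required=True, help="comma-separated cell names")

    # Mode 1: one base model plus one bounds file per cell
    mode1 = demo.parse_args(["-mab", "True", "-mo", "model.csv",
                             "-in", "cellA_bounds.csv,cellB_bounds.csv", "-ni", "cellA,cellB"])

    # Mode 2: one complete model file per cell
    mode2 = demo.parse_args(["-mab", "False",
                             "-in", "cellA_model.csv,cellB_model.csv", "-ni", "cellA,cellB"])

    print(mode1.model_and_bounds == "True", mode2.model_and_bounds == "True")  # True False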
@@ -213,13 +219,14 @@
     for i in range(0, n_batches):
         os.remove(ARGS.output_path + "/" + model_name + '_'+ str(i)+'_CBS.csv')
     pass


-def model_sampler(model_input_original:cobra.Model, bounds_path:str, cell_name:str)-> List[pd.DataFrame]:
-    """
-    Prepares the model with bounds from the dataset and performs sampling and analysis based on the selected algorithm.
+
+def model_sampler_with_bounds(model_input_original: cobra.Model, bounds_path: str, cell_name: str) -> List[pd.DataFrame]:
+    """
+    MODE 1: Prepares the model with bounds from separate bounds file and performs sampling.

     Args:
         model_input_original (cobra.Model): The original COBRA model.
         bounds_path (str): Path to the CSV file containing the bounds dataset.
         cell_name (str): Name of the cell, used to generate filenames for output.
@@ -228,30 +235,45 @@
         List[pd.DataFrame]: A list of DataFrames containing statistics and analysis results.
     """

     model_input = model_input_original.copy()
     bounds_df = read_dataset(bounds_path, "bounds dataset")
-    for rxn_index, row in bounds_df.iterrows():
-        model_input.reactions.get_by_id(rxn_index).lower_bound = row.lower_bound
-        model_input.reactions.get_by_id(rxn_index).upper_bound = row.upper_bound
-
+
+    # Apply bounds to model
+    for rxn_index, row in bounds_df.iterrows():
+        try:
+            model_input.reactions.get_by_id(rxn_index).lower_bound = row.lower_bound
+            model_input.reactions.get_by_id(rxn_index).upper_bound = row.upper_bound
+        except KeyError:
+            warning(f"Warning: Reaction {rxn_index} not found in model. Skipping.")
+
+    return perform_sampling_and_analysis(model_input, cell_name)
+
+
+def perform_sampling_and_analysis(model_input: cobra.Model, cell_name: str) -> List[pd.DataFrame]:
+    """
+    Common function to perform sampling and analysis on a prepared model.
+
+    Args:
+        model_input (cobra.Model): The prepared COBRA model with bounds applied.
+        cell_name (str): Name of the cell, used to generate filenames for output.
+
+    Returns:
+        List[pd.DataFrame]: A list of DataFrames containing statistics and analysis results.
+    """

     if ARGS.algorithm == 'OPTGP':
         OPTGP_sampler(model_input, cell_name, ARGS.n_samples, ARGS.thinning, ARGS.n_batches, ARGS.seed)
-
     elif ARGS.algorithm == 'CBS':
         CBS_sampler(model_input, cell_name, ARGS.n_samples, ARGS.n_batches, ARGS.seed)

     df_mean, df_median, df_quantiles = fluxes_statistics(cell_name, ARGS.output_types)

     if("fluxes" not in ARGS.output_types):
         os.remove(ARGS.output_path + "/" + cell_name + '.csv')

-    returnList = []
-    returnList.append(df_mean)
-    returnList.append(df_median)
-    returnList.append(df_quantiles)
+    returnList = [df_mean, df_median, df_quantiles]

     df_pFBA, df_FVA, df_sensitivity = fluxes_analysis(model_input, cell_name, ARGS.output_type_analysis)

     if("pFBA" in ARGS.output_type_analysis):
         returnList.append(df_pFBA)
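Aside (illustrative sketch, not from the changeset): the heart of model_sampler_with_bounds is "copy the model, overwrite reaction bounds row by row, skip unknown reaction ids". The same pattern can be tried on cobrapy's bundled textbook model, with a made-up bounds table and plain pandas/print standing in for the tool's read_dataset and warning helpers; load_model may download the model on first use.

    # Sketch of the bounds-application step; reaction ids and bounds below are placeholders.
    import cobra
    import pandas as pd

    base_model = cobra.io.load_model("textbook")   # small E. coli core model used in cobrapy demos
    bounds_df = pd.DataFrame(
        {"lower_bound": [-5.0, 0.0], "upper_bound": [5.0, 8.0]},
        index=["EX_glc__D_e", "NOT_A_REACTION"],   # the second id is deliberately absent
    )

    model = base_model.copy()                      # keep the shared base model untouched
    for rxn_id, row in bounds_df.iterrows():
        try:
            rxn = model.reactions.get_by_id(rxn_id)
            rxn.lower_bound = row.lower_bound
            rxn.upper_bound = row.upper_bound
        except KeyError:
            print(f"Warning: Reaction {rxn_id} not found in model. Skipping.")

    print(model.reactions.get_by_id("EX_glc__D_e").bounds)   # (-5.0, 5.0)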
@@ -330,11 +352,11 @@
     for output_type in output_types:
         if(output_type == "pFBA"):
             model.objective = "Biomass"
             solution = cobra.flux_analysis.pfba(model)
             fluxes = solution.fluxes
-            df_pFBA.loc[0,[rxn._id for rxn in model.reactions]] = fluxes.tolist()
+            df_pFBA.loc[0,[rxn.id for rxn in model.reactions]] = fluxes.tolist()
             df_pFBA = df_pFBA.reset_index(drop=True)
             df_pFBA.index = [model_name]
             df_pFBA = df_pFBA.astype(float).round(6)
         elif(output_type == "FVA"):
             fva = cobra.flux_analysis.flux_variability_analysis(model, fraction_of_optimum=0, processes=1).round(8)
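Aside (sketch, not part of the changeset): the rxn._id to rxn.id change replaces cobrapy's private attribute with the public one; the surrounding pFBA and FVA calls are standard cobrapy API and can be exercised on the bundled textbook model (whose objective is not named "Biomass", so the objective line from the hunk is omitted here).

    # Sketch of the pFBA / FVA calls from the hunk, run on cobrapy's textbook model.
    import cobra
    from cobra.flux_analysis import flux_variability_analysis, pfba

    model = cobra.io.load_model("textbook")

    solution = pfba(model)                                # parsimonious FBA
    reaction_ids = [rxn.id for rxn in model.reactions]    # public .id, not the private ._id
    print(solution.fluxes[reaction_ids].round(6).head())

    fva = flux_variability_analysis(model, fraction_of_optimum=0, processes=1).round(8)
    print(fva.head())                                     # 'minimum' and 'maximum' per reaction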
@@ -369,42 +391,67 @@

     Returns:
         None
     """

-    num_processors = cpu_count()
+    num_processors = max(1, cpu_count() - 1)

     global ARGS
     ARGS = process_args(args)

     if not os.path.exists(ARGS.output_path):
         os.makedirs(ARGS.output_path)

-    #model_type :utils.Model = ARGS.model_selector
-    #if model_type is utils.Model.Custom:
-    #    model = model_type.getCOBRAmodel(customPath = utils.FilePath.fromStrPath(ARGS.model), customExtension = utils.FilePath.fromStrPath(ARGS.model_name).ext)
-    #else:
-    #    model = model_type.getCOBRAmodel(toolDir=ARGS.tool_dir)
-
-    model = utils.build_cobra_model_from_csv(ARGS.model_upload)
-
-    validation = utils.validate_model(model)
-
-    print("\n=== VALIDAZIONE MODELLO ===")
-    for key, value in validation.items():
-        print(f"{key}: {value}")
-
-    #Set solver verbosity to 1 to see warning and error messages only.
-    model.solver.configuration.verbosity = 1
-
-    ARGS.bounds = ARGS.input.split(",")
-    ARGS.bounds_name = ARGS.names.split(",")
-    ARGS.output_types = ARGS.output_type.split(",")
-    ARGS.output_type_analysis = ARGS.output_type_analysis.split(",")
-
-
-    results = Parallel(n_jobs=num_processors)(delayed(model_sampler)(model, bounds_path, cell_name) for bounds_path, cell_name in zip(ARGS.bounds, ARGS.bounds_name))
+    #ARGS.bounds = ARGS.input.split(",")
+    #ARGS.bounds_name = ARGS.name.split(",")
+    #ARGS.output_types = ARGS.output_type.split(",")
+    #ARGS.output_type_analysis = ARGS.output_type_analysis.split(",")
+
+    # --- Normalize inputs (the tool may pass comma-separated --input and either --name or --names) ---
+    ARGS.input_files = ARGS.input.split(",") if getattr(ARGS, "input", None) else []
+    ARGS.file_names = ARGS.name.split(",")
+    # output types (required) -> list
+    ARGS.output_types = ARGS.output_type.split(",") if getattr(ARGS, "output_type", None) else []
+    # optional analysis output types -> list or empty
+    ARGS.output_type_analysis = ARGS.output_type_analysis.split(",") if getattr(ARGS, "output_type_analysis", None) else []
+
+
+    if ARGS.model_and_bounds == "True":
+        # MODE 1: Model + bounds (separate files)
+        print("=== MODE 1: Model + Bounds (separate files) ===")
+
+        # Load base model
+        if not ARGS.model_upload:
+            sys.exit("Error: model_upload is required for Mode 1")
+
+        base_model = model_utils.build_cobra_model_from_csv(ARGS.model_upload)
+
+        validation = model_utils.validate_model(base_model)
+
+        print("\n=== VALIDAZIONE MODELLO ===")
+        for key, value in validation.items():
+            print(f"{key}: {value}")
+
+        #Set solver verbosity to 1 to see warning and error messages only.
+        base_model.solver.configuration.verbosity = 1
+
+        # Process each bounds file with the base model
+        results = Parallel(n_jobs=num_processors)(
+            delayed(model_sampler_with_bounds)(base_model, bounds_file, cell_name)
+            for bounds_file, cell_name in zip(ARGS.input_files, ARGS.file_names)
+        )
+
+    else:
+        # MODE 2: Multiple complete models
+        print("=== MODE 2: Multiple complete models ===")
+
+        # Process each complete model file
+        results = Parallel(n_jobs=num_processors)(
+            delayed(perform_sampling_and_analysis)(model_utils.build_cobra_model_from_csv(model_file), cell_name)
+            for model_file, cell_name in zip(ARGS.input_files, ARGS.file_names)
+        )
+

     all_mean = pd.concat([result[0] for result in results], ignore_index=False)
     all_median = pd.concat([result[1] for result in results], ignore_index=False)
     all_quantiles = pd.concat([result[2] for result in results], ignore_index=False)

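Aside (stripped-down sketch with placeholder workers, not the tool's code): the two modes differ only in what each joblib worker receives. Mode 1 ships one shared base model plus a per-cell bounds file, while Mode 2 ships one complete model file per cell; the string-valued flag and the comma-separated inputs are handled as in the hunk above.

    # Minimal sketch of the two-mode dispatch; worker bodies are placeholders.
    from joblib import Parallel, delayed, cpu_count

    def sample_with_bounds(base_model, bounds_file, cell_name):
        return f"{cell_name}: bounds from {bounds_file}"      # stand-in for model_sampler_with_bounds

    def sample_complete_model(model_file, cell_name):
        return f"{cell_name}: complete model {model_file}"    # stand-in for perform_sampling_and_analysis

    input_files = "cellA_bounds.csv,cellB_bounds.csv".split(",")   # comma-separated, as the tool passes them
    file_names = "cellA,cellB".split(",")
    model_and_bounds = "True"                                 # compared as a string, as in the hunk
    n_jobs = max(1, cpu_count() - 1)                          # leave one core free, as in the new code

    if model_and_bounds == "True":
        base_model = "placeholder for the loaded COBRA model"
        results = Parallel(n_jobs=n_jobs)(
            delayed(sample_with_bounds)(base_model, f, n)
            for f, n in zip(input_files, file_names)
        )
    else:
        results = Parallel(n_jobs=n_jobs)(
            delayed(sample_complete_model)(f, n)
            for f, n in zip(input_files, file_names)
        )

    print(results)   # ['cellA: bounds from cellA_bounds.csv', 'cellB: bounds from cellB_bounds.csv']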