Mercurial > repos > bimib > cobraxy
comparison COBRAxy/flux_simulation.py @ 489:97eea560a10f draft
Uploaded
| author | francesco_lapi |
|---|---|
| date | Mon, 29 Sep 2025 10:33:26 +0000 |
| parents | c561c060a55f |
| children | ded8a0d6cb6d |
comparison
equal
deleted
inserted
replaced
| 488:e0bcc61b2feb | 489:97eea560a10f |
|---|---|
| 1 """ | |
| 2 Flux sampling and analysis utilities for COBRA models. | |
| 3 | |
| 4 This script supports two modes: | |
| 5 - Mode 1 (model_and_bounds=True): load a base model and apply bounds from | |
| 6 separate files before sampling. | |
| 7 - Mode 2 (model_and_bounds=False): load complete models and sample directly. | |
| 8 | |
| 9 Sampling algorithms supported: OPTGP and CBS. Outputs include flux samples | |
| 10 and optional analyses (pFBA, FVA, sensitivity), saved as tabular files. | |
| 11 """ | |
| 12 | |
| 1 import argparse | 13 import argparse |
| 2 import utils.general_utils as utils | 14 import utils.general_utils as utils |
| 3 from typing import Optional, List | 15 from typing import List |
| 4 import os | 16 import os |
| 17 import pandas as pd | |
| 5 import numpy as np | 18 import numpy as np |
| 6 import pandas as pd | |
| 7 import cobra | 19 import cobra |
| 8 import utils.CBS_backend as CBS_backend | 20 import utils.CBS_backend as CBS_backend |
| 9 from joblib import Parallel, delayed, cpu_count | 21 from joblib import Parallel, delayed, cpu_count |
| 10 from cobra.sampling import OptGPSampler | 22 from cobra.sampling import OptGPSampler |
| 11 import sys | 23 import sys |
| 24 import utils.model_utils as model_utils | |
| 25 | |
| 12 | 26 |
| 13 ################################# process args ############################### | 27 ################################# process args ############################### |
| 14 def process_args(args :List[str] = None) -> argparse.Namespace: | 28 def process_args(args: List[str] = None) -> argparse.Namespace: |
| 15 """ | 29 """ |
| 16 Processes command-line arguments. | 30 Processes command-line arguments. |
| 17 | 31 |
| 18 Args: | 32 Args: |
| 19 args (list): List of command-line arguments. | 33 args (list): List of command-line arguments. |
| 20 | 34 |
| 21 Returns: | 35 Returns: |
| 22 Namespace: An object containing parsed arguments. | 36 Namespace: An object containing parsed arguments. |
| 23 """ | 37 """ |
| 24 parser = argparse.ArgumentParser(usage = '%(prog)s [options]', | 38 parser = argparse.ArgumentParser(usage='%(prog)s [options]', |
| 25 description = 'process some value\'s') | 39 description='process some value\'s') |
| 26 | 40 |
| 27 parser.add_argument('-ol', '--out_log', | 41 parser.add_argument("-mo", "--model_upload", type=str, |
| 28 help = "Output log") | 42 help="path to input file with custom rules, if provided") |
| 43 | |
| 44 parser.add_argument("-mab", "--model_and_bounds", type=str, | |
| 45 choices=['True', 'False'], | |
| 46 required=True, | |
| 47 help="upload mode: True for model+bounds, False for complete models") | |
| 48 | |
| 49 parser.add_argument("-ens", "--sampling_enabled", type=str, | |
| 50 choices=['true', 'false'], | |
| 51 required=True, | |
| 52 help="enable sampling: 'true' for sampling, 'false' for no sampling") | |
| 53 | |
| 54 parser.add_argument('-ol', '--out_log', | |
| 55 help="Output log") | |
| 29 | 56 |
| 30 parser.add_argument('-td', '--tool_dir', | 57 parser.add_argument('-td', '--tool_dir', |
| 31 type = str, | 58 type=str, |
| 32 required = True, | 59 required=True, |
| 33 help = 'your tool directory') | 60 help='your tool directory') |
| 34 | 61 |
| 35 parser.add_argument('-in', '--input', | 62 parser.add_argument('-in', '--input', |
| 36 required = True, | 63 required=True, |
| 37 type=str, | 64 type=str, |
| 38 help = 'inputs bounds') | 65 help='input bounds files or complete model files') |
| 39 | 66 |
| 40 parser.add_argument('-ni', '--names', | 67 parser.add_argument('-ni', '--name', |
| 41 required = True, | 68 required=True, |
| 42 type=str, | 69 type=str, |
| 43 help = 'cell names') | 70 help='cell names') |
| 44 | |
| 45 parser.add_argument( | |
| 46 '-ms', '--model_selector', | |
| 47 type = utils.Model, default = utils.Model.ENGRO2, choices = [utils.Model.ENGRO2, utils.Model.Custom], | |
| 48 help = 'chose which type of model you want use') | |
| 49 | |
| 50 parser.add_argument("-mo", "--model", type = str) | |
| 51 | |
| 52 parser.add_argument("-mn", "--model_name", type = str, help = "custom mode name") | |
| 53 | 71 |
| 54 parser.add_argument('-a', '--algorithm', | 72 parser.add_argument('-a', '--algorithm', |
| 55 type = str, | 73 type=str, |
| 56 choices = ['OPTGP', 'CBS'], | 74 choices=['OPTGP', 'CBS'], |
| 57 required = True, | 75 required=True, |
| 58 help = 'choose sampling algorithm') | 76 help='choose sampling algorithm') |
| 59 | 77 |
| 60 parser.add_argument('-th', '--thinning', | 78 parser.add_argument('-th', '--thinning', |
| 61 type = int, | 79 type=int, |
| 62 default= 100, | 80 default=100, |
| 81 required=True, | |
| 82 help='choose thinning') | |
| 83 | |
| 84 parser.add_argument('-ns', '--n_samples', | |
| 85 type=int, | |
| 86 required=True, | |
| 87 help='choose how many samples (set to 0 for optimization only)') | |
| 88 | |
| 89 parser.add_argument('-sd', '--seed', | |
| 90 type=int, | |
| 91 required=True, | |
| 92 help='seed for random number generation') | |
| 93 | |
| 94 parser.add_argument('-nb', '--n_batches', | |
| 95 type=int, | |
| 96 required=True, | |
| 97 help='choose how many batches') | |
| 98 | |
| 99 parser.add_argument('-opt', '--perc_opt', | |
| 100 type=float, | |
| 101 default=0.9, | |
| 63 required=False, | 102 required=False, |
| 64 help = 'choose thinning') | 103 help='choose the fraction of optimality for FVA (0-1)') |
| 65 | 104 |
| 66 parser.add_argument('-ns', '--n_samples', | 105 parser.add_argument('-ot', '--output_type', |
| 67 type = int, | 106 type=str, |
| 68 required = True, | 107 required=True, |
| 69 help = 'choose how many samples') | 108 help='output type for sampling results') |
| 70 | 109 |
| 71 parser.add_argument('-sd', '--seed', | 110 parser.add_argument('-ota', '--output_type_analysis', |
| 72 type = int, | 111 type=str, |
| 73 required = True, | 112 required=False, |
| 74 help = 'seed') | 113 help='output type analysis (optimization methods)') |
| 75 | 114 |
| 76 parser.add_argument('-nb', '--n_batches', | 115 parser.add_argument('-idop', '--output_path', |
| 77 type = int, | 116 type=str, |
| 78 required = True, | |
| 79 help = 'choose how many batches') | |
| 80 | |
| 81 parser.add_argument('-ot', '--output_type', | |
| 82 type = str, | |
| 83 required = True, | |
| 84 help = 'output type') | |
| 85 | |
| 86 parser.add_argument('-ota', '--output_type_analysis', | |
| 87 type = str, | |
| 88 required = False, | |
| 89 help = 'output type analysis') | |
| 90 | |
| 91 parser.add_argument('-idop', '--output_path', | |
| 92 type = str, | |
| 93 default='flux_simulation', | 117 default='flux_simulation', |
| 94 help = 'output path for maps') | 118 help='output path for maps') |
| 95 | 119 |
| 96 ARGS = parser.parse_args(args) | 120 ARGS = parser.parse_args(args) |
| 97 return ARGS | 121 return ARGS |
| 98 | |
| 99 ########################### warning ########################################### | 122 ########################### warning ########################################### |
| 100 def warning(s :str) -> None: | 123 def warning(s :str) -> None: |
| 101 """ | 124 """ |
| 102 Log a warning message to an output log file and print it to the console. | 125 Log a warning message to an output log file and print it to the console. |
| 103 | 126 |
| 111 log.write(s + "\n\n") | 134 log.write(s + "\n\n") |
| 112 print(s) | 135 print(s) |
| 113 | 136 |
| 114 | 137 |
| 115 def write_to_file(dataset: pd.DataFrame, name: str, keep_index:bool=False)->None: | 138 def write_to_file(dataset: pd.DataFrame, name: str, keep_index:bool=False)->None: |
| 139 """ | |
| 140 Write a DataFrame to a TSV file under ARGS.output_path with a given base name. | |
| 141 | |
| 142 Args: | |
| 143 dataset: The DataFrame to write. | |
| 144 name: Base file name (without extension). | |
| 145 keep_index: Whether to keep the DataFrame index in the file. | |
| 146 | |
| 147 Returns: | |
| 148 None | |
| 149 """ | |
| 116 dataset.index.name = 'Reactions' | 150 dataset.index.name = 'Reactions' |
| 117 dataset.to_csv(ARGS.output_path + "/" + name + ".csv", sep = '\t', index = keep_index) | 151 dataset.to_csv(ARGS.output_path + "/" + name + ".csv", sep = '\t', index = keep_index) |
| 118 | 152 |
| 119 ############################ dataset input #################################### | 153 ############################ dataset input #################################### |
| 120 def read_dataset(data :str, name :str) -> pd.DataFrame: | 154 def read_dataset(data :str, name :str) -> pd.DataFrame: |
| 140 sys.exit('Execution aborted: wrong format of ' + name + '\n') | 174 sys.exit('Execution aborted: wrong format of ' + name + '\n') |
| 141 return dataset | 175 return dataset |
| 142 | 176 |
| 143 | 177 |
| 144 | 178 |
| 145 def OPTGP_sampler(model:cobra.Model, model_name:str, n_samples:int=1000, thinning:int=100, n_batches:int=1, seed:int=0)-> None: | 179 def OPTGP_sampler(model: cobra.Model, model_name: str, n_samples: int = 1000, thinning: int = 100, n_batches: int = 1, seed: int = 0) -> None: |
| 146 """ | 180 """ |
| 147 Samples from the OPTGP (Optimal Global Perturbation) algorithm and saves the results to CSV files. | 181 Samples from the OPTGP (Optimal Global Perturbation) algorithm and saves the results to CSV files. |
| 148 | 182 |
| 149 Args: | 183 Args: |
| 150 model (cobra.Model): The COBRA model to sample from. | 184 model (cobra.Model): The COBRA model to sample from. |
| 151 model_name (str): The name of the model, used in naming output files. | 185 model_name (str): The name of the model, used in naming output files. |
| 152 n_samples (int, optional): Number of samples per batch. Default is 1000. | 186 n_samples (int, optional): Number of samples per batch. Default is 1000. |
| 153 thinning (int, optional): Thinning parameter for the sampler. Default is 100. | 187 thinning (int, optional): Thinning parameter for the sampler. Default is 100. |
| 154 n_batches (int, optional): Number of batches to run. Default is 1. | 188 n_batches (int, optional): Number of batches to run. Default is 1. |
| 155 seed (int, optional): Random seed for reproducibility. Default is 0. | 189 seed (int, optional): Random seed for reproducibility. Default is 0. |
| 156 | 190 |
| 157 Returns: | 191 Returns: |
| 158 None | 192 None |
| 159 """ | 193 """ |
| 160 | 194 import numpy as np |
| 161 for i in range(0, n_batches): | 195 |
| 196 # Get reaction IDs for consistent column ordering | |
| 197 reaction_ids = [rxn.id for rxn in model.reactions] | |
| 198 | |
| 199 # Sample and save each batch as numpy file | |
| 200 for i in range(n_batches): | |
| 162 optgp = OptGPSampler(model, thinning, seed) | 201 optgp = OptGPSampler(model, thinning, seed) |
| 163 samples = optgp.sample(n_samples) | 202 samples = optgp.sample(n_samples) |
| 164 samples.to_csv(ARGS.output_path + "/" + model_name + '_'+ str(i)+'_OPTGP.csv', index=False) | 203 |
| 165 seed+=1 | 204 # Save as numpy array (more memory efficient) |
| 166 samplesTotal = pd.DataFrame() | 205 batch_filename = f"{ARGS.output_path}/{model_name}_{i}_OPTGP.npy" |
| 167 for i in range(0, n_batches): | 206 np.save(batch_filename, samples.to_numpy()) |
| 168 samples_batch = pd.read_csv(ARGS.output_path + "/" + model_name + '_'+ str(i)+'_OPTGP.csv') | 207 |
| 169 samplesTotal = pd.concat([samplesTotal, samples_batch], ignore_index = True) | 208 seed += 1 |
| 170 | 209 |
| 210 # Merge all batches into a single DataFrame | |
| 211 all_samples = [] | |
| 212 | |
| 213 for i in range(n_batches): | |
| 214 batch_filename = f"{ARGS.output_path}/{model_name}_{i}_OPTGP.npy" | |
| 215 batch_data = np.load(batch_filename, allow_pickle=True) | |
| 216 all_samples.append(batch_data) | |
| 217 | |
| 218 # Concatenate all batches | |
| 219 samplesTotal_array = np.vstack(all_samples) | |
| 220 | |
| 221 # Convert back to DataFrame with proper column names | |
| 222 samplesTotal = pd.DataFrame(samplesTotal_array, columns=reaction_ids) | |
| 223 | |
| 224 # Save the final merged result as CSV | |
| 171 write_to_file(samplesTotal.T, model_name, True) | 225 write_to_file(samplesTotal.T, model_name, True) |
| 172 | 226 |
| 173 for i in range(0, n_batches): | 227 # Clean up temporary numpy files |
| 174 os.remove(ARGS.output_path + "/" + model_name + '_'+ str(i)+'_OPTGP.csv') | 228 for i in range(n_batches): |
| 175 pass | 229 batch_filename = f"{ARGS.output_path}/{model_name}_{i}_OPTGP.npy" |
| 176 | 230 if os.path.exists(batch_filename): |
| 177 | 231 os.remove(batch_filename) |
| 178 def CBS_sampler(model:cobra.Model, model_name:str, n_samples:int=1000, n_batches:int=1, seed:int=0)-> None: | 232 |
| 233 | |
| 234 def CBS_sampler(model: cobra.Model, model_name: str, n_samples: int = 1000, n_batches: int = 1, seed: int = 0) -> None: | |
| 179 """ | 235 """ |
| 180 Samples using the CBS (Constraint-based Sampling) algorithm and saves the results to CSV files. | 236 Samples using the CBS (Constraint-based Sampling) algorithm and saves the results to CSV files. |
| 181 | 237 |
| 182 Args: | 238 Args: |
| 183 model (cobra.Model): The COBRA model to sample from. | 239 model (cobra.Model): The COBRA model to sample from. |
| 184 model_name (str): The name of the model, used in naming output files. | 240 model_name (str): The name of the model, used in naming output files. |
| 185 n_samples (int, optional): Number of samples per batch. Default is 1000. | 241 n_samples (int, optional): Number of samples per batch. Default is 1000. |
| 186 n_batches (int, optional): Number of batches to run. Default is 1. | 242 n_batches (int, optional): Number of batches to run. Default is 1. |
| 187 seed (int, optional): Random seed for reproducibility. Default is 0. | 243 seed (int, optional): Random seed for reproducibility. Default is 0. |
| 188 | 244 |
| 189 Returns: | 245 Returns: |
| 190 None | 246 None |
| 191 """ | 247 """ |
| 192 | 248 import numpy as np |
| 193 df_FVA = cobra.flux_analysis.flux_variability_analysis(model,fraction_of_optimum=0).round(6) | 249 |
| 194 | 250 # Get reaction IDs for consistent column ordering |
| 195 df_coefficients = CBS_backend.randomObjectiveFunction(model, n_samples*n_batches, df_FVA, seed=seed) | 251 reaction_ids = [reaction.id for reaction in model.reactions] |
| 196 | 252 |
| 197 for i in range(0, n_batches): | 253 # Perform FVA analysis once for all batches |
| 198 samples = pd.DataFrame(columns =[reaction.id for reaction in model.reactions], index = range(n_samples)) | 254 df_FVA = cobra.flux_analysis.flux_variability_analysis(model, fraction_of_optimum=0).round(6) |
| 255 | |
| 256 # Generate random objective functions for all samples across all batches | |
| 257 df_coefficients = CBS_backend.randomObjectiveFunction(model, n_samples * n_batches, df_FVA, seed=seed) | |
| 258 | |
| 259 # Sample and save each batch as numpy file | |
| 260 for i in range(n_batches): | |
| 261 samples = pd.DataFrame(columns=reaction_ids, index=range(n_samples)) | |
| 262 | |
| 199 try: | 263 try: |
| 200 CBS_backend.randomObjectiveFunctionSampling(model, n_samples, df_coefficients.iloc[:,i*n_samples:(i+1)*n_samples], samples) | 264 CBS_backend.randomObjectiveFunctionSampling( |
| 265 model, | |
| 266 n_samples, | |
| 267 df_coefficients.iloc[:, i * n_samples:(i + 1) * n_samples], | |
| 268 samples | |
| 269 ) | |
| 201 except Exception as e: | 270 except Exception as e: |
| 202 utils.logWarning( | 271 utils.logWarning( |
| 203 "Warning: GLPK solver has failed for " + model_name + ". Trying with COBRA interface. Error:" + str(e), | 272 f"Warning: GLPK solver has failed for {model_name}. Trying with COBRA interface. Error: {str(e)}", |
| 204 ARGS.out_log) | 273 ARGS.out_log |
| 205 CBS_backend.randomObjectiveFunctionSampling_cobrapy(model, n_samples, df_coefficients.iloc[:,i*n_samples:(i+1)*n_samples], | 274 ) |
| 206 samples) | 275 CBS_backend.randomObjectiveFunctionSampling_cobrapy( |
| 207 utils.logWarning(ARGS.output_path + "/" + model_name + '_'+ str(i)+'_CBS.csv', ARGS.out_log) | 276 model, |
| 208 samples.to_csv(ARGS.output_path + "/" + model_name + '_'+ str(i)+'_CBS.csv', index=False) | 277 n_samples, |
| 209 | 278 df_coefficients.iloc[:, i * n_samples:(i + 1) * n_samples], |
| 210 samplesTotal = pd.DataFrame() | 279 samples |
| 211 for i in range(0, n_batches): | 280 ) |
| 212 samples_batch = pd.read_csv(ARGS.output_path + "/" + model_name + '_'+ str(i)+'_CBS.csv') | 281 |
| 213 samplesTotal = pd.concat([samplesTotal, samples_batch], ignore_index = True) | 282 # Save as numpy array (more memory efficient) |
| 214 | 283 batch_filename = f"{ARGS.output_path}/{model_name}_{i}_CBS.npy" |
| 284 utils.logWarning(batch_filename, ARGS.out_log) | |
| 285 np.save(batch_filename, samples.to_numpy()) | |
| 286 | |
| 287 # Merge all batches into a single DataFrame | |
| 288 all_samples = [] | |
| 289 | |
| 290 for i in range(n_batches): | |
| 291 batch_filename = f"{ARGS.output_path}/{model_name}_{i}_CBS.npy" | |
| 292 batch_data = np.load(batch_filename, allow_pickle=True) | |
| 293 all_samples.append(batch_data) | |
| 294 | |
| 295 # Concatenate all batches | |
| 296 samplesTotal_array = np.vstack(all_samples) | |
| 297 | |
| 298 # Convert back to DataFrame with proper column namesq | |
| 299 samplesTotal = pd.DataFrame(samplesTotal_array, columns=reaction_ids) | |
| 300 | |
| 301 # Save the final merged result as CSV | |
| 215 write_to_file(samplesTotal.T, model_name, True) | 302 write_to_file(samplesTotal.T, model_name, True) |
| 216 | 303 |
| 217 for i in range(0, n_batches): | 304 # Clean up temporary numpy files |
| 218 os.remove(ARGS.output_path + "/" + model_name + '_'+ str(i)+'_CBS.csv') | 305 for i in range(n_batches): |
| 219 pass | 306 batch_filename = f"{ARGS.output_path}/{model_name}_{i}_CBS.npy" |
| 220 | 307 if os.path.exists(batch_filename): |
| 221 | 308 os.remove(batch_filename) |
| 222 def model_sampler(model_input_original:cobra.Model, bounds_path:str, cell_name:str)-> List[pd.DataFrame]: | 309 |
| 223 """ | 310 |
| 224 Prepares the model with bounds from the dataset and performs sampling and analysis based on the selected algorithm. | 311 |
| 312 def model_sampler_with_bounds(model_input_original: cobra.Model, bounds_path: str, cell_name: str) -> List[pd.DataFrame]: | |
| 313 """ | |
| 314 MODE 1: Prepares the model with bounds from separate bounds file and performs sampling. | |
| 225 | 315 |
| 226 Args: | 316 Args: |
| 227 model_input_original (cobra.Model): The original COBRA model. | 317 model_input_original (cobra.Model): The original COBRA model. |
| 228 bounds_path (str): Path to the CSV file containing the bounds dataset. | 318 bounds_path (str): Path to the CSV file containing the bounds dataset. |
| 229 cell_name (str): Name of the cell, used to generate filenames for output. | 319 cell_name (str): Name of the cell, used to generate filenames for output. |
| 232 List[pd.DataFrame]: A list of DataFrames containing statistics and analysis results. | 322 List[pd.DataFrame]: A list of DataFrames containing statistics and analysis results. |
| 233 """ | 323 """ |
| 234 | 324 |
| 235 model_input = model_input_original.copy() | 325 model_input = model_input_original.copy() |
| 236 bounds_df = read_dataset(bounds_path, "bounds dataset") | 326 bounds_df = read_dataset(bounds_path, "bounds dataset") |
| 327 | |
| 328 # Apply bounds to model | |
| 237 for rxn_index, row in bounds_df.iterrows(): | 329 for rxn_index, row in bounds_df.iterrows(): |
| 238 model_input.reactions.get_by_id(rxn_index).lower_bound = row.lower_bound | 330 try: |
| 239 model_input.reactions.get_by_id(rxn_index).upper_bound = row.upper_bound | 331 model_input.reactions.get_by_id(rxn_index).lower_bound = row.lower_bound |
| 240 | 332 model_input.reactions.get_by_id(rxn_index).upper_bound = row.upper_bound |
| 241 | 333 except KeyError: |
| 242 if ARGS.algorithm == 'OPTGP': | 334 warning(f"Warning: Reaction {rxn_index} not found in model. Skipping.") |
| 243 OPTGP_sampler(model_input, cell_name, ARGS.n_samples, ARGS.thinning, ARGS.n_batches, ARGS.seed) | 335 |
| 244 | 336 return perform_sampling_and_analysis(model_input, cell_name) |
| 245 elif ARGS.algorithm == 'CBS': | 337 |
| 246 CBS_sampler(model_input, cell_name, ARGS.n_samples, ARGS.n_batches, ARGS.seed) | 338 |
| 247 | 339 def perform_sampling_and_analysis(model_input: cobra.Model, cell_name: str) -> List[pd.DataFrame]: |
| 248 df_mean, df_median, df_quantiles = fluxes_statistics(cell_name, ARGS.output_types) | 340 """ |
| 249 | 341 Common function to perform sampling and analysis on a prepared model. |
| 250 if("fluxes" not in ARGS.output_types): | 342 |
| 251 os.remove(ARGS.output_path + "/" + cell_name + '.csv') | 343 Args: |
| 344 model_input (cobra.Model): The prepared COBRA model with bounds applied. | |
| 345 cell_name (str): Name of the cell, used to generate filenames for output. | |
| 346 | |
| 347 Returns: | |
| 348 List[pd.DataFrame]: A list of DataFrames containing statistics and analysis results. | |
| 349 """ | |
| 252 | 350 |
| 253 returnList = [] | 351 returnList = [] |
| 254 returnList.append(df_mean) | 352 |
| 255 returnList.append(df_median) | 353 if ARGS.sampling_enabled == "true": |
| 256 returnList.append(df_quantiles) | 354 |
| 355 if ARGS.algorithm == 'OPTGP': | |
| 356 OPTGP_sampler(model_input, cell_name, ARGS.n_samples, ARGS.thinning, ARGS.n_batches, ARGS.seed) | |
| 357 elif ARGS.algorithm == 'CBS': | |
| 358 CBS_sampler(model_input, cell_name, ARGS.n_samples, ARGS.n_batches, ARGS.seed) | |
| 359 | |
| 360 df_mean, df_median, df_quantiles = fluxes_statistics(cell_name, ARGS.output_types) | |
| 361 | |
| 362 if("fluxes" not in ARGS.output_types): | |
| 363 os.remove(ARGS.output_path + "/" + cell_name + '.csv') | |
| 364 | |
| 365 returnList = [df_mean, df_median, df_quantiles] | |
| 257 | 366 |
| 258 df_pFBA, df_FVA, df_sensitivity = fluxes_analysis(model_input, cell_name, ARGS.output_type_analysis) | 367 df_pFBA, df_FVA, df_sensitivity = fluxes_analysis(model_input, cell_name, ARGS.output_type_analysis) |
| 259 | 368 |
| 260 if("pFBA" in ARGS.output_type_analysis): | 369 if("pFBA" in ARGS.output_type_analysis): |
| 261 returnList.append(df_pFBA) | 370 returnList.append(df_pFBA) |
| 314 | 423 |
| 315 return df_mean, df_median, df_quantiles | 424 return df_mean, df_median, df_quantiles |
| 316 | 425 |
| 317 def fluxes_analysis(model:cobra.Model, model_name:str, output_types:List)-> List[pd.DataFrame]: | 426 def fluxes_analysis(model:cobra.Model, model_name:str, output_types:List)-> List[pd.DataFrame]: |
| 318 """ | 427 """ |
| 319 Performs flux analysis including pFBA, FVA, and sensitivity analysis. | 428 Performs flux analysis including pFBA, FVA, and sensitivity analysis. The objective function |
| 429 is assumed to be already set in the model. | |
| 320 | 430 |
| 321 Args: | 431 Args: |
| 322 model (cobra.Model): The COBRA model to analyze. | 432 model (cobra.Model): The COBRA model to analyze. |
| 323 model_name (str): Name of the model, used in filenames for output. | 433 model_name (str): Name of the model, used in filenames for output. |
| 324 output_types (List[str]): Types of analysis to perform (pFBA, FVA, sensitivity). | 434 output_types (List[str]): Types of analysis to perform (pFBA, FVA, sensitivity). |
| 331 df_FVA= pd.DataFrame() | 441 df_FVA= pd.DataFrame() |
| 332 df_sensitivity= pd.DataFrame() | 442 df_sensitivity= pd.DataFrame() |
| 333 | 443 |
| 334 for output_type in output_types: | 444 for output_type in output_types: |
| 335 if(output_type == "pFBA"): | 445 if(output_type == "pFBA"): |
| 336 model.objective = "Biomass" | |
| 337 solution = cobra.flux_analysis.pfba(model) | 446 solution = cobra.flux_analysis.pfba(model) |
| 338 fluxes = solution.fluxes | 447 fluxes = solution.fluxes |
| 339 df_pFBA.loc[0,[rxn._id for rxn in model.reactions]] = fluxes.tolist() | 448 df_pFBA.loc[0,[rxn.id for rxn in model.reactions]] = fluxes.tolist() |
| 340 df_pFBA = df_pFBA.reset_index(drop=True) | 449 df_pFBA = df_pFBA.reset_index(drop=True) |
| 341 df_pFBA.index = [model_name] | 450 df_pFBA.index = [model_name] |
| 342 df_pFBA = df_pFBA.astype(float).round(6) | 451 df_pFBA = df_pFBA.astype(float).round(6) |
| 343 elif(output_type == "FVA"): | 452 elif(output_type == "FVA"): |
| 344 fva = cobra.flux_analysis.flux_variability_analysis(model, fraction_of_optimum=0, processes=1).round(8) | 453 fva = cobra.flux_analysis.flux_variability_analysis(model, fraction_of_optimum=ARGS.perc_opt, processes=1).round(8) |
| 345 columns = [] | 454 columns = [] |
| 346 for rxn in fva.index.to_list(): | 455 for rxn in fva.index.to_list(): |
| 347 columns.append(rxn + "_min") | 456 columns.append(rxn + "_min") |
| 348 columns.append(rxn + "_max") | 457 columns.append(rxn + "_max") |
| 349 df_FVA= pd.DataFrame(columns = columns) | 458 df_FVA= pd.DataFrame(columns = columns) |
| 352 df_FVA.loc[0, index_rxn+ "_max"] = fva.loc[index_rxn, "maximum"] | 461 df_FVA.loc[0, index_rxn+ "_max"] = fva.loc[index_rxn, "maximum"] |
| 353 df_FVA = df_FVA.reset_index(drop=True) | 462 df_FVA = df_FVA.reset_index(drop=True) |
| 354 df_FVA.index = [model_name] | 463 df_FVA.index = [model_name] |
| 355 df_FVA = df_FVA.astype(float).round(6) | 464 df_FVA = df_FVA.astype(float).round(6) |
| 356 elif(output_type == "sensitivity"): | 465 elif(output_type == "sensitivity"): |
| 357 model.objective = "Biomass" | |
| 358 solution_original = model.optimize().objective_value | 466 solution_original = model.optimize().objective_value |
| 359 reactions = model.reactions | 467 reactions = model.reactions |
| 360 single = cobra.flux_analysis.single_reaction_deletion(model) | 468 single = cobra.flux_analysis.single_reaction_deletion(model) |
| 361 newRow = [] | 469 newRow = [] |
| 362 df_sensitivity = pd.DataFrame(columns = [rxn.id for rxn in reactions], index = [model_name]) | 470 df_sensitivity = pd.DataFrame(columns = [rxn.id for rxn in reactions], index = [model_name]) |
| 365 df_sensitivity.loc[model_name] = newRow | 473 df_sensitivity.loc[model_name] = newRow |
| 366 df_sensitivity = df_sensitivity.astype(float).round(6) | 474 df_sensitivity = df_sensitivity.astype(float).round(6) |
| 367 return df_pFBA, df_FVA, df_sensitivity | 475 return df_pFBA, df_FVA, df_sensitivity |
| 368 | 476 |
| 369 ############################# main ########################################### | 477 ############################# main ########################################### |
| 370 def main(args :List[str] = None) -> None: | 478 def main(args: List[str] = None) -> None: |
| 371 """ | 479 """ |
| 372 Initializes everything and sets the program in motion based on the fronted input arguments. | 480 Initialize and run sampling/analysis based on the frontend input arguments. |
| 373 | 481 |
| 374 Returns: | 482 Returns: |
| 375 None | 483 None |
| 376 """ | 484 """ |
| 377 | 485 |
| 378 num_processors = cpu_count() | 486 num_processors = max(1, cpu_count() - 1) |
| 379 | 487 |
| 380 global ARGS | 488 global ARGS |
| 381 ARGS = process_args(args) | 489 ARGS = process_args(args) |
| 382 | 490 |
| 383 if not os.path.exists(ARGS.output_path): | 491 if not os.path.exists(ARGS.output_path): |
| 384 os.makedirs(ARGS.output_path) | 492 os.makedirs(ARGS.output_path) |
| 385 | 493 |
| 386 model_type :utils.Model = ARGS.model_selector | 494 # --- Normalize inputs (the tool may pass comma-separated --input and either --name or --names) --- |
| 387 if model_type is utils.Model.Custom: | 495 ARGS.input_files = ARGS.input.split(",") if ARGS.input else [] |
| 388 model = model_type.getCOBRAmodel(customPath = utils.FilePath.fromStrPath(ARGS.model), customExtension = utils.FilePath.fromStrPath(ARGS.model_name).ext) | 496 ARGS.file_names = ARGS.name.split(",") |
| 497 # output types (required) -> list | |
| 498 ARGS.output_types = ARGS.output_type.split(",") if ARGS.output_type else [] | |
| 499 # optional analysis output types -> list or empty | |
| 500 ARGS.output_type_analysis = ARGS.output_type_analysis.split(",") if ARGS.output_type_analysis else [] | |
| 501 | |
| 502 # Determine if sampling should be performed | |
| 503 if ARGS.sampling_enabled == "true": | |
| 504 perform_sampling = True | |
| 389 else: | 505 else: |
| 390 model = model_type.getCOBRAmodel(toolDir=ARGS.tool_dir) | 506 perform_sampling = False |
| 391 | 507 |
| 392 #Set solver verbosity to 1 to see warning and error messages only. | 508 print("=== INPUT FILES ===") |
| 393 model.solver.configuration.verbosity = 1 | 509 print(f"{ARGS.input_files}") |
| 394 | 510 print(f"{ARGS.file_names}") |
| 395 ARGS.bounds = ARGS.input.split(",") | 511 print(f"{ARGS.output_type}") |
| 396 ARGS.bounds_name = ARGS.names.split(",") | 512 print(f"{ARGS.output_types}") |
| 397 ARGS.output_types = ARGS.output_type.split(",") | 513 print(f"{ARGS.output_type_analysis}") |
| 398 ARGS.output_type_analysis = ARGS.output_type_analysis.split(",") | 514 print(f"Sampling enabled: {perform_sampling} (n_samples: {ARGS.n_samples})") |
| 399 | 515 |
| 400 | 516 if ARGS.model_and_bounds == "True": |
| 401 results = Parallel(n_jobs=num_processors)(delayed(model_sampler)(model, bounds_path, cell_name) for bounds_path, cell_name in zip(ARGS.bounds, ARGS.bounds_name)) | 517 # MODE 1: Model + bounds (separate files) |
| 402 | 518 print("=== MODE 1: Model + Bounds (separate files) ===") |
| 403 all_mean = pd.concat([result[0] for result in results], ignore_index=False) | 519 |
| 404 all_median = pd.concat([result[1] for result in results], ignore_index=False) | 520 # Load base model |
| 405 all_quantiles = pd.concat([result[2] for result in results], ignore_index=False) | 521 if not ARGS.model_upload: |
| 406 | 522 sys.exit("Error: model_upload is required for Mode 1") |
| 407 if("mean" in ARGS.output_types): | 523 |
| 408 all_mean = all_mean.fillna(0.0) | 524 base_model = model_utils.build_cobra_model_from_csv(ARGS.model_upload) |
| 409 all_mean = all_mean.sort_index() | 525 |
| 410 write_to_file(all_mean.T, "mean", True) | 526 validation = model_utils.validate_model(base_model) |
| 411 | 527 |
| 412 if("median" in ARGS.output_types): | 528 print("\n=== MODEL VALIDATION ===") |
| 413 all_median = all_median.fillna(0.0) | 529 for key, value in validation.items(): |
| 414 all_median = all_median.sort_index() | 530 print(f"{key}: {value}") |
| 415 write_to_file(all_median.T, "median", True) | 531 |
| 416 | 532 # Set solver verbosity to 1 to see warning and error messages only. |
| 417 if("quantiles" in ARGS.output_types): | 533 base_model.solver.configuration.verbosity = 1 |
| 418 all_quantiles = all_quantiles.fillna(0.0) | 534 |
| 419 all_quantiles = all_quantiles.sort_index() | 535 # Process each bounds file with the base model |
| 420 write_to_file(all_quantiles.T, "quantiles", True) | 536 results = Parallel(n_jobs=num_processors)( |
| 421 | 537 delayed(model_sampler_with_bounds)(base_model, bounds_file, cell_name) |
| 422 index_result = 3 | 538 for bounds_file, cell_name in zip(ARGS.input_files, ARGS.file_names) |
| 423 if("pFBA" in ARGS.output_type_analysis): | 539 ) |
| 540 | |
| 541 else: | |
| 542 # MODE 2: Multiple complete models | |
| 543 print("=== MODE 2: Multiple complete models ===") | |
| 544 | |
| 545 # Process each complete model file | |
| 546 results = Parallel(n_jobs=num_processors)( | |
| 547 delayed(perform_sampling_and_analysis)(model_utils.build_cobra_model_from_csv(model_file), cell_name) | |
| 548 for model_file, cell_name in zip(ARGS.input_files, ARGS.file_names) | |
| 549 ) | |
| 550 | |
| 551 # Handle sampling outputs (only if sampling was performed) | |
| 552 if perform_sampling: | |
| 553 print("=== PROCESSING SAMPLING RESULTS ===") | |
| 554 | |
| 555 all_mean = pd.concat([result[0] for result in results], ignore_index=False) | |
| 556 all_median = pd.concat([result[1] for result in results], ignore_index=False) | |
| 557 all_quantiles = pd.concat([result[2] for result in results], ignore_index=False) | |
| 558 | |
| 559 if "mean" in ARGS.output_types: | |
| 560 all_mean = all_mean.fillna(0.0) | |
| 561 all_mean = all_mean.sort_index() | |
| 562 write_to_file(all_mean.T, "mean", True) | |
| 563 | |
| 564 if "median" in ARGS.output_types: | |
| 565 all_median = all_median.fillna(0.0) | |
| 566 all_median = all_median.sort_index() | |
| 567 write_to_file(all_median.T, "median", True) | |
| 568 | |
| 569 if "quantiles" in ARGS.output_types: | |
| 570 all_quantiles = all_quantiles.fillna(0.0) | |
| 571 all_quantiles = all_quantiles.sort_index() | |
| 572 write_to_file(all_quantiles.T, "quantiles", True) | |
| 573 else: | |
| 574 print("=== SAMPLING SKIPPED (n_samples = 0 or sampling disabled) ===") | |
| 575 | |
| 576 # Handle optimization analysis outputs (always available) | |
| 577 print("=== PROCESSING OPTIMIZATION RESULTS ===") | |
| 578 | |
| 579 # Determine the starting index for optimization results | |
| 580 # If sampling was performed, optimization results start at index 3 | |
| 581 # If no sampling, optimization results start at index 0 | |
| 582 index_result = 3 if perform_sampling else 0 | |
| 583 | |
| 584 if "pFBA" in ARGS.output_type_analysis: | |
| 424 all_pFBA = pd.concat([result[index_result] for result in results], ignore_index=False) | 585 all_pFBA = pd.concat([result[index_result] for result in results], ignore_index=False) |
| 425 all_pFBA = all_pFBA.sort_index() | 586 all_pFBA = all_pFBA.sort_index() |
| 426 write_to_file(all_pFBA.T, "pFBA", True) | 587 write_to_file(all_pFBA.T, "pFBA", True) |
| 427 index_result+=1 | 588 index_result += 1 |
| 428 if("FVA" in ARGS.output_type_analysis): | 589 |
| 429 all_FVA= pd.concat([result[index_result] for result in results], ignore_index=False) | 590 if "FVA" in ARGS.output_type_analysis: |
| 591 all_FVA = pd.concat([result[index_result] for result in results], ignore_index=False) | |
| 430 all_FVA = all_FVA.sort_index() | 592 all_FVA = all_FVA.sort_index() |
| 431 write_to_file(all_FVA.T, "FVA", True) | 593 write_to_file(all_FVA.T, "FVA", True) |
| 432 index_result+=1 | 594 index_result += 1 |
| 433 if("sensitivity" in ARGS.output_type_analysis): | 595 |
| 596 if "sensitivity" in ARGS.output_type_analysis: | |
| 434 all_sensitivity = pd.concat([result[index_result] for result in results], ignore_index=False) | 597 all_sensitivity = pd.concat([result[index_result] for result in results], ignore_index=False) |
| 435 all_sensitivity = all_sensitivity.sort_index() | 598 all_sensitivity = all_sensitivity.sort_index() |
| 436 write_to_file(all_sensitivity.T, "sensitivity", True) | 599 write_to_file(all_sensitivity.T, "sensitivity", True) |
| 437 | 600 |
| 438 pass | 601 return |
| 439 | 602 |
| 440 ############################################################################## | 603 ############################################################################## |
| 441 if __name__ == "__main__": | 604 if __name__ == "__main__": |
| 442 main() | 605 main() |
