changeset 220:6ddd9dcc9c72 draft

Uploaded
author francesco_lapi
date Thu, 01 Aug 2024 15:51:31 +0000
parents c3bc39f335d3
children 3cbc03acc7b5
files marea_2/flux_simulation.py
diffstat 1 files changed, 30 insertions(+), 135 deletions(-) [+]
line wrap: on
line diff
--- a/marea_2/flux_simulation.py	Thu Aug 01 15:38:01 2024 +0000
+++ b/marea_2/flux_simulation.py	Thu Aug 01 15:51:31 2024 +0000
@@ -31,25 +31,20 @@
                         type = str,
                         required = True,
                         help = 'your tool directory')
-    
+ 
+
+
+
+ 
     parser.add_argument('-in', '--input',
                         required = True,
                         type=str,
-                        help = 'inputs bounds')
+                        help = 'inputs model')
     
-    parser.add_argument('-ni', '--names',
+    parser.add_argument('-nm', '--name',
                         required = True,
                         type=str,
-                        help = 'cell names')
- 
-    parser.add_argument(
-        '-ms', '--model_selector', 
-        type = utils.Model, default = utils.Model.ENGRO2, choices = [utils.Model.ENGRO2, utils.Model.Custom],
-        help = 'chose which type of model you want use')
-    
-    parser.add_argument("-mo", "--model", type = str)
-    
-    parser.add_argument("-mn", "--model_name", type = str, help = "custom mode name")
+                        help = 'inputs model ids')
     
     parser.add_argument('-a', '--algorithm',
                         type = str,
@@ -83,7 +78,7 @@
                         required = True,
                         help = 'output type')
     
-    parser.add_argument('-ota', '--output_type_analysis', 
+    parser.add_argument('-ot', '--output_type_analysis', 
                         type = str,
                         required = False,
                         help = 'output type analysis')
@@ -110,30 +105,6 @@
 def write_to_file(dataset: pd.DataFrame, name: str, keep_index:bool=False)->None:
     dataset.to_csv(ARGS.output_folder + name + ".csv", sep = '\t', index = keep_index)
 
-############################ dataset input ####################################
-def read_dataset(data :str, name :str) -> pd.DataFrame:
-    """
-    Read a dataset from a CSV file and return it as a pandas DataFrame.
-
-    Args:
-        data (str): Path to the CSV file containing the dataset.
-        name (str): Name of the dataset, used in error messages.
-
-    Returns:
-        pandas.DataFrame: DataFrame containing the dataset.
-
-    Raises:
-        pd.errors.EmptyDataError: If the CSV file is empty.
-        sys.exit: If the CSV file has the wrong format, the execution is aborted.
-    """
-    try:
-        dataset = pd.read_csv(data, sep = '\t', header = 0, index_col=0, engine='python')
-    except pd.errors.EmptyDataError:
-        sys.exit('Execution aborted: wrong format of ' + name + '\n')
-    if len(dataset.columns) < 2:
-        sys.exit('Execution aborted: wrong format of ' + name + '\n')
-    return dataset
-
 
 
 def OPTGP_sampler(model:cobra.Model, model_name:str, n_samples:int=1000, thinning:int=100, n_batches:int=1, seed:int=0)-> None:
@@ -171,7 +142,6 @@
             ARGS.out_log)
             CBS_backend.randomObjectiveFunctionSampling_cobrapy(model, n_samples, df_coefficients.iloc[:,i*n_samples:(i+1)*n_samples], 
                                                     samples)
-        utils.logWarning(ARGS.output_folder +  model_name + '_'+ str(i)+'_CBS.csv', ARGS.out_log)
         samples.to_csv(ARGS.output_folder +  model_name + '_'+ str(i)+'_CBS.csv', index=False)
 
     samplesTotal = pd.DataFrame()
@@ -186,42 +156,29 @@
     pass
 
 
-def model_sampler(model_input_original:cobra.Model, bounds_path:str, cell_name:str)-> List[pd.DataFrame]:
+def model_sampler(model_input:str, model_name:str)-> List[pd.DataFrame]:
+
+    model_type = utils.Model.Custom
+    model = model_type.getCOBRAmodel(customPath = utils.FilePath.fromStrPath(model_input), customExtension = utils.FilePath.fromStrPath(model_name).ext)
 
-    model_input = model_input_original.copy()
-    bounds_df = read_dataset(bounds_path, "bounds dataset")
-    for rxn_index, row in bounds_df.iterrows():
-        model_input.reactions.get_by_id(rxn_index).lower_bound = row.lower_bound
-        model_input.reactions.get_by_id(rxn_index).upper_bound = row.upper_bound
+    utils.logWarning(
+        "Sampling model: " + model_name,
+        ARGS.out_log)
     
-    name = cell_name.split('.')[0]
+    name = model_name.split('.')[0]
     
     if ARGS.algorithm == 'OPTGP':
-        OPTGP_sampler(model_input, name, ARGS.n_samples, ARGS.thinning, ARGS.n_batches, ARGS.seed)
+        OPTGP_sampler(model, name, ARGS.n_samples, ARGS.thinning, ARGS.n_batches, ARGS.seed)
 
     elif ARGS.algorithm == 'CBS':
-        CBS_sampler(model_input,  name, ARGS.n_samples, ARGS.n_batches, ARGS.seed)
+        CBS_sampler(model,  name, ARGS.n_samples, ARGS.n_batches, ARGS.seed)
 
     df_mean, df_median, df_quantiles = fluxes_statistics(name, ARGS.output_types)
 
     if("fluxes" not in ARGS.output_types):
         os.remove(ARGS.output_folder  +  name + '.csv')
 
-    returnList = []
-    returnList.append(df_mean)
-    returnList.append(df_median)
-    returnList.append(df_quantiles)
-
-    df_pFBA, df_FVA, df_sensitivity = fluxes_analysis(model_input, name, ARGS.output_type_analysis)
-
-    if("pFBA" in ARGS.output_type_analysis):
-        returnList.append(df_pFBA)
-    if("FVA" in ARGS.output_type_analysis):
-        returnList.append(df_FVA)
-    if("sensitivity" in ARGS.output_type_analysis):
-        returnList.append(df_sensitivity)
-
-    return returnList
+    return df_mean, df_median, df_quantiles
 
 def fluxes_statistics(model_name: str,  output_types:List)-> List[pd.DataFrame]:
 
@@ -259,47 +216,6 @@
     
     return df_mean, df_median, df_quantiles
 
-def fluxes_analysis(model:cobra.Model,  model_name:str, output_types:List)-> List[pd.DataFrame]:
-
-    df_pFBA = pd.DataFrame()
-    
-    df_sensitivity= pd.DataFrame()
-
-    for output_type in output_types:
-        if(output_type == "pFBA"):
-            model.objective = "Biomass"
-            solution = cobra.flux_analysis.pfba(model)
-            fluxes = solution.fluxes
-            df_pFBA.loc[0,[rxn._id for rxn in model.reactions]] = fluxes.tolist()
-            df_pFBA = df_pFBA.reset_index(drop=True)
-            df_pFBA.index = [model_name]
-            df_pFBA = df_pFBA.astype(float).round(6)
-        elif(output_type == "FVA"):
-            fva = cobra.flux_analysis.flux_variability_analysis(model, fraction_of_optimum=0, processes=1).round(8)
-            columns = []
-            for rxn in fva.index.to_list():
-                columns.append(rxn + "_min")
-                columns.append(rxn + "_max")
-            df_FVA= pd.DataFrame(columns = columns)
-            for index_rxn, row in fva.iterrows():
-                df_FVA.loc[0, index_rxn+ "_min"] = fva.loc[index_rxn, "minimum"]
-                df_FVA.loc[0, index_rxn+ "_max"] = fva.loc[index_rxn, "maximum"]
-            df_FVA = df_FVA.reset_index(drop=True)
-            df_FVA.index = [model_name]
-            df_FVA = df_FVA.astype(float).round(6)
-        elif(output_type == "sensitivity"):
-            model.objective = "Biomass"
-            solution_original = model.optimize().objective_value
-            reactions = model.reactions
-            single = cobra.flux_analysis.single_reaction_deletion(model)
-            newRow = []
-            df_sensitivity = pd.DataFrame(columns = [rxn.id for rxn in reactions], index = [model_name])
-            for rxn in reactions:
-                newRow.append(single.knockout[rxn.id].growth.values[0]/solution_original)
-            df_sensitivity.loc[model_name] = newRow
-            df_sensitivity = df_sensitivity.astype(float).round(6)
-    return df_pFBA, df_FVA, df_sensitivity
-
 ############################# main ###########################################
 def main() -> None:
     """
@@ -308,30 +224,26 @@
     Returns:
         None
     """
-    if not os.path.exists('flux_simulation/'):
-        os.makedirs('flux_simulation/')
+    if not os.path.exists('flux_sampling'):
+        os.makedirs('flux_sampling')
 
     num_processors = cpu_count()
 
     global ARGS
     ARGS = process_args(sys.argv)
 
-    ARGS.output_folder = 'flux_simulation/'
+    ARGS.output_folder = 'flux_sampling/'
     
+    utils.logWarning(
+        ARGS.output_type,
+        ARGS.out_log)
     
-    model_type :utils.Model = ARGS.model_selector
-    if model_type is utils.Model.Custom:
-        model = model_type.getCOBRAmodel(customPath = utils.FilePath.fromStrPath(ARGS.model), customExtension = utils.FilePath.fromStrPath(ARGS.model_name).ext)
-    else:
-        model = model_type.getCOBRAmodel(toolDir=ARGS.tool_dir)
-    
-    ARGS.bounds = ARGS.input.split(",")
-    ARGS.bounds_name = ARGS.names.split(",")
+    models_input = ARGS.input.split(",")
+    models_name = ARGS.name.split(",")
     ARGS.output_types = ARGS.output_type.split(",")
-    ARGS.output_type_analysis = ARGS.output_type_analysis.split(",")
 
-
-    results = Parallel(n_jobs=num_processors)(delayed(model_sampler)(model, bounds_path, cell_name) for bounds_path, cell_name in zip(ARGS.bounds, ARGS.bounds_name))
+ 
+    results = Parallel(n_jobs=num_processors)(delayed(model_sampler)(model_input, model_name) for model_input, model_name in zip(models_input, models_name))
 
     all_mean = pd.concat([result[0] for result in results], ignore_index=False)
     all_median = pd.concat([result[1] for result in results], ignore_index=False)
@@ -351,23 +263,6 @@
         all_quantiles = all_quantiles.fillna(0.0)
         all_quantiles = all_quantiles.sort_index()
         write_to_file(all_quantiles, "quantiles", True)
-
-    index_result = 3
-    if("pFBA" in ARGS.output_type_analysis):
-        all_pFBA = pd.concat([result[index_result] for result in results], ignore_index=False)
-        all_pFBA = all_pFBA.sort_index()
-        write_to_file(all_pFBA, "pFBA", True)
-        index_result+=1
-    if("FVA" in ARGS.output_type_analysis):
-        all_FVA= pd.concat([result[index_result] for result in results], ignore_index=False)
-        all_FVA = all_FVA.sort_index()
-        write_to_file(all_FVA, "FVA", True)
-        index_result+=1
-    if("sensitivity" in ARGS.output_type_analysis):
-        all_sensitivity = pd.concat([result[index_result] for result in results], ignore_index=False)
-        all_sensitivity = all_sensitivity.sort_index()
-        write_to_file(all_sensitivity, "sensitivity", True)
-
     pass
         
 ##############################################################################