| 456 | 1 """ | 
|  | 2 CBS backend utilities using GLPK for constraint-based sampling. | 
|  | 3 | 
|  | 4 This module builds and solves LP problems from COBRA models, supports random | 
|  | 5 objective function generation (CBS), and provides both swiglpk-based and | 
|  | 6 COBRApy-based sampling fallbacks. | 
|  | 7 """ | 
| 381 | 8 from swiglpk import * | 
|  | 9 import random | 
|  | 10 import pandas as pd | 
|  | 11 import numpy as np | 
|  | 12 import cobra as cb | 
|  | 13 | 
|  | 14 # Initialize LP problem | 
|  | 15 def initialize_lp_problem(S): | 
| 456 | 16     """ | 
|  | 17     Prepare sparse matrix structures for GLPK given a stoichiometric matrix. | 
|  | 18 | 
|  | 19     Args: | 
|  | 20         S: Stoichiometric matrix (DOK or similar) as returned by cobra.util.create_stoichiometric_matrix. | 
|  | 21 | 
|  | 22     Returns: | 
|  | 23         tuple: (len_vector, values, indexes, ia, ja, ar, nrows, ncol) | 
|  | 24             - len_vector: number of non-zero entries | 
|  | 25             - values: list of non-zero values | 
|  | 26             - indexes: list of (row, col) indices for non-zero entries | 
|  | 27             - ia, ja, ar: GLPK-ready arrays for the sparse matrix | 
|  | 28             - nrows, ncol: matrix shape | 
|  | 29     """ | 
| 381 | 30 | 
|  | 31     len_vector=len(S.keys()) | 
|  | 32     values=list(S.values()) | 
|  | 33     indexes=list(S.keys()) | 
|  | 34     ia = intArray(len_vector+1); | 
|  | 35     ja = intArray(len_vector+1); | 
|  | 36     ar = doubleArray(len_vector+1); | 
|  | 37 | 
|  | 38     i=0 | 
|  | 39     ind_row=[indexes[i][0]+1 for i in range(0, len(values) )] | 
|  | 40     ind_col=[indexes[i][1]+1 for i in range(0, len(values) )] | 
|  | 41     for i in range(1, len(values) + 1): | 
|  | 42         ia[i]=ind_row[i-1] | 
|  | 43         ja[i]=ind_col[i-1] | 
|  | 44         ar[i] = values[i-1] | 
|  | 45 | 
|  | 46     nrows=S.shape[0] | 
|  | 47     ncol=S.shape[1] | 
|  | 48 | 
|  | 49     return len_vector, values, indexes, ia, ja, ar, nrows, ncol | 
|  | 50 | 
|  | 51 | 
|  | 52 | 
|  | 53 def create_and_solve_lp_problem(lb,ub,nrows, ncol, len_vector, ia, ja, ar, | 
|  | 54                                 obj_coefs,reactions,return_lp=False): | 
| 456 | 55     """ | 
|  | 56     Create and solve a GLPK LP problem for a metabolic model. | 
|  | 57 | 
|  | 58     Args: | 
|  | 59         lb, ub: Lower/upper bounds per reaction (lists of floats). | 
|  | 60         nrows, ncol: Dimensions of the S matrix. | 
|  | 61         len_vector, ia, ja, ar: Sparse matrix data prepared for GLPK. | 
|  | 62         obj_coefs: Objective function coefficients (list of floats). | 
|  | 63         reactions: Reaction identifiers (list of str), used for output mapping. | 
|  | 64         return_lp: If True, also return the GLPK problem object (caller must delete). | 
|  | 65 | 
|  | 66     Returns: | 
|  | 67         tuple: (fluxes, Z) or (fluxes, Z, lp) if return_lp=True. | 
|  | 68     """ | 
| 381 | 69 | 
|  | 70 | 
|  | 71     lp = glp_create_prob(); | 
|  | 72     glp_set_prob_name(lp, "sample"); | 
|  | 73     glp_set_obj_dir(lp, GLP_MAX); | 
|  | 74     glp_add_rows(lp, nrows); | 
|  | 75     eps = 1e-16 | 
|  | 76     for i in range(nrows): | 
|  | 77         glp_set_row_name(lp, i+1, "constrain_"+str(i+1)); | 
|  | 78         glp_set_row_bnds(lp, i+1, GLP_FX, 0.0, 0.0); | 
|  | 79     glp_add_cols(lp, ncol); | 
|  | 80     for i in range(ncol): | 
|  | 81         glp_set_col_name(lp, i+1, "flux_"+str(i+1)); | 
|  | 82         glp_set_col_bnds(lp, i+1, GLP_DB,lb[i]-eps,ub[i]+eps); | 
|  | 83     glp_load_matrix(lp, len_vector, ia, ja, ar); | 
|  | 84 | 
|  | 85     try: | 
|  | 86         fluxes,Z=solve_lp_problem(lp,obj_coefs,reactions) | 
|  | 87         if return_lp: | 
|  | 88             return fluxes,Z,lp | 
|  | 89         else: | 
|  | 90             glp_delete_prob(lp); | 
|  | 91             return fluxes,Z | 
|  | 92     except Exception as e: | 
|  | 93         glp_delete_prob(lp) | 
|  | 94         raise Exception(e) | 
|  | 95 | 
|  | 96 | 
|  | 97 def solve_lp_problem(lp,obj_coefs,reactions): | 
| 456 | 98     """ | 
|  | 99     Configure and solve an LP with GLPK using provided objective coefficients. | 
|  | 100 | 
|  | 101     Args: | 
|  | 102         lp: GLPK problem handle. | 
|  | 103         obj_coefs: Objective coefficients. | 
|  | 104         reactions: Reaction identifiers (unused in computation; length used for output). | 
|  | 105 | 
|  | 106     Returns: | 
|  | 107         tuple: (fluxes, Z) where fluxes is a list of primal column values and Z is the objective value. | 
|  | 108     """ | 
| 381 | 109 | 
|  | 110     # Set the coefficients of the objective function | 
|  | 111     i=1 | 
|  | 112     for ind_coef in obj_coefs: | 
|  | 113         glp_set_obj_coef(lp, i, ind_coef); | 
|  | 114         i+=1 | 
|  | 115 | 
|  | 116     # Initialize the parameters | 
|  | 117     params=glp_smcp() | 
|  | 118     params.presolve=GLP_ON | 
|  | 119     params.msg_lev = GLP_MSG_ERR | 
|  | 120     params.tm_lim=4000 | 
|  | 121     glp_init_smcp(params) | 
|  | 122 | 
|  | 123     glp_term_out(GLP_OFF) | 
|  | 124 | 
|  | 125     try: | 
|  | 126 | 
|  | 127         # Solve the problem | 
|  | 128         glp_scale_prob(lp,GLP_SF_AUTO) | 
|  | 129 | 
|  | 130         value=glp_simplex(lp, params) | 
|  | 131 | 
|  | 132         Z = glp_get_obj_val(lp); | 
|  | 133 | 
|  | 134         if value == 0: | 
|  | 135             fluxes = [] | 
|  | 136             for i in range(len(reactions)): fluxes.append(glp_get_col_prim(lp, i+1)) | 
|  | 137             return fluxes,Z | 
|  | 138         else: | 
|  | 139             raise Exception("error in LP problem. Problem:",str(value)) | 
|  | 140     except Exception as e: | 
|  | 141         # Re-enable terminal output for error reporting | 
|  | 142         glp_term_out(GLP_ON) | 
|  | 143         raise Exception(e) | 
|  | 144     finally: | 
|  | 145         # Re-enable terminal output after solving | 
|  | 146         glp_term_out(GLP_ON) | 
|  | 147 | 
|  | 148 def create_lp_structure(model): | 
| 456 | 149     """ | 
|  | 150     Extract the LP structure (S matrix, bounds, objective) from a COBRA model. | 
|  | 151 | 
|  | 152     Args: | 
|  | 153         model (cobra.Model): The COBRA model. | 
|  | 154 | 
|  | 155     Returns: | 
|  | 156         tuple: (S, lb, ub, coefs_obj, reactions) | 
|  | 157     """ | 
| 381 | 158 | 
|  | 159     reactions=[el.id for el in model.reactions] | 
|  | 160     coefs_obj=[reaction.objective_coefficient for reaction in model.reactions] | 
|  | 161 | 
|  | 162     # Lower and upper bounds | 
|  | 163     lb=[reaction.lower_bound for reaction in model.reactions] | 
|  | 164     ub=[reaction.upper_bound for reaction in model.reactions] | 
|  | 165 | 
|  | 166     # Create S matrix | 
|  | 167     S=cb.util.create_stoichiometric_matrix(model,array_type="dok") | 
|  | 168 | 
|  | 169     return S,lb,ub,coefs_obj,reactions | 
|  | 170 | 
|  | 171 def randomObjectiveFunctionSampling(model, nsample, coefficients_df, df_sample): | 
| 456 | 172     """ | 
|  | 173     Sample fluxes using GLPK by iterating over random objective functions. | 
|  | 174 | 
|  | 175     Args: | 
|  | 176         model: COBRA model. | 
|  | 177         nsample: Number of samples to generate. | 
|  | 178         coefficients_df: DataFrame of objective coefficients (columns are samples; last row is type_of_problem). | 
|  | 179         df_sample: Output DataFrame to fill with flux samples (index: sample, columns: reactions). | 
|  | 180 | 
|  | 181     Returns: | 
|  | 182         None | 
|  | 183     """ | 
| 381 | 184 | 
|  | 185     S,lb,ub,coefs_obj,reactions = create_lp_structure(model) | 
|  | 186     len_vector, values, indexes, ia, ja, ar, nrow, ncol = initialize_lp_problem(S) | 
|  | 187 | 
|  | 188     for i in range(nsample): | 
|  | 189 | 
|  | 190         coefs_obj=coefficients_df.iloc[:,i].values | 
|  | 191 | 
| 456 | 192         if coefs_obj[-1]==1: # minimize | 
| 381 | 193             coefs_obj= coefs_obj[0:-1] * -1 | 
|  | 194         else: | 
|  | 195             coefs_obj=coefs_obj[0:-1] | 
|  | 196 | 
|  | 197         fluxes,Z = create_and_solve_lp_problem(lb,ub, nrow, ncol, len_vector, | 
|  | 198                                                         ia, ja, ar, coefs_obj,reactions,return_lp=False) | 
|  | 199         df_sample.loc[i] = fluxes | 
| 456 | 200     return | 
| 381 | 201 | 
|  | 202 def randomObjectiveFunctionSampling_cobrapy(model, nsample, coefficients_df, df_sample): | 
| 456 | 203     """ | 
|  | 204     Fallback sampling using COBRApy's optimize with per-sample randomized objectives. | 
|  | 205 | 
|  | 206     Args: | 
|  | 207         model: COBRA model. | 
|  | 208         nsample: Number of samples to generate. | 
|  | 209         coefficients_df: DataFrame of objective coefficients (columns are samples; last row is type_of_problem). | 
|  | 210         df_sample: Output DataFrame to fill with flux samples (index: sample, columns: reactions). | 
|  | 211 | 
|  | 212     Returns: | 
|  | 213         None | 
|  | 214     """ | 
| 381 | 215 | 
|  | 216     for i in range(nsample): | 
|  | 217 | 
|  | 218         dict_coeff={} | 
|  | 219         if(coefficients_df.iloc[-1][i]==1): | 
| 456 | 220             type_problem = -1 # minimize | 
| 381 | 221         else: | 
| 456 | 222             type_problem = 1 # maximize | 
| 381 | 223 | 
|  | 224         for rxn in [reaction.id for reaction in model.reactions]: | 
|  | 225             dict_coeff[model.reactions.get_by_id(rxn)] = coefficients_df.loc[rxn][i] * type_problem | 
|  | 226 | 
|  | 227         model.objective = dict_coeff | 
|  | 228         solution =  model.optimize().fluxes | 
|  | 229         for rxn, flux in solution.items(): | 
|  | 230             df_sample.loc[i][rxn] = flux | 
|  | 231 | 
| 456 | 232     return | 
|  | 233 | 
|  | 234 def randomObjectiveFunction(model, n_samples, df_fva, seed=0): | 
|  | 235     """ | 
|  | 236     Create random objective function coefficients for CBS sampling. | 
| 381 | 237 | 
| 456 | 238     The last row 'type_of_problem' encodes 0 for maximize and 1 for minimize. | 
|  | 239 | 
|  | 240     Args: | 
|  | 241         model: COBRA model. | 
|  | 242         n_samples: Number of random objectives to generate. | 
|  | 243         df_fva: Flux Variability Analysis results with 'minimum' and 'maximum' per reaction. | 
|  | 244         seed: Seed for reproducibility. | 
| 381 | 245 | 
| 456 | 246     Returns: | 
|  | 247         pandas.DataFrame: Coefficients DataFrame indexed by reaction IDs plus 'type_of_problem'. | 
|  | 248     """ | 
|  | 249     # reactions = model.reactions | 
|  | 250     reactions = [reaction.id for reaction in model.reactions] | 
|  | 251     cont = seed | 
|  | 252     list_ex = reactions.copy() | 
|  | 253     list_ex.append("type_of_problem") | 
|  | 254     coefficients_df = pd.DataFrame(index=list_ex, columns=[str(i) for i in range(n_samples)]) | 
|  | 255 | 
|  | 256     for i in range(0, n_samples): | 
| 381 | 257 | 
| 456 | 258         cont = cont + 1 | 
|  | 259         random.seed(cont) | 
|  | 260 | 
|  | 261         # Generate a random threshold in [0, 1] | 
|  | 262         threshold = random.random() | 
|  | 263 | 
|  | 264         for reaction in reactions: | 
|  | 265 | 
|  | 266             cont = cont + 1 | 
| 381 | 267             random.seed(cont) | 
| 456 | 268 | 
|  | 269             val = random.random() | 
| 381 | 270 | 
| 456 | 271             if val > threshold: | 
|  | 272 | 
|  | 273                 cont = cont + 1 | 
| 381 | 274                 random.seed(cont) | 
| 456 | 275 | 
|  | 276                 # Coefficient in [-1, 1] | 
|  | 277                 c = 2 * random.random() - 1 | 
| 381 | 278 | 
| 456 | 279                 val_max = np.max([abs(df_fva.loc[reaction, "minimum"]), abs(df_fva.loc[reaction, "maximum"])]) | 
|  | 280 | 
|  | 281                 if val_max != 0: # only if FVA is non-zero | 
|  | 282                     coefficients_df.loc[reaction, str(i)] = c / val_max # scale by FVA | 
|  | 283                 else: | 
|  | 284                     coefficients_df.loc[reaction, str(i)] = 0 | 
| 381 | 285 | 
| 456 | 286             else: | 
|  | 287                 coefficients_df.loc[reaction, str(i)] = 0 | 
| 381 | 288 | 
| 456 | 289         cont = cont + 1 | 
|  | 290         random.seed(cont) | 
| 381 | 291 | 
| 456 | 292         if random.random() < 0.5: | 
|  | 293             coefficients_df.loc["type_of_problem", str(i)] = 0 # maximize | 
|  | 294         else: | 
|  | 295             coefficients_df.loc["type_of_problem", str(i)] = 1 # minimize | 
|  | 296 | 
|  | 297     return coefficients_df |