Mercurial > repos > bimib > cobraxy
diff COBRAxy/ras_to_bounds_beta.py @ 414:5086145cfb96 draft
Uploaded
author | francesco_lapi |
---|---|
date | Mon, 08 Sep 2025 21:54:14 +0000 |
parents | 6b015d3184ab |
children | 5f8f4a2d1370 |
line wrap: on
line diff
################################- DATA GENERATION -################################
ReactionId = str


def generate_rules(
    model: "cobra.Model", *, asParsed: bool = True
) -> "Union[Dict[ReactionId, rulesUtils.OpList], Dict[ReactionId, str]]":
    """
    Generates a dictionary mapping reaction ids to rules from the model.

    Reactions whose ``gene_reaction_rule`` is empty are left out.

    Args:
        model : the model to derive data from.
        asParsed : if True each rule is passed through
            ``rulesUtils.parseRuleToNestedList``, otherwise rules are left as strings.

    Returns:
        Dict[ReactionId, rulesUtils.OpList] : the generated dictionary of parsed rules.
        Dict[ReactionId, str] : the generated dictionary of raw rules.
    """
    raw_rules = {
        reaction.id: reaction.gene_reaction_rule
        for reaction in model.reactions
        if reaction.gene_reaction_rule
    }

    if not asParsed:
        return raw_rules

    # Function-scope import: the parser is only needed for the parsed form.
    import utils.rule_parsing as rulesUtils

    return {
        reaction_id: rulesUtils.parseRuleToNestedList(rule)
        for reaction_id, rule in raw_rules.items()
    }


def generate_reactions(model: "cobra.Model", *, asParsed: bool = True) -> Dict[ReactionId, str]:
    """
    Generates a dictionary mapping reaction ids to reaction formulas from the model.

    Reactions whose ``reaction`` string is empty are left out.

    Args:
        model : the model to derive data from.
        asParsed : if True the raw dictionary is handed to
            ``reactionUtils.create_reaction_dict`` and its result is returned,
            otherwise the formulas are returned as they are.

    Returns:
        Dict[ReactionId, str] : the generated dictionary.
    """
    unparsedReactions = {
        reaction.id: reaction.reaction
        for reaction in model.reactions
        if reaction.reaction
    }

    if not asParsed:
        return unparsedReactions

    # NOTE(review): `reactionUtils` must already be in scope at module level;
    # the visible part of this change only adds the `rulesUtils` import - confirm.
    return reactionUtils.create_reaction_dict(unparsedReactions)


def get_medium(model: "cobra.Model") -> pd.DataFrame:
    """
    Lists the reactions that are treated as the medium of the model.

    A reaction is selected when none of its metabolites has a positive
    coefficient and its lower bound is negative.

    Args:
        model : the model to derive data from.

    Returns:
        pd.DataFrame : a single-column frame ("reaction") with the selected reaction ids.
    """
    medium_ids = [
        reaction.id
        for reaction in model.reactions
        if reaction.lower_bound < 0
        and not any(
            reaction.get_coefficient(metabolite.id) > 0
            for metabolite in reaction.metabolites
        )
    ]

    return pd.DataFrame({"reaction": medium_ids})


def generate_bounds(model: "cobra.Model") -> pd.DataFrame:
    """
    Collects the lower and upper bound of every reaction in the model.

    Args:
        model : the model to derive data from.

    Returns:
        pd.DataFrame : indexed by reaction id, with columns "lower_bound" and "upper_bound".
    """
    reaction_ids = [reaction.id for reaction in model.reactions]
    bound_rows = [
        [reaction.lower_bound, reaction.upper_bound]
        for reaction in model.reactions
    ]

    # Built in one pass instead of one `.loc` assignment per reaction.
    # dtype=object keeps the columns untyped, as a frame created empty and then
    # filled row by row would be.
    # NOTE(review): switch to a numeric dtype if no caller depends on this.
    return pd.DataFrame(
        bound_rows,
        index=reaction_ids,
        columns=["lower_bound", "upper_bound"],
        dtype=object,
    )


def generate_compartments(model: "cobra.Model") -> pd.DataFrame:
    """
    Generates a DataFrame with the pathway annotations of each reaction.

    Despite its name, this function reads ``reaction.annotation['pathways']``
    and creates one column per pathway position (Pathway_1, Pathway_2, etc.).
    An annotation that is not a list is treated as a single pathway.

    Args:
        model: the COBRA model to extract the annotations from.

    Returns:
        pd.DataFrame: DataFrame with a "ReactionID" column followed by the
        "Pathway_N" columns; positions a reaction does not use hold None.

    Raises:
        KeyError: if a reaction's annotation has no 'pathways' entry.
    """
    # First pass: normalise every annotation to a list of pathways.
    reaction_pathways = {}
    for reaction in model.reactions:
        annotated = reaction.annotation['pathways']
        if isinstance(annotated, list):
            reaction_pathways[reaction.id] = annotated
        else:
            reaction_pathways[reaction.id] = [annotated]

    # The column count is taken AFTER normalisation, so a model whose reactions
    # all carry a single (non-list) pathway still gets its Pathway_1 column.
    max_pathways = max(
        (len(pathways) for pathways in reaction_pathways.values()),
        default=0,
    )
    pathway_columns = [f"Pathway_{i+1}" for i in range(max_pathways)]

    # Second pass: one row per reaction, padded with None up to max_pathways.
    pathway_data = []
    for reaction_id, pathways in reaction_pathways.items():
        row = {"ReactionID": reaction_id}
        for position, col_name in enumerate(pathway_columns):
            row[col_name] = pathways[position] if position < len(pathways) else None
        pathway_data.append(row)

    return pd.DataFrame(pathway_data)
@@ -170,10 +291,10 @@ # Special handling for tabular format using utils functions filepath = os.path.join(output_folder, f"{filename}.csv") - rules = utils.generate_rules(model, asParsed = False) - reactions = utils.generate_reactions(model, asParsed = False) - bounds = utils.generate_bounds(model) - medium = utils.get_medium(model) + rules = generate_rules(model, asParsed = False) + reactions = generate_reactions(model, asParsed = False) + bounds = generate_bounds(model) + medium = get_medium(model) try: compartments = utils.generate_compartments(model) @@ -269,7 +390,7 @@ pass -def generate_bounds(model: cobra.Model, ras=None, output_folder='output/', save_models=False, save_models_path='saved_models/', save_models_format='csv') -> pd.DataFrame: +def generate_bounds_model(model: cobra.Model, ras=None, output_folder='output/', save_models=False, save_models_path='saved_models/', save_models_format='csv') -> pd.DataFrame: """ Generate reaction bounds for a metabolic model based on medium conditions and optional RAS adjustments. @@ -369,12 +490,12 @@ print(f"{key}: {value}") if(ARGS.ras_selector == True): - generate_bounds(model, ras=ras_combined, output_folder=ARGS.output_path, + generate_bounds_model(model, ras=ras_combined, output_folder=ARGS.output_path, save_models=ARGS.save_models, save_models_path=ARGS.save_models_path, save_models_format=ARGS.save_models_format) class_assignments.to_csv(ARGS.cell_class, sep='\t', index=False) else: - generate_bounds(model, output_folder=ARGS.output_path, + generate_bounds_model(model, output_folder=ARGS.output_path, save_models=ARGS.save_models, save_models_path=ARGS.save_models_path, save_models_format=ARGS.save_models_format)