Mercurial > repos > bimib > cobraxy
diff COBRAxy/utils/general_utils.py @ 411:6b015d3184ab draft
Uploaded
author | francesco_lapi |
---|---|
date | Mon, 08 Sep 2025 21:07:34 +0000 |
parents | 71850bdf9e1e |
children | bdf4630ac1eb |
line wrap: on
line diff
# --- a/COBRAxy/utils/general_utils.py Mon Sep 08 17:33:52 2025 +0000
# +++ b/COBRAxy/utils/general_utils.py Mon Sep 08 21:07:34 2025 +0000

# @@ -17,6 +17,8 @@
import gzip
import bz2
from io import StringIO
import rule_parsing as rulesUtils
import reaction_parsing as reactionUtils

# @@ -981,3 +983,124 @@
# Trailing context of a preceding function whose start is outside this hunk:
#         validation['status'] = f"Error: {e}"
#     return validation


################################- DATA GENERATION -################################
ReactionId = str


def generate_rules(model: cobra.Model, *, asParsed = True) -> Union[Dict[ReactionId, rulesUtils.OpList], Dict[ReactionId, str]]:
    """
    Generates a dictionary mapping reaction ids to rules from the model.

    Reactions whose ``gene_reaction_rule`` is empty (falsy) are left out.

    Args:
        model : the model to derive data from.
        asParsed : if True each rule is passed through
            ``rulesUtils.parseRuleToNestedList``, otherwise rules are left as strings.

    Returns:
        Dict[ReactionId, rulesUtils.OpList] : the generated dictionary of parsed rules.
        Dict[ReactionId, str] : the generated dictionary of raw rules.
    """
    raw_rules = {
        reaction.id: reaction.gene_reaction_rule
        for reaction in model.reactions
        if reaction.gene_reaction_rule
    }

    if not asParsed:
        return raw_rules

    return {
        reaction_id: rulesUtils.parseRuleToNestedList(rule)
        for reaction_id, rule in raw_rules.items()
    }


def generate_reactions(model: cobra.Model, *, asParsed = True) -> Dict[ReactionId, str]:
    """
    Generates a dictionary mapping reaction ids to reaction formulas from the model.

    Reactions whose ``reaction`` formula is empty (falsy) are left out.

    Args:
        model : the model to derive data from.
        asParsed : if True the raw dictionary is passed through
            ``reactionUtils.create_reaction_dict``, otherwise formulas are left as they are.

    Returns:
        Dict[ReactionId, str] : the generated dictionary.
            NOTE(review): with asParsed=True the value type is whatever
            ``create_reaction_dict`` produces - confirm it matches this annotation.
    """
    unparsedReactions = {
        reaction.id: reaction.reaction
        for reaction in model.reactions
        if reaction.reaction
    }

    if not asParsed:
        return unparsedReactions

    return reactionUtils.create_reaction_dict(unparsedReactions)


def get_medium(model: cobra.Model) -> pd.DataFrame:
    """
    Lists the reactions that make up the model's medium.

    A reaction is selected when none of its metabolites has a positive
    coefficient and its lower bound is negative.
    (presumably these are the uptake/exchange reactions - verify against callers)

    Args:
        model : the model to derive data from.

    Returns:
        pd.DataFrame : a single-column DataFrame ("reaction") with the selected reaction ids.
    """
    trueMedium = [
        r.id
        for r in model.reactions
        if r.lower_bound < 0
        and not any(r.get_coefficient(m.id) > 0 for m in r.metabolites)
    ]

    df_medium = pd.DataFrame()
    df_medium["reaction"] = trueMedium
    return df_medium


def generate_bounds(model: cobra.Model) -> pd.DataFrame:
    """
    Collects the lower and upper bound of every reaction in the model.

    Args:
        model : the model to derive data from.

    Returns:
        pd.DataFrame : indexed by reaction id, with columns "lower_bound" and "upper_bound".
    """
    reactions = list(model.reactions)

    # Build the frame in one go instead of one .loc assignment per reaction.
    # dtype=object keeps the stored values exactly as the model provides them,
    # matching the frame produced by the previous row-by-row fill.
    return pd.DataFrame(
        [[reaction.lower_bound, reaction.upper_bound] for reaction in reactions],
        index=[reaction.id for reaction in reactions],
        columns=["lower_bound", "upper_bound"],
        dtype=object,
    )


def generate_compartments(model: cobra.Model) -> pd.DataFrame:
    """
    Generates a DataFrame containing the pathway annotations of each reaction.

    Despite the function name, the data comes from ``reaction.annotation['pathways']``
    and is spread over one column per position (Pathway_1, Pathway_2, etc.).
    Reactions with fewer pathways than the widest one are padded with None;
    reactions without a 'pathways' annotation get None in every pathway column.

    Args:
        model: the COBRA model to extract pathway data from.

    Returns:
        pd.DataFrame: DataFrame with a ReactionID column followed by the Pathway_N columns.
    """
    # First pass: normalise every annotation to a list of pathways.
    reaction_pathways = {}
    for reaction in model.reactions:
        pathways = reaction.annotation.get('pathways')
        if pathways is None:
            pathways = []
        elif not isinstance(pathways, list):
            # A single pathway stored as a scalar still counts as one entry.
            pathways = [pathways]
        reaction_pathways[reaction.id] = pathways

    # Width is computed on the normalised lists so scalar annotations count too.
    max_pathways = max(map(len, reaction_pathways.values()), default=0)
    pathway_columns = [f"Pathway_{i+1}" for i in range(max_pathways)]

    # Second pass: one row per reaction, padded to the common width.
    pathway_data = []
    for reaction_id, pathways in reaction_pathways.items():
        padded = pathways + [None] * (max_pathways - len(pathways))
        row = {"ReactionID": reaction_id}
        row.update(zip(pathway_columns, padded))
        pathway_data.append(row)

    # Explicit columns keep the header stable even for a model with no reactions.
    return pd.DataFrame(pathway_data, columns=["ReactionID", *pathway_columns])