annotate COBRAxy/utils/model_utils.py @ 418:919b5b71a61c draft

Uploaded
author francesco_lapi
date Tue, 09 Sep 2025 07:36:30 +0000
parents
children ed2c1f9e20ba
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
418
919b5b71a61c Uploaded
francesco_lapi
parents:
diff changeset
1 import os
919b5b71a61c Uploaded
francesco_lapi
parents:
diff changeset
2 import csv
919b5b71a61c Uploaded
francesco_lapi
parents:
diff changeset
3 import cobra
919b5b71a61c Uploaded
francesco_lapi
parents:
diff changeset
4 import pickle
919b5b71a61c Uploaded
francesco_lapi
parents:
diff changeset
5 import argparse
919b5b71a61c Uploaded
francesco_lapi
parents:
diff changeset
6 import pandas as pd
919b5b71a61c Uploaded
francesco_lapi
parents:
diff changeset
7 from typing import Optional, Tuple, Union, List, Dict
919b5b71a61c Uploaded
francesco_lapi
parents:
diff changeset
8 import utils.general_utils as utils
919b5b71a61c Uploaded
francesco_lapi
parents:
diff changeset
9 import utils.rule_parsing as rulesUtils
919b5b71a61c Uploaded
francesco_lapi
parents:
diff changeset
10
919b5b71a61c Uploaded
francesco_lapi
parents:
diff changeset
11 ################################- DATA GENERATION -################################
919b5b71a61c Uploaded
francesco_lapi
parents:
diff changeset
ReactionId = str


def generate_rules(model: cobra.Model, *, asParsed = True) -> Union[Dict[ReactionId, rulesUtils.OpList], Dict[ReactionId, str]]:
    """
    Builds the mapping from reaction id to gene rule for every reaction of the model that has a rule.

    Args:
        model : the model whose reactions are inspected.
        asParsed : when True every rule string goes through rulesUtils.parseRuleToNestedList,
            when False the rule strings are returned untouched.

    Returns:
        Dict[ReactionId, rulesUtils.OpList] : the parsed rules (asParsed = True).
        Dict[ReactionId, str] : the raw rule strings (asParsed = False).
    """
    # The converter is chosen once, up front, so the loop never has to re-test asParsed.
    if asParsed:
        convertRule = rulesUtils.parseRuleToNestedList
    else:
        def convertRule(rawRule):
            return rawRule

    rules = {}
    for reaction in model.reactions:
        rawRule = reaction.gene_reaction_rule
        if not rawRule:
            continue  # reactions with an empty rule are left out of the result

        rules[reaction.id] = convertRule(rawRule)

    return rules
919b5b71a61c Uploaded
francesco_lapi
parents:
diff changeset
36
919b5b71a61c Uploaded
francesco_lapi
parents:
diff changeset
def generate_reactions(model :cobra.Model, *, asParsed = True) -> Dict[ReactionId, str]:
    """
    Generates a dictionary mapping reaction ids to reaction formulas from the model.
    Reactions whose formula string is empty are skipped.

    Args:
        model : the model to derive data from.
        asParsed : if True the raw formulas are handed to create_reaction_dict and its result is returned,
            otherwise the raw formula strings are returned as they are.

    Returns:
        Dict[ReactionId, str] : the generated dictionary (raw formula strings when asParsed is False;
            whatever create_reaction_dict produces when asParsed is True).
    """
    unparsedReactions = {
        reaction.id : reaction.reaction
        for reaction in model.reactions
        if reaction.reaction
    }

    if not asParsed: return unparsedReactions

    # `reactionUtils` is not imported at module level, so this path used to fail with NameError.
    # NOTE(review): create_reaction_dict is assumed to live in utils.reaction_parsing - confirm the module path.
    import utils.reaction_parsing as reactionUtils
    return reactionUtils.create_reaction_dict(unparsedReactions)
919b5b71a61c Uploaded
francesco_lapi
parents:
diff changeset
58
919b5b71a61c Uploaded
francesco_lapi
parents:
diff changeset
def get_medium(model:cobra.Model) -> pd.DataFrame:
    """
    Lists the reactions that have no metabolite with a positive coefficient and whose lower bound is negative.
    NOTE(review): presumably these are the uptake reactions that define the growth medium - confirm.

    Args:
        model : the model whose reactions are inspected.

    Returns:
        pd.DataFrame : a single-column DataFrame ("reaction") holding the ids of the selected reactions,
            in model order.
    """
    mediumIds = [
        rxn.id
        for rxn in model.reactions
        if not any(rxn.get_coefficient(met.id) > 0 for met in rxn.metabolites)
        and rxn.lower_bound < 0
    ]

    mediumFrame = pd.DataFrame()
    mediumFrame["reaction"] = mediumIds
    return mediumFrame
919b5b71a61c Uploaded
francesco_lapi
parents:
diff changeset
72
919b5b71a61c Uploaded
francesco_lapi
parents:
diff changeset
def generate_bounds(model:cobra.Model) -> pd.DataFrame:
    """
    Collects the flux bounds of every reaction of the model.

    Args:
        model : the model to read the bounds from.

    Returns:
        pd.DataFrame : indexed by reaction id, with the columns "lower_bound" and "upper_bound".
    """
    reactionIds = [reaction.id for reaction in model.reactions]
    boundsFrame = pd.DataFrame(index=reactionIds, columns=["lower_bound", "upper_bound"])

    # Rows are filled one at a time so the frame keeps the dtypes it was created with.
    for reaction in model.reactions:
        boundsFrame.loc[reaction.id] = [reaction.lower_bound, reaction.upper_bound]

    return boundsFrame
919b5b71a61c Uploaded
francesco_lapi
parents:
diff changeset
84
919b5b71a61c Uploaded
francesco_lapi
parents:
diff changeset
85
919b5b71a61c Uploaded
francesco_lapi
parents:
diff changeset
86
919b5b71a61c Uploaded
francesco_lapi
parents:
diff changeset
def generate_compartments(model: cobra.Model) -> pd.DataFrame:
    """
    Generates a DataFrame containing the pathway annotations of each reaction.
    Creates one column per pathway position (Pathway_1, Pathway_2, etc.), as many as the
    reaction with the most pathways needs.

    NOTE(review): despite the function name, the data comes from reaction.annotation['pathways'];
    no compartment information is read. The name is kept so existing callers keep working.

    Args:
        model: the COBRA model to extract pathway annotations from.

    Returns:
        pd.DataFrame: one row per reaction, with a ReactionID column followed by the Pathway_N
        columns. Positions a reaction does not fill (including reactions without a 'pathways'
        annotation) are None.
    """
    # First pass: normalize every reaction's annotation to a list of pathways.
    reaction_pathways = {}
    for reaction in model.reactions:
        annotated = reaction.annotation.get('pathways')
        if annotated is None:
            # Missing annotation: the reaction still gets a row, with empty pathway cells.
            pathways = []
        elif isinstance(annotated, list):
            pathways = annotated
        else:
            # A single value is treated as a one-element list.
            pathways = [annotated]
        reaction_pathways[reaction.id] = pathways

    # The width is computed after normalization so single-value annotations count too.
    max_pathways = max((len(pathways) for pathways in reaction_pathways.values()), default=0)
    pathway_columns = [f"Pathway_{i+1}" for i in range(max_pathways)]

    # Second pass: one row per reaction, padded with None up to max_pathways.
    pathway_data = []
    for reaction_id, pathways in reaction_pathways.items():
        row = {"ReactionID": reaction_id}
        for position, col_name in enumerate(pathway_columns):
            row[col_name] = pathways[position] if position < len(pathways) else None
        pathway_data.append(row)

    # Explicit columns keep the ReactionID column present even for a model without reactions.
    return pd.DataFrame(pathway_data, columns=["ReactionID", *pathway_columns])