comparison COBRAxy/utils/general_utils.py @ 411:6b015d3184ab draft

Uploaded
author francesco_lapi
date Mon, 08 Sep 2025 21:07:34 +0000
parents 71850bdf9e1e
children bdf4630ac1eb
comparison
equal deleted inserted replaced
410:d660c5b03c14 411:6b015d3184ab
15 15
16 import zipfile 16 import zipfile
17 import gzip 17 import gzip
18 import bz2 18 import bz2
19 from io import StringIO 19 from io import StringIO
20 import rule_parsing as rulesUtils
21 import reaction_parsing as reactionUtils
20 22
21 23
22 24
23 class ValueErr(Exception): 25 class ValueErr(Exception):
24 def __init__(self, param_name, expected, actual): 26 def __init__(self, param_name, expected, actual):
979 except Exception as e: 981 except Exception as e:
980 validation['growth_rate'] = None 982 validation['growth_rate'] = None
981 validation['status'] = f"Error: {e}" 983 validation['status'] = f"Error: {e}"
982 984
983 return validation 985 return validation
986
987
988 ################################- DATA GENERATION -################################
# Alias used in the type annotations of the data-generation helpers below to
# mark strings that are reaction ids.
ReactionId = str
def generate_rules(model: cobra.Model, *, asParsed = True) -> Union[Dict[ReactionId, rulesUtils.OpList], Dict[ReactionId, str]]:
    """
    Builds a dictionary mapping reaction ids to the gene-reaction rules found in the model.

    Reactions whose `gene_reaction_rule` is empty are left out of the result.

    Args:
        model : the model to derive data from.
        asParsed : if True every rule is passed through `rulesUtils.parseRuleToNestedList`,
            otherwise the rules are returned exactly as stored on the reactions.

    Returns:
        Dict[ReactionId, rulesUtils.OpList] : the parsed rules, when asParsed is True.
        Dict[ReactionId, str] : the raw rules, when asParsed is False.
    """
    # Collect the non-empty raw rules first, so the parsed/raw decision is
    # taken once for the whole model rather than once per reaction.
    rawRules = {}
    for rxn in model.reactions:
        rule = rxn.gene_reaction_rule
        if rule:
            rawRules[rxn.id] = rule

    if not asParsed:
        return rawRules

    return {
        rxnId : rulesUtils.parseRuleToNestedList(rule)
        for rxnId, rule in rawRules.items()
    }
1013
def generate_reactions(model :cobra.Model, *, asParsed = True) -> Dict[ReactionId, str]:
    """
    Builds a dictionary mapping reaction ids to the reaction formulas found in the model.

    Reactions whose `reaction` attribute is empty are left out of the result.

    Args:
        model : the model to derive data from.
        asParsed : if True the collected formulas are handed to
            `reactionUtils.create_reaction_dict` and its result is returned,
            otherwise the formulas are returned as they are.

    Returns:
        Dict[ReactionId, str] : the generated dictionary.
    """
    rawFormulas = {}
    for rxn in model.reactions:
        formula = rxn.reaction
        if formula:
            rawFormulas[rxn.id] = formula

    return reactionUtils.create_reaction_dict(rawFormulas) if asParsed else rawFormulas
1035
def get_medium(model:cobra.Model) -> pd.DataFrame:
    """
    Lists the reactions that have no metabolite with a positive coefficient
    and whose lower bound is negative.

    NOTE(review): these are presumably the uptake reactions that define the
    growth medium; confirm against the callers.

    Args:
        model : the model whose reactions are inspected.

    Returns:
        pd.DataFrame : a single-column frame ("reaction") holding the ids of
            the selected reactions, in model order.
    """
    mediumIds = [
        rxn.id
        for rxn in model.reactions
        if not any(rxn.get_coefficient(met.id) > 0 for met in rxn.metabolites)
        and rxn.lower_bound < 0
    ]

    df_medium = pd.DataFrame()
    df_medium["reaction"] = mediumIds
    return df_medium
1049
def generate_bounds(model:cobra.Model) -> pd.DataFrame:
    """
    Collects the lower and upper bound of every reaction in the model.

    Args:
        model : the model to read the bounds from.

    Returns:
        pd.DataFrame : a frame indexed by reaction id with the columns
            "lower_bound" and "upper_bound".
    """
    reactionIds = [rxn.id for rxn in model.reactions]
    bounds = pd.DataFrame(index=reactionIds, columns = ["lower_bound", "upper_bound"])

    # Fill the pre-allocated frame one reaction (row) at a time.
    for rxn in model.reactions:
        bounds.loc[rxn.id] = [rxn.lower_bound, rxn.upper_bound]

    return bounds
1061
1062
1063
def generate_compartments(model: cobra.Model) -> pd.DataFrame:
    """
    Generates a DataFrame listing the pathways annotated on each reaction.

    Despite the function name, the data is read from the 'pathways' entry of
    each reaction's annotation. The entry may be a list of pathways or a
    single value; a single value is treated as a one-element list. Reactions
    without a 'pathways' entry get an empty row instead of raising KeyError.

    Args:
        model: the COBRA model to extract pathway annotations from.

    Returns:
        pd.DataFrame: one row per reaction, with a "ReactionID" column followed
            by "Pathway_1" ... "Pathway_N", where N is the largest number of
            pathways found on any reaction. Missing positions hold None.
            For a model without reactions only the "ReactionID" column exists.
    """
    # First pass: normalise every annotation to a list and track the widest one.
    # The width is updated for single-value annotations too, otherwise a model
    # with only single-value annotations would end up with no pathway columns.
    reaction_pathways = {}
    max_pathways = 0

    for reaction in model.reactions:
        pathways = reaction.annotation.get('pathways')
        if pathways is None:
            pathways = []
        elif not isinstance(pathways, list):
            pathways = [pathways]

        reaction_pathways[reaction.id] = pathways
        max_pathways = max(max_pathways, len(pathways))

    pathway_columns = [f"Pathway_{i+1}" for i in range(max_pathways)]

    # Second pass: one row per reaction, padded with None up to the widest one.
    pathway_data = []
    for reaction_id, pathways in reaction_pathways.items():
        row = {"ReactionID": reaction_id}
        padded = pathways + [None] * (max_pathways - len(pathways))
        row.update(zip(pathway_columns, padded))
        pathway_data.append(row)

    # Passing the columns explicitly keeps the header stable for empty models.
    return pd.DataFrame(pathway_data, columns=["ReactionID", *pathway_columns])