Mercurial > repos > bimib > cobraxy
comparison COBRAxy/utils/general_utils.py @ 411:6b015d3184ab draft
Uploaded
author | francesco_lapi |
---|---|
date | Mon, 08 Sep 2025 21:07:34 +0000 |
parents | 71850bdf9e1e |
children | bdf4630ac1eb |
comparison
equal
deleted
inserted
replaced
410:d660c5b03c14 | 411:6b015d3184ab |
---|---|
15 | 15 |
16 import zipfile | 16 import zipfile |
17 import gzip | 17 import gzip |
18 import bz2 | 18 import bz2 |
19 from io import StringIO | 19 from io import StringIO |
20 import rule_parsing as rulesUtils | |
21 import reaction_parsing as reactionUtils | |
20 | 22 |
21 | 23 |
22 | 24 |
23 class ValueErr(Exception): | 25 class ValueErr(Exception): |
24 def __init__(self, param_name, expected, actual): | 26 def __init__(self, param_name, expected, actual): |
979 except Exception as e: | 981 except Exception as e: |
980 validation['growth_rate'] = None | 982 validation['growth_rate'] = None |
981 validation['status'] = f"Error: {e}" | 983 validation['status'] = f"Error: {e}" |
982 | 984 |
983 return validation | 985 return validation |
986 | |
987 | |
988 ################################- DATA GENERATION -################################ | |
ReactionId = str


def generate_rules(model: cobra.Model, *, asParsed = True) -> Union[Dict[ReactionId, rulesUtils.OpList], Dict[ReactionId, str]]:
    """
    Generates a dictionary mapping reaction ids to rules from the model.

    Reactions whose ``gene_reaction_rule`` is empty are left out of the result.

    Args:
        model : the model to derive data from.
        asParsed : if True every rule is passed through ``rulesUtils.parseRuleToNestedList``,
            otherwise the rules are returned as the raw strings stored on the reactions.

    Returns:
        Dict[ReactionId, rulesUtils.OpList] : the generated dictionary of parsed rules (asParsed = True).
        Dict[ReactionId, str] : the generated dictionary of raw rules (asParsed = False).
    """
    # Collect the non-empty raw rules first; parsing, when requested, is a
    # separate pass so the asParsed check happens once rather than per rule.
    raw_rules = {
        reaction.id: reaction.gene_reaction_rule
        for reaction in model.reactions
        if reaction.gene_reaction_rule
    }

    if not asParsed:
        return raw_rules

    return {
        reaction_id: rulesUtils.parseRuleToNestedList(rule)
        for reaction_id, rule in raw_rules.items()
    }
1013 | |
def generate_reactions(model :cobra.Model, *, asParsed = True) -> Dict[ReactionId, str]:
    """
    Builds a dictionary mapping reaction ids to reaction formulas from the model.

    Reactions whose formula (``reaction.reaction``) is empty are skipped.

    Args:
        model : the model to derive data from.
        asParsed : if True the collected formulas are handed to
            ``reactionUtils.create_reaction_dict`` and its result is returned,
            otherwise the raw formula strings are returned unchanged.

    Returns:
        Dict[ReactionId, str] : the raw formulas when asParsed is False; otherwise
            whatever ``reactionUtils.create_reaction_dict`` produces for them.
    """
    formulas = {}
    for rxn in model.reactions:
        formula = rxn.reaction
        if formula:
            formulas[rxn.id] = formula

    if asParsed:
        return reactionUtils.create_reaction_dict(formulas)

    return formulas
1035 | |
def get_medium(model:cobra.Model) -> pd.DataFrame:
    """
    Collects the ids of the reactions that make up the model's medium.

    A reaction is selected when its lower bound is negative and none of its
    metabolites has a positive coefficient in it.

    Args:
        model : the model to derive data from.

    Returns:
        pd.DataFrame : a single-column DataFrame ("reaction") holding the selected
            reaction ids, in model order.
    """
    true_medium = [
        reaction.id
        for reaction in model.reactions
        # Cheap bound check first; any() stops at the first positive coefficient.
        if reaction.lower_bound < 0
        and not any(reaction.get_coefficient(met.id) > 0 for met in reaction.metabolites)
    ]

    df_medium = pd.DataFrame()
    df_medium["reaction"] = true_medium
    return df_medium
1049 | |
def generate_bounds(model:cobra.Model) -> pd.DataFrame:
    """
    Builds a table with the flux bounds of every reaction in the model.

    Args:
        model : the model to derive data from.

    Returns:
        pd.DataFrame : indexed by reaction id, with columns "lower_bound" and
            "upper_bound" holding each reaction's bounds.
    """
    reactions = list(model.reactions)

    # Build the frame in one shot instead of one .loc write per reaction.
    # dtype=object keeps the bound values exactly as stored on the reactions,
    # matching the frame the row-by-row fill used to produce.
    return pd.DataFrame(
        {
            "lower_bound": [reaction.lower_bound for reaction in reactions],
            "upper_bound": [reaction.upper_bound for reaction in reactions],
        },
        index=[reaction.id for reaction in reactions],
        dtype=object,
    )
1061 | |
1062 | |
1063 | |
def generate_compartments(model: cobra.Model) -> pd.DataFrame:
    """
    Generates a DataFrame listing the pathways annotated on each reaction.

    NOTE: despite the function name, the data comes from
    ``reaction.annotation['pathways']`` and the output columns are named
    Pathway_1, Pathway_2, etc. (one per pathway position).

    The annotation may be a list of pathways or a single value; a single value
    is treated as a one-element list. Reactions without a 'pathways' annotation
    still get a row, with every pathway column left empty (None).

    Args:
        model: the COBRA model to extract pathway annotations from.

    Returns:
        pd.DataFrame: one row per reaction with a "ReactionID" column followed by
            as many "Pathway_N" columns as the longest pathway list requires.
    """
    # First pass: normalise every reaction's annotation to a list of pathways.
    reaction_pathways = {}
    for reaction in model.reactions:
        annotated = reaction.annotation.get('pathways')
        if annotated is None:
            pathways = []
        elif isinstance(annotated, list):
            pathways = annotated
        else:
            pathways = [annotated]
        reaction_pathways[reaction.id] = pathways

    # The widest list decides how many columns are needed; single-value
    # annotations count as width 1 so they are not dropped from the output.
    max_pathways = max(map(len, reaction_pathways.values()), default=0)
    pathway_columns = [f"Pathway_{i+1}" for i in range(max_pathways)]

    # Second pass: one row per reaction, padding short lists with None.
    pathway_data = []
    for reaction_id, pathways in reaction_pathways.items():
        row = {"ReactionID": reaction_id}
        padding = [None] * (max_pathways - len(pathways))
        row.update(zip(pathway_columns, pathways + padding))
        pathway_data.append(row)

    # Passing the columns explicitly keeps the header stable for an empty model.
    return pd.DataFrame(pathway_data, columns=["ReactionID", *pathway_columns])