annotate COBRAxy/src/utils/reaction_parsing.py @ 539:2fb97466e404 draft

Uploaded
author francesco_lapi
date Sat, 25 Oct 2025 14:55:13 +0000
parents
children fcdbc81feb45
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
539
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
1 """
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
2 Helpers to parse reaction strings into structured dictionaries.
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
3
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
4 Features:
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
5 - Reaction direction detection (forward, backward, reversible)
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
6 - Parsing of custom reaction strings into stoichiometric maps
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
7 - Conversion of a dict of raw reactions into a directional reactions dict
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
8 - Loading custom reactions from a tabular file (TSV)
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
9 """
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
10 from enum import Enum
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
11 import utils.general_utils as utils
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
12 from typing import Dict
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
13 import re
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
14
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
15 # Reaction direction encoding:
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
class ReactionDir(Enum):
    """
    Direction of a reaction, identified by the arrow used in its formula.

    Each member's value is the arrow token looked up in the formula string:
    "-->" (forward), "<--" (backward) and "<=>" (reversible, both directions).
    """
    FORWARD = "-->"
    BACKWARD = "<--"
    REVERSIBLE = "<=>"

    @classmethod
    def fromReaction(cls, reaction :str) -> 'ReactionDir':
        """
        Determine a reaction's direction from the arrow found in its formula.

        Members are tried in declaration order and the first one whose arrow
        occurs anywhere in the string wins.

        Args:
            reaction : the reaction's formula.

        Raises:
            ValueError : if no valid arrow is found.

        Returns:
            ReactionDir : the corresponding reaction direction.
        """
        direction = next((candidate for candidate in cls if candidate.value in reaction), None)
        if direction is None:
            raise ValueError("No valid arrow found within reaction string.")

        return direction
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
44
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
# Maps a reaction ID to a {metabolite ID: stoichiometric coefficient} dict.
ReactionsDict = Dict[str, Dict[str, float]]


def add_custom_reaction(reactionsDict :ReactionsDict, rId :str, reaction :str) -> None:
    """
    Add one reaction entry to reactionsDict.

    The entry maps each substrate ID to its stoichiometric coefficient.
    If a substrate appears without an explicit coefficient, 1.0 is assumed.
    Substrates must be separated by ' + ' (with spaces), so that a '+' inside
    an ID (e.g. a charge symbol) is not mistaken for a separator.

    Args:
        reactionsDict: Dict to update in place.
        rId: Unique reaction ID. Any existing entry with this ID is replaced.
        reaction: One side of a reaction formula, e.g. "2 atp_c + h2o_c".
            A blank string is ignored and leaves reactionsDict untouched.

    Returns:
        None

    Side effects: updates reactionsDict in place.
    """
    reaction = reaction.strip()
    if not reaction: return

    metabolites :Dict[str, float] = {}
    for word in reaction.split(" + "):
        # Extra spacing around a separator must not leak into the metabolite ID.
        word = word.strip()
        if not word: continue

        metabId, stoichCoeff = word, 1.0
        # The coefficient (integer or dot-decimal) is only recognised at the very
        # start of the token and must be followed by whitespace. Anchoring the
        # match keeps digits inside an ID (e.g. "h2o") from being read as one.
        foundCoeff = re.match(r"(\d+(?:\.\d+)?)\s+(.+)", word)
        if foundCoeff:
            stoichCoeff = float(foundCoeff.group(1))
            metabId = foundCoeff.group(2).strip()

        metabolites[metabId] = stoichCoeff

    if metabolites:
        reactionsDict[rId] = metabolites
    else:
        # Empty reactions are removed (any stale entry for this ID is dropped).
        reactionsDict.pop(rId, None)
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
82
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
83
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
def create_reaction_dict(unparsed_reactions: Dict[str, str]) -> ReactionsDict:
    """
    Parse a dict of raw reaction strings into a directional reactions dict.

    For every reaction only the side that is consumed in a given direction is
    stored: the left-hand side for the forward direction and the right-hand
    side for the backward direction.

    Args:
        unparsed_reactions: Mapping reaction ID -> raw reaction string.

    Raises:
        ValueError: if a formula contains no valid arrow, or contains the same
            arrow more than once.

    Returns:
        ReactionsDict: Parsed dict. Reversible reactions produce two entries with _F and _B suffixes.
    """
    reactionsDict :ReactionsDict = {}
    for rId, reaction in unparsed_reactions.items():
        reactionDir = ReactionDir.fromReaction(reaction)

        # Split on the bare arrow rather than on " arrow ": one-sided formulas
        # (e.g. "glc_e <=>") and tightly written ones ("a-->b") would otherwise
        # fail to unpack. Surrounding whitespace is stripped by add_custom_reaction.
        left, _, right = reaction.partition(reactionDir.value)
        if reactionDir.value in right:
            raise ValueError(
                f"Reaction '{rId}' contains more than one '{reactionDir.value}' arrow.")

        # Reversible reactions are split into two: forward (_F) and backward (_B).
        reactionIsReversible = reactionDir is ReactionDir.REVERSIBLE
        if reactionDir is not ReactionDir.BACKWARD:
            forwardId = rId + "_F" if reactionIsReversible else rId
            add_custom_reaction(reactionsDict, forwardId, left)

        if reactionDir is not ReactionDir.FORWARD:
            backwardId = rId + "_B" if reactionIsReversible else rId
            add_custom_reaction(reactionsDict, backwardId, right)

    return reactionsDict
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
108
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
109
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
def parse_custom_reactions(customReactionsPath :str) -> ReactionsDict:
    """
    Load custom reactions from a tabular file and parse into a reactions dict.

    The file is read as tab-separated text. Its first row is treated as the
    header and is used to locate the reaction ID column and the "Formula"
    column; every following row contributes one reaction.

    Args:
        customReactionsPath: Path to the tab-separated reactions file.

    Raises:
        ValueError: if the file holds no rows besides the header, or if a
            formula cannot be parsed.

    Returns:
        ReactionsDict: Parsed reactions dictionary.
    """
    # The file is read exactly once. The former `except Exception` branch just
    # repeated these same statements, so it could only fail again the same way.
    rows = utils.readCsv(utils.FilePath.fromStrPath(customReactionsPath), delimiter = "\t", skipHeader=False)
    if len(rows) <= 1:
        raise ValueError("The custom reactions file must contain at least one reaction.")

    # NOTE(review): findIdxByName is unpacked as (ID column index, formula
    # column index); how the ID column is chosen is decided by that helper.
    id_idx, idx_formula = utils.findIdxByName(rows[0], "Formula")

    reactionsData = {row[id_idx] : row[idx_formula] for row in rows[1:]}

    return create_reaction_dict(reactionsData)
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
138