annotate COBRAxy/src/utils/reaction_parsing.py @ 546:01147e83f43c draft default tip

Uploaded
author luca_milaz
date Mon, 27 Oct 2025 12:33:08 +0000
parents fcdbc81feb45
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
539
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
1 """
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
2 Helpers to parse reaction strings into structured dictionaries.
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
3
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
4 Features:
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
5 - Reaction direction detection (forward, backward, reversible)
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
6 - Parsing of custom reaction strings into stoichiometric maps
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
7 - Conversion of a dict of raw reactions into a directional reactions dict
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
8 - Loading custom reactions from a tabular file (TSV)
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
9 """
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
10 from enum import Enum
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
11 from typing import Dict
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
12 import re
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
13
542
fcdbc81feb45 Uploaded
francesco_lapi
parents: 539
diff changeset
14 try:
fcdbc81feb45 Uploaded
francesco_lapi
parents: 539
diff changeset
15 from . import general_utils as utils
fcdbc81feb45 Uploaded
francesco_lapi
parents: 539
diff changeset
16 except:
fcdbc81feb45 Uploaded
francesco_lapi
parents: 539
diff changeset
17 import general_utils as utils
fcdbc81feb45 Uploaded
francesco_lapi
parents: 539
diff changeset
18
539
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
19 # Reaction direction encoding:
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
class ReactionDir(Enum):
    """
    Direction of a reaction: forward, backward, or reversible (both ways).

    Each member's value is the arrow token that marks that direction inside
    a reaction formula string.
    """
    FORWARD = "-->"
    BACKWARD = "<--"
    REVERSIBLE = "<=>"

    @classmethod
    def fromReaction(cls, reaction :str) -> 'ReactionDir':
        """
        Determine a reaction's direction from the arrow found in its formula.

        Members are checked in declaration order (FORWARD, BACKWARD,
        REVERSIBLE) and the first one whose arrow occurs anywhere in the
        formula is returned.

        Args:
            reaction : the reaction's formula.

        Raises:
            ValueError : if none of the known arrows occurs in the formula.

        Returns:
            ReactionDir : the direction matching the arrow that was found.
        """
        detected = next(
            (direction for direction in cls if direction.value in reaction),
            None,
        )
        if detected is None:
            raise ValueError("No valid arrow found within reaction string.")

        return detected
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
48
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
# Maps a reaction ID to a {metabolite ID: stoichiometric coefficient} dict.
ReactionsDict = Dict[str, Dict[str, float]]


def add_custom_reaction(reactionsDict :ReactionsDict, rId :str, reaction :str) -> None:
    """
    Add one reaction entry to reactionsDict.

    The entry maps each metabolite ID found in the given formula side to its
    stoichiometric coefficient. If a metabolite appears without an explicit
    coefficient, 1.0 is assumed. If the same metabolite appears more than
    once, the last occurrence wins.

    Args:
        reactionsDict: Dict to update in place.
        rId: Unique reaction ID; any existing entry under this ID is replaced.
        reaction: One side of a reaction formula, with terms separated
            by ' + ' (e.g. "2 h2o + 0.5 o2").

    Returns:
        None

    Side effects: updates reactionsDict in place. A blank formula leaves the
    dict untouched.
    """
    reaction = reaction.strip()
    if not reaction: return

    # A coefficient is an integer or decimal number (optionally in scientific
    # notation) at the START of a term, separated from the metabolite ID by
    # whitespace. Anchoring matters: IDs such as "h2o" contain digits too.
    coeffPattern = re.compile(r"(\d+(?:\.\d+)?(?:[eE][+-]?\d+)?)\s+(.+)")

    reactionsDict[rId] = {}
    # Assumes ' + ' is spaced to avoid confusion with charge symbols.
    for word in reaction.split(" + "):
        # Tolerate irregular spacing around the separator.
        word = word.strip()
        if not word: continue

        metabId, stoichCoeff = word, 1.0
        foundCoeff = coeffPattern.match(word)
        if foundCoeff:
            stoichCoeff = float(foundCoeff.group(1))
            metabId = foundCoeff.group(2).strip()

        reactionsDict[rId][metabId] = stoichCoeff

    if not reactionsDict[rId]: del reactionsDict[rId] # Empty reactions are removed.
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
86
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
87
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
def create_reaction_dict(unparsed_reactions: Dict[str, str]) -> ReactionsDict:
    """
    Parse a dict of raw reaction strings into a directional reactions dict.

    For each reaction the direction arrow is detected and the formula is split
    into its left and right sides. Forward reactions contribute their left
    side, backward reactions their right side, and reversible reactions
    contribute both, stored under the reaction ID suffixed with _F (left side)
    and _B (right side).

    Args:
        unparsed_reactions: Mapping reaction ID -> raw reaction string.

    Raises:
        ValueError: if a reaction string contains no valid arrow, or contains
            its arrow more than once.

    Returns:
        ReactionsDict: Parsed dict. Reversible reactions produce two entries with _F and _B suffixes.
    """
    reactionsDict :ReactionsDict = {}
    for rId, reaction in unparsed_reactions.items():
        reactionDir = ReactionDir.fromReaction(reaction)
        arrow = reactionDir.value

        sides = reaction.split(f" {arrow} ")
        if len(sides) == 2:
            left, right = sides
        else:
            # The arrow is not cleanly space-delimited: either one side is
            # empty and its whitespace was trimmed (e.g. "A -->"), or the
            # arrow is written without spaces. Split on the bare arrow instead.
            left, _, right = reaction.partition(arrow)
            if arrow in right:
                raise ValueError(
                    f"Reaction '{rId}' contains more than one '{arrow}' arrow: {reaction!r}")

        # Reversible reactions are split into two: forward (_F) and backward (_B).
        reactionIsReversible = reactionDir is ReactionDir.REVERSIBLE
        forwardSuffix = "_F" if reactionIsReversible else ""
        backwardSuffix = "_B" if reactionIsReversible else ""

        if reactionDir is not ReactionDir.BACKWARD:
            add_custom_reaction(reactionsDict, rId + forwardSuffix, left)

        if reactionDir is not ReactionDir.FORWARD:
            add_custom_reaction(reactionsDict, rId + backwardSuffix, right)

    return reactionsDict
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
112
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
113
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
def parse_custom_reactions(customReactionsPath :str) -> ReactionsDict:
    """
    Load custom reactions from a tabular file and parse into a reactions dict.

    The file is read as tab-separated values with the header kept as the
    first row; the header is used to locate the columns to read.

    Args:
        customReactionsPath: Path to the tab-separated reactions file.

    Raises:
        ValueError: if the file holds no rows besides the header, or if a
            reaction formula cannot be parsed.

    Returns:
        ReactionsDict: Parsed reactions dictionary.
    """
    rows = utils.readCsv(utils.FilePath.fromStrPath(customReactionsPath), delimiter = "\t", skipHeader=False)
    if len(rows) <= 1:
        raise ValueError("The custom reactions file must contain at least one reaction.")

    # NOTE(review): findIdxByName presumably returns the (ID column index,
    # "Formula" column index) pair for the given header row — confirm against
    # general_utils.
    id_idx, idx_formula = utils.findIdxByName(rows[0], "Formula")

    # Later rows silently overwrite earlier ones that share the same ID.
    reactionsData = {row[id_idx] : row[idx_formula] for row in rows[1:]}

    return create_reaction_dict(reactionsData)
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
142