comparison COBRAxy/src/utils/reaction_parsing.py @ 539:2fb97466e404 draft

Uploaded
author francesco_lapi
date Sat, 25 Oct 2025 14:55:13 +0000
parents
children fcdbc81feb45
comparison
equal deleted inserted replaced
538:fd53d42348bd 539:2fb97466e404
1 """
2 Helpers to parse reaction strings into structured dictionaries.
3
4 Features:
5 - Reaction direction detection (forward, backward, reversible)
6 - Parsing of custom reaction strings into stoichiometric maps
7 - Conversion of a dict of raw reactions into a directional reactions dict
8 - Loading custom reactions from a tabular file (TSV)
9 """
10 from enum import Enum
11 import utils.general_utils as utils
12 from typing import Dict
13 import re
14
15 # Reaction direction encoding:
class ReactionDir(Enum):
    """
    Direction of a reaction: forward, backward, or reversible (both ways).

    Each member's value is the arrow token that encodes that direction inside
    a Cobrapy-style formula string.
    """
    FORWARD = "-->"
    BACKWARD = "<--"
    REVERSIBLE = "<=>"

    @classmethod
    def fromReaction(cls, reaction: str) -> 'ReactionDir':
        """
        Detect the direction of a reaction from the arrow present in its formula.

        Members are tested in declaration order and the first one whose arrow
        occurs anywhere in the formula wins.

        Args:
            reaction : the reaction's formula.

        Raises:
            ValueError : if no valid arrow is found.

        Returns:
            ReactionDir : the corresponding reaction direction.
        """
        direction = next(
            (candidate for candidate in cls if candidate.value in reaction),
            None,
        )
        if direction is None:
            raise ValueError("No valid arrow found within reaction string.")

        return direction
44
# Maps each reaction ID to a {metabolite ID: stoichiometric coefficient} dict.
ReactionsDict = Dict[str, Dict[str, float]]


def add_custom_reaction(reactionsDict :ReactionsDict, rId :str, reaction :str) -> None:
    """
    Parse one side of a reaction formula and store it in reactionsDict under rId.

    The stored entry maps each metabolite ID to its stoichiometric coefficient.
    A metabolite written without an explicit coefficient gets 1.0.

    Args:
        reactionsDict: Dict to update in place.
        rId: Unique reaction ID; an entry already stored under it is replaced.
        reaction: Formula fragment such as "2 atp_c + h2o_c". Terms must be
            separated by ' + ' and a coefficient must be separated from its
            metabolite ID by whitespace.

    Returns:
        None

    Side effects: updates reactionsDict in place. A blank formula leaves it untouched.
    """
    reaction = reaction.strip()
    if not reaction: return

    stoichiometry :Dict[str, float] = {}
    # Assumes ' + ' is spaced to avoid confusion with charge symbols.
    for term in reaction.split(" + "):
        # Extra spacing around ' + ' must not leak into metabolite IDs.
        term = term.strip()
        if not term: continue

        # The coefficient (integer or dot-decimal float) is only recognised as the
        # LEADING token of the term. Anchoring matters: an unanchored search would
        # also fire on digits inside or at the end of a metabolite ID.
        coeffMatch = re.match(r"(\d+(?:\.\d+)?)\s+(.+)", term)
        if coeffMatch:
            stoichiometry[coeffMatch.group(2)] = float(coeffMatch.group(1))
        else:
            stoichiometry[term] = 1.0

    if stoichiometry:
        reactionsDict[rId] = stoichiometry
    else:
        # Empty reactions are removed.
        reactionsDict.pop(rId, None)
82
83
def create_reaction_dict(unparsed_reactions: Dict[str, str]) -> ReactionsDict:
    """
    Parse a dict of raw reaction strings into a directional reactions dict.

    For a forward reaction the left-hand side of the arrow is stored under the
    reaction ID; for a backward reaction the right-hand side is stored instead.
    A reversible reaction stores both: the left-hand side under "<id>_F" and
    the right-hand side under "<id>_B". A side that is blank produces no entry.

    Args:
        unparsed_reactions: Mapping reaction ID -> raw reaction string.

    Raises:
        ValueError: if a formula contains no valid arrow, or if it cannot be
            split into exactly two sides around a space-delimited arrow.

    Returns:
        ReactionsDict: Parsed dict. Reversible reactions produce two entries with _F and _B suffixes.
    """
    reactionsDict :ReactionsDict = {}
    for rId, reaction in unparsed_reactions.items():
        reactionDir = ReactionDir.fromReaction(reaction)
        arrow = reactionDir.value

        # The arrow must be space-delimited, but a formula with an empty side may
        # have lost its edge whitespace (e.g. "A -->"). Padding both ends lets the
        # spaced arrow still match at the very start or end of the string.
        sides = f" {reaction} ".split(f" {arrow} ")
        if len(sides) != 2:
            raise ValueError(
                f"Reaction '{rId}' must contain exactly one '{arrow}' arrow surrounded by spaces.")

        left, right = sides

        if reactionDir is ReactionDir.REVERSIBLE:
            # Reversible reactions are split into two: forward (_F) and backward (_B).
            add_custom_reaction(reactionsDict, rId + "_F", left)
            add_custom_reaction(reactionsDict, rId + "_B", right)
        elif reactionDir is ReactionDir.FORWARD:
            add_custom_reaction(reactionsDict, rId, left)
        else:
            add_custom_reaction(reactionsDict, rId, right)

    return reactionsDict
108
109
def parse_custom_reactions(customReactionsPath :str) -> ReactionsDict:
    """
    Load custom reactions from a tab-separated file and parse them into a reactions dict.

    The first row is used as the header to locate the ID and "Formula" columns;
    every following row contributes one reaction. If the same ID appears on
    several rows, the last row wins.

    Args:
        customReactionsPath: Path to the tab-separated reactions file.

    Raises:
        ValueError: if the file has no rows besides the header.

    Returns:
        ReactionsDict: Parsed reactions dictionary.
    """
    # A single read is enough: retrying with identical settings can only repeat
    # the same failure, so errors are allowed to propagate directly.
    rows = utils.readCsv(utils.FilePath.fromStrPath(customReactionsPath), delimiter = "\t", skipHeader=False)
    if len(rows) <= 1:
        raise ValueError("The custom reactions file must contain at least one reaction.")

    # NOTE(review): findIdxByName is unpacked as (ID column index, "Formula" column
    # index); confirm against the helper's contract in utils.general_utils.
    id_idx, idx_formula = utils.findIdxByName(rows[0], "Formula")

    reactionsData = {row[id_idx] : row[idx_formula] for row in rows[1:]}
    return create_reaction_dict(reactionsData)
138