Mercurial > repos > bimib > cobraxy
diff COBRAxy/src/utils/reaction_parsing.py @ 539:2fb97466e404 draft
Uploaded
| author | francesco_lapi |
|---|---|
| date | Sat, 25 Oct 2025 14:55:13 +0000 |
| parents | |
| children | fcdbc81feb45 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/COBRAxy/src/utils/reaction_parsing.py Sat Oct 25 14:55:13 2025 +0000 @@ -0,0 +1,138 @@ +""" +Helpers to parse reaction strings into structured dictionaries. + +Features: +- Reaction direction detection (forward, backward, reversible) +- Parsing of custom reaction strings into stoichiometric maps +- Conversion of a dict of raw reactions into a directional reactions dict +- Loading custom reactions from a tabular file (TSV) +""" +from enum import Enum +import utils.general_utils as utils +from typing import Dict +import re + +# Reaction direction encoding: +class ReactionDir(Enum): + """ + A reaction can go forward, backward, or be reversible (both directions). + Cobrapy-style formulas encode direction using specific arrows handled here. + """ + FORWARD = "-->" + BACKWARD = "<--" + REVERSIBLE = "<=>" + + @classmethod + def fromReaction(cls, reaction :str) -> 'ReactionDir': + """ + Takes a whole reaction formula string and looks for one of the arrows, returning the + corresponding reaction direction. + + Args: + reaction : the reaction's formula. + + Raises: + ValueError : if no valid arrow is found. + + Returns: + ReactionDir : the corresponding reaction direction. + """ + for member in cls: + if member.value in reaction: return member + + raise ValueError("No valid arrow found within reaction string.") + +ReactionsDict = Dict[str, Dict[str, float]] + + +def add_custom_reaction(reactionsDict :ReactionsDict, rId :str, reaction :str) -> None: + """ + Add one reaction entry to reactionsDict. + + The entry maps each substrate ID to its stoichiometric coefficient. + If a substrate appears without an explicit coefficient, 1.0 is assumed. + + Args: + reactionsDict: Dict to update in place. + rId: Unique reaction ID. + reaction: Reaction formula string. + + Returns: + None + + Side effects: updates reactionsDict in place. + """ + reaction = reaction.strip() + if not reaction: return + + reactionsDict[rId] = {} + # Assumes ' + ' is spaced to avoid confusion with charge symbols. + for word in reaction.split(" + "): + metabId, stoichCoeff = word, 1.0 + # Coefficient can be integer or float (dot decimal) and must be space-separated. + foundCoeff = re.search(r"\d+(\.\d+)? ", word) + if foundCoeff: + wholeMatch = foundCoeff.group(0) + metabId = word[len(wholeMatch):].strip() + stoichCoeff = float(wholeMatch.strip()) + + reactionsDict[rId][metabId] = stoichCoeff + + if not reactionsDict[rId]: del reactionsDict[rId] # Empty reactions are removed. + + +def create_reaction_dict(unparsed_reactions: Dict[str, str]) -> ReactionsDict: + """ + Parse a dict of raw reaction strings into a directional reactions dict. + + Args: + unparsed_reactions: Mapping reaction ID -> raw reaction string. + + Returns: + ReactionsDict: Parsed dict. Reversible reactions produce two entries with _F and _B suffixes. + """ + reactionsDict :ReactionsDict = {} + for rId, reaction in unparsed_reactions.items(): + reactionDir = ReactionDir.fromReaction(reaction) + left, right = reaction.split(f" {reactionDir.value} ") + + # Reversible reactions are split into two: forward (_F) and backward (_B). + reactionIsReversible = reactionDir is ReactionDir.REVERSIBLE + if reactionDir is not ReactionDir.BACKWARD: + add_custom_reaction(reactionsDict, rId + "_F" * reactionIsReversible, left) + + if reactionDir is not ReactionDir.FORWARD: + add_custom_reaction(reactionsDict, rId + "_B" * reactionIsReversible, right) + + return reactionsDict + + +def parse_custom_reactions(customReactionsPath :str) -> ReactionsDict: + """ + Load custom reactions from a tabular file and parse into a reactions dict. + + Args: + customReactionsPath: Path to the reactions file (TSV or CSV-like). + + Returns: + ReactionsDict: Parsed reactions dictionary. + """ + try: + rows = utils.readCsv(utils.FilePath.fromStrPath(customReactionsPath), delimiter = "\t", skipHeader=False) + if len(rows) <= 1: + raise ValueError("The custom reactions file must contain at least one reaction.") + + id_idx, idx_formula = utils.findIdxByName(rows[0], "Formula") + + except Exception as e: + # Fallback re-read with same settings; preserves original behavior + rows = utils.readCsv(utils.FilePath.fromStrPath(customReactionsPath), delimiter = "\t", skipHeader=False) + if len(rows) <= 1: + raise ValueError("The custom reactions file must contain at least one reaction.") + + id_idx, idx_formula = utils.findIdxByName(rows[0], "Formula") + + reactionsData = {row[id_idx] : row[idx_formula] for row in rows[1:]} + + return create_reaction_dict(reactionsData) +
