annotate cobraxy-9688ad27287b/COBRAxy/utils/reaction_parsing.py @ 90:a48b2e06ebe7 draft

Uploaded
author luca_milaz
date Sun, 13 Oct 2024 11:35:56 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
90
a48b2e06ebe7 Uploaded
luca_milaz
parents:
diff changeset
1 from enum import Enum
a48b2e06ebe7 Uploaded
luca_milaz
parents:
diff changeset
2 import utils.general_utils as utils
a48b2e06ebe7 Uploaded
luca_milaz
parents:
diff changeset
3 from typing import Dict
a48b2e06ebe7 Uploaded
luca_milaz
parents:
diff changeset
4 import csv
a48b2e06ebe7 Uploaded
luca_milaz
parents:
diff changeset
5 import re
a48b2e06ebe7 Uploaded
luca_milaz
parents:
diff changeset
6
a48b2e06ebe7 Uploaded
luca_milaz
parents:
diff changeset
7 # Reaction direction encoding:
a48b2e06ebe7 Uploaded
luca_milaz
parents:
diff changeset
class ReactionDir(Enum):
    """
    Direction in which a reaction proceeds: forwards, backwards, or both ways (reversible).
    Each member's value is the arrow that marks that direction inside a reaction formula,
    following the convention used by models created / managed with cobrapy.
    """
    FORWARD = "-->"
    BACKWARD = "<--"
    REVERSIBLE = "<=>"

    @classmethod
    def fromReaction(cls, reaction :str) -> 'ReactionDir':
        """
        Determines a reaction's direction from the arrow contained in its formula.
        Members are tested in definition order and the first one whose arrow occurs
        anywhere in the formula is returned.

        Args:
            reaction : the reaction's formula.

        Raises:
            ValueError : if none of the known arrows appears in the formula.

        Returns:
            ReactionDir : the direction matching the arrow that was found.
        """
        direction = next((candidate for candidate in cls if candidate.value in reaction), None)
        if direction is None:
            raise ValueError("No valid arrow found within reaction string.")

        return direction
a48b2e06ebe7 Uploaded
luca_milaz
parents:
diff changeset
37
a48b2e06ebe7 Uploaded
luca_milaz
parents:
diff changeset
# Maps each reaction id to a dict matching every substrate id to its stoichiometric coefficient.
ReactionsDict = Dict[str, Dict[str, float]]


def add_custom_reaction(reactionsDict :ReactionsDict, rId :str, reaction :str) -> None:
    """
    Adds an entry to the given reactionsDict. Each entry consists of a given unique reaction id
    (key) and a :dict (value) matching each substrate in the reaction to its stoichiometric coefficient.
    Keys and values are both obtained from the reaction's formula: if a substrate (custom metabolite id)
    appears without an explicit coeff, the value 1.0 will be used instead.

    A coefficient is only recognized at the very start of a term (e.g. "2 h2o" or "0.5 o2"): digits
    that belong to a metabolite id (e.g. "co2", "10fthf") are never mistaken for a coefficient, and
    surrounding whitespace is never kept as part of an id.

    Args:
        reactionsDict : dictionary encoding custom reactions information.
        rId : unique reaction id.
        reaction : the reaction's formula (one side of it: terms separated by " + ").

    Returns:
        None

    Side effects:
        reactionsDict : mut
    """
    reaction = reaction.strip()
    if not reaction: return

    substrates :Dict[str, float] = {}
    # We assume the '+' separating consecutive metabs in a reaction is spaced from them,
    # to avoid confusing it for electrical charge:
    for word in reaction.split(" + "):
        # Extra spacing around a separator must not end up inside the metabolite id.
        word = word.strip()
        if not word: continue # Nothing between two separators, there is no metabolite to record.

        # Implicit stoichiometric coeff is equal to 1, some coeffs are floats.
        metabId, stoichCoeff = word, 1.0

        # Accepted coeffs can be integer or floats with a dot (.) decimal separator
        # and must be separated from the metab with a space. The pattern is anchored at the
        # start of the term (re.match) so that a trailing digit in an id is never read as a coeff:
        foundCoeff = re.match(r"\d+(\.\d+)? ", word)
        if foundCoeff:
            stoichCoeff = float(foundCoeff.group(0))
            metabId = word[foundCoeff.end():].strip()

        substrates[metabId] = stoichCoeff

    if substrates:
        reactionsDict[rId] = substrates
    else:
        reactionsDict.pop(rId, None) # Empty reactions are removed.
a48b2e06ebe7 Uploaded
luca_milaz
parents:
diff changeset
80
a48b2e06ebe7 Uploaded
luca_milaz
parents:
diff changeset
81
a48b2e06ebe7 Uploaded
luca_milaz
parents:
diff changeset
def create_reaction_dict(unparsed_reactions: Dict[str, str]) -> ReactionsDict:
    """
    Parses the given dictionary into the correct format.

    Each formula is split at its direction arrow and only the side(s) acting as substrates
    are recorded: the left side for forward reactions, the right side for backward ones and
    both for reversible reactions (stored as two entries, "<id>_F" and "<id>_B").
    The arrow does not need to be surrounded by spaces, so formulas with an empty side whose
    trailing whitespace was trimmed (e.g. "h2o_e -->") are accepted.

    Args:
        unparsed_reactions (Dict[str, str]): A dictionary where keys are reaction IDs and values are unparsed reaction strings.

    Raises:
        ValueError : if a formula contains no valid arrow, or contains its arrow more than once.

    Returns:
        ReactionsDict: The correctly parsed dict.
    """
    reactionsDict :ReactionsDict = {}
    for rId, reaction in unparsed_reactions.items():
        reactionDir = ReactionDir.fromReaction(reaction)

        # Split on the bare arrow: each side is stripped by add_custom_reaction, so the spacing
        # around the arrow is irrelevant and an empty side simply produces no entry.
        sides = reaction.split(reactionDir.value)
        if len(sides) != 2:
            raise ValueError(
                f"Reaction '{rId}' must contain exactly one '{reactionDir.value}' arrow, found {len(sides) - 1}.")

        left, right = sides

        # Reversible reactions are split into distinct reactions, one for each direction.
        # In general we only care about substrates, the product information is lost.
        reactionIsReversible = reactionDir is ReactionDir.REVERSIBLE
        forwardId  = rId + "_F" if reactionIsReversible else rId
        backwardId = rId + "_B" if reactionIsReversible else rId

        if reactionDir is not ReactionDir.BACKWARD:
            add_custom_reaction(reactionsDict, forwardId, left)

        if reactionDir is not ReactionDir.FORWARD:
            add_custom_reaction(reactionsDict, backwardId, right)

        # ^^^ to further clarify: if a reaction is NOT reversible it will not be marked as _F or _B
        # and whichever direction we DO keep (forward if --> and backward if <--) loses this information.
        # This IS a small problem when coloring the map in marea.py because the arrow IDs in the map follow
        # through with a similar convention on ALL reactions and correctly encode direction based on their
        # model of origin. TODO: a proposed solution is to unify the standard in RPS to fully mimic the maps,
        # which involves re-writing the "reactions" dictionary.

    return reactionsDict
a48b2e06ebe7 Uploaded
luca_milaz
parents:
diff changeset
114
a48b2e06ebe7 Uploaded
luca_milaz
parents:
diff changeset
115
a48b2e06ebe7 Uploaded
luca_milaz
parents:
diff changeset
def parse_custom_reactions(customReactionsPath :str) -> ReactionsDict:
    """
    Builds the custom reactions dictionary from a csv file describing the reactions.
    The first column of every row read is used as the reaction id and the second one
    as the reaction's formula; if an id occurs more than once the last row wins.

    Args:
        customReactionsPath : path to the reactions information file.

    Returns:
        ReactionsDict : dictionary encoding custom reactions information.
    """
    reactionsFilePath = utils.FilePath.fromStrPath(customReactionsPath)

    formulasById :Dict[str, str] = {}
    for row in utils.readCsv(reactionsFilePath):
        formulasById[row[0]] = row[1]

    return create_reaction_dict(formulasById)
a48b2e06ebe7 Uploaded
luca_milaz
parents:
diff changeset
130