Mercurial > repos > bimib > cobraxy
comparison COBRAxy/utils/rule_parsing.py @ 456:a6e45049c1b9 draft default tip
Uploaded
author | francesco_lapi |
---|---|
date | Fri, 12 Sep 2025 17:28:45 +0000 |
parents | 41f35c2f0c7b |
children |
comparison
equal
deleted
inserted
replaced
455:4e2bc80764b6 | 456:a6e45049c1b9 |
---|---|
1 """ | |
2 Parsing utilities for gene rules (GPRs). | |
3 | |
4 This module provides: | |
5 - RuleErr: structured errors for malformed rules | |
6 - RuleOp: valid logical operators (AND/OR) | |
7 - OpList: nested list structure representing parsed rules with explicit operator | |
8 - RuleStack: helper stack to build nested OpLists during parsing | |
9 - parseRuleToNestedList: main entry to parse a rule string into an OpList | |
10 """ | |
1 from enum import Enum | 11 from enum import Enum |
2 import utils.general_utils as utils | 12 import utils.general_utils as utils |
3 from typing import List, Union, Optional | 13 from typing import List, Union, Optional |
4 | 14 |
5 class RuleErr(utils.CustomErr): | 15 class RuleErr(utils.CustomErr): |
6 """ | 16 """ |
7 CustomErr subclass for rule syntax errors. | 17 Error type for rule syntax errors. |
8 """ | 18 """ |
9 errName = "Rule Syntax Error" | 19 errName = "Rule Syntax Error" |
10 def __init__(self, rule :str, msg = "no further details provided") -> None: | 20 def __init__(self, rule :str, msg = "no further details provided") -> None: |
11 super().__init__( | 21 super().__init__( |
12 f"rule \"{rule}\" is malformed, {msg}", | 22 f"rule \"{rule}\" is malformed, {msg}", |
13 "please verify your input follows the validity guidelines") | 23 "please verify your input follows the validity guidelines") |
14 | 24 |
15 class RuleOp(Enum): | 25 class RuleOp(Enum): |
16 """ | 26 """ |
17 Encodes all operators valid in gene rules. | 27 Valid logical operators for gene rules. |
18 """ | 28 """ |
19 OR = "or" | 29 OR = "or" |
20 AND = "and" | 30 AND = "and" |
21 | 31 |
22 @classmethod | 32 @classmethod |
25 | 35 |
26 def __str__(self) -> str: return self.value | 36 def __str__(self) -> str: return self.value |
27 | 37 |
28 class OpList(List[Union[str, "OpList"]]): | 38 class OpList(List[Union[str, "OpList"]]): |
29 """ | 39 """ |
30 Represents a parsed rule and each of its nesting levels, including the operator that level uses. | 40 Parsed rule structure: a list with an associated operator for that level. |
31 """ | 41 """ |
32 def __init__(self, op :Optional[RuleOp] = None) -> None: | 42 def __init__(self, op :Optional[RuleOp] = None) -> None: |
33 """ | 43 """ |
34 (Private) Initializes an instance of OpList. | 44 (Private) Initializes an instance of OpList. |
35 | 45 |
68 f"{nextIndent}{item.__repr__(nextIndent) if isinstance(item, OpList) else item}" | 78 f"{nextIndent}{item.__repr__(nextIndent) if isinstance(item, OpList) else item}" |
69 for item in self ]) + f"\n{indent}]" | 79 for item in self ]) + f"\n{indent}]" |
70 | 80 |
71 class RuleStack: | 81 class RuleStack: |
72 """ | 82 """ |
73 FILO stack structure to save the intermediate representation of a Rule during parsing, with the | 83 FILO stack used during parsing to build nested OpLists; the top is the current level. |
74 current nesting level at the top of the stack. | |
75 """ | 84 """ |
76 def __init__(self) -> None: | 85 def __init__(self) -> None: |
77 """ | 86 """ |
78 (Private) initializes an instance of RuleStack. | 87 (Private) initializes an instance of RuleStack. |
79 | 88 |
175 """ | 184 """ |
176 self.current = self.__stack[-1] | 185 self.current = self.__stack[-1] |
177 | 186 |
178 def parseRuleToNestedList(rule :str) -> OpList: | 187 def parseRuleToNestedList(rule :str) -> OpList: |
179 """ | 188 """ |
180 Parse a single rule from its string representation to an OpList, making all priority explicit | 189 Parse a rule string into an OpList, making operator precedence explicit via nesting. |
181 through nesting levels. | |
182 | 190 |
183 Args: | 191 Args: |
184 rule : the string representation of a rule to be parsed. | 192 rule: Rule string to parse (supports parentheses, 'and', 'or'). |
185 | 193 |
186 Raises: | 194 Raises: |
187 RuleErr : whenever something goes wrong during parsing. | 195 RuleErr: If the rule is malformed (e.g., mismatched parentheses or misplaced operators). |
188 | 196 |
189 Returns: | 197 Returns: |
190 OpList : the parsed rule. | 198 OpList: Parsed rule as an OpList structure. |
191 """ | 199 """ |
192 source = iter(rule | 200 source = iter(rule |
193 .replace("(", "( ").replace(")", " )") # Single out parens as words | 201 .replace("(", "( ").replace(")", " )") # single out parentheses as words |
194 .strip() # remove whitespace at extremities | 202 .strip() # trim edges |
195 .split()) # split by spaces | 203 .split()) # split by spaces |
196 | 204 |
197 stack = RuleStack() | 205 stack = RuleStack() |
198 nestingErr = RuleErr(rule, "mismatch between open and closed parentheses") | 206 nestingErr = RuleErr(rule, "mismatch between open and closed parentheses") |
199 try: | 207 try: |
200 while True: # keep reading until source ends | 208 while True: # read until source ends |
201 while True: | 209 while True: |
202 operand = next(source, None) # expected name or rule opening | 210 operand = next(source, None) # expect operand or '(' |
203 if operand is None: raise RuleErr(rule, "found trailing open parentheses") | 211 if operand is None: raise RuleErr(rule, "found trailing open parentheses") |
204 if operand == "and" or operand == "or" or operand == ")": # found operator instead, panic | 212 if operand in ("and", "or", ")"): # unexpected operator position |
205 raise RuleErr(rule, f"found \"{operand}\" in unexpected position") | 213 raise RuleErr(rule, f"found \"{operand}\" in unexpected position") |
206 | 214 |
207 if operand != "(": break # found name | 215 if operand != "(": break # got a name |
208 | 216 |
209 # found rule opening, we add new nesting level but don't know the operator | 217 # found rule opening: add a new nesting level |
210 stack.push() | 218 stack.push() |
211 | 219 |
212 stack.current.append(operand) | 220 stack.current.append(operand) |
213 | 221 |
214 while True: # keep reading until operator is found or source ends | 222 while True: # read until operator found or source ends |
215 operator = next(source, None) # expected operator or rule closing | 223 operator = next(source, None) # expect operator or ')' |
216 if operator and operator != ")": break # found operator | 224 if operator and operator != ")": break # got operator |
217 | 225 |
218 if stack.currentIsAnd(): stack.pop() # we close the "and" chain | 226 if stack.currentIsAnd(): stack.pop() # close current AND chain |
219 | 227 |
220 if not operator: break | 228 if not operator: break |
221 stack.pop() # we close the parentheses | 229 stack.pop() # close parentheses |
222 | 230 |
223 # we proceed with operator: | 231 if not operator: break |
224 if not operator: break # there is no such thing as a double loop break.. yet | |
225 | 232 |
226 if not RuleOp.isOperator(operator): raise RuleErr( | 233 if not RuleOp.isOperator(operator): raise RuleErr( |
227 rule, f"found \"{operator}\" in unexpected position, expected operator") | 234 rule, f"found \"{operator}\" in unexpected position, expected operator") |
228 | 235 |
229 operator = RuleOp(operator) | 236 operator = RuleOp(operator) |
232 | 239 |
233 elif operator is RuleOp.AND and not stack.currentIsAnd(): | 240 elif operator is RuleOp.AND and not stack.currentIsAnd(): |
234 stack.push(operator) | 241 stack.push(operator) |
235 stack.popForward() | 242 stack.popForward() |
236 | 243 |
237 stack.current.setOpIfMissing(operator) # buffer now knows what operator its data had | 244 stack.current.setOpIfMissing(operator) |
238 | 245 |
239 except RuleErr as err: raise err # bubble up proper errors | 246 except RuleErr as err: raise err # bubble up proper errors |
240 except: raise nestingErr # everything else is interpreted as a nesting error. | 247 except: raise nestingErr # everything else is interpreted as a nesting error. |
241 | 248 |
242 parsedRule = stack.obtain(nestingErr) | 249 parsedRule = stack.obtain(nestingErr) |