comparison COBRAxy/utils/rule_parsing.py @ 456:a6e45049c1b9 draft default tip

Uploaded
author francesco_lapi
date Fri, 12 Sep 2025 17:28:45 +0000
parents 41f35c2f0c7b
children
comparison
equal deleted inserted replaced
455:4e2bc80764b6 456:a6e45049c1b9
1 """
2 Parsing utilities for gene rules (GPRs).
3
4 This module provides:
5 - RuleErr: structured errors for malformed rules
6 - RuleOp: valid logical operators (AND/OR)
7 - OpList: nested list structure representing parsed rules with explicit operator
8 - RuleStack: helper stack to build nested OpLists during parsing
9 - parseRuleToNestedList: main entry to parse a rule string into an OpList
10 """
1 from enum import Enum 11 from enum import Enum
2 import utils.general_utils as utils 12 import utils.general_utils as utils
3 from typing import List, Union, Optional 13 from typing import List, Union, Optional
4 14
5 class RuleErr(utils.CustomErr): 15 class RuleErr(utils.CustomErr):
6 """ 16 """
7 CustomErr subclass for rule syntax errors. 17 Error type for rule syntax errors.
8 """ 18 """
9 errName = "Rule Syntax Error" 19 errName = "Rule Syntax Error"
10 def __init__(self, rule :str, msg = "no further details provided") -> None: 20 def __init__(self, rule :str, msg = "no further details provided") -> None:
11 super().__init__( 21 super().__init__(
12 f"rule \"{rule}\" is malformed, {msg}", 22 f"rule \"{rule}\" is malformed, {msg}",
13 "please verify your input follows the validity guidelines") 23 "please verify your input follows the validity guidelines")
14 24
15 class RuleOp(Enum): 25 class RuleOp(Enum):
16 """ 26 """
17 Encodes all operators valid in gene rules. 27 Valid logical operators for gene rules.
18 """ 28 """
19 OR = "or" 29 OR = "or"
20 AND = "and" 30 AND = "and"
21 31
22 @classmethod 32 @classmethod
25 35
26 def __str__(self) -> str: return self.value 36 def __str__(self) -> str: return self.value
27 37
28 class OpList(List[Union[str, "OpList"]]): 38 class OpList(List[Union[str, "OpList"]]):
29 """ 39 """
30 Represents a parsed rule and each of its nesting levels, including the operator that level uses. 40 Parsed rule structure: a list with an associated operator for that level.
31 """ 41 """
32 def __init__(self, op :Optional[RuleOp] = None) -> None: 42 def __init__(self, op :Optional[RuleOp] = None) -> None:
33 """ 43 """
34 (Private) Initializes an instance of OpList. 44 (Private) Initializes an instance of OpList.
35 45
68 f"{nextIndent}{item.__repr__(nextIndent) if isinstance(item, OpList) else item}" 78 f"{nextIndent}{item.__repr__(nextIndent) if isinstance(item, OpList) else item}"
69 for item in self ]) + f"\n{indent}]" 79 for item in self ]) + f"\n{indent}]"
70 80
71 class RuleStack: 81 class RuleStack:
72 """ 82 """
73 FILO stack structure to save the intermediate representation of a Rule during parsing, with the 83 FILO stack used during parsing to build nested OpLists; the top is the current level.
74 current nesting level at the top of the stack.
75 """ 84 """
76 def __init__(self) -> None: 85 def __init__(self) -> None:
77 """ 86 """
78 (Private) initializes an instance of RuleStack. 87 (Private) initializes an instance of RuleStack.
79 88
175 """ 184 """
176 self.current = self.__stack[-1] 185 self.current = self.__stack[-1]
177 186
178 def parseRuleToNestedList(rule :str) -> OpList: 187 def parseRuleToNestedList(rule :str) -> OpList:
179 """ 188 """
180 Parse a single rule from its string representation to an OpList, making all priority explicit 189 Parse a rule string into an OpList, making operator precedence explicit via nesting.
181 through nesting levels.
182 190
183 Args: 191 Args:
184 rule : the string representation of a rule to be parsed. 192 rule: Rule string to parse (supports parentheses, 'and', 'or').
185 193
186 Raises: 194 Raises:
187 RuleErr : whenever something goes wrong during parsing. 195 RuleErr: If the rule is malformed (e.g., mismatched parentheses or misplaced operators).
188 196
189 Returns: 197 Returns:
190 OpList : the parsed rule. 198 OpList: Parsed rule as an OpList structure.
191 """ 199 """
192 source = iter(rule 200 source = iter(rule
193 .replace("(", "( ").replace(")", " )") # Single out parens as words 201 .replace("(", "( ").replace(")", " )") # single out parentheses as words
194 .strip() # remove whitespace at extremities 202 .strip() # trim edges
195 .split()) # split by spaces 203 .split()) # split by spaces
196 204
197 stack = RuleStack() 205 stack = RuleStack()
198 nestingErr = RuleErr(rule, "mismatch between open and closed parentheses") 206 nestingErr = RuleErr(rule, "mismatch between open and closed parentheses")
199 try: 207 try:
200 while True: # keep reading until source ends 208 while True: # read until source ends
201 while True: 209 while True:
202 operand = next(source, None) # expected name or rule opening 210 operand = next(source, None) # expect operand or '('
203 if operand is None: raise RuleErr(rule, "found trailing open parentheses") 211 if operand is None: raise RuleErr(rule, "found trailing open parentheses")
204 if operand == "and" or operand == "or" or operand == ")": # found operator instead, panic 212 if operand in ("and", "or", ")"): # unexpected operator position
205 raise RuleErr(rule, f"found \"{operand}\" in unexpected position") 213 raise RuleErr(rule, f"found \"{operand}\" in unexpected position")
206 214
207 if operand != "(": break # found name 215 if operand != "(": break # got a name
208 216
209 # found rule opening, we add new nesting level but don't know the operator 217 # found rule opening: add a new nesting level
210 stack.push() 218 stack.push()
211 219
212 stack.current.append(operand) 220 stack.current.append(operand)
213 221
214 while True: # keep reading until operator is found or source ends 222 while True: # read until operator found or source ends
215 operator = next(source, None) # expected operator or rule closing 223 operator = next(source, None) # expect operator or ')'
216 if operator and operator != ")": break # found operator 224 if operator and operator != ")": break # got operator
217 225
218 if stack.currentIsAnd(): stack.pop() # we close the "and" chain 226 if stack.currentIsAnd(): stack.pop() # close current AND chain
219 227
220 if not operator: break 228 if not operator: break
221 stack.pop() # we close the parentheses 229 stack.pop() # close parentheses
222 230
223 # we proceed with operator: 231 if not operator: break
224 if not operator: break # there is no such thing as a double loop break.. yet
225 232
226 if not RuleOp.isOperator(operator): raise RuleErr( 233 if not RuleOp.isOperator(operator): raise RuleErr(
227 rule, f"found \"{operator}\" in unexpected position, expected operator") 234 rule, f"found \"{operator}\" in unexpected position, expected operator")
228 235
229 operator = RuleOp(operator) 236 operator = RuleOp(operator)
232 239
233 elif operator is RuleOp.AND and not stack.currentIsAnd(): 240 elif operator is RuleOp.AND and not stack.currentIsAnd():
234 stack.push(operator) 241 stack.push(operator)
235 stack.popForward() 242 stack.popForward()
236 243
237 stack.current.setOpIfMissing(operator) # buffer now knows what operator its data had 244 stack.current.setOpIfMissing(operator)
238 245
239 except RuleErr as err: raise err # bubble up proper errors 246 except RuleErr as err: raise err # bubble up proper errors
240 except: raise nestingErr # everything else is interpreted as a nesting error. 247 except: raise nestingErr # everything else is interpreted as a nesting error.
241 248
242 parsedRule = stack.obtain(nestingErr) 249 parsedRule = stack.obtain(nestingErr)