Mercurial > repos > bimib > cobraxy
comparison COBRAxy/src/utils/rule_parsing.py @ 539:2fb97466e404 draft
Uploaded
| author | francesco_lapi |
|---|---|
| date | Sat, 25 Oct 2025 14:55:13 +0000 |
| parents | |
| children | fcdbc81feb45 |
comparison
equal
deleted
inserted
replaced
| 538:fd53d42348bd | 539:2fb97466e404 |
|---|---|
| 1 """ | |
| 2 Parsing utilities for gene rules (GPRs). | |
| 3 | |
| 4 This module provides: | |
| 5 - RuleErr: structured errors for malformed rules | |
| 6 - RuleOp: valid logical operators (AND/OR) | |
| 7 - OpList: nested list structure representing parsed rules with explicit operator | |
| 8 - RuleStack: helper stack to build nested OpLists during parsing | |
| 9 - parseRuleToNestedList: main entry to parse a rule string into an OpList | |
| 10 """ | |
| 11 from enum import Enum | |
| 12 import utils.general_utils as utils | |
| 13 from typing import List, Union, Optional | |
| 14 | |
class RuleErr(utils.CustomErr):
    """
    Raised when a gene rule string does not respect the expected syntax.
    """
    errName = "Rule Syntax Error"

    def __init__(self, rule: str, msg = "no further details provided") -> None:
        """
        Builds the error for a malformed rule.

        Args:
            rule: the offending rule, quoted verbatim in the error message.
            msg: short description of what is wrong with the rule.

        Returns:
            None
        """
        # First argument: what went wrong; second argument: a fixed piece of advice.
        # NOTE(review): how CustomErr renders the two arguments is defined in
        # utils.general_utils, which is not visible from this module.
        problem = f'rule "{rule}" is malformed, {msg}'
        advice = "please verify your input follows the validity guidelines"
        super().__init__(problem, advice)
| 24 | |
class RuleOp(Enum):
    """
    Valid logical operators for gene rules.

    Member values are the lowercase spellings ("or", "and"). Operator spelling is
    treated case-insensitively both by isOperator and by the constructor, so that
    any string accepted by isOperator can also be converted with RuleOp(string).
    """
    OR = "or"
    AND = "and"

    @classmethod
    def _missing_(cls, value):
        """
        (Private) Enum hook invoked when RuleOp(value) finds no member with that exact value.

        Resolves strings by member name, ignoring case, which mirrors the check done by
        isOperator. Returning None lets Enum raise its usual ValueError.

        Args:
            value : the value that failed the regular by-value lookup.

        Returns:
            Optional[RuleOp] : the matching member, or None if there is none.
        """
        if isinstance(value, str):
            return cls.__members__.get(value.upper())
        return None

    @classmethod
    def isOperator(cls, op :str) -> bool:
        """
        Checks whether the given string spells one of the operators, ignoring case.

        Args:
            op : the candidate operator.

        Returns:
            bool : True if op names a member of RuleOp, False otherwise (including non-string input).
        """
        return isinstance(op, str) and op.upper() in cls.__members__

    def __str__(self) -> str: return self.value
| 37 | |
class OpList(List[Union[str, "OpList"]]):
    """
    Parsed rule structure: a list with an associated operator for that level.

    Items are either plain strings or nested OpLists; `op` is the operator that
    joins the items of this level, or a falsy value while it is still undecided.
    """
    def __init__(self, op :Optional["RuleOp"] = None) -> None:
        """
        (Private) Initializes an empty OpList.

        Args:
            op (Optional[RuleOp]): Operator to be assigned to the OpList. Defaults to None.

        Returns:
            None : practically, an OpList instance.
        """
        super().__init__()
        self.op = op

    def setOpIfMissing(self, op :"RuleOp") -> None:
        """
        Sets the operator of the OpList, unless one has already been assigned.

        Args:
            op (RuleOp): Operator to be assigned to the OpList.

        Returns:
            None
        """
        # Any falsy operator (None, or the empty string) counts as "missing".
        if not self.op: self.op = op

    def __repr__(self, indent = "") -> str:
        """
        (Private) Returns a multi-line string representation of the current OpList instance,
        with one item per line and nested OpLists indented one level deeper.

        Args:
            indent (str): Indentation prefix of the current nesting level. Defaults to "".

        Returns:
            str: A string representation of the current OpList instance.
        """
        nextIndent = indent + " "
        renderedItems = [
            nextIndent + (item.__repr__(nextIndent) if isinstance(item, OpList) else f"{item}")
            for item in self ]

        return f"<{self.op}>[\n" + ",\n".join(renderedItems) + f"\n{indent}]"
| 80 | |
class RuleStack:
    """
    Last-in-first-out stack of OpLists used while parsing a rule: every element is one
    nesting level, and `current` always refers to the level on top of the stack.
    """
    def __init__(self) -> None:
        """
        (Private) Creates a stack that already holds the (empty) result OpList.

        Returns:
            None : practically, a RuleStack instance.
        """
        root = OpList()
        self.__stack = [root]
        self.__updateCurrent()

    def pop(self) -> None:
        """
        Closes the nesting level on top of the stack. If the closed level holds nothing but a
        single nested OpList, that inner OpList takes its place in the parent level.

        Side Effects:
            self : mut

        Returns:
            None
        """
        closed = self.__stack.pop()
        wrapsSingleOpList = len(closed) == 1 and isinstance(closed[0], OpList)
        if wrapsSingleOpList:
            # the closed level is always the last item of its parent (see push)
            self.__stack[-1][-1] = closed[0]

        self.__updateCurrent()

    def push(self, operator = "") -> None:
        """
        Opens a new nesting level: a fresh OpList is appended to the current level and becomes
        the new top of the stack.

        Args:
            operator : the operator assigned to the new OpList.

        Side Effects:
            self : mut

        Returns:
            None
        """
        nested = OpList(operator)
        self.current.append(nested)
        self.__stack.append(nested)
        self.__updateCurrent()

    def popForward(self) -> None:
        """
        Takes the item that sits right before the top list inside its parent and puts it at the
        beginning of the top list, as per the example below:
        stack  : [list_a, list_b]
        list_a : [item1, item2, list_b] --> [item1, list_b]
        list_b : [item3, item4]         --> [item2, item3, item4]

        This is essentially a "give back as needed" operation.

        Side Effects:
            self : mut

        Returns:
            None
        """
        top = self.current
        parent = self.__stack[-2]
        givenBack = parent.pop(-2) # index -1 is the top list itself
        top.insert(0, givenBack)

    def currentIsAnd(self) -> bool:
        """
        Tells whether the current OpList is an "and" level.

        Returns:
            bool : True if the current OpList's assigned operator is "and", False otherwise.
        """
        currentOp = self.current.op
        return currentOp is RuleOp.AND

    def obtain(self, err :Optional[utils.CustomErr] = None) -> Optional[OpList]:
        """
        Removes and returns the only OpList left on the stack. If the stack does not hold
        exactly one element nothing is removed.

        Args:
            err : The error to raise if obtaining the result is not possible.

        Side Effects:
            self : mut

        Raises:
            err: If given, otherwise None is returned.

        Returns:
            Optional[OpList]: The first OpList on the stack, only if it's the only element.
        """
        if len(self.__stack) != 1:
            if err: raise err
            return None

        return self.__stack.pop()

    def __updateCurrent(self) -> None:
        """
        (Private) Points `current` at the OpList on top of the stack.

        Side Effects:
            self : mut

        Returns:
            None
        """
        self.current = self.__stack[-1]
| 186 | |
def parseRuleToNestedList(rule :str) -> OpList:
    """
    Parse a rule string into an OpList, making operator precedence explicit via nesting:
    chains of "and" are grouped in their own nested OpList, so that for example
    "A or B and C" becomes <or>[A, <and>[B, C]].

    Args:
        rule: Rule string to parse (supports parentheses, 'and', 'or').
            Parentheses may be written with or without surrounding spaces, and operators
            are recognized in any letter case, consistently with RuleOp.isOperator.

    Raises:
        RuleErr: If the rule is malformed (e.g., mismatched parentheses or misplaced operators).
            An empty rule is reported as malformed too.

    Returns:
        OpList: Parsed rule as an OpList structure.
    """
    # Pad parentheses on BOTH sides so they always become standalone tokens, even when
    # written flush against a name or an operator (e.g. "(A)and(B)"); split() discards
    # the extra whitespace.
    source = iter(rule.replace("(", " ( ").replace(")", " ) ").split())

    stack = RuleStack()
    nestingErr = RuleErr(rule, "mismatch between open and closed parentheses")
    try:
        while True: # read until source ends
            while True:
                operand = next(source, None) # expect operand or '('
                if operand is None: raise RuleErr(rule, "found trailing open parentheses")
                # NOTE: only the lowercase keywords are rejected here, as in the past
                if operand in ("and", "or", ")"): # unexpected operator position
                    raise RuleErr(rule, f"found \"{operand}\" in unexpected position")

                if operand != "(": break # got a name

                # found rule opening: add a new nesting level
                stack.push()

            stack.current.append(operand)

            while True: # read until operator found or source ends
                operator = next(source, None) # expect operator or ')'
                if operator and operator != ")": break # got operator

                if stack.currentIsAnd(): stack.pop() # close current AND chain

                if not operator: break
                stack.pop() # close parentheses

            if not operator: break

            if not RuleOp.isOperator(operator): raise RuleErr(
                rule, f"found \"{operator}\" in unexpected position, expected operator")

            # Look the member up by name, exactly like isOperator does, so that whatever
            # passed the check above is guaranteed to convert.
            operator = RuleOp[operator.upper()]
            if operator is RuleOp.OR and stack.currentIsAnd():
                stack.pop() # an OR ends the AND chain that was being built

            elif operator is RuleOp.AND and not stack.currentIsAnd():
                # an AND chain starts: it gets its own level, which takes back the operand
                # that was just appended to the enclosing level
                stack.push(operator)
                stack.popForward()

            stack.current.setOpIfMissing(operator)

    except RuleErr: raise # bubble up proper errors untouched
    except Exception as err:
        # Stack underflows and similar failures are interpreted as a nesting error; the
        # original exception is kept as the cause. KeyboardInterrupt and SystemExit are
        # deliberately NOT converted.
        raise nestingErr from err

    parsedRule = stack.obtain(nestingErr)
    # a result that merely wraps one nested list is unwrapped
    return parsedRule[0] if len(parsedRule) == 1 and isinstance(parsedRule[0], list) else parsedRule
