comparison COBRAxy/utils/rule_parsing.py @ 4:41f35c2f0c7b draft

Uploaded
author luca_milaz
date Wed, 18 Sep 2024 10:59:10 +0000
parents
children
comparison
equal deleted inserted replaced
3:1f3ac6fd9867 4:41f35c2f0c7b
1 from enum import Enum
2 import utils.general_utils as utils
3 from typing import List, Union, Optional
4
5 class RuleErr(utils.CustomErr):
6 """
7 CustomErr subclass for rule syntax errors.
8 """
9 errName = "Rule Syntax Error"
10 def __init__(self, rule :str, msg = "no further details provided") -> None:
11 super().__init__(
12 f"rule \"{rule}\" is malformed, {msg}",
13 "please verify your input follows the validity guidelines")
14
15 class RuleOp(Enum):
16 """
17 Encodes all operators valid in gene rules.
18 """
19 OR = "or"
20 AND = "and"
21
22 @classmethod
23 def isOperator(cls, op :str) -> bool:
24 return op.upper() in cls.__members__
25
26 def __str__(self) -> str: return self.value
27
28 class OpList(List[Union[str, "OpList"]]):
29 """
30 Represents a parsed rule and each of its nesting levels, including the operator that level uses.
31 """
32 def __init__(self, op :Optional[RuleOp] = None) -> None:
33 """
34 (Private) Initializes an instance of OpList.
35
36 Args:
37 op (str): Operator to be assigned to the OpList. Defaults to "".
38
39 Returns:
40 None : practically, an OpList instance.
41 """
42 self.op = op
43
44 def setOpIfMissing(self, op :RuleOp) -> None:
45 """
46 Sets the operator of the OpList if it's missing.
47
48 Args:
49 op (str): Operator to be assigned to the OpList.
50
51 Returns:
52 None
53 """
54 if not self.op: self.op = op
55
56 def __repr__(self, indent = "") -> str:
57 """
58 (Private) Returns a string representation of the current OpList instance.
59
60 Args:
61 indent (str): Indentation level . Defaults to "".
62
63 Returns:
64 str: A string representation of the current OpList instance.
65 """
66 nextIndent = indent + " "
67 return f"<{self.op}>[\n" + ",\n".join([
68 f"{nextIndent}{item.__repr__(nextIndent) if isinstance(item, OpList) else item}"
69 for item in self ]) + f"\n{indent}]"
70
71 class RuleStack:
72 """
73 FILO stack structure to save the intermediate representation of a Rule during parsing, with the
74 current nesting level at the top of the stack.
75 """
76 def __init__(self) -> None:
77 """
78 (Private) initializes an instance of RuleStack.
79
80 Returns:
81 None : practically, a RuleStack instance.
82 """
83 self.__stack = [OpList()] # the stack starts out with the result list already allocated
84 self.__updateCurrent()
85
86 def pop(self) -> None:
87 """
88 Removes the OpList on top of the stack, also flattening it once when possible.
89
90 Side Effects:
91 self : mut
92
93 Returns:
94 None
95 """
96 oldTop = self.__stack.pop()
97 if len(oldTop) == 1 and isinstance(oldTop[0], OpList): self.__stack[-1][-1] = oldTop[0]
98 self.__updateCurrent()
99
100 def push(self, operator = "") -> None:
101 """
102 Adds a new nesting level, in the form of a new OpList on top of the stack.
103
104 Args:
105 operator : the operator assigned to the new OpList.
106
107 Side Effects:
108 self : mut
109
110 Returns:
111 None
112 """
113 newLevel = OpList(operator)
114 self.current.append(newLevel)
115 self.__stack.append(newLevel)
116 self.__updateCurrent()
117
118 def popForward(self) -> None:
119 """
120 Moves the last "actual" item from the 2nd to last list to the beginning of the top list, as per
121 the example below:
122 stack : [list_a, list_b]
123 list_a : [item1, item2, list_b] --> [item1, list_b]
124 list_b : [item3, item4] --> [item2, item3, item4]
125
126 This is essentially a "give back as needed" operation.
127
128 Side Effects:
129 self : mut
130
131 Returns:
132 None
133 """
134 self.current.insert(0, self.__stack[-2].pop(-2))
135
136 def currentIsAnd(self) -> bool:
137 """
138 Checks if the current OpList's assigned operator is "and".
139
140 Returns:
141 bool : True if the current OpList's assigned operator is "and", False otherwise.
142 """
143 return self.current.op is RuleOp.AND
144
145 def obtain(self, err :Optional[utils.CustomErr] = None) -> Optional[OpList]:
146 """
147 Obtains the first OpList on the stack, only if it's the only element.
148
149 Args:
150 err : The error to raise if obtaining the result is not possible.
151
152 Side Effects:
153 self : mut
154
155 Raises:
156 err: If given, otherwise None is returned.
157
158 Returns:
159 Optional[OpList]: The first OpList on the stack, only if it's the only element.
160 """
161
162 if len(self.__stack) == 1: return self.__stack.pop()
163 if err: raise err
164 return None
165
166 def __updateCurrent(self) -> None:
167 """
168 (Private) Updates the current OpList to the one on top of the stack.
169
170 Side Effects:
171 self : mut
172
173 Returns:
174 None
175 """
176 self.current = self.__stack[-1]
177
178 def parseRuleToNestedList(rule :str) -> OpList:
179 """
180 Parse a single rule from its string representation to an OpList, making all priority explicit
181 through nesting levels.
182
183 Args:
184 rule : the string representation of a rule to be parsed.
185
186 Raises:
187 RuleErr : whenever something goes wrong during parsing.
188
189 Returns:
190 OpList : the parsed rule.
191 """
192 source = iter(rule
193 .replace("(", "( ").replace(")", " )") # Single out parens as words
194 .strip() # remove whitespace at extremities
195 .split()) # split by spaces
196
197 stack = RuleStack()
198 nestingErr = RuleErr(rule, "mismatch between open and closed parentheses")
199 try:
200 while True: # keep reading until source ends
201 while True:
202 operand = next(source, None) # expected name or rule opening
203 if operand is None: raise RuleErr(rule, "found trailing open parentheses")
204 if operand == "and" or operand == "or" or operand == ")": # found operator instead, panic
205 raise RuleErr(rule, f"found \"{operand}\" in unexpected position")
206
207 if operand != "(": break # found name
208
209 # found rule opening, we add new nesting level but don't know the operator
210 stack.push()
211
212 stack.current.append(operand)
213
214 while True: # keep reading until operator is found or source ends
215 operator = next(source, None) # expected operator or rule closing
216 if operator and operator != ")": break # found operator
217
218 if stack.currentIsAnd(): stack.pop() # we close the "and" chain
219
220 if not operator: break
221 stack.pop() # we close the parentheses
222
223 # we proceed with operator:
224 if not operator: break # there is no such thing as a double loop break.. yet
225
226 if not RuleOp.isOperator(operator): raise RuleErr(
227 rule, f"found \"{operator}\" in unexpected position, expected operator")
228
229 operator = RuleOp(operator)
230 if operator is RuleOp.OR and stack.currentIsAnd():
231 stack.pop()
232
233 elif operator is RuleOp.AND and not stack.currentIsAnd():
234 stack.push(operator)
235 stack.popForward()
236
237 stack.current.setOpIfMissing(operator) # buffer now knows what operator its data had
238
239 except RuleErr as err: raise err # bubble up proper errors
240 except: raise nestingErr # everything else is interpreted as a nesting error.
241
242 parsedRule = stack.obtain(nestingErr)
243 return parsedRule[0] if len(parsedRule) == 1 and isinstance(parsedRule[0], list) else parsedRule