Mercurial > repos > bimib > cobraxy
comparison COBRAxy/utils/rule_parsing.py @ 4:41f35c2f0c7b draft
Uploaded
author | luca_milaz |
---|---|
date | Wed, 18 Sep 2024 10:59:10 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
3:1f3ac6fd9867 | 4:41f35c2f0c7b |
---|---|
1 from enum import Enum | |
2 import utils.general_utils as utils | |
3 from typing import List, Union, Optional | |
4 | |
5 class RuleErr(utils.CustomErr): | |
6 """ | |
7 CustomErr subclass for rule syntax errors. | |
8 """ | |
9 errName = "Rule Syntax Error" | |
10 def __init__(self, rule :str, msg = "no further details provided") -> None: | |
11 super().__init__( | |
12 f"rule \"{rule}\" is malformed, {msg}", | |
13 "please verify your input follows the validity guidelines") | |
14 | |
15 class RuleOp(Enum): | |
16 """ | |
17 Encodes all operators valid in gene rules. | |
18 """ | |
19 OR = "or" | |
20 AND = "and" | |
21 | |
22 @classmethod | |
23 def isOperator(cls, op :str) -> bool: | |
24 return op.upper() in cls.__members__ | |
25 | |
26 def __str__(self) -> str: return self.value | |
27 | |
28 class OpList(List[Union[str, "OpList"]]): | |
29 """ | |
30 Represents a parsed rule and each of its nesting levels, including the operator that level uses. | |
31 """ | |
32 def __init__(self, op :Optional[RuleOp] = None) -> None: | |
33 """ | |
34 (Private) Initializes an instance of OpList. | |
35 | |
36 Args: | |
37 op (str): Operator to be assigned to the OpList. Defaults to "". | |
38 | |
39 Returns: | |
40 None : practically, an OpList instance. | |
41 """ | |
42 self.op = op | |
43 | |
44 def setOpIfMissing(self, op :RuleOp) -> None: | |
45 """ | |
46 Sets the operator of the OpList if it's missing. | |
47 | |
48 Args: | |
49 op (str): Operator to be assigned to the OpList. | |
50 | |
51 Returns: | |
52 None | |
53 """ | |
54 if not self.op: self.op = op | |
55 | |
56 def __repr__(self, indent = "") -> str: | |
57 """ | |
58 (Private) Returns a string representation of the current OpList instance. | |
59 | |
60 Args: | |
61 indent (str): Indentation level . Defaults to "". | |
62 | |
63 Returns: | |
64 str: A string representation of the current OpList instance. | |
65 """ | |
66 nextIndent = indent + " " | |
67 return f"<{self.op}>[\n" + ",\n".join([ | |
68 f"{nextIndent}{item.__repr__(nextIndent) if isinstance(item, OpList) else item}" | |
69 for item in self ]) + f"\n{indent}]" | |
70 | |
71 class RuleStack: | |
72 """ | |
73 FILO stack structure to save the intermediate representation of a Rule during parsing, with the | |
74 current nesting level at the top of the stack. | |
75 """ | |
76 def __init__(self) -> None: | |
77 """ | |
78 (Private) initializes an instance of RuleStack. | |
79 | |
80 Returns: | |
81 None : practically, a RuleStack instance. | |
82 """ | |
83 self.__stack = [OpList()] # the stack starts out with the result list already allocated | |
84 self.__updateCurrent() | |
85 | |
86 def pop(self) -> None: | |
87 """ | |
88 Removes the OpList on top of the stack, also flattening it once when possible. | |
89 | |
90 Side Effects: | |
91 self : mut | |
92 | |
93 Returns: | |
94 None | |
95 """ | |
96 oldTop = self.__stack.pop() | |
97 if len(oldTop) == 1 and isinstance(oldTop[0], OpList): self.__stack[-1][-1] = oldTop[0] | |
98 self.__updateCurrent() | |
99 | |
100 def push(self, operator = "") -> None: | |
101 """ | |
102 Adds a new nesting level, in the form of a new OpList on top of the stack. | |
103 | |
104 Args: | |
105 operator : the operator assigned to the new OpList. | |
106 | |
107 Side Effects: | |
108 self : mut | |
109 | |
110 Returns: | |
111 None | |
112 """ | |
113 newLevel = OpList(operator) | |
114 self.current.append(newLevel) | |
115 self.__stack.append(newLevel) | |
116 self.__updateCurrent() | |
117 | |
118 def popForward(self) -> None: | |
119 """ | |
120 Moves the last "actual" item from the 2nd to last list to the beginning of the top list, as per | |
121 the example below: | |
122 stack : [list_a, list_b] | |
123 list_a : [item1, item2, list_b] --> [item1, list_b] | |
124 list_b : [item3, item4] --> [item2, item3, item4] | |
125 | |
126 This is essentially a "give back as needed" operation. | |
127 | |
128 Side Effects: | |
129 self : mut | |
130 | |
131 Returns: | |
132 None | |
133 """ | |
134 self.current.insert(0, self.__stack[-2].pop(-2)) | |
135 | |
136 def currentIsAnd(self) -> bool: | |
137 """ | |
138 Checks if the current OpList's assigned operator is "and". | |
139 | |
140 Returns: | |
141 bool : True if the current OpList's assigned operator is "and", False otherwise. | |
142 """ | |
143 return self.current.op is RuleOp.AND | |
144 | |
145 def obtain(self, err :Optional[utils.CustomErr] = None) -> Optional[OpList]: | |
146 """ | |
147 Obtains the first OpList on the stack, only if it's the only element. | |
148 | |
149 Args: | |
150 err : The error to raise if obtaining the result is not possible. | |
151 | |
152 Side Effects: | |
153 self : mut | |
154 | |
155 Raises: | |
156 err: If given, otherwise None is returned. | |
157 | |
158 Returns: | |
159 Optional[OpList]: The first OpList on the stack, only if it's the only element. | |
160 """ | |
161 | |
162 if len(self.__stack) == 1: return self.__stack.pop() | |
163 if err: raise err | |
164 return None | |
165 | |
166 def __updateCurrent(self) -> None: | |
167 """ | |
168 (Private) Updates the current OpList to the one on top of the stack. | |
169 | |
170 Side Effects: | |
171 self : mut | |
172 | |
173 Returns: | |
174 None | |
175 """ | |
176 self.current = self.__stack[-1] | |
177 | |
178 def parseRuleToNestedList(rule :str) -> OpList: | |
179 """ | |
180 Parse a single rule from its string representation to an OpList, making all priority explicit | |
181 through nesting levels. | |
182 | |
183 Args: | |
184 rule : the string representation of a rule to be parsed. | |
185 | |
186 Raises: | |
187 RuleErr : whenever something goes wrong during parsing. | |
188 | |
189 Returns: | |
190 OpList : the parsed rule. | |
191 """ | |
192 source = iter(rule | |
193 .replace("(", "( ").replace(")", " )") # Single out parens as words | |
194 .strip() # remove whitespace at extremities | |
195 .split()) # split by spaces | |
196 | |
197 stack = RuleStack() | |
198 nestingErr = RuleErr(rule, "mismatch between open and closed parentheses") | |
199 try: | |
200 while True: # keep reading until source ends | |
201 while True: | |
202 operand = next(source, None) # expected name or rule opening | |
203 if operand is None: raise RuleErr(rule, "found trailing open parentheses") | |
204 if operand == "and" or operand == "or" or operand == ")": # found operator instead, panic | |
205 raise RuleErr(rule, f"found \"{operand}\" in unexpected position") | |
206 | |
207 if operand != "(": break # found name | |
208 | |
209 # found rule opening, we add new nesting level but don't know the operator | |
210 stack.push() | |
211 | |
212 stack.current.append(operand) | |
213 | |
214 while True: # keep reading until operator is found or source ends | |
215 operator = next(source, None) # expected operator or rule closing | |
216 if operator and operator != ")": break # found operator | |
217 | |
218 if stack.currentIsAnd(): stack.pop() # we close the "and" chain | |
219 | |
220 if not operator: break | |
221 stack.pop() # we close the parentheses | |
222 | |
223 # we proceed with operator: | |
224 if not operator: break # there is no such thing as a double loop break.. yet | |
225 | |
226 if not RuleOp.isOperator(operator): raise RuleErr( | |
227 rule, f"found \"{operator}\" in unexpected position, expected operator") | |
228 | |
229 operator = RuleOp(operator) | |
230 if operator is RuleOp.OR and stack.currentIsAnd(): | |
231 stack.pop() | |
232 | |
233 elif operator is RuleOp.AND and not stack.currentIsAnd(): | |
234 stack.push(operator) | |
235 stack.popForward() | |
236 | |
237 stack.current.setOpIfMissing(operator) # buffer now knows what operator its data had | |
238 | |
239 except RuleErr as err: raise err # bubble up proper errors | |
240 except: raise nestingErr # everything else is interpreted as a nesting error. | |
241 | |
242 parsedRule = stack.obtain(nestingErr) | |
243 return parsedRule[0] if len(parsedRule) == 1 and isinstance(parsedRule[0], list) else parsedRule |