Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/lxml/_elementpath.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author | guerler |
---|---|
date | Fri, 31 Jul 2020 00:32:28 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:d30785e31577 | 1:56ad4e20f292 |
---|---|
1 # cython: language_level=2 | |
2 | |
3 # | |
4 # ElementTree | |
5 # $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $ | |
6 # | |
7 # limited xpath support for element trees | |
8 # | |
9 # history: | |
10 # 2003-05-23 fl created | |
11 # 2003-05-28 fl added support for // etc | |
12 # 2003-08-27 fl fixed parsing of periods in element names | |
13 # 2007-09-10 fl new selection engine | |
14 # 2007-09-12 fl fixed parent selector | |
15 # 2007-09-13 fl added iterfind; changed findall to return a list | |
16 # 2007-11-30 fl added namespaces support | |
17 # 2009-10-30 fl added child element value filter | |
18 # | |
19 # Copyright (c) 2003-2009 by Fredrik Lundh. All rights reserved. | |
20 # | |
21 # fredrik@pythonware.com | |
22 # http://www.pythonware.com | |
23 # | |
24 # -------------------------------------------------------------------- | |
25 # The ElementTree toolkit is | |
26 # | |
27 # Copyright (c) 1999-2009 by Fredrik Lundh | |
28 # | |
29 # By obtaining, using, and/or copying this software and/or its | |
30 # associated documentation, you agree that you have read, understood, | |
31 # and will comply with the following terms and conditions: | |
32 # | |
33 # Permission to use, copy, modify, and distribute this software and | |
34 # its associated documentation for any purpose and without fee is | |
35 # hereby granted, provided that the above copyright notice appears in | |
36 # all copies, and that both that copyright notice and this permission | |
37 # notice appear in supporting documentation, and that the name of | |
38 # Secret Labs AB or the author not be used in advertising or publicity | |
39 # pertaining to distribution of the software without specific, written | |
40 # prior permission. | |
41 # | |
42 # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD | |
43 # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- | |
44 # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR | |
45 # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY | |
46 # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, | |
47 # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS | |
48 # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE | |
49 # OF THIS SOFTWARE. | |
50 # -------------------------------------------------------------------- | |
51 | |
52 ## | |
53 # Implementation module for XPath support. There's usually no reason | |
54 # to import this module directly; the <b>ElementTree</b> does this for | |
55 # you, if needed. | |
56 ## | |
57 | |
58 from __future__ import absolute_import | |
59 | |
60 import re | |
61 | |
# Tokenizer pattern with two capture groups:
#   group 1 - quoted strings, operators and punctuation
#   group 2 - names, optionally preceded by a "{uri}" namespace part
# Whitespace runs match without capturing, so they show up as ('', '').
xpath_tokenizer_re = re.compile(
    "(%s)|(%s)|%s" % (
        "|".join((
            "'[^']*'",
            "\"[^\"]*\"",
            "::",
            "//?",
            r"\.\.",
            r"\(\)",
            r"[/.*:\[\]\(\)@=]",
        )),
        r"(?:\{[^}]+\})?[^/\[\]\(\)@=\s]+",
        r"\s+",
    )
)
73 | |
def xpath_tokenizer(pattern, namespaces=None):
    """Yield (type, tag) tokens for *pattern*, expanding namespace prefixes.

    Tags of the form "prefix:name" are rewritten to "{uri}name" using
    *namespaces*; a missing prefix (or no mapping at all) raises SyntaxError.
    Unprefixed tags receive the default namespace, looked up under the key
    None or '', unless the tag directly follows an '@' token.  Tokens without
    a tag, and tags already starting with "{", are passed through unchanged.
    """
    # ElementTree uses '', lxml used None originally.
    if namespaces:
        default_namespace = namespaces.get(None) or namespaces.get('')
    else:
        default_namespace = None

    after_at_sign = False
    for token in xpath_tokenizer_re.findall(pattern):
        ttype, tag = token

        if not tag or tag[0] == "{":
            # operator/whitespace token or an already qualified name
            yield token
            after_at_sign = ttype == '@'
            continue

        if ":" in tag:
            prefix, local_name = tag.split(":", 1)
            try:
                if not namespaces:
                    raise KeyError
                yield ttype, "{%s}%s" % (namespaces[prefix], local_name)
            except KeyError:
                raise SyntaxError("prefix %r not found in prefix map" % prefix)
        elif default_namespace and not after_at_sign:
            yield ttype, "{%s}%s" % (default_namespace, tag)
        else:
            yield token
        after_at_sign = False
97 | |
98 | |
def prepare_child(next, token):
    """Build a selector yielding the children named by ``token[1]``."""
    wanted = token[1]

    def select(result):
        # one pass over the incoming elements, children emitted in order
        for parent in result:
            for child in parent.iterchildren(wanted):
                yield child

    return select
106 | |
def prepare_star(next, token):
    """Build a selector yielding every ``iterchildren('*')`` child."""

    def select(result):
        for parent in result:
            for child in parent.iterchildren('*'):
                yield child

    return select
113 | |
def prepare_self(next, token):
    """Build the selector for ".": it hands back its input unchanged."""
    return lambda result: result
118 | |
def prepare_descendant(next, token):
    """Build a selector for "//": descendants matching the following token.

    The token after "//" is pulled from *next*.  A "*" token selects all
    descendants, a plain name token selects by tag, and any other token
    type raises SyntaxError.
    """
    following = next()
    kind = following[0]
    if kind == "*":
        tag = "*"
    elif kind:
        raise SyntaxError("invalid descendant")
    else:
        tag = following[1]

    def select(result):
        for ancestor in result:
            for node in ancestor.iterdescendants(tag):
                yield node

    return select
132 | |
def prepare_parent(next, token):
    """Build the selector for "..": the parent of each element, if any."""

    def select(result):
        for node in result:
            owner = node.getparent()
            if owner is None:
                # element without a parent contributes nothing
                continue
            yield owner

    return select
140 | |
def _read_predicate_tokens(next):
    """Consume tokens up to the closing "]" and return (signature, values).

    The signature concatenates one marker per token: the token type, "-"
    for a token with an empty type (a name or number), or "'" for a quoted
    literal.  *values* holds the matching token texts, with the quotes
    stripped from literals.  Whitespace tokens are skipped.
    """
    markers = []
    values = []
    while True:
        token = next()
        if token[0] == "]":
            break
        if token == ('', ''):
            # ignore whitespace
            continue
        if token[0] and token[0][:1] in "'\"":
            # quoted literal: normalise the marker and drop the quotes
            token = "'", token[0][1:-1]
        markers.append(token[0] or "-")
        values.append(token[1])
    return "".join(markers), values


def _is_integer_text(text):
    """Return a true value if *text* is an optionally negative integer."""
    return re.match(r"-?\d+$", text)


def _select_having_attribute(key):
    """Selector for [@attribute]: keep elements where get(key) is not None."""
    def select(result):
        for elem in result:
            if elem.get(key) is not None:
                yield elem
    return select


def _select_attribute_equals(key, value):
    """Selector for [@attribute='value']."""
    def select(result):
        for elem in result:
            if elem.get(key) == value:
                yield elem
    return select


def _select_having_child(tag):
    """Selector for [tag]: keep elements with at least one such child."""
    def select(result):
        for elem in result:
            for _ in elem.iterchildren(tag):
                yield elem
                break
    return select


def _select_child_text_equals(tag, value):
    """Selector for [tag='value']: some *tag* child has joined text == value."""
    def select(result):
        for elem in result:
            for child in elem.iterchildren(tag):
                if "".join(child.itertext()) == value:
                    yield elem
                    break
    return select


def _select_text_equals(value):
    """Selector for [.='value']: the element's joined text equals value."""
    def select(result):
        for elem in result:
            if "".join(elem.itertext()) == value:
                yield elem
    return select


def _parse_position(signature, predicate):
    """Return the 0-based list index for [index], [last()] or [last()-index].

    Raises SyntaxError for indices below 1, for functions other than
    last(), and for a non-integer offset after last().
    """
    if signature == "-":
        # [index]
        index = int(predicate[0]) - 1
        if index < 0:
            if index == -1:
                raise SyntaxError(
                    "indices in path predicates are 1-based, not 0-based")
            raise SyntaxError("path index >= 1 expected")
        return index
    if predicate[0] != "last":
        raise SyntaxError("unsupported function")
    if signature == "-()-":
        # the offset token still carries its sign, e.g. "-1" -> index -2
        try:
            return int(predicate[2]) - 1
        except ValueError:
            raise SyntaxError("unsupported expression")
    return -1


def _select_by_position(index):
    """Selector keeping elements found at *index* among same-tag siblings."""
    def select(result):
        for elem in result:
            parent = elem.getparent()
            if parent is None:
                continue
            # FIXME: what if the selector is "*" ?
            siblings = list(parent.iterchildren(elem.tag))
            try:
                candidate = siblings[index]
            except IndexError:
                # fewer matching siblings than the requested position
                continue
            if candidate is elem:
                yield elem
    return select


def prepare_predicate(next, token):
    """Build the selector for a "[...]" predicate.

    Tokens are read from *next* until the closing "]".  Supported forms:
    [@attribute], [@attribute='value'], [tag], [.='value'], [tag='value'],
    [index], [last()] and [last()-index].  Anything else raises
    SyntaxError("invalid predicate").
    """
    # FIXME: replace with real parser!!! refs:
    # http://effbot.org/zone/simple-iterator-parser.htm
    # http://javascript.crockford.com/tdop/tdop.html
    signature, predicate = _read_predicate_tokens(next)

    # use signature to determine predicate type
    if signature == "@-":
        return _select_having_attribute(predicate[1])
    if signature == "@-='":
        return _select_attribute_equals(predicate[1], predicate[-1])
    if signature == "-" and not _is_integer_text(predicate[0]):
        return _select_having_child(predicate[0])
    if signature == ".='" or (
            signature == "-='" and not _is_integer_text(predicate[0])):
        tag = predicate[0]
        value = predicate[-1]
        if tag:
            return _select_child_text_equals(tag, value)
        # "." leaves an empty tag: compare the element's own text
        return _select_text_equals(value)
    if signature in ("-", "-()", "-()-"):
        return _select_by_position(_parse_position(signature, predicate))
    raise SyntaxError("invalid predicate")
238 | |
# Dispatch table used when compiling a path: the type of the token that
# starts a step selects the factory that builds that step's selector.
ops = {
    "": prepare_child,         # plain tag name
    "*": prepare_star,         # any child
    ".": prepare_self,         # current element
    "..": prepare_parent,      # parent element
    "//": prepare_descendant,  # descendants matching the next token
    "[": prepare_predicate,    # "[...]" filter
}
247 | |
248 | |
249 # -------------------------------------------------------------------- | |
250 | |
251 _cache = {} | |
252 | |
253 | |
def _build_path_iterator(path, namespaces):
    """Compile *path* into a list of selector callables, with caching.

    A trailing "/" is treated as "/*".  The result is stored in ``_cache``
    under a key built from the path and the namespace mapping; the cache is
    emptied once it holds more than 100 entries.

    Raises ValueError when the None and '' namespace entries disagree, and
    SyntaxError for absolute, empty or truncated paths.
    """
    if path[-1:] == "/":
        path += "*"  # implicit all (FIXME: keep this?)

    key_parts = [path]
    if namespaces:
        # lxml originally used None for the default namespace but ElementTree
        # uses the more convenient (all-strings-dict) empty string, so we
        # support both here, preferring the more convenient '', as long as
        # they aren't ambiguous.
        if None in namespaces:
            if '' in namespaces and namespaces[None] != namespaces['']:
                raise ValueError("Ambiguous default namespace provided: %r versus %r" % (
                    namespaces[None], namespaces['']))
            key_parts.append(namespaces[None])
            # the None key is left out of the sorted items
            key_parts.extend(sorted(
                item for item in namespaces.items() if item[0] is not None))
        else:
            key_parts.extend(sorted(namespaces.items()))
    cache_key = tuple(key_parts)

    if cache_key in _cache:
        return _cache[cache_key]
    if len(_cache) > 100:
        _cache.clear()

    if path[:1] == "/":
        raise SyntaxError("cannot use absolute path on element")

    stream = iter(xpath_tokenizer(path, namespaces))
    _next = getattr(stream, "__next__", None)
    if _next is None:
        # Python 2
        _next = stream.next

    try:
        token = _next()
    except StopIteration:
        raise SyntaxError("empty path expression")

    selector = []
    while True:
        try:
            step = ops[token[0]](_next, token)
        except StopIteration:
            # a factory ran out of tokens while reading its operands
            raise SyntaxError("invalid path")
        selector.append(step)
        try:
            token = _next()
            if token[0] == "/":
                # step separator: move on to the token that starts the step
                token = _next()
        except StopIteration:
            break

    _cache[cache_key] = selector
    return selector
306 | |
307 | |
308 ## | |
309 # Iterate over the matching nodes | |
310 | |
def iterfind(elem, path, namespaces=None):
    """Return an iterator over the nodes reached from *elem* via *path*.

    The compiled selectors are chained, each one consuming the output of
    the previous one, starting from an iterator over *elem* alone.
    """
    steps = _build_path_iterator(path, namespaces)
    matches = iter((elem,))
    for step in steps:
        matches = step(matches)
    return matches
317 | |
318 | |
319 ## | |
320 # Find first matching object. | |
321 | |
def find(elem, path, namespaces=None):
    """Return the first node matching *path*, or None if nothing matches."""
    return next(iterfind(elem, path, namespaces), None)
328 | |
329 | |
330 ## | |
331 # Find all matching objects. | |
332 | |
def findall(elem, path, namespaces=None):
    """Return a list of all nodes matching *path*."""
    matches = iterfind(elem, path, namespaces)
    return list(matches)
335 | |
336 | |
337 ## | |
338 # Find text for first matching object. | |
339 | |
def findtext(elem, path, default=None, namespaces=None):
    """Return the text of the first node matching *path*.

    Gives *default* when nothing matches, and '' when the matched node's
    ``text`` is empty or None.
    """
    first = find(elem, path, namespaces)
    return default if first is None else (first.text or '')