Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/lxml/_elementpath.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
| author | shellac | 
|---|---|
| date | Mon, 22 Mar 2021 18:12:50 +0000 | 
| parents | |
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| -1:000000000000 | 0:4f3585e2f14b | 
|---|---|
| 1 # cython: language_level=2 | |
| 2 | |
| 3 # | |
| 4 # ElementTree | |
| 5 # $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $ | |
| 6 # | |
| 7 # limited xpath support for element trees | |
| 8 # | |
| 9 # history: | |
| 10 # 2003-05-23 fl created | |
| 11 # 2003-05-28 fl added support for // etc | |
| 12 # 2003-08-27 fl fixed parsing of periods in element names | |
| 13 # 2007-09-10 fl new selection engine | |
| 14 # 2007-09-12 fl fixed parent selector | |
| 15 # 2007-09-13 fl added iterfind; changed findall to return a list | |
| 16 # 2007-11-30 fl added namespaces support | |
| 17 # 2009-10-30 fl added child element value filter | |
| 18 # | |
| 19 # Copyright (c) 2003-2009 by Fredrik Lundh. All rights reserved. | |
| 20 # | |
| 21 # fredrik@pythonware.com | |
| 22 # http://www.pythonware.com | |
| 23 # | |
| 24 # -------------------------------------------------------------------- | |
| 25 # The ElementTree toolkit is | |
| 26 # | |
| 27 # Copyright (c) 1999-2009 by Fredrik Lundh | |
| 28 # | |
| 29 # By obtaining, using, and/or copying this software and/or its | |
| 30 # associated documentation, you agree that you have read, understood, | |
| 31 # and will comply with the following terms and conditions: | |
| 32 # | |
| 33 # Permission to use, copy, modify, and distribute this software and | |
| 34 # its associated documentation for any purpose and without fee is | |
| 35 # hereby granted, provided that the above copyright notice appears in | |
| 36 # all copies, and that both that copyright notice and this permission | |
| 37 # notice appear in supporting documentation, and that the name of | |
| 38 # Secret Labs AB or the author not be used in advertising or publicity | |
| 39 # pertaining to distribution of the software without specific, written | |
| 40 # prior permission. | |
| 41 # | |
| 42 # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD | |
| 43 # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- | |
| 44 # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR | |
| 45 # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY | |
| 46 # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, | |
| 47 # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS | |
| 48 # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE | |
| 49 # OF THIS SOFTWARE. | |
| 50 # -------------------------------------------------------------------- | |
| 51 | |
| 52 ## | |
| 53 # Implementation module for XPath support. There's usually no reason | |
| 54 # to import this module directly; the <b>ElementTree</b> does this for | |
| 55 # you, if needed. | |
| 56 ## | |
| 57 | |
| 58 from __future__ import absolute_import | |
| 59 | |
| 60 import re | |
| 61 | |
# Tokenizer pattern for the supported XPath subset.  ``findall`` yields
# ``(operator, tag)`` pairs:
#   * group 1 captures quoted literals and punctuation/operators,
#   * group 2 captures a name, optionally prefixed by a ``{namespace}``,
#   * whitespace matches neither group and shows up as ``('', '')``.
xpath_tokenizer_re = re.compile(
    "(" + "|".join((
        "'[^']*'",               # single-quoted string literal
        "\"[^\"]*\"",            # double-quoted string literal
        "::",                    # axis separator
        "//?",                   # '//' or '/'
        r"\.\.",                 # parent step
        r"\(\)",                 # empty call parentheses, e.g. last()
        r"[/.*:\[\]\(\)@=]",     # any remaining single-character operator
    )) + ")|" +
    r"((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|" +
    r"\s+"
)
| 73 | |
def xpath_tokenizer(pattern, namespaces=None):
    """Split *pattern* into ``(operator, tag)`` tokens, resolving namespaces.

    Tags written as ``prefix:name`` are rewritten to ``{uri}name`` using
    *namespaces*.  Unprefixed tags receive the default namespace (the
    mapping's ``None`` or ``''`` entry) unless they directly follow an
    ``@`` token.  Tags that already start with ``{`` and all non-tag tokens
    are passed through unchanged.

    Raises SyntaxError if a prefix is used that *namespaces* does not map.
    """
    if namespaces:
        # ElementTree uses '', lxml used None originally.
        default_namespace = namespaces.get(None) or namespaces.get('')
    else:
        default_namespace = None

    after_at = False
    for token in xpath_tokenizer_re.findall(pattern):
        ttype, tag = token

        if not tag or tag[0] == "{":
            # operator, literal, whitespace, or an already qualified tag
            yield token
            after_at = (ttype == '@')
            continue

        if ":" in tag:
            prefix, local_name = tag.split(":", 1)
            try:
                if not namespaces:
                    raise KeyError
                yield ttype, "{%s}%s" % (namespaces[prefix], local_name)
            except KeyError:
                raise SyntaxError("prefix %r not found in prefix map" % prefix)
        elif default_namespace and not after_at:
            yield ttype, "{%s}%s" % (default_namespace, tag)
        else:
            yield token
        after_at = False
| 97 | |
| 98 | |
def prepare_child(next, token):
    """Return a selector yielding the children named by ``token[1]``.

    *next* is accepted so that all ``prepare_*`` functions share one call
    signature; it is not used here.
    """
    wanted = token[1]

    def select(result):
        # Lazily walk every context element and emit its matching children.
        for context in result:
            for child in context.iterchildren(wanted):
                yield child

    return select
| 106 | |
def prepare_star(next, token):
    """Return a selector for the ``*`` step.

    The selector asks each context element for ``iterchildren('*')``.
    Neither *next* nor *token* is inspected.
    """
    def select(result):
        for context in result:
            for child in context.iterchildren('*'):
                yield child

    return select
| 113 | |
def prepare_self(next, token):
    """Return a selector for the ``.`` step: the context is passed through.

    Neither *next* nor *token* is inspected.
    """
    def select(result):
        # '.' selects the context itself, so hand the iterable back untouched.
        return result

    return select
| 118 | |
def prepare_descendant(next, token):
    """Return a selector for a ``//`` step.

    The token following ``//`` is consumed via *next* and must be either
    ``*`` or a plain tag; anything else raises SyntaxError.  The selector
    yields ``iterdescendants(tag)`` of every context element.
    """
    step = next()
    kind = step[0]
    if kind == "*":
        tag = "*"
    elif kind:
        # any other operator directly after '//' is not supported
        raise SyntaxError("invalid descendant")
    else:
        tag = step[1]

    def select(result):
        for context in result:
            for descendant in context.iterdescendants(tag):
                yield descendant

    return select
| 132 | |
def prepare_parent(next, token):
    """Return a selector for the ``..`` step.

    For every context element the selector yields ``getparent()``;
    elements whose parent is None contribute nothing.  Neither *next* nor
    *token* is inspected.
    """
    def select(result):
        for context in result:
            owner = context.getparent()
            if owner is None:
                continue
            yield owner

    return select
| 140 | |
def prepare_predicate(next, token):
    """Compile a ``[...]`` predicate into a filtering selector.

    Tokens are pulled from *next* until the closing ``]``.  Each token adds
    one piece to a *signature* string (its operator, ``'`` for a quoted
    literal, or ``-`` for a tag/number) and its tag value to a parallel
    *predicate* list.  The signature then selects one of these forms:

    * ``[@attribute]``           -- attribute is present
    * ``[@attribute='value']``   -- attribute equals value
    * ``[tag]``                  -- element has a child ``tag``
    * ``[.='value']``            -- element's complete text equals value
    * ``[tag='value']``          -- a child ``tag`` has that complete text
    * ``[index]``                -- 1-based position among same-tag siblings
    * ``[last()]``, ``[last()-index]`` -- position counted from the end

    Returns a generator function mapping an iterable of elements to the
    elements that satisfy the predicate.

    Raises SyntaxError for unsupported or malformed predicates, including
    a positive offset such as ``[last()+1]``.
    """
    # FIXME: replace with real parser!!! refs:
    # http://effbot.org/zone/simple-iterator-parser.htm
    # http://javascript.crockford.com/tdop/tdop.html
    signature = ''
    predicate = []
    while 1:
        token = next()
        if token[0] == "]":
            break
        if token == ('', ''):
            # ignore whitespace
            continue
        if token[0] and token[0][:1] in "'\"":
            # quoted literal: normalise its type to "'" and strip the quotes
            token = "'", token[0][1:-1]
        signature += token[0] or "-"
        predicate.append(token[1])

    # A leading tag-like token that is an integer is an index, not a tag.
    first_is_integer = bool(predicate) and (
        re.match(r"-?\d+$", predicate[0]) is not None)

    # use signature to determine predicate type
    if signature == "@-":
        # [@attribute] predicate
        key = predicate[1]
        def select(result):
            for elem in result:
                if elem.get(key) is not None:
                    yield elem
        return select
    if signature == "@-='":
        # [@attribute='value']
        key = predicate[1]
        value = predicate[-1]
        def select(result):
            for elem in result:
                if elem.get(key) == value:
                    yield elem
        return select
    if signature == "-" and not first_is_integer:
        # [tag]
        tag = predicate[0]
        def select(result):
            for elem in result:
                # one matching child is enough; yield the parent only once
                for _ in elem.iterchildren(tag):
                    yield elem
                    break
        return select
    if signature == ".='" or (signature == "-='" and not first_is_integer):
        # [.='value'] or [tag='value']
        tag = predicate[0]  # empty string for the '.' form
        value = predicate[-1]
        if tag:
            def select(result):
                for elem in result:
                    for e in elem.iterchildren(tag):
                        if "".join(e.itertext()) == value:
                            yield elem
                            break
        else:
            def select(result):
                for elem in result:
                    if "".join(elem.itertext()) == value:
                        yield elem
        return select
    if signature == "-" or signature == "-()" or signature == "-()-":
        # [index] or [last()] or [last()-index]
        if signature == "-":
            # [index]
            index = int(predicate[0]) - 1
            if index < 0:
                if index == -1:
                    raise SyntaxError(
                        "indices in path predicates are 1-based, not 0-based")
                else:
                    raise SyntaxError("path index >= 1 expected")
        else:
            if predicate[0] != "last":
                raise SyntaxError("unsupported function")
            if signature == "-()-":
                try:
                    offset = int(predicate[2])
                except ValueError:
                    raise SyntaxError("unsupported expression")
                if offset > 0:
                    # "last()+1" or "last()1" would otherwise become a
                    # non-negative index and silently select from the
                    # *front* of the sibling list.
                    raise SyntaxError(
                        "offset from last() must not be positive")
                # offset is <= 0, so this is a from-the-end list index
                index = offset - 1
            else:
                index = -1
        def select(result):
            for elem in result:
                parent = elem.getparent()
                if parent is None:
                    continue
                try:
                    # FIXME: what if the selector is "*" ?
                    elems = list(parent.iterchildren(elem.tag))
                    if elems[index] is elem:
                        yield elem
                except IndexError:
                    pass
        return select
    raise SyntaxError("invalid predicate")
| 238 | |
# Dispatch table: token operator -> factory that builds the selector for
# that path step.  Plain tags have an empty operator string.
ops = {
    "": prepare_child,          # tag
    "*": prepare_star,          # *
    ".": prepare_self,          # .
    "..": prepare_parent,       # ..
    "//": prepare_descendant,   # //tag or //*
    "[": prepare_predicate,     # [...]
}
| 247 | |
| 248 | |
| 249 # -------------------------------------------------------------------- | |
| 250 | |
# Compiled selector lists keyed by (path, namespace items...); filled and
# periodically flushed by _build_path_iterator().
_cache = dict()
| 252 | |
| 253 | |
def _build_path_iterator(path, namespaces):
    """Compile *path* into a list of selector callables (cached).

    Each selector maps an iterable of elements to an iterable of elements;
    applying them in order evaluates the path.  Results are memoised in
    ``_cache`` under a key built from the path and the namespace mapping;
    the cache is emptied once it holds more than 100 entries.

    Raises ValueError if *namespaces* defines conflicting default
    namespaces under both ``None`` and ``''``, and SyntaxError for empty,
    absolute or otherwise invalid paths.
    """
    if path[-1:] == "/":
        path += "*"  # implicit all (FIXME: keep this?)

    cache_key = (path,)
    if namespaces:
        # lxml originally used None for the default namespace but ElementTree
        # uses the more convenient (all-strings-dict) empty string, so both
        # are supported here, preferring '', as long as they aren't ambiguous.
        if None not in namespaces:
            cache_key += tuple(sorted(namespaces.items()))
        else:
            if '' in namespaces and namespaces[None] != namespaces['']:
                raise ValueError("Ambiguous default namespace provided: %r versus %r" % (
                    namespaces[None], namespaces['']))
            # None cannot be ordered against str keys, so it is kept apart
            named_items = [item for item in namespaces.items()
                           if item[0] is not None]
            named_items.sort()
            cache_key += (namespaces[None],) + tuple(named_items)

    cached = _cache.get(cache_key)
    if cached is not None:
        return cached
    if len(_cache) > 100:
        _cache.clear()

    if path[:1] == "/":
        raise SyntaxError("cannot use absolute path on element")

    stream = iter(xpath_tokenizer(path, namespaces))
    # Python 2 generators expose .next, Python 3 ones expose .__next__
    _next = getattr(stream, "next", None) or stream.__next__

    try:
        token = _next()
    except StopIteration:
        raise SyntaxError("empty path expression")

    selector = []
    while True:
        prepare = ops[token[0]]
        try:
            # a prepare_* function may consume further tokens itself
            selector.append(prepare(_next, token))
        except StopIteration:
            raise SyntaxError("invalid path")
        try:
            token = _next()
            if token[0] == "/":
                # skip the step separator
                token = _next()
        except StopIteration:
            break

    _cache[cache_key] = selector
    return selector
| 306 | |
| 307 | |
| 308 ## | |
| 309 # Iterate over the matching nodes | |
| 310 | |
def iterfind(elem, path, namespaces=None):
    """Return an iterator over the elements matching *path* below *elem*.

    The path is compiled by ``_build_path_iterator`` and its selectors are
    chained, starting from a one-element iterator holding *elem*.
    """
    steps = _build_path_iterator(path, namespaces)
    matches = iter((elem,))
    for step in steps:
        matches = step(matches)
    return matches
| 317 | |
| 318 | |
| 319 ## | |
| 320 # Find first matching object. | |
| 321 | |
def find(elem, path, namespaces=None):
    """Return the first element matching *path*, or None if there is none."""
    return next(iterfind(elem, path, namespaces), None)
| 328 | |
| 329 | |
| 330 ## | |
| 331 # Find all matching objects. | |
| 332 | |
def findall(elem, path, namespaces=None):
    """Return a list of all elements matching *path*."""
    matches = iterfind(elem, path, namespaces)
    return list(matches)
| 335 | |
| 336 | |
| 337 ## | |
| 338 # Find text for first matching object. | |
| 339 | |
def findtext(elem, path, default=None, namespaces=None):
    """Return the text of the first element matching *path*.

    Returns *default* when nothing matches, and ``''`` when the matched
    element's ``text`` is empty or None.
    """
    match = find(elem, path, namespaces)
    if match is not None:
        return match.text or ''
    return default
