Mercurial > repos > guerler > springsuite
diff planemo/lib/python3.7/site-packages/lxml/_elementpath.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author | guerler |
---|---|
date | Fri, 31 Jul 2020 00:32:28 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/planemo/lib/python3.7/site-packages/lxml/_elementpath.py Fri Jul 31 00:32:28 2020 -0400 @@ -0,0 +1,345 @@ +# cython: language_level=2 + +# +# ElementTree +# $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $ +# +# limited xpath support for element trees +# +# history: +# 2003-05-23 fl created +# 2003-05-28 fl added support for // etc +# 2003-08-27 fl fixed parsing of periods in element names +# 2007-09-10 fl new selection engine +# 2007-09-12 fl fixed parent selector +# 2007-09-13 fl added iterfind; changed findall to return a list +# 2007-11-30 fl added namespaces support +# 2009-10-30 fl added child element value filter +# +# Copyright (c) 2003-2009 by Fredrik Lundh. All rights reserved. +# +# fredrik@pythonware.com +# http://www.pythonware.com +# +# -------------------------------------------------------------------- +# The ElementTree toolkit is +# +# Copyright (c) 1999-2009 by Fredrik Lundh +# +# By obtaining, using, and/or copying this software and/or its +# associated documentation, you agree that you have read, understood, +# and will comply with the following terms and conditions: +# +# Permission to use, copy, modify, and distribute this software and +# its associated documentation for any purpose and without fee is +# hereby granted, provided that the above copyright notice appears in +# all copies, and that both that copyright notice and this permission +# notice appear in supporting documentation, and that the name of +# Secret Labs AB or the author not be used in advertising or publicity +# pertaining to distribution of the software without specific, written +# prior permission. +# +# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD +# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- +# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR +# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY +# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS +# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE +# OF THIS SOFTWARE. +# -------------------------------------------------------------------- + +## +# Implementation module for XPath support. There's usually no reason +# to import this module directly; the <b>ElementTree</b> does this for +# you, if needed. +## + +from __future__ import absolute_import + +import re + +xpath_tokenizer_re = re.compile( + "(" + "'[^']*'|\"[^\"]*\"|" + "::|" + "//?|" + r"\.\.|" + r"\(\)|" + r"[/.*:\[\]\(\)@=])|" + r"((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|" + r"\s+" + ) + +def xpath_tokenizer(pattern, namespaces=None): + # ElementTree uses '', lxml used None originally. + default_namespace = (namespaces.get(None) or namespaces.get('')) if namespaces else None + parsing_attribute = False + for token in xpath_tokenizer_re.findall(pattern): + ttype, tag = token + if tag and tag[0] != "{": + if ":" in tag: + prefix, uri = tag.split(":", 1) + try: + if not namespaces: + raise KeyError + yield ttype, "{%s}%s" % (namespaces[prefix], uri) + except KeyError: + raise SyntaxError("prefix %r not found in prefix map" % prefix) + elif default_namespace and not parsing_attribute: + yield ttype, "{%s}%s" % (default_namespace, tag) + else: + yield token + parsing_attribute = False + else: + yield token + parsing_attribute = ttype == '@' + + +def prepare_child(next, token): + tag = token[1] + def select(result): + for elem in result: + for e in elem.iterchildren(tag): + yield e + return select + +def prepare_star(next, token): + def select(result): + for elem in result: + for e in elem.iterchildren('*'): + yield e + return select + +def prepare_self(next, token): + def select(result): + return result + return select + +def prepare_descendant(next, token): + token = next() + if token[0] == "*": + tag = "*" + elif not token[0]: + tag = token[1] + else: + raise SyntaxError("invalid descendant") + def select(result): + for elem in result: + for e in elem.iterdescendants(tag): + yield e + return select + +def prepare_parent(next, token): + def select(result): + for elem in result: + parent = elem.getparent() + if parent is not None: + yield parent + return select + +def prepare_predicate(next, token): + # FIXME: replace with real parser!!! refs: + # http://effbot.org/zone/simple-iterator-parser.htm + # http://javascript.crockford.com/tdop/tdop.html + signature = '' + predicate = [] + while 1: + token = next() + if token[0] == "]": + break + if token == ('', ''): + # ignore whitespace + continue + if token[0] and token[0][:1] in "'\"": + token = "'", token[0][1:-1] + signature += token[0] or "-" + predicate.append(token[1]) + + # use signature to determine predicate type + if signature == "@-": + # [@attribute] predicate + key = predicate[1] + def select(result): + for elem in result: + if elem.get(key) is not None: + yield elem + return select + if signature == "@-='": + # [@attribute='value'] + key = predicate[1] + value = predicate[-1] + def select(result): + for elem in result: + if elem.get(key) == value: + yield elem + return select + if signature == "-" and not re.match(r"-?\d+$", predicate[0]): + # [tag] + tag = predicate[0] + def select(result): + for elem in result: + for _ in elem.iterchildren(tag): + yield elem + break + return select + if signature == ".='" or (signature == "-='" and not re.match(r"-?\d+$", predicate[0])): + # [.='value'] or [tag='value'] + tag = predicate[0] + value = predicate[-1] + if tag: + def select(result): + for elem in result: + for e in elem.iterchildren(tag): + if "".join(e.itertext()) == value: + yield elem + break + else: + def select(result): + for elem in result: + if "".join(elem.itertext()) == value: + yield elem + return select + if signature == "-" or signature == "-()" or signature == "-()-": + # [index] or [last()] or [last()-index] + if signature == "-": + # [index] + index = int(predicate[0]) - 1 + if index < 0: + if index == -1: + raise SyntaxError( + "indices in path predicates are 1-based, not 0-based") + else: + raise SyntaxError("path index >= 1 expected") + else: + if predicate[0] != "last": + raise SyntaxError("unsupported function") + if signature == "-()-": + try: + index = int(predicate[2]) - 1 + except ValueError: + raise SyntaxError("unsupported expression") + else: + index = -1 + def select(result): + for elem in result: + parent = elem.getparent() + if parent is None: + continue + try: + # FIXME: what if the selector is "*" ? + elems = list(parent.iterchildren(elem.tag)) + if elems[index] is elem: + yield elem + except IndexError: + pass + return select + raise SyntaxError("invalid predicate") + +ops = { + "": prepare_child, + "*": prepare_star, + ".": prepare_self, + "..": prepare_parent, + "//": prepare_descendant, + "[": prepare_predicate, +} + + +# -------------------------------------------------------------------- + +_cache = {} + + +def _build_path_iterator(path, namespaces): + """compile selector pattern""" + if path[-1:] == "/": + path += "*" # implicit all (FIXME: keep this?) + + cache_key = (path,) + if namespaces: + # lxml originally used None for the default namespace but ElementTree uses the + # more convenient (all-strings-dict) empty string, so we support both here, + # preferring the more convenient '', as long as they aren't ambiguous. + if None in namespaces: + if '' in namespaces and namespaces[None] != namespaces['']: + raise ValueError("Ambiguous default namespace provided: %r versus %r" % ( + namespaces[None], namespaces[''])) + cache_key += (namespaces[None],) + tuple(sorted( + item for item in namespaces.items() if item[0] is not None)) + else: + cache_key += tuple(sorted(namespaces.items())) + + try: + return _cache[cache_key] + except KeyError: + pass + if len(_cache) > 100: + _cache.clear() + + if path[:1] == "/": + raise SyntaxError("cannot use absolute path on element") + stream = iter(xpath_tokenizer(path, namespaces)) + try: + _next = stream.next + except AttributeError: + # Python 3 + _next = stream.__next__ + try: + token = _next() + except StopIteration: + raise SyntaxError("empty path expression") + selector = [] + while 1: + try: + selector.append(ops[token[0]](_next, token)) + except StopIteration: + raise SyntaxError("invalid path") + try: + token = _next() + if token[0] == "/": + token = _next() + except StopIteration: + break + _cache[cache_key] = selector + return selector + + +## +# Iterate over the matching nodes + +def iterfind(elem, path, namespaces=None): + selector = _build_path_iterator(path, namespaces) + result = iter((elem,)) + for select in selector: + result = select(result) + return result + + +## +# Find first matching object. + +def find(elem, path, namespaces=None): + it = iterfind(elem, path, namespaces) + try: + return next(it) + except StopIteration: + return None + + +## +# Find all matching objects. + +def findall(elem, path, namespaces=None): + return list(iterfind(elem, path, namespaces)) + + +## +# Find text for first matching object. + +def findtext(elem, path, default=None, namespaces=None): + el = find(elem, path, namespaces) + if el is None: + return default + else: + return el.text or ''