Mercurial > repos > guerler > springsuite
diff planemo/lib/python3.7/site-packages/lxml/doctestcompare.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author | guerler |
---|---|
date | Fri, 31 Jul 2020 00:32:28 -0400 |
parents | |
children |
line wrap: on
line diff
# Recovered from an hgweb diff view; original diff header:
#   --- /dev/null Thu Jan 01 00:00:00 1970 +0000
#   +++ b/planemo/lib/python3.7/site-packages/lxml/doctestcompare.py Fri Jul 31 00:32:28 2020 -0400
#   @@ -0,0 +1,507 @@
"""
lxml-based doctest output comparison.

Note: normally, you should just import the `lxml.usedoctest` and
`lxml.html.usedoctest` modules from within a doctest, instead of this
one::

    >>> import lxml.usedoctest # for XML output

    >>> import lxml.html.usedoctest # for HTML output

To use this module directly, you must call ``lxml.doctestcompare.install()``,
which will cause doctest to use this in all subsequent calls.

This changes the way output is checked and comparisons are made for
XML or HTML-like content.

XML or HTML content is noticed because the example starts with ``<``
(it's HTML if it starts with ``<html``).  You can also use the
``PARSE_HTML`` and ``PARSE_XML`` flags to force parsing.

Some rough wildcard-like things are allowed.  Whitespace is generally
ignored (except in attributes).  In text (attributes and text in the
body) you can use ``...`` as a wildcard.  In an example it also
matches any trailing tags in the element, though it does not match
leading tags.  You may create a tag ``<any>`` or include an ``any``
attribute in the tag.  An ``any`` tag matches any tag, while the
attribute matches any and all attributes.

When a match fails, the reformatted example and gotten text are
displayed (indented), and a rough diff-like output is given.  Anything
marked with ``+`` is in the output but wasn't supposed to be, and
similarly ``-`` means it's in the example but wasn't in the output.

You can disable parsing on one line with ``# doctest:+NOPARSE_MARKUP``
"""

from lxml import etree
import sys
import re
import doctest
try:
    from html import escape as html_escape
except ImportError:
    from cgi import escape as html_escape

__all__ = ['PARSE_HTML', 'PARSE_XML', 'NOPARSE_MARKUP', 'LXMLOutputChecker',
           'LHTMLOutputChecker', 'install', 'temp_install']

try:
    _basestring = basestring
except NameError:
    _basestring = (str, bytes)

_IS_PYTHON_3 = sys.version_info[0] >= 3

# Custom doctest option flags understood by get_parser() below.
PARSE_HTML = doctest.register_optionflag('PARSE_HTML')
PARSE_XML = doctest.register_optionflag('PARSE_XML')
NOPARSE_MARKUP = doctest.register_optionflag('NOPARSE_MARKUP')

# Captured at import time, i.e. before install() may rebind
# doctest.OutputChecker to one of the subclasses defined here.
OutputChecker = doctest.OutputChecker


def strip(v):
    """Return ``v.strip()``, or None when ``v`` is None."""
    if v is None:
        return None
    return v.strip()


def norm_whitespace(v):
    """Replace every run of two or more spaces/tabs/newlines by one space."""
    return _norm_whitespace_re.sub(' ', v)


_html_parser = etree.HTMLParser(recover=False, remove_blank_text=True)


def html_fromstring(html):
    """Parse ``html`` with the module's strict (non-recovering) HTML parser."""
    return etree.fromstring(html, _html_parser)


# We use this to distinguish repr()s from elements:
_repr_re = re.compile(r'^<[^>]+ (at|object) ')
_norm_whitespace_re = re.compile(r'[ \t\n][ \t\n]+')


class LXMLOutputChecker(OutputChecker):
    """doctest output checker that compares XML/HTML output structurally.

    When both the expected and the actual output look like markup they
    are parsed and compared element by element (see `compare_docs`);
    otherwise the stock doctest comparison is used.
    """

    # HTML elements that never have content or an end tag.
    empty_tags = (
        'param', 'img', 'area', 'br', 'basefont', 'input',
        'base', 'meta', 'link', 'col')

    def get_default_parser(self):
        """Return the parser used when markup is detected without a flag."""
        return etree.XML

    def check_output(self, want, got, optionflags):
        """Return True if ``got`` matches ``want``.

        Falls back to the plain doctest check when no parser applies.
        Output that fails to parse counts as a mismatch.
        """
        # NOTE: temp_install()/_RestoreChecker copy this function's code
        # object into doctest's own check_output function, where it runs
        # with the doctest module's globals.  Keep the body restricted to
        # names that resolve there (``OutputChecker`` and ``etree``, which
        # temp_install() injects into doctest) plus attribute access on
        # ``self``; in particular do not call helpers of this module by
        # bare name here.
        alt_self = getattr(self, '_temp_override_self', None)
        if alt_self is not None:
            # Running as the transplanted code on the original checker:
            # delegate to the temporarily installed checker instead.
            super_method = self._temp_call_super_check_output
            self = alt_self
        else:
            super_method = OutputChecker.check_output
        parser = self.get_parser(want, got, optionflags)
        if not parser:
            return super_method(
                self, want, got, optionflags)
        try:
            want_doc = parser(want)
        except etree.XMLSyntaxError:
            return False
        try:
            got_doc = parser(got)
        except etree.XMLSyntaxError:
            return False
        return self.compare_docs(want_doc, got_doc)

    def get_parser(self, want, got, optionflags):
        """Pick the parser for this example, or None for plain comparison.

        Priority: ``NOPARSE_MARKUP`` (no parsing), ``PARSE_HTML``,
        ``PARSE_XML``, then sniffing: both texts starting with ``<html``
        (case-insensitively) select the HTML parser, and both texts
        looking like markup select `get_default_parser`.
        """
        parser = None
        if NOPARSE_MARKUP & optionflags:
            return None
        if PARSE_HTML & optionflags:
            parser = html_fromstring
        elif PARSE_XML & optionflags:
            parser = etree.XML
        elif (want.strip().lower().startswith('<html')
              and got.strip().lower().startswith('<html')):
            # Both sides are lower-cased so that e.g. ``<HTML>`` in the
            # actual output is still recognised as HTML.
            parser = html_fromstring
        elif (self._looks_like_markup(want)
              and self._looks_like_markup(got)):
            parser = self.get_default_parser()
        return parser

    def _looks_like_markup(self, s):
        """Return True if ``s`` starts with ``<`` but is not an object repr."""
        s = s.strip()
        return (s.startswith('<')
                and not _repr_re.search(s))

    def compare_docs(self, want, got):
        """Recursively compare the element ``got`` against ``want``.

        Tags, text, tail and attributes must match (with the wildcard
        rules of `tag_compare` and `text_compare`).  An ``any`` attribute
        on ``want`` disables the attribute comparison.  Children are
        compared pairwise and in order.
        """
        if not self.tag_compare(want.tag, got.tag):
            return False
        if not self.text_compare(want.text, got.text, True):
            return False
        if not self.text_compare(want.tail, got.tail, True):
            return False
        if 'any' not in want.attrib:
            want_keys = sorted(want.attrib.keys())
            got_keys = sorted(got.attrib.keys())
            if want_keys != got_keys:
                return False
            for key in want_keys:
                if not self.text_compare(want.attrib[key], got.attrib[key], False):
                    return False
        # A childless ``want`` whose text is exactly '...' accepts any
        # children in ``got``; otherwise children are compared.
        if want.text != '...' or len(want):
            want_children = list(want)
            got_children = list(got)
            while want_children or got_children:
                if not want_children or not got_children:
                    return False
                want_first = want_children.pop(0)
                got_first = got_children.pop(0)
                if not self.compare_docs(want_first, got_first):
                    return False
                # A '...' tail on the expected child stops the comparison
                # once the actual children are used up.
                if not got_children and want_first.tail == '...':
                    break
        return True

    def text_compare(self, want, got, strip):
        """Return True if ``got`` matches ``want``, treating ``...`` as ``.*``.

        None is treated as the empty string.  When ``strip`` is true both
        texts are whitespace-normalised and stripped first.
        """
        want = want or ''
        got = got or ''
        if strip:
            want = norm_whitespace(want).strip()
            got = norm_whitespace(got).strip()
        want = '^%s$' % re.escape(want)
        want = want.replace(r'\.\.\.', '.*')
        return re.search(want, got) is not None

    def tag_compare(self, want, got):
        """Return True if tag ``got`` matches the expected tag ``want``.

        ``any`` matches every tag and a ``{...}`` namespace prefix on
        ``want`` matches any namespace.  Non-string tags are compared
        with ``==``.
        """
        if want == 'any':
            return True
        if (not isinstance(want, _basestring)
                or not isinstance(got, _basestring)):
            return want == got
        want = want or ''
        got = got or ''
        if want.startswith('{...}'):
            # Ellipsis on the namespace
            return want.split('}')[-1] == got.split('}')[-1]
        else:
            return want == got

    def output_difference(self, example, got, optionflags):
        """Return a description of how ``got`` differs from ``example.want``.

        If no parser applies, or either text fails to parse, the stock
        doctest difference is returned (preceded by the parse errors, if
        any).  Otherwise the result has 'Expected:', 'Got:' and 'Diff:'
        sections.
        """
        want = example.want
        parser = self.get_parser(want, got, optionflags)
        errors = []
        if parser is not None:
            try:
                want_doc = parser(want)
            except etree.XMLSyntaxError:
                e = sys.exc_info()[1]
                errors.append('In example: %s' % e)
            try:
                got_doc = parser(got)
            except etree.XMLSyntaxError:
                e = sys.exc_info()[1]
                errors.append('In actual output: %s' % e)
        if parser is None or errors:
            value = OutputChecker.output_difference(
                self, example, got, optionflags)
            if errors:
                errors.append(value)
                return '\n'.join(errors)
            else:
                return value
        html = parser is html_fromstring
        diff_parts = ['Expected:',
                      self.format_doc(want_doc, html, 2),
                      'Got:',
                      self.format_doc(got_doc, html, 2),
                      'Diff:',
                      self.collect_diff(want_doc, got_doc, html, 2)]
        return '\n'.join(diff_parts)

    def html_empty_tag(self, el, html=True):
        """Return True if ``el`` is a content-less HTML empty tag.

        Always False when ``html`` is false.
        """
        if not html:
            return False
        if el.tag not in self.empty_tags:
            return False
        if el.text or len(el):
            # This shouldn't happen (contents in an empty tag)
            return False
        return True

    def format_doc(self, doc, html, indent, prefix=''):
        """Render the element ``doc`` as indented markup text.

        ``indent`` is the number of leading spaces; children are indented
        two further spaces.  ``prefix`` is inserted before the opening
        tag of ``doc`` only (collect_diff passes '+' or '-').
        """
        parts = []
        if not len(doc):
            # No children...
            parts.append(' '*indent)
            parts.append(prefix)
            parts.append(self.format_tag(doc))
            if not self.html_empty_tag(doc, html):
                if strip(doc.text):
                    parts.append(self.format_text(doc.text))
                parts.append(self.format_end_tag(doc))
            if strip(doc.tail):
                parts.append(self.format_text(doc.tail))
            parts.append('\n')
            return ''.join(parts)
        parts.append(' '*indent)
        parts.append(prefix)
        parts.append(self.format_tag(doc))
        if not self.html_empty_tag(doc, html):
            parts.append('\n')
            if strip(doc.text):
                parts.append(' '*indent)
                parts.append(self.format_text(doc.text))
                parts.append('\n')
            for el in doc:
                parts.append(self.format_doc(el, html, indent+2))
            parts.append(' '*indent)
            parts.append(self.format_end_tag(doc))
            parts.append('\n')
        if strip(doc.tail):
            parts.append(' '*indent)
            parts.append(self.format_text(doc.tail))
            parts.append('\n')
        return ''.join(parts)

    def format_text(self, text, strip=True):
        """Return ``text`` HTML-escaped (quotes included); None gives ''.

        The text is stripped first unless ``strip`` is false.
        """
        if text is None:
            return ''
        if strip:
            text = text.strip()
        return html_escape(text, 1)

    def format_tag(self, el):
        """Return the opening tag of ``el`` with attributes sorted by name.

        Comments are rendered as ``<!--``.
        """
        attrs = []
        if isinstance(el, etree.CommentBase):
            # FIXME: probably PIs should be handled specially too?
            return '<!--'
        for name, value in sorted(el.attrib.items()):
            attrs.append('%s="%s"' % (name, self.format_text(value, False)))
        if not attrs:
            return '<%s>' % el.tag
        return '<%s %s>' % (el.tag, ' '.join(attrs))

    def format_end_tag(self, el):
        """Return the closing tag of ``el`` (``-->`` for comments)."""
        if isinstance(el, etree.CommentBase):
            # FIXME: probably PIs should be handled specially too?
            return '-->'
        return '</%s>' % el.tag

    def collect_diff(self, want, got, html, indent):
        """Render a rough diff of the elements ``want`` and ``got``.

        Mismatching tags and texts are shown as ``want (got: got)``;
        surplus children of ``got`` are prefixed with '+', missing
        children of ``want`` with '-'.
        """
        parts = []
        if not len(want) and not len(got):
            parts.append(' '*indent)
            parts.append(self.collect_diff_tag(want, got))
            if not self.html_empty_tag(got, html):
                parts.append(self.collect_diff_text(want.text, got.text))
                parts.append(self.collect_diff_end_tag(want, got))
            parts.append(self.collect_diff_text(want.tail, got.tail))
            parts.append('\n')
            return ''.join(parts)
        parts.append(' '*indent)
        parts.append(self.collect_diff_tag(want, got))
        parts.append('\n')
        if strip(want.text) or strip(got.text):
            parts.append(' '*indent)
            parts.append(self.collect_diff_text(want.text, got.text))
            parts.append('\n')
        want_children = list(want)
        got_children = list(got)
        while want_children or got_children:
            if not want_children:
                parts.append(self.format_doc(got_children.pop(0), html, indent+2, '+'))
                continue
            if not got_children:
                parts.append(self.format_doc(want_children.pop(0), html, indent+2, '-'))
                continue
            parts.append(self.collect_diff(
                want_children.pop(0), got_children.pop(0), html, indent+2))
        parts.append(' '*indent)
        parts.append(self.collect_diff_end_tag(want, got))
        parts.append('\n')
        if strip(want.tail) or strip(got.tail):
            parts.append(' '*indent)
            parts.append(self.collect_diff_text(want.tail, got.tail))
            parts.append('\n')
        return ''.join(parts)

    def collect_diff_tag(self, want, got):
        """Return the opening tag for the diff of ``want`` and ``got``.

        Attributes only in ``got`` are prefixed with '+', attributes only
        in ``want`` with '-'.  Neither marker is used when ``want`` is an
        ``any`` tag or carries an ``any`` attribute.
        """
        if not self.tag_compare(want.tag, got.tag):
            tag = '%s (got: %s)' % (want.tag, got.tag)
        else:
            tag = got.tag
        attrs = []
        match_any = want.tag == 'any' or 'any' in want.attrib
        for name, value in sorted(got.attrib.items()):
            if name not in want.attrib and not match_any:
                attrs.append('+%s="%s"' % (name, self.format_text(value, False)))
            else:
                if name in want.attrib:
                    text = self.collect_diff_text(want.attrib[name], value, False)
                else:
                    text = self.format_text(value, False)
                attrs.append('%s="%s"' % (name, text))
        if not match_any:
            for name, value in sorted(want.attrib.items()):
                if name in got.attrib:
                    continue
                attrs.append('-%s="%s"' % (name, self.format_text(value, False)))
        if attrs:
            tag = '<%s %s>' % (tag, ' '.join(attrs))
        else:
            tag = '<%s>' % tag
        return tag

    def collect_diff_end_tag(self, want, got):
        """Return the closing tag for the diff of ``want`` and ``got``.

        Uses `tag_compare`, like `collect_diff_tag`, so that wildcard
        matches (``any``, ``{...}name``) are not reported as differences.
        """
        if not self.tag_compare(want.tag, got.tag):
            tag = '%s (got: %s)' % (want.tag, got.tag)
        else:
            tag = got.tag
        return '</%s>' % tag

    def collect_diff_text(self, want, got, strip=True):
        """Return ``got`` if it matches ``want``, else ``want (got: got)``.

        The result is formatted with `format_text`; a matching empty
        ``got`` yields ''.
        """
        if self.text_compare(want, got, strip):
            if not got:
                return ''
            return self.format_text(got, strip)
        text = '%s (got: %s)' % (want, got)
        return self.format_text(text, strip)


class LHTMLOutputChecker(LXMLOutputChecker):
    """Output checker whose default parser is the HTML parser."""

    def get_default_parser(self):
        return html_fromstring


def install(html=False):
    """
    Install doctestcompare for all future doctests.

    If html is true, then by default the HTML parser will be used;
    otherwise the XML parser is used.
    """
    if html:
        doctest.OutputChecker = LHTMLOutputChecker
    else:
        doctest.OutputChecker = LXMLOutputChecker


def temp_install(html=False, del_module=None):
    """
    Use this *inside* a doctest to enable this checker for this
    doctest only.

    If html is true, then by default the HTML parser will be used;
    otherwise the XML parser is used.

    If del_module is given, the module of that name is removed from
    ``sys.modules`` (and from its parent package) when the doctest run
    ends.
    """
    if html:
        Checker = LHTMLOutputChecker
    else:
        Checker = LXMLOutputChecker
    frame = _find_doctest_frame()
    dt_self = frame.f_locals['self']
    checker = Checker()
    old_checker = dt_self._checker
    dt_self._checker = checker
    # The unfortunate thing is that there is a local variable 'check'
    # in the function that runs the doctests, that is a bound method
    # into the output checker.  We have to update that.  We can't
    # modify the frame, so we have to modify the object in place.  The
    # only way to do this is to actually change the func_code
    # attribute of the method.  We change it, and then wait for
    # __record_outcome to be run, which signals the end of the __run
    # method, at which point we restore the previous check_output
    # implementation.
    if _IS_PYTHON_3:
        check_func = frame.f_locals['check'].__func__
        checker_check_func = checker.check_output.__func__
    else:
        check_func = frame.f_locals['check'].im_func
        checker_check_func = checker.check_output.im_func
    # Because we can't patch up func_globals, this is the only global
    # in check_output that we care about:
    doctest.etree = etree
    _RestoreChecker(dt_self, old_checker, checker,
                    check_func, checker_check_func,
                    del_module)


class _RestoreChecker(object):
    """Installs the temporary checker and undoes it after the doctest run.

    On construction it swaps the code of doctest's check function for
    that of the new checker and replaces the runner's
    ``__record_outcome`` with itself; when called in that role it
    restores everything and forwards the call.
    """

    def __init__(self, dt_self, old_checker, new_checker, check_func, clone_func,
                 del_module):
        self.dt_self = dt_self
        self.checker = old_checker
        # Read by the transplanted check_output code via ``self``.
        self.checker._temp_call_super_check_output = self.call_super
        self.checker._temp_override_self = new_checker
        self.check_func = check_func
        self.clone_func = clone_func
        self.del_module = del_module
        self.install_clone()
        self.install_dt_self()

    def install_clone(self):
        """Save the code of ``check_func`` and replace it by the clone's."""
        if _IS_PYTHON_3:
            self.func_code = self.check_func.__code__
            self.func_globals = self.check_func.__globals__
            self.check_func.__code__ = self.clone_func.__code__
        else:
            self.func_code = self.check_func.func_code
            self.func_globals = self.check_func.func_globals
            self.check_func.func_code = self.clone_func.func_code

    def uninstall_clone(self):
        """Put the saved code object back onto ``check_func``."""
        if _IS_PYTHON_3:
            self.check_func.__code__ = self.func_code
        else:
            self.check_func.func_code = self.func_code

    def install_dt_self(self):
        """Hook this object in as the runner's ``__record_outcome``."""
        self.prev_func = self.dt_self._DocTestRunner__record_outcome
        self.dt_self._DocTestRunner__record_outcome = self

    def uninstall_dt_self(self):
        """Restore the runner's original ``__record_outcome``."""
        self.dt_self._DocTestRunner__record_outcome = self.prev_func

    def uninstall_module(self):
        """Remove ``del_module`` from sys.modules and its parent package."""
        if self.del_module:
            import sys
            del sys.modules[self.del_module]
            if '.' in self.del_module:
                package, module = self.del_module.rsplit('.', 1)
                package_mod = sys.modules[package]
                delattr(package_mod, module)

    def __call__(self, *args, **kw):
        """Undo all patches, then forward to the real ``__record_outcome``."""
        self.uninstall_clone()
        self.uninstall_dt_self()
        del self.checker._temp_override_self
        del self.checker._temp_call_super_check_output
        result = self.prev_func(*args, **kw)
        self.uninstall_module()
        return result

    def call_super(self, *args, **kw):
        """Call the original check function with its own code restored."""
        self.uninstall_clone()
        try:
            return self.check_func(*args, **kw)
        finally:
            self.install_clone()


def _find_doctest_frame():
    """Return the nearest calling frame that has a local named 'BOOM'.

    Raises LookupError if no such frame is on the stack.
    """
    import sys
    frame = sys._getframe(1)
    while frame:
        frame_locals = frame.f_locals
        if 'BOOM' in frame_locals:
            # Sign of doctest
            return frame
        frame = frame.f_back
    raise LookupError(
        "Could not find doctest (only use this function *inside* a doctest)")


# The examples use the call form of print so that they are valid on
# Python 3 as well as Python 2.
__test__ = {
    'basic': '''
    >>> temp_install()
    >>> print("""<xml a="1" b="2">stuff</xml>""")
    <xml b="2" a="1">...</xml>
    >>> print("""<xml xmlns="http://example.com"><tag attr="bar" /></xml>""")
    <xml xmlns="...">
      <tag attr="..." />
    </xml>
    >>> print("""<xml>blahblahblah<foo /></xml>""") # doctest: +NOPARSE_MARKUP, +ELLIPSIS
    <xml>...foo /></xml>
    '''}

if __name__ == '__main__':
    import doctest
    doctest.testmod()