Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/lxml/doctestcompare.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
| author | guerler |
|---|---|
| date | Fri, 31 Jul 2020 00:32:28 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:d30785e31577 | 1:56ad4e20f292 |
|---|---|
| 1 """ | |
| 2 lxml-based doctest output comparison. | |
| 3 | |
| 4 Note: normally, you should just import the `lxml.usedoctest` and | |
| 5 `lxml.html.usedoctest` modules from within a doctest, instead of this | |
| 6 one:: | |
| 7 | |
| 8 >>> import lxml.usedoctest # for XML output | |
| 9 | |
| 10 >>> import lxml.html.usedoctest # for HTML output | |
| 11 | |
| 12 To use this module directly, you must call ``lxml.doctestcompare.install()``, | |
| 13 which will cause doctest to use this checker in all subsequent calls. | |
| 14 | |
| 15 This changes the way output is checked and comparisons are made for | |
| 16 XML or HTML-like content. | |
| 17 | |
| 18 XML or HTML content is noticed because the example starts with ``<`` | |
| 19 (it's HTML if it starts with ``<html``). You can also use the | |
| 20 ``PARSE_HTML`` and ``PARSE_XML`` flags to force parsing. | |
| 21 | |
| 22 Some rough wildcard-like things are allowed. Whitespace is generally | |
| 23 ignored (except in attributes). In text (attributes and text in the | |
| 24 body) you can use ``...`` as a wildcard. In an example it also | |
| 25 matches any trailing tags in the element, though it does not match | |
| 26 leading tags. You may create a tag ``<any>`` or include an ``any`` | |
| 27 attribute in the tag. An ``any`` tag matches any tag, while the | |
| 28 attribute matches any and all attributes. | |
| 29 | |
| 30 When a match fails, the reformatted example and gotten text are | |
| 31 displayed (indented), and a rough diff-like output is given. Anything | |
| 32 marked with ``+`` is in the output but wasn't supposed to be, and | |
| 33 similarly ``-`` means it's in the example but wasn't in the output. | |
| 34 | |
| 35 You can disable parsing on one line with ``# doctest:+NOPARSE_MARKUP`` | |
| 36 """ | |
| 37 | |
| 38 from lxml import etree | |
| 39 import sys | |
| 40 import re | |
| 41 import doctest | |
| 42 try: | |
| 43 from html import escape as html_escape | |
| 44 except ImportError: | |
| 45 from cgi import escape as html_escape | |
| 46 | |
# Public names of this module (what ``from ... import *`` exposes).
__all__ = [
    'PARSE_HTML',
    'PARSE_XML',
    'NOPARSE_MARKUP',
    'LXMLOutputChecker',
    'LHTMLOutputChecker',
    'install',
    'temp_install',
]
| 49 | |
# ``basestring`` only exists on Python 2; elsewhere fall back to the
# (str, bytes) pair so isinstance() checks keep working.
try:
    _basestring = basestring
except NameError:
    _basestring = (str, bytes)

# True when running on Python 3 or later.
_IS_PYTHON_3 = sys.version_info[0] >= 3

# Extra doctest option flags understood by the checkers in this module.
PARSE_HTML = doctest.register_optionflag('PARSE_HTML')
PARSE_XML = doctest.register_optionflag('PARSE_XML')
NOPARSE_MARKUP = doctest.register_optionflag('NOPARSE_MARKUP')

# Keep a reference to the stock checker class; ``install()`` rebinds
# ``doctest.OutputChecker`` later on.
OutputChecker = doctest.OutputChecker
| 62 | |
def strip(v):
    """Return ``v.strip()``, passing ``None`` through unchanged."""
    return None if v is None else v.strip()
| 68 | |
def norm_whitespace(v):
    """Replace every match of ``_norm_whitespace_re`` in ``v`` with one space."""
    squeeze = _norm_whitespace_re.sub
    return squeeze(' ', v)
| 71 | |
# Shared HTML parser: error recovery is switched off and blank text is
# removed while parsing.
_html_parser = etree.HTMLParser(remove_blank_text=True, recover=False)


def html_fromstring(html):
    """Parse the string ``html`` with the shared HTML parser and return the result."""
    parsed = etree.fromstring(html, _html_parser)
    return parsed
| 76 | |
# Tells object repr()s (``<... at ...>`` / ``<... object ...>``) apart
# from real markup that merely starts with ``<``:
_repr_re = re.compile(r'^<[^>]+ (at|object) ')
# A run of two or more spaces, tabs or newlines:
_norm_whitespace_re = re.compile(r'[ \t\n][ \t\n]+')
| 80 | |
class LXMLOutputChecker(OutputChecker):
    """Doctest output checker that compares XML/HTML output structurally.

    When both the expected and the actual output look like markup they
    are parsed and compared element by element (tags, attributes, text
    and tails) instead of character by character.  ``...`` acts as a
    wildcard in text and attribute values, an ``<any>`` tag matches any
    tag and an ``any`` attribute matches any set of attributes.
    Everything else is delegated to the stock ``doctest.OutputChecker``.
    """

    # HTML elements that are rendered without an end tag when empty.
    empty_tags = (
        'param', 'img', 'area', 'br', 'basefont', 'input',
        'base', 'meta', 'link', 'col')

    def get_default_parser(self):
        """Return the parser used for markup that is not explicitly flagged."""
        return etree.XML

    def check_output(self, want, got, optionflags):
        """Return True if ``got`` matches ``want``.

        Falls back to the plain doctest comparison when no parser
        applies; returns False if either side fails to parse.
        """
        # NOTE: temp_install() copies this method's code object onto
        # doctest's own OutputChecker.check_output.  The body must
        # therefore only use globals that also exist in the doctest
        # module (``OutputChecker`` and the injected ``etree``) and is
        # deliberately kept free of helpers from this module.
        alt_self = getattr(self, '_temp_override_self', None)
        if alt_self is not None:
            super_method = self._temp_call_super_check_output
            self = alt_self
        else:
            super_method = OutputChecker.check_output
        parser = self.get_parser(want, got, optionflags)
        if not parser:
            return super_method(
                self, want, got, optionflags)
        try:
            want_doc = parser(want)
        except etree.XMLSyntaxError:
            return False
        try:
            got_doc = parser(got)
        except etree.XMLSyntaxError:
            return False
        return self.compare_docs(want_doc, got_doc)

    def get_parser(self, want, got, optionflags):
        """Pick the parser for this example, or None for plain comparison.

        Precedence: ``NOPARSE_MARKUP`` disables parsing, then the
        ``PARSE_HTML`` / ``PARSE_XML`` flags, then content sniffing
        (``<html`` on both sides, compared case-insensitively, selects
        the HTML parser; any other markup selects the default parser).
        """
        if NOPARSE_MARKUP & optionflags:
            return None
        if PARSE_HTML & optionflags:
            return html_fromstring
        if PARSE_XML & optionflags:
            return etree.XML
        # Both sides are lower-cased so that ``<HTML>`` in the actual
        # output is sniffed the same way as in the example.
        if (want.strip().lower().startswith('<html')
                and got.strip().lower().startswith('<html')):
            return html_fromstring
        if (self._looks_like_markup(want)
                and self._looks_like_markup(got)):
            return self.get_default_parser()
        return None

    def _looks_like_markup(self, s):
        """Return True if ``s`` starts with ``<`` and is not an object repr."""
        s = s.strip()
        return (s.startswith('<')
                and not _repr_re.search(s))

    def compare_docs(self, want, got):
        """Recursively compare the element ``got`` against the pattern ``want``."""
        if not self.tag_compare(want.tag, got.tag):
            return False
        if not self.text_compare(want.text, got.text, True):
            return False
        if not self.text_compare(want.tail, got.tail, True):
            return False
        # An ``any`` attribute on the example matches any attributes.
        if 'any' not in want.attrib:
            want_keys = sorted(want.attrib.keys())
            got_keys = sorted(got.attrib.keys())
            if want_keys != got_keys:
                return False
            for key in want_keys:
                if not self.text_compare(want.attrib[key], got.attrib[key], False):
                    return False
        # A childless example element whose text is ``...`` matches any
        # content, so children are only compared otherwise.
        if want.text != '...' or len(want):
            want_children = list(want)
            got_children = list(got)
            index = 0
            while index < len(want_children) or index < len(got_children):
                if index >= len(want_children) or index >= len(got_children):
                    # One side ran out of children first.
                    return False
                want_child = want_children[index]
                got_child = got_children[index]
                index += 1
                if not self.compare_docs(want_child, got_child):
                    return False
                # NOTE(review): this stops once the *actual* children are
                # exhausted and the example child is followed by ``...``;
                # kept exactly as in the original implementation.
                if index >= len(got_children) and want_child.tail == '...':
                    break
        return True

    def text_compare(self, want, got, strip):
        """Return True if the text ``got`` matches the pattern ``want``.

        ``None`` is treated as the empty string.  With ``strip`` true,
        whitespace is normalised and trimmed on both sides first.
        ``...`` in ``want`` matches any run of characters.
        """
        want = want or ''
        got = got or ''
        if strip:
            want = norm_whitespace(want).strip()
            got = norm_whitespace(got).strip()
        # Escape everything, then turn the escaped ellipsis back into a
        # wildcard.
        pattern = '^%s$' % re.escape(want)
        pattern = pattern.replace(r'\.\.\.', '.*')
        return bool(re.search(pattern, got))

    def tag_compare(self, want, got):
        """Return True if the tag ``got`` matches the example tag ``want``.

        ``any`` matches every tag and a ``{...}`` namespace prefix in
        ``want`` matches any namespace.
        """
        if want == 'any':
            return True
        # Non-string tags are compared as-is.
        if (not isinstance(want, _basestring)
                or not isinstance(got, _basestring)):
            return want == got
        want = want or ''
        got = got or ''
        if want.startswith('{...}'):
            # Ellipsis on the namespace
            return want.split('}')[-1] == got.split('}')[-1]
        return want == got

    def output_difference(self, example, got, optionflags):
        """Describe how ``got`` differs from ``example.want``.

        For parsable markup the result contains the reformatted
        expected and actual documents plus a rough diff; otherwise the
        stock doctest message is returned (prefixed with any parse
        errors).
        """
        want = example.want
        parser = self.get_parser(want, got, optionflags)
        errors = []
        if parser is not None:
            try:
                want_doc = parser(want)
            except etree.XMLSyntaxError as e:
                errors.append('In example: %s' % e)
            try:
                got_doc = parser(got)
            except etree.XMLSyntaxError as e:
                errors.append('In actual output: %s' % e)
        if parser is None or errors:
            value = OutputChecker.output_difference(
                self, example, got, optionflags)
            if errors:
                errors.append(value)
                return '\n'.join(errors)
            return value
        html = parser is html_fromstring
        diff_parts = ['Expected:',
                      self.format_doc(want_doc, html, 2),
                      'Got:',
                      self.format_doc(got_doc, html, 2),
                      'Diff:',
                      self.collect_diff(want_doc, got_doc, html, 2)]
        return '\n'.join(diff_parts)

    def html_empty_tag(self, el, html=True):
        """Return True if ``el`` should be rendered as an HTML empty tag."""
        if not html:
            return False
        if el.tag not in self.empty_tags:
            return False
        if el.text or len(el):
            # This shouldn't happen (contents in an empty tag)
            return False
        return True

    def format_doc(self, doc, html, indent, prefix=''):
        """Render the element ``doc`` as indented text, one tag per line.

        ``indent`` is the number of leading spaces and ``prefix`` is
        inserted right before the start tag (used for ``+``/``-`` diff
        markers).
        """
        parts = []
        if not len(doc):
            # No children...
            parts.append(' '*indent)
            parts.append(prefix)
            parts.append(self.format_tag(doc))
            if not self.html_empty_tag(doc, html):
                if strip(doc.text):
                    parts.append(self.format_text(doc.text))
                parts.append(self.format_end_tag(doc))
            if strip(doc.tail):
                parts.append(self.format_text(doc.tail))
            parts.append('\n')
            return ''.join(parts)
        parts.append(' '*indent)
        parts.append(prefix)
        parts.append(self.format_tag(doc))
        if not self.html_empty_tag(doc, html):
            parts.append('\n')
            if strip(doc.text):
                parts.append(' '*indent)
                parts.append(self.format_text(doc.text))
                parts.append('\n')
            for el in doc:
                parts.append(self.format_doc(el, html, indent+2))
            parts.append(' '*indent)
            parts.append(self.format_end_tag(doc))
            parts.append('\n')
        if strip(doc.tail):
            parts.append(' '*indent)
            parts.append(self.format_text(doc.tail))
            parts.append('\n')
        return ''.join(parts)

    def format_text(self, text, strip=True):
        """Return ``text`` HTML-escaped (quotes included); ``None`` gives ''."""
        if text is None:
            return ''
        if strip:
            text = text.strip()
        return html_escape(text, 1)

    def format_tag(self, el):
        """Return the start tag of ``el`` with its attributes sorted by name."""
        if isinstance(el, etree.CommentBase):
            # FIXME: probably PIs should be handled specially too?
            return '<!--'
        attrs = ['%s="%s"' % (name, self.format_text(value, False))
                 for name, value in sorted(el.attrib.items())]
        if not attrs:
            return '<%s>' % el.tag
        return '<%s %s>' % (el.tag, ' '.join(attrs))

    def format_end_tag(self, el):
        """Return the end tag of ``el`` (``-->`` for comments)."""
        if isinstance(el, etree.CommentBase):
            # FIXME: probably PIs should be handled specially too?
            return '-->'
        return '</%s>' % el.tag

    def collect_diff(self, want, got, html, indent):
        """Render a rough diff between the elements ``want`` and ``got``.

        Children are paired by position; surplus actual children are
        prefixed with ``+`` and surplus expected children with ``-``.
        """
        parts = []
        if not len(want) and not len(got):
            parts.append(' '*indent)
            parts.append(self.collect_diff_tag(want, got))
            if not self.html_empty_tag(got, html):
                parts.append(self.collect_diff_text(want.text, got.text))
                parts.append(self.collect_diff_end_tag(want, got))
            parts.append(self.collect_diff_text(want.tail, got.tail))
            parts.append('\n')
            return ''.join(parts)
        parts.append(' '*indent)
        parts.append(self.collect_diff_tag(want, got))
        parts.append('\n')
        if strip(want.text) or strip(got.text):
            parts.append(' '*indent)
            parts.append(self.collect_diff_text(want.text, got.text))
            parts.append('\n')
        want_children = list(want)
        got_children = list(got)
        while want_children or got_children:
            if not want_children:
                parts.append(self.format_doc(got_children.pop(0), html, indent+2, '+'))
                continue
            if not got_children:
                parts.append(self.format_doc(want_children.pop(0), html, indent+2, '-'))
                continue
            parts.append(self.collect_diff(
                want_children.pop(0), got_children.pop(0), html, indent+2))
        parts.append(' '*indent)
        parts.append(self.collect_diff_end_tag(want, got))
        parts.append('\n')
        if strip(want.tail) or strip(got.tail):
            parts.append(' '*indent)
            parts.append(self.collect_diff_text(want.tail, got.tail))
            parts.append('\n')
        return ''.join(parts)

    def collect_diff_tag(self, want, got):
        """Render the start tag for the diff, flagging tag/attribute changes.

        Unexpected attributes are prefixed with ``+``, missing ones
        with ``-``; mismatching values are shown as ``want (got: got)``.
        """
        if not self.tag_compare(want.tag, got.tag):
            tag = '%s (got: %s)' % (want.tag, got.tag)
        else:
            tag = got.tag
        attrs = []
        # With an ``any`` tag or attribute, attribute sets are not compared.
        match_any = want.tag == 'any' or 'any' in want.attrib
        for name, value in sorted(got.attrib.items()):
            if name not in want.attrib and not match_any:
                attrs.append('+%s="%s"' % (name, self.format_text(value, False)))
            else:
                if name in want.attrib:
                    text = self.collect_diff_text(want.attrib[name], value, False)
                else:
                    text = self.format_text(value, False)
                attrs.append('%s="%s"' % (name, text))
        if not match_any:
            for name, value in sorted(want.attrib.items()):
                if name in got.attrib:
                    continue
                attrs.append('-%s="%s"' % (name, self.format_text(value, False)))
        if attrs:
            tag = '<%s %s>' % (tag, ' '.join(attrs))
        else:
            tag = '<%s>' % tag
        return tag

    def collect_diff_end_tag(self, want, got):
        """Render the end tag for the diff, flagging a tag mismatch.

        Uses ``tag_compare`` (like ``collect_diff_tag``) so that the
        ``any`` tag and ``{...}`` namespace wildcards are not reported
        as differences.
        """
        if not self.tag_compare(want.tag, got.tag):
            tag = '%s (got: %s)' % (want.tag, got.tag)
        else:
            tag = got.tag
        return '</%s>' % tag

    def collect_diff_text(self, want, got, strip=True):
        """Render text for the diff; mismatches become ``want (got: got)``."""
        if self.text_compare(want, got, strip):
            if not got:
                return ''
            return self.format_text(got, strip)
        text = '%s (got: %s)' % (want, got)
        return self.format_text(text, strip)
| 368 | |
class LHTMLOutputChecker(LXMLOutputChecker):
    """Checker variant whose default parser is the HTML parser."""

    def get_default_parser(self):
        """Return ``html_fromstring`` as the default parser."""
        default_parser = html_fromstring
        return default_parser
| 372 | |
def install(html=False):
    """
    Install doctestcompare for all future doctests.

    A true ``html`` makes the HTML parser the default; otherwise the
    XML parser is the default.
    """
    doctest.OutputChecker = LHTMLOutputChecker if html else LXMLOutputChecker
| 384 | |
def temp_install(html=False, del_module=None):
    """
    Use this *inside* a doctest to enable this checker for this
    doctest only.

    A true ``html`` makes the HTML parser the default; otherwise the
    XML parser is the default.  ``del_module`` is handed on to
    ``_RestoreChecker``, which removes that module from ``sys.modules``
    after the previous checker has been restored.
    """
    checker_class = LHTMLOutputChecker if html else LXMLOutputChecker
    frame = _find_doctest_frame()
    runner = frame.f_locals['self']
    new_checker = checker_class()
    previous_checker = runner._checker
    runner._checker = new_checker
    # The function running the doctests holds a local variable 'check'
    # that is a bound method of the output checker.  The frame itself
    # cannot be modified, so the underlying function object is patched
    # in place by swapping its code object.  Once __record_outcome
    # runs (signalling the end of the __run method), the previous
    # check_output implementation is restored.
    running_check = frame.f_locals['check']
    replacement_check = new_checker.check_output
    if _IS_PYTHON_3:
        check_func = running_check.__func__
        checker_check_func = replacement_check.__func__
    else:
        check_func = running_check.im_func
        checker_check_func = replacement_check.im_func
    # The globals of the patched function cannot be changed, so the one
    # global check_output needs is published on the doctest module:
    doctest.etree = etree
    _RestoreChecker(runner, previous_checker, new_checker,
                    check_func, checker_check_func,
                    del_module)
| 423 | |
class _RestoreChecker(object):
    """Temporarily swap a checker's ``check_output`` code and undo it later.

    On construction the code object of ``check_func`` is replaced by
    that of ``clone_func`` and the instance installs itself as the
    runner's ``__record_outcome`` callback.  When that callback fires,
    everything is restored and the original callback is invoked.
    """

    def __init__(self, dt_self, old_checker, new_checker, check_func, clone_func,
                 del_module):
        """Install the override.

        dt_self     -- object whose ``_DocTestRunner__record_outcome`` is hooked
        old_checker -- checker instance that receives the temporary attributes
        new_checker -- checker stored as ``_temp_override_self``
        check_func  -- function whose code object gets replaced
        clone_func  -- function providing the replacement code object
        del_module  -- optional module name removed from ``sys.modules``
                       after the outcome has been recorded
        """
        self.dt_self = dt_self
        self.checker = old_checker
        self.checker._temp_call_super_check_output = self.call_super
        self.checker._temp_override_self = new_checker
        self.check_func = check_func
        self.clone_func = clone_func
        self.del_module = del_module
        self.install_clone()
        self.install_dt_self()

    def install_clone(self):
        """Remember the original code object and put the clone's in place."""
        if _IS_PYTHON_3:
            self.func_code = self.check_func.__code__
            self.func_globals = self.check_func.__globals__
            self.check_func.__code__ = self.clone_func.__code__
        else:
            self.func_code = self.check_func.func_code
            self.func_globals = self.check_func.func_globals
            self.check_func.func_code = self.clone_func.func_code

    def uninstall_clone(self):
        """Put the remembered original code object back."""
        if _IS_PYTHON_3:
            self.check_func.__code__ = self.func_code
        else:
            self.check_func.func_code = self.func_code

    def install_dt_self(self):
        """Hook this instance in as the runner's record-outcome callback."""
        self.prev_func = self.dt_self._DocTestRunner__record_outcome
        self.dt_self._DocTestRunner__record_outcome = self

    def uninstall_dt_self(self):
        """Restore the runner's previous record-outcome callback."""
        self.dt_self._DocTestRunner__record_outcome = self.prev_func

    def uninstall_module(self):
        """Drop ``del_module`` from ``sys.modules`` and from its package.

        Does nothing for a false ``del_module``; a module or package
        that is already gone is silently ignored.
        """
        if not self.del_module:
            return
        import sys
        sys.modules.pop(self.del_module, None)
        if '.' in self.del_module:
            package, module = self.del_module.rsplit('.', 1)
            package_mod = sys.modules.get(package)
            if package_mod is not None and hasattr(package_mod, module):
                delattr(package_mod, module)

    def __call__(self, *args, **kw):
        """Undo the override, then forward to the original callback."""
        self.uninstall_clone()
        self.uninstall_dt_self()
        del self.checker._temp_override_self
        del self.checker._temp_call_super_check_output
        try:
            return self.prev_func(*args, **kw)
        finally:
            # Clean up the module even if the callback raised.
            self.uninstall_module()

    def call_super(self, *args, **kw):
        """Call ``check_func`` with its original code, then re-install the clone."""
        self.uninstall_clone()
        try:
            return self.check_func(*args, **kw)
        finally:
            self.install_clone()
| 477 | |
def _find_doctest_frame():
    """Return the nearest calling frame that has a local named ``BOOM``.

    Such a local is taken as the sign of the doctest runner.  Raises
    LookupError when no frame on the call stack has one.
    """
    import sys
    candidate = sys._getframe(1)
    while candidate:
        # Sign of doctest
        if 'BOOM' in candidate.f_locals:
            return candidate
        candidate = candidate.f_back
    raise LookupError(
        "Could not find doctest (only use this function *inside* a doctest)")
| 489 | |
# Extra doctests picked up by ``doctest.testmod()``.  ``print`` is
# called as a function with a single argument so that the examples are
# valid on both Python 2 and Python 3.
__test__ = {
    'basic': '''
    >>> temp_install()
    >>> print("""<xml a="1" b="2">stuff</xml>""")
    <xml b="2" a="1">...</xml>
    >>> print("""<xml xmlns="http://example.com"><tag attr="bar" /></xml>""")
    <xml xmlns="...">
      <tag attr="..." />
    </xml>
    >>> print("""<xml>blahblahblah<foo /></xml>""") # doctest: +NOPARSE_MARKUP, +ELLIPSIS
    <xml>...foo /></xml>
    '''}
| 502 | |
if __name__ == '__main__':
    # Executed as a script: run this module's doctests.
    import doctest as _doctest_module
    _doctest_module.testmod()
| 506 | |
| 507 |
