Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/lxml/doctestcompare.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author | guerler |
---|---|
date | Fri, 31 Jul 2020 00:32:28 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:d30785e31577 | 1:56ad4e20f292 |
---|---|
1 """ | |
2 lxml-based doctest output comparison. | |
3 | |
4 Note: normally, you should just import the `lxml.usedoctest` and | |
5 `lxml.html.usedoctest` modules from within a doctest, instead of this | |
6 one:: | |
7 | |
8 >>> import lxml.usedoctest # for XML output | |
9 | |
10 >>> import lxml.html.usedoctest # for HTML output | |
11 | |
12 To use this module directly, you must call ``lxml.doctestcompare.install()``, | |
13 which will cause doctest to use this in all subsequent calls. | |
14 | |
15 This changes the way output is checked and comparisons are made for | |
16 XML or HTML-like content. | |
17 | |
18 XML or HTML content is noticed because the example starts with ``<`` | |
19 (it's HTML if it starts with ``<html``). You can also use the | |
20 ``PARSE_HTML`` and ``PARSE_XML`` flags to force parsing. | |
21 | |
22 Some rough wildcard-like things are allowed. Whitespace is generally | |
23 ignored (except in attributes). In text (attributes and text in the | |
24 body) you can use ``...`` as a wildcard. In an example it also | |
25 matches any trailing tags in the element, though it does not match | |
26 leading tags. You may create a tag ``<any>`` or include an ``any`` | |
27 attribute in the tag. An ``any`` tag matches any tag, while the | |
28 attribute matches any and all attributes. | |
29 | |
30 When a match fails, the reformatted example and gotten text is | |
31 displayed (indented), and a rough diff-like output is given. Anything | |
32 marked with ``+`` is in the output but wasn't supposed to be, and | |
33 similarly ``-`` means it's in the example but wasn't in the output. | |
34 | |
35 You can disable parsing on one line with ``# doctest:+NOPARSE_MARKUP`` | |
36 """ | |
37 | |
38 from lxml import etree | |
39 import sys | |
40 import re | |
41 import doctest | |
42 try: | |
43 from html import escape as html_escape | |
44 except ImportError: | |
45 from cgi import escape as html_escape | |
46 | |
# Names exported by ``from lxml.doctestcompare import *``.
__all__ = ['PARSE_HTML', 'PARSE_XML', 'NOPARSE_MARKUP', 'LXMLOutputChecker',
           'LHTMLOutputChecker', 'install', 'temp_install']

# ``basestring`` only exists on Python 2; when the name is missing, fall
# back to a tuple usable as the second argument of isinstance().
try:
    _basestring = basestring
except NameError:
    _basestring = (str, bytes)

# Selects between ``__func__``/``__code__`` and ``im_func``/``func_code``
# in the function-patching code further down.
_IS_PYTHON_3 = sys.version_info[0] >= 3

# Doctest option flags; they are tested bitwise against ``optionflags``
# in LXMLOutputChecker.get_parser().
PARSE_HTML = doctest.register_optionflag('PARSE_HTML')
PARSE_XML = doctest.register_optionflag('PARSE_XML')
NOPARSE_MARKUP = doctest.register_optionflag('NOPARSE_MARKUP')

# Keep a module-level reference to doctest's checker class at import time:
# install() later rebinds ``doctest.OutputChecker`` to one of our classes.
OutputChecker = doctest.OutputChecker
62 | |
def strip(v):
    """Return ``v.strip()``, passing ``None`` through unchanged."""
    return None if v is None else v.strip()
68 | |
def norm_whitespace(v):
    """Replace every match of ``_norm_whitespace_re`` in *v* with one space."""
    collapsed = _norm_whitespace_re.sub(' ', v)
    return collapsed
71 | |
# One shared HTML parser instance for the whole module, created with
# ``recover=False`` and ``remove_blank_text=True``.
_html_parser = etree.HTMLParser(recover=False, remove_blank_text=True)


def html_fromstring(html):
    """Parse the string *html* using the module-wide ``_html_parser``."""
    return etree.fromstring(html, parser=_html_parser)
76 | |
# We use this to distinguish repr()s from elements:
# (e.g. text of the form ``<Something object at 0x...>`` or
# ``<Element x at 0x...>``, which starts with ``<`` but is not markup).
_repr_re = re.compile(r'^<[^>]+ (at|object) ')
# Matches a run of two or more spaces, tabs or newlines; used by
# norm_whitespace() to collapse such runs.
_norm_whitespace_re = re.compile(r'[ \t\n][ \t\n]+')
80 | |
class LXMLOutputChecker(OutputChecker):
    """Doctest output checker that compares markup structurally.

    When both the expected and the actual output look like XML/HTML (or a
    ``PARSE_XML`` / ``PARSE_HTML`` flag forces it), both are parsed and the
    resulting trees are compared element by element instead of comparing
    the raw text.  ``...`` acts as a wildcard in text and attribute values,
    an ``any`` tag matches any tag and an ``any`` attribute matches any set
    of attributes.  Everything else is delegated to doctest's own checker.
    """

    # Tags that format_doc()/collect_diff() render without content or an
    # end tag when formatting HTML.
    empty_tags = (
        'param', 'img', 'area', 'br', 'basefont', 'input',
        'base', 'meta', 'link', 'col')

    def get_default_parser(self):
        """Return the parser used when the output merely looks like markup."""
        return etree.XML

    def check_output(self, want, got, optionflags):
        """Return True if *got* matches *want*.

        NOTE: temp_install() copies this method's code object into
        doctest's own check function, so the body then runs with doctest's
        module globals.  It must therefore only use the globals
        ``OutputChecker`` and ``etree`` (temp_install() sets
        ``doctest.etree``) and must not use closures such as a bare
        ``super()``.
        """
        # When running as a temporary clone, ``self`` is doctest's original
        # checker; the real checker and the way back to the original
        # implementation were attached to it by _RestoreChecker.
        alt_self = getattr(self, '_temp_override_self', None)
        if alt_self is not None:
            super_method = self._temp_call_super_check_output
            self = alt_self
        else:
            super_method = OutputChecker.check_output
        parser = self.get_parser(want, got, optionflags)
        if not parser:
            return super_method(
                self, want, got, optionflags)
        try:
            want_doc = parser(want)
        except etree.XMLSyntaxError:
            return False
        try:
            got_doc = parser(got)
        except etree.XMLSyntaxError:
            return False
        return self.compare_docs(want_doc, got_doc)

    def get_parser(self, want, got, optionflags):
        """Choose the parser for this comparison, or None for plain text.

        Priority: ``NOPARSE_MARKUP`` (no parsing), ``PARSE_HTML``,
        ``PARSE_XML``, both texts starting with ``<html`` (case-insensitive),
        then both texts looking like markup (default parser).
        """
        parser = None
        if NOPARSE_MARKUP & optionflags:
            return None
        if PARSE_HTML & optionflags:
            parser = html_fromstring
        elif PARSE_XML & optionflags:
            parser = etree.XML
        elif (want.strip().lower().startswith('<html')
              # Lower-case *got* as well so both sides are sniffed alike.
              and got.strip().lower().startswith('<html')):
            parser = html_fromstring
        elif (self._looks_like_markup(want)
              and self._looks_like_markup(got)):
            parser = self.get_default_parser()
        return parser

    def _looks_like_markup(self, s):
        """Return True if *s* starts with ``<`` but is not an object repr."""
        s = s.strip()
        return (s.startswith('<')
                and not _repr_re.search(s))

    def compare_docs(self, want, got):
        """Recursively compare the element *got* against the pattern *want*."""
        if not self.tag_compare(want.tag, got.tag):
            return False
        if not self.text_compare(want.text, got.text, True):
            return False
        if not self.text_compare(want.tail, got.tail, True):
            return False
        # An ``any`` attribute on the expected element disables all
        # attribute checks.
        if 'any' not in want.attrib:
            want_keys = sorted(want.attrib.keys())
            got_keys = sorted(got.attrib.keys())
            if want_keys != got_keys:
                return False
            for key in want_keys:
                if not self.text_compare(want.attrib[key], got.attrib[key], False):
                    return False
        # ``<tag>...</tag>`` with no children matches any content.
        if want.text != '...' or len(want):
            want_children = list(want)
            got_children = list(got)
            last_got = len(got_children) - 1
            for index, want_child in enumerate(want_children):
                if index > last_got:
                    # Expected more children than were produced.
                    return False
                if not self.compare_docs(want_child, got_children[index]):
                    return False
                if index == last_got and want_child.tail == '...':
                    # The actual children are used up and the expected tail
                    # is a wildcard: remaining expected children are ignored.
                    return True
            if len(got_children) > len(want_children):
                return False
        return True

    def text_compare(self, want, got, strip):
        """Return True if text *got* matches *want*, where ``...`` is a wildcard.

        If *strip* is true, whitespace is normalised and stripped on both
        sides first.  ``None`` is treated as the empty string.
        """
        want = want or ''
        got = got or ''
        if strip:
            want = norm_whitespace(want).strip()
            got = norm_whitespace(got).strip()
        # Escape everything, then turn the escaped ``...`` back into a
        # regex wildcard.
        pattern = '^%s$' % re.escape(want)
        pattern = pattern.replace(r'\.\.\.', '.*')
        return re.search(pattern, got) is not None

    def tag_compare(self, want, got):
        """Return True if tag *got* matches *want*.

        ``any`` matches every tag and a ``{...}`` prefix matches any
        namespace.  Non-string tags are compared with ``==``.
        """
        if want == 'any':
            return True
        if (not isinstance(want, _basestring)
                or not isinstance(got, _basestring)):
            return want == got
        want = want or ''
        got = got or ''
        if want.startswith('{...}'):
            # Ellipsis on the namespace
            return want.split('}')[-1] == got.split('}')[-1]
        return want == got

    def output_difference(self, example, got, optionflags):
        """Return a description of how *got* differs from ``example.want``.

        Falls back to doctest's own report (prefixed with any parse
        errors) when the output is not parsed as markup or fails to parse.
        """
        want = example.want
        parser = self.get_parser(want, got, optionflags)
        errors = []
        if parser is not None:
            try:
                want_doc = parser(want)
            except etree.XMLSyntaxError as e:
                errors.append('In example: %s' % e)
            try:
                got_doc = parser(got)
            except etree.XMLSyntaxError as e:
                errors.append('In actual output: %s' % e)
        if parser is None or errors:
            value = OutputChecker.output_difference(
                self, example, got, optionflags)
            if errors:
                errors.append(value)
                return '\n'.join(errors)
            return value
        html = parser is html_fromstring
        diff_parts = ['Expected:',
                      self.format_doc(want_doc, html, 2),
                      'Got:',
                      self.format_doc(got_doc, html, 2),
                      'Diff:',
                      self.collect_diff(want_doc, got_doc, html, 2)]
        return '\n'.join(diff_parts)

    def html_empty_tag(self, el, html=True):
        """Return True if *el* is a content-less tag listed in ``empty_tags``.

        Always False when *html* is false.
        """
        if not html:
            return False
        if el.tag not in self.empty_tags:
            return False
        if el.text or len(el):
            # This shouldn't happen (contents in an empty tag)
            return False
        return True

    def format_doc(self, doc, html, indent, prefix=''):
        """Return *doc* pretty-printed, indented by *indent* spaces.

        *prefix* (``+`` or ``-`` in diffs) is placed before the start tag
        of *doc* itself, not before its children.
        """
        parts = []
        if not len(doc):
            # No children...
            parts.append(' '*indent)
            parts.append(prefix)
            parts.append(self.format_tag(doc))
            if not self.html_empty_tag(doc, html):
                if strip(doc.text):
                    parts.append(self.format_text(doc.text))
                parts.append(self.format_end_tag(doc))
            if strip(doc.tail):
                parts.append(self.format_text(doc.tail))
            parts.append('\n')
            return ''.join(parts)
        parts.append(' '*indent)
        parts.append(prefix)
        parts.append(self.format_tag(doc))
        if not self.html_empty_tag(doc, html):
            parts.append('\n')
            if strip(doc.text):
                parts.append(' '*indent)
                parts.append(self.format_text(doc.text))
                parts.append('\n')
            for el in doc:
                parts.append(self.format_doc(el, html, indent+2))
            parts.append(' '*indent)
            parts.append(self.format_end_tag(doc))
            parts.append('\n')
        if strip(doc.tail):
            parts.append(' '*indent)
            parts.append(self.format_text(doc.tail))
            parts.append('\n')
        return ''.join(parts)

    def format_text(self, text, strip=True):
        """Return *text* HTML-escaped (quotes included); ``None`` gives ``''``."""
        if text is None:
            return ''
        if strip:
            text = text.strip()
        return html_escape(text, True)

    def format_tag(self, el):
        """Return the start tag of *el* with its attributes sorted by name."""
        if isinstance(el, etree.CommentBase):
            # FIXME: probably PIs should be handled specially too?
            return '<!--'
        attrs = ['%s="%s"' % (name, self.format_text(value, False))
                 for name, value in sorted(el.attrib.items())]
        if not attrs:
            return '<%s>' % el.tag
        return '<%s %s>' % (el.tag, ' '.join(attrs))

    def format_end_tag(self, el):
        """Return the end tag of *el* (``-->`` for comments)."""
        if isinstance(el, etree.CommentBase):
            # FIXME: probably PIs should be handled specially too?
            return '-->'
        return '</%s>' % el.tag

    def collect_diff(self, want, got, html, indent):
        """Return a diff-like rendering of *got* against *want*.

        Children are paired up by position; surplus actual children are
        prefixed with ``+`` and missing expected children with ``-``.
        """
        parts = []
        if not len(want) and not len(got):
            parts.append(' '*indent)
            parts.append(self.collect_diff_tag(want, got))
            if not self.html_empty_tag(got, html):
                parts.append(self.collect_diff_text(want.text, got.text))
                parts.append(self.collect_diff_end_tag(want, got))
            parts.append(self.collect_diff_text(want.tail, got.tail))
            parts.append('\n')
            return ''.join(parts)
        parts.append(' '*indent)
        parts.append(self.collect_diff_tag(want, got))
        parts.append('\n')
        if strip(want.text) or strip(got.text):
            parts.append(' '*indent)
            parts.append(self.collect_diff_text(want.text, got.text))
            parts.append('\n')
        want_children = list(want)
        got_children = list(got)
        shared = min(len(want_children), len(got_children))
        for want_child, got_child in zip(want_children, got_children):
            parts.append(self.collect_diff(
                want_child, got_child, html, indent+2))
        # At most one of the two slices below is non-empty.
        for extra in got_children[shared:]:
            parts.append(self.format_doc(extra, html, indent+2, '+'))
        for missing in want_children[shared:]:
            parts.append(self.format_doc(missing, html, indent+2, '-'))
        parts.append(' '*indent)
        parts.append(self.collect_diff_end_tag(want, got))
        parts.append('\n')
        if strip(want.tail) or strip(got.tail):
            parts.append(' '*indent)
            parts.append(self.collect_diff_text(want.tail, got.tail))
            parts.append('\n')
        return ''.join(parts)

    def collect_diff_tag(self, want, got):
        """Return the start tag for the diff, marking tag/attribute differences."""
        if not self.tag_compare(want.tag, got.tag):
            tag = '%s (got: %s)' % (want.tag, got.tag)
        else:
            tag = got.tag
        attrs = []
        # With an ``any`` tag or attribute, unexpected/missing attributes
        # are not differences.
        match_any = want.tag == 'any' or 'any' in want.attrib
        for name, value in sorted(got.attrib.items()):
            if name not in want.attrib and not match_any:
                attrs.append('+%s="%s"' % (name, self.format_text(value, False)))
            else:
                if name in want.attrib:
                    text = self.collect_diff_text(want.attrib[name], value, False)
                else:
                    text = self.format_text(value, False)
                attrs.append('%s="%s"' % (name, text))
        if not match_any:
            for name, value in sorted(want.attrib.items()):
                if name in got.attrib:
                    continue
                attrs.append('-%s="%s"' % (name, self.format_text(value, False)))
        if attrs:
            tag = '<%s %s>' % (tag, ' '.join(attrs))
        else:
            tag = '<%s>' % tag
        return tag

    def collect_diff_end_tag(self, want, got):
        """Return the end tag for the diff, marking a tag mismatch."""
        # Use tag_compare() like collect_diff_tag() does, so wildcard tags
        # (``any``, ``{...}name``) are not reported as mismatches here.
        if not self.tag_compare(want.tag, got.tag):
            tag = '%s (got: %s)' % (want.tag, got.tag)
        else:
            tag = got.tag
        return '</%s>' % tag

    def collect_diff_text(self, want, got, strip=True):
        """Return *got* escaped, or ``want (got: got)`` if the texts differ."""
        if self.text_compare(want, got, strip):
            if not got:
                return ''
            return self.format_text(got, strip)
        text = '%s (got: %s)' % (want, got)
        return self.format_text(text, strip)
368 | |
class LHTMLOutputChecker(LXMLOutputChecker):
    """Variant of LXMLOutputChecker whose default parser is the HTML one."""

    def get_default_parser(self):
        """Return ``html_fromstring`` as the default parser."""
        default_parser = html_fromstring
        return default_parser
372 | |
def install(html=False):
    """
    Install doctestcompare for all future doctests.

    This rebinds ``doctest.OutputChecker``.  With a true *html* the
    checker defaulting to the HTML parser is installed; otherwise the
    one defaulting to the XML parser.
    """
    checker_class = LHTMLOutputChecker if html else LXMLOutputChecker
    doctest.OutputChecker = checker_class
384 | |
def temp_install(html=False, del_module=None):
    """
    Use this *inside* a doctest to enable this checker for this
    doctest only.

    If html is true, then by default the HTML parser will be used;
    otherwise the XML parser is used.

    If del_module is given, it is handed to ``_RestoreChecker``, which
    deletes that module name from ``sys.modules`` (and from its parent
    package, for dotted names) after the runner's ``__record_outcome``
    has been called.

    Raises LookupError (from ``_find_doctest_frame``) when not called
    from inside a running doctest.
    """
    if html:
        Checker = LHTMLOutputChecker
    else:
        Checker = LXMLOutputChecker
    # Locate the doctest runner's frame; its ``self`` local is the runner.
    frame = _find_doctest_frame()
    dt_self = frame.f_locals['self']
    checker = Checker()
    old_checker = dt_self._checker
    dt_self._checker = checker
    # The unfortunate thing is that there is a local variable 'check'
    # in the function that runs the doctests, that is a bound method
    # into the output checker.  We have to update that.  We can't
    # modify the frame, so we have to modify the object in place.  The
    # only way to do this is to actually change the func_code
    # attribute of the method.  We change it, and then wait for
    # __record_outcome to be run, which signals the end of the __run
    # method, at which point we restore the previous check_output
    # implementation.
    if _IS_PYTHON_3:
        check_func = frame.f_locals['check'].__func__
        checker_check_func = checker.check_output.__func__
    else:
        check_func = frame.f_locals['check'].im_func
        checker_check_func = checker.check_output.im_func
    # Because we can't patch up func_globals, this is the only global
    # in check_output that we care about:
    doctest.etree = etree
    # Constructing the object performs the patching; nothing here keeps a
    # reference to it, it stays alive through the hooks it installs.
    _RestoreChecker(dt_self, old_checker, checker,
                    check_func, checker_check_func,
                    del_module)
423 | |
class _RestoreChecker(object):
    """Installs the temporary checker and undoes the patching afterwards.

    On construction it swaps the code of the runner's check function for
    the clone's code and replaces the runner's ``__record_outcome`` with
    this (callable) object.  Calling the object restores both and then
    delegates to the original ``__record_outcome``.
    """
    def __init__(self, dt_self, old_checker, new_checker, check_func, clone_func,
                 del_module):
        self.dt_self = dt_self
        self.checker = old_checker
        # These two attributes are what LXMLOutputChecker.check_output
        # looks up (via getattr) when it runs as a clone on the old checker.
        self.checker._temp_call_super_check_output = self.call_super
        self.checker._temp_override_self = new_checker
        self.check_func = check_func
        self.clone_func = clone_func
        self.del_module = del_module
        self.install_clone()
        self.install_dt_self()
    def install_clone(self):
        # Remember the original code object (and globals), then make the
        # runner's check function execute the clone's code.
        if _IS_PYTHON_3:
            self.func_code = self.check_func.__code__
            self.func_globals = self.check_func.__globals__
            self.check_func.__code__ = self.clone_func.__code__
        else:
            self.func_code = self.check_func.func_code
            self.func_globals = self.check_func.func_globals
            self.check_func.func_code = self.clone_func.func_code
    def uninstall_clone(self):
        # Put the saved code object back.
        if _IS_PYTHON_3:
            self.check_func.__code__ = self.func_code
        else:
            self.check_func.func_code = self.func_code
    def install_dt_self(self):
        # Hook the runner's name-mangled private ``__record_outcome``.
        self.prev_func = self.dt_self._DocTestRunner__record_outcome
        self.dt_self._DocTestRunner__record_outcome = self
    def uninstall_dt_self(self):
        self.dt_self._DocTestRunner__record_outcome = self.prev_func
    def uninstall_module(self):
        # Drop ``del_module`` from sys.modules and, for a dotted name, the
        # attribute of the same name on its parent package.
        if self.del_module:
            import sys
            del sys.modules[self.del_module]
            if '.' in self.del_module:
                package, module = self.del_module.rsplit('.', 1)
                package_mod = sys.modules[package]
                delattr(package_mod, module)
    def __call__(self, *args, **kw):
        # Stands in for ``__record_outcome``: undo all patching first, then
        # run the original and return its result.
        self.uninstall_clone()
        self.uninstall_dt_self()
        del self.checker._temp_override_self
        del self.checker._temp_call_super_check_output
        result = self.prev_func(*args, **kw)
        self.uninstall_module()
        return result
    def call_super(self, *args, **kw):
        # Temporarily restore the original code so the original check
        # implementation runs, then re-install the clone.
        self.uninstall_clone()
        try:
            return self.check_func(*args, **kw)
        finally:
            self.install_clone()
477 | |
478 def _find_doctest_frame(): | |
479 import sys | |
480 frame = sys._getframe(1) | |
481 while frame: | |
482 l = frame.f_locals | |
483 if 'BOOM' in l: | |
484 # Sign of doctest | |
485 return frame | |
486 frame = frame.f_back | |
487 raise LookupError( | |
488 "Could not find doctest (only use this function *inside* a doctest)") | |
489 | |
490 __test__ = { | |
491 'basic': ''' | |
492 >>> temp_install() | |
493 >>> print """<xml a="1" b="2">stuff</xml>""" | |
494 <xml b="2" a="1">...</xml> | |
495 >>> print """<xml xmlns="http://example.com"><tag attr="bar" /></xml>""" | |
496 <xml xmlns="..."> | |
497 <tag attr="..." /> | |
498 </xml> | |
499 >>> print """<xml>blahblahblah<foo /></xml>""" # doctest: +NOPARSE_MARKUP, +ELLIPSIS | |
500 <xml>...foo /></xml> | |
501 '''} | |
502 | |
503 if __name__ == '__main__': | |
504 import doctest | |
505 doctest.testmod() | |
506 | |
507 |