Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/bs4/tests/test_lxml.py @ 0:d30785e31577 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author | guerler |
---|---|
date | Fri, 31 Jul 2020 00:18:57 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d30785e31577 |
---|---|
1 """Tests to ensure that the lxml tree builder generates good trees.""" | |
2 | |
3 import re | |
4 import warnings | |
5 | |
# Probe for lxml once at import time. The flags set here drive the
# ``skipIf`` decorators below, so the module still imports (and its tests
# are skipped) when lxml is not installed.
try:
    import lxml.etree
    LXML_PRESENT = True
    LXML_VERSION = lxml.etree.LXML_VERSION
except ImportError:
    LXML_PRESENT = False
    # Sentinel that compares lower than any real lxml version tuple.
    LXML_VERSION = (0,)
13 | |
14 if LXML_PRESENT: | |
15 from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML | |
16 | |
17 from bs4 import ( | |
18 BeautifulSoup, | |
19 BeautifulStoneSoup, | |
20 ) | |
21 from bs4.element import Comment, Doctype, SoupStrainer | |
22 from bs4.testing import skipIf | |
23 from bs4.tests import test_htmlparser | |
24 from bs4.testing import ( | |
25 HTMLTreeBuilderSmokeTest, | |
26 XMLTreeBuilderSmokeTest, | |
27 SoupTest, | |
28 skipIf, | |
29 ) | |
30 | |
@skipIf(
    not LXML_PRESENT,
    "lxml seems not to be present, not testing its tree builder.")
class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
    """Smoke tests for the lxml HTML tree builder.

    See ``HTMLTreeBuilderSmokeTest`` for the shared test cases; the
    methods defined here cover lxml-specific behavior.
    """

    @property
    def default_builder(self):
        """Return the tree builder class exercised by this test case."""
        return LXMLTreeBuilder

    def test_out_of_range_entity(self):
        """Numeric character references beyond the Unicode range are dropped."""
        # Decimal reference far above U+10FFFF.
        self.assertSoupEquals(
            "<p>foo&#10000000000000;bar</p>", "<p>foobar</p>")
        # Hexadecimal reference far above U+10FFFF.
        self.assertSoupEquals(
            "<p>foo&#x10000000000000;bar</p>", "<p>foobar</p>")
        # Smaller decimal reference that is still out of range.
        self.assertSoupEquals(
            "<p>foo&#1000000000;bar</p>", "<p>foobar</p>")

    def test_entities_in_foreign_document_encoding(self):
        """Intentionally a no-op for this builder (see comment below)."""
        # We can't implement this case correctly because by the time we
        # hear about markup like "&#147;", it's been (incorrectly) converted into
        # a string like u'\x93'
        pass

    # In lxml < 2.3.5, an empty doctype causes a segfault. Skip this
    # test if an old version of lxml is installed.

    @skipIf(
        not LXML_PRESENT or LXML_VERSION < (2,3,5,0),
        "Skipping doctype test for old version of lxml to avoid segfault.")
    def test_empty_doctype(self):
        """An empty ``<!DOCTYPE>`` parses to a doctype node with no text."""
        soup = self.soup("<!DOCTYPE>")
        doctype = soup.contents[0]
        self.assertEqual("", doctype.strip())

    def test_beautifulstonesoup_is_xml_parser(self):
        """The deprecated BeautifulStoneSoup class parses as XML and warns."""
        # Make sure that the deprecated BSS class uses an xml builder
        # if one is installed.
        with warnings.catch_warnings(record=True) as w:
            soup = BeautifulStoneSoup("<b />")
        # An XML builder keeps the tag self-closing.
        self.assertEqual("<b/>", str(soup.b))
        self.assertTrue("BeautifulStoneSoup class is deprecated" in str(w[0].message))

    def test_tracking_line_numbers(self):
        """``sourceline``/``sourcepos`` attribute access finds tags, not numbers."""
        # The lxml TreeBuilder cannot keep track of line numbers from
        # the original markup. Even if you ask for line numbers, we
        # don't have 'em.
        #
        # This means that if you have a tag like <sourceline> or
        # <sourcepos>, attribute access will find it rather than
        # giving you a numeric answer.
        soup = self.soup(
            "\n <p>\n\n<sourceline>\n<b>text</b></sourceline><sourcepos></p>",
            store_line_numbers=True
        )
        self.assertEqual("sourceline", soup.p.sourceline.name)
        self.assertEqual("sourcepos", soup.p.sourcepos.name)
88 | |
@skipIf(
    not LXML_PRESENT,
    "lxml seems not to be present, not testing its XML tree builder.")
class LXMLXMLTreeBuilderSmokeTest(SoupTest, XMLTreeBuilderSmokeTest):
    """Smoke tests for the lxml XML tree builder.

    See ``XMLTreeBuilderSmokeTest`` for the shared test cases.
    """

    @property
    def default_builder(self):
        """Return the tree builder class exercised by this test case."""
        return LXMLTreeBuilderForXML

    def test_namespace_indexing(self):
        """Only prefixed namespaces are recorded in ``soup._namespaces``."""
        # We should not track un-prefixed namespaces as we can only hold one
        # and it will be recognized as the default namespace by soupsieve,
        # which may be confusing in some situations. When no namespace is provided
        # for a selector, the default namespace (if defined) is assumed.
        markup = (
            '<?xml version="1.1"?>\n'
            '<root>'
            '<tag xmlns="http://unprefixed-namespace.com">content</tag>'
            '<prefix:tag xmlns:prefix="http://prefixed-namespace.com">content</tag>'
            '</root>'
        )
        # The built-in ``xml`` prefix plus the one explicitly prefixed
        # namespace; the un-prefixed one must be absent.
        expected_namespaces = {
            'xml': 'http://www.w3.org/XML/1998/namespace',
            'prefix': 'http://prefixed-namespace.com',
        }

        parsed = self.soup(markup)
        self.assertEqual(parsed._namespaces, expected_namespaces)