comparison planemo/lib/python3.7/site-packages/bs4/tests/test_builder_registry.py @ 0:d30785e31577 draft

"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author guerler
date Fri, 31 Jul 2020 00:18:57 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:d30785e31577
1 """Tests of the builder registry."""
2
3 import unittest
4 import warnings
5
6 from bs4 import BeautifulSoup
7 from bs4.builder import (
8 builder_registry as registry,
9 HTMLParserTreeBuilder,
10 TreeBuilderRegistry,
11 )
12
13 try:
14 from bs4.builder import HTML5TreeBuilder
15 HTML5LIB_PRESENT = True
16 except ImportError:
17 HTML5LIB_PRESENT = False
18
19 try:
20 from bs4.builder import (
21 LXMLTreeBuilderForXML,
22 LXMLTreeBuilder,
23 )
24 LXML_PRESENT = True
25 except ImportError:
26 LXML_PRESENT = False
27
28
29 class BuiltInRegistryTest(unittest.TestCase):
30 """Test the built-in registry with the default builders registered."""
31
32 def test_combination(self):
33 if LXML_PRESENT:
34 self.assertEqual(registry.lookup('fast', 'html'),
35 LXMLTreeBuilder)
36
37 if LXML_PRESENT:
38 self.assertEqual(registry.lookup('permissive', 'xml'),
39 LXMLTreeBuilderForXML)
40 self.assertEqual(registry.lookup('strict', 'html'),
41 HTMLParserTreeBuilder)
42 if HTML5LIB_PRESENT:
43 self.assertEqual(registry.lookup('html5lib', 'html'),
44 HTML5TreeBuilder)
45
46 def test_lookup_by_markup_type(self):
47 if LXML_PRESENT:
48 self.assertEqual(registry.lookup('html'), LXMLTreeBuilder)
49 self.assertEqual(registry.lookup('xml'), LXMLTreeBuilderForXML)
50 else:
51 self.assertEqual(registry.lookup('xml'), None)
52 if HTML5LIB_PRESENT:
53 self.assertEqual(registry.lookup('html'), HTML5TreeBuilder)
54 else:
55 self.assertEqual(registry.lookup('html'), HTMLParserTreeBuilder)
56
57 def test_named_library(self):
58 if LXML_PRESENT:
59 self.assertEqual(registry.lookup('lxml', 'xml'),
60 LXMLTreeBuilderForXML)
61 self.assertEqual(registry.lookup('lxml', 'html'),
62 LXMLTreeBuilder)
63 if HTML5LIB_PRESENT:
64 self.assertEqual(registry.lookup('html5lib'),
65 HTML5TreeBuilder)
66
67 self.assertEqual(registry.lookup('html.parser'),
68 HTMLParserTreeBuilder)
69
70 def test_beautifulsoup_constructor_does_lookup(self):
71
72 with warnings.catch_warnings(record=True) as w:
73 # This will create a warning about not explicitly
74 # specifying a parser, but we'll ignore it.
75
76 # You can pass in a string.
77 BeautifulSoup("", features="html")
78 # Or a list of strings.
79 BeautifulSoup("", features=["html", "fast"])
80
81 # You'll get an exception if BS can't find an appropriate
82 # builder.
83 self.assertRaises(ValueError, BeautifulSoup,
84 "", features="no-such-feature")
85
86 class RegistryTest(unittest.TestCase):
87 """Test the TreeBuilderRegistry class in general."""
88
89 def setUp(self):
90 self.registry = TreeBuilderRegistry()
91
92 def builder_for_features(self, *feature_list):
93 cls = type('Builder_' + '_'.join(feature_list),
94 (object,), {'features' : feature_list})
95
96 self.registry.register(cls)
97 return cls
98
99 def test_register_with_no_features(self):
100 builder = self.builder_for_features()
101
102 # Since the builder advertises no features, you can't find it
103 # by looking up features.
104 self.assertEqual(self.registry.lookup('foo'), None)
105
106 # But you can find it by doing a lookup with no features, if
107 # this happens to be the only registered builder.
108 self.assertEqual(self.registry.lookup(), builder)
109
110 def test_register_with_features_makes_lookup_succeed(self):
111 builder = self.builder_for_features('foo', 'bar')
112 self.assertEqual(self.registry.lookup('foo'), builder)
113 self.assertEqual(self.registry.lookup('bar'), builder)
114
115 def test_lookup_fails_when_no_builder_implements_feature(self):
116 builder = self.builder_for_features('foo', 'bar')
117 self.assertEqual(self.registry.lookup('baz'), None)
118
119 def test_lookup_gets_most_recent_registration_when_no_feature_specified(self):
120 builder1 = self.builder_for_features('foo')
121 builder2 = self.builder_for_features('bar')
122 self.assertEqual(self.registry.lookup(), builder2)
123
124 def test_lookup_fails_when_no_tree_builders_registered(self):
125 self.assertEqual(self.registry.lookup(), None)
126
127 def test_lookup_gets_most_recent_builder_supporting_all_features(self):
128 has_one = self.builder_for_features('foo')
129 has_the_other = self.builder_for_features('bar')
130 has_both_early = self.builder_for_features('foo', 'bar', 'baz')
131 has_both_late = self.builder_for_features('foo', 'bar', 'quux')
132 lacks_one = self.builder_for_features('bar')
133 has_the_other = self.builder_for_features('foo')
134
135 # There are two builders featuring 'foo' and 'bar', but
136 # the one that also features 'quux' was registered later.
137 self.assertEqual(self.registry.lookup('foo', 'bar'),
138 has_both_late)
139
140 # There is only one builder featuring 'foo', 'bar', and 'baz'.
141 self.assertEqual(self.registry.lookup('foo', 'bar', 'baz'),
142 has_both_early)
143
144 def test_lookup_fails_when_cannot_reconcile_requested_features(self):
145 builder1 = self.builder_for_features('foo', 'bar')
146 builder2 = self.builder_for_features('foo', 'baz')
147 self.assertEqual(self.registry.lookup('bar', 'baz'), None)