Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/bs4/tests/test_htmlparser.py @ 0:d30785e31577 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
| author | guerler |
|---|---|
| date | Fri, 31 Jul 2020 00:18:57 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:d30785e31577 |
|---|---|
| 1 """Tests to ensure that the html.parser tree builder generates good | |
| 2 trees.""" | |
| 3 | |
| 4 from pdb import set_trace | |
| 5 import pickle | |
| 6 from bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest | |
| 7 from bs4.builder import HTMLParserTreeBuilder | |
| 8 from bs4.builder._htmlparser import BeautifulSoupHTMLParser | |
| 9 | |
class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
    """Run the shared HTML smoke tests against HTMLParserTreeBuilder.

    Also contains tests for behavior specific to the html.parser-based
    tree builder (pickling, line tracking, duplicate attributes).
    """

    default_builder = HTMLParserTreeBuilder

    def test_namespaced_system_doctype(self):
        # html.parser can't handle namespaced doctypes, so skip this one.
        pass

    def test_namespaced_public_doctype(self):
        # html.parser can't handle namespaced doctypes, so skip this one.
        pass

    def test_builder_is_pickled(self):
        """Unlike most tree builders, HTMLParserTreeBuilder can be
        pickled, and will be restored after pickling.
        """
        tree = self.soup("<a><b>foo</a>")
        dumped = pickle.dumps(tree, 2)
        loaded = pickle.loads(dumped)
        # assertIsInstance reports the offending object and the expected
        # type on failure, unlike assertTrue(isinstance(...)).
        self.assertIsInstance(loaded.builder, type(tree.builder))

    def test_redundant_empty_element_closing_tags(self):
        """Closing tags for void elements are folded away or dropped."""
        self.assertSoupEquals('<br></br><br></br><br></br>', "<br/><br/><br/>")
        self.assertSoupEquals('</br></br></br>', "")

    def test_empty_element(self):
        # This verifies that any buffered data present when the parser
        # finishes working is handled.
        #
        # The incomplete character reference "&#" is kept as text; on
        # output the bare ampersand is escaped, hence "&amp;" in the
        # expected document.
        self.assertSoupEquals("foo &# bar", "foo &amp;# bar")

    def test_tracking_line_numbers(self):
        # The html.parser TreeBuilder keeps track of line number and
        # position of each element.
        #
        # <p> sits on the second line after exactly three spaces, which
        # is what the sourceline/sourcepos assertions below rely on.
        markup = "\n   <p>\n\n<sourceline>\n<b>text</b></sourceline><sourcepos></p>"
        soup = self.soup(markup)
        self.assertEqual(2, soup.p.sourceline)
        self.assertEqual(3, soup.p.sourcepos)
        self.assertEqual("sourceline", soup.p.find('sourceline').name)

        # You can deactivate this behavior. With tracking off, the
        # attribute names resolve to the child tags of the same name.
        soup = self.soup(markup, store_line_numbers=False)
        self.assertEqual("sourceline", soup.p.sourceline.name)
        self.assertEqual("sourcepos", soup.p.sourcepos.name)

    def test_on_duplicate_attribute(self):
        # The html.parser tree builder has a variety of ways of
        # handling a tag that contains the same attribute multiple times.

        markup = '<a class="cls" href="url1" href="url2" href="url3" id="id">'

        # If you don't provide any particular value for
        # on_duplicate_attribute, later values replace earlier values.
        soup = self.soup(markup)
        self.assertEqual("url3", soup.a['href'])
        self.assertEqual(["cls"], soup.a['class'])
        self.assertEqual("id", soup.a['id'])

        # You can also get this behavior explicitly.
        def assert_attribute(on_duplicate_attribute, expected):
            """Parse `markup` with the given duplicate-handling setting
            and check the resulting value of the 'href' attribute.
            """
            soup = self.soup(
                markup, on_duplicate_attribute=on_duplicate_attribute
            )
            self.assertEqual(expected, soup.a['href'])

            # Verify that non-duplicate attributes are treated normally.
            self.assertEqual(["cls"], soup.a['class'])
            self.assertEqual("id", soup.a['id'])
        assert_attribute(None, "url3")
        assert_attribute(BeautifulSoupHTMLParser.REPLACE, "url3")

        # You can ignore subsequent values in favor of the first.
        assert_attribute(BeautifulSoupHTMLParser.IGNORE, "url1")

        # And you can pass in a callable that does whatever you want.
        def accumulate(attrs, key, value):
            """Collect every duplicate value for `key` into a list."""
            if not isinstance(attrs[key], list):
                attrs[key] = [attrs[key]]
            attrs[key].append(value)
        assert_attribute(accumulate, ["url1", "url2", "url3"])
| 89 | |
| 90 | |
class TestHTMLParserSubclass(SoupTest):
    """Tests aimed directly at the BeautifulSoupHTMLParser subclass."""

    def test_error(self):
        """Calling error() on our HTMLParser subclass must return
        normally instead of crashing.
        """
        # The test passes as long as this call does not raise.
        BeautifulSoupHTMLParser().error("don't crash")
