Mercurial > repos > guerler > springsuite
diff planemo/lib/python3.7/site-packages/bs4/tests/test_htmlparser.py @ 0:d30785e31577 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author | guerler |
---|---|
date | Fri, 31 Jul 2020 00:18:57 -0400 |
parents | |
children |
line wrap: on
line diff
"""Tests to ensure that the html.parser tree builder generates good
trees."""

from pdb import set_trace
import pickle
from bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest
from bs4.builder import HTMLParserTreeBuilder
from bs4.builder._htmlparser import BeautifulSoupHTMLParser


class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
    """Smoke tests run with HTMLParserTreeBuilder as the default builder.

    Inherits the generic checks from HTMLTreeBuilderSmokeTest, overrides
    the ones html.parser cannot satisfy, and adds tests for behavior
    specific to this builder (pickling, line tracking, duplicate
    attribute handling).
    """

    default_builder = HTMLParserTreeBuilder

    def test_namespaced_system_doctype(self):
        # html.parser can't handle namespaced doctypes, so skip this one.
        pass

    def test_namespaced_public_doctype(self):
        # html.parser can't handle namespaced doctypes, so skip this one.
        pass

    def test_builder_is_pickled(self):
        """Unlike most tree builders, HTMLParserTreeBuilder can be
        pickled and will be restored after pickling.
        """
        tree = self.soup("<a><b>foo</a>")
        dumped = pickle.dumps(tree, 2)
        loaded = pickle.loads(dumped)
        self.assertIsInstance(loaded.builder, type(tree.builder))

    def test_redundant_empty_element_closing_tags(self):
        # An explicit closing tag on a void element is redundant, and a
        # closing tag with no matching opener produces no output.
        self.assertSoupEquals('<br></br><br></br><br></br>', "<br/><br/><br/>")
        self.assertSoupEquals('</br></br></br>', "")

    def test_empty_element(self):
        # This verifies that any buffered data present when the parser
        # finishes working is handled.
        #
        # The bare ampersand of the incomplete character reference is
        # escaped when the tree is serialized, so the expected output
        # contains "&amp;".
        self.assertSoupEquals("foo &# bar", "foo &amp;# bar")

    def test_tracking_line_numbers(self):
        # The html.parser TreeBuilder keeps track of line number and
        # position of each element.
        #
        # The <p> tag is on line 2, preceded by exactly three spaces, so
        # its zero-based column offset is 3. The whitespace inside this
        # literal is significant.
        markup = "\n   <p>\n\n<sourceline>\n<b>text</b></sourceline><sourcepos></p>"
        soup = self.soup(markup)
        self.assertEqual(2, soup.p.sourceline)
        self.assertEqual(3, soup.p.sourcepos)
        self.assertEqual("sourceline", soup.p.find('sourceline').name)

        # You can deactivate this behavior. With tracking off, the
        # attribute names fall through to the usual child-tag lookup.
        soup = self.soup(markup, store_line_numbers=False)
        self.assertEqual("sourceline", soup.p.sourceline.name)
        self.assertEqual("sourcepos", soup.p.sourcepos.name)

    def test_on_duplicate_attribute(self):
        # The html.parser tree builder has a variety of ways of
        # handling a tag that contains the same attribute multiple times.

        markup = '<a class="cls" href="url1" href="url2" href="url3" id="id">'

        # If you don't provide any particular value for
        # on_duplicate_attribute, later values replace earlier values.
        soup = self.soup(markup)
        self.assertEqual("url3", soup.a['href'])
        self.assertEqual(["cls"], soup.a['class'])
        self.assertEqual("id", soup.a['id'])

        # You can also get this behavior explicitly.
        def assert_attribute(on_duplicate_attribute, expected):
            soup = self.soup(
                markup, on_duplicate_attribute=on_duplicate_attribute
            )
            self.assertEqual(expected, soup.a['href'])

            # Verify that non-duplicate attributes are treated normally.
            self.assertEqual(["cls"], soup.a['class'])
            self.assertEqual("id", soup.a['id'])
        assert_attribute(None, "url3")
        assert_attribute(BeautifulSoupHTMLParser.REPLACE, "url3")

        # You can ignore subsequent values in favor of the first.
        assert_attribute(BeautifulSoupHTMLParser.IGNORE, "url1")

        # And you can pass in a callable that does whatever you want.
        def accumulate(attrs, key, value):
            if not isinstance(attrs[key], list):
                attrs[key] = [attrs[key]]
            attrs[key].append(value)
        assert_attribute(accumulate, ["url1", "url2", "url3"])


class TestHTMLParserSubclass(SoupTest):
    """Tests that exercise BeautifulSoupHTMLParser directly."""

    def test_error(self):
        """Verify that our HTMLParser subclass implements error() in a way
        that doesn't cause a crash.
        """
        parser = BeautifulSoupHTMLParser()
        parser.error("don't crash")