comparison env/lib/python3.7/site-packages/bs4/tests/test_htmlparser.py @ 2:6af9afd405e9 draft

"planemo upload commit 0a63dd5f4d38a1f6944587f52a8cd79874177fc1"
author shellac
date Thu, 14 May 2020 14:56:58 -0400
parents 26e78fe6e8c4
children
comparison
equal deleted inserted replaced
1:75ca89e9b81c 2:6af9afd405e9
1 """Tests to ensure that the html.parser tree builder generates good
2 trees."""
3
4 from pdb import set_trace
5 import pickle
6 from bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest
7 from bs4.builder import HTMLParserTreeBuilder
8 from bs4.builder._htmlparser import BeautifulSoupHTMLParser
9
class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
    """Smoke tests run with HTMLParserTreeBuilder as the tree builder.

    Combines the SoupTest and HTMLTreeBuilderSmokeTest bases with tests
    that are specific to the html.parser-based builder.
    """

    default_builder = HTMLParserTreeBuilder

    def test_namespaced_system_doctype(self):
        # html.parser can't handle namespaced doctypes, so skip this one.
        pass

    def test_namespaced_public_doctype(self):
        # html.parser can't handle namespaced doctypes, so skip this one.
        pass

    def test_builder_is_pickled(self):
        """Unlike most tree builders, HTMLParserTreeBuilder can be pickled
        and will be restored after pickling.
        """
        tree = self.soup("<a><b>foo</a>")
        dumped = pickle.dumps(tree, 2)
        loaded = pickle.loads(dumped)
        # The round-tripped soup must carry a builder of the same type.
        self.assertIsInstance(loaded.builder, type(tree.builder))

    def test_redundant_empty_element_closing_tags(self):
        """Closing tags for void elements such as <br> are dropped."""
        self.assertSoupEquals('<br></br><br></br><br></br>', "<br/><br/><br/>")
        self.assertSoupEquals('</br></br></br>', "")

    def test_empty_element(self):
        # This verifies that any buffered data present when the parser
        # finishes working is handled.
        self.assertSoupEquals("foo &# bar", "foo &amp;# bar")

    def test_tracking_line_numbers(self):
        """Tags record their source line and position unless disabled."""
        # The html.parser TreeBuilder keeps track of line number and
        # position of each element.
        #
        # The three spaces before <p> are significant: sourcepos is the
        # 0-based offset of the tag within its line, asserted to be 3 below.
        markup = "\n   <p>\n\n<sourceline>\n<b>text</b></sourceline><sourcepos></p>"
        soup = self.soup(markup)
        self.assertEqual(2, soup.p.sourceline)
        self.assertEqual(3, soup.p.sourcepos)
        # While tracking is on, the attribute names hold numbers, so the
        # child tag called <sourceline> has to be looked up with find().
        self.assertEqual("sourceline", soup.p.find('sourceline').name)

        # You can deactivate this behavior.
        soup = self.soup(markup, store_line_numbers=False)
        # With tracking off, attribute access resolves to the child tags.
        self.assertEqual("sourceline", soup.p.sourceline.name)
        self.assertEqual("sourcepos", soup.p.sourcepos.name)
53
54
class TestHTMLParserSubclass(SoupTest):
    """Tests aimed directly at the BeautifulSoupHTMLParser subclass."""

    def test_error(self):
        """Calling error() on a fresh BeautifulSoupHTMLParser must not
        raise; the test passes as long as the call returns.
        """
        BeautifulSoupHTMLParser().error("don't crash")