Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/bs4/tests/test_htmlparser.py @ 2:6af9afd405e9 draft
"planemo upload commit 0a63dd5f4d38a1f6944587f52a8cd79874177fc1"
author | shellac |
---|---|
date | Thu, 14 May 2020 14:56:58 -0400 |
parents | 26e78fe6e8c4 |
children |
comparison
equal
deleted
inserted
replaced
1:75ca89e9b81c | 2:6af9afd405e9 |
---|---|
1 """Tests to ensure that the html.parser tree builder generates good | |
2 trees.""" | |
3 | |
4 from pdb import set_trace | |
5 import pickle | |
6 from bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest | |
7 from bs4.builder import HTMLParserTreeBuilder | |
8 from bs4.builder._htmlparser import BeautifulSoupHTMLParser | |
9 | |
10 class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest): | |
11 | |
12 default_builder = HTMLParserTreeBuilder | |
13 | |
14 def test_namespaced_system_doctype(self): | |
15 # html.parser can't handle namespaced doctypes, so skip this one. | |
16 pass | |
17 | |
18 def test_namespaced_public_doctype(self): | |
19 # html.parser can't handle namespaced doctypes, so skip this one. | |
20 pass | |
21 | |
22 def test_builder_is_pickled(self): | |
23 """Unlike most tree builders, HTMLParserTreeBuilder is pickled along | |
24 with the tree and will be restored after unpickling. | |
25 """ | |
26 tree = self.soup("<a><b>foo</a>") | |
27 dumped = pickle.dumps(tree, 2) | |
28 loaded = pickle.loads(dumped) | |
29 self.assertTrue(isinstance(loaded.builder, type(tree.builder))) | |
30 | |
31 def test_redundant_empty_element_closing_tags(self): | |
32 self.assertSoupEquals('<br></br><br></br><br></br>', "<br/><br/><br/>") | |
33 self.assertSoupEquals('</br></br></br>', "") | |
34 | |
35 def test_empty_element(self): | |
36 # This verifies that any buffered data present when the parser | |
37 # finishes working is handled. | |
38 self.assertSoupEquals("foo &# bar", "foo &# bar") | |
39 | |
40 def test_tracking_line_numbers(self): | |
41 # The html.parser TreeBuilder keeps track of the line number and | |
42 # position of each element. | |
43 markup = "\n <p>\n\n<sourceline>\n<b>text</b></sourceline><sourcepos></p>" | |
44 soup = self.soup(markup) | |
45 self.assertEqual(2, soup.p.sourceline) | |
46 self.assertEqual(3, soup.p.sourcepos) | |
47 self.assertEqual("sourceline", soup.p.find('sourceline').name) | |
48 | |
49 # You can deactivate this behavior. | |
50 soup = self.soup(markup, store_line_numbers=False) | |
51 self.assertEqual("sourceline", soup.p.sourceline.name) | |
52 self.assertEqual("sourcepos", soup.p.sourcepos.name) | |
53 | |
54 | |
55 class TestHTMLParserSubclass(SoupTest): | |
56 def test_error(self): | |
57 """Verify that our HTMLParser subclass implements error() in a way | |
58 that doesn't cause a crash. | |
59 """ | |
60 parser = BeautifulSoupHTMLParser() | |
61 parser.error("don't crash") |