comparison env/lib/python3.7/site-packages/lxml/html/_html5builder.py @ 0:26e78fe6e8c4 draft

"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author shellac
date Sat, 02 May 2020 07:14:21 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:26e78fe6e8c4
1 """
2 Legacy module - don't use in new code!
3
4 html5lib now has its own proper implementation.
5
6 This module implements a tree builder for html5lib that generates lxml
7 html element trees. This module uses camelCase as it follows the
8 html5lib style guide.
9 """
10
11 from html5lib.treebuilders import _base, etree as etree_builders
12 from lxml import html, etree
13
14
class DocumentType(object):
    """Plain holder for the three identifiers of a DOCTYPE declaration."""

    def __init__(self, name, publicId, systemId):
        # camelCase attribute names are kept as-is: TreeBuilder.insertRoot
        # reads them back under exactly these names.
        self.name, self.publicId, self.systemId = name, publicId, systemId
21
class Document(object):
    """Document-level node wrapping an lxml element tree.

    ``_elementTree`` starts out as ``None``; ``TreeBuilder.insertRoot``
    assigns the real tree once the root element has been created.
    ``childNodes`` holds the wrapper objects for the document's children.
    """

    def __init__(self):
        self._elementTree = None
        self.childNodes = []

    def appendChild(self, element):
        """Append *element* after the root element, keeping document order.

        *element* is a wrapper object whose ``_element`` attribute is the
        underlying tree node.  The node is attached after the *last*
        following sibling of the root rather than directly after the root:
        calling ``addnext`` on the root every time would insert each new
        node in front of the previously appended ones and reverse their
        order.

        Raises ``AttributeError`` if called before ``_elementTree`` has been
        set, since no tree exists yet to attach to.
        """
        root = self._elementTree.getroot()
        last = root
        # Exhaust the sibling iterator; ``last`` ends up on the final
        # sibling, or stays on the root when there are none.
        for last in root.itersiblings():
            pass
        last.addnext(element._element)
30
31
class TreeBuilder(_base.TreeBuilder):
    """html5lib tree builder that generates lxml HTML element trees.

    Method names use camelCase to match the html5lib tree builder API that
    this class plugs into.
    """

    documentClass = Document
    doctypeClass = DocumentType
    elementClass = None  # replaced per instance in __init__
    commentClass = None  # replaced per instance in __init__
    fragmentClass = Document

    def __init__(self, *args, **kwargs):
        """Bind the lxml-backed element/comment wrappers, then defer to the base."""
        html_builder = etree_builders.getETreeModule(html, fullTree=False)
        etree_builder = etree_builders.getETreeModule(etree, fullTree=False)
        # Elements come from lxml.html, comments from plain lxml.etree.
        self.elementClass = html_builder.Element
        self.commentClass = etree_builder.Comment
        _base.TreeBuilder.__init__(self, *args, **kwargs)

    def reset(self):
        """Reset the base builder and clear the pre-root bookkeeping."""
        _base.TreeBuilder.reset(self)
        self.rootInserted = False
        # Comments seen before the root exists; attached in insertRoot.
        self.initialComments = []
        self.doctype = None

    def getDocument(self):
        """Return the lxml element tree stored on the current document."""
        return self.document._elementTree

    def getFragment(self):
        """Return the content of the first open element as a flat list.

        The list holds, in order: the element's leading text (if any), its
        child nodes, and its tail text (if any).
        """
        fragment = []
        element = self.openElements[0]._element
        if element.text:
            fragment.append(element.text)
        # Iterating an element yields its children; this replaces the
        # deprecated getchildren() call.
        fragment.extend(element)
        if element.tail:
            fragment.append(element.tail)
        return fragment

    def insertDoctype(self, name, publicId, systemId):
        """Remember the doctype so insertRoot can emit it with the root."""
        doctype = self.doctypeClass(name, publicId, systemId)
        self.doctype = doctype

    def insertComment(self, data, parent=None):
        """Insert a comment, buffering it while no root element exists yet.

        Before the root is inserted, *data* is queued in
        ``initialComments``; afterwards the call is forwarded unchanged to
        the base class implementation.
        """
        if not self.rootInserted:
            self.initialComments.append(data)
        else:
            _base.TreeBuilder.insertComment(self, data, parent)

    def insertRoot(self, name):
        """Create the document with an ``<html>`` root and register it.

        A small HTML source string (optional DOCTYPE plus an empty
        ``<html>`` element) is assembled and parsed with
        ``lxml.html.fromstring``; the buffered initial comments are then
        placed before the root and the root is pushed onto the builder's
        open-element stack.
        """
        buf = []
        doctype = self.doctype
        if doctype and doctype.name:
            buf.append('<!DOCTYPE %s' % doctype.name)
            if doctype.publicId is not None or doctype.systemId is not None:
                # A missing identifier must become an empty literal;
                # formatting None with %s would write the text "None"
                # into the DOCTYPE.
                buf.append(' PUBLIC "%s" "%s"' % (doctype.publicId or '',
                                                  doctype.systemId or ''))
            buf.append('>')
        buf.append('<html></html>')
        root = html.fromstring(''.join(buf))

        # Append the initial comments; each addprevious() lands directly
        # before the root, so the buffered order is preserved.
        for comment in self.initialComments:
            root.addprevious(etree.Comment(comment))

        # Create the root document and add the ElementTree to it
        self.document = self.documentClass()
        self.document._elementTree = root.getroottree()

        # Add the root element to the internal child/open data structures
        root_element = self.elementClass(name)
        root_element._element = root
        self.document.childNodes.append(root_element)
        self.openElements.append(root_element)

        self.rootInserted = True