Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/bs4/formatter.py @ 0:d30785e31577 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author | guerler |
---|---|
date | Fri, 31 Jul 2020 00:18:57 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d30785e31577 |
---|---|
1 from bs4.dammit import EntitySubstitution | |
2 | |
class Formatter(EntitySubstitution):
    """Describes a strategy to use when outputting a parse tree to a string.

    Some parts of this strategy come from the distinction between
    HTML4, HTML5, and XML. Others are configurable by the user.

    Formatters are passed in as the `formatter` argument to methods
    like `PageElement.encode`. Most people won't need to think about
    formatters, and most people who need to think about them can pass
    in one of these predefined strings as `formatter` rather than
    making a new Formatter object:

    For HTML documents:
     * 'html' - HTML entity substitution for generic HTML documents. (default)
     * 'html5' - HTML entity substitution for HTML5 documents.
     * 'minimal' - Only make the substitutions necessary to guarantee
                   valid HTML.
     * None - Do not perform any substitution. This will be faster
              but may result in invalid markup.

    For XML documents:
     * 'html' - Entity substitution for XHTML documents.
     * 'minimal' - Only make the substitutions necessary to guarantee
                   valid XML. (default)
     * None - Do not perform any substitution. This will be faster
              but may result in invalid markup.
    """
    # Registries of XML and HTML formatters.
    XML_FORMATTERS = {}
    HTML_FORMATTERS = {}

    # Values accepted for the `language` constructor argument.
    HTML = 'html'
    XML = 'xml'

    # Per-keyword defaults applied when the language is not XML.
    HTML_DEFAULTS = dict(
        cdata_containing_tags=set(["script", "style"]),
    )

    def _default(self, language, value, kwarg):
        """Resolve the effective value of a constructor keyword argument.

        :param language: The language the formatter was created for.
        :param value: The value the caller supplied, or None if omitted.
        :param kwarg: The keyword's name, used as a key into HTML_DEFAULTS.
        :return: `value` unchanged when it is not None; an empty set
            when `language` is Formatter.XML; otherwise a copy of the
            HTML default registered under `kwarg`.
        """
        if value is not None:
            return value
        if language == self.XML:
            return set()
        # Hand out a copy so that mutating one formatter's setting cannot
        # silently alter the class-level defaults shared by every other
        # formatter.
        return set(self.HTML_DEFAULTS[kwarg])

    def __init__(
            self, language=None, entity_substitution=None,
            void_element_close_prefix='/', cdata_containing_tags=None,
    ):
        """Constructor.

        :param language: This should be Formatter.XML if you are formatting
           XML markup and Formatter.HTML if you are formatting HTML markup.

        :param entity_substitution: A function to call to replace special
           characters with XML/HTML entities. For examples, see
           bs4.dammit.EntitySubstitution.substitute_html and substitute_xml.
        :param void_element_close_prefix: By default, void elements
           are represented as <tag/> (XML rules) rather than <tag>
           (HTML rules). To get <tag>, pass in the empty string.
        :param cdata_containing_tags: The list of tags that are defined
           as containing CDATA in this dialect. For example, in HTML,
           <script> and <style> tags are defined as containing CDATA,
           and their contents should not be formatted.
        """
        self.language = language
        self.entity_substitution = entity_substitution
        self.void_element_close_prefix = void_element_close_prefix
        self.cdata_containing_tags = self._default(
            language, cdata_containing_tags, 'cdata_containing_tags'
        )

    def substitute(self, ns):
        """Process a string that needs to undergo entity substitution.
        This may be a string encountered in an attribute value or as
        text.

        :param ns: A string.
        :return: A string with certain characters replaced by named
           or numeric entities.
        """
        if not self.entity_substitution:
            return ns
        # Imported here rather than at module level, as in the original.
        from .element import NavigableString
        if (isinstance(ns, NavigableString)
                and ns.parent is not None
                and ns.parent.name in self.cdata_containing_tags):
            # Text inside a CDATA-containing tag is emitted untouched.
            return ns
        # Substitute.
        return self.entity_substitution(ns)

    def attribute_value(self, value):
        """Process the value of an attribute.

        :param value: A string.
        :return: A string with certain characters replaced by named
           or numeric entities.
        """
        return self.substitute(value)

    def attributes(self, tag):
        """Reorder a tag's attributes however you want.

        By default, attributes are sorted alphabetically. This makes
        behavior consistent between Python 2 and Python 3, and preserves
        backwards compatibility with older versions of Beautiful Soup.

        :param tag: An object whose `attrs` is None or a mapping of
           attribute names to values.
        :return: A sorted list of (name, value) pairs; empty when
           `tag.attrs` is None.
        """
        if tag.attrs is None:
            return []
        return sorted(tag.attrs.items())
114 | |
115 | |
class HTMLFormatter(Formatter):
    """A generic Formatter for HTML."""

    # Named formatter instances for this dialect; filled in at module level.
    REGISTRY = {}

    def __init__(self, *args, **kwargs):
        """Create a formatter whose language is fixed to Formatter.HTML.

        All positional and keyword arguments are forwarded to
        Formatter.__init__ after the language.
        """
        parent = super(HTMLFormatter, self)
        parent.__init__(self.HTML, *args, **kwargs)
121 | |
122 | |
class XMLFormatter(Formatter):
    """A generic Formatter for XML."""

    # Named formatter instances for this dialect; filled in at module level.
    REGISTRY = {}

    def __init__(self, *args, **kwargs):
        """Create a formatter whose language is fixed to Formatter.XML.

        All positional and keyword arguments are forwarded to
        Formatter.__init__ after the language.
        """
        parent = super(XMLFormatter, self)
        parent.__init__(self.XML, *args, **kwargs)
128 | |
129 | |
# Set up aliases for the default formatters.

# HTML documents.
HTMLFormatter.REGISTRY['html'] = HTMLFormatter(
    entity_substitution=EntitySubstitution.substitute_html
)
HTMLFormatter.REGISTRY["html5"] = HTMLFormatter(
    entity_substitution=EntitySubstitution.substitute_html,
    # None instead of the default '/'.
    # NOTE(review): presumably consumed as "no closing slash" by the
    # serializer -- confirm against the code that reads this attribute.
    void_element_close_prefix=None
)
HTMLFormatter.REGISTRY["minimal"] = HTMLFormatter(
    entity_substitution=EntitySubstitution.substitute_xml
)
HTMLFormatter.REGISTRY[None] = HTMLFormatter(
    entity_substitution=None
)

# XML documents.
XMLFormatter.REGISTRY["html"] = XMLFormatter(
    entity_substitution=EntitySubstitution.substitute_html
)
XMLFormatter.REGISTRY["minimal"] = XMLFormatter(
    entity_substitution=EntitySubstitution.substitute_xml
)
# Build the no-substitution XML formatter the same way as its siblings.
# Wrapping one Formatter inside another passed a Formatter instance as the
# `language` argument, leaving the registered object with a non-string
# language and the HTML CDATA defaults.
XMLFormatter.REGISTRY[None] = XMLFormatter(
    entity_substitution=None
)