Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/bs4/formatter.py @ 0:d30785e31577 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
| author | guerler |
|---|---|
| date | Fri, 31 Jul 2020 00:18:57 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:d30785e31577 |
|---|---|
| 1 from bs4.dammit import EntitySubstitution | |
| 2 | |
class Formatter(EntitySubstitution):
    """Describes a strategy to use when outputting a parse tree to a string.

    Some parts of this strategy come from the distinction between
    HTML4, HTML5, and XML. Others are configurable by the user.

    Formatters are passed in as the `formatter` argument to methods
    like `PageElement.encode`. Most people won't need to think about
    formatters, and most people who need to think about them can pass
    in one of these predefined strings as `formatter` rather than
    making a new Formatter object:

    For HTML documents:
     * 'html' - HTML entity substitution for generic HTML documents. (default)
     * 'html5' - HTML entity substitution for HTML5 documents.
     * 'minimal' - Only make the substitutions necessary to guarantee
                   valid HTML.
     * None - Do not perform any substitution. This will be faster
              but may result in invalid markup.

    For XML documents:
     * 'html' - Entity substitution for XHTML documents.
     * 'minimal' - Only make the substitutions necessary to guarantee
                   valid XML. (default)
     * None - Do not perform any substitution. This will be faster
              but may result in invalid markup.
    """
    # Registries of XML and HTML formatters.
    XML_FORMATTERS = {}
    HTML_FORMATTERS = {}

    # Values accepted for the `language` constructor argument.
    HTML = 'html'
    XML = 'xml'

    # Fallback values for constructor keyword arguments when the
    # language is not XML, keyed by keyword argument name.
    HTML_DEFAULTS = dict(
        cdata_containing_tags={"script", "style"},
    )

    def _default(self, language, value, kwarg):
        """Pick the effective value for a constructor keyword argument.

        :param language: The language passed to the constructor.
        :param value: The value the caller supplied, or None.
        :param kwarg: The name of the keyword argument, used as the key
            into HTML_DEFAULTS.
        :return: `value` if it is not None; otherwise an empty set when
            `language` is Formatter.XML; otherwise HTML_DEFAULTS[kwarg].
        """
        if value is not None:
            return value
        if language == self.XML:
            return set()
        # Any language other than XML (including None) gets the HTML
        # defaults.
        return self.HTML_DEFAULTS[kwarg]

    def __init__(
            self, language=None, entity_substitution=None,
            void_element_close_prefix='/', cdata_containing_tags=None,
    ):
        """Constructor.

        :param language: This should be Formatter.XML if you are formatting
           XML markup and Formatter.HTML if you are formatting HTML markup.

        :param entity_substitution: A function to call to replace special
           characters with XML/HTML entities. For examples, see
           bs4.dammit.EntitySubstitution.substitute_html and substitute_xml.
        :param void_element_close_prefix: By default, void elements
           are represented as <tag/> (XML rules) rather than <tag>
           (HTML rules). To get <tag>, pass in the empty string.
        :param cdata_containing_tags: The list of tags that are defined
           as containing CDATA in this dialect. For example, in HTML,
           <script> and <style> tags are defined as containing CDATA,
           and their contents should not be formatted.
        """
        self.language = language
        self.entity_substitution = entity_substitution
        self.void_element_close_prefix = void_element_close_prefix
        self.cdata_containing_tags = self._default(
            language, cdata_containing_tags, 'cdata_containing_tags'
        )

    def substitute(self, ns):
        """Process a string that needs to undergo entity substitution.
        This may be a string encountered in an attribute value or as
        text.

        :param ns: A string.
        :return: A string with certain characters replaced by named
           or numeric entities.
        """
        if not self.entity_substitution:
            # No substitution function configured: pass through as-is.
            return ns
        # Imported here rather than at module level; presumably to
        # avoid a circular import with .element -- TODO confirm.
        from .element import NavigableString
        if (isinstance(ns, NavigableString)
                and ns.parent is not None
                and ns.parent.name in self.cdata_containing_tags):
            # Text directly inside a CDATA-containing tag is left alone.
            return ns
        # Substitute.
        return self.entity_substitution(ns)

    def attribute_value(self, value):
        """Process the value of an attribute.

        :param value: A string.
        :return: A string with certain characters replaced by named
           or numeric entities.
        """
        return self.substitute(value)

    def attributes(self, tag):
        """Reorder a tag's attributes however you want.

        By default, attributes are sorted alphabetically. This makes
        behavior consistent between Python 2 and Python 3, and preserves
        backwards compatibility with older versions of Beautiful Soup.

        :param tag: An object with an `attrs` attribute that is either
            None or a mapping.
        :return: A list of (name, value) pairs sorted by `sorted()`, or
            an empty list when `tag.attrs` is None.
        """
        if tag.attrs is None:
            return []
        return sorted(tag.attrs.items())
| 114 | |
| 115 | |
class HTMLFormatter(Formatter):
    """A Formatter whose language is always Formatter.HTML.

    All positional and keyword arguments are forwarded to
    Formatter.__init__ after the language argument.
    """

    # Maps a formatter name (or None) to an HTMLFormatter instance.
    REGISTRY = {}

    def __init__(self, *args, **kwargs):
        """Initialise as an HTML formatter, forwarding all other arguments."""
        base = super(HTMLFormatter, self)
        base.__init__(self.HTML, *args, **kwargs)
| 121 | |
| 122 | |
class XMLFormatter(Formatter):
    """A Formatter whose language is always Formatter.XML.

    All positional and keyword arguments are forwarded to
    Formatter.__init__ after the language argument.
    """

    # Maps a formatter name (or None) to a formatter instance for XML.
    REGISTRY = {}

    def __init__(self, *args, **kwargs):
        """Initialise as an XML formatter, forwarding all other arguments."""
        base = super(XMLFormatter, self)
        base.__init__(self.XML, *args, **kwargs)
| 128 | |
| 129 | |
# Set up aliases for the default formatters.
HTMLFormatter.REGISTRY['html'] = HTMLFormatter(
    entity_substitution=EntitySubstitution.substitute_html
)
HTMLFormatter.REGISTRY["html5"] = HTMLFormatter(
    entity_substitution=EntitySubstitution.substitute_html,
    void_element_close_prefix=None
)
HTMLFormatter.REGISTRY["minimal"] = HTMLFormatter(
    entity_substitution=EntitySubstitution.substitute_xml
)
HTMLFormatter.REGISTRY[None] = HTMLFormatter(
    entity_substitution=None
)
XMLFormatter.REGISTRY["html"] = XMLFormatter(
    entity_substitution=EntitySubstitution.substitute_html
)
XMLFormatter.REGISTRY["minimal"] = XMLFormatter(
    entity_substitution=EntitySubstitution.substitute_xml
)
# Build the "no substitution" XML formatter the same way as the other
# XML entries. Passing a Formatter instance as the `language` argument
# of another Formatter would make `language` compare unequal to
# Formatter.XML, so the HTML CDATA defaults would be applied to XML.
XMLFormatter.REGISTRY[None] = XMLFormatter(
    entity_substitution=None
)
