Mercurial > repos > shellac > guppy_basecaller
diff env/lib/python3.7/site-packages/humanfriendly/terminal/html.py @ 5:9b1c78e6ba9c draft default tip
"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
author | shellac |
---|---|
date | Mon, 01 Jun 2020 08:59:25 -0400 |
parents | 79f47841a781 |
children |
line wrap: on
line diff
# Human friendly input/output in Python.
#
# Author: Peter Odding <peter@peterodding.com>
# Last Change: February 29, 2020
# URL: https://humanfriendly.readthedocs.io

"""Convert HTML with simple text formatting to text with ANSI escape sequences."""

# Standard library modules.
import re

# Modules included in our package.
from humanfriendly.compat import HTMLParser, StringIO, name2codepoint, unichr
from humanfriendly.text import compact_empty_lines
from humanfriendly.terminal import ANSI_COLOR_CODES, ANSI_RESET, ansi_style

# Public identifiers that require documentation.
__all__ = ('HTMLConverter', 'html_to_ansi')


def html_to_ansi(data, callback=None):
    """
    Convert HTML with simple text formatting to text with ANSI escape sequences.

    :param data: The HTML to convert (a string).
    :param callback: Optional callback to pass to :class:`HTMLConverter`.
    :returns: Text with ANSI escape sequences (a string).

    Please refer to the documentation of the :class:`HTMLConverter` class for
    details about the conversion process (like which tags are supported) and an
    example with a screenshot.
    """
    converter = HTMLConverter(callback=callback)
    return converter(data)


class HTMLConverter(HTMLParser):

    """
    Convert HTML with simple text formatting to text with ANSI escape sequences.

    The following text styles are supported:

    - Bold: ``<b>``, ``<strong>`` and ``<span style="font-weight: bold;">``
    - Italic: ``<i>``, ``<em>`` and ``<span style="font-style: italic;">``
    - Strike-through: ``<del>``, ``<s>`` and ``<span style="text-decoration: line-through;">``
    - Underline: ``<ins>``, ``<u>`` and ``<span style="text-decoration: underline">``

    Colors can be specified as follows:

    - Foreground color: ``<span style="color: #RRGGBB;">``
    - Background color: ``<span style="background-color: #RRGGBB;">``

    Here's a small demonstration:

    .. code-block:: python

       from humanfriendly.text import dedent
       from humanfriendly.terminal import html_to_ansi

       print(html_to_ansi(dedent('''
         <b>Hello world!</b>
         <i>Is this thing on?</i>
         I guess I can <u>underline</u> or <s>strike-through</s> text?
         And what about <span style="color: red">color</span>?
       ''')))

       rainbow_colors = [
           '#FF0000', '#E2571E', '#FF7F00', '#FFFF00', '#00FF00',
           '#96BF33', '#0000FF', '#4B0082', '#8B00FF', '#FFFFFF',
       ]
       html_rainbow = "".join('<span style="color: %s">o</span>' % c for c in rainbow_colors)
       print(html_to_ansi("Let's try a rainbow: %s" % html_rainbow))

    Here's what the results look like:

      .. image:: images/html-to-ansi.png

    Some more details:

    - Nested tags are supported, within reasonable limits.

    - Text in ``<code>`` and ``<pre>`` tags will be highlighted in a
      different color from the main text (currently this is yellow).

    - ``<a href="URL">TEXT</a>`` is converted to the format "TEXT (URL)" where
      the uppercase symbols are highlighted in light blue with an underline.

    - ``<div>``, ``<p>`` and ``<pre>`` tags are considered block level tags
      and are wrapped in vertical whitespace to prevent their content from
      "running into" surrounding text. This may cause runs of multiple empty
      lines to be emitted. As a *workaround* the :func:`__call__()` method
      will automatically call :func:`.compact_empty_lines()` on the generated
      output before returning it to the caller. Of course this won't work
      when `output` is set to something like :data:`sys.stdout`.

    - ``<br>`` is converted to a single plain text line break.

    Implementation notes:

    - A list of dictionaries with style information is used as a stack where
      new styling can be pushed and a pop will restore the previous styling.
      When new styling is pushed, it is merged with (but overrides) the current
      styling.

    - If you're going to be converting a lot of HTML it might be useful from
      a performance standpoint to re-use an existing :class:`HTMLConverter`
      object for unrelated HTML fragments, in this case take a look at the
      :func:`__call__()` method (it makes this use case very easy).

    .. versionadded:: 4.15
       :class:`humanfriendly.terminal.HTMLConverter` was added to the
       `humanfriendly` package during the initial development of my new
       `chat-archive <https://chat-archive.readthedocs.io/>`_ project, whose
       command line interface makes for a great demonstration of the
       flexibility that this feature provides (hint: check out how the search
       keyword highlighting combines with the regular highlighting).
    """

    BLOCK_TAGS = ('div', 'p', 'pre')
    """The names of tags that are padded with vertical whitespace."""

    def __init__(self, *args, **kw):
        """
        Initialize an :class:`HTMLConverter` object.

        :param callback: Optional keyword argument to specify a function that
                         will be called to process text fragments before they
                         are emitted on the output stream. Note that link text
                         and preformatted text fragments are not processed by
                         this callback.
        :param output: Optional keyword argument to redirect the output to the
                       given file-like object. If this is not given a new
                       :class:`~python3:io.StringIO` object is created.
        """
        # Hide our optional keyword arguments from the superclass.
        self.callback = kw.pop("callback", None)
        self.output = kw.pop("output", None)
        # Initialize the superclass.
        HTMLParser.__init__(self, *args, **kw)

    def __call__(self, data):
        """
        Reset the parser, convert some HTML and get the text with ANSI escape sequences.

        :param data: The HTML to convert to text (a string).
        :returns: The converted text (only in case `output` is
                  a :class:`~python3:io.StringIO` object).
        """
        self.reset()
        self.feed(data)
        self.close()
        if isinstance(self.output, StringIO):
            return compact_empty_lines(self.output.getvalue())

    @property
    def current_style(self):
        """Get the current style from the top of the stack (a dictionary)."""
        return self.stack[-1] if self.stack else {}

    def close(self):
        """
        Close previously opened ANSI escape sequences.

        This method overrides the same method in the superclass to ensure that
        an :data:`.ANSI_RESET` code is emitted when parsing reaches the end of
        the input but a style is still active. This is intended to prevent
        malformed HTML from messing up terminal output.
        """
        if any(self.stack):
            self.output.write(ANSI_RESET)
            self.stack = []
        HTMLParser.close(self)

    def emit_style(self, style=None):
        """
        Emit an ANSI escape sequence for the given or current style to the output stream.

        :param style: A dictionary with arguments for :func:`.ansi_style()` or
                      :data:`None`, in which case the style at the top of the
                      stack is emitted.
        """
        # Clear the current text styles.
        self.output.write(ANSI_RESET)
        # Apply a new text style?
        style = self.current_style if style is None else style
        if style:
            self.output.write(ansi_style(**style))

    def handle_charref(self, value):
        """
        Process a decimal or hexadecimal numeric character reference.

        :param value: The decimal or hexadecimal value (a string).

        The hexadecimal prefix is recognized in lower case as well as upper
        case (``&#x41;`` and ``&#X41;``). A reference that cannot be decoded
        (not a number, or not a valid code point) is emitted literally
        instead of raising an exception.
        """
        try:
            if value[:1] in ('x', 'X'):
                codepoint = int(value[1:], 16)
            else:
                codepoint = int(value)
            text = unichr(codepoint)
        except (ValueError, OverflowError):
            # Malformed input should never break the conversion.
            text = '&#%s;' % value
        self.output.write(text)

    def handle_data(self, data):
        """
        Process textual data.

        :param data: The decoded text (a string).
        """
        if self.link_url:
            # Link text is captured literally so that we can reliably check
            # whether the text and the URL of the link are the same string.
            # The text is accumulated because link text that contains nested
            # tags or character references arrives in several fragments.
            self.link_text = (self.link_text or '') + data
        elif self.callback and self.preformatted_text_level == 0:
            # Text that is not part of a link and not preformatted text is
            # passed to the user defined callback to allow for arbitrary
            # pre-processing.
            data = self.callback(data)
        # All text is emitted unmodified on the output stream.
        self.output.write(data)

    def handle_endtag(self, tag):
        """
        Process the end of an HTML tag.

        :param tag: The name of the tag (a string).
        """
        if tag in ('a', 'b', 'code', 'del', 'em', 'i', 'ins', 'pre', 's', 'strong', 'span', 'u'):
            old_style = self.current_style
            # The following conditional isn't necessary for well formed
            # HTML but prevents raising exceptions on malformed HTML.
            if self.stack:
                self.stack.pop(-1)
            new_style = self.current_style
            if tag == 'a':
                # Both values can be None: an anchor without (a value for)
                # the href attribute, a link without text or a stray </a>.
                link_text = self.link_text or ''
                link_url = self.link_url or ''
                # The link has ended, forget about it so that the text that
                # follows is no longer treated as link text.
                self.link_text = None
                self.link_url = None
                self.emit_style(new_style)
                # Don't render the URL when there is none or when it's
                # (part of) the link text.
                if link_url and not self.urls_match(link_text, link_url):
                    self.output.write(' (')
                    self.emit_style(old_style)
                    self.output.write(self.render_url(link_url))
                    self.emit_style(new_style)
                    self.output.write(')')
            else:
                self.emit_style(new_style)
            if tag in ('code', 'pre'):
                # Never drop below zero (on a stray end tag) because that
                # would disable the callback for the rest of the input.
                self.preformatted_text_level = max(0, self.preformatted_text_level - 1)
        if tag in self.BLOCK_TAGS:
            # Emit an empty line after block level tags.
            self.output.write('\n\n')

    def handle_entityref(self, name):
        """
        Process a named character reference.

        :param name: The name of the character reference (a string).

        Unknown names are emitted literally (as ``&name;``) instead of
        raising :exc:`~exceptions.KeyError`.
        """
        codepoint = name2codepoint.get(name)
        if codepoint is None:
            text = '&%s;' % name
        else:
            text = unichr(codepoint)
        self.output.write(text)

    def handle_starttag(self, tag, attrs):
        """
        Process the start of an HTML tag.

        :param tag: The name of the tag (a string).
        :param attrs: A list of tuples with two strings each.
        """
        if tag in self.BLOCK_TAGS:
            # Emit an empty line before block level tags.
            self.output.write('\n\n')
        if tag == 'a':
            self.push_styles(color='blue', bright=True, underline=True)
            # Store the URL that the link points to for later use, so that we
            # can render the link text before the URL (with the reasoning that
            # this is the most intuitive way to present a link in a plain text
            # interface).
            self.link_url = next((v for n, v in attrs if n == 'href'), '')
            # Start capturing the text of this link from scratch.
            self.link_text = None
        elif tag == 'b' or tag == 'strong':
            self.push_styles(bold=True)
        elif tag == 'br':
            self.output.write('\n')
        elif tag == 'code' or tag == 'pre':
            self.push_styles(color='yellow')
            self.preformatted_text_level += 1
        elif tag == 'del' or tag == 's':
            self.push_styles(strike_through=True)
        elif tag == 'em' or tag == 'i':
            self.push_styles(italic=True)
        elif tag == 'ins' or tag == 'u':
            self.push_styles(underline=True)
        elif tag == 'span':
            styles = {}
            # An attribute without a value is reported as None by the parser.
            css = next((v for n, v in attrs if n == 'style'), "") or ""
            for rule in css.split(';'):
                name, _, value = rule.partition(':')
                name = name.strip()
                value = value.strip()
                if name == 'background-color':
                    styles['background'] = self.parse_color(value)
                elif name == 'color':
                    styles['color'] = self.parse_color(value)
                elif name == 'font-style' and value == 'italic':
                    styles['italic'] = True
                elif name == 'font-weight' and value == 'bold':
                    styles['bold'] = True
                elif name == 'text-decoration' and value == 'line-through':
                    styles['strike_through'] = True
                elif name == 'text-decoration' and value == 'underline':
                    styles['underline'] = True
            self.push_styles(**styles)

    def normalize_url(self, url):
        """
        Normalize a URL to enable string equality comparison.

        :param url: The URL to normalize (a string).
        :returns: The normalized URL (a string).
        """
        return re.sub('^mailto:', '', url)

    def parse_color(self, value):
        """
        Convert a CSS color to something that :func:`.ansi_style()` understands.

        :param value: A string like ``rgb(1,2,3)``, ``#AABBCC``, ``#ABC`` or ``yellow``.
        :returns: A color value supported by :func:`.ansi_style()` or :data:`None`.

        The three digit shorthand ``#ABC`` is expanded to ``#AABBCC`` as
        defined by CSS. A ``#`` that isn't followed by exactly three or six
        hexadecimal digits is not recognized (:data:`None` is returned).
        """
        # Parse an 'rgb(N,N,N)' expression.
        if value.startswith('rgb'):
            tokens = re.findall(r'\d+', value)
            if len(tokens) == 3:
                return tuple(map(int, tokens))
        # Parse an '#XXXXXX' or '#XXX' expression.
        elif value.startswith('#'):
            digits = value[1:]
            if re.match(r'^(?:[0-9A-Fa-f]{3}|[0-9A-Fa-f]{6})$', digits):
                if len(digits) == 3:
                    # Three hex digits (shorthand): each digit is doubled.
                    digits = ''.join(digit * 2 for digit in digits)
                return (
                    int(digits[:2], 16),
                    int(digits[2:4], 16),
                    int(digits[4:6], 16),
                )
        # Try to recognize a named color.
        value = value.lower()
        if value in ANSI_COLOR_CODES:
            return value

    def push_styles(self, **changes):
        """
        Push new style information onto the stack.

        :param changes: Any keyword arguments are passed on to :func:`.ansi_style()`.

        This method is a helper for :func:`handle_starttag()`
        that does the following:

        1. Make a copy of the current styles (from the top of the stack),
        2. Apply the given `changes` to the copy of the current styles,
        3. Add the new styles to the stack,
        4. Emit the appropriate ANSI escape sequence to the output stream.
        """
        prototype = self.current_style
        if prototype:
            new_style = dict(prototype)
            new_style.update(changes)
        else:
            new_style = changes
        self.stack.append(new_style)
        self.emit_style(new_style)

    def render_url(self, url):
        """
        Prepare a URL for rendering on the terminal.

        :param url: The URL to simplify (a string).
        :returns: The simplified URL (a string).

        This method pre-processes a URL before rendering on the terminal. The
        following modifications are made:

        - The ``mailto:`` prefix is stripped.
        - Spaces are converted to ``%20``.
        - A trailing parenthesis is converted to ``%29``.
        """
        url = re.sub('^mailto:', '', url)
        url = re.sub(' ', '%20', url)
        url = re.sub(r'\)$', '%29', url)
        return url

    def reset(self):
        """
        Reset the state of the HTML parser and ANSI converter.

        When `output` is a :class:`~python3:io.StringIO` object a new
        instance will be created (and the old one garbage collected).
        """
        # Reset the state of the superclass.
        HTMLParser.reset(self)
        # Reset our instance variables.
        self.link_text = None
        self.link_url = None
        self.preformatted_text_level = 0
        if self.output is None or isinstance(self.output, StringIO):
            # If the caller specified something like output=sys.stdout then it
            # doesn't make much sense to negate that choice here in reset().
            self.output = StringIO()
        self.stack = []

    def urls_match(self, a, b):
        """
        Compare two URLs for equality using :func:`normalize_url()`.

        :param a: A string containing a URL.
        :param b: A string containing a URL.
        :returns: :data:`True` if the URLs are the same, :data:`False` otherwise.

        This method is used by :func:`handle_endtag()` to omit the URL of a
        hyperlink (``<a href="...">``) when the link text is that same URL.
        """
        return self.normalize_url(a) == self.normalize_url(b)