springsuite: planemo/lib/python3.7/site-packages/webencodings/__init__.py

comparison planemo/lib/python3.7/site-packages/webencodings/__init__.py @ 1:56ad4e20f292 draft

"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"

author	guerler
date	Fri, 31 Jul 2020 00:32:28 -0400
parents
children

comparison

equal deleted inserted replaced

-:d30785e31577
+:56ad4e20f292
+# coding: utf-8
+"""
+webencodings
+~~~~~~~~~~~~
+This is a Python implementation of the `WHATWG Encoding standard
+<http://encoding.spec.whatwg.org/>`_. See README for details.
+:copyright: Copyright 2012 by Simon Sapin
+:license: BSD, see LICENSE for details.
+"""
+from __future__ import unicode_literals
+import codecs
+from .labels import LABELS
VERSION = '0.5.1'

# Some names in Encoding are not valid Python aliases. Remap these.
PYTHON_NAMES = {
    'iso-8859-8-i': 'iso-8859-8',
    'x-mac-cyrillic': 'mac-cyrillic',
    'macintosh': 'mac-roman',
    'windows-874': 'cp874',
}

# Encoding objects already built by lookup(), keyed by canonical name.
CACHE = {}
def ascii_lower(string):
    r"""Transform (only) ASCII letters to lower case: A-Z is mapped to a-z.

    :param string: A Unicode string.
    :returns: A new Unicode string.

    This is used for `ASCII case-insensitive
    <http://encoding.spec.whatwg.org/#ascii-case-insensitive>`_
    matching of encoding labels.
    The same matching is also used, among other things,
    for `CSS keywords <http://dev.w3.org/csswg/css-values/#keywords>`_.

    This is different from the :meth:`~py:str.lower` method of Unicode strings
    which also affects non-ASCII characters,
    sometimes mapping them into the ASCII range:

        >>> keyword = u'Bac\N{KELVIN SIGN}ground'
        >>> assert keyword.lower() == u'background'
        >>> assert ascii_lower(keyword) != keyword.lower()
        >>> assert ascii_lower(keyword) == u'bac\N{KELVIN SIGN}ground'

    """
    # Lower-casing the UTF-8 *bytes* only touches A-Z: every non-ASCII
    # character is encoded with bytes >= 0x80, which bytes.lower() ignores.
    # This turns out to be faster than unicode.translate()
    utf8_bytes = string.encode('utf8')
    return utf8_bytes.lower().decode('utf8')
def lookup(label):
    """
    Look for an encoding by its label.
    This is the spec’s `get an encoding
    <http://encoding.spec.whatwg.org/#concept-encoding-get>`_ algorithm.
    Supported labels are listed there.

    :param label: A string.
    :returns:
        An :class:`Encoding` object, or :obj:`None` for an unknown label.

    """
    # Only strip ASCII whitespace: U+0009, U+000A, U+000C, U+000D, and U+0020.
    label = ascii_lower(label.strip('\t\n\f\r '))
    name = LABELS.get(label)
    if name is None:
        return None

    cached = CACHE.get(name)
    if cached is not None:
        return cached

    if name == 'x-user-defined':
        # Imported lazily: only needed when this label is actually requested.
        from .x_user_defined import codec_info
    else:
        python_name = PYTHON_NAMES.get(name, name)
        # Any python_name value that gets to here should be valid.
        codec_info = codecs.lookup(python_name)

    encoding = Encoding(name, codec_info)
    CACHE[name] = encoding
    return encoding
def _get_encoding(encoding_or_label):
    """
    Accept either an encoding object or label.

    :param encoding_or_label: An :class:`Encoding` object or a label string.
    :returns: An :class:`Encoding` object.
    :raises: :exc:`~exceptions.LookupError` for an unknown label.

    """
    # Duck-typed check: anything carrying ``codec_info`` is treated as an
    # already-resolved encoding and returned untouched.
    if hasattr(encoding_or_label, 'codec_info'):
        return encoding_or_label

    encoding = lookup(encoding_or_label)
    if encoding is None:
        raise LookupError('Unknown encoding label: %r' % encoding_or_label)
    return encoding
class Encoding(object):
    """Represents a character encoding such as UTF-8,
    that can be used for decoding or encoding.

    .. attribute:: name

        Canonical name of the encoding

    .. attribute:: codec_info

        The actual implementation of the encoding,
        a stdlib :class:`~codecs.CodecInfo` object.
        See :func:`codecs.register`.

    """

    def __init__(self, name, codec_info):
        """Store the canonical *name* and its backing *codec_info*."""
        self.name = name
        self.codec_info = codec_info

    def __repr__(self):
        """Return a short debugging representation, e.g. ``<Encoding utf-8>``."""
        return '<Encoding %s>' % self.name
#: The UTF-8 encoding. Should be used for new content and formats.
UTF8 = lookup('utf-8')

# Encodings selected by _detect_bom() when the input starts with a UTF-16
# byte order mark.
_UTF16LE = lookup('utf-16le')
_UTF16BE = lookup('utf-16be')
def decode(input, fallback_encoding, errors='replace'):
    """
    Decode a single string.

    :param input: A byte string
    :param fallback_encoding:
        An :class:`Encoding` object or a label string.
        The encoding to use if :obj:`input` does not have a BOM.
    :param errors: Type of error handling. See :func:`codecs.register`.
    :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.
    :return:
        A ``(output, encoding)`` tuple of a Unicode string
        and an :obj:`Encoding`.

    """
    # Fail early if `fallback_encoding` is an invalid label, even when a BOM
    # would end up overriding it.
    fallback_encoding = _get_encoding(fallback_encoding)
    bom_encoding, input = _detect_bom(input)
    # A BOM, when present, takes precedence over the caller's fallback.
    encoding = bom_encoding or fallback_encoding
    output, _consumed = encoding.codec_info.decode(input, errors)
    return output, encoding
def _detect_bom(input):
    """Return (bom_encoding, input), with any BOM removed from the input.

    :param input: A byte string.
    :returns:
        ``(encoding, rest)`` where *encoding* is the :class:`Encoding`
        indicated by a leading UTF-16LE, UTF-16BE or UTF-8 byte order mark
        and *rest* is the input without that mark;
        ``(None, input)`` unchanged when no BOM is found.

    """
    if input.startswith(b'\xFF\xFE'):
        return _UTF16LE, input[2:]
    if input.startswith(b'\xFE\xFF'):
        return _UTF16BE, input[2:]
    if input.startswith(b'\xEF\xBB\xBF'):
        return UTF8, input[3:]
    return None, input
def encode(input, encoding=UTF8, errors='strict'):
    """
    Encode a single string.

    :param input: A Unicode string.
    :param encoding: An :class:`Encoding` object or a label string.
    :param errors: Type of error handling. See :func:`codecs.register`.
    :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.
    :return: A byte string.

    """
    codec_info = _get_encoding(encoding).codec_info
    # The codec returns (bytes, length_consumed); only the bytes are exposed.
    output, _consumed = codec_info.encode(input, errors)
    return output
def iter_decode(input, fallback_encoding, errors='replace'):
    """
    "Pull"-based decoder.

    :param input:
        An iterable of byte strings.

        The input is first consumed just enough to determine the encoding
        based on the presence of a BOM,
        then consumed on demand when the return value is.
    :param fallback_encoding:
        An :class:`Encoding` object or a label string.
        The encoding to use if :obj:`input` does not have a BOM.
    :param errors: Type of error handling. See :func:`codecs.register`.
    :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.
    :returns:
        An ``(output, encoding)`` tuple.
        :obj:`output` is an iterable of Unicode strings,
        :obj:`encoding` is the :obj:`Encoding` that is being used.

    """
    decoder = IncrementalDecoder(fallback_encoding, errors)
    generator = _iter_decode_generator(input, decoder)
    # The generator's first item is always the Encoding; pull it out here so
    # the caller receives it separately from the decoded text chunks.
    encoding = next(generator)
    return generator, encoding
def _iter_decode_generator(input, decoder):
    """Return a generator that first yields the :obj:`Encoding`,
    then yields output chunks as Unicode strings.

    :param input: An iterable of byte strings.
    :param decoder:
        An object with a ``decode(bytes, final=False)`` method and an
        ``encoding`` attribute, such as :class:`IncrementalDecoder`.

    """
    decode = decoder.decode
    # Use a single iterator so the second loop resumes where the first stopped.
    input = iter(input)

    # Phase 1: feed chunks until the decoder produces output, which is the
    # point at which this generator expects the encoding to be known.
    for chunk in input:
        output = decode(chunk)
        if output:
            assert decoder.encoding is not None
            yield decoder.encoding
            yield output
            break
    else:
        # Input exhausted without determining the encoding
        output = decode(b'', final=True)
        assert decoder.encoding is not None
        yield decoder.encoding
        if output:
            yield output
        return

    # Phase 2: stream the remaining chunks, skipping empty results.
    for chunk in input:
        output = decode(chunk)
        if output:
            yield output

    # Flush whatever the decoder is still holding.
    output = decode(b'', final=True)
    if output:
        yield output
def iter_encode(input, encoding=UTF8, errors='strict'):
    """
    “Pull”-based encoder.

    :param input: An iterable of Unicode strings.
    :param encoding: An :class:`Encoding` object or a label string.
    :param errors: Type of error handling. See :func:`codecs.register`.
    :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.
    :returns: An iterable of byte strings.

    """
    # Fail early if `encoding` is an invalid label: the encoder is built here,
    # eagerly, rather than inside the lazily-run generator.
    encode = IncrementalEncoder(encoding, errors).encode
    return _iter_encode_generator(input, encode)
def _iter_encode_generator(input, encode):
    """Yield the non-empty byte strings produced by encoding *input*.

    :param input: An iterable of Unicode strings.
    :param encode:
        A callable ``encode(string, final=False)`` returning a byte string,
        such as the ``encode`` attribute of :class:`IncrementalEncoder`.

    """
    for chunk in input:
        output = encode(chunk)
        if output:
            yield output

    # Flush any state the encoder is still holding.
    output = encode('', final=True)
    if output:
        yield output
class IncrementalDecoder(object):
    """
    “Push”-based decoder.

    :param fallback_encoding:
        An :class:`Encoding` object or a label string.
        The encoding to use if :obj:`input` does not have a BOM.
    :param errors: Type of error handling. See :func:`codecs.register`.
    :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.

    """

    def __init__(self, fallback_encoding, errors='replace'):
        # Fail early if `fallback_encoding` is an invalid label.
        self._fallback_encoding = _get_encoding(fallback_encoding)
        self._errors = errors
        # Bytes held back while there is not yet enough input to rule a BOM
        # in or out.
        self._buffer = b''
        # Bound ``decode`` of the codec's incremental decoder, set once the
        # encoding has been determined.
        self._decoder = None
        #: The actual :class:`Encoding` that is being used,
        #: or :obj:`None` if that is not determined yet.
        #: (I.e. if there is not enough input yet to determine
        #: if there is a BOM.)
        self.encoding = None  # Not known yet.

    def decode(self, input, final=False):
        """Decode one chunk of the input.

        :param input: A byte string.
        :param final:
            Indicate that no more input is available.
            Must be :obj:`True` if this is the last call.
        :returns: A Unicode string.

        """
        decoder = self._decoder
        if decoder is not None:
            # Encoding already settled: delegate directly.
            return decoder(input, final)

        input = self._buffer + input
        encoding, input = _detect_bom(input)
        if encoding is None:
            if len(input) < 3 and not final:  # Not enough data yet.
                self._buffer = input
                return ''
            else:  # No BOM
                encoding = self._fallback_encoding

        decoder = encoding.codec_info.incrementaldecoder(self._errors).decode
        self._decoder = decoder
        self.encoding = encoding
        # The buffered bytes are part of `input` now and the buffer is never
        # read again once a decoder exists; release it.
        self._buffer = b''
        return decoder(input, final)
class IncrementalEncoder(object):
    """
    “Push”-based encoder.

    :param encoding: An :class:`Encoding` object or a label string.
    :param errors: Type of error handling. See :func:`codecs.register`.
    :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.

    .. method:: encode(input, final=False)

        :param input: A Unicode string.
        :param final:
            Indicate that no more input is available.
            Must be :obj:`True` if this is the last call.
        :returns: A byte string.

    """

    def __init__(self, encoding=UTF8, errors='strict'):
        encoding = _get_encoding(encoding)
        # ``encode`` is exposed as an instance attribute bound straight to the
        # codec's incremental encoder, so calls skip a wrapper method.
        self.encode = encoding.codec_info.incrementalencoder(errors).encode

Mercurial > repos > guerler > springsuite

comparison planemo/lib/python3.7/site-packages/webencodings/__init__.py @ 1:56ad4e20f292 draft

comparison planemo/lib/python3.7/site-packages/webencodings/init.py @ 1:56ad4e20f292 draft