Mercurial > repos > shellac > guppy_basecaller

diff env/lib/python3.7/site-packages/boltons/strutils.py @ 5:9b1c78e6ba9c draft default tip
"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
author: shellac
date: Mon, 01 Jun 2020 08:59:25 -0400
parents: 79f47841a781
--- a/env/lib/python3.7/site-packages/boltons/strutils.py	Thu May 14 16:47:39 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1125 +0,0 @@
-# -*- coding: utf-8 -*-
-"""So much practical programming involves string manipulation, which
-Python readily accommodates. Still, there are dozens of basic and
-common capabilities missing from the standard library, several of them
-provided by ``strutils``.
-"""
-
-from __future__ import print_function
-
-import re
-import sys
-import uuid
-import zlib
-import string
-import unicodedata
-import collections
-from gzip import GzipFile
-
-try:
-    from cStringIO import cStringIO as StringIO
-except ImportError:
-    from io import BytesIO as StringIO
-
-try:
-    from collections.abc import Mapping
-except ImportError:
-    from collections import Mapping
-
-try:
-    unicode, str, bytes, basestring = unicode, str, str, basestring
-    from HTMLParser import HTMLParser
-    import htmlentitydefs
-except NameError:  # basestring not defined in Python 3
-    unicode, str, bytes, basestring = str, bytes, bytes, (str, bytes)
-    unichr = chr
-    from html.parser import HTMLParser
-    from html import entities as htmlentitydefs
-
-
-__all__ = ['camel2under', 'under2camel', 'slugify', 'split_punct_ws',
-           'unit_len', 'ordinalize', 'cardinalize', 'pluralize', 'singularize',
-           'asciify', 'is_ascii', 'is_uuid', 'html2text', 'strip_ansi',
-           'bytes2human', 'find_hashtags', 'a10n', 'gzip_bytes', 'gunzip_bytes',
-           'iter_splitlines', 'indent', 'escape_shell_args',
-           'args2cmd', 'args2sh', 'parse_int_list', 'format_int_list', 'unwrap_text']
-
-
-_punct_ws_str = string.punctuation + string.whitespace
-_punct_re = re.compile('[' + _punct_ws_str + ']+')
-_camel2under_re = re.compile('((?<=[a-z0-9])[A-Z]|(?!^)[A-Z](?=[a-z]))')
-
-
-def camel2under(camel_string):
-    """Converts a camelcased string to underscores. Useful for turning a
-    class name into a function name.
-
-    >>> camel2under('BasicParseTest')
-    'basic_parse_test'
-    """
-    return _camel2under_re.sub(r'_\1', camel_string).lower()
-
-
-def under2camel(under_string):
-    """Converts an underscored string to camelcased. Useful for turning a
-    function name into a class name.
-
-    >>> under2camel('complex_tokenizer')
-    'ComplexTokenizer'
-    """
-    return ''.join(w.capitalize() or '_' for w in under_string.split('_'))
-
-
-def slugify(text, delim='_', lower=True, ascii=False):
-    """
-    A basic function that turns text full of scary characters
-    (i.e., punctuation and whitespace), into a relatively safe
-    lowercased string separated only by the delimiter specified
-    by *delim*, which defaults to ``_``.
-
-    The *ascii* convenience flag will :func:`asciify` the slug if
-    you require ascii-only slugs.
-
-    >>> slugify('First post! Hi!!!!~1    ')
-    'first_post_hi_1'
-
-    >>> slugify("Kurt Gödel's pretty cool.", ascii=True) == \
-        b'kurt_goedel_s_pretty_cool'
-    True
-
-    """
-    ret = delim.join(split_punct_ws(text)) or delim if text else ''
-    if ascii:
-        ret = asciify(ret)
-    if lower:
-        ret = ret.lower()
-    return ret
-
-
-def split_punct_ws(text):
-    """While :meth:`str.split` will split on whitespace,
-    :func:`split_punct_ws` will split on punctuation and
-    whitespace. This used internally by :func:`slugify`, above.
-
-    >>> split_punct_ws('First post! Hi!!!!~1    ')
-    ['First', 'post', 'Hi', '1']
-    """
-    return [w for w in _punct_re.split(text) if w]
-
-
-def unit_len(sized_iterable, unit_noun='item'):  # TODO: len_units()/unitize()?
-    """Returns a plain-English description of an iterable's
-    :func:`len()`, conditionally pluralized with :func:`cardinalize`,
-    detailed below.
-
-    >>> print(unit_len(range(10), 'number'))
-    10 numbers
-    >>> print(unit_len('aeiou', 'vowel'))
-    5 vowels
-    >>> print(unit_len([], 'worry'))
-    No worries
-    """
-    count = len(sized_iterable)
-    units = cardinalize(unit_noun, count)
-    if count:
-        return u'%s %s' % (count, units)
-    return u'No %s' % (units,)
-
-
-_ORDINAL_MAP = {'1': 'st',
-                '2': 'nd',
-                '3': 'rd'}  # 'th' is the default
-
-
-def ordinalize(number, ext_only=False):
-    """Turns *number* into its cardinal form, i.e., 1st, 2nd,
-    3rd, 4th, etc. If the last character isn't a digit, it returns the
-    string value unchanged.
-
-    Args:
-        number (int or str): Number to be cardinalized.
-        ext_only (bool): Whether to return only the suffix. Default ``False``.
-
-    >>> print(ordinalize(1))
-    1st
-    >>> print(ordinalize(3694839230))
-    3694839230th
-    >>> print(ordinalize('hi'))
-    hi
-    >>> print(ordinalize(1515))
-    1515th
-    """
-    numstr, ext = unicode(number), ''
-    if numstr and numstr[-1] in string.digits:
-        try:
-            # first check for teens
-            if numstr[-2] == '1':
-                ext = 'th'
-            else:
-                # all other cases
-                ext = _ORDINAL_MAP.get(numstr[-1], 'th')
-        except IndexError:
-            # single digit numbers (will reach here based on [-2] above)
-            ext = _ORDINAL_MAP.get(numstr[-1], 'th')
-    if ext_only:
-        return ext
-    else:
-        return numstr + ext
-
-
-def cardinalize(unit_noun, count):
-    """Conditionally pluralizes a singular word *unit_noun* if
-    *count* is not one, preserving case when possible.
-
-    >>> vowels = 'aeiou'
-    >>> print(len(vowels), cardinalize('vowel', len(vowels)))
-    5 vowels
-    >>> print(3, cardinalize('Wish', 3))
-    3 Wishes
-    """
-    if count == 1:
-        return unit_noun
-    return pluralize(unit_noun)
-
-
-def singularize(word):
-    """Semi-intelligently converts an English plural *word* to its
-    singular form, preserving case pattern.
-
-    >>> singularize('chances')
-    'chance'
-    >>> singularize('Activities')
-    'Activity'
-    >>> singularize('Glasses')
-    'Glass'
-    >>> singularize('FEET')
-    'FOOT'
-
-    """
-    orig_word, word = word, word.strip().lower()
-    if not word or word in _IRR_S2P:
-        return orig_word
-
-    irr_singular = _IRR_P2S.get(word)
-    if irr_singular:
-        singular = irr_singular
-    elif not word.endswith('s'):
-        return orig_word
-    elif len(word) == 2:
-        singular = word[:-1]  # or just return word?
-    elif word.endswith('ies') and word[-4:-3] not in 'aeiou':
-        singular = word[:-3] + 'y'
-    elif word.endswith('es') and word[-3] == 's':
-        singular = word[:-2]
-    else:
-        singular = word[:-1]
-    return _match_case(orig_word, singular)
-
-
-def pluralize(word):
-    """Semi-intelligently converts an English *word* from singular form to
-    plural, preserving case pattern.
-
-    >>> pluralize('friend')
-    'friends'
-    >>> pluralize('enemy')
-    'enemies'
-    >>> pluralize('Sheep')
-    'Sheep'
-    """
-    orig_word, word = word, word.strip().lower()
-    if not word or word in _IRR_P2S:
-        return orig_word
-    irr_plural = _IRR_S2P.get(word)
-    if irr_plural:
-        plural = irr_plural
-    elif word.endswith('y') and word[-2:-1] not in 'aeiou':
-        plural = word[:-1] + 'ies'
-    elif word[-1] == 's' or word.endswith('ch') or word.endswith('sh'):
-        plural = word if word.endswith('es') else word + 'es'
-    else:
-        plural = word + 's'
-    return _match_case(orig_word, plural)
-
-
-def _match_case(master, disciple):
-    if not master.strip():
-        return disciple
-    if master.lower() == master:
-        return disciple.lower()
-    elif master.upper() == master:
-        return disciple.upper()
-    elif master.title() == master:
-        return disciple.title()
-    return disciple
-
-
-# Singular to plural map of irregular pluralizations
-_IRR_S2P = {'addendum': 'addenda', 'alga': 'algae', 'alumna': 'alumnae',
-            'alumnus': 'alumni', 'analysis': 'analyses', 'antenna': 'antennae',
-            'appendix': 'appendices', 'axis': 'axes', 'bacillus': 'bacilli',
-            'bacterium': 'bacteria', 'basis': 'bases', 'beau': 'beaux',
-            'bison': 'bison', 'bureau': 'bureaus', 'cactus': 'cacti',
-            'calf': 'calves', 'child': 'children', 'corps': 'corps',
-            'corpus': 'corpora', 'crisis': 'crises', 'criterion': 'criteria',
-            'curriculum': 'curricula', 'datum': 'data', 'deer': 'deer',
-            'diagnosis': 'diagnoses', 'die': 'dice', 'dwarf': 'dwarves',
-            'echo': 'echoes', 'elf': 'elves', 'ellipsis': 'ellipses',
-            'embargo': 'embargoes', 'emphasis': 'emphases', 'erratum': 'errata',
-            'fireman': 'firemen', 'fish': 'fish', 'focus': 'foci',
-            'foot': 'feet', 'formula': 'formulae', 'formula': 'formulas',
-            'fungus': 'fungi', 'genus': 'genera', 'goose': 'geese',
-            'half': 'halves', 'hero': 'heroes', 'hippopotamus': 'hippopotami',
-            'hoof': 'hooves', 'hypothesis': 'hypotheses', 'index': 'indices',
-            'knife': 'knives', 'leaf': 'leaves', 'life': 'lives',
-            'loaf': 'loaves', 'louse': 'lice', 'man': 'men',
-            'matrix': 'matrices', 'means': 'means', 'medium': 'media',
-            'memorandum': 'memoranda', 'millennium': 'milennia', 'moose': 'moose',
-            'mosquito': 'mosquitoes', 'mouse': 'mice', 'nebula': 'nebulae',
-            'neurosis': 'neuroses', 'nucleus': 'nuclei', 'oasis': 'oases',
-            'octopus': 'octopi', 'offspring': 'offspring', 'ovum': 'ova',
-            'ox': 'oxen', 'paralysis': 'paralyses', 'parenthesis': 'parentheses',
-            'person': 'people', 'phenomenon': 'phenomena', 'potato': 'potatoes',
-            'radius': 'radii', 'scarf': 'scarves', 'scissors': 'scissors',
-            'self': 'selves', 'sense': 'senses', 'series': 'series', 'sheep':
-            'sheep', 'shelf': 'shelves', 'species': 'species', 'stimulus':
-            'stimuli', 'stratum': 'strata', 'syllabus': 'syllabi', 'symposium':
-            'symposia', 'synopsis': 'synopses', 'synthesis': 'syntheses',
-            'tableau': 'tableaux', 'that': 'those', 'thesis': 'theses',
-            'thief': 'thieves', 'this': 'these', 'tomato': 'tomatoes', 'tooth':
-            'teeth', 'torpedo': 'torpedoes', 'vertebra': 'vertebrae', 'veto':
-            'vetoes', 'vita': 'vitae', 'watch': 'watches', 'wife': 'wives',
-            'wolf': 'wolves', 'woman': 'women'}
-
-
-# Reverse index of the above
-_IRR_P2S = dict([(v, k) for k, v in _IRR_S2P.items()])
-
-HASHTAG_RE = re.compile(r"(?:^|\s)[＃#]{1}(\w+)", re.UNICODE)
-
-
-def find_hashtags(string):
-    """Finds and returns all hashtags in a string, with the hashmark
-    removed. Supports full-width hashmarks for Asian languages and
-    does not false-positive on URL anchors.
-
-    >>> find_hashtags('#atag http://asite/#ananchor')
-    ['atag']
-
-    ``find_hashtags`` also works with unicode hashtags.
-    """
-
-    # the following works, doctest just struggles with it
-    # >>> find_hashtags(u"can't get enough of that dignity chicken #肯德基 woo")
-    # [u'\u80af\u5fb7\u57fa']
-    return HASHTAG_RE.findall(string)
-
-
-def a10n(string):
-    """That thing where "internationalization" becomes "i18n", what's it
-    called? Abbreviation? Oh wait, no: ``a10n``. (It's actually a form
-    of `numeronym`_.)
-
-    >>> a10n('abbreviation')
-    'a10n'
-    >>> a10n('internationalization')
-    'i18n'
-    >>> a10n('')
-    ''
-
-    .. _numeronym: http://en.wikipedia.org/wiki/Numeronym
-    """
-    if len(string) < 3:
-        return string
-    return '%s%s%s' % (string[0], len(string[1:-1]), string[-1])
-
-
-ANSI_ESCAPE_BEGIN = '\x1b['
-ANSI_TERMINATORS = ('H', 'f', 'A', 'B', 'C', 'D', 'R', 's', 'u', 'J',
-                    'K', 'h', 'l', 'p', 'm')
-
-
-def strip_ansi(text):
-    """Strips ANSI escape codes from *text*. Useful for the occasional
-    time when a log or redirected output accidentally captures console
-    color codes and the like.
-
-    >>> strip_ansi('\x1b[0m\x1b[1;36mart\x1b[46;34m\xdc')
-    'art'
-
-    The test above is an excerpt from ANSI art on
-    `sixteencolors.net`_. This function does not interpret or render
-    ANSI art, but you can do so with `ansi2img`_ or `escapes.js`_.
-
-    .. _sixteencolors.net: http://sixteencolors.net
-    .. _ansi2img: http://www.bedroomlan.org/projects/ansi2img
-    .. _escapes.js: https://github.com/atdt/escapes.js
-    """
-    # TODO: move to cliutils.py
-    nansi, keep, i, text_len = [], True, 0, len(text)
-    while i < text_len:
-        if not keep and text[i] in ANSI_TERMINATORS:
-            keep = True
-        elif keep:
-            keep_end_i = text.find(ANSI_ESCAPE_BEGIN, i)
-            if keep_end_i < 0:
-                break
-            else:
-                nansi.append(text[i:keep_end_i])
-                i, keep = keep_end_i, False
-        i += 1
-    if not nansi:
-        return text
-    return type(text)().join(nansi)  # attempted unicode + str support
-
-
-def asciify(text, ignore=False):
-    """Converts a unicode or bytestring, *text*, into a bytestring with
-    just ascii characters. Performs basic deaccenting for all you
-    Europhiles out there.
-
-    Also, a gentle reminder that this is a **utility**, primarily meant
-    for slugification. Whenever possible, make your application work
-    **with** unicode, not against it.
-
-    Args:
-        text (str or unicode): The string to be asciified.
-        ignore (bool): Configures final encoding to ignore remaining
-            unasciified unicode instead of replacing it.
-
-    >>> asciify('Beyoncé') == b'Beyonce'
-    True
-    """
-    try:
-        try:
-            return text.encode('ascii')
-        except UnicodeDecodeError:
-            # this usually means you passed in a non-unicode string
-            text = text.decode('utf-8')
-            return text.encode('ascii')
-    except UnicodeEncodeError:
-        mode = 'replace'
-        if ignore:
-            mode = 'ignore'
-        transd = unicodedata.normalize('NFKD', text.translate(DEACCENT_MAP))
-        ret = transd.encode('ascii', mode)
-        return ret
-
-
-def is_ascii(text):
-    """Check if a unicode or bytestring, *text*, is composed of ascii
-    characters only. Raises :exc:`ValueError` if argument is not text.
-
-    Args:
-        text (str or unicode): The string to be checked.
-
-    >>> is_ascii('Beyoncé')
-    False
-    >>> is_ascii('Beyonce')
-    True
-    """
-    if isinstance(text, unicode):
-        try:
-            text.encode('ascii')
-        except UnicodeEncodeError:
-            return False
-    elif isinstance(text, bytes):
-        try:
-            text.decode('ascii')
-        except UnicodeDecodeError:
-            return False
-    else:
-        raise ValueError('expected text or bytes, not %r' % type(text))
-    return True
-
-
-class DeaccenterDict(dict):
-    "A small caching dictionary for deaccenting."
-    def __missing__(self, key):
-        ch = self.get(key)
-        if ch is not None:
-            return ch
-        try:
-            de = unicodedata.decomposition(unichr(key))
-            p1, _, p2 = de.rpartition(' ')
-            if int(p2, 16) == 0x308:
-                ch = self.get(key)
-            else:
-                ch = int(p1, 16)
-        except (IndexError, ValueError):
-            ch = self.get(key, key)
-        self[key] = ch
-        return ch
-
-    try:
-        from collections import defaultdict
-    except ImportError:
-        # no defaultdict means that __missing__ isn't supported in
-        # this version of python, so we define __getitem__
-        def __getitem__(self, key):
-            try:
-                return super(DeaccenterDict, self).__getitem__(key)
-            except KeyError:
-                return self.__missing__(key)
-    else:
-        del defaultdict
-
-
-# http://chmullig.com/2009/12/python-unicode-ascii-ifier/
-# For something more complete, investigate the unidecode
-# or isounidecode packages, which are capable of performing
-# crude transliteration.
-_BASE_DEACCENT_MAP = {
-    0xc6: u"AE", # Æ LATIN CAPITAL LETTER AE
-    0xd0: u"D",  # Ð LATIN CAPITAL LETTER ETH
-    0xd8: u"OE", # Ø LATIN CAPITAL LETTER O WITH STROKE
-    0xde: u"Th", # Þ LATIN CAPITAL LETTER THORN
-    0xc4: u'Ae', # Ä LATIN CAPITAL LETTER A WITH DIAERESIS
-    0xd6: u'Oe', # Ö LATIN CAPITAL LETTER O WITH DIAERESIS
-    0xdc: u'Ue', # Ü LATIN CAPITAL LETTER U WITH DIAERESIS
-    0xc0: u"A",  # À LATIN CAPITAL LETTER A WITH GRAVE
-    0xc1: u"A",  # Á LATIN CAPITAL LETTER A WITH ACUTE
-    0xc3: u"A",  # Ã LATIN CAPITAL LETTER A WITH TILDE
-    0xc7: u"C",  # Ç LATIN CAPITAL LETTER C WITH CEDILLA
-    0xc8: u"E",  # È LATIN CAPITAL LETTER E WITH GRAVE
-    0xc9: u"E",  # É LATIN CAPITAL LETTER E WITH ACUTE
-    0xca: u"E",  # Ê LATIN CAPITAL LETTER E WITH CIRCUMFLEX
-    0xcc: u"I",  # Ì LATIN CAPITAL LETTER I WITH GRAVE
-    0xcd: u"I",  # Í LATIN CAPITAL LETTER I WITH ACUTE
-    0xd2: u"O",  # Ò LATIN CAPITAL LETTER O WITH GRAVE
-    0xd3: u"O",  # Ó LATIN CAPITAL LETTER O WITH ACUTE
-    0xd5: u"O",  # Õ LATIN CAPITAL LETTER O WITH TILDE
-    0xd9: u"U",  # Ù LATIN CAPITAL LETTER U WITH GRAVE
-    0xda: u"U",  # Ú LATIN CAPITAL LETTER U WITH ACUTE
-    0xdf: u"ss", # ß LATIN SMALL LETTER SHARP S
-    0xe6: u"ae", # æ LATIN SMALL LETTER AE
-    0xf0: u"d",  # ð LATIN SMALL LETTER ETH
-    0xf8: u"oe", # ø LATIN SMALL LETTER O WITH STROKE
-    0xfe: u"th", # þ LATIN SMALL LETTER THORN,
-    0xe4: u'ae', # ä LATIN SMALL LETTER A WITH DIAERESIS
-    0xf6: u'oe', # ö LATIN SMALL LETTER O WITH DIAERESIS
-    0xfc: u'ue', # ü LATIN SMALL LETTER U WITH DIAERESIS
-    0xe0: u"a",  # à LATIN SMALL LETTER A WITH GRAVE
-    0xe1: u"a",  # á LATIN SMALL LETTER A WITH ACUTE
-    0xe3: u"a",  # ã LATIN SMALL LETTER A WITH TILDE
-    0xe7: u"c",  # ç LATIN SMALL LETTER C WITH CEDILLA
-    0xe8: u"e",  # è LATIN SMALL LETTER E WITH GRAVE
-    0xe9: u"e",  # é LATIN SMALL LETTER E WITH ACUTE
-    0xea: u"e",  # ê LATIN SMALL LETTER E WITH CIRCUMFLEX
-    0xec: u"i",  # ì LATIN SMALL LETTER I WITH GRAVE
-    0xed: u"i",  # í LATIN SMALL LETTER I WITH ACUTE
-    0xf2: u"o",  # ò LATIN SMALL LETTER O WITH GRAVE
-    0xf3: u"o",  # ó LATIN SMALL LETTER O WITH ACUTE
-    0xf5: u"o",  # õ LATIN SMALL LETTER O WITH TILDE
-    0xf9: u"u",  # ù LATIN SMALL LETTER U WITH GRAVE
-    0xfa: u"u",  # ú LATIN SMALL LETTER U WITH ACUTE
-    0x2018: u"'",  # ‘ LEFT SINGLE QUOTATION MARK
-    0x2019: u"'",  # ’ RIGHT SINGLE QUOTATION MARK
-    0x201c: u'"',  # “ LEFT DOUBLE QUOTATION MARK
-    0x201d: u'"',  # ” RIGHT DOUBLE QUOTATION MARK
-    }
-
-
-DEACCENT_MAP = DeaccenterDict(_BASE_DEACCENT_MAP)
-
-
-_SIZE_SYMBOLS = ('B', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y')
-_SIZE_BOUNDS = [(1024 ** i, sym) for i, sym in enumerate(_SIZE_SYMBOLS)]
-_SIZE_RANGES = list(zip(_SIZE_BOUNDS, _SIZE_BOUNDS[1:]))
-
-
-def bytes2human(nbytes, ndigits=0):
-    """Turns an integer value of *nbytes* into a human readable format. Set
-    *ndigits* to control how many digits after the decimal point
-    should be shown (default ``0``).
-
-    >>> bytes2human(128991)
-    '126K'
-    >>> bytes2human(100001221)
-    '95M'
-    >>> bytes2human(0, 2)
-    '0.00B'
-    """
-    abs_bytes = abs(nbytes)
-    for (size, symbol), (next_size, next_symbol) in _SIZE_RANGES:
-        if abs_bytes <= next_size:
-            break
-    hnbytes = float(nbytes) / size
-    return '{hnbytes:.{ndigits}f}{symbol}'.format(hnbytes=hnbytes,
-                                                  ndigits=ndigits,
-                                                  symbol=symbol)
-
-
-class HTMLTextExtractor(HTMLParser):
-    def __init__(self):
-        self.reset()
-        self.strict = False
-        self.convert_charrefs = True
-        self.result = []
-
-    def handle_data(self, d):
-        self.result.append(d)
-
-    def handle_charref(self, number):
-        if number[0] == u'x' or number[0] == u'X':
-            codepoint = int(number[1:], 16)
-        else:
-            codepoint = int(number)
-        self.result.append(unichr(codepoint))
-
-    def handle_entityref(self, name):
-        try:
-            codepoint = htmlentitydefs.name2codepoint[name]
-        except KeyError:
-            self.result.append(u'&' + name + u';')
-        else:
-            self.result.append(unichr(codepoint))
-
-    def get_text(self):
-        return u''.join(self.result)
-
-
-def html2text(html):
-    """Strips tags from HTML text, returning markup-free text. Also, does
-    a best effort replacement of entities like "&nbsp;"
-
-    >>> r = html2text(u'<a href="#">Test &amp;<em>(\u0394&#x03b7;&#956;&#x03CE;)</em></a>')
-    >>> r == u'Test &(\u0394\u03b7\u03bc\u03ce)'
-    True
-    """
-    # based on answers to http://stackoverflow.com/questions/753052/
-    s = HTMLTextExtractor()
-    s.feed(html)
-    return s.get_text()
-
-
-_EMPTY_GZIP_BYTES = b'\x1f\x8b\x08\x089\xf3\xb9U\x00\x03empty\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00'
-_NON_EMPTY_GZIP_BYTES = b'\x1f\x8b\x08\x08\xbc\xf7\xb9U\x00\x03not_empty\x00K\xaa,I-N\xcc\xc8\xafT\xe4\x02\x00\xf3nb\xbf\x0b\x00\x00\x00'
-
-
-def gunzip_bytes(bytestring):
-    """The :mod:`gzip` module is great if you have a file or file-like
-    object, but what if you just have bytes. StringIO is one
-    possibility, but it's often faster, easier, and simpler to just
-    use this one-liner. Use this tried-and-true utility function to
-    decompress gzip from bytes.
-
-    >>> gunzip_bytes(_EMPTY_GZIP_BYTES) == b''
-    True
-    >>> gunzip_bytes(_NON_EMPTY_GZIP_BYTES).rstrip() == b'bytesahoy!'
-    True
-    """
-    return zlib.decompress(bytestring, 16 + zlib.MAX_WBITS)
-
-
-def gzip_bytes(bytestring, level=6):
-    """Turn some bytes into some compressed bytes.
-
-    >>> len(gzip_bytes(b'a' * 10000))
-    46
-
-    Args:
-        bytestring (bytes): Bytes to be compressed
-        level (int): An integer, 1-9, controlling the
-          speed/compression. 1 is fastest, least compressed, 9 is
-          slowest, but most compressed.
-
-    Note that all levels of gzip are pretty fast these days, though
-    it's not really a competitor in compression, at any level.
-    """
-    out = StringIO()
-    f = GzipFile(fileobj=out, mode='wb', compresslevel=level)
-    f.write(bytestring)
-    f.close()
-    return out.getvalue()
-
-
-
-_line_ending_re = re.compile(r'(\r\n|\n|\x0b|\f|\r|\x85|\x2028|\x2029)',
-                             re.UNICODE)
-
-
-def iter_splitlines(text):
-    r"""Like :meth:`str.splitlines`, but returns an iterator of lines
-    instead of a list. Also similar to :meth:`file.next`, as that also
-    lazily reads and yields lines from a file.
-
-    This function works with a variety of line endings, but as always,
-    be careful when mixing line endings within a file.
-
-    >>> list(iter_splitlines('\nhi\nbye\n'))
-    ['', 'hi', 'bye', '']
-    >>> list(iter_splitlines('\r\nhi\rbye\r\n'))
-    ['', 'hi', 'bye', '']
-    >>> list(iter_splitlines(''))
-    []
-    """
-    prev_end, len_text = 0, len(text)
-    # print('last: %r' % last_idx)
-    # start, end = None, None
-    for match in _line_ending_re.finditer(text):
-        start, end = match.start(1), match.end(1)
-        # print(start, end)
-        if prev_end <= start:
-            yield text[prev_end:start]
-        if end == len_text:
-            yield ''
-        prev_end = end
-    tail = text[prev_end:]
-    if tail:
-        yield tail
-    return
-
-
-def indent(text, margin, newline='\n', key=bool):
-    """The missing counterpart to the built-in :func:`textwrap.dedent`.
-
-    Args:
-        text (str): The text to indent.
-        margin (str): The string to prepend to each line.
-        newline (str): The newline used to rejoin the lines (default: ``\\n``)
-        key (callable): Called on each line to determine whether to
-          indent it. Default: :class:`bool`, to ensure that empty lines do
-          not get whitespace added.
-    """
-    indented_lines = [(margin + line if key(line) else line)
-                      for line in iter_splitlines(text)]
-    return newline.join(indented_lines)
-
-
-def is_uuid(obj, version=4):
-    """Check the argument is either a valid UUID object or string.
-
-    Args:
-        obj (object): The test target. Strings and UUID objects supported.
-        version (int): The target UUID version, set to 0 to skip version check.
-
-    >>> is_uuid('e682ccca-5a4c-4ef2-9711-73f9ad1e15ea')
-    True
-    >>> is_uuid('0221f0d9-d4b9-11e5-a478-10ddb1c2feb9')
-    False
-    >>> is_uuid('0221f0d9-d4b9-11e5-a478-10ddb1c2feb9', version=1)
-    True
-    """
-    if not isinstance(obj, uuid.UUID):
-        try:
-            obj = uuid.UUID(obj)
-        except (TypeError, ValueError, AttributeError):
-            return False
-    if version and obj.version != int(version):
-        return False
-    return True
-
-
-def escape_shell_args(args, sep=' ', style=None):
-    """Returns an escaped version of each string in *args*, according to
-    *style*.
-
-    Args:
-        args (list): A list of arguments to escape and join together
-        sep (str): The separator used to join the escaped arguments.
-        style (str): The style of escaping to use. Can be one of
-          ``cmd`` or ``sh``, geared toward Windows and Linux/BSD/etc.,
-          respectively. If *style* is ``None``, then it is picked
-          according to the system platform.
-
-    See :func:`args2cmd` and :func:`args2sh` for details and example
-    output for each style.
-    """
-    if not style:
-        style = 'cmd' if sys.platform == 'win32' else 'sh'
-
-    if style == 'sh':
-        return args2sh(args, sep=sep)
-    elif style == 'cmd':
-        return args2cmd(args, sep=sep)
-
-    raise ValueError("style expected one of 'cmd' or 'sh', not %r" % style)
-
-
-_find_sh_unsafe = re.compile(r'[^a-zA-Z0-9_@%+=:,./-]').search
-
-
-def args2sh(args, sep=' '):
-    """Return a shell-escaped string version of *args*, separated by
-    *sep*, based on the rules of sh, bash, and other shells in the
-    Linux/BSD/MacOS ecosystem.
-
-    >>> print(args2sh(['aa', '[bb]', "cc'cc", 'dd"dd']))
-    aa '[bb]' 'cc'"'"'cc' 'dd"dd'
-
-    As you can see, arguments with no special characters are not
-    escaped, arguments with special characters are quoted with single
-    quotes, and single quotes themselves are quoted with double
-    quotes. Double quotes are handled like any other special
-    character.
-
-    Based on code from the :mod:`pipes`/:mod:`shlex` modules. Also
-    note that :mod:`shlex` and :mod:`argparse` have functions to split
-    and parse strings escaped in this manner.
-    """
-    ret_list = []
-
-    for arg in args:
-        if not arg:
-            ret_list.append("''")
-            continue
-        if _find_sh_unsafe(arg) is None:
-            ret_list.append(arg)
-            continue
-        # use single quotes, and put single quotes into double quotes
-        # the string $'b is then quoted as '$'"'"'b'
-        ret_list.append("'" + arg.replace("'", "'\"'\"'") + "'")
-
-    return ' '.join(ret_list)
-
-
-def args2cmd(args, sep=' '):
-    r"""Return a shell-escaped string version of *args*, separated by
-    *sep*, using the same rules as the Microsoft C runtime.
-
-    >>> print(args2cmd(['aa', '[bb]', "cc'cc", 'dd"dd']))
-    aa [bb] cc'cc dd\"dd
-
-    As you can see, escaping is through backslashing and not quoting,
-    and double quotes are the only special character. See the comment
-    in the code for more details. Based on internal code from the
-    :mod:`subprocess` module.
-
-    """
-    # technique description from subprocess below
-    """
-    1) Arguments are delimited by white space, which is either a
-       space or a tab.
-
-    2) A string surrounded by double quotation marks is
-       interpreted as a single argument, regardless of white space
-       contained within.  A quoted string can be embedded in an
-       argument.
-
-    3) A double quotation mark preceded by a backslash is
-       interpreted as a literal double quotation mark.
-
-    4) Backslashes are interpreted literally, unless they
-       immediately precede a double quotation mark.
-
-    5) If backslashes immediately precede a double quotation mark,
-       every pair of backslashes is interpreted as a literal
-       backslash.  If the number of backslashes is odd, the last
-       backslash escapes the next double quotation mark as
-       described in rule 3.
-
-    See http://msdn.microsoft.com/en-us/library/17w5ykft.aspx
-    or search http://msdn.microsoft.com for
-    "Parsing C++ Command-Line Arguments"
-    """
-    result = []
-    needquote = False
-    for arg in args:
-        bs_buf = []
-
-        # Add a space to separate this argument from the others
-        if result:
-            result.append(' ')
-
-        needquote = (" " in arg) or ("\t" in arg) or not arg
-        if needquote:
-            result.append('"')
-
-        for c in arg:
-            if c == '\\':
-                # Don't know if we need to double yet.
-                bs_buf.append(c)
-            elif c == '"':
-                # Double backslashes.
-                result.append('\\' * len(bs_buf)*2)
-                bs_buf = []
-                result.append('\\"')
-            else:
-                # Normal char
-                if bs_buf:
-                    result.extend(bs_buf)
-                    bs_buf = []
-                result.append(c)
-
-        # Add remaining backslashes, if any.
-        if bs_buf:
-            result.extend(bs_buf)
-
-        if needquote:
-            result.extend(bs_buf)
-            result.append('"')
-
-    return ''.join(result)
-
-
-def parse_int_list(range_string, delim=',', range_delim='-'):
-    """Returns a sorted list of positive integers based on
-    *range_string*. Reverse of :func:`format_int_list`.
-
-    Args:
-        range_string (str): String of comma separated positive
-            integers or ranges (e.g. '1,2,4-6,8'). Typical of a custom
-            page range string used in printer dialogs.
-        delim (char): Defaults to ','. Separates integers and
-            contiguous ranges of integers.
-        range_delim (char): Defaults to '-'. Indicates a contiguous
-            range of integers.
-
-    >>> parse_int_list('1,3,5-8,10-11,15')
-    [1, 3, 5, 6, 7, 8, 10, 11, 15]
-
-    """
-    output = []
-
-    for x in range_string.strip().split(delim):
-
-        # Range
-        if range_delim in x:
-            range_limits = list(map(int, x.split(range_delim)))
-            output += list(range(min(range_limits), max(range_limits)+1))
-
-        # Empty String
-        elif not x:
-            continue
-
-        # Integer
-        else:
-            output.append(int(x))
-
-    return sorted(output)
-
-
-def format_int_list(int_list, delim=',', range_delim='-', delim_space=False):
-    """Returns a sorted range string from a list of positive integers
-    (*int_list*). Contiguous ranges of integers are collapsed to min
-    and max values. Reverse of :func:`parse_int_list`.
-
-    Args:
-        int_list (list): List of positive integers to be converted
-           into a range string (e.g. [1,2,4,5,6,8]).
-        delim (char): Defaults to ','. Separates integers and
-           contiguous ranges of integers.
-        range_delim (char): Defaults to '-'. Indicates a contiguous
-           range of integers.
-        delim_space (bool): Defaults to ``False``. If ``True``, adds a
-           space after all *delim* characters.
-
-    >>> format_int_list([1,3,5,6,7,8,10,11,15])
-    '1,3,5-8,10-11,15'
-
-    """
-    output = []
-    contig_range = collections.deque()
-
-    for x in sorted(int_list):
-
-        # Handle current (and first) value.
-        if len(contig_range) < 1:
-            contig_range.append(x)
-
-        # Handle current value, given multiple previous values are contiguous.
-        elif len(contig_range) > 1:
-            delta = x - contig_range[-1]
-
-            # Current value is contiguous.
-            if delta == 1:
-                contig_range.append(x)
-
-            # Current value is non-contiguous.
-            elif delta > 1:
-                range_substr = '{0:d}{1}{2:d}'.format(min(contig_range),
-                                                      range_delim,
-                                                      max(contig_range))
-                output.append(range_substr)
-                contig_range.clear()
-                contig_range.append(x)
-
-            # Current value repeated.
-            else:
-                continue
-
-        # Handle current value, given no previous contiguous integers
-        else:
-            delta = x - contig_range[0]
-
-            # Current value is contiguous.
-            if delta == 1:
-                contig_range.append(x)
-
-            # Current value is non-contiguous.
-            elif delta > 1:
-                output.append('{0:d}'.format(contig_range.popleft()))
-                contig_range.append(x)
-
-            # Current value repeated.
-            else:
-                continue
-
-    # Handle the last value.
-    else:
-
-        # Last value is non-contiguous.
-        if len(contig_range) == 1:
-            output.append('{0:d}'.format(contig_range.popleft()))
-            contig_range.clear()
-
-        # Last value is part of contiguous range.
-        elif len(contig_range) > 1:
-            range_substr = '{0:d}{1}{2:d}'.format(min(contig_range),
-                                                  range_delim,
-                                                  max(contig_range))
-            output.append(range_substr)
-            contig_range.clear()
-
-    if delim_space:
-        output_str = (delim+' ').join(output)
-    else:
-        output_str = delim.join(output)
-
-    return output_str
-
-
-class MultiReplace(object):
-    """
-    MultiReplace is a tool for doing multiple find/replace actions in one pass.
-
-    Given a mapping of values to be replaced it allows for all of the matching
-    values to be replaced in a single pass which can save a lot of performance
-    on very large strings. In addition to simple replace, it also allows for
-    replacing based on regular expressions.
-
-    Keyword Arguments:
-
-    :type regex: bool
-    :param regex: Treat search keys as regular expressions [Default: False]
-    :type flags: int
-    :param flags: flags to pass to the regex engine during compile
-
-    Dictionary Usage::
-
-        from lrmslib import stringutils
-        s = stringutils.MultiReplace({
-            'foo': 'zoo',
-            'cat': 'hat',
-            'bat': 'kraken'
-        })
-        new = s.sub('The foo bar cat ate a bat')
-        new == 'The zoo bar hat ate a kraken'
-
-    Iterable Usage::
-
-        from lrmslib import stringutils
-        s = stringutils.MultiReplace([
-            ('foo', 'zoo'),
-            ('cat', 'hat'),
-            ('bat', 'kraken)'
-        ])
-        new = s.sub('The foo bar cat ate a bat')
-        new == 'The zoo bar hat ate a kraken'
-
-
-    The constructor can be passed a dictionary or other mapping as well as
-    an iterable of tuples. If given an iterable, the substitution will be run
-    in the order the replacement values are specified in the iterable. This is
-    also true if it is given an OrderedDict. If given a dictionary then the
-    order will be non-deterministic::
-
-        >>> 'foo bar baz'.replace('foo', 'baz').replace('baz', 'bar')
-        'bar bar bar'
-        >>> m = MultiReplace({'foo': 'baz', 'baz': 'bar'})
-        >>> m.sub('foo bar baz')
-        'baz bar bar'
-
-    This is because the order of replacement can matter if you're inserting
-    something that might be replaced by a later substitution. Pay attention and
-    if you need to rely on order then consider using a list of tuples instead
-    of a dictionary.
-    """
-
-    def __init__(self, sub_map, **kwargs):
-        """Compile any regular expressions that have been passed."""
-        options = {
-            'regex': False,
-            'flags': 0,
-        }
-        options.update(kwargs)
-        self.group_map = {}
-        regex_values = []
-
-        if isinstance(sub_map, Mapping):
-            sub_map = sub_map.items()
-
-        for idx, vals in enumerate(sub_map):
-            group_name = 'group{0}'.format(idx)
-            if isinstance(vals[0], basestring):
-                # If we're not treating input strings like a regex, escape it
-                if not options['regex']:
-                    exp = re.escape(vals[0])
-                else:
-                    exp = vals[0]
-            else:
-                exp = vals[0].pattern
-
-            regex_values.append('(?P<{0}>{1})'.format(
-                group_name,
-                exp
-            ))
-            self.group_map[group_name] = vals[1]
-
-        self.combined_pattern = re.compile(
-            '|'.join(regex_values),
-            flags=options['flags']
-        )
-
-    def _get_value(self, match):
-        """Given a match object find replacement value."""
-        group_dict = match.groupdict()
-        key = [x for x in group_dict if group_dict[x]][0]
-        return self.group_map[key]
-
-    def sub(self, text):
-        """
-        Run substitutions on the input text.
-
-        Given an input string, run all substitutions given in the
-        constructor.
-        """
-        return self.combined_pattern.sub(self._get_value, text)
-
-
-def multi_replace(text, sub_map, **kwargs):
-    """Shortcut function to invoke multi-replace in a single command."""
-    m = MultiReplace(sub_map, **kwargs)
-    return m.sub(text)
-
-
-def unwrap_text(text, ending='\n\n'):
-    r"""
-    Unwrap text, the natural complement to :func:`textwrap.wrap`.
-
-    >>> text = "Short \n lines  \nwrapped\nsmall.\n\nAnother\nparagraph."
-    >>> unwrap_text(text)
-    'Short lines wrapped small.\n\nAnother paragraph.'
-
-    Args:
-       text: A string to unwrap.
-       ending (str): The string to join all unwrapped paragraphs
-          by. Pass ``None`` to get the list. Defaults to '\n\n' for
-          compatibility with Markdown and RST.
-
-    """
-    all_grafs = []
-    cur_graf = []
-    for line in text.splitlines():
-        line = line.strip()
-        if line:
-            cur_graf.append(line)
-        else:
-            all_grafs.append(' '.join(cur_graf))
-            cur_graf = []
-    if cur_graf:
-        all_grafs.append(' '.join(cur_graf))
-    if ending is None:
-        return all_grafs
-    return ending.join(all_grafs)
author	shellac
date	Mon, 01 Jun 2020 08:59:25 -0400
parents	79f47841a781
children