Mercurial > repos > shellac > guppy_basecaller
diff env/lib/python3.7/site-packages/humanfriendly/text.py @ 0:26e78fe6e8c4 draft
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author | shellac |
---|---|
date | Sat, 02 May 2020 07:14:21 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/env/lib/python3.7/site-packages/humanfriendly/text.py Sat May 02 07:14:21 2020 -0400 @@ -0,0 +1,412 @@ +# Human friendly input/output in Python. +# +# Author: Peter Odding <peter@peterodding.com> +# Last Change: March 1, 2020 +# URL: https://humanfriendly.readthedocs.io + +""" +Simple text manipulation functions. + +The :mod:`~humanfriendly.text` module contains simple functions to manipulate text: + +- The :func:`concatenate()` and :func:`pluralize()` functions make it easy to + generate human friendly output. + +- The :func:`format()`, :func:`compact()` and :func:`dedent()` functions + provide a clean and simple to use syntax for composing large text fragments + with interpolated variables. + +- The :func:`tokenize()` function parses simple user input. +""" + +# Standard library modules. +import math +import numbers +import random +import re +import string +import textwrap + +# Public identifiers that require documentation. +__all__ = ( + 'compact', + 'compact_empty_lines', + 'concatenate', + 'dedent', + 'format', + 'generate_slug', + 'is_empty_line', + 'join_lines', + 'pluralize', + 'random_string', + 'split', + 'split_paragraphs', + 'tokenize', + 'trim_empty_lines', +) + + +def compact(text, *args, **kw): + ''' + Compact whitespace in a string. + + Trims leading and trailing whitespace, replaces runs of whitespace + characters with a single space and interpolates any arguments using + :func:`format()`. + + :param text: The text to compact (a string). + :param args: Any positional arguments are interpolated using :func:`format()`. + :param kw: Any keyword arguments are interpolated using :func:`format()`. + :returns: The compacted text (a string). + + Here's an example of how I like to use the :func:`compact()` function, this + is an example from a random unrelated project I'm working on at the moment:: + + raise PortDiscoveryError(compact(""" + Failed to discover port(s) that Apache is listening on! + Maybe I'm parsing the wrong configuration file? ({filename}) + """, filename=self.ports_config)) + + The combination of :func:`compact()` and Python's multi line strings allows + me to write long text fragments with interpolated variables that are easy + to write, easy to read and work well with Python's whitespace + sensitivity. + ''' + non_whitespace_tokens = text.split() + compacted_text = ' '.join(non_whitespace_tokens) + return format(compacted_text, *args, **kw) + + +def compact_empty_lines(text): + """ + Replace repeating empty lines with a single empty line (similar to ``cat -s``). + + :param text: The text in which to compact empty lines (a string). + :returns: The text with empty lines compacted (a string). + """ + i = 0 + lines = text.splitlines(True) + while i < len(lines): + if i > 0 and is_empty_line(lines[i - 1]) and is_empty_line(lines[i]): + lines.pop(i) + else: + i += 1 + return ''.join(lines) + + +def concatenate(items): + """ + Concatenate a list of items in a human friendly way. + + :param items: A sequence of strings. + :returns: A single string. + + >>> from humanfriendly.text import concatenate + >>> concatenate(["eggs", "milk", "bread"]) + 'eggs, milk and bread' + """ + items = list(items) + if len(items) > 1: + return ', '.join(items[:-1]) + ' and ' + items[-1] + elif items: + return items[0] + else: + return '' + + +def dedent(text, *args, **kw): + """ + Dedent a string (remove common leading whitespace from all lines). + + Removes common leading whitespace from all lines in the string using + :func:`textwrap.dedent()`, removes leading and trailing empty lines using + :func:`trim_empty_lines()` and interpolates any arguments using + :func:`format()`. + + :param text: The text to dedent (a string). + :param args: Any positional arguments are interpolated using :func:`format()`. + :param kw: Any keyword arguments are interpolated using :func:`format()`. + :returns: The dedented text (a string). + + The :func:`compact()` function's documentation contains an example of how I + like to use the :func:`compact()` and :func:`dedent()` functions. The main + difference is that I use :func:`compact()` for text that will be presented + to the user (where whitespace is not so significant) and :func:`dedent()` + for data file and code generation tasks (where newlines and indentation are + very significant). + """ + dedented_text = textwrap.dedent(text) + trimmed_text = trim_empty_lines(dedented_text) + return format(trimmed_text, *args, **kw) + + +def format(text, *args, **kw): + """ + Format a string using the string formatting operator and/or :meth:`str.format()`. + + :param text: The text to format (a string). + :param args: Any positional arguments are interpolated into the text using + the string formatting operator (``%``). If no positional + arguments are given no interpolation is done. + :param kw: Any keyword arguments are interpolated into the text using the + :meth:`str.format()` function. If no keyword arguments are given + no interpolation is done. + :returns: The text with any positional and/or keyword arguments + interpolated (a string). + + The implementation of this function is so trivial that it seems silly to + even bother writing and documenting it. Justifying this requires some + context :-). + + **Why format() instead of the string formatting operator?** + + For really simple string interpolation Python's string formatting operator + is ideal, but it does have some strange quirks: + + - When you switch from interpolating a single value to interpolating + multiple values you have to wrap them in tuple syntax. Because + :func:`format()` takes a `variable number of arguments`_ it always + receives a tuple (which saves me a context switch :-). Here's an + example: + + >>> from humanfriendly.text import format + >>> # The string formatting operator. + >>> print('the magic number is %s' % 42) + the magic number is 42 + >>> print('the magic numbers are %s and %s' % (12, 42)) + the magic numbers are 12 and 42 + >>> # The format() function. + >>> print(format('the magic number is %s', 42)) + the magic number is 42 + >>> print(format('the magic numbers are %s and %s', 12, 42)) + the magic numbers are 12 and 42 + + - When you interpolate a single value and someone accidentally passes in a + tuple your code raises a :exc:`~exceptions.TypeError`. Because + :func:`format()` takes a `variable number of arguments`_ it always + receives a tuple so this can never happen. Here's an example: + + >>> # How expecting to interpolate a single value can fail. + >>> value = (12, 42) + >>> print('the magic value is %s' % value) + Traceback (most recent call last): + File "<stdin>", line 1, in <module> + TypeError: not all arguments converted during string formatting + >>> # The following line works as intended, no surprises here! + >>> print(format('the magic value is %s', value)) + the magic value is (12, 42) + + **Why format() instead of the str.format() method?** + + When you're doing complex string interpolation the :meth:`str.format()` + function results in more readable code, however I frequently find myself + adding parentheses to force evaluation order. The :func:`format()` function + avoids this because of the relative priority between the comma and dot + operators. Here's an example: + + >>> "{adjective} example" + " " + "(can't think of anything less {adjective})".format(adjective='silly') + "{adjective} example (can't think of anything less silly)" + >>> ("{adjective} example" + " " + "(can't think of anything less {adjective})").format(adjective='silly') + "silly example (can't think of anything less silly)" + >>> format("{adjective} example" + " " + "(can't think of anything less {adjective})", adjective='silly') + "silly example (can't think of anything less silly)" + + The :func:`compact()` and :func:`dedent()` functions are wrappers that + combine :func:`format()` with whitespace manipulation to make it easy to + write nice to read Python code. + + .. _variable number of arguments: https://docs.python.org/2/tutorial/controlflow.html#arbitrary-argument-lists + """ + if args: + text %= args + if kw: + text = text.format(**kw) + return text + + +def generate_slug(text, delimiter="-"): + """ + Convert text to a normalized "slug" without whitespace. + + :param text: The original text, for example ``Some Random Text!``. + :param delimiter: The delimiter used to separate words + (defaults to the ``-`` character). + :returns: The slug text, for example ``some-random-text``. + :raises: :exc:`~exceptions.ValueError` when the provided + text is nonempty but results in an empty slug. + """ + slug = text.lower() + escaped = delimiter.replace("\\", "\\\\") + slug = re.sub("[^a-z0-9]+", escaped, slug) + slug = slug.strip(delimiter) + if text and not slug: + msg = "The provided text %r results in an empty slug!" + raise ValueError(format(msg, text)) + return slug + + +def is_empty_line(text): + """ + Check if a text is empty or contains only whitespace. + + :param text: The text to check for "emptiness" (a string). + :returns: :data:`True` if the text is empty or contains only whitespace, + :data:`False` otherwise. + """ + return len(text) == 0 or text.isspace() + + +def join_lines(text): + """ + Remove "hard wrapping" from the paragraphs in a string. + + :param text: The text to reformat (a string). + :returns: The text without hard wrapping (a string). + + This function works by removing line breaks when the last character before + a line break and the first character after the line break are both + non-whitespace characters. This means that common leading indentation will + break :func:`join_lines()` (in that case you can use :func:`dedent()` + before calling :func:`join_lines()`). + """ + return re.sub(r'(\S)\n(\S)', r'\1 \2', text) + + +def pluralize(count, singular, plural=None): + """ + Combine a count with the singular or plural form of a word. + + If the plural form of the word is not provided it is obtained by + concatenating the singular form of the word with the letter "s". Of course + this will not always be correct, which is why you have the option to + specify both forms. + + :param count: The count (a number). + :param singular: The singular form of the word (a string). + :param plural: The plural form of the word (a string or :data:`None`). + :returns: The count and singular/plural word concatenated (a string). + """ + if not plural: + plural = singular + 's' + return '%s %s' % (count, singular if math.floor(float(count)) == 1 else plural) + + +def random_string(length=(25, 100), characters=string.ascii_letters): + """random_string(length=(25, 100), characters=string.ascii_letters) + Generate a random string. + + :param length: The length of the string to be generated (a number or a + tuple with two numbers). If this is a tuple then a random + number between the two numbers given in the tuple is used. + :param characters: The characters to be used (a string, defaults + to :data:`string.ascii_letters`). + :returns: A random string. + + The :func:`random_string()` function is very useful in test suites; by the + time I included it in :mod:`humanfriendly.text` I had already included + variants of this function in seven different test suites :-). + """ + if not isinstance(length, numbers.Number): + length = random.randint(length[0], length[1]) + return ''.join(random.choice(characters) for _ in range(length)) + + +def split(text, delimiter=','): + """ + Split a comma-separated list of strings. + + :param text: The text to split (a string). + :param delimiter: The delimiter to split on (a string). + :returns: A list of zero or more nonempty strings. + + Here's the default behavior of Python's built in :meth:`str.split()` + function: + + >>> 'foo,bar, baz,'.split(',') + ['foo', 'bar', ' baz', ''] + + In contrast here's the default behavior of the :func:`split()` function: + + >>> from humanfriendly.text import split + >>> split('foo,bar, baz,') + ['foo', 'bar', 'baz'] + + Here is an example that parses a nested data structure (a mapping of + logging level names to one or more styles per level) that's encoded in a + string so it can be set as an environment variable: + + >>> from pprint import pprint + >>> encoded_data = 'debug=green;warning=yellow;error=red;critical=red,bold' + >>> parsed_data = dict((k, split(v, ',')) for k, v in (split(kv, '=') for kv in split(encoded_data, ';'))) + >>> pprint(parsed_data) + {'debug': ['green'], + 'warning': ['yellow'], + 'error': ['red'], + 'critical': ['red', 'bold']} + """ + return [token.strip() for token in text.split(delimiter) if token and not token.isspace()] + + +def split_paragraphs(text): + """ + Split a string into paragraphs (one or more lines delimited by an empty line). + + :param text: The text to split into paragraphs (a string). + :returns: A list of strings. + """ + paragraphs = [] + for chunk in text.split('\n\n'): + chunk = trim_empty_lines(chunk) + if chunk and not chunk.isspace(): + paragraphs.append(chunk) + return paragraphs + + +def tokenize(text): + """ + Tokenize a text into numbers and strings. + + :param text: The text to tokenize (a string). + :returns: A list of strings and/or numbers. + + This function is used to implement robust tokenization of user input in + functions like :func:`.parse_size()` and :func:`.parse_timespan()`. It + automatically coerces integer and floating point numbers, ignores + whitespace and knows how to separate numbers from strings even without + whitespace. Some examples to make this more concrete: + + >>> from humanfriendly.text import tokenize + >>> tokenize('42') + [42] + >>> tokenize('42MB') + [42, 'MB'] + >>> tokenize('42.5MB') + [42.5, 'MB'] + >>> tokenize('42.5 MB') + [42.5, 'MB'] + """ + tokenized_input = [] + for token in re.split(r'(\d+(?:\.\d+)?)', text): + token = token.strip() + if re.match(r'\d+\.\d+', token): + tokenized_input.append(float(token)) + elif token.isdigit(): + tokenized_input.append(int(token)) + elif token: + tokenized_input.append(token) + return tokenized_input + + +def trim_empty_lines(text): + """ + Trim leading and trailing empty lines from the given text. + + :param text: The text to trim (a string). + :returns: The trimmed text (a string). + """ + lines = text.splitlines(True) + while lines and is_empty_line(lines[0]): + lines.pop(0) + while lines and is_empty_line(lines[-1]): + lines.pop(-1) + return ''.join(lines)