diff env/lib/python3.7/site-packages/boltons/tableutils.py @ 0:26e78fe6e8c4 draft

"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author shellac
date Sat, 02 May 2020 07:14:21 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/env/lib/python3.7/site-packages/boltons/tableutils.py	Sat May 02 07:14:21 2020 -0400
@@ -0,0 +1,569 @@
+# -*- coding: utf-8 -*-
+"""If there is one recurring theme in ``boltons``, it is that Python
+has excellent datastructures that constitute a good foundation for
+most quick manipulations, as well as building applications. However,
+Python usage has grown much faster than builtin data structure
+power. Python has a growing need for more advanced general-purpose
+data structures which behave intuitively.
+
+The :class:`Table` class is one example. When handed one- or
+two-dimensional data, it can provide useful, if basic, text and HTML
+renditions of small to medium sized data. It also heuristically
+handles recursive data of various formats (lists, dicts, namedtuples,
+objects).
+
+For more advanced :class:`Table`-style manipulation check out the
+`pandas`_ DataFrame.
+
+.. _pandas: http://pandas.pydata.org/
+
+"""
+
+from __future__ import print_function
+
+try:
+    from html import escape as html_escape
+except ImportError:
+    from cgi import escape as html_escape
+import types
+from itertools import islice
+try:
+    from collections.abc import Sequence, Mapping, MutableSequence
+except ImportError:
+    from collections import Sequence, Mapping, MutableSequence
+try:
+    string_types, integer_types = (str, unicode), (int, long)
+    from cgi import escape as html_escape
+except NameError:
+    # Python 3 compat
+    unicode = str
+    string_types, integer_types = (str, bytes), (int,)
+    from html import escape as html_escape
+
+try:
+    from typeutils import make_sentinel
+    _MISSING = make_sentinel(var_name='_MISSING')
+except ImportError:
+    _MISSING = object()
+
+"""
+Some idle feature thoughts:
+
+* shift around column order without rearranging data
+* gotta make it so you can add additional items, not just initialize with
+* maybe a shortcut would be to allow adding of Tables to other Tables
+* what's the perf of preallocating lists and overwriting items versus
+  starting from empty?
+* is it possible to effectively tell the difference between when a
+  Table is from_data()'d with a single row (list) or with a list of lists?
+* CSS: white-space pre-line or pre-wrap maybe?
+* Would be nice to support different backends (currently uses lists
+  exclusively). Sometimes large datasets come in list-of-dicts and
+  list-of-tuples format and it's desirable to cut down processing overhead.
+
+TODO: make iterable on rows?
+"""
+
+__all__ = ['Table']
+
+
+def to_text(obj, maxlen=None):
+    try:
+        text = unicode(obj)
+    except Exception:
+        try:
+            text = unicode(repr(obj))
+        except Exception:
+            text = unicode(object.__repr__(obj))
+    if maxlen and len(text) > maxlen:
+        text = text[:maxlen - 3] + '...'
+        # TODO: inverse of ljust/rjust/center
+    return text
+
+
+def escape_html(obj, maxlen=None):
+    text = to_text(obj, maxlen=maxlen)
+    return html_escape(text, quote=True)
+
+
+_DNR = set((type(None), bool, complex, float,
+            type(NotImplemented), slice,
+            types.FunctionType, types.MethodType, types.BuiltinFunctionType,
+            types.GeneratorType) + string_types + integer_types)
+
+
+class UnsupportedData(TypeError):
+    pass
+
+
+class InputType(object):
+    def __init__(self, *a, **kw):
+        pass
+
+    def get_entry_seq(self, data_seq, headers):
+        return [self.get_entry(entry, headers) for entry in data_seq]
+
+
+class DictInputType(InputType):
+    def check_type(self, obj):
+        return isinstance(obj, Mapping)
+
+    def guess_headers(self, obj):
+        return sorted(obj.keys())
+
+    def get_entry(self, obj, headers):
+        return [obj.get(h) for h in headers]
+
+    def get_entry_seq(self, obj, headers):
+        return [[ci.get(h) for h in headers] for ci in obj]
+
+
+class ObjectInputType(InputType):
+    def check_type(self, obj):
+        return type(obj) not in _DNR and hasattr(obj, '__class__')
+
+    def guess_headers(self, obj):
+        headers = []
+        for attr in dir(obj):
+            # an object's __dict__ could technically have non-string keys
+            try:
+                val = getattr(obj, attr)
+            except Exception:
+                # seen on greenlet: `run` shows in dir() but raises
+                # AttributeError. Also properties misbehave.
+                continue
+            if callable(val):
+                continue
+            headers.append(attr)
+        return headers
+
+    def get_entry(self, obj, headers):
+        values = []
+        for h in headers:
+            try:
+                values.append(getattr(obj, h))
+            except Exception:
+                values.append(None)
+        return values
+
+
+# might be better to hardcode list support since it's so close to the
+# core or might be better to make this the copy-style from_* importer
+# and have the non-copy style be hardcoded in __init__
+class ListInputType(InputType):
+    """Input handler for mutable sequences (e.g. lists). Rows are used
+    as-is: nothing is copied or converted.
+    """
+
+    def check_type(self, obj):
+        """Return ``True`` if *obj* is a MutableSequence."""
+        return isinstance(obj, MutableSequence)
+
+    def guess_headers(self, obj):
+        """Lists carry no header information, so there is no guess."""
+        return None
+
+    def get_entry(self, obj, headers):
+        """Return *obj* itself as the row; *headers* is ignored."""
+        return obj
+
+    def get_entry_seq(self, obj_seq, headers):
+        """Return *obj_seq* itself as the rows; *headers* is ignored."""
+        return obj_seq
+
+
+class TupleInputType(InputType):
+    def check_type(self, obj):
+        return isinstance(obj, tuple)
+
+    def guess_headers(self, obj):
+        return None
+
+    def get_entry(self, obj, headers):
+        return list(obj)
+
+    def get_entry_seq(self, obj_seq, headers):
+        return [list(t) for t in obj_seq]
+
+
+class NamedTupleInputType(InputType):
+    def check_type(self, obj):
+        return hasattr(obj, '_fields') and isinstance(obj, tuple)
+
+    def guess_headers(self, obj):
+        return list(obj._fields)
+
+    def get_entry(self, obj, headers):
+        return [getattr(obj, h, None) for h in headers]
+
+    def get_entry_seq(self, obj_seq, headers):
+        return [[getattr(obj, h, None) for h in headers] for obj in obj_seq]
+
+
+class Table(object):
+    """
+    This Table class is meant to be simple, low-overhead, and extensible. Its
+    most common use would be for translation between in-memory data
+    structures and serialization formats, such as HTML and console-ready text.
+
+    As such, it stores data in list-of-lists format, and *does not* copy
+    lists passed in. It also reserves the right to modify those lists in a
+    "filling" process, whereby short lists are extended to the width of
+    the table (usually determined by number of headers). This greatly
+    reduces overhead and processing/validation that would have to occur
+    otherwise.
+
+    General description of headers behavior:
+
+    Headers describe the columns, but are not part of the data, however,
+    if the *headers* argument is omitted, Table tries to infer header
+    names from the data. It is possible to have a table with no headers,
+    just pass in ``headers=None``.
+
+    Supported inputs:
+
+    * :class:`list` of :class:`list` objects
+    * :class:`dict` (list/single)
+    * :class:`object` (list/single)
+    * :class:`collections.namedtuple` (list/single)
+    * TODO: DB API cursor?
+    * TODO: json
+
+    Supported outputs:
+
+    * HTML
+    * Pretty text (also usable as GF Markdown)
+    * TODO: CSV
+    * TODO: json
+    * TODO: json lines
+
+    To minimize resident size, the Table data is stored as a list of lists.
+    """
+
+    # order definitely matters here
+    _input_types = [DictInputType(), ListInputType(),
+                    NamedTupleInputType(), TupleInputType(),
+                    ObjectInputType()]
+
+    _html_tr, _html_tr_close = '<tr>', '</tr>'
+    _html_th, _html_th_close = '<th>', '</th>'
+    _html_td, _html_td_close = '<td>', '</td>'
+    _html_thead, _html_thead_close = '<thead>', '</thead>'
+    _html_tbody, _html_tbody_close = '<tbody>', '</tbody>'
+
+    # _html_tfoot, _html_tfoot_close = '<tfoot>', '</tfoot>'
+    _html_table_tag, _html_table_tag_close = '<table>', '</table>'
+
+    def __init__(self, data=None, headers=_MISSING, metadata=None):
+        if headers is _MISSING:
+            headers = []
+            if data:
+                headers, data = list(data[0]), islice(data, 1, None)
+        self.headers = headers or []
+        self.metadata = metadata or {}
+        self._data = []
+        self._width = 0
+
+        self.extend(data)
+
+    def extend(self, data):
+        """
+        Append the given data to the end of the Table.
+        """
+        if not data:
+            return
+        self._data.extend(data)
+        self._set_width()
+        self._fill()
+
+    def _set_width(self, reset=False):
+        if reset:
+            self._width = 0
+        if self._width:
+            return
+        if self.headers:
+            self._width = len(self.headers)
+            return
+        self._width = max([len(d) for d in self._data])
+
+    def _fill(self):
+        width, filler = self._width, [None]
+        if not width:
+            return
+        for d in self._data:
+            rem = width - len(d)
+            if rem > 0:
+                d.extend(filler * rem)
+        return
+
+    @classmethod
+    def from_dict(cls, data, headers=_MISSING, max_depth=1, metadata=None):
+        """Create a Table from a :class:`dict`. Operates the same as
+        :meth:`from_data`, but forces interpretation of the data as a
+        Mapping.
+        """
+        return cls.from_data(data=data, headers=headers,
+                             max_depth=max_depth, _data_type=DictInputType(),
+                             metadata=metadata)
+
+    @classmethod
+    def from_list(cls, data, headers=_MISSING, max_depth=1, metadata=None):
+        """Create a Table from a :class:`list`. Operates the same as
+        :meth:`from_data`, but forces the interpretation of the data
+        as a Sequence.
+        """
+        return cls.from_data(data=data, headers=headers,
+                             max_depth=max_depth, _data_type=ListInputType(),
+                             metadata=metadata)
+
+    @classmethod
+    def from_object(cls, data, headers=_MISSING, max_depth=1, metadata=None):
+        """Create a Table from an :class:`object`. Operates the same as
+        :meth:`from_data`, but forces the interpretation of the data
+        as an object. May be useful for some :class:`dict` and
+        :class:`list` subtypes.
+        """
+        return cls.from_data(data=data, headers=headers,
+                             max_depth=max_depth, _data_type=ObjectInputType(),
+                             metadata=metadata)
+
+    @classmethod
+    def from_data(cls, data, headers=_MISSING, max_depth=1, **kwargs):
+
+        """Create a Table from any supported data, heuristically
+        selecting how to represent the data in Table format.
+
+        Args:
+            data (object): Any object or iterable with data to be
+                imported to the Table.
+
+            headers (iterable): An iterable of headers to be matched
+                to the data. If not explicitly passed, headers will be
+                guessed for certain datatypes.
+
+            max_depth (int): The level to which nested Tables should
+                be created (default: 1).
+
+            _data_type (InputType subclass): For advanced use cases,
+                do not guess the type of the input data, use this data
+                type instead.
+        """
+        # TODO: seen/cycle detection/reuse ?
+        # maxdepth follows the same behavior as find command
+        # i.e., it doesn't work if max_depth=0 is passed in
+        metadata = kwargs.pop('metadata', None)
+        _data_type = kwargs.pop('_data_type', None)
+
+        if max_depth < 1:
+            # return data instead?
+            return cls(headers=headers, metadata=metadata)
+        is_seq = isinstance(data, Sequence)
+        if is_seq:
+            if not data:
+                return cls(headers=headers, metadata=metadata)
+            to_check = data[0]
+            if not _data_type:
+                for it in cls._input_types:
+                    if it.check_type(to_check):
+                        _data_type = it
+                        break
+                else:
+                    # not particularly happy about this rewind-y approach
+                    is_seq = False
+                    to_check = data
+        else:
+            if type(data) in _DNR:
+                # hmm, got scalar data.
+                # raise an exception or make an exception, nahmsayn?
+                return cls([[data]], headers=headers, metadata=metadata)
+            to_check = data
+        if not _data_type:
+            for it in cls._input_types:
+                if it.check_type(to_check):
+                    _data_type = it
+                    break
+            else:
+                raise UnsupportedData('unsupported data type %r'
+                                      % type(data))
+        if headers is _MISSING:
+            headers = _data_type.guess_headers(to_check)
+        if is_seq:
+            entries = _data_type.get_entry_seq(data, headers)
+        else:
+            entries = [_data_type.get_entry(data, headers)]
+        if max_depth > 1:
+            new_max_depth = max_depth - 1
+            for i, entry in enumerate(entries):
+                for j, cell in enumerate(entry):
+                    if type(cell) in _DNR:
+                        # optimization to avoid function overhead
+                        continue
+                    try:
+                        entries[i][j] = cls.from_data(cell,
+                                                      max_depth=new_max_depth)
+                    except UnsupportedData:
+                        continue
+        return cls(entries, headers=headers, metadata=metadata)
+
+    def __len__(self):
+        return len(self._data)
+
+    def __getitem__(self, idx):
+        return self._data[idx]
+
+    def __repr__(self):
+        cn = self.__class__.__name__
+        if self.headers:
+            return '%s(headers=%r, data=%r)' % (cn, self.headers, self._data)
+        else:
+            return '%s(%r)' % (cn, self._data)
+
+    def to_html(self, orientation=None, wrapped=True,
+                with_headers=True, with_newlines=True,
+                with_metadata=False, max_depth=1):
+        """Render this Table to HTML. Configure the structure of Table
+        HTML by subclassing and overriding ``_html_*`` class
+        attributes.
+
+        Args:
+            orientation (str): one of 'auto', 'horizontal', or
+                'vertical' (or the first letter of any of
+                those). Default 'auto'.
+            wrapped (bool): whether or not to include the wrapping
+                '<table></table>' tags. Default ``True``, set to
+                ``False`` if appending multiple Table outputs or an
+                otherwise customized HTML wrapping tag is needed.
+            with_newlines (bool): Set to ``True`` if output should
+                include added newlines to make the HTML more
+                readable. Default ``False``.
+            with_metadata (bool/str): Set to ``True`` if output should
+                be preceded with a Table of preset metadata, if it
+                exists. Set to special value ``'bottom'`` if the
+                metadata Table HTML should come *after* the main HTML output.
+            max_depth (int): Indicate how deeply to nest HTML tables
+                before simply reverting to :func:`repr`-ing the nested
+                data.
+
+        Returns:
+            A text string of the HTML of the rendered table.
+
+        """
+        lines = []
+        headers = []
+        if with_metadata and self.metadata:
+            metadata_table = Table.from_data(self.metadata,
+                                             max_depth=max_depth)
+            metadata_html = metadata_table.to_html(with_headers=True,
+                                                   with_newlines=with_newlines,
+                                                   with_metadata=False,
+                                                   max_depth=max_depth)
+            if with_metadata != 'bottom':
+                lines.append(metadata_html)
+                lines.append('<br />')
+
+        if with_headers and self.headers:
+            headers.extend(self.headers)
+            headers.extend([None] * (self._width - len(self.headers)))
+        if wrapped:
+            lines.append(self._html_table_tag)
+        orientation = orientation or 'auto'
+        ol = orientation[0].lower()
+        if ol == 'a':
+            ol = 'h' if len(self) > 1 else 'v'
+        if ol == 'h':
+            self._add_horizontal_html_lines(lines, headers=headers,
+                                            max_depth=max_depth)
+        elif ol == 'v':
+            self._add_vertical_html_lines(lines, headers=headers,
+                                          max_depth=max_depth)
+        else:
+            raise ValueError("expected one of 'auto', 'vertical', or"
+                             " 'horizontal', not %r" % orientation)
+        if with_metadata and self.metadata and with_metadata == 'bottom':
+            lines.append('<br />')
+            lines.append(metadata_html)
+
+        if wrapped:
+            lines.append(self._html_table_tag_close)
+        sep = '\n' if with_newlines else ''
+        return sep.join(lines)
+
+    def get_cell_html(self, value):
+        """Called on each value in an HTML table. By default it simply escapes
+        the HTML. Override this method to add additional conditions
+        and behaviors, but take care to ensure the final output is
+        HTML escaped.
+        """
+        return escape_html(value)
+
+    def _add_horizontal_html_lines(self, lines, headers, max_depth):
+        esc = self.get_cell_html
+        new_depth = max_depth - 1 if max_depth > 1 else max_depth
+        if max_depth > 1:
+            new_depth = max_depth - 1
+        if headers:
+            _thth = self._html_th_close + self._html_th
+            lines.append(self._html_thead)
+            lines.append(self._html_tr + self._html_th +
+                         _thth.join([esc(h) for h in headers]) +
+                         self._html_th_close + self._html_tr_close)
+            lines.append(self._html_thead_close)
+        trtd, _tdtd, _td_tr = (self._html_tr + self._html_td,
+                               self._html_td_close + self._html_td,
+                               self._html_td_close + self._html_tr_close)
+        lines.append(self._html_tbody)
+        for row in self._data:
+            if max_depth > 1:
+                _fill_parts = []
+                for cell in row:
+                    if isinstance(cell, Table):
+                        _fill_parts.append(cell.to_html(max_depth=new_depth))
+                    else:
+                        _fill_parts.append(esc(cell))
+            else:
+                _fill_parts = [esc(c) for c in row]
+            lines.append(''.join([trtd, _tdtd.join(_fill_parts), _td_tr]))
+        lines.append(self._html_tbody_close)
+
+    def _add_vertical_html_lines(self, lines, headers, max_depth):
+        esc = self.get_cell_html
+        new_depth = max_depth - 1 if max_depth > 1 else max_depth
+        tr, th, _th = self._html_tr, self._html_th, self._html_th_close
+        td, _tdtd = self._html_td, self._html_td_close + self._html_td
+        _td_tr = self._html_td_close + self._html_tr_close
+        for i in range(self._width):
+            line_parts = [tr]
+            if headers:
+                line_parts.extend([th, esc(headers[i]), _th])
+            if max_depth > 1:
+                new_depth = max_depth - 1
+                _fill_parts = []
+                for row in self._data:
+                    cell = row[i]
+                    if isinstance(cell, Table):
+                        _fill_parts.append(cell.to_html(max_depth=new_depth))
+                    else:
+                        _fill_parts.append(esc(row[i]))
+            else:
+                _fill_parts = [esc(row[i]) for row in self._data]
+            line_parts.extend([td, _tdtd.join(_fill_parts), _td_tr])
+            lines.append(''.join(line_parts))
+
+    def to_text(self, with_headers=True, maxlen=None):
+        """Get the Table's textual representation. Only works well
+        for Tables with non-recursive data.
+
+        Args:
+            with_headers (bool): Whether to include a header row at the top.
+            maxlen (int): Max length of data in each cell.
+        """
+        lines = []
+        widths = []
+        headers = list(self.headers)
+        text_data = [[to_text(cell, maxlen=maxlen) for cell in row]
+                     for row in self._data]
+        for idx in range(self._width):
+            cur_widths = [len(cur) for cur in text_data]
+            if with_headers:
+                cur_widths.append(len(to_text(headers[idx], maxlen=maxlen)))
+            widths.append(max(cur_widths))
+        if with_headers:
+            lines.append(' | '.join([h.center(widths[i])
+                                     for i, h in enumerate(headers)]))
+            lines.append('-|-'.join(['-' * w for w in widths]))
+        for row in text_data:
+            lines.append(' | '.join([cell.center(widths[j])
+                                     for j, cell in enumerate(row)]))
+        return '\n'.join(lines)