Mercurial > repos > shellac > guppy_basecaller
diff env/lib/python3.7/site-packages/boltons/tableutils.py @ 0:26e78fe6e8c4 draft
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author | shellac |
---|---|
date | Sat, 02 May 2020 07:14:21 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/env/lib/python3.7/site-packages/boltons/tableutils.py Sat May 02 07:14:21 2020 -0400 @@ -0,0 +1,569 @@ +# -*- coding: utf-8 -*- +"""If there is one recurring theme in ``boltons``, it is that Python +has excellent datastructures that constitute a good foundation for +most quick manipulations, as well as building applications. However, +Python usage has grown much faster than builtin data structure +power. Python has a growing need for more advanced general-purpose +data structures which behave intuitively. + +The :class:`Table` class is one example. When handed one- or +two-dimensional data, it can provide useful, if basic, text and HTML +renditions of small to medium sized data. It also heuristically +handles recursive data of various formats (lists, dicts, namedtuples, +objects). + +For more advanced :class:`Table`-style manipulation check out the +`pandas`_ DataFrame. + +.. _pandas: http://pandas.pydata.org/ + +""" + +from __future__ import print_function + +try: + from html import escape as html_escape +except ImportError: + from cgi import escape as html_escape +import types +from itertools import islice +try: + from collections.abc import Sequence, Mapping, MutableSequence +except ImportError: + from collections import Sequence, Mapping, MutableSequence +try: + string_types, integer_types = (str, unicode), (int, long) + from cgi import escape as html_escape +except NameError: + # Python 3 compat + unicode = str + string_types, integer_types = (str, bytes), (int,) + from html import escape as html_escape + +try: + from typeutils import make_sentinel + _MISSING = make_sentinel(var_name='_MISSING') +except ImportError: + _MISSING = object() + +""" +Some idle feature thoughts: + +* shift around column order without rearranging data +* gotta make it so you can add additional items, not just initialize with +* maybe a shortcut would be to allow adding of Tables to other Tables +* what's the perf of preallocating lists and overwriting items versus + starting from empty? +* is it possible to effectively tell the difference between when a + Table is from_data()'d with a single row (list) or with a list of lists? +* CSS: white-space pre-line or pre-wrap maybe? +* Would be nice to support different backends (currently uses lists + exclusively). Sometimes large datasets come in list-of-dicts and + list-of-tuples format and it's desirable to cut down processing overhead. + +TODO: make iterable on rows? +""" + +__all__ = ['Table'] + + +def to_text(obj, maxlen=None): + try: + text = unicode(obj) + except Exception: + try: + text = unicode(repr(obj)) + except Exception: + text = unicode(object.__repr__(obj)) + if maxlen and len(text) > maxlen: + text = text[:maxlen - 3] + '...' + # TODO: inverse of ljust/rjust/center + return text + + +def escape_html(obj, maxlen=None): + text = to_text(obj, maxlen=maxlen) + return html_escape(text, quote=True) + + +_DNR = set((type(None), bool, complex, float, + type(NotImplemented), slice, + types.FunctionType, types.MethodType, types.BuiltinFunctionType, + types.GeneratorType) + string_types + integer_types) + + +class UnsupportedData(TypeError): + pass + + +class InputType(object): + def __init__(self, *a, **kw): + pass + + def get_entry_seq(self, data_seq, headers): + return [self.get_entry(entry, headers) for entry in data_seq] + + +class DictInputType(InputType): + def check_type(self, obj): + return isinstance(obj, Mapping) + + def guess_headers(self, obj): + return sorted(obj.keys()) + + def get_entry(self, obj, headers): + return [obj.get(h) for h in headers] + + def get_entry_seq(self, obj, headers): + return [[ci.get(h) for h in headers] for ci in obj] + + +class ObjectInputType(InputType): + def check_type(self, obj): + return type(obj) not in _DNR and hasattr(obj, '__class__') + + def guess_headers(self, obj): + headers = [] + for attr in dir(obj): + # an object's __dict__ could technically have non-string keys + try: + val = getattr(obj, attr) + except Exception: + # seen on greenlet: `run` shows in dir() but raises + # AttributeError. Also properties misbehave. + continue + if callable(val): + continue + headers.append(attr) + return headers + + def get_entry(self, obj, headers): + values = [] + for h in headers: + try: + values.append(getattr(obj, h)) + except Exception: + values.append(None) + return values + + +# might be better to hardcode list support since it's so close to the +# core or might be better to make this the copy-style from_* importer +# and have the non-copy style be hardcoded in __init__ +class ListInputType(InputType): + def check_type(self, obj): + return isinstance(obj, MutableSequence) + + def guess_headers(self, obj): + return None + + def get_entry(self, obj, headers): + return obj + + def get_entry_seq(self, obj_seq, headers): + return obj_seq + + +class TupleInputType(InputType): + def check_type(self, obj): + return isinstance(obj, tuple) + + def guess_headers(self, obj): + return None + + def get_entry(self, obj, headers): + return list(obj) + + def get_entry_seq(self, obj_seq, headers): + return [list(t) for t in obj_seq] + + +class NamedTupleInputType(InputType): + def check_type(self, obj): + return hasattr(obj, '_fields') and isinstance(obj, tuple) + + def guess_headers(self, obj): + return list(obj._fields) + + def get_entry(self, obj, headers): + return [getattr(obj, h, None) for h in headers] + + def get_entry_seq(self, obj_seq, headers): + return [[getattr(obj, h, None) for h in headers] for obj in obj_seq] + + +class Table(object): + """ + This Table class is meant to be simple, low-overhead, and extensible. Its + most common use would be for translation between in-memory data + structures and serialization formats, such as HTML and console-ready text. + + As such, it stores data in list-of-lists format, and *does not* copy + lists passed in. It also reserves the right to modify those lists in a + "filling" process, whereby short lists are extended to the width of + the table (usually determined by number of headers). This greatly + reduces overhead and processing/validation that would have to occur + otherwise. + + General description of headers behavior: + + Headers describe the columns, but are not part of the data, however, + if the *headers* argument is omitted, Table tries to infer header + names from the data. It is possible to have a table with no headers, + just pass in ``headers=None``. + + Supported inputs: + + * :class:`list` of :class:`list` objects + * :class:`dict` (list/single) + * :class:`object` (list/single) + * :class:`collections.namedtuple` (list/single) + * TODO: DB API cursor? + * TODO: json + + Supported outputs: + + * HTML + * Pretty text (also usable as GF Markdown) + * TODO: CSV + * TODO: json + * TODO: json lines + + To minimize resident size, the Table data is stored as a list of lists. + """ + + # order definitely matters here + _input_types = [DictInputType(), ListInputType(), + NamedTupleInputType(), TupleInputType(), + ObjectInputType()] + + _html_tr, _html_tr_close = '<tr>', '</tr>' + _html_th, _html_th_close = '<th>', '</th>' + _html_td, _html_td_close = '<td>', '</td>' + _html_thead, _html_thead_close = '<thead>', '</thead>' + _html_tbody, _html_tbody_close = '<tbody>', '</tbody>' + + # _html_tfoot, _html_tfoot_close = '<tfoot>', '</tfoot>' + _html_table_tag, _html_table_tag_close = '<table>', '</table>' + + def __init__(self, data=None, headers=_MISSING, metadata=None): + if headers is _MISSING: + headers = [] + if data: + headers, data = list(data[0]), islice(data, 1, None) + self.headers = headers or [] + self.metadata = metadata or {} + self._data = [] + self._width = 0 + + self.extend(data) + + def extend(self, data): + """ + Append the given data to the end of the Table. + """ + if not data: + return + self._data.extend(data) + self._set_width() + self._fill() + + def _set_width(self, reset=False): + if reset: + self._width = 0 + if self._width: + return + if self.headers: + self._width = len(self.headers) + return + self._width = max([len(d) for d in self._data]) + + def _fill(self): + width, filler = self._width, [None] + if not width: + return + for d in self._data: + rem = width - len(d) + if rem > 0: + d.extend(filler * rem) + return + + @classmethod + def from_dict(cls, data, headers=_MISSING, max_depth=1, metadata=None): + """Create a Table from a :class:`dict`. Operates the same as + :meth:`from_data`, but forces interpretation of the data as a + Mapping. + """ + return cls.from_data(data=data, headers=headers, + max_depth=max_depth, _data_type=DictInputType(), + metadata=metadata) + + @classmethod + def from_list(cls, data, headers=_MISSING, max_depth=1, metadata=None): + """Create a Table from a :class:`list`. Operates the same as + :meth:`from_data`, but forces the interpretation of the data + as a Sequence. + """ + return cls.from_data(data=data, headers=headers, + max_depth=max_depth, _data_type=ListInputType(), + metadata=metadata) + + @classmethod + def from_object(cls, data, headers=_MISSING, max_depth=1, metadata=None): + """Create a Table from an :class:`object`. Operates the same as + :meth:`from_data`, but forces the interpretation of the data + as an object. May be useful for some :class:`dict` and + :class:`list` subtypes. + """ + return cls.from_data(data=data, headers=headers, + max_depth=max_depth, _data_type=ObjectInputType(), + metadata=metadata) + + @classmethod + def from_data(cls, data, headers=_MISSING, max_depth=1, **kwargs): + + """Create a Table from any supported data, heuristically + selecting how to represent the data in Table format. + + Args: + data (object): Any object or iterable with data to be + imported to the Table. + + headers (iterable): An iterable of headers to be matched + to the data. If not explicitly passed, headers will be + guessed for certain datatypes. + + max_depth (int): The level to which nested Tables should + be created (default: 1). + + _data_type (InputType subclass): For advanced use cases, + do not guess the type of the input data, use this data + type instead. + """ + # TODO: seen/cycle detection/reuse ? + # maxdepth follows the same behavior as find command + # i.e., it doesn't work if max_depth=0 is passed in + metadata = kwargs.pop('metadata', None) + _data_type = kwargs.pop('_data_type', None) + + if max_depth < 1: + # return data instead? + return cls(headers=headers, metadata=metadata) + is_seq = isinstance(data, Sequence) + if is_seq: + if not data: + return cls(headers=headers, metadata=metadata) + to_check = data[0] + if not _data_type: + for it in cls._input_types: + if it.check_type(to_check): + _data_type = it + break + else: + # not particularly happy about this rewind-y approach + is_seq = False + to_check = data + else: + if type(data) in _DNR: + # hmm, got scalar data. + # raise an exception or make an exception, nahmsayn? + return cls([[data]], headers=headers, metadata=metadata) + to_check = data + if not _data_type: + for it in cls._input_types: + if it.check_type(to_check): + _data_type = it + break + else: + raise UnsupportedData('unsupported data type %r' + % type(data)) + if headers is _MISSING: + headers = _data_type.guess_headers(to_check) + if is_seq: + entries = _data_type.get_entry_seq(data, headers) + else: + entries = [_data_type.get_entry(data, headers)] + if max_depth > 1: + new_max_depth = max_depth - 1 + for i, entry in enumerate(entries): + for j, cell in enumerate(entry): + if type(cell) in _DNR: + # optimization to avoid function overhead + continue + try: + entries[i][j] = cls.from_data(cell, + max_depth=new_max_depth) + except UnsupportedData: + continue + return cls(entries, headers=headers, metadata=metadata) + + def __len__(self): + return len(self._data) + + def __getitem__(self, idx): + return self._data[idx] + + def __repr__(self): + cn = self.__class__.__name__ + if self.headers: + return '%s(headers=%r, data=%r)' % (cn, self.headers, self._data) + else: + return '%s(%r)' % (cn, self._data) + + def to_html(self, orientation=None, wrapped=True, + with_headers=True, with_newlines=True, + with_metadata=False, max_depth=1): + """Render this Table to HTML. Configure the structure of Table + HTML by subclassing and overriding ``_html_*`` class + attributes. + + Args: + orientation (str): one of 'auto', 'horizontal', or + 'vertical' (or the first letter of any of + those). Default 'auto'. + wrapped (bool): whether or not to include the wrapping + '<table></table>' tags. Default ``True``, set to + ``False`` if appending multiple Table outputs or an + otherwise customized HTML wrapping tag is needed. + with_newlines (bool): Set to ``True`` if output should + include added newlines to make the HTML more + readable. Default ``False``. + with_metadata (bool/str): Set to ``True`` if output should + be preceded with a Table of preset metadata, if it + exists. Set to special value ``'bottom'`` if the + metadata Table HTML should come *after* the main HTML output. + max_depth (int): Indicate how deeply to nest HTML tables + before simply reverting to :func:`repr`-ing the nested + data. + + Returns: + A text string of the HTML of the rendered table. + + """ + lines = [] + headers = [] + if with_metadata and self.metadata: + metadata_table = Table.from_data(self.metadata, + max_depth=max_depth) + metadata_html = metadata_table.to_html(with_headers=True, + with_newlines=with_newlines, + with_metadata=False, + max_depth=max_depth) + if with_metadata != 'bottom': + lines.append(metadata_html) + lines.append('<br />') + + if with_headers and self.headers: + headers.extend(self.headers) + headers.extend([None] * (self._width - len(self.headers))) + if wrapped: + lines.append(self._html_table_tag) + orientation = orientation or 'auto' + ol = orientation[0].lower() + if ol == 'a': + ol = 'h' if len(self) > 1 else 'v' + if ol == 'h': + self._add_horizontal_html_lines(lines, headers=headers, + max_depth=max_depth) + elif ol == 'v': + self._add_vertical_html_lines(lines, headers=headers, + max_depth=max_depth) + else: + raise ValueError("expected one of 'auto', 'vertical', or" + " 'horizontal', not %r" % orientation) + if with_metadata and self.metadata and with_metadata == 'bottom': + lines.append('<br />') + lines.append(metadata_html) + + if wrapped: + lines.append(self._html_table_tag_close) + sep = '\n' if with_newlines else '' + return sep.join(lines) + + def get_cell_html(self, value): + """Called on each value in an HTML table. By default it simply escapes + the HTML. Override this method to add additional conditions + and behaviors, but take care to ensure the final output is + HTML escaped. + """ + return escape_html(value) + + def _add_horizontal_html_lines(self, lines, headers, max_depth): + esc = self.get_cell_html + new_depth = max_depth - 1 if max_depth > 1 else max_depth + if max_depth > 1: + new_depth = max_depth - 1 + if headers: + _thth = self._html_th_close + self._html_th + lines.append(self._html_thead) + lines.append(self._html_tr + self._html_th + + _thth.join([esc(h) for h in headers]) + + self._html_th_close + self._html_tr_close) + lines.append(self._html_thead_close) + trtd, _tdtd, _td_tr = (self._html_tr + self._html_td, + self._html_td_close + self._html_td, + self._html_td_close + self._html_tr_close) + lines.append(self._html_tbody) + for row in self._data: + if max_depth > 1: + _fill_parts = [] + for cell in row: + if isinstance(cell, Table): + _fill_parts.append(cell.to_html(max_depth=new_depth)) + else: + _fill_parts.append(esc(cell)) + else: + _fill_parts = [esc(c) for c in row] + lines.append(''.join([trtd, _tdtd.join(_fill_parts), _td_tr])) + lines.append(self._html_tbody_close) + + def _add_vertical_html_lines(self, lines, headers, max_depth): + esc = self.get_cell_html + new_depth = max_depth - 1 if max_depth > 1 else max_depth + tr, th, _th = self._html_tr, self._html_th, self._html_th_close + td, _tdtd = self._html_td, self._html_td_close + self._html_td + _td_tr = self._html_td_close + self._html_tr_close + for i in range(self._width): + line_parts = [tr] + if headers: + line_parts.extend([th, esc(headers[i]), _th]) + if max_depth > 1: + new_depth = max_depth - 1 + _fill_parts = [] + for row in self._data: + cell = row[i] + if isinstance(cell, Table): + _fill_parts.append(cell.to_html(max_depth=new_depth)) + else: + _fill_parts.append(esc(row[i])) + else: + _fill_parts = [esc(row[i]) for row in self._data] + line_parts.extend([td, _tdtd.join(_fill_parts), _td_tr]) + lines.append(''.join(line_parts)) + + def to_text(self, with_headers=True, maxlen=None): + """Get the Table's textual representation. Only works well + for Tables with non-recursive data. + + Args: + with_headers (bool): Whether to include a header row at the top. + maxlen (int): Max length of data in each cell. + """ + lines = [] + widths = [] + headers = list(self.headers) + text_data = [[to_text(cell, maxlen=maxlen) for cell in row] + for row in self._data] + for idx in range(self._width): + cur_widths = [len(cur) for cur in text_data] + if with_headers: + cur_widths.append(len(to_text(headers[idx], maxlen=maxlen))) + widths.append(max(cur_widths)) + if with_headers: + lines.append(' | '.join([h.center(widths[i]) + for i, h in enumerate(headers)])) + lines.append('-|-'.join(['-' * w for w in widths])) + for row in text_data: + lines.append(' | '.join([cell.center(widths[j]) + for j, cell in enumerate(row)])) + return '\n'.join(lines)