Mercurial > repos > shellac > guppy_basecaller
diff env/lib/python3.7/site-packages/boltons/tableutils.py @ 5:9b1c78e6ba9c draft default tip
"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
author | shellac |
---|---|
date | Mon, 01 Jun 2020 08:59:25 -0400 |
parents | 79f47841a781 |
children |
line wrap: on
line diff
--- a/env/lib/python3.7/site-packages/boltons/tableutils.py Thu May 14 16:47:39 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,569 +0,0 @@ -# -*- coding: utf-8 -*- -"""If there is one recurring theme in ``boltons``, it is that Python -has excellent datastructures that constitute a good foundation for -most quick manipulations, as well as building applications. However, -Python usage has grown much faster than builtin data structure -power. Python has a growing need for more advanced general-purpose -data structures which behave intuitively. - -The :class:`Table` class is one example. When handed one- or -two-dimensional data, it can provide useful, if basic, text and HTML -renditions of small to medium sized data. It also heuristically -handles recursive data of various formats (lists, dicts, namedtuples, -objects). - -For more advanced :class:`Table`-style manipulation check out the -`pandas`_ DataFrame. - -.. _pandas: http://pandas.pydata.org/ - -""" - -from __future__ import print_function - -try: - from html import escape as html_escape -except ImportError: - from cgi import escape as html_escape -import types -from itertools import islice -try: - from collections.abc import Sequence, Mapping, MutableSequence -except ImportError: - from collections import Sequence, Mapping, MutableSequence -try: - string_types, integer_types = (str, unicode), (int, long) - from cgi import escape as html_escape -except NameError: - # Python 3 compat - unicode = str - string_types, integer_types = (str, bytes), (int,) - from html import escape as html_escape - -try: - from typeutils import make_sentinel - _MISSING = make_sentinel(var_name='_MISSING') -except ImportError: - _MISSING = object() - -""" -Some idle feature thoughts: - -* shift around column order without rearranging data -* gotta make it so you can add additional items, not just initialize with -* maybe a shortcut would be to allow adding of Tables to other Tables -* what's the perf of preallocating lists and overwriting items versus - starting from empty? -* is it possible to effectively tell the difference between when a - Table is from_data()'d with a single row (list) or with a list of lists? -* CSS: white-space pre-line or pre-wrap maybe? -* Would be nice to support different backends (currently uses lists - exclusively). Sometimes large datasets come in list-of-dicts and - list-of-tuples format and it's desirable to cut down processing overhead. - -TODO: make iterable on rows? -""" - -__all__ = ['Table'] - - -def to_text(obj, maxlen=None): - try: - text = unicode(obj) - except Exception: - try: - text = unicode(repr(obj)) - except Exception: - text = unicode(object.__repr__(obj)) - if maxlen and len(text) > maxlen: - text = text[:maxlen - 3] + '...' - # TODO: inverse of ljust/rjust/center - return text - - -def escape_html(obj, maxlen=None): - text = to_text(obj, maxlen=maxlen) - return html_escape(text, quote=True) - - -_DNR = set((type(None), bool, complex, float, - type(NotImplemented), slice, - types.FunctionType, types.MethodType, types.BuiltinFunctionType, - types.GeneratorType) + string_types + integer_types) - - -class UnsupportedData(TypeError): - pass - - -class InputType(object): - def __init__(self, *a, **kw): - pass - - def get_entry_seq(self, data_seq, headers): - return [self.get_entry(entry, headers) for entry in data_seq] - - -class DictInputType(InputType): - def check_type(self, obj): - return isinstance(obj, Mapping) - - def guess_headers(self, obj): - return sorted(obj.keys()) - - def get_entry(self, obj, headers): - return [obj.get(h) for h in headers] - - def get_entry_seq(self, obj, headers): - return [[ci.get(h) for h in headers] for ci in obj] - - -class ObjectInputType(InputType): - def check_type(self, obj): - return type(obj) not in _DNR and hasattr(obj, '__class__') - - def guess_headers(self, obj): - headers = [] - for attr in dir(obj): - # an object's __dict__ could technically have non-string keys - try: - val = getattr(obj, attr) - except Exception: - # seen on greenlet: `run` shows in dir() but raises - # AttributeError. Also properties misbehave. - continue - if callable(val): - continue - headers.append(attr) - return headers - - def get_entry(self, obj, headers): - values = [] - for h in headers: - try: - values.append(getattr(obj, h)) - except Exception: - values.append(None) - return values - - -# might be better to hardcode list support since it's so close to the -# core or might be better to make this the copy-style from_* importer -# and have the non-copy style be hardcoded in __init__ -class ListInputType(InputType): - def check_type(self, obj): - return isinstance(obj, MutableSequence) - - def guess_headers(self, obj): - return None - - def get_entry(self, obj, headers): - return obj - - def get_entry_seq(self, obj_seq, headers): - return obj_seq - - -class TupleInputType(InputType): - def check_type(self, obj): - return isinstance(obj, tuple) - - def guess_headers(self, obj): - return None - - def get_entry(self, obj, headers): - return list(obj) - - def get_entry_seq(self, obj_seq, headers): - return [list(t) for t in obj_seq] - - -class NamedTupleInputType(InputType): - def check_type(self, obj): - return hasattr(obj, '_fields') and isinstance(obj, tuple) - - def guess_headers(self, obj): - return list(obj._fields) - - def get_entry(self, obj, headers): - return [getattr(obj, h, None) for h in headers] - - def get_entry_seq(self, obj_seq, headers): - return [[getattr(obj, h, None) for h in headers] for obj in obj_seq] - - -class Table(object): - """ - This Table class is meant to be simple, low-overhead, and extensible. Its - most common use would be for translation between in-memory data - structures and serialization formats, such as HTML and console-ready text. - - As such, it stores data in list-of-lists format, and *does not* copy - lists passed in. It also reserves the right to modify those lists in a - "filling" process, whereby short lists are extended to the width of - the table (usually determined by number of headers). This greatly - reduces overhead and processing/validation that would have to occur - otherwise. - - General description of headers behavior: - - Headers describe the columns, but are not part of the data, however, - if the *headers* argument is omitted, Table tries to infer header - names from the data. It is possible to have a table with no headers, - just pass in ``headers=None``. - - Supported inputs: - - * :class:`list` of :class:`list` objects - * :class:`dict` (list/single) - * :class:`object` (list/single) - * :class:`collections.namedtuple` (list/single) - * TODO: DB API cursor? - * TODO: json - - Supported outputs: - - * HTML - * Pretty text (also usable as GF Markdown) - * TODO: CSV - * TODO: json - * TODO: json lines - - To minimize resident size, the Table data is stored as a list of lists. - """ - - # order definitely matters here - _input_types = [DictInputType(), ListInputType(), - NamedTupleInputType(), TupleInputType(), - ObjectInputType()] - - _html_tr, _html_tr_close = '<tr>', '</tr>' - _html_th, _html_th_close = '<th>', '</th>' - _html_td, _html_td_close = '<td>', '</td>' - _html_thead, _html_thead_close = '<thead>', '</thead>' - _html_tbody, _html_tbody_close = '<tbody>', '</tbody>' - - # _html_tfoot, _html_tfoot_close = '<tfoot>', '</tfoot>' - _html_table_tag, _html_table_tag_close = '<table>', '</table>' - - def __init__(self, data=None, headers=_MISSING, metadata=None): - if headers is _MISSING: - headers = [] - if data: - headers, data = list(data[0]), islice(data, 1, None) - self.headers = headers or [] - self.metadata = metadata or {} - self._data = [] - self._width = 0 - - self.extend(data) - - def extend(self, data): - """ - Append the given data to the end of the Table. - """ - if not data: - return - self._data.extend(data) - self._set_width() - self._fill() - - def _set_width(self, reset=False): - if reset: - self._width = 0 - if self._width: - return - if self.headers: - self._width = len(self.headers) - return - self._width = max([len(d) for d in self._data]) - - def _fill(self): - width, filler = self._width, [None] - if not width: - return - for d in self._data: - rem = width - len(d) - if rem > 0: - d.extend(filler * rem) - return - - @classmethod - def from_dict(cls, data, headers=_MISSING, max_depth=1, metadata=None): - """Create a Table from a :class:`dict`. Operates the same as - :meth:`from_data`, but forces interpretation of the data as a - Mapping. - """ - return cls.from_data(data=data, headers=headers, - max_depth=max_depth, _data_type=DictInputType(), - metadata=metadata) - - @classmethod - def from_list(cls, data, headers=_MISSING, max_depth=1, metadata=None): - """Create a Table from a :class:`list`. Operates the same as - :meth:`from_data`, but forces the interpretation of the data - as a Sequence. - """ - return cls.from_data(data=data, headers=headers, - max_depth=max_depth, _data_type=ListInputType(), - metadata=metadata) - - @classmethod - def from_object(cls, data, headers=_MISSING, max_depth=1, metadata=None): - """Create a Table from an :class:`object`. Operates the same as - :meth:`from_data`, but forces the interpretation of the data - as an object. May be useful for some :class:`dict` and - :class:`list` subtypes. - """ - return cls.from_data(data=data, headers=headers, - max_depth=max_depth, _data_type=ObjectInputType(), - metadata=metadata) - - @classmethod - def from_data(cls, data, headers=_MISSING, max_depth=1, **kwargs): - - """Create a Table from any supported data, heuristically - selecting how to represent the data in Table format. - - Args: - data (object): Any object or iterable with data to be - imported to the Table. - - headers (iterable): An iterable of headers to be matched - to the data. If not explicitly passed, headers will be - guessed for certain datatypes. - - max_depth (int): The level to which nested Tables should - be created (default: 1). - - _data_type (InputType subclass): For advanced use cases, - do not guess the type of the input data, use this data - type instead. - """ - # TODO: seen/cycle detection/reuse ? - # maxdepth follows the same behavior as find command - # i.e., it doesn't work if max_depth=0 is passed in - metadata = kwargs.pop('metadata', None) - _data_type = kwargs.pop('_data_type', None) - - if max_depth < 1: - # return data instead? - return cls(headers=headers, metadata=metadata) - is_seq = isinstance(data, Sequence) - if is_seq: - if not data: - return cls(headers=headers, metadata=metadata) - to_check = data[0] - if not _data_type: - for it in cls._input_types: - if it.check_type(to_check): - _data_type = it - break - else: - # not particularly happy about this rewind-y approach - is_seq = False - to_check = data - else: - if type(data) in _DNR: - # hmm, got scalar data. - # raise an exception or make an exception, nahmsayn? - return cls([[data]], headers=headers, metadata=metadata) - to_check = data - if not _data_type: - for it in cls._input_types: - if it.check_type(to_check): - _data_type = it - break - else: - raise UnsupportedData('unsupported data type %r' - % type(data)) - if headers is _MISSING: - headers = _data_type.guess_headers(to_check) - if is_seq: - entries = _data_type.get_entry_seq(data, headers) - else: - entries = [_data_type.get_entry(data, headers)] - if max_depth > 1: - new_max_depth = max_depth - 1 - for i, entry in enumerate(entries): - for j, cell in enumerate(entry): - if type(cell) in _DNR: - # optimization to avoid function overhead - continue - try: - entries[i][j] = cls.from_data(cell, - max_depth=new_max_depth) - except UnsupportedData: - continue - return cls(entries, headers=headers, metadata=metadata) - - def __len__(self): - return len(self._data) - - def __getitem__(self, idx): - return self._data[idx] - - def __repr__(self): - cn = self.__class__.__name__ - if self.headers: - return '%s(headers=%r, data=%r)' % (cn, self.headers, self._data) - else: - return '%s(%r)' % (cn, self._data) - - def to_html(self, orientation=None, wrapped=True, - with_headers=True, with_newlines=True, - with_metadata=False, max_depth=1): - """Render this Table to HTML. Configure the structure of Table - HTML by subclassing and overriding ``_html_*`` class - attributes. - - Args: - orientation (str): one of 'auto', 'horizontal', or - 'vertical' (or the first letter of any of - those). Default 'auto'. - wrapped (bool): whether or not to include the wrapping - '<table></table>' tags. Default ``True``, set to - ``False`` if appending multiple Table outputs or an - otherwise customized HTML wrapping tag is needed. - with_newlines (bool): Set to ``True`` if output should - include added newlines to make the HTML more - readable. Default ``False``. - with_metadata (bool/str): Set to ``True`` if output should - be preceded with a Table of preset metadata, if it - exists. Set to special value ``'bottom'`` if the - metadata Table HTML should come *after* the main HTML output. - max_depth (int): Indicate how deeply to nest HTML tables - before simply reverting to :func:`repr`-ing the nested - data. - - Returns: - A text string of the HTML of the rendered table. - - """ - lines = [] - headers = [] - if with_metadata and self.metadata: - metadata_table = Table.from_data(self.metadata, - max_depth=max_depth) - metadata_html = metadata_table.to_html(with_headers=True, - with_newlines=with_newlines, - with_metadata=False, - max_depth=max_depth) - if with_metadata != 'bottom': - lines.append(metadata_html) - lines.append('<br />') - - if with_headers and self.headers: - headers.extend(self.headers) - headers.extend([None] * (self._width - len(self.headers))) - if wrapped: - lines.append(self._html_table_tag) - orientation = orientation or 'auto' - ol = orientation[0].lower() - if ol == 'a': - ol = 'h' if len(self) > 1 else 'v' - if ol == 'h': - self._add_horizontal_html_lines(lines, headers=headers, - max_depth=max_depth) - elif ol == 'v': - self._add_vertical_html_lines(lines, headers=headers, - max_depth=max_depth) - else: - raise ValueError("expected one of 'auto', 'vertical', or" - " 'horizontal', not %r" % orientation) - if with_metadata and self.metadata and with_metadata == 'bottom': - lines.append('<br />') - lines.append(metadata_html) - - if wrapped: - lines.append(self._html_table_tag_close) - sep = '\n' if with_newlines else '' - return sep.join(lines) - - def get_cell_html(self, value): - """Called on each value in an HTML table. By default it simply escapes - the HTML. Override this method to add additional conditions - and behaviors, but take care to ensure the final output is - HTML escaped. - """ - return escape_html(value) - - def _add_horizontal_html_lines(self, lines, headers, max_depth): - esc = self.get_cell_html - new_depth = max_depth - 1 if max_depth > 1 else max_depth - if max_depth > 1: - new_depth = max_depth - 1 - if headers: - _thth = self._html_th_close + self._html_th - lines.append(self._html_thead) - lines.append(self._html_tr + self._html_th + - _thth.join([esc(h) for h in headers]) + - self._html_th_close + self._html_tr_close) - lines.append(self._html_thead_close) - trtd, _tdtd, _td_tr = (self._html_tr + self._html_td, - self._html_td_close + self._html_td, - self._html_td_close + self._html_tr_close) - lines.append(self._html_tbody) - for row in self._data: - if max_depth > 1: - _fill_parts = [] - for cell in row: - if isinstance(cell, Table): - _fill_parts.append(cell.to_html(max_depth=new_depth)) - else: - _fill_parts.append(esc(cell)) - else: - _fill_parts = [esc(c) for c in row] - lines.append(''.join([trtd, _tdtd.join(_fill_parts), _td_tr])) - lines.append(self._html_tbody_close) - - def _add_vertical_html_lines(self, lines, headers, max_depth): - esc = self.get_cell_html - new_depth = max_depth - 1 if max_depth > 1 else max_depth - tr, th, _th = self._html_tr, self._html_th, self._html_th_close - td, _tdtd = self._html_td, self._html_td_close + self._html_td - _td_tr = self._html_td_close + self._html_tr_close - for i in range(self._width): - line_parts = [tr] - if headers: - line_parts.extend([th, esc(headers[i]), _th]) - if max_depth > 1: - new_depth = max_depth - 1 - _fill_parts = [] - for row in self._data: - cell = row[i] - if isinstance(cell, Table): - _fill_parts.append(cell.to_html(max_depth=new_depth)) - else: - _fill_parts.append(esc(row[i])) - else: - _fill_parts = [esc(row[i]) for row in self._data] - line_parts.extend([td, _tdtd.join(_fill_parts), _td_tr]) - lines.append(''.join(line_parts)) - - def to_text(self, with_headers=True, maxlen=None): - """Get the Table's textual representation. Only works well - for Tables with non-recursive data. - - Args: - with_headers (bool): Whether to include a header row at the top. - maxlen (int): Max length of data in each cell. - """ - lines = [] - widths = [] - headers = list(self.headers) - text_data = [[to_text(cell, maxlen=maxlen) for cell in row] - for row in self._data] - for idx in range(self._width): - cur_widths = [len(cur) for cur in text_data] - if with_headers: - cur_widths.append(len(to_text(headers[idx], maxlen=maxlen))) - widths.append(max(cur_widths)) - if with_headers: - lines.append(' | '.join([h.center(widths[i]) - for i, h in enumerate(headers)])) - lines.append('-|-'.join(['-' * w for w in widths])) - for row in text_data: - lines.append(' | '.join([cell.center(widths[j]) - for j, cell in enumerate(row)])) - return '\n'.join(lines)