Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/boltons/tableutils.py @ 5:9b1c78e6ba9c draft default tip
"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
author | shellac |
---|---|
date | Mon, 01 Jun 2020 08:59:25 -0400 |
parents | 79f47841a781 |
children |
comparison
equal
deleted
inserted
replaced
4:79f47841a781 | 5:9b1c78e6ba9c |
---|---|
1 # -*- coding: utf-8 -*- | |
2 """If there is one recurring theme in ``boltons``, it is that Python | |
3 has excellent datastructures that constitute a good foundation for | |
4 most quick manipulations, as well as building applications. However, | |
5 Python usage has grown much faster than builtin data structure | |
6 power. Python has a growing need for more advanced general-purpose | |
7 data structures which behave intuitively. | |
8 | |
9 The :class:`Table` class is one example. When handed one- or | |
10 two-dimensional data, it can provide useful, if basic, text and HTML | |
11 renditions of small to medium sized data. It also heuristically | |
12 handles recursive data of various formats (lists, dicts, namedtuples, | |
13 objects). | |
14 | |
15 For more advanced :class:`Table`-style manipulation check out the | |
16 `pandas`_ DataFrame. | |
17 | |
18 .. _pandas: http://pandas.pydata.org/ | |
19 | |
20 """ | |
21 | |
22 from __future__ import print_function | |
23 | |
24 try: | |
25 from html import escape as html_escape | |
26 except ImportError: | |
27 from cgi import escape as html_escape | |
28 import types | |
29 from itertools import islice | |
30 try: | |
31 from collections.abc import Sequence, Mapping, MutableSequence | |
32 except ImportError: | |
33 from collections import Sequence, Mapping, MutableSequence | |
34 try: | |
35 string_types, integer_types = (str, unicode), (int, long) | |
36 from cgi import escape as html_escape | |
37 except NameError: | |
38 # Python 3 compat | |
39 unicode = str | |
40 string_types, integer_types = (str, bytes), (int,) | |
41 from html import escape as html_escape | |
42 | |
43 try: | |
44 from typeutils import make_sentinel | |
45 _MISSING = make_sentinel(var_name='_MISSING') | |
46 except ImportError: | |
47 _MISSING = object() | |
48 | |
49 """ | |
50 Some idle feature thoughts: | |
51 | |
52 * shift around column order without rearranging data | |
53 * gotta make it so you can add additional items, not just initialize with | |
54 * maybe a shortcut would be to allow adding of Tables to other Tables | |
55 * what's the perf of preallocating lists and overwriting items versus | |
56 starting from empty? | |
57 * is it possible to effectively tell the difference between when a | |
58 Table is from_data()'d with a single row (list) or with a list of lists? | |
59 * CSS: white-space pre-line or pre-wrap maybe? | |
60 * Would be nice to support different backends (currently uses lists | |
61 exclusively). Sometimes large datasets come in list-of-dicts and | |
62 list-of-tuples format and it's desirable to cut down processing overhead. | |
63 | |
64 TODO: make iterable on rows? | |
65 """ | |
66 | |
67 __all__ = ['Table'] | |
68 | |
69 | |
def to_text(obj, maxlen=None):
    """Return a text rendition of *obj*, optionally truncated.

    Conversion is attempted with ``unicode(obj)``, then with
    ``unicode(repr(obj))`` and finally with the default
    ``object.__repr__``, so that an object whose ``__str__`` or
    ``__repr__`` raises still yields some text.

    Args:
        obj: Any object.
        maxlen (int): Maximum length of the returned text. Falsy values
            (``None``, ``0``) disable truncation.

    Returns:
        The text. When it had to be shortened and *maxlen* is at least
        3, the result ends in ``'...'``; for a *maxlen* of 1 or 2 there
        is no room for an ellipsis and the text is simply cut.
    """
    try:
        text = unicode(obj)
    except Exception:
        try:
            text = unicode(repr(obj))
        except Exception:
            text = unicode(object.__repr__(obj))
    if maxlen and len(text) > maxlen:
        if 0 < maxlen < 3:
            # ``maxlen - 3`` would be a negative slice bound here, which
            # made the "truncated" text longer than maxlen.
            text = text[:maxlen]
        else:
            text = text[:maxlen - 3] + '...'
    # TODO: inverse of ljust/rjust/center
    return text
82 | |
83 | |
def escape_html(obj, maxlen=None):
    """Convert *obj* to text via :func:`to_text` (honoring *maxlen*) and
    return it HTML-escaped, with quote characters escaped as well.
    """
    return html_escape(to_text(obj, maxlen=maxlen), quote=True)
87 | |
88 | |
# Types treated as plain scalar cell values: ObjectInputType refuses
# them and Table.from_data neither recurses into them nor tries to
# interpret them as rows.
_DNR = set(string_types + integer_types)
_DNR.update((
    type(None), bool, complex, float,
    type(NotImplemented), slice,
    types.FunctionType, types.MethodType, types.BuiltinFunctionType,
    types.GeneratorType,
))
93 | |
94 | |
class UnsupportedData(TypeError):
    """Raised when none of the known input types can handle the data
    handed to :meth:`Table.from_data`.
    """
97 | |
98 | |
class InputType(object):
    """Base class for the adapters that turn one kind of input data
    into Table rows. Subclasses in this module provide ``check_type``,
    ``guess_headers`` and ``get_entry``.
    """

    def __init__(self, *a, **kw):
        """Accept and ignore any arguments."""

    def get_entry_seq(self, data_seq, headers):
        """Return a list holding ``self.get_entry(entry, headers)`` for
        every entry of *data_seq*, in order.
        """
        rows = []
        for entry in data_seq:
            rows.append(self.get_entry(entry, headers))
        return rows
105 | |
106 | |
class DictInputType(InputType):
    """Input adapter for :class:`Mapping` objects: keys become headers,
    values become cells.
    """

    def check_type(self, obj):
        """Return ``True`` if *obj* is a Mapping."""
        return isinstance(obj, Mapping)

    def guess_headers(self, obj):
        """Return the keys of *obj* as a list, sorted when possible.

        Keys of mixed, mutually unorderable types (e.g. ``int`` and
        ``str`` on Python 3) make ``sorted()`` raise ``TypeError``; in
        that case the keys are returned in the mapping's own iteration
        order instead of failing.
        """
        keys = list(obj.keys())
        try:
            return sorted(keys)
        except TypeError:
            return keys

    def get_entry(self, obj, headers):
        """Return one row: ``obj.get(h)`` for each header (``None`` for
        missing keys).
        """
        return [obj.get(h) for h in headers]

    def get_entry_seq(self, obj, headers):
        """Return one row per mapping in the sequence *obj*."""
        return [[ci.get(h) for h in headers] for ci in obj]
119 | |
120 | |
class ObjectInputType(InputType):
    """Input adapter for arbitrary objects: non-callable attributes
    become the columns.
    """

    def check_type(self, obj):
        """Return ``True`` unless the exact type of *obj* is one of the
        scalar types in ``_DNR`` (or *obj* lacks ``__class__``).
        """
        if type(obj) in _DNR:
            return False
        return hasattr(obj, '__class__')

    def guess_headers(self, obj):
        """Return the names from ``dir(obj)`` whose values can be read
        and are not callable.
        """
        names = []
        for name in dir(obj):
            # an object's __dict__ could technically have non-string keys
            try:
                value = getattr(obj, name)
            except Exception:
                # seen on greenlet: `run` shows in dir() but raises
                # AttributeError. Also properties misbehave.
                continue
            if not callable(value):
                names.append(name)
        return names

    def get_entry(self, obj, headers):
        """Return one row with the attribute value for each header;
        attributes that cannot be read yield ``None``.
        """
        row = []
        for name in headers:
            try:
                cell = getattr(obj, name)
            except Exception:
                cell = None
            row.append(cell)
        return row
148 | |
149 | |
150 # might be better to hardcode list support since it's so close to the | |
151 # core or might be better to make this the copy-style from_* importer | |
152 # and have the non-copy style be hardcoded in __init__ | |
class ListInputType(InputType):
    """Input adapter for mutable sequences. Rows are passed through
    as-is, without copying.
    """

    def check_type(self, obj):
        """Return ``True`` if *obj* is a MutableSequence."""
        return isinstance(obj, MutableSequence)

    def guess_headers(self, obj):
        """Lists carry no header information; always ``None``."""
        return None

    def get_entry(self, obj, headers):
        """Return *obj* itself as the row; *headers* is ignored."""
        return obj

    def get_entry_seq(self, obj_seq, headers):
        """Return *obj_seq* itself as the rows; *headers* is ignored."""
        return obj_seq
165 | |
166 | |
class TupleInputType(InputType):
    """Input adapter for tuples. Each tuple is copied into a new list
    row.
    """

    def check_type(self, obj):
        """Return ``True`` if *obj* is a tuple."""
        return isinstance(obj, tuple)

    def guess_headers(self, obj):
        """Plain tuples carry no header information; always ``None``."""
        return None

    def get_entry(self, obj, headers):
        """Return the items of *obj* as a new list; *headers* is ignored."""
        return list(obj)

    def get_entry_seq(self, obj_seq, headers):
        """Return one new list per tuple in *obj_seq*."""
        rows = []
        for item in obj_seq:
            rows.append(list(item))
        return rows
179 | |
180 | |
class NamedTupleInputType(InputType):
    """Input adapter for namedtuple-like objects: tuples that expose a
    ``_fields`` attribute.
    """

    def check_type(self, obj):
        """Return ``True`` if *obj* has ``_fields`` and is a tuple."""
        if not hasattr(obj, '_fields'):
            return False
        return isinstance(obj, tuple)

    def guess_headers(self, obj):
        """Return the field names of *obj* as a list."""
        return list(obj._fields)

    def get_entry(self, obj, headers):
        """Return one row with the attribute named by each header,
        ``None`` where the attribute is missing.
        """
        row = []
        for name in headers:
            row.append(getattr(obj, name, None))
        return row

    def get_entry_seq(self, obj_seq, headers):
        """Return one row per object in *obj_seq*, built like
        :meth:`get_entry`.
        """
        rows = []
        for obj in obj_seq:
            rows.append([getattr(obj, name, None) for name in headers])
        return rows
193 | |
194 | |
class Table(object):
    """
    This Table class is meant to be simple, low-overhead, and extensible. Its
    most common use would be for translation between in-memory data
    structures and serialization formats, such as HTML and console-ready text.

    As such, it stores data in list-of-lists format, and *does not* copy
    lists passed in. It also reserves the right to modify those lists in a
    "filling" process, whereby short lists are extended to the width of
    the table (usually determined by number of headers). This greatly
    reduces overhead and processing/validation that would have to occur
    otherwise.

    General description of headers behavior:

    Headers describe the columns, but are not part of the data, however,
    if the *headers* argument is omitted, Table tries to infer header
    names from the data. It is possible to have a table with no headers,
    just pass in ``headers=None``.

    Supported inputs:

    * :class:`list` of :class:`list` objects
    * :class:`dict` (list/single)
    * :class:`object` (list/single)
    * :class:`collections.namedtuple` (list/single)
    * TODO: DB API cursor?
    * TODO: json

    Supported outputs:

    * HTML
    * Pretty text (also usable as GF Markdown)
    * TODO: CSV
    * TODO: json
    * TODO: json lines

    To minimize resident size, the Table data is stored as a list of lists.
    """

    # order definitely matters here
    _input_types = [DictInputType(), ListInputType(),
                    NamedTupleInputType(), TupleInputType(),
                    ObjectInputType()]

    _html_tr, _html_tr_close = '<tr>', '</tr>'
    _html_th, _html_th_close = '<th>', '</th>'
    _html_td, _html_td_close = '<td>', '</td>'
    _html_thead, _html_thead_close = '<thead>', '</thead>'
    _html_tbody, _html_tbody_close = '<tbody>', '</tbody>'

    # _html_tfoot, _html_tfoot_close = '<tfoot>', '</tfoot>'
    _html_table_tag, _html_table_tag_close = '<table>', '</table>'

    def __init__(self, data=None, headers=_MISSING, metadata=None):
        """Build a Table from a sequence of list rows.

        Args:
            data: Sequence of rows (lists). Rows are stored without
                copying and may be extended in place during filling.
            headers: Column headers. When omitted, the first row of
                *data* is consumed as the headers. Pass ``None`` for a
                table without headers.
            metadata: Optional mapping kept on ``self.metadata``.
        """
        if headers is _MISSING:
            headers = []
            if data:
                # No explicit headers: the first row becomes the headers
                # and only the remaining rows are data.
                headers, data = list(data[0]), islice(data, 1, None)
        self.headers = headers or []
        self.metadata = metadata or {}
        self._data = []
        self._width = 0

        self.extend(data)

    def extend(self, data):
        """
        Append the given data to the end of the Table.
        """
        if not data:
            return
        self._data.extend(data)
        self._set_width()
        self._fill()

    def _set_width(self, reset=False):
        """Determine the table width once: the number of headers if
        there are any, otherwise the length of the longest row. Pass
        ``reset=True`` to force recomputation.
        """
        if reset:
            self._width = 0
        if self._width:
            return
        if self.headers:
            self._width = len(self.headers)
            return
        # ``or [0]``: max() raises ValueError on an empty list, which
        # happens when the (lazy) data turned out to hold no rows.
        self._width = max([len(d) for d in self._data] or [0])

    def _fill(self):
        """Pad, in place, every row shorter than the table width with
        ``None``.
        """
        width, filler = self._width, [None]
        if not width:
            return
        for d in self._data:
            rem = width - len(d)
            if rem > 0:
                d.extend(filler * rem)
        return

    @classmethod
    def from_dict(cls, data, headers=_MISSING, max_depth=1, metadata=None):
        """Create a Table from a :class:`dict`. Operates the same as
        :meth:`from_data`, but forces interpretation of the data as a
        Mapping.
        """
        return cls.from_data(data=data, headers=headers,
                             max_depth=max_depth, _data_type=DictInputType(),
                             metadata=metadata)

    @classmethod
    def from_list(cls, data, headers=_MISSING, max_depth=1, metadata=None):
        """Create a Table from a :class:`list`. Operates the same as
        :meth:`from_data`, but forces the interpretation of the data
        as a Sequence.
        """
        return cls.from_data(data=data, headers=headers,
                             max_depth=max_depth, _data_type=ListInputType(),
                             metadata=metadata)

    @classmethod
    def from_object(cls, data, headers=_MISSING, max_depth=1, metadata=None):
        """Create a Table from an :class:`object`. Operates the same as
        :meth:`from_data`, but forces the interpretation of the data
        as an object. May be useful for some :class:`dict` and
        :class:`list` subtypes.
        """
        return cls.from_data(data=data, headers=headers,
                             max_depth=max_depth, _data_type=ObjectInputType(),
                             metadata=metadata)

    @classmethod
    def from_data(cls, data, headers=_MISSING, max_depth=1, **kwargs):
        """Create a Table from any supported data, heuristically
        selecting how to represent the data in Table format.

        Args:
            data (object): Any object or iterable with data to be
                imported to the Table.

            headers (iterable): An iterable of headers to be matched
                to the data. If not explicitly passed, headers will be
                guessed for certain datatypes.

            max_depth (int): The level to which nested Tables should
                be created (default: 1).

            metadata (dict): Optional keyword argument, stored on the
                resulting Table.

            _data_type (InputType subclass): For advanced use cases,
                do not guess the type of the input data, use this data
                type instead.

        Raises:
            UnsupportedData: if no input type accepts the data.
        """
        # TODO: seen/cycle detection/reuse ?
        # maxdepth follows the same behavior as find command
        # i.e., it doesn't work if max_depth=0 is passed in
        metadata = kwargs.pop('metadata', None)
        _data_type = kwargs.pop('_data_type', None)

        if max_depth < 1:
            # return data instead?
            return cls(headers=headers, metadata=metadata)
        is_seq = isinstance(data, Sequence)
        if is_seq:
            if not data:
                return cls(headers=headers, metadata=metadata)
            # Guess the row type from the first item of the sequence.
            to_check = data[0]
            if not _data_type:
                for it in cls._input_types:
                    if it.check_type(to_check):
                        _data_type = it
                        break
                else:
                    # not particularly happy about this rewind-y approach:
                    # the items are not rows, so treat the whole
                    # sequence as a single row instead.
                    is_seq = False
                    to_check = data
        else:
            if type(data) in _DNR:
                # hmm, got scalar data.
                # raise an exception or make an exception, nahmsayn?
                return cls([[data]], headers=headers, metadata=metadata)
            to_check = data
        if not _data_type:
            for it in cls._input_types:
                if it.check_type(to_check):
                    _data_type = it
                    break
            else:
                raise UnsupportedData('unsupported data type %r'
                                      % type(data))
        if headers is _MISSING:
            headers = _data_type.guess_headers(to_check)
        if is_seq:
            entries = _data_type.get_entry_seq(data, headers)
        else:
            entries = [_data_type.get_entry(data, headers)]
        if max_depth > 1:
            new_max_depth = max_depth - 1
            for i, entry in enumerate(entries):
                for j, cell in enumerate(entry):
                    if type(cell) in _DNR:
                        # optimization to avoid function overhead
                        continue
                    try:
                        entries[i][j] = cls.from_data(cell,
                                                      max_depth=new_max_depth)
                    except UnsupportedData:
                        continue
        return cls(entries, headers=headers, metadata=metadata)

    def __len__(self):
        return len(self._data)

    def __getitem__(self, idx):
        return self._data[idx]

    def __repr__(self):
        cn = self.__class__.__name__
        if self.headers:
            return '%s(headers=%r, data=%r)' % (cn, self.headers, self._data)
        else:
            return '%s(%r)' % (cn, self._data)

    def to_html(self, orientation=None, wrapped=True,
                with_headers=True, with_newlines=True,
                with_metadata=False, max_depth=1):
        """Render this Table to HTML. Configure the structure of Table
        HTML by subclassing and overriding ``_html_*`` class
        attributes.

        Args:
            orientation (str): one of 'auto', 'horizontal', or
                'vertical' (or the first letter of any of
                those). Default 'auto'.
            wrapped (bool): whether or not to include the wrapping
                '<table></table>' tags. Default ``True``, set to
                ``False`` if appending multiple Table outputs or an
                otherwise customized HTML wrapping tag is needed.
            with_headers (bool): whether to render the headers, if the
                Table has any. Default ``True``.
            with_newlines (bool): Set to ``True`` if output should
                include added newlines to make the HTML more
                readable. Default ``True``.
            with_metadata (bool/str): Set to ``True`` if output should
                be preceded with a Table of preset metadata, if it
                exists. Set to special value ``'bottom'`` if the
                metadata Table HTML should come *after* the main HTML output.
            max_depth (int): Indicate how deeply to nest HTML tables
                before simply reverting to :func:`repr`-ing the nested
                data.

        Returns:
            A text string of the HTML of the rendered table.

        Raises:
            ValueError: if *orientation* is not recognized.
        """
        lines = []
        headers = []
        metadata_html = None
        if with_metadata and self.metadata:
            metadata_table = Table.from_data(self.metadata,
                                             max_depth=max_depth)
            metadata_html = metadata_table.to_html(with_headers=True,
                                                   with_newlines=with_newlines,
                                                   with_metadata=False,
                                                   max_depth=max_depth)
        metadata_at_bottom = with_metadata == 'bottom'
        if metadata_html is not None and not metadata_at_bottom:
            lines.append(metadata_html)
            lines.append('<br />')

        if with_headers and self.headers:
            headers.extend(self.headers)
            headers.extend([None] * (self._width - len(self.headers)))
        if wrapped:
            lines.append(self._html_table_tag)
        orientation = orientation or 'auto'
        ol = orientation[0].lower()
        if ol == 'a':
            ol = 'h' if len(self) > 1 else 'v'
        if ol == 'h':
            self._add_horizontal_html_lines(lines, headers=headers,
                                            max_depth=max_depth)
        elif ol == 'v':
            self._add_vertical_html_lines(lines, headers=headers,
                                          max_depth=max_depth)
        else:
            raise ValueError("expected one of 'auto', 'vertical', or"
                             " 'horizontal', not %r" % orientation)

        if wrapped:
            lines.append(self._html_table_tag_close)
        if metadata_html is not None and metadata_at_bottom:
            # Emitted after the closing tag: a <br /> and a second table
            # are not valid direct children of the main <table>.
            lines.append('<br />')
            lines.append(metadata_html)
        sep = '\n' if with_newlines else ''
        return sep.join(lines)

    def get_cell_html(self, value):
        """Called on each value in an HTML table. By default it simply escapes
        the HTML. Override this method to add additional conditions
        and behaviors, but take care to ensure the final output is
        HTML escaped.
        """
        return escape_html(value)

    def _cells_to_html(self, cells, max_depth):
        """Return the HTML for each cell: nested Tables are rendered
        recursively while *max_depth* allows, everything else goes
        through :meth:`get_cell_html`.
        """
        esc = self.get_cell_html
        if max_depth <= 1:
            return [esc(cell) for cell in cells]
        new_depth = max_depth - 1
        parts = []
        for cell in cells:
            if isinstance(cell, Table):
                parts.append(cell.to_html(max_depth=new_depth))
            else:
                parts.append(esc(cell))
        return parts

    def _add_horizontal_html_lines(self, lines, headers, max_depth):
        """Append to *lines* the HTML for one ``<tr>`` per data row,
        preceded by a ``<thead>`` row when *headers* is non-empty.
        """
        esc = self.get_cell_html
        if headers:
            _thth = self._html_th_close + self._html_th
            lines.append(self._html_thead)
            lines.append(self._html_tr + self._html_th +
                         _thth.join([esc(h) for h in headers]) +
                         self._html_th_close + self._html_tr_close)
            lines.append(self._html_thead_close)
        trtd, _tdtd, _td_tr = (self._html_tr + self._html_td,
                               self._html_td_close + self._html_td,
                               self._html_td_close + self._html_tr_close)
        lines.append(self._html_tbody)
        for row in self._data:
            _fill_parts = self._cells_to_html(row, max_depth)
            lines.append(''.join([trtd, _tdtd.join(_fill_parts), _td_tr]))
        lines.append(self._html_tbody_close)

    def _add_vertical_html_lines(self, lines, headers, max_depth):
        """Append to *lines* the HTML for one ``<tr>`` per column: the
        header cell (if any) followed by that column's value from every
        data row.
        """
        esc = self.get_cell_html
        tr, th, _th = self._html_tr, self._html_th, self._html_th_close
        td, _tdtd = self._html_td, self._html_td_close + self._html_td
        _td_tr = self._html_td_close + self._html_tr_close
        for i in range(self._width):
            line_parts = [tr]
            if headers:
                line_parts.extend([th, esc(headers[i]), _th])
            _fill_parts = self._cells_to_html([row[i] for row in self._data],
                                              max_depth)
            line_parts.extend([td, _tdtd.join(_fill_parts), _td_tr])
            lines.append(''.join(line_parts))

    def to_text(self, with_headers=True, maxlen=None):
        """Get the Table's textual representation. Only works well
        for Tables with non-recursive data.

        Every column is as wide as its longest cell (or header) text
        and cells are centered within it. Headers are converted to text
        like any other cell; missing headers or cells render as blanks.

        Args:
            with_headers (bool): Whether to include a header row at the
                top. Ignored when the Table has no headers.
            maxlen (int): Max length of data in each cell.
        """
        text_rows = [[to_text(cell, maxlen=maxlen) for cell in row]
                     for row in self._data]
        header_row = None
        if with_headers and self.headers:
            header_row = [to_text(h, maxlen=maxlen) for h in self.headers]

        all_rows = list(text_rows)
        if header_row is not None:
            all_rows.insert(0, header_row)
        # Headers and rows may disagree in length; size everything to
        # the widest of them so no column lookup can fall out of range.
        ncols = max([self._width] + [len(r) for r in all_rows])
        for r in all_rows:
            r.extend([''] * (ncols - len(r)))
        # Width of a column is the longest text *in that column*.
        widths = [max([len(r[idx]) for r in all_rows] or [0])
                  for idx in range(ncols)]

        lines = []
        if header_row is not None:
            lines.append(' | '.join([h.center(widths[i])
                                     for i, h in enumerate(header_row)]))
            lines.append('-|-'.join(['-' * w for w in widths]))
        for row in text_rows:
            lines.append(' | '.join([cell.center(widths[j])
                                     for j, cell in enumerate(row)]))
        return '\n'.join(lines)