diff env/lib/python3.7/site-packages/boltons/statsutils.py @ 0:26e78fe6e8c4 draft
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"

author:   shellac
date:     Sat, 02 May 2020 07:14:21 -0400
parents:
children:
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/env/lib/python3.7/site-packages/boltons/statsutils.py	Sat May 02 07:14:21 2020 -0400
@@ -0,0 +1,790 @@
# -*- coding: utf-8 -*-
"""``statsutils`` provides tools aimed primarily at descriptive
statistics for data analysis, such as :func:`mean` (average),
:func:`median`, :func:`variance`, and many others.

The :class:`Stats` type provides all the main functionality of the
``statsutils`` module. A :class:`Stats` object wraps a given dataset,
providing all statistical measures as property attributes. These
attributes cache their results, which allows efficient computation of
multiple measures, as many measures rely on other measures. For
example, relative standard deviation (:attr:`Stats.rel_std_dev`)
relies on both the mean and standard deviation. The Stats object
caches those results so no rework is done.

The :class:`Stats` type's attributes have module-level counterparts for
convenience when the computation reuse advantages do not apply.

>>> stats = Stats(range(42))
>>> stats.mean
20.5
>>> mean(range(42))
20.5

Statistics is a large field, and ``statsutils`` is focused on a few
basic techniques that are useful in software. The following is a brief
introduction to those techniques. For a more in-depth introduction,
see `Statistics for Software
<https://www.paypal-engineering.com/2016/04/11/statistics-for-software/>`_,
an article I wrote on the topic. It introduces key terminology vital
to effective usage of statistics.

Statistical moments
-------------------

Python programmers are probably familiar with the concept of the
*mean* or *average*, which gives a rough quantitative middle value by
which a sample can be generalized. However, the mean is just
the first of four `moment`_-based measures by which a sample or
distribution can be measured.

The four `Standardized moments`_ are:

  1. `Mean`_ - :func:`mean` - theoretical middle value
  2. `Variance`_ - :func:`variance` - width of value dispersion
  3. `Skewness`_ - :func:`skewness` - symmetry of distribution
  4. `Kurtosis`_ - :func:`kurtosis` - "peakiness" or "long-tailed"-ness

For more information check out `the Moment article on Wikipedia`_.

.. _moment: https://en.wikipedia.org/wiki/Moment_(mathematics)
.. _Standardized moments: https://en.wikipedia.org/wiki/Standardized_moment
.. _Mean: https://en.wikipedia.org/wiki/Mean
.. _Variance: https://en.wikipedia.org/wiki/Variance
.. _Skewness: https://en.wikipedia.org/wiki/Skewness
.. _Kurtosis: https://en.wikipedia.org/wiki/Kurtosis
.. _the Moment article on Wikipedia: https://en.wikipedia.org/wiki/Moment_(mathematics)

Keep in mind that while these moments can give a bit more insight into
the shape and distribution of data, they do not guarantee a complete
picture. Wildly different datasets can have the same values for all
four moments, so generalize wisely.

Robust statistics
-----------------

Moment-based statistics are notorious for being easily skewed by
outliers. The whole field of robust statistics aims to mitigate this
dilemma. ``statsutils`` also includes several robust statistical methods:

  * `Median`_ - The middle value of a sorted dataset
  * `Trimean`_ - Another robust measure of the data's central tendency
  * `Median Absolute Deviation`_ (MAD) - A robust measure of
    variability, a natural counterpart to :func:`variance`.
  * `Trimming`_ - Reducing a dataset to only the middle majority of
    data is a simple way of making other estimators more robust.
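
For instance, on a small illustrative dataset with one large outlier,
the robust measures barely move while the mean is dragged upward (the
values below follow from the definitions in this module):

>>> s = Stats([1, 2, 3, 4, 100])
>>> s.median
3
>>> s.median_abs_dev
1.0
>>> s.mean
22.0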

.. _Median: https://en.wikipedia.org/wiki/Median
.. _Trimean: https://en.wikipedia.org/wiki/Trimean
.. _Median Absolute Deviation: https://en.wikipedia.org/wiki/Median_absolute_deviation
.. _Trimming: https://en.wikipedia.org/wiki/Trimmed_estimator


Online and Offline Statistics
-----------------------------

Unrelated to computer networking, `online`_ statistics involve
calculating statistics in a `streaming`_ fashion, without all the data
being available. The :class:`Stats` type is meant for the more
traditional offline statistics when all the data is available. For
pure-Python online statistics accumulators, look at the `Lithoxyl`_
system instrumentation package.

.. _online: https://en.wikipedia.org/wiki/Online_algorithm
.. _streaming: https://en.wikipedia.org/wiki/Streaming_algorithm
.. _Lithoxyl: https://github.com/mahmoud/lithoxyl

"""

from __future__ import print_function

import bisect
from math import floor, ceil


class _StatsProperty(object):
    def __init__(self, name, func):
        self.name = name
        self.func = func
        self.internal_name = '_' + name

        doc = func.__doc__ or ''
        pre_doctest_doc, _, _ = doc.partition('>>>')
        self.__doc__ = pre_doctest_doc

    def __get__(self, obj, objtype=None):
        if obj is None:
            return self
        if not obj.data:
            return obj.default
        try:
            return getattr(obj, self.internal_name)
        except AttributeError:
            setattr(obj, self.internal_name, self.func(obj))
            return getattr(obj, self.internal_name)


class Stats(object):
    """The ``Stats`` type is used to represent a group of unordered
    statistical datapoints for calculations such as mean, median, and
    variance.

    Args:

        data (list): List or other iterable containing numeric values.
        default (float): A value to be returned when a given
            statistical measure is not defined. 0.0 by default, but
            ``float('nan')`` is appropriate for stricter applications.
        use_copy (bool): By default Stats objects copy the initial
            data into a new list to avoid issues with
            modifications. Pass ``False`` to disable this behavior.
        is_sorted (bool): Presorted data can skip an extra sorting
            step for a little speed boost. Defaults to False.

    """
    def __init__(self, data, default=0.0, use_copy=True, is_sorted=False):
        self._use_copy = use_copy
        self._is_sorted = is_sorted
        if use_copy:
            self.data = list(data)
        else:
            self.data = data

        self.default = default
        cls = self.__class__
        self._prop_attr_names = [a for a in dir(self)
                                 if isinstance(getattr(cls, a, None),
                                               _StatsProperty)]
        self._pearson_precision = 0

    def __len__(self):
        return len(self.data)

    def __iter__(self):
        return iter(self.data)

    def _get_sorted_data(self):
        """When using a copy of the data, it's better to have that copy be
        sorted, but we do it lazily using this method, in case no
        sorted measures are used. I.e., if median is never called,
        sorting would be a waste.

        When not using a copy, it's presumed that all optimizations
        are on the user.
        """
        if not self._use_copy:
            return sorted(self.data)
        elif not self._is_sorted:
            self.data.sort()
        return self.data

    def clear_cache(self):
        """``Stats`` objects automatically cache intermediary calculations
        that can be reused. For instance, accessing the ``std_dev``
        attribute after the ``variance`` attribute will be
        significantly faster for medium-to-large datasets.

        If you modify the object by adding additional data points,
        call this function to have the cached statistics recomputed.
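
        For example (a small illustrative dataset; the values follow
        from the definitions in this module):

        >>> s = Stats([1, 2, 3])
        >>> s.mean
        2.0
        >>> s.data.append(10)
        >>> s.clear_cache()
        >>> s.mean
        4.0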
+ + """ + for attr_name in self._prop_attr_names: + attr_name = getattr(self.__class__, attr_name).internal_name + if not hasattr(self, attr_name): + continue + delattr(self, attr_name) + return + + def _calc_count(self): + """The number of items in this Stats object. Returns the same as + :func:`len` on a Stats object, but provided for pandas terminology + parallelism. + + >>> Stats(range(20)).count + 20 + """ + return len(self.data) + count = _StatsProperty('count', _calc_count) + + def _calc_mean(self): + """ + The arithmetic mean, or "average". Sum of the values divided by + the number of values. + + >>> mean(range(20)) + 9.5 + >>> mean(list(range(19)) + [949]) # 949 is an arbitrary outlier + 56.0 + """ + return sum(self.data, 0.0) / len(self.data) + mean = _StatsProperty('mean', _calc_mean) + + def _calc_max(self): + """ + The maximum value present in the data. + + >>> Stats([2, 1, 3]).max + 3 + """ + if self._is_sorted: + return self.data[-1] + return max(self.data) + max = _StatsProperty('max', _calc_max) + + def _calc_min(self): + """ + The minimum value present in the data. + + >>> Stats([2, 1, 3]).min + 1 + """ + if self._is_sorted: + return self.data[0] + return min(self.data) + min = _StatsProperty('min', _calc_min) + + def _calc_median(self): + """ + The median is either the middle value or the average of the two + middle values of a sample. Compared to the mean, it's generally + more resilient to the presence of outliers in the sample. + + >>> median([2, 1, 3]) + 2 + >>> median(range(97)) + 48 + >>> median(list(range(96)) + [1066]) # 1066 is an arbitrary outlier + 48 + """ + return self._get_quantile(self._get_sorted_data(), 0.5) + median = _StatsProperty('median', _calc_median) + + def _calc_iqr(self): + """Inter-quartile range (IQR) is the difference between the 75th + percentile and 25th percentile. IQR is a robust measure of + dispersion, like standard deviation, but safer to compare + between datasets, as it is less influenced by outliers. + + >>> iqr([1, 2, 3, 4, 5]) + 2 + >>> iqr(range(1001)) + 500 + """ + return self.get_quantile(0.75) - self.get_quantile(0.25) + iqr = _StatsProperty('iqr', _calc_iqr) + + def _calc_trimean(self): + """The trimean is a robust measure of central tendency, like the + median, that takes the weighted average of the median and the + upper and lower quartiles. + + >>> trimean([2, 1, 3]) + 2.0 + >>> trimean(range(97)) + 48.0 + >>> trimean(list(range(96)) + [1066]) # 1066 is an arbitrary outlier + 48.0 + + """ + sorted_data = self._get_sorted_data() + gq = lambda q: self._get_quantile(sorted_data, q) + return (gq(0.25) + (2 * gq(0.5)) + gq(0.75)) / 4.0 + trimean = _StatsProperty('trimean', _calc_trimean) + + def _calc_variance(self): + """\ + Variance is the average of the squares of the difference between + each value and the mean. + + >>> variance(range(97)) + 784.0 + """ + global mean # defined elsewhere in this file + return mean(self._get_pow_diffs(2)) + variance = _StatsProperty('variance', _calc_variance) + + def _calc_std_dev(self): + """\ + Standard deviation. Square root of the variance. 

        >>> std_dev(range(97))
        28.0
        """
        return self.variance ** 0.5
    std_dev = _StatsProperty('std_dev', _calc_std_dev)

    def _calc_median_abs_dev(self):
        """\
        Median Absolute Deviation is a robust measure of statistical
        dispersion: http://en.wikipedia.org/wiki/Median_absolute_deviation

        >>> median_abs_dev(range(97))
        24.0
        """
        global median  # defined elsewhere in this file
        sorted_vals = sorted(self.data)
        x = float(median(sorted_vals))
        return median([abs(x - v) for v in sorted_vals])
    median_abs_dev = _StatsProperty('median_abs_dev', _calc_median_abs_dev)
    mad = median_abs_dev  # convenience

    def _calc_rel_std_dev(self):
        """\
        Standard deviation divided by the absolute value of the average.

        http://en.wikipedia.org/wiki/Relative_standard_deviation

        >>> print('%1.3f' % rel_std_dev(range(97)))
        0.583
        """
        abs_mean = abs(self.mean)
        if abs_mean:
            return self.std_dev / abs_mean
        else:
            return self.default
    rel_std_dev = _StatsProperty('rel_std_dev', _calc_rel_std_dev)

    def _calc_skewness(self):
        """\
        Indicates the asymmetry of a curve. Positive values mean the bulk
        of the values are on the left side of the average and vice versa.

        http://en.wikipedia.org/wiki/Skewness

        See the module docstring for more about statistical moments.

        >>> skewness(range(97))  # symmetrical around 48.0
        0.0
        >>> left_skewed = skewness(list(range(97)) + list(range(10)))
        >>> right_skewed = skewness(list(range(97)) + list(range(87, 97)))
        >>> round(left_skewed, 3), round(right_skewed, 3)
        (0.114, -0.114)
        """
        data, s_dev = self.data, self.std_dev
        if len(data) > 1 and s_dev > 0:
            return (sum(self._get_pow_diffs(3)) /
                    float((len(data) - 1) * (s_dev ** 3)))
        else:
            return self.default
    skewness = _StatsProperty('skewness', _calc_skewness)

    def _calc_kurtosis(self):
        """\
        Indicates how much data is in the tails of the distribution. The
        result is always positive, with the normal "bell-curve"
        distribution having a kurtosis of 3.

        http://en.wikipedia.org/wiki/Kurtosis

        See the module docstring for more about statistical moments.

        >>> kurtosis(range(9))
        1.99125

        With a kurtosis of 1.99125, [0, 1, 2, 3, 4, 5, 6, 7, 8] is more
        centrally distributed than the normal curve.
        """
        data, s_dev = self.data, self.std_dev
        if len(data) > 1 and s_dev > 0:
            return (sum(self._get_pow_diffs(4)) /
                    float((len(data) - 1) * (s_dev ** 4)))
        else:
            return 0.0
    kurtosis = _StatsProperty('kurtosis', _calc_kurtosis)

    def _calc_pearson_type(self):
        precision = self._pearson_precision
        skewness = self.skewness
        kurtosis = self.kurtosis
        beta1 = skewness ** 2.0
        beta2 = kurtosis * 1.0

        # TODO: range checks?
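
        # Pearson's distribution-family criterion (a sketch): beta1 is
        # the squared skewness and beta2 the kurtosis. c0, c1, and c2
        # below correspond, up to scaling, to the coefficients of the
        # quadratic in Pearson's differential equation; their signs and
        # the ratio k = c1**2 / (4 * c0 * c2) discriminate among the
        # numbered distribution types returned below.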

        c0 = (4 * beta2) - (3 * beta1)
        c1 = skewness * (beta2 + 3)
        c2 = (2 * beta2) - (3 * beta1) - 6

        if round(c1, precision) == 0:
            if round(beta2, precision) == 3:
                return 0  # Normal
            else:
                if beta2 < 3:
                    return 2  # Symmetric Beta
                elif beta2 > 3:
                    return 7
        elif round(c2, precision) == 0:
            return 3  # Gamma
        else:
            k = c1 ** 2 / (4 * c0 * c2)
            if k < 0:
                return 1  # Beta
        raise RuntimeError('missed a spot')
    pearson_type = _StatsProperty('pearson_type', _calc_pearson_type)

    @staticmethod
    def _get_quantile(sorted_data, q):
        data, n = sorted_data, len(sorted_data)
        idx = q / 1.0 * (n - 1)
        idx_f, idx_c = int(floor(idx)), int(ceil(idx))
        if idx_f == idx_c:
            return data[idx_f]
        return (data[idx_f] * (idx_c - idx)) + (data[idx_c] * (idx - idx_f))

    def get_quantile(self, q):
        """Get a quantile from the dataset. Quantiles are floating point
        values between ``0.0`` and ``1.0``, with ``0.0`` representing
        the minimum value in the dataset and ``1.0`` representing the
        maximum. ``0.5`` represents the median:

        >>> Stats(range(100)).get_quantile(0.5)
        49.5
        """
        q = float(q)
        if not 0.0 <= q <= 1.0:
            raise ValueError('expected q between 0.0 and 1.0, not %r' % q)
        elif not self.data:
            return self.default
        return self._get_quantile(self._get_sorted_data(), q)

    def get_zscore(self, value):
        """Get the z-score for *value* in the group. If the standard
        deviation is 0, then 0, inf, or -inf will be returned to indicate
        whether the value is equal to, greater than, or less than the
        group's mean.
        """
        mean = self.mean
        if self.std_dev == 0:
            if value == mean:
                return 0
            if value > mean:
                return float('inf')
            if value < mean:
                return float('-inf')
        return (float(value) - mean) / self.std_dev

    def trim_relative(self, amount=0.15):
        """A utility function used to cut a proportion of values off each end
        of a list of values. This has the effect of limiting the
        effect of outliers.

        Args:
            amount (float): A value between 0.0 and 0.5 to trim off of
                each side of the data.

        .. note::

            This operation modifies the data in-place. It does not
            make or return a copy.

        """
        trim = float(amount)
        if not 0.0 <= trim < 0.5:
            raise ValueError('expected amount between 0.0 and 0.5, not %r'
                             % trim)
        size = len(self.data)
        size_diff = int(size * trim)
        if size_diff == 0:
            return
        self.data = self._get_sorted_data()[size_diff:-size_diff]
        self.clear_cache()

    def _get_pow_diffs(self, power):
        """
        A utility function used for calculating statistical moments.
        """
        m = self.mean
        return [(v - m) ** power for v in self.data]

    def _get_bin_bounds(self, count=None, with_max=False):
        if not self.data:
            return [0.0]  # TODO: raise?
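
        # Three strategies below: with fewer than 4 points, default to
        # one bin per point; with no explicit count, pick a fixed bin
        # width via the Freedman-Diaconis rule (width = 2 * IQR / cube
        # root of n); otherwise honor the requested count.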

        data = self.data
        len_data, min_data, max_data = len(data), min(data), max(data)

        if len_data < 4:
            if not count:
                count = len_data
            dx = (max_data - min_data) / float(count)
            bins = [min_data + (dx * i) for i in range(count)]
        elif count is None:
            # freedman algorithm for fixed-width bin selection
            q25, q75 = self.get_quantile(0.25), self.get_quantile(0.75)
            dx = 2 * (q75 - q25) / (len_data ** (1 / 3.0))
            bin_count = max(1, int(ceil((max_data - min_data) / dx)))
            bins = [min_data + (dx * i) for i in range(bin_count + 1)]
            bins = [b for b in bins if b < max_data]
        else:
            dx = (max_data - min_data) / float(count)
            bins = [min_data + (dx * i) for i in range(count)]

        if with_max:
            bins.append(float(max_data))

        return bins

    def get_histogram_counts(self, bins=None, **kw):
        """Produces a list of ``(bin, count)`` pairs comprising a histogram of
        the Stats object's data, using fixed-width bins. See
        :meth:`Stats.format_histogram` for more details.

        Args:
            bins (int): maximum number of bins, or list of
                floating-point bin boundaries. Defaults to the output of
                Freedman's algorithm.
            bin_digits (int): Number of digits used to round down the
                bin boundaries. Defaults to 1.

        The output of this method can be stored and/or modified, and
        then passed to :func:`statsutils.format_histogram_counts` to
        achieve the same text formatting as the
        :meth:`~Stats.format_histogram` method. This can be useful for
        snapshotting over time.
        """
        bin_digits = int(kw.pop('bin_digits', 1))
        if kw:
            raise TypeError('unexpected keyword arguments: %r' % kw.keys())

        if not bins:
            bins = self._get_bin_bounds()
        else:
            try:
                bin_count = int(bins)
            except TypeError:
                try:
                    bins = [float(x) for x in bins]
                except Exception:
                    raise ValueError('bins expected integer bin count or list'
                                     ' of float bin boundaries, not %r' % bins)
                if self.min < bins[0]:
                    bins = [self.min] + bins
            else:
                bins = self._get_bin_bounds(bin_count)

        # floor and ceil really should have taken ndigits, like round()
        round_factor = 10.0 ** bin_digits
        bins = [floor(b * round_factor) / round_factor for b in bins]
        bins = sorted(set(bins))

        idxs = [bisect.bisect(bins, d) - 1 for d in self.data]
        count_map = {}  # would have used Counter, but py26 support
        for idx in idxs:
            try:
                count_map[idx] += 1
            except KeyError:
                count_map[idx] = 1

        bin_counts = [(b, count_map.get(i, 0)) for i, b in enumerate(bins)]

        return bin_counts

    def format_histogram(self, bins=None, **kw):
        """Produces a textual histogram of the data, using fixed-width bins,
        allowing for simple visualization, even in console environments.

        >>> data = list(range(20)) + list(range(5, 15)) + [10]
        >>> print(Stats(data).format_histogram(width=30))
         0.0:  5 #########
         4.4:  8 ###############
         8.9: 11 ####################
        13.3:  5 #########
        17.8:  2 ####

        In this histogram, five values are between 0.0 and 4.4, eight
        are between 4.4 and 8.9, and two values lie between 17.8 and
        the max.

        You can specify the number of bins, or provide a list of
        bin boundaries themselves. If no bins are provided, as in the
        example above, `Freedman's algorithm`_ for bin selection is
        used.

        Args:
            bins (int): Maximum number of bins for the
                histogram. Also accepts a list of floating-point
                bin boundaries. If the minimum boundary is still
                greater than the minimum value in the data, that
                boundary will be implicitly added. Defaults to the bin
                boundaries returned by `Freedman's algorithm`_.
            bin_digits (int): Number of digits to round each bin
                to. Note that bins are always rounded down to avoid
                clipping any data. Defaults to 1.
            width (int): integer number of columns in the longest line
                in the histogram. Defaults to console width on Python
                3.3+, or 80 if that is not available.
            format_bin (callable): Called on each bin to create a
                label for the final output. Use this function to add
                units, such as "ms" for milliseconds.

        Should you want something more programmatically reusable, see
        the :meth:`~Stats.get_histogram_counts` method, the output of
        which is used by ``format_histogram``. The
        :meth:`~Stats.describe` method is another useful summarization
        method, albeit less visual.

        .. _Freedman's algorithm: https://en.wikipedia.org/wiki/Freedman%E2%80%93Diaconis_rule
        """
        width = kw.pop('width', None)
        format_bin = kw.pop('format_bin', None)
        bin_counts = self.get_histogram_counts(bins=bins, **kw)
        return format_histogram_counts(bin_counts,
                                       width=width,
                                       format_bin=format_bin)

    def describe(self, quantiles=None, format=None):
        """Provides standard summary statistics for the data in the Stats
        object, in one of several convenient formats.

        Args:
            quantiles (list): A list of numeric values to use as
                quantiles in the resulting summary. All values must be
                0.0-1.0, with 0.5 representing the median. Defaults to
                ``[0.25, 0.5, 0.75]``, representing the standard
                quartiles.
            format (str): Controls the return type of the function,
                with one of three valid values: ``"dict"`` gives back
                a :class:`dict` with the appropriate keys and
                values. ``"list"`` is a list of key-value pairs in an
                order suitable to pass to an OrderedDict or HTML
                table. ``"text"`` converts the values to text suitable
                for printing, as seen below.

        Here is the information returned by a default ``describe``, as
        presented in the ``"text"`` format:

        >>> stats = Stats(range(1, 8))
        >>> print(stats.describe(format='text'))
        count:    7
        mean:     4.0
        std_dev:  2.0
        mad:      2.0
        min:      1
        0.25:     2.5
        0.5:      4
        0.75:     5.5
        max:      7

        For more advanced descriptive statistics, check out my blog
        post on the topic `Statistics for Software
        <https://www.paypal-engineering.com/2016/04/11/statistics-for-software/>`_.

        """
        if format is None:
            format = 'dict'
        elif format not in ('dict', 'list', 'text'):
            raise ValueError('invalid format for describe,'
                             ' expected one of "dict"/"list"/"text", not %r'
                             % format)
        quantiles = quantiles or [0.25, 0.5, 0.75]
        q_items = []
        for q in quantiles:
            q_val = self.get_quantile(q)
            q_items.append((str(q), q_val))

        items = [('count', self.count),
                 ('mean', self.mean),
                 ('std_dev', self.std_dev),
                 ('mad', self.mad),
                 ('min', self.min)]

        items.extend(q_items)
        items.append(('max', self.max))
        if format == 'dict':
            ret = dict(items)
        elif format == 'list':
            ret = items
        elif format == 'text':
            ret = '\n'.join(['%s%s' % ((label + ':').ljust(10), val)
                             for label, val in items])
        return ret


def describe(data, quantiles=None, format=None):
    """A convenience function to get standard summary statistics useful
    for describing most data. See :meth:`Stats.describe` for more
    details.

    >>> print(describe(range(7), format='text'))
    count:    7
    mean:     3.0
    std_dev:  2.0
    mad:      2.0
    min:      0
    0.25:     1.5
    0.5:      3
    0.75:     4.5
    max:      6

    See :meth:`Stats.format_histogram` for another very useful
    summarization that uses textual visualization.
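
    The default format is a :class:`dict`, handy for programmatic
    access (an illustrative lookup, using the same data as above):

    >>> describe(range(7))['mean']
    3.0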
+ """ + return Stats(data).describe(quantiles=quantiles, format=format) + + +def _get_conv_func(attr_name): + def stats_helper(data, default=0.0): + return getattr(Stats(data, default=default, use_copy=False), + attr_name) + return stats_helper + + +for attr_name, attr in list(Stats.__dict__.items()): + if isinstance(attr, _StatsProperty): + if attr_name in ('max', 'min', 'count'): # don't shadow builtins + continue + if attr_name in ('mad',): # convenience aliases + continue + func = _get_conv_func(attr_name) + func.__doc__ = attr.func.__doc__ + globals()[attr_name] = func + delattr(Stats, '_calc_' + attr_name) +# cleanup +del attr +del attr_name +del func + + +def format_histogram_counts(bin_counts, width=None, format_bin=None): + """The formatting logic behind :meth:`Stats.format_histogram`, which + takes the output of :meth:`Stats.get_histogram_counts`, and passes + them to this function. + + Args: + bin_counts (list): A list of bin values to counts. + width (int): Number of character columns in the text output, + defaults to 80 or console width in Python 3.3+. + format_bin (callable): Used to convert bin values into string + labels. + """ + lines = [] + if not format_bin: + format_bin = lambda v: v + if not width: + try: + import shutil # python 3 convenience + width = shutil.get_terminal_size()[0] + except Exception: + width = 80 + + bins = [b for b, _ in bin_counts] + count_max = max([count for _, count in bin_counts]) + count_cols = len(str(count_max)) + + labels = ['%s' % format_bin(b) for b in bins] + label_cols = max([len(l) for l in labels]) + tmp_line = '%s: %s #' % ('x' * label_cols, count_max) + + bar_cols = max(width - len(tmp_line), 3) + line_k = float(bar_cols) / count_max + tmpl = "{label:>{label_cols}}: {count:>{count_cols}} {bar}" + for label, (bin_val, count) in zip(labels, bin_counts): + bar_len = int(round(count * line_k)) + bar = ('#' * bar_len) or '|' + line = tmpl.format(label=label, + label_cols=label_cols, + count=count, + count_cols=count_cols, + bar=bar) + lines.append(line) + + return '\n'.join(lines)