Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/boltons/listutils.py @ 5:9b1c78e6ba9c draft default tip
"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
| author | shellac |
|---|---|
| date | Mon, 01 Jun 2020 08:59:25 -0400 |
| parents | 79f47841a781 |
| children |
comparison
equal
deleted
inserted
replaced
| 4:79f47841a781 | 5:9b1c78e6ba9c |
|---|---|
| 1 # -*- coding: utf-8 -*- | |
| 2 """Python's builtin :class:`list` is a very fast and efficient | |
| 3 sequence type, but it could be better for certain access patterns, | |
| 4 such as non-sequential insertion into a large lists. ``listutils`` | |
| 5 provides a pure-Python solution to this problem. | |
| 6 | |
| 7 For utilities for working with iterables and lists, check out | |
| 8 :mod:`iterutils`. For the a :class:`list`-based version of | |
| 9 :class:`collections.namedtuple`, check out :mod:`namedutils`. | |
| 10 """ | |
| 11 | |
| 12 from __future__ import print_function, division | |
| 13 | |
| 14 import operator | |
| 15 from math import log as math_log | |
| 16 from itertools import chain, islice | |
| 17 | |
| 18 try: | |
| 19 from typeutils import make_sentinel | |
| 20 _MISSING = make_sentinel(var_name='_MISSING') | |
| 21 except ImportError: | |
| 22 _MISSING = object() | |
| 23 | |
| 24 try: | |
| 25 xrange | |
| 26 except NameError: | |
| 27 # Python 3 compat | |
| 28 xrange = range | |
| 29 | |
| 30 # TODO: expose splaylist? | |
| 31 __all__ = ['BList', 'BarrelList'] | |
| 32 | |
| 33 | |
| 34 # TODO: comparators | |
| 35 # TODO: keep track of list lengths and bisect to the right list for | |
| 36 # faster getitem (and slightly slower setitem and delitem ops) | |
| 37 | |
| 38 class BarrelList(list): | |
| 39 """The ``BarrelList`` is a :class:`list` subtype backed by many | |
| 40 dynamically-scaled sublists, to provide better scaling and random | |
| 41 insertion/deletion characteristics. It is a subtype of the builtin | |
| 42 :class:`list` and has an identical API, supporting indexing, | |
| 43 slicing, sorting, etc. If application requirements call for | |
| 44 something more performant, consider the `blist module available on | |
| 45 PyPI`_. | |
| 46 | |
| 47 The name comes by way of Kurt Rose, who said it reminded him of | |
| 48 barrel shifters. Not sure how, but it's BList-like, so the name | |
| 49 stuck. BList is of course a reference to `B-trees`_. | |
| 50 | |
| 51 Args: | |
| 52 iterable: An optional iterable of initial values for the list. | |
| 53 | |
| 54 >>> blist = BList(xrange(100000)) | |
| 55 >>> blist.pop(50000) | |
| 56 50000 | |
| 57 >>> len(blist) | |
| 58 99999 | |
| 59 >>> len(blist.lists) # how many underlying lists | |
| 60 8 | |
| 61 >>> slice_idx = blist.lists[0][-1] | |
| 62 >>> blist[slice_idx:slice_idx + 2] | |
| 63 BarrelList([11637, 11638]) | |
| 64 | |
| 65 Slicing is supported and works just fine across list borders, | |
| 66 returning another instance of the BarrelList. | |
| 67 | |
| 68 .. _blist module available on PyPI: https://pypi.python.org/pypi/blist | |
| 69 .. _B-trees: https://en.wikipedia.org/wiki/B-tree | |
| 70 | |
| 71 """ | |
| 72 | |
| 73 _size_factor = 1520 | |
| 74 "This size factor is the result of tuning using the tune() function below." | |
| 75 | |
| 76 def __init__(self, iterable=None): | |
| 77 self.lists = [[]] | |
| 78 if iterable: | |
| 79 self.extend(iterable) | |
| 80 | |
| 81 @property | |
| 82 def _cur_size_limit(self): | |
| 83 len_self, size_factor = len(self), self._size_factor | |
| 84 return int(round(size_factor * math_log(len_self + 2, 2))) | |
| 85 | |
| 86 def _translate_index(self, index): | |
| 87 if index < 0: | |
| 88 index += len(self) | |
| 89 rel_idx, lists = index, self.lists | |
| 90 for list_idx in range(len(lists)): | |
| 91 len_list = len(lists[list_idx]) | |
| 92 if rel_idx < len_list: | |
| 93 break | |
| 94 rel_idx -= len_list | |
| 95 if rel_idx < 0: | |
| 96 return None, None | |
| 97 return list_idx, rel_idx | |
| 98 | |
| 99 def _balance_list(self, list_idx): | |
| 100 if list_idx < 0: | |
| 101 list_idx += len(self.lists) | |
| 102 cur_list, len_self = self.lists[list_idx], len(self) | |
| 103 size_limit = self._cur_size_limit | |
| 104 if len(cur_list) > size_limit: | |
| 105 half_limit = size_limit // 2 | |
| 106 while len(cur_list) > half_limit: | |
| 107 next_list_idx = list_idx + 1 | |
| 108 self.lists.insert(next_list_idx, cur_list[-half_limit:]) | |
| 109 del cur_list[-half_limit:] | |
| 110 return True | |
| 111 return False | |
| 112 | |
| 113 def insert(self, index, item): | |
| 114 if len(self.lists) == 1: | |
| 115 self.lists[0].insert(index, item) | |
| 116 self._balance_list(0) | |
| 117 else: | |
| 118 list_idx, rel_idx = self._translate_index(index) | |
| 119 if list_idx is None: | |
| 120 raise IndexError() | |
| 121 self.lists[list_idx].insert(rel_idx, item) | |
| 122 self._balance_list(list_idx) | |
| 123 return | |
| 124 | |
| 125 def append(self, item): | |
| 126 self.lists[-1].append(item) | |
| 127 | |
| 128 def extend(self, iterable): | |
| 129 self.lists[-1].extend(iterable) | |
| 130 | |
| 131 def pop(self, *a): | |
| 132 lists = self.lists | |
| 133 if len(lists) == 1 and not a: | |
| 134 return self.lists[0].pop() | |
| 135 index = a and a[0] | |
| 136 if index == () or index is None or index == -1: | |
| 137 ret = lists[-1].pop() | |
| 138 if len(lists) > 1 and not lists[-1]: | |
| 139 lists.pop() | |
| 140 else: | |
| 141 list_idx, rel_idx = self._translate_index(index) | |
| 142 if list_idx is None: | |
| 143 raise IndexError() | |
| 144 ret = lists[list_idx].pop(rel_idx) | |
| 145 self._balance_list(list_idx) | |
| 146 return ret | |
| 147 | |
| 148 def iter_slice(self, start, stop, step=None): | |
| 149 iterable = self # TODO: optimization opportunities abound | |
| 150 # start_list_idx, stop_list_idx = 0, len(self.lists) | |
| 151 if start is None: | |
| 152 start = 0 | |
| 153 if stop is None: | |
| 154 stop = len(self) | |
| 155 if step is not None and step < 0: | |
| 156 step = -step | |
| 157 start, stop = -start, -stop - 1 | |
| 158 iterable = reversed(self) | |
| 159 if start < 0: | |
| 160 start += len(self) | |
| 161 # start_list_idx, start_rel_idx = self._translate_index(start) | |
| 162 if stop < 0: | |
| 163 stop += len(self) | |
| 164 # stop_list_idx, stop_rel_idx = self._translate_index(stop) | |
| 165 return islice(iterable, start, stop, step) | |
| 166 | |
| 167 def del_slice(self, start, stop, step=None): | |
| 168 if step is not None and abs(step) > 1: # punt | |
| 169 new_list = chain(self.iter_slice(0, start, step), | |
| 170 self.iter_slice(stop, None, step)) | |
| 171 self.lists[0][:] = new_list | |
| 172 self._balance_list(0) | |
| 173 return | |
| 174 if start is None: | |
| 175 start = 0 | |
| 176 if stop is None: | |
| 177 stop = len(self) | |
| 178 start_list_idx, start_rel_idx = self._translate_index(start) | |
| 179 stop_list_idx, stop_rel_idx = self._translate_index(stop) | |
| 180 if start_list_idx is None: | |
| 181 raise IndexError() | |
| 182 if stop_list_idx is None: | |
| 183 raise IndexError() | |
| 184 | |
| 185 if start_list_idx == stop_list_idx: | |
| 186 del self.lists[start_list_idx][start_rel_idx:stop_rel_idx] | |
| 187 elif start_list_idx < stop_list_idx: | |
| 188 del self.lists[start_list_idx + 1:stop_list_idx] | |
| 189 del self.lists[start_list_idx][start_rel_idx:] | |
| 190 del self.lists[stop_list_idx][:stop_rel_idx] | |
| 191 else: | |
| 192 assert False, ('start list index should never translate to' | |
| 193 ' greater than stop list index') | |
| 194 | |
| 195 __delslice__ = del_slice | |
| 196 | |
| 197 @classmethod | |
| 198 def from_iterable(cls, it): | |
| 199 return cls(it) | |
| 200 | |
| 201 def __iter__(self): | |
| 202 return chain(*self.lists) | |
| 203 | |
| 204 def __reversed__(self): | |
| 205 return chain.from_iterable(reversed(l) for l in reversed(self.lists)) | |
| 206 | |
| 207 def __len__(self): | |
| 208 return sum([len(l) for l in self.lists]) | |
| 209 | |
| 210 def __contains__(self, item): | |
| 211 for cur in self.lists: | |
| 212 if item in cur: | |
| 213 return True | |
| 214 return False | |
| 215 | |
| 216 def __getitem__(self, index): | |
| 217 try: | |
| 218 start, stop, step = index.start, index.stop, index.step | |
| 219 except AttributeError: | |
| 220 index = operator.index(index) | |
| 221 else: | |
| 222 iter_slice = self.iter_slice(start, stop, step) | |
| 223 ret = self.from_iterable(iter_slice) | |
| 224 return ret | |
| 225 list_idx, rel_idx = self._translate_index(index) | |
| 226 if list_idx is None: | |
| 227 raise IndexError() | |
| 228 return self.lists[list_idx][rel_idx] | |
| 229 | |
| 230 def __delitem__(self, index): | |
| 231 try: | |
| 232 start, stop, step = index.start, index.stop, index.step | |
| 233 except AttributeError: | |
| 234 index = operator.index(index) | |
| 235 else: | |
| 236 self.del_slice(start, stop, step) | |
| 237 return | |
| 238 list_idx, rel_idx = self._translate_index(index) | |
| 239 if list_idx is None: | |
| 240 raise IndexError() | |
| 241 del self.lists[list_idx][rel_idx] | |
| 242 | |
| 243 def __setitem__(self, index, item): | |
| 244 try: | |
| 245 start, stop, step = index.start, index.stop, index.step | |
| 246 except AttributeError: | |
| 247 index = operator.index(index) | |
| 248 else: | |
| 249 if len(self.lists) == 1: | |
| 250 self.lists[0][index] = item | |
| 251 else: | |
| 252 tmp = list(self) | |
| 253 tmp[index] = item | |
| 254 self.lists[:] = [tmp] | |
| 255 self._balance_list(0) | |
| 256 return | |
| 257 list_idx, rel_idx = self._translate_index(index) | |
| 258 if list_idx is None: | |
| 259 raise IndexError() | |
| 260 self.lists[list_idx][rel_idx] = item | |
| 261 | |
| 262 def __getslice__(self, start, stop): | |
| 263 iter_slice = self.iter_slice(start, stop, 1) | |
| 264 return self.from_iterable(iter_slice) | |
| 265 | |
| 266 def __setslice__(self, start, stop, sequence): | |
| 267 if len(self.lists) == 1: | |
| 268 self.lists[0][start:stop] = sequence | |
| 269 else: | |
| 270 tmp = list(self) | |
| 271 tmp[start:stop] = sequence | |
| 272 self.lists[:] = [tmp] | |
| 273 self._balance_list(0) | |
| 274 return | |
| 275 | |
| 276 def __repr__(self): | |
| 277 return '%s(%r)' % (self.__class__.__name__, list(self)) | |
| 278 | |
| 279 def sort(self): | |
| 280 # poor pythonist's mergesort, it's faster than sorted(self) | |
| 281 # when the lists' average length is greater than 512. | |
| 282 if len(self.lists) == 1: | |
| 283 self.lists[0].sort() | |
| 284 else: | |
| 285 for li in self.lists: | |
| 286 li.sort() | |
| 287 tmp_sorted = sorted(chain.from_iterable(self.lists)) | |
| 288 del self.lists[:] | |
| 289 self.lists[0] = tmp_sorted | |
| 290 self._balance_list(0) | |
| 291 | |
| 292 def reverse(self): | |
| 293 for cur in self.lists: | |
| 294 cur.reverse() | |
| 295 self.lists.reverse() | |
| 296 | |
| 297 def count(self, item): | |
| 298 return sum([cur.count(item) for cur in self.lists]) | |
| 299 | |
| 300 def index(self, item): | |
| 301 len_accum = 0 | |
| 302 for cur in self.lists: | |
| 303 try: | |
| 304 rel_idx = cur.index(item) | |
| 305 return len_accum + rel_idx | |
| 306 except ValueError: | |
| 307 len_accum += len(cur) | |
| 308 raise ValueError('%r is not in list' % (item,)) | |
| 309 | |
| 310 | |
| 311 BList = BarrelList | |
| 312 | |
| 313 | |
| 314 class SplayList(list): | |
| 315 """Like a `splay tree`_, the SplayList facilitates moving higher | |
| 316 utility items closer to the front of the list for faster access. | |
| 317 | |
| 318 .. _splay tree: https://en.wikipedia.org/wiki/Splay_tree | |
| 319 """ | |
| 320 | |
| 321 def shift(self, item_index, dest_index=0): | |
| 322 if item_index == dest_index: | |
| 323 return | |
| 324 item = self.pop(item_index) | |
| 325 self.insert(dest_index, item) | |
| 326 | |
| 327 def swap(self, item_index, dest_index): | |
| 328 self[dest_index], self[item_index] = self[item_index], self[dest_index] |
