comparison env/lib/python3.7/site-packages/boltons/listutils.py @ 0:26e78fe6e8c4 draft

"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author shellac
date Sat, 02 May 2020 07:14:21 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:26e78fe6e8c4
1 # -*- coding: utf-8 -*-
2 """Python's builtin :class:`list` is a very fast and efficient
3 sequence type, but it could be better for certain access patterns,
4 such as non-sequential insertion into a large lists. ``listutils``
5 provides a pure-Python solution to this problem.
6
7 For utilities for working with iterables and lists, check out
8 :mod:`iterutils`. For the a :class:`list`-based version of
9 :class:`collections.namedtuple`, check out :mod:`namedutils`.
10 """
11
12 from __future__ import print_function, division
13
14 import operator
15 from math import log as math_log
16 from itertools import chain, islice
17
18 try:
19 from typeutils import make_sentinel
20 _MISSING = make_sentinel(var_name='_MISSING')
21 except ImportError:
22 _MISSING = object()
23
24 try:
25 xrange
26 except NameError:
27 # Python 3 compat
28 xrange = range
29
30 # TODO: expose splaylist?
31 __all__ = ['BList', 'BarrelList']
32
33
34 # TODO: comparators
35 # TODO: keep track of list lengths and bisect to the right list for
36 # faster getitem (and slightly slower setitem and delitem ops)
37
38 class BarrelList(list):
39 """The ``BarrelList`` is a :class:`list` subtype backed by many
40 dynamically-scaled sublists, to provide better scaling and random
41 insertion/deletion characteristics. It is a subtype of the builtin
42 :class:`list` and has an identical API, supporting indexing,
43 slicing, sorting, etc. If application requirements call for
44 something more performant, consider the `blist module available on
45 PyPI`_.
46
47 The name comes by way of Kurt Rose, who said it reminded him of
48 barrel shifters. Not sure how, but it's BList-like, so the name
49 stuck. BList is of course a reference to `B-trees`_.
50
51 Args:
52 iterable: An optional iterable of initial values for the list.
53
54 >>> blist = BList(xrange(100000))
55 >>> blist.pop(50000)
56 50000
57 >>> len(blist)
58 99999
59 >>> len(blist.lists) # how many underlying lists
60 8
61 >>> slice_idx = blist.lists[0][-1]
62 >>> blist[slice_idx:slice_idx + 2]
63 BarrelList([11637, 11638])
64
65 Slicing is supported and works just fine across list borders,
66 returning another instance of the BarrelList.
67
68 .. _blist module available on PyPI: https://pypi.python.org/pypi/blist
69 .. _B-trees: https://en.wikipedia.org/wiki/B-tree
70
71 """
72
73 _size_factor = 1520
74 "This size factor is the result of tuning using the tune() function below."
75
76 def __init__(self, iterable=None):
77 self.lists = [[]]
78 if iterable:
79 self.extend(iterable)
80
81 @property
82 def _cur_size_limit(self):
83 len_self, size_factor = len(self), self._size_factor
84 return int(round(size_factor * math_log(len_self + 2, 2)))
85
86 def _translate_index(self, index):
87 if index < 0:
88 index += len(self)
89 rel_idx, lists = index, self.lists
90 for list_idx in range(len(lists)):
91 len_list = len(lists[list_idx])
92 if rel_idx < len_list:
93 break
94 rel_idx -= len_list
95 if rel_idx < 0:
96 return None, None
97 return list_idx, rel_idx
98
99 def _balance_list(self, list_idx):
100 if list_idx < 0:
101 list_idx += len(self.lists)
102 cur_list, len_self = self.lists[list_idx], len(self)
103 size_limit = self._cur_size_limit
104 if len(cur_list) > size_limit:
105 half_limit = size_limit // 2
106 while len(cur_list) > half_limit:
107 next_list_idx = list_idx + 1
108 self.lists.insert(next_list_idx, cur_list[-half_limit:])
109 del cur_list[-half_limit:]
110 return True
111 return False
112
113 def insert(self, index, item):
114 if len(self.lists) == 1:
115 self.lists[0].insert(index, item)
116 self._balance_list(0)
117 else:
118 list_idx, rel_idx = self._translate_index(index)
119 if list_idx is None:
120 raise IndexError()
121 self.lists[list_idx].insert(rel_idx, item)
122 self._balance_list(list_idx)
123 return
124
125 def append(self, item):
126 self.lists[-1].append(item)
127
128 def extend(self, iterable):
129 self.lists[-1].extend(iterable)
130
131 def pop(self, *a):
132 lists = self.lists
133 if len(lists) == 1 and not a:
134 return self.lists[0].pop()
135 index = a and a[0]
136 if index == () or index is None or index == -1:
137 ret = lists[-1].pop()
138 if len(lists) > 1 and not lists[-1]:
139 lists.pop()
140 else:
141 list_idx, rel_idx = self._translate_index(index)
142 if list_idx is None:
143 raise IndexError()
144 ret = lists[list_idx].pop(rel_idx)
145 self._balance_list(list_idx)
146 return ret
147
148 def iter_slice(self, start, stop, step=None):
149 iterable = self # TODO: optimization opportunities abound
150 # start_list_idx, stop_list_idx = 0, len(self.lists)
151 if start is None:
152 start = 0
153 if stop is None:
154 stop = len(self)
155 if step is not None and step < 0:
156 step = -step
157 start, stop = -start, -stop - 1
158 iterable = reversed(self)
159 if start < 0:
160 start += len(self)
161 # start_list_idx, start_rel_idx = self._translate_index(start)
162 if stop < 0:
163 stop += len(self)
164 # stop_list_idx, stop_rel_idx = self._translate_index(stop)
165 return islice(iterable, start, stop, step)
166
167 def del_slice(self, start, stop, step=None):
168 if step is not None and abs(step) > 1: # punt
169 new_list = chain(self.iter_slice(0, start, step),
170 self.iter_slice(stop, None, step))
171 self.lists[0][:] = new_list
172 self._balance_list(0)
173 return
174 if start is None:
175 start = 0
176 if stop is None:
177 stop = len(self)
178 start_list_idx, start_rel_idx = self._translate_index(start)
179 stop_list_idx, stop_rel_idx = self._translate_index(stop)
180 if start_list_idx is None:
181 raise IndexError()
182 if stop_list_idx is None:
183 raise IndexError()
184
185 if start_list_idx == stop_list_idx:
186 del self.lists[start_list_idx][start_rel_idx:stop_rel_idx]
187 elif start_list_idx < stop_list_idx:
188 del self.lists[start_list_idx + 1:stop_list_idx]
189 del self.lists[start_list_idx][start_rel_idx:]
190 del self.lists[stop_list_idx][:stop_rel_idx]
191 else:
192 assert False, ('start list index should never translate to'
193 ' greater than stop list index')
194
195 __delslice__ = del_slice
196
197 @classmethod
198 def from_iterable(cls, it):
199 return cls(it)
200
201 def __iter__(self):
202 return chain(*self.lists)
203
204 def __reversed__(self):
205 return chain.from_iterable(reversed(l) for l in reversed(self.lists))
206
207 def __len__(self):
208 return sum([len(l) for l in self.lists])
209
210 def __contains__(self, item):
211 for cur in self.lists:
212 if item in cur:
213 return True
214 return False
215
216 def __getitem__(self, index):
217 try:
218 start, stop, step = index.start, index.stop, index.step
219 except AttributeError:
220 index = operator.index(index)
221 else:
222 iter_slice = self.iter_slice(start, stop, step)
223 ret = self.from_iterable(iter_slice)
224 return ret
225 list_idx, rel_idx = self._translate_index(index)
226 if list_idx is None:
227 raise IndexError()
228 return self.lists[list_idx][rel_idx]
229
230 def __delitem__(self, index):
231 try:
232 start, stop, step = index.start, index.stop, index.step
233 except AttributeError:
234 index = operator.index(index)
235 else:
236 self.del_slice(start, stop, step)
237 return
238 list_idx, rel_idx = self._translate_index(index)
239 if list_idx is None:
240 raise IndexError()
241 del self.lists[list_idx][rel_idx]
242
243 def __setitem__(self, index, item):
244 try:
245 start, stop, step = index.start, index.stop, index.step
246 except AttributeError:
247 index = operator.index(index)
248 else:
249 if len(self.lists) == 1:
250 self.lists[0][index] = item
251 else:
252 tmp = list(self)
253 tmp[index] = item
254 self.lists[:] = [tmp]
255 self._balance_list(0)
256 return
257 list_idx, rel_idx = self._translate_index(index)
258 if list_idx is None:
259 raise IndexError()
260 self.lists[list_idx][rel_idx] = item
261
262 def __getslice__(self, start, stop):
263 iter_slice = self.iter_slice(start, stop, 1)
264 return self.from_iterable(iter_slice)
265
266 def __setslice__(self, start, stop, sequence):
267 if len(self.lists) == 1:
268 self.lists[0][start:stop] = sequence
269 else:
270 tmp = list(self)
271 tmp[start:stop] = sequence
272 self.lists[:] = [tmp]
273 self._balance_list(0)
274 return
275
276 def __repr__(self):
277 return '%s(%r)' % (self.__class__.__name__, list(self))
278
279 def sort(self):
280 # poor pythonist's mergesort, it's faster than sorted(self)
281 # when the lists' average length is greater than 512.
282 if len(self.lists) == 1:
283 self.lists[0].sort()
284 else:
285 for li in self.lists:
286 li.sort()
287 tmp_sorted = sorted(chain.from_iterable(self.lists))
288 del self.lists[:]
289 self.lists[0] = tmp_sorted
290 self._balance_list(0)
291
292 def reverse(self):
293 for cur in self.lists:
294 cur.reverse()
295 self.lists.reverse()
296
297 def count(self, item):
298 return sum([cur.count(item) for cur in self.lists])
299
300 def index(self, item):
301 len_accum = 0
302 for cur in self.lists:
303 try:
304 rel_idx = cur.index(item)
305 return len_accum + rel_idx
306 except ValueError:
307 len_accum += len(cur)
308 raise ValueError('%r is not in list' % (item,))
309
310
311 BList = BarrelList
312
313
314 class SplayList(list):
315 """Like a `splay tree`_, the SplayList facilitates moving higher
316 utility items closer to the front of the list for faster access.
317
318 .. _splay tree: https://en.wikipedia.org/wiki/Splay_tree
319 """
320
321 def shift(self, item_index, dest_index=0):
322 if item_index == dest_index:
323 return
324 item = self.pop(item_index)
325 self.insert(dest_index, item)
326
327 def swap(self, item_index, dest_index):
328 self[dest_index], self[item_index] = self[item_index], self[dest_index]