comparison env/lib/python3.9/site-packages/boltons/iterutils.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 # -*- coding: utf-8 -*-
2 """:mod:`itertools` is full of great examples of Python generator
3 usage. However, there are still some critical gaps. ``iterutils``
4 fills many of those gaps with featureful, tested, and Pythonic
5 solutions.
6
7 Many of the functions below have two versions, one which
8 returns an iterator (denoted by the ``*_iter`` naming pattern), and a
9 shorter-named convenience form that returns a list. Some of the
10 following are based on examples in itertools docs.
11 """
12
13 import os
14 import math
15 import time
16 import codecs
17 import random
18 import itertools
19
20 try:
21 from collections.abc import Mapping, Sequence, Set, ItemsView, Iterable
22 except ImportError:
23 from collections import Mapping, Sequence, Set, ItemsView, Iterable
24
25
26 try:
27 from typeutils import make_sentinel
28 _UNSET = make_sentinel('_UNSET')
29 _REMAP_EXIT = make_sentinel('_REMAP_EXIT')
30 except ImportError:
31 _REMAP_EXIT = object()
32 _UNSET = object()
33
34 try:
35 from future_builtins import filter
36 from itertools import izip
37 _IS_PY3 = False
38 except ImportError:
39 # Python 3 compat
40 _IS_PY3 = True
41 basestring = (str, bytes)
42 unicode = str
43 izip, xrange = zip, range
44
45
46 def is_iterable(obj):
47 """Similar in nature to :func:`callable`, ``is_iterable`` returns
48 ``True`` if an object is `iterable`_, ``False`` if not.
49
50 >>> is_iterable([])
51 True
52 >>> is_iterable(object())
53 False
54
55 .. _iterable: https://docs.python.org/2/glossary.html#term-iterable
56 """
57 try:
58 iter(obj)
59 except TypeError:
60 return False
61 return True
62
63
64 def is_scalar(obj):
65 """A near-mirror of :func:`is_iterable`. Returns ``False`` if an
66 object is an iterable container type. Strings are considered
67 scalar as well, because strings are more often treated as whole
68 values as opposed to iterables of 1-character substrings.
69
70 >>> is_scalar(object())
71 True
72 >>> is_scalar(range(10))
73 False
74 >>> is_scalar('hello')
75 True
76 """
77 return not is_iterable(obj) or isinstance(obj, basestring)
78
79
80 def is_collection(obj):
81 """The opposite of :func:`is_scalar`. Returns ``True`` if an object
82 is an iterable other than a string.
83
84 >>> is_collection(object())
85 False
86 >>> is_collection(range(10))
87 True
88 >>> is_collection('hello')
89 False
90 """
91 return is_iterable(obj) and not isinstance(obj, basestring)
92
93
94 def split(src, sep=None, maxsplit=None):
95 """Splits an iterable based on a separator. Like :meth:`str.split`,
96 but for all iterables. Returns a list of lists.
97
98 >>> split(['hi', 'hello', None, None, 'sup', None, 'soap', None])
99 [['hi', 'hello'], ['sup'], ['soap']]
100
101 See :func:`split_iter` docs for more info.
102 """
103 return list(split_iter(src, sep, maxsplit))
104
105
106 def split_iter(src, sep=None, maxsplit=None):
107 """Splits an iterable based on a separator, *sep*, a max of
108 *maxsplit* times (no max by default). *sep* can be:
109
110 * a single value
111 * an iterable of separators
112 * a single-argument callable that returns True when a separator is
113 encountered
114
115 ``split_iter()`` yields lists of non-separator values. A separator will
116 never appear in the output.
117
118 >>> list(split_iter(['hi', 'hello', None, None, 'sup', None, 'soap', None]))
119 [['hi', 'hello'], ['sup'], ['soap']]
120
121 Note that ``split_iter`` is based on :func:`str.split`, so if
122 *sep* is ``None``, ``split()`` **groups** separators. If empty lists
123 are desired between two contiguous ``None`` values, simply use
124 ``sep=[None]``:
125
126 >>> list(split_iter(['hi', 'hello', None, None, 'sup', None]))
127 [['hi', 'hello'], ['sup']]
128 >>> list(split_iter(['hi', 'hello', None, None, 'sup', None], sep=[None]))
129 [['hi', 'hello'], [], ['sup'], []]
130
131 Using a callable separator:
132
133 >>> falsy_sep = lambda x: not x
134 >>> list(split_iter(['hi', 'hello', None, '', 'sup', False], falsy_sep))
135 [['hi', 'hello'], [], ['sup'], []]
136
137 See :func:`split` for a list-returning version.
138
139 """
140 if not is_iterable(src):
141 raise TypeError('expected an iterable')
142
143 if maxsplit is not None:
144 maxsplit = int(maxsplit)
145 if maxsplit == 0:
146 yield [src]
147 return
148
149 if callable(sep):
150 sep_func = sep
151 elif not is_scalar(sep):
152 sep = frozenset(sep)
153 sep_func = lambda x: x in sep
154 else:
155 sep_func = lambda x: x == sep
156
157 cur_group = []
158 split_count = 0
159 for s in src:
160 if maxsplit is not None and split_count >= maxsplit:
161 sep_func = lambda x: False
162 if sep_func(s):
163 if sep is None and not cur_group:
164 # If sep is none, str.split() "groups" separators
165 # check the str.split() docs for more info
166 continue
167 split_count += 1
168 yield cur_group
169 cur_group = []
170 else:
171 cur_group.append(s)
172
173 if cur_group or sep is not None:
174 yield cur_group
175 return
176
177
178 def lstrip(iterable, strip_value=None):
179 """Strips values from the beginning of an iterable. Stripped items will
180 match the value of the argument strip_value. Functionality is analigous
181 to that of the method str.lstrip. Returns a list.
182
183 >>> lstrip(['Foo', 'Bar', 'Bam'], 'Foo')
184 ['Bar', 'Bam']
185
186 """
187 return list(lstrip_iter(iterable, strip_value))
188
189
190 def lstrip_iter(iterable, strip_value=None):
191 """Strips values from the beginning of an iterable. Stripped items will
192 match the value of the argument strip_value. Functionality is analigous
193 to that of the method str.lstrip. Returns a generator.
194
195 >>> list(lstrip_iter(['Foo', 'Bar', 'Bam'], 'Foo'))
196 ['Bar', 'Bam']
197
198 """
199 iterator = iter(iterable)
200 for i in iterator:
201 if i != strip_value:
202 yield i
203 break
204 for i in iterator:
205 yield i
206
207
208 def rstrip(iterable, strip_value=None):
209 """Strips values from the end of an iterable. Stripped items will
210 match the value of the argument strip_value. Functionality is analigous
211 to that of the method str.rstrip. Returns a list.
212
213 >>> rstrip(['Foo', 'Bar', 'Bam'], 'Bam')
214 ['Foo', 'Bar']
215
216 """
217 return list(rstrip_iter(iterable,strip_value))
218
219
220 def rstrip_iter(iterable, strip_value=None):
221 """Strips values from the end of an iterable. Stripped items will
222 match the value of the argument strip_value. Functionality is analigous
223 to that of the method str.rstrip. Returns a generator.
224
225 >>> list(rstrip_iter(['Foo', 'Bar', 'Bam'], 'Bam'))
226 ['Foo', 'Bar']
227
228 """
229 iterator = iter(iterable)
230 for i in iterator:
231 if i == strip_value:
232 cache = list()
233 cache.append(i)
234 broken = False
235 for i in iterator:
236 if i == strip_value:
237 cache.append(i)
238 else:
239 broken = True
240 break
241 if not broken: # Return to caller here because the end of the
242 return # iterator has been reached
243 for t in cache:
244 yield t
245 yield i
246
247
248 def strip(iterable, strip_value=None):
249 """Strips values from the beginning and end of an iterable. Stripped items
250 will match the value of the argument strip_value. Functionality is
251 analigous to that of the method str.strip. Returns a list.
252
253 >>> strip(['Fu', 'Foo', 'Bar', 'Bam', 'Fu'], 'Fu')
254 ['Foo', 'Bar', 'Bam']
255
256 """
257 return list(strip_iter(iterable,strip_value))
258
259
260 def strip_iter(iterable,strip_value=None):
261 """Strips values from the beginning and end of an iterable. Stripped items
262 will match the value of the argument strip_value. Functionality is
263 analigous to that of the method str.strip. Returns a generator.
264
265 >>> list(strip_iter(['Fu', 'Foo', 'Bar', 'Bam', 'Fu'], 'Fu'))
266 ['Foo', 'Bar', 'Bam']
267
268 """
269 return rstrip_iter(lstrip_iter(iterable,strip_value),strip_value)
270
271
272 def chunked(src, size, count=None, **kw):
273 """Returns a list of *count* chunks, each with *size* elements,
274 generated from iterable *src*. If *src* is not evenly divisible by
275 *size*, the final chunk will have fewer than *size* elements.
276 Provide the *fill* keyword argument to provide a pad value and
277 enable padding, otherwise no padding will take place.
278
279 >>> chunked(range(10), 3)
280 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
281 >>> chunked(range(10), 3, fill=None)
282 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, None, None]]
283 >>> chunked(range(10), 3, count=2)
284 [[0, 1, 2], [3, 4, 5]]
285
286 See :func:`chunked_iter` for more info.
287 """
288 chunk_iter = chunked_iter(src, size, **kw)
289 if count is None:
290 return list(chunk_iter)
291 else:
292 return list(itertools.islice(chunk_iter, count))
293
294
295 def chunked_iter(src, size, **kw):
296 """Generates *size*-sized chunks from *src* iterable. Unless the
297 optional *fill* keyword argument is provided, iterables not evenly
298 divisible by *size* will have a final chunk that is smaller than
299 *size*.
300
301 >>> list(chunked_iter(range(10), 3))
302 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
303 >>> list(chunked_iter(range(10), 3, fill=None))
304 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, None, None]]
305
306 Note that ``fill=None`` in fact uses ``None`` as the fill value.
307 """
308 # TODO: add count kwarg?
309 if not is_iterable(src):
310 raise TypeError('expected an iterable')
311 size = int(size)
312 if size <= 0:
313 raise ValueError('expected a positive integer chunk size')
314 do_fill = True
315 try:
316 fill_val = kw.pop('fill')
317 except KeyError:
318 do_fill = False
319 fill_val = None
320 if kw:
321 raise ValueError('got unexpected keyword arguments: %r' % kw.keys())
322 if not src:
323 return
324 postprocess = lambda chk: chk
325 if isinstance(src, basestring):
326 postprocess = lambda chk, _sep=type(src)(): _sep.join(chk)
327 if _IS_PY3 and isinstance(src, bytes):
328 postprocess = lambda chk: bytes(chk)
329 src_iter = iter(src)
330 while True:
331 cur_chunk = list(itertools.islice(src_iter, size))
332 if not cur_chunk:
333 break
334 lc = len(cur_chunk)
335 if lc < size and do_fill:
336 cur_chunk[lc:] = [fill_val] * (size - lc)
337 yield postprocess(cur_chunk)
338 return
339
340
341 def pairwise(src):
342 """Convenience function for calling :func:`windowed` on *src*, with
343 *size* set to 2.
344
345 >>> pairwise(range(5))
346 [(0, 1), (1, 2), (2, 3), (3, 4)]
347 >>> pairwise([])
348 []
349
350 The number of pairs is always one less than the number of elements
351 in the iterable passed in, except on empty inputs, which returns
352 an empty list.
353 """
354 return windowed(src, 2)
355
356
357 def pairwise_iter(src):
358 """Convenience function for calling :func:`windowed_iter` on *src*,
359 with *size* set to 2.
360
361 >>> list(pairwise_iter(range(5)))
362 [(0, 1), (1, 2), (2, 3), (3, 4)]
363 >>> list(pairwise_iter([]))
364 []
365
366 The number of pairs is always one less than the number of elements
367 in the iterable passed in, or zero, when *src* is empty.
368
369 """
370 return windowed_iter(src, 2)
371
372
373 def windowed(src, size):
374 """Returns tuples with exactly length *size*. If the iterable is
375 too short to make a window of length *size*, no tuples are
376 returned. See :func:`windowed_iter` for more.
377 """
378 return list(windowed_iter(src, size))
379
380
381 def windowed_iter(src, size):
382 """Returns tuples with length *size* which represent a sliding
383 window over iterable *src*.
384
385 >>> list(windowed_iter(range(7), 3))
386 [(0, 1, 2), (1, 2, 3), (2, 3, 4), (3, 4, 5), (4, 5, 6)]
387
388 If the iterable is too short to make a window of length *size*,
389 then no window tuples are returned.
390
391 >>> list(windowed_iter(range(3), 5))
392 []
393 """
394 # TODO: lists? (for consistency)
395 tees = itertools.tee(src, size)
396 try:
397 for i, t in enumerate(tees):
398 for _ in xrange(i):
399 next(t)
400 except StopIteration:
401 return izip([])
402 return izip(*tees)
403
404
405 def xfrange(stop, start=None, step=1.0):
406 """Same as :func:`frange`, but generator-based instead of returning a
407 list.
408
409 >>> tuple(xfrange(1, 3, step=0.75))
410 (1.0, 1.75, 2.5)
411
412 See :func:`frange` for more details.
413 """
414 if not step:
415 raise ValueError('step must be non-zero')
416 if start is None:
417 start, stop = 0.0, stop * 1.0
418 else:
419 # swap when all args are used
420 stop, start = start * 1.0, stop * 1.0
421 cur = start
422 while cur < stop:
423 yield cur
424 cur += step
425
426
427 def frange(stop, start=None, step=1.0):
428 """A :func:`range` clone for float-based ranges.
429
430 >>> frange(5)
431 [0.0, 1.0, 2.0, 3.0, 4.0]
432 >>> frange(6, step=1.25)
433 [0.0, 1.25, 2.5, 3.75, 5.0]
434 >>> frange(100.5, 101.5, 0.25)
435 [100.5, 100.75, 101.0, 101.25]
436 >>> frange(5, 0)
437 []
438 >>> frange(5, 0, step=-1.25)
439 [5.0, 3.75, 2.5, 1.25]
440 """
441 if not step:
442 raise ValueError('step must be non-zero')
443 if start is None:
444 start, stop = 0.0, stop * 1.0
445 else:
446 # swap when all args are used
447 stop, start = start * 1.0, stop * 1.0
448 count = int(math.ceil((stop - start) / step))
449 ret = [None] * count
450 if not ret:
451 return ret
452 ret[0] = start
453 for i in xrange(1, count):
454 ret[i] = ret[i - 1] + step
455 return ret
456
457
458 def backoff(start, stop, count=None, factor=2.0, jitter=False):
459 """Returns a list of geometrically-increasing floating-point numbers,
460 suitable for usage with `exponential backoff`_. Exactly like
461 :func:`backoff_iter`, but without the ``'repeat'`` option for
462 *count*. See :func:`backoff_iter` for more details.
463
464 .. _exponential backoff: https://en.wikipedia.org/wiki/Exponential_backoff
465
466 >>> backoff(1, 10)
467 [1.0, 2.0, 4.0, 8.0, 10.0]
468 """
469 if count == 'repeat':
470 raise ValueError("'repeat' supported in backoff_iter, not backoff")
471 return list(backoff_iter(start, stop, count=count,
472 factor=factor, jitter=jitter))
473
474
475 def backoff_iter(start, stop, count=None, factor=2.0, jitter=False):
476 """Generates a sequence of geometrically-increasing floats, suitable
477 for usage with `exponential backoff`_. Starts with *start*,
478 increasing by *factor* until *stop* is reached, optionally
479 stopping iteration once *count* numbers are yielded. *factor*
480 defaults to 2. In general retrying with properly-configured
481 backoff creates a better-behaved component for a larger service
482 ecosystem.
483
484 .. _exponential backoff: https://en.wikipedia.org/wiki/Exponential_backoff
485
486 >>> list(backoff_iter(1.0, 10.0, count=5))
487 [1.0, 2.0, 4.0, 8.0, 10.0]
488 >>> list(backoff_iter(1.0, 10.0, count=8))
489 [1.0, 2.0, 4.0, 8.0, 10.0, 10.0, 10.0, 10.0]
490 >>> list(backoff_iter(0.25, 100.0, factor=10))
491 [0.25, 2.5, 25.0, 100.0]
492
493 A simplified usage example:
494
495 .. code-block:: python
496
497 for timeout in backoff_iter(0.25, 5.0):
498 try:
499 res = network_call()
500 break
501 except Exception as e:
502 log(e)
503 time.sleep(timeout)
504
505 An enhancement for large-scale systems would be to add variation,
506 or *jitter*, to timeout values. This is done to avoid a thundering
507 herd on the receiving end of the network call.
508
509 Finally, for *count*, the special value ``'repeat'`` can be passed to
510 continue yielding indefinitely.
511
512 Args:
513
514 start (float): Positive number for baseline.
515 stop (float): Positive number for maximum.
516 count (int): Number of steps before stopping
517 iteration. Defaults to the number of steps between *start* and
518 *stop*. Pass the string, `'repeat'`, to continue iteration
519 indefinitely.
520 factor (float): Rate of exponential increase. Defaults to `2.0`,
521 e.g., `[1, 2, 4, 8, 16]`.
522 jitter (float): A factor between `-1.0` and `1.0`, used to
523 uniformly randomize and thus spread out timeouts in a distributed
524 system, avoiding rhythm effects. Positive values use the base
525 backoff curve as a maximum, negative values use the curve as a
526 minimum. Set to 1.0 or `True` for a jitter approximating
527 Ethernet's time-tested backoff solution. Defaults to `False`.
528
529 """
530 start = float(start)
531 stop = float(stop)
532 factor = float(factor)
533 if start < 0.0:
534 raise ValueError('expected start >= 0, not %r' % start)
535 if factor < 1.0:
536 raise ValueError('expected factor >= 1.0, not %r' % factor)
537 if stop == 0.0:
538 raise ValueError('expected stop >= 0')
539 if stop < start:
540 raise ValueError('expected stop >= start, not %r' % stop)
541 if count is None:
542 denom = start if start else 1
543 count = 1 + math.ceil(math.log(stop/denom, factor))
544 count = count if start else count + 1
545 if count != 'repeat' and count < 0:
546 raise ValueError('count must be positive or "repeat", not %r' % count)
547 if jitter:
548 jitter = float(jitter)
549 if not (-1.0 <= jitter <= 1.0):
550 raise ValueError('expected jitter -1 <= j <= 1, not: %r' % jitter)
551
552 cur, i = start, 0
553 while count == 'repeat' or i < count:
554 if not jitter:
555 cur_ret = cur
556 elif jitter:
557 cur_ret = cur - (cur * jitter * random.random())
558 yield cur_ret
559 i += 1
560 if cur == 0:
561 cur = 1
562 elif cur < stop:
563 cur *= factor
564 if cur > stop:
565 cur = stop
566 return
567
568
569 def bucketize(src, key=bool, value_transform=None, key_filter=None):
570 """Group values in the *src* iterable by the value returned by *key*.
571
572 >>> bucketize(range(5))
573 {False: [0], True: [1, 2, 3, 4]}
574 >>> is_odd = lambda x: x % 2 == 1
575 >>> bucketize(range(5), is_odd)
576 {False: [0, 2, 4], True: [1, 3]}
577
578 *key* is :class:`bool` by default, but can either be a callable or a string
579 name of the attribute on which to bucketize objects.
580
581 >>> bucketize([1+1j, 2+2j, 1, 2], key='real')
582 {1.0: [(1+1j), 1], 2.0: [(2+2j), 2]}
583
584 Value lists are not deduplicated:
585
586 >>> bucketize([None, None, None, 'hello'])
587 {False: [None, None, None], True: ['hello']}
588
589 Bucketize into more than 3 groups
590
591 >>> bucketize(range(10), lambda x: x % 3)
592 {0: [0, 3, 6, 9], 1: [1, 4, 7], 2: [2, 5, 8]}
593
594 ``bucketize`` has a couple of advanced options useful in certain
595 cases. *value_transform* can be used to modify values as they are
596 added to buckets, and *key_filter* will allow excluding certain
597 buckets from being collected.
598
599 >>> bucketize(range(5), value_transform=lambda x: x*x)
600 {False: [0], True: [1, 4, 9, 16]}
601
602 >>> bucketize(range(10), key=lambda x: x % 3, key_filter=lambda k: k % 3 != 1)
603 {0: [0, 3, 6, 9], 2: [2, 5, 8]}
604
605 Note in some of these examples there were at most two keys, ``True`` and
606 ``False``, and each key present has a list with at least one
607 item. See :func:`partition` for a version specialized for binary
608 use cases.
609
610 """
611 if not is_iterable(src):
612 raise TypeError('expected an iterable')
613
614 if isinstance(key, basestring):
615 key_func = lambda x: getattr(x, key, x)
616 elif callable(key):
617 key_func = key
618 else:
619 raise TypeError('expected key to be callable or a string')
620
621 if value_transform is None:
622 value_transform = lambda x: x
623 if not callable(value_transform):
624 raise TypeError('expected callable value transform function')
625
626 ret = {}
627 for val in src:
628 key_of_val = key_func(val)
629 if key_filter is None or key_filter(key_of_val):
630 ret.setdefault(key_of_val, []).append(value_transform(val))
631 return ret
632
633
634 def partition(src, key=bool):
635 """No relation to :meth:`str.partition`, ``partition`` is like
636 :func:`bucketize`, but for added convenience returns a tuple of
637 ``(truthy_values, falsy_values)``.
638
639 >>> nonempty, empty = partition(['', '', 'hi', '', 'bye'])
640 >>> nonempty
641 ['hi', 'bye']
642
643 *key* defaults to :class:`bool`, but can be carefully overridden to
644 use either a function that returns either ``True`` or ``False`` or
645 a string name of the attribute on which to partition objects.
646
647 >>> import string
648 >>> is_digit = lambda x: x in string.digits
649 >>> decimal_digits, hexletters = partition(string.hexdigits, is_digit)
650 >>> ''.join(decimal_digits), ''.join(hexletters)
651 ('0123456789', 'abcdefABCDEF')
652 """
653 bucketized = bucketize(src, key)
654 return bucketized.get(True, []), bucketized.get(False, [])
655
656
657 def unique(src, key=None):
658 """``unique()`` returns a list of unique values, as determined by
659 *key*, in the order they first appeared in the input iterable,
660 *src*.
661
662 >>> ones_n_zeros = '11010110001010010101010'
663 >>> ''.join(unique(ones_n_zeros))
664 '10'
665
666 See :func:`unique_iter` docs for more details.
667 """
668 return list(unique_iter(src, key))
669
670
671 def unique_iter(src, key=None):
672 """Yield unique elements from the iterable, *src*, based on *key*,
673 in the order in which they first appeared in *src*.
674
675 >>> repetitious = [1, 2, 3] * 10
676 >>> list(unique_iter(repetitious))
677 [1, 2, 3]
678
679 By default, *key* is the object itself, but *key* can either be a
680 callable or, for convenience, a string name of the attribute on
681 which to uniqueify objects, falling back on identity when the
682 attribute is not present.
683
684 >>> pleasantries = ['hi', 'hello', 'ok', 'bye', 'yes']
685 >>> list(unique_iter(pleasantries, key=lambda x: len(x)))
686 ['hi', 'hello', 'bye']
687 """
688 if not is_iterable(src):
689 raise TypeError('expected an iterable, not %r' % type(src))
690 if key is None:
691 key_func = lambda x: x
692 elif callable(key):
693 key_func = key
694 elif isinstance(key, basestring):
695 key_func = lambda x: getattr(x, key, x)
696 else:
697 raise TypeError('"key" expected a string or callable, not %r' % key)
698 seen = set()
699 for i in src:
700 k = key_func(i)
701 if k not in seen:
702 seen.add(k)
703 yield i
704 return
705
706
707 def redundant(src, key=None, groups=False):
708 """The complement of :func:`unique()`.
709
710 By default returns non-unique/duplicate values as a list of the
711 *first* redundant value in *src*. Pass ``groups=True`` to get
712 groups of all values with redundancies, ordered by position of the
713 first redundant value. This is useful in conjunction with some
714 normalizing *key* function.
715
716 >>> redundant([1, 2, 3, 4])
717 []
718 >>> redundant([1, 2, 3, 2, 3, 3, 4])
719 [2, 3]
720 >>> redundant([1, 2, 3, 2, 3, 3, 4], groups=True)
721 [[2, 2], [3, 3, 3]]
722
723 An example using a *key* function to do case-insensitive
724 redundancy detection.
725
726 >>> redundant(['hi', 'Hi', 'HI', 'hello'], key=str.lower)
727 ['Hi']
728 >>> redundant(['hi', 'Hi', 'HI', 'hello'], groups=True, key=str.lower)
729 [['hi', 'Hi', 'HI']]
730
731 *key* should also be used when the values in *src* are not hashable.
732
733 .. note::
734
735 This output of this function is designed for reporting
736 duplicates in contexts when a unique input is desired. Due to
737 the grouped return type, there is no streaming equivalent of
738 this function for the time being.
739
740 """
741 if key is None:
742 pass
743 elif callable(key):
744 key_func = key
745 elif isinstance(key, basestring):
746 key_func = lambda x: getattr(x, key, x)
747 else:
748 raise TypeError('"key" expected a string or callable, not %r' % key)
749 seen = {} # key to first seen item
750 redundant_order = []
751 redundant_groups = {}
752 for i in src:
753 k = key_func(i) if key else i
754 if k not in seen:
755 seen[k] = i
756 else:
757 if k in redundant_groups:
758 if groups:
759 redundant_groups[k].append(i)
760 else:
761 redundant_order.append(k)
762 redundant_groups[k] = [seen[k], i]
763 if not groups:
764 ret = [redundant_groups[k][1] for k in redundant_order]
765 else:
766 ret = [redundant_groups[k] for k in redundant_order]
767 return ret
768
769
770 def one(src, default=None, key=None):
771 """Along the same lines as builtins, :func:`all` and :func:`any`, and
772 similar to :func:`first`, ``one()`` returns the single object in
773 the given iterable *src* that evaluates to ``True``, as determined
774 by callable *key*. If unset, *key* defaults to :class:`bool`. If
775 no such objects are found, *default* is returned. If *default* is
776 not passed, ``None`` is returned.
777
778 If *src* has more than one object that evaluates to ``True``, or
779 if there is no object that fulfills such condition, return
780 *default*. It's like an `XOR`_ over an iterable.
781
782 >>> one((True, False, False))
783 True
784 >>> one((True, False, True))
785 >>> one((0, 0, 'a'))
786 'a'
787 >>> one((0, False, None))
788 >>> one((True, True), default=False)
789 False
790 >>> bool(one(('', 1)))
791 True
792 >>> one((10, 20, 30, 42), key=lambda i: i > 40)
793 42
794
795 See `Martín Gaitán's original repo`_ for further use cases.
796
797 .. _Martín Gaitán's original repo: https://github.com/mgaitan/one
798 .. _XOR: https://en.wikipedia.org/wiki/Exclusive_or
799
800 """
801 ones = list(itertools.islice(filter(key, src), 2))
802 return ones[0] if len(ones) == 1 else default
803
804
805 def first(iterable, default=None, key=None):
806 """Return first element of *iterable* that evaluates to ``True``, else
807 return ``None`` or optional *default*. Similar to :func:`one`.
808
809 >>> first([0, False, None, [], (), 42])
810 42
811 >>> first([0, False, None, [], ()]) is None
812 True
813 >>> first([0, False, None, [], ()], default='ohai')
814 'ohai'
815 >>> import re
816 >>> m = first(re.match(regex, 'abc') for regex in ['b.*', 'a(.*)'])
817 >>> m.group(1)
818 'bc'
819
820 The optional *key* argument specifies a one-argument predicate function
821 like that used for *filter()*. The *key* argument, if supplied, should be
822 in keyword form. For example, finding the first even number in an iterable:
823
824 >>> first([1, 1, 3, 4, 5], key=lambda x: x % 2 == 0)
825 4
826
827 Contributed by Hynek Schlawack, author of `the original standalone module`_.
828
829 .. _the original standalone module: https://github.com/hynek/first
830 """
831 return next(filter(key, iterable), default)
832
833
834 def flatten_iter(iterable):
835 """``flatten_iter()`` yields all the elements from *iterable* while
836 collapsing any nested iterables.
837
838 >>> nested = [[1, 2], [[3], [4, 5]]]
839 >>> list(flatten_iter(nested))
840 [1, 2, 3, 4, 5]
841 """
842 for item in iterable:
843 if isinstance(item, Iterable) and not isinstance(item, basestring):
844 for subitem in flatten_iter(item):
845 yield subitem
846 else:
847 yield item
848
849 def flatten(iterable):
850 """``flatten()`` returns a collapsed list of all the elements from
851 *iterable* while collapsing any nested iterables.
852
853 >>> nested = [[1, 2], [[3], [4, 5]]]
854 >>> flatten(nested)
855 [1, 2, 3, 4, 5]
856 """
857 return list(flatten_iter(iterable))
858
859
860 def same(iterable, ref=_UNSET):
861 """``same()`` returns ``True`` when all values in *iterable* are
862 equal to one another, or optionally a reference value,
863 *ref*. Similar to :func:`all` and :func:`any` in that it evaluates
864 an iterable and returns a :class:`bool`. ``same()`` returns
865 ``True`` for empty iterables.
866
867 >>> same([])
868 True
869 >>> same([1])
870 True
871 >>> same(['a', 'a', 'a'])
872 True
873 >>> same(range(20))
874 False
875 >>> same([[], []])
876 True
877 >>> same([[], []], ref='test')
878 False
879
880 """
881 iterator = iter(iterable)
882 if ref is _UNSET:
883 ref = next(iterator, ref)
884 return all(val == ref for val in iterator)
885
886
887 def default_visit(path, key, value):
888 # print('visit(%r, %r, %r)' % (path, key, value))
889 return key, value
890
891 # enable the extreme: monkeypatching iterutils with a different default_visit
892 _orig_default_visit = default_visit
893
894
895 def default_enter(path, key, value):
896 # print('enter(%r, %r)' % (key, value))
897 if isinstance(value, basestring):
898 return value, False
899 elif isinstance(value, Mapping):
900 return value.__class__(), ItemsView(value)
901 elif isinstance(value, Sequence):
902 return value.__class__(), enumerate(value)
903 elif isinstance(value, Set):
904 return value.__class__(), enumerate(value)
905 else:
906 # files, strings, other iterables, and scalars are not
907 # traversed
908 return value, False
909
910
911 def default_exit(path, key, old_parent, new_parent, new_items):
912 # print('exit(%r, %r, %r, %r, %r)'
913 # % (path, key, old_parent, new_parent, new_items))
914 ret = new_parent
915 if isinstance(new_parent, Mapping):
916 new_parent.update(new_items)
917 elif isinstance(new_parent, Sequence):
918 vals = [v for i, v in new_items]
919 try:
920 new_parent.extend(vals)
921 except AttributeError:
922 ret = new_parent.__class__(vals) # tuples
923 elif isinstance(new_parent, Set):
924 vals = [v for i, v in new_items]
925 try:
926 new_parent.update(vals)
927 except AttributeError:
928 ret = new_parent.__class__(vals) # frozensets
929 else:
930 raise RuntimeError('unexpected iterable type: %r' % type(new_parent))
931 return ret
932
933
934 def remap(root, visit=default_visit, enter=default_enter, exit=default_exit,
935 **kwargs):
936 """The remap ("recursive map") function is used to traverse and
937 transform nested structures. Lists, tuples, sets, and dictionaries
938 are just a few of the data structures nested into heterogenous
939 tree-like structures that are so common in programming.
940 Unfortunately, Python's built-in ways to manipulate collections
941 are almost all flat. List comprehensions may be fast and succinct,
942 but they do not recurse, making it tedious to apply quick changes
943 or complex transforms to real-world data.
944
945 remap goes where list comprehensions cannot.
946
947 Here's an example of removing all Nones from some data:
948
949 >>> from pprint import pprint
950 >>> reviews = {'Star Trek': {'TNG': 10, 'DS9': 8.5, 'ENT': None},
951 ... 'Babylon 5': 6, 'Dr. Who': None}
952 >>> pprint(remap(reviews, lambda p, k, v: v is not None))
953 {'Babylon 5': 6, 'Star Trek': {'DS9': 8.5, 'TNG': 10}}
954
955 Notice how both Nones have been removed despite the nesting in the
956 dictionary. Not bad for a one-liner, and that's just the beginning.
957 See `this remap cookbook`_ for more delicious recipes.
958
959 .. _this remap cookbook: http://sedimental.org/remap.html
960
961 remap takes four main arguments: the object to traverse and three
962 optional callables which determine how the remapped object will be
963 created.
964
965 Args:
966
967 root: The target object to traverse. By default, remap
968 supports iterables like :class:`list`, :class:`tuple`,
969 :class:`dict`, and :class:`set`, but any object traversable by
970 *enter* will work.
971 visit (callable): This function is called on every item in
972 *root*. It must accept three positional arguments, *path*,
973 *key*, and *value*. *path* is simply a tuple of parents'
974 keys. *visit* should return the new key-value pair. It may
975 also return ``True`` as shorthand to keep the old item
976 unmodified, or ``False`` to drop the item from the new
977 structure. *visit* is called after *enter*, on the new parent.
978
979 The *visit* function is called for every item in root,
980 including duplicate items. For traversable values, it is
981 called on the new parent object, after all its children
982 have been visited. The default visit behavior simply
983 returns the key-value pair unmodified.
984 enter (callable): This function controls which items in *root*
985 are traversed. It accepts the same arguments as *visit*: the
986 path, the key, and the value of the current item. It returns a
987 pair of the blank new parent, and an iterator over the items
988 which should be visited. If ``False`` is returned instead of
989 an iterator, the value will not be traversed.
990
991 The *enter* function is only called once per unique value. The
992 default enter behavior support mappings, sequences, and
993 sets. Strings and all other iterables will not be traversed.
994 exit (callable): This function determines how to handle items
995 once they have been visited. It gets the same three
996 arguments as the other functions -- *path*, *key*, *value*
997 -- plus two more: the blank new parent object returned
998 from *enter*, and a list of the new items, as remapped by
999 *visit*.
1000
1001 Like *enter*, the *exit* function is only called once per
1002 unique value. The default exit behavior is to simply add
1003 all new items to the new parent, e.g., using
1004 :meth:`list.extend` and :meth:`dict.update` to add to the
1005 new parent. Immutable objects, such as a :class:`tuple` or
1006 :class:`namedtuple`, must be recreated from scratch, but
1007 use the same type as the new parent passed back from the
1008 *enter* function.
1009 reraise_visit (bool): A pragmatic convenience for the *visit*
1010 callable. When set to ``False``, remap ignores any errors
1011 raised by the *visit* callback. Items causing exceptions
1012 are kept. See examples for more details.
1013
1014 remap is designed to cover the majority of cases with just the
1015 *visit* callable. While passing in multiple callables is very
1016 empowering, remap is designed so very few cases should require
1017 passing more than one function.
1018
1019 When passing *enter* and *exit*, it's common and easiest to build
1020 on the default behavior. Simply add ``from boltons.iterutils import
1021 default_enter`` (or ``default_exit``), and have your enter/exit
1022 function call the default behavior before or after your custom
1023 logic. See `this example`_.
1024
1025 Duplicate and self-referential objects (aka reference loops) are
1026 automatically handled internally, `as shown here`_.
1027
1028 .. _this example: http://sedimental.org/remap.html#sort_all_lists
1029 .. _as shown here: http://sedimental.org/remap.html#corner_cases
1030
1031 """
1032 # TODO: improve argument formatting in sphinx doc
1033 # TODO: enter() return (False, items) to continue traverse but cancel copy?
1034 if not callable(visit):
1035 raise TypeError('visit expected callable, not: %r' % visit)
1036 if not callable(enter):
1037 raise TypeError('enter expected callable, not: %r' % enter)
1038 if not callable(exit):
1039 raise TypeError('exit expected callable, not: %r' % exit)
1040 reraise_visit = kwargs.pop('reraise_visit', True)
1041 if kwargs:
1042 raise TypeError('unexpected keyword arguments: %r' % kwargs.keys())
1043
1044 path, registry, stack = (), {}, [(None, root)]
1045 new_items_stack = []
1046 while stack:
1047 key, value = stack.pop()
1048 id_value = id(value)
1049 if key is _REMAP_EXIT:
1050 key, new_parent, old_parent = value
1051 id_value = id(old_parent)
1052 path, new_items = new_items_stack.pop()
1053 value = exit(path, key, old_parent, new_parent, new_items)
1054 registry[id_value] = value
1055 if not new_items_stack:
1056 continue
1057 elif id_value in registry:
1058 value = registry[id_value]
1059 else:
1060 res = enter(path, key, value)
1061 try:
1062 new_parent, new_items = res
1063 except TypeError:
1064 # TODO: handle False?
1065 raise TypeError('enter should return a tuple of (new_parent,'
1066 ' items_iterator), not: %r' % res)
1067 if new_items is not False:
1068 # traverse unless False is explicitly passed
1069 registry[id_value] = new_parent
1070 new_items_stack.append((path, []))
1071 if value is not root:
1072 path += (key,)
1073 stack.append((_REMAP_EXIT, (key, new_parent, value)))
1074 if new_items:
1075 stack.extend(reversed(list(new_items)))
1076 continue
1077 if visit is _orig_default_visit:
1078 # avoid function call overhead by inlining identity operation
1079 visited_item = (key, value)
1080 else:
1081 try:
1082 visited_item = visit(path, key, value)
1083 except Exception:
1084 if reraise_visit:
1085 raise
1086 visited_item = True
1087 if visited_item is False:
1088 continue # drop
1089 elif visited_item is True:
1090 visited_item = (key, value)
1091 # TODO: typecheck?
1092 # raise TypeError('expected (key, value) from visit(),'
1093 # ' not: %r' % visited_item)
1094 try:
1095 new_items_stack[-1][1].append(visited_item)
1096 except IndexError:
1097 raise TypeError('expected remappable root, not: %r' % root)
1098 return value
1099
1100
1101 class PathAccessError(KeyError, IndexError, TypeError):
1102 """An amalgamation of KeyError, IndexError, and TypeError,
1103 representing what can occur when looking up a path in a nested
1104 object.
1105 """
1106 def __init__(self, exc, seg, path):
1107 self.exc = exc
1108 self.seg = seg
1109 self.path = path
1110
1111 def __repr__(self):
1112 cn = self.__class__.__name__
1113 return '%s(%r, %r, %r)' % (cn, self.exc, self.seg, self.path)
1114
1115 def __str__(self):
1116 return ('could not access %r from path %r, got error: %r'
1117 % (self.seg, self.path, self.exc))
1118
1119
1120 def get_path(root, path, default=_UNSET):
1121 """Retrieve a value from a nested object via a tuple representing the
1122 lookup path.
1123
1124 >>> root = {'a': {'b': {'c': [[1], [2], [3]]}}}
1125 >>> get_path(root, ('a', 'b', 'c', 2, 0))
1126 3
1127
1128 The path format is intentionally consistent with that of
1129 :func:`remap`.
1130
1131 One of get_path's chief aims is improved error messaging. EAFP is
1132 great, but the error messages are not.
1133
1134 For instance, ``root['a']['b']['c'][2][1]`` gives back
1135 ``IndexError: list index out of range``
1136
1137 What went out of range where? get_path currently raises
1138 ``PathAccessError: could not access 2 from path ('a', 'b', 'c', 2,
1139 1), got error: IndexError('list index out of range',)``, a
1140 subclass of IndexError and KeyError.
1141
1142 You can also pass a default that covers the entire operation,
1143 should the lookup fail at any level.
1144
1145 Args:
1146 root: The target nesting of dictionaries, lists, or other
1147 objects supporting ``__getitem__``.
1148 path (tuple): A list of strings and integers to be successively
1149 looked up within *root*.
1150 default: The value to be returned should any
1151 ``PathAccessError`` exceptions be raised.
1152 """
1153 if isinstance(path, basestring):
1154 path = path.split('.')
1155 cur = root
1156 try:
1157 for seg in path:
1158 try:
1159 cur = cur[seg]
1160 except (KeyError, IndexError) as exc:
1161 raise PathAccessError(exc, seg, path)
1162 except TypeError as exc:
1163 # either string index in a list, or a parent that
1164 # doesn't support indexing
1165 try:
1166 seg = int(seg)
1167 cur = cur[seg]
1168 except (ValueError, KeyError, IndexError, TypeError):
1169 if not is_iterable(cur):
1170 exc = TypeError('%r object is not indexable'
1171 % type(cur).__name__)
1172 raise PathAccessError(exc, seg, path)
1173 except PathAccessError:
1174 if default is _UNSET:
1175 raise
1176 return default
1177 return cur
1178
1179
1180 def research(root, query=lambda p, k, v: True, reraise=False):
1181 """The :func:`research` function uses :func:`remap` to recurse over
1182 any data nested in *root*, and find values which match a given
1183 criterion, specified by the *query* callable.
1184
1185 Results are returned as a list of ``(path, value)`` pairs. The
1186 paths are tuples in the same format accepted by
1187 :func:`get_path`. This can be useful for comparing values nested
1188 in two or more different structures.
1189
1190 Here's a simple example that finds all integers:
1191
1192 >>> root = {'a': {'b': 1, 'c': (2, 'd', 3)}, 'e': None}
1193 >>> res = research(root, query=lambda p, k, v: isinstance(v, int))
1194 >>> print(sorted(res))
1195 [(('a', 'b'), 1), (('a', 'c', 0), 2), (('a', 'c', 2), 3)]
1196
1197 Note how *query* follows the same, familiar ``path, key, value``
1198 signature as the ``visit`` and ``enter`` functions on
1199 :func:`remap`, and returns a :class:`bool`.
1200
1201 Args:
1202 root: The target object to search. Supports the same types of
1203 objects as :func:`remap`, including :class:`list`,
1204 :class:`tuple`, :class:`dict`, and :class:`set`.
1205 query (callable): The function called on every object to
1206 determine whether to include it in the search results. The
1207 callable must accept three arguments, *path*, *key*, and
1208 *value*, commonly abbreviated *p*, *k*, and *v*, same as
1209 *enter* and *visit* from :func:`remap`.
1210 reraise (bool): Whether to reraise exceptions raised by *query*
1211 or to simply drop the result that caused the error.
1212
1213
1214 With :func:`research` it's easy to inspect the details of a data
1215 structure, like finding values that are at a certain depth (using
1216 ``len(p)``) and much more. If more advanced functionality is
1217 needed, check out the code and make your own :func:`remap`
1218 wrapper, and consider `submitting a patch`_!
1219
1220 .. _submitting a patch: https://github.com/mahmoud/boltons/pulls
1221 """
1222 ret = []
1223
1224 if not callable(query):
1225 raise TypeError('query expected callable, not: %r' % query)
1226
1227 def enter(path, key, value):
1228 try:
1229 if query(path, key, value):
1230 ret.append((path + (key,), value))
1231 except Exception:
1232 if reraise:
1233 raise
1234 return default_enter(path, key, value)
1235
1236 remap(root, enter=enter)
1237 return ret
1238
1239
1240 # TODO: recollect()
1241 # TODO: refilter()
1242 # TODO: reiter()
1243
1244
1245 # GUID iterators: 10x faster and somewhat more compact than uuid.
1246
1247 class GUIDerator(object):
1248 """The GUIDerator is an iterator that yields a globally-unique
1249 identifier (GUID) on every iteration. The GUIDs produced are
1250 hexadecimal strings.
1251
1252 Testing shows it to be around 12x faster than the uuid module. By
1253 default it is also more compact, partly due to its default 96-bit
1254 (24-hexdigit) length. 96 bits of randomness means that there is a
1255 1 in 2 ^ 32 chance of collision after 2 ^ 64 iterations. If more
1256 or less uniqueness is desired, the *size* argument can be adjusted
1257 accordingly.
1258
1259 Args:
1260 size (int): character length of the GUID, defaults to 24. Lengths
1261 between 20 and 36 are considered valid.
1262
1263 The GUIDerator has built-in fork protection that causes it to
1264 detect a fork on next iteration and reseed accordingly.
1265
1266 """
1267 def __init__(self, size=24):
1268 self.size = size
1269 if size < 20 or size > 36:
1270 raise ValueError('expected 20 < size <= 36')
1271 import hashlib
1272 self._sha1 = hashlib.sha1
1273 self.count = itertools.count()
1274 self.reseed()
1275
1276 def reseed(self):
1277 import socket
1278 self.pid = os.getpid()
1279 self.salt = '-'.join([str(self.pid),
1280 socket.gethostname() or b'<nohostname>',
1281 str(time.time()),
1282 codecs.encode(os.urandom(6),
1283 'hex_codec').decode('ascii')])
1284 # that codecs trick is the best/only way to get a bytes to
1285 # hexbytes in py2/3
1286 return
1287
1288 def __iter__(self):
1289 return self
1290
1291 if _IS_PY3:
1292 def __next__(self):
1293 if os.getpid() != self.pid:
1294 self.reseed()
1295 target_bytes = (self.salt + str(next(self.count))).encode('utf8')
1296 hash_text = self._sha1(target_bytes).hexdigest()[:self.size]
1297 return hash_text
1298 else:
1299 def __next__(self):
1300 if os.getpid() != self.pid:
1301 self.reseed()
1302 return self._sha1(self.salt +
1303 str(next(self.count))).hexdigest()[:self.size]
1304
1305 next = __next__
1306
1307
1308 class SequentialGUIDerator(GUIDerator):
1309 """Much like the standard GUIDerator, the SequentialGUIDerator is an
1310 iterator that yields a globally-unique identifier (GUID) on every
1311 iteration. The GUIDs produced are hexadecimal strings.
1312
1313 The SequentialGUIDerator differs in that it picks a starting GUID
1314 value and increments every iteration. This yields GUIDs which are
1315 of course unique, but also ordered and lexicographically sortable.
1316
1317 The SequentialGUIDerator is around 50% faster than the normal
1318 GUIDerator, making it almost 20x as fast as the built-in uuid
1319 module. By default it is also more compact, partly due to its
1320 96-bit (24-hexdigit) default length. 96 bits of randomness means that
1321 there is a 1 in 2 ^ 32 chance of collision after 2 ^ 64
1322 iterations. If more or less uniqueness is desired, the *size*
1323 argument can be adjusted accordingly.
1324
1325 Args:
1326 size (int): character length of the GUID, defaults to 24.
1327
1328 Note that with SequentialGUIDerator there is a chance of GUIDs
1329 growing larger than the size configured. The SequentialGUIDerator
1330 has built-in fork protection that causes it to detect a fork on
1331 next iteration and reseed accordingly.
1332
1333 """
1334
1335 if _IS_PY3:
1336 def reseed(self):
1337 super(SequentialGUIDerator, self).reseed()
1338 start_str = self._sha1(self.salt.encode('utf8')).hexdigest()
1339 self.start = int(start_str[:self.size], 16)
1340 self.start |= (1 << ((self.size * 4) - 2))
1341 else:
1342 def reseed(self):
1343 super(SequentialGUIDerator, self).reseed()
1344 start_str = self._sha1(self.salt).hexdigest()
1345 self.start = int(start_str[:self.size], 16)
1346 self.start |= (1 << ((self.size * 4) - 2))
1347
1348 def __next__(self):
1349 if os.getpid() != self.pid:
1350 self.reseed()
1351 return '%x' % (next(self.count) + self.start)
1352
1353 next = __next__
1354
1355
1356 guid_iter = GUIDerator()
1357 seq_guid_iter = SequentialGUIDerator()
1358
1359
1360 def soft_sorted(iterable, first=None, last=None, key=None, reverse=False):
1361 """For when you care about the order of some elements, but not about
1362 others.
1363
1364 Use this to float to the top and/or sink to the bottom a specific
1365 ordering, while sorting the rest of the elements according to
1366 normal :func:`sorted` rules.
1367
1368 >>> soft_sorted(['two', 'b', 'one', 'a'], first=['one', 'two'])
1369 ['one', 'two', 'a', 'b']
1370 >>> soft_sorted(range(7), first=[6, 15], last=[2, 4], reverse=True)
1371 [6, 5, 3, 1, 0, 2, 4]
1372 >>> import string
1373 >>> ''.join(soft_sorted(string.hexdigits, first='za1', last='b', key=str.lower))
1374 'aA1023456789cCdDeEfFbB'
1375
1376 Args:
1377 iterable (list): A list or other iterable to sort.
1378 first (list): A sequence to enforce for elements which should
1379 appear at the beginning of the returned list.
1380 last (list): A sequence to enforce for elements which should
1381 appear at the end of the returned list.
1382 key (callable): Callable used to generate a comparable key for
1383 each item to be sorted, same as the key in
1384 :func:`sorted`. Note that entries in *first* and *last*
1385 should be the keys for the items. Defaults to
1386 passthrough/the identity function.
1387 reverse (bool): Whether or not elements not explicitly ordered
1388 by *first* and *last* should be in reverse order or not.
1389
1390 Returns a new list in sorted order.
1391 """
1392 first = first or []
1393 last = last or []
1394 key = key or (lambda x: x)
1395 seq = list(iterable)
1396 other = [x for x in seq if not ((first and key(x) in first) or (last and key(x) in last))]
1397 other.sort(key=key, reverse=reverse)
1398
1399 if first:
1400 first = sorted([x for x in seq if key(x) in first], key=lambda x: first.index(key(x)))
1401 if last:
1402 last = sorted([x for x in seq if key(x) in last], key=lambda x: last.index(key(x)))
1403 return first + other + last
1404
1405
1406 def untyped_sorted(iterable, key=None, reverse=False):
1407 """A version of :func:`sorted` which will happily sort an iterable of
1408 heterogenous types and return a new list, similar to legacy Python's
1409 behavior.
1410
1411 >>> untyped_sorted(['abc', 2.0, 1, 2, 'def'])
1412 [1, 2.0, 2, 'abc', 'def']
1413
1414 Note how mutually orderable types are sorted as expected, as in
1415 the case of the integers and floats above.
1416
1417 .. note::
1418
1419 Results may vary across Python versions and builds, but the
1420 function will produce a sorted list, except in the case of
1421 explicitly unorderable objects.
1422
1423 """
1424 class _Wrapper(object):
1425 slots = ('obj',)
1426
1427 def __init__(self, obj):
1428 self.obj = obj
1429
1430 def __lt__(self, other):
1431 obj = key(self.obj) if key is not None else self.obj
1432 other = key(other.obj) if key is not None else other.obj
1433 try:
1434 ret = obj < other
1435 except TypeError:
1436 ret = ((type(obj).__name__, id(type(obj)), obj)
1437 < (type(other).__name__, id(type(other)), other))
1438 return ret
1439
1440 if key is not None and not callable(key):
1441 raise TypeError('expected function or callable object for key, not: %r'
1442 % key)
1443
1444 return sorted(iterable, key=_Wrapper, reverse=reverse)
1445
1446 """
1447 May actually be faster to do an isinstance check for a str path
1448
1449 $ python -m timeit -s "x = [1]" "x[0]"
1450 10000000 loops, best of 3: 0.0207 usec per loop
1451 $ python -m timeit -s "x = [1]" "try: x[0] \nexcept: pass"
1452 10000000 loops, best of 3: 0.029 usec per loop
1453 $ python -m timeit -s "x = [1]" "try: x[1] \nexcept: pass"
1454 1000000 loops, best of 3: 0.315 usec per loop
1455 # setting up try/except is fast, only around 0.01us
1456 # actually triggering the exception takes almost 10x as long
1457
1458 $ python -m timeit -s "x = [1]" "isinstance(x, basestring)"
1459 10000000 loops, best of 3: 0.141 usec per loop
1460 $ python -m timeit -s "x = [1]" "isinstance(x, str)"
1461 10000000 loops, best of 3: 0.131 usec per loop
1462 $ python -m timeit -s "x = [1]" "try: x.split('.')\n except: pass"
1463 1000000 loops, best of 3: 0.443 usec per loop
1464 $ python -m timeit -s "x = [1]" "try: x.split('.') \nexcept AttributeError: pass"
1465 1000000 loops, best of 3: 0.544 usec per loop
1466 """