comparison planemo/lib/python3.7/site-packages/boltons/iterutils.py @ 0:d30785e31577 draft

"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author guerler
date Fri, 31 Jul 2020 00:18:57 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:d30785e31577
1 # -*- coding: utf-8 -*-
2 """:mod:`itertools` is full of great examples of Python generator
3 usage. However, there are still some critical gaps. ``iterutils``
4 fills many of those gaps with featureful, tested, and Pythonic
5 solutions.
6
7 Many of the functions below have two versions, one which
8 returns an iterator (denoted by the ``*_iter`` naming pattern), and a
9 shorter-named convenience form that returns a list. Some of the
10 following are based on examples in itertools docs.
11 """
12
13 import os
14 import math
15 import time
16 import codecs
17 import random
18 import socket
19 import hashlib
20 import itertools
21
22 try:
23 from collections.abc import Mapping, Sequence, Set, ItemsView, Iterable
24 except ImportError:
25 from collections import Mapping, Sequence, Set, ItemsView, Iterable
26
27
28 try:
29 from typeutils import make_sentinel
30 _UNSET = make_sentinel('_UNSET')
31 _REMAP_EXIT = make_sentinel('_REMAP_EXIT')
32 except ImportError:
33 _REMAP_EXIT = object()
34 _UNSET = object()
35
36 try:
37 from future_builtins import filter
38 from itertools import izip
39 _IS_PY3 = False
40 except ImportError:
41 # Python 3 compat
42 _IS_PY3 = True
43 basestring = (str, bytes)
44 unicode = str
45 izip, xrange = zip, range
46
47
48 def is_iterable(obj):
49 """Similar in nature to :func:`callable`, ``is_iterable`` returns
50 ``True`` if an object is `iterable`_, ``False`` if not.
51
52 >>> is_iterable([])
53 True
54 >>> is_iterable(object())
55 False
56
57 .. _iterable: https://docs.python.org/2/glossary.html#term-iterable
58 """
59 try:
60 iter(obj)
61 except TypeError:
62 return False
63 return True
64
65
66 def is_scalar(obj):
67 """A near-mirror of :func:`is_iterable`. Returns ``False`` if an
68 object is an iterable container type. Strings are considered
69 scalar as well, because strings are more often treated as whole
70 values as opposed to iterables of 1-character substrings.
71
72 >>> is_scalar(object())
73 True
74 >>> is_scalar(range(10))
75 False
76 >>> is_scalar('hello')
77 True
78 """
79 return not is_iterable(obj) or isinstance(obj, basestring)
80
81
82 def is_collection(obj):
83 """The opposite of :func:`is_scalar`. Returns ``True`` if an object
84 is an iterable other than a string.
85
86 >>> is_collection(object())
87 False
88 >>> is_collection(range(10))
89 True
90 >>> is_collection('hello')
91 False
92 """
93 return is_iterable(obj) and not isinstance(obj, basestring)
94
95
96 def split(src, sep=None, maxsplit=None):
97 """Splits an iterable based on a separator. Like :meth:`str.split`,
98 but for all iterables. Returns a list of lists.
99
100 >>> split(['hi', 'hello', None, None, 'sup', None, 'soap', None])
101 [['hi', 'hello'], ['sup'], ['soap']]
102
103 See :func:`split_iter` docs for more info.
104 """
105 return list(split_iter(src, sep, maxsplit))
106
107
108 def split_iter(src, sep=None, maxsplit=None):
109 """Splits an iterable based on a separator, *sep*, a max of
110 *maxsplit* times (no max by default). *sep* can be:
111
112 * a single value
113 * an iterable of separators
114 * a single-argument callable that returns True when a separator is
115 encountered
116
117 ``split_iter()`` yields lists of non-separator values. A separator will
118 never appear in the output.
119
120 >>> list(split_iter(['hi', 'hello', None, None, 'sup', None, 'soap', None]))
121 [['hi', 'hello'], ['sup'], ['soap']]
122
123 Note that ``split_iter`` is based on :func:`str.split`, so if
124 *sep* is ``None``, ``split()`` **groups** separators. If empty lists
125 are desired between two contiguous ``None`` values, simply use
126 ``sep=[None]``:
127
128 >>> list(split_iter(['hi', 'hello', None, None, 'sup', None]))
129 [['hi', 'hello'], ['sup']]
130 >>> list(split_iter(['hi', 'hello', None, None, 'sup', None], sep=[None]))
131 [['hi', 'hello'], [], ['sup'], []]
132
133 Using a callable separator:
134
135 >>> falsy_sep = lambda x: not x
136 >>> list(split_iter(['hi', 'hello', None, '', 'sup', False], falsy_sep))
137 [['hi', 'hello'], [], ['sup'], []]
138
139 See :func:`split` for a list-returning version.
140
141 """
142 if not is_iterable(src):
143 raise TypeError('expected an iterable')
144
145 if maxsplit is not None:
146 maxsplit = int(maxsplit)
147 if maxsplit == 0:
148 yield [src]
149 return
150
151 if callable(sep):
152 sep_func = sep
153 elif not is_scalar(sep):
154 sep = frozenset(sep)
155 sep_func = lambda x: x in sep
156 else:
157 sep_func = lambda x: x == sep
158
159 cur_group = []
160 split_count = 0
161 for s in src:
162 if maxsplit is not None and split_count >= maxsplit:
163 sep_func = lambda x: False
164 if sep_func(s):
165 if sep is None and not cur_group:
166 # If sep is none, str.split() "groups" separators
167 # check the str.split() docs for more info
168 continue
169 split_count += 1
170 yield cur_group
171 cur_group = []
172 else:
173 cur_group.append(s)
174
175 if cur_group or sep is not None:
176 yield cur_group
177 return
178
179
180 def lstrip(iterable, strip_value=None):
181 """Strips values from the beginning of an iterable. Stripped items will
182 match the value of the argument strip_value. Functionality is analigous
183 to that of the method str.lstrip. Returns a list.
184
185 >>> lstrip(['Foo', 'Bar', 'Bam'], 'Foo')
186 ['Bar', 'Bam']
187
188 """
189 return list(lstrip_iter(iterable, strip_value))
190
191
192 def lstrip_iter(iterable, strip_value=None):
193 """Strips values from the beginning of an iterable. Stripped items will
194 match the value of the argument strip_value. Functionality is analigous
195 to that of the method str.lstrip. Returns a generator.
196
197 >>> list(lstrip_iter(['Foo', 'Bar', 'Bam'], 'Foo'))
198 ['Bar', 'Bam']
199
200 """
201 iterator = iter(iterable)
202 for i in iterator:
203 if i != strip_value:
204 yield i
205 break
206 for i in iterator:
207 yield i
208
209
210 def rstrip(iterable, strip_value=None):
211 """Strips values from the end of an iterable. Stripped items will
212 match the value of the argument strip_value. Functionality is analigous
213 to that of the method str.rstrip. Returns a list.
214
215 >>> rstrip(['Foo', 'Bar', 'Bam'], 'Bam')
216 ['Foo', 'Bar']
217
218 """
219 return list(rstrip_iter(iterable,strip_value))
220
221
222 def rstrip_iter(iterable, strip_value=None):
223 """Strips values from the end of an iterable. Stripped items will
224 match the value of the argument strip_value. Functionality is analigous
225 to that of the method str.rstrip. Returns a generator.
226
227 >>> list(rstrip_iter(['Foo', 'Bar', 'Bam'], 'Bam'))
228 ['Foo', 'Bar']
229
230 """
231 iterator = iter(iterable)
232 for i in iterator:
233 if i == strip_value:
234 cache = list()
235 cache.append(i)
236 broken = False
237 for i in iterator:
238 if i == strip_value:
239 cache.append(i)
240 else:
241 broken = True
242 break
243 if not broken: # Return to caller here because the end of the
244 return # iterator has been reached
245 for t in cache:
246 yield t
247 yield i
248
249
250 def strip(iterable, strip_value=None):
251 """Strips values from the beginning and end of an iterable. Stripped items
252 will match the value of the argument strip_value. Functionality is
253 analigous to that of the method str.strip. Returns a list.
254
255 >>> strip(['Fu', 'Foo', 'Bar', 'Bam', 'Fu'], 'Fu')
256 ['Foo', 'Bar', 'Bam']
257
258 """
259 return list(strip_iter(iterable,strip_value))
260
261
262 def strip_iter(iterable,strip_value=None):
263 """Strips values from the beginning and end of an iterable. Stripped items
264 will match the value of the argument strip_value. Functionality is
265 analigous to that of the method str.strip. Returns a generator.
266
267 >>> list(strip_iter(['Fu', 'Foo', 'Bar', 'Bam', 'Fu'], 'Fu'))
268 ['Foo', 'Bar', 'Bam']
269
270 """
271 return rstrip_iter(lstrip_iter(iterable,strip_value),strip_value)
272
273
274 def chunked(src, size, count=None, **kw):
275 """Returns a list of *count* chunks, each with *size* elements,
276 generated from iterable *src*. If *src* is not evenly divisible by
277 *size*, the final chunk will have fewer than *size* elements.
278 Provide the *fill* keyword argument to provide a pad value and
279 enable padding, otherwise no padding will take place.
280
281 >>> chunked(range(10), 3)
282 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
283 >>> chunked(range(10), 3, fill=None)
284 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, None, None]]
285 >>> chunked(range(10), 3, count=2)
286 [[0, 1, 2], [3, 4, 5]]
287
288 See :func:`chunked_iter` for more info.
289 """
290 chunk_iter = chunked_iter(src, size, **kw)
291 if count is None:
292 return list(chunk_iter)
293 else:
294 return list(itertools.islice(chunk_iter, count))
295
296
297 def chunked_iter(src, size, **kw):
298 """Generates *size*-sized chunks from *src* iterable. Unless the
299 optional *fill* keyword argument is provided, iterables not evenly
300 divisible by *size* will have a final chunk that is smaller than
301 *size*.
302
303 >>> list(chunked_iter(range(10), 3))
304 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
305 >>> list(chunked_iter(range(10), 3, fill=None))
306 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, None, None]]
307
308 Note that ``fill=None`` in fact uses ``None`` as the fill value.
309 """
310 # TODO: add count kwarg?
311 if not is_iterable(src):
312 raise TypeError('expected an iterable')
313 size = int(size)
314 if size <= 0:
315 raise ValueError('expected a positive integer chunk size')
316 do_fill = True
317 try:
318 fill_val = kw.pop('fill')
319 except KeyError:
320 do_fill = False
321 fill_val = None
322 if kw:
323 raise ValueError('got unexpected keyword arguments: %r' % kw.keys())
324 if not src:
325 return
326 postprocess = lambda chk: chk
327 if isinstance(src, basestring):
328 postprocess = lambda chk, _sep=type(src)(): _sep.join(chk)
329 if _IS_PY3 and isinstance(src, bytes):
330 postprocess = lambda chk: bytes(chk)
331 src_iter = iter(src)
332 while True:
333 cur_chunk = list(itertools.islice(src_iter, size))
334 if not cur_chunk:
335 break
336 lc = len(cur_chunk)
337 if lc < size and do_fill:
338 cur_chunk[lc:] = [fill_val] * (size - lc)
339 yield postprocess(cur_chunk)
340 return
341
342
343 def pairwise(src):
344 """Convenience function for calling :func:`windowed` on *src*, with
345 *size* set to 2.
346
347 >>> pairwise(range(5))
348 [(0, 1), (1, 2), (2, 3), (3, 4)]
349 >>> pairwise([])
350 []
351
352 The number of pairs is always one less than the number of elements
353 in the iterable passed in, except on empty inputs, which returns
354 an empty list.
355 """
356 return windowed(src, 2)
357
358
359 def pairwise_iter(src):
360 """Convenience function for calling :func:`windowed_iter` on *src*,
361 with *size* set to 2.
362
363 >>> list(pairwise_iter(range(5)))
364 [(0, 1), (1, 2), (2, 3), (3, 4)]
365 >>> list(pairwise_iter([]))
366 []
367
368 The number of pairs is always one less than the number of elements
369 in the iterable passed in, or zero, when *src* is empty.
370
371 """
372 return windowed_iter(src, 2)
373
374
375 def windowed(src, size):
376 """Returns tuples with exactly length *size*. If the iterable is
377 too short to make a window of length *size*, no tuples are
378 returned. See :func:`windowed_iter` for more.
379 """
380 return list(windowed_iter(src, size))
381
382
383 def windowed_iter(src, size):
384 """Returns tuples with length *size* which represent a sliding
385 window over iterable *src*.
386
387 >>> list(windowed_iter(range(7), 3))
388 [(0, 1, 2), (1, 2, 3), (2, 3, 4), (3, 4, 5), (4, 5, 6)]
389
390 If the iterable is too short to make a window of length *size*,
391 then no window tuples are returned.
392
393 >>> list(windowed_iter(range(3), 5))
394 []
395 """
396 # TODO: lists? (for consistency)
397 tees = itertools.tee(src, size)
398 try:
399 for i, t in enumerate(tees):
400 for _ in xrange(i):
401 next(t)
402 except StopIteration:
403 return izip([])
404 return izip(*tees)
405
406
407 def xfrange(stop, start=None, step=1.0):
408 """Same as :func:`frange`, but generator-based instead of returning a
409 list.
410
411 >>> tuple(xfrange(1, 3, step=0.75))
412 (1.0, 1.75, 2.5)
413
414 See :func:`frange` for more details.
415 """
416 if not step:
417 raise ValueError('step must be non-zero')
418 if start is None:
419 start, stop = 0.0, stop * 1.0
420 else:
421 # swap when all args are used
422 stop, start = start * 1.0, stop * 1.0
423 cur = start
424 while cur < stop:
425 yield cur
426 cur += step
427
428
429 def frange(stop, start=None, step=1.0):
430 """A :func:`range` clone for float-based ranges.
431
432 >>> frange(5)
433 [0.0, 1.0, 2.0, 3.0, 4.0]
434 >>> frange(6, step=1.25)
435 [0.0, 1.25, 2.5, 3.75, 5.0]
436 >>> frange(100.5, 101.5, 0.25)
437 [100.5, 100.75, 101.0, 101.25]
438 >>> frange(5, 0)
439 []
440 >>> frange(5, 0, step=-1.25)
441 [5.0, 3.75, 2.5, 1.25]
442 """
443 if not step:
444 raise ValueError('step must be non-zero')
445 if start is None:
446 start, stop = 0.0, stop * 1.0
447 else:
448 # swap when all args are used
449 stop, start = start * 1.0, stop * 1.0
450 count = int(math.ceil((stop - start) / step))
451 ret = [None] * count
452 if not ret:
453 return ret
454 ret[0] = start
455 for i in xrange(1, count):
456 ret[i] = ret[i - 1] + step
457 return ret
458
459
460 def backoff(start, stop, count=None, factor=2.0, jitter=False):
461 """Returns a list of geometrically-increasing floating-point numbers,
462 suitable for usage with `exponential backoff`_. Exactly like
463 :func:`backoff_iter`, but without the ``'repeat'`` option for
464 *count*. See :func:`backoff_iter` for more details.
465
466 .. _exponential backoff: https://en.wikipedia.org/wiki/Exponential_backoff
467
468 >>> backoff(1, 10)
469 [1.0, 2.0, 4.0, 8.0, 10.0]
470 """
471 if count == 'repeat':
472 raise ValueError("'repeat' supported in backoff_iter, not backoff")
473 return list(backoff_iter(start, stop, count=count,
474 factor=factor, jitter=jitter))
475
476
477 def backoff_iter(start, stop, count=None, factor=2.0, jitter=False):
478 """Generates a sequence of geometrically-increasing floats, suitable
479 for usage with `exponential backoff`_. Starts with *start*,
480 increasing by *factor* until *stop* is reached, optionally
481 stopping iteration once *count* numbers are yielded. *factor*
482 defaults to 2. In general retrying with properly-configured
483 backoff creates a better-behaved component for a larger service
484 ecosystem.
485
486 .. _exponential backoff: https://en.wikipedia.org/wiki/Exponential_backoff
487
488 >>> list(backoff_iter(1.0, 10.0, count=5))
489 [1.0, 2.0, 4.0, 8.0, 10.0]
490 >>> list(backoff_iter(1.0, 10.0, count=8))
491 [1.0, 2.0, 4.0, 8.0, 10.0, 10.0, 10.0, 10.0]
492 >>> list(backoff_iter(0.25, 100.0, factor=10))
493 [0.25, 2.5, 25.0, 100.0]
494
495 A simplified usage example:
496
497 .. code-block:: python
498
499 for timeout in backoff_iter(0.25, 5.0):
500 try:
501 res = network_call()
502 break
503 except Exception as e:
504 log(e)
505 time.sleep(timeout)
506
507 An enhancement for large-scale systems would be to add variation,
508 or *jitter*, to timeout values. This is done to avoid a thundering
509 herd on the receiving end of the network call.
510
511 Finally, for *count*, the special value ``'repeat'`` can be passed to
512 continue yielding indefinitely.
513
514 Args:
515
516 start (float): Positive number for baseline.
517 stop (float): Positive number for maximum.
518 count (int): Number of steps before stopping
519 iteration. Defaults to the number of steps between *start* and
520 *stop*. Pass the string, `'repeat'`, to continue iteration
521 indefinitely.
522 factor (float): Rate of exponential increase. Defaults to `2.0`,
523 e.g., `[1, 2, 4, 8, 16]`.
524 jitter (float): A factor between `-1.0` and `1.0`, used to
525 uniformly randomize and thus spread out timeouts in a distributed
526 system, avoiding rhythm effects. Positive values use the base
527 backoff curve as a maximum, negative values use the curve as a
528 minimum. Set to 1.0 or `True` for a jitter approximating
529 Ethernet's time-tested backoff solution. Defaults to `False`.
530
531 """
532 start = float(start)
533 stop = float(stop)
534 factor = float(factor)
535 if start < 0.0:
536 raise ValueError('expected start >= 0, not %r' % start)
537 if factor < 1.0:
538 raise ValueError('expected factor >= 1.0, not %r' % factor)
539 if stop == 0.0:
540 raise ValueError('expected stop >= 0')
541 if stop < start:
542 raise ValueError('expected stop >= start, not %r' % stop)
543 if count is None:
544 denom = start if start else 1
545 count = 1 + math.ceil(math.log(stop/denom, factor))
546 count = count if start else count + 1
547 if count != 'repeat' and count < 0:
548 raise ValueError('count must be positive or "repeat", not %r' % count)
549 if jitter:
550 jitter = float(jitter)
551 if not (-1.0 <= jitter <= 1.0):
552 raise ValueError('expected jitter -1 <= j <= 1, not: %r' % jitter)
553
554 cur, i = start, 0
555 while count == 'repeat' or i < count:
556 if not jitter:
557 cur_ret = cur
558 elif jitter:
559 cur_ret = cur - (cur * jitter * random.random())
560 yield cur_ret
561 i += 1
562 if cur == 0:
563 cur = 1
564 elif cur < stop:
565 cur *= factor
566 if cur > stop:
567 cur = stop
568 return
569
570
571 def bucketize(src, key=bool, value_transform=None, key_filter=None):
572 """Group values in the *src* iterable by the value returned by *key*.
573
574 >>> bucketize(range(5))
575 {False: [0], True: [1, 2, 3, 4]}
576 >>> is_odd = lambda x: x % 2 == 1
577 >>> bucketize(range(5), is_odd)
578 {False: [0, 2, 4], True: [1, 3]}
579
580 *key* is :class:`bool` by default, but can either be a callable or a string
581 name of the attribute on which to bucketize objects.
582
583 >>> bucketize([1+1j, 2+2j, 1, 2], key='real')
584 {1.0: [(1+1j), 1], 2.0: [(2+2j), 2]}
585
586 Value lists are not deduplicated:
587
588 >>> bucketize([None, None, None, 'hello'])
589 {False: [None, None, None], True: ['hello']}
590
591 Bucketize into more than 3 groups
592
593 >>> bucketize(range(10), lambda x: x % 3)
594 {0: [0, 3, 6, 9], 1: [1, 4, 7], 2: [2, 5, 8]}
595
596 ``bucketize`` has a couple of advanced options useful in certain
597 cases. *value_transform* can be used to modify values as they are
598 added to buckets, and *key_filter* will allow excluding certain
599 buckets from being collected.
600
601 >>> bucketize(range(5), value_transform=lambda x: x*x)
602 {False: [0], True: [1, 4, 9, 16]}
603
604 >>> bucketize(range(10), key=lambda x: x % 3, key_filter=lambda k: k % 3 != 1)
605 {0: [0, 3, 6, 9], 2: [2, 5, 8]}
606
607 Note in some of these examples there were at most two keys, ``True`` and
608 ``False``, and each key present has a list with at least one
609 item. See :func:`partition` for a version specialized for binary
610 use cases.
611
612 """
613 if not is_iterable(src):
614 raise TypeError('expected an iterable')
615
616 if isinstance(key, basestring):
617 key_func = lambda x: getattr(x, key, x)
618 elif callable(key):
619 key_func = key
620 else:
621 raise TypeError('expected key to be callable or a string')
622
623 if value_transform is None:
624 value_transform = lambda x: x
625 if not callable(value_transform):
626 raise TypeError('expected callable value transform function')
627
628 ret = {}
629 for val in src:
630 key_of_val = key_func(val)
631 if key_filter is None or key_filter(key_of_val):
632 ret.setdefault(key_of_val, []).append(value_transform(val))
633 return ret
634
635
636 def partition(src, key=bool):
637 """No relation to :meth:`str.partition`, ``partition`` is like
638 :func:`bucketize`, but for added convenience returns a tuple of
639 ``(truthy_values, falsy_values)``.
640
641 >>> nonempty, empty = partition(['', '', 'hi', '', 'bye'])
642 >>> nonempty
643 ['hi', 'bye']
644
645 *key* defaults to :class:`bool`, but can be carefully overridden to
646 use either a function that returns either ``True`` or ``False`` or
647 a string name of the attribute on which to partition objects.
648
649 >>> import string
650 >>> is_digit = lambda x: x in string.digits
651 >>> decimal_digits, hexletters = partition(string.hexdigits, is_digit)
652 >>> ''.join(decimal_digits), ''.join(hexletters)
653 ('0123456789', 'abcdefABCDEF')
654 """
655 bucketized = bucketize(src, key)
656 return bucketized.get(True, []), bucketized.get(False, [])
657
658
659 def unique(src, key=None):
660 """``unique()`` returns a list of unique values, as determined by
661 *key*, in the order they first appeared in the input iterable,
662 *src*.
663
664 >>> ones_n_zeros = '11010110001010010101010'
665 >>> ''.join(unique(ones_n_zeros))
666 '10'
667
668 See :func:`unique_iter` docs for more details.
669 """
670 return list(unique_iter(src, key))
671
672
673 def unique_iter(src, key=None):
674 """Yield unique elements from the iterable, *src*, based on *key*,
675 in the order in which they first appeared in *src*.
676
677 >>> repetitious = [1, 2, 3] * 10
678 >>> list(unique_iter(repetitious))
679 [1, 2, 3]
680
681 By default, *key* is the object itself, but *key* can either be a
682 callable or, for convenience, a string name of the attribute on
683 which to uniqueify objects, falling back on identity when the
684 attribute is not present.
685
686 >>> pleasantries = ['hi', 'hello', 'ok', 'bye', 'yes']
687 >>> list(unique_iter(pleasantries, key=lambda x: len(x)))
688 ['hi', 'hello', 'bye']
689 """
690 if not is_iterable(src):
691 raise TypeError('expected an iterable, not %r' % type(src))
692 if key is None:
693 key_func = lambda x: x
694 elif callable(key):
695 key_func = key
696 elif isinstance(key, basestring):
697 key_func = lambda x: getattr(x, key, x)
698 else:
699 raise TypeError('"key" expected a string or callable, not %r' % key)
700 seen = set()
701 for i in src:
702 k = key_func(i)
703 if k not in seen:
704 seen.add(k)
705 yield i
706 return
707
708
709 def redundant(src, key=None, groups=False):
710 """The complement of :func:`unique()`.
711
712 By default returns non-unique values as a list of the *first*
713 redundant value in *src*. Pass ``groups=True`` to get groups of
714 all values with redundancies, ordered by position of the first
715 redundant value. This is useful in conjunction with some
716 normalizing *key* function.
717
718 >>> redundant([1, 2, 3, 4])
719 []
720 >>> redundant([1, 2, 3, 2, 3, 3, 4])
721 [2, 3]
722 >>> redundant([1, 2, 3, 2, 3, 3, 4], groups=True)
723 [[2, 2], [3, 3, 3]]
724
725 An example using a *key* function to do case-insensitive
726 redundancy detection.
727
728 >>> redundant(['hi', 'Hi', 'HI', 'hello'], key=str.lower)
729 ['Hi']
730 >>> redundant(['hi', 'Hi', 'HI', 'hello'], groups=True, key=str.lower)
731 [['hi', 'Hi', 'HI']]
732
733 *key* should also be used when the values in *src* are not hashable.
734
735 .. note::
736
737 This output of this function is designed for reporting
738 duplicates in contexts when a unique input is desired. Due to
739 the grouped return type, there is no streaming equivalent of
740 this function for the time being.
741
742 """
743 if key is None:
744 pass
745 elif callable(key):
746 key_func = key
747 elif isinstance(key, basestring):
748 key_func = lambda x: getattr(x, key, x)
749 else:
750 raise TypeError('"key" expected a string or callable, not %r' % key)
751 seen = {} # key to first seen item
752 redundant_order = []
753 redundant_groups = {}
754 for i in src:
755 k = key_func(i) if key else i
756 if k not in seen:
757 seen[k] = i
758 else:
759 if k in redundant_groups:
760 if groups:
761 redundant_groups[k].append(i)
762 else:
763 redundant_order.append(k)
764 redundant_groups[k] = [seen[k], i]
765 if not groups:
766 ret = [redundant_groups[k][1] for k in redundant_order]
767 else:
768 ret = [redundant_groups[k] for k in redundant_order]
769 return ret
770
771
772 def one(src, default=None, key=None):
773 """Along the same lines as builtins, :func:`all` and :func:`any`, and
774 similar to :func:`first`, ``one()`` returns the single object in
775 the given iterable *src* that evaluates to ``True``, as determined
776 by callable *key*. If unset, *key* defaults to :class:`bool`. If
777 no such objects are found, *default* is returned. If *default* is
778 not passed, ``None`` is returned.
779
780 If *src* has more than one object that evaluates to ``True``, or
781 if there is no object that fulfills such condition, return
782 *default*. It's like an `XOR`_ over an iterable.
783
784 >>> one((True, False, False))
785 True
786 >>> one((True, False, True))
787 >>> one((0, 0, 'a'))
788 'a'
789 >>> one((0, False, None))
790 >>> one((True, True), default=False)
791 False
792 >>> bool(one(('', 1)))
793 True
794 >>> one((10, 20, 30, 42), key=lambda i: i > 40)
795 42
796
797 See `Martín Gaitán's original repo`_ for further use cases.
798
799 .. _Martín Gaitán's original repo: https://github.com/mgaitan/one
800 .. _XOR: https://en.wikipedia.org/wiki/Exclusive_or
801
802 """
803 ones = list(itertools.islice(filter(key, src), 2))
804 return ones[0] if len(ones) == 1 else default
805
806
807 def first(iterable, default=None, key=None):
808 """Return first element of *iterable* that evaluates to ``True``, else
809 return ``None`` or optional *default*. Similar to :func:`one`.
810
811 >>> first([0, False, None, [], (), 42])
812 42
813 >>> first([0, False, None, [], ()]) is None
814 True
815 >>> first([0, False, None, [], ()], default='ohai')
816 'ohai'
817 >>> import re
818 >>> m = first(re.match(regex, 'abc') for regex in ['b.*', 'a(.*)'])
819 >>> m.group(1)
820 'bc'
821
822 The optional *key* argument specifies a one-argument predicate function
823 like that used for *filter()*. The *key* argument, if supplied, should be
824 in keyword form. For example, finding the first even number in an iterable:
825
826 >>> first([1, 1, 3, 4, 5], key=lambda x: x % 2 == 0)
827 4
828
829 Contributed by Hynek Schlawack, author of `the original standalone module`_.
830
831 .. _the original standalone module: https://github.com/hynek/first
832 """
833 return next(filter(key, iterable), default)
834
835
836 def flatten_iter(iterable):
837 """``flatten_iter()`` yields all the elements from *iterable* while
838 collapsing any nested iterables.
839
840 >>> nested = [[1, 2], [[3], [4, 5]]]
841 >>> list(flatten_iter(nested))
842 [1, 2, 3, 4, 5]
843 """
844 for item in iterable:
845 if isinstance(item, Iterable) and not isinstance(item, basestring):
846 for subitem in flatten_iter(item):
847 yield subitem
848 else:
849 yield item
850
851 def flatten(iterable):
852 """``flatten()`` returns a collapsed list of all the elements from
853 *iterable* while collapsing any nested iterables.
854
855 >>> nested = [[1, 2], [[3], [4, 5]]]
856 >>> flatten(nested)
857 [1, 2, 3, 4, 5]
858 """
859 return list(flatten_iter(iterable))
860
861
862 def same(iterable, ref=_UNSET):
863 """``same()`` returns ``True`` when all values in *iterable* are
864 equal to one another, or optionally a reference value,
865 *ref*. Similar to :func:`all` and :func:`any` in that it evaluates
866 an iterable and returns a :class:`bool`. ``same()`` returns
867 ``True`` for empty iterables.
868
869 >>> same([])
870 True
871 >>> same([1])
872 True
873 >>> same(['a', 'a', 'a'])
874 True
875 >>> same(range(20))
876 False
877 >>> same([[], []])
878 True
879 >>> same([[], []], ref='test')
880 False
881
882 """
883 iterator = iter(iterable)
884 if ref is _UNSET:
885 ref = next(iterator, ref)
886 return all(val == ref for val in iterator)
887
888
889 def default_visit(path, key, value):
890 # print('visit(%r, %r, %r)' % (path, key, value))
891 return key, value
892
893 # enable the extreme: monkeypatching iterutils with a different default_visit
894 _orig_default_visit = default_visit
895
896
897 def default_enter(path, key, value):
898 # print('enter(%r, %r)' % (key, value))
899 if isinstance(value, basestring):
900 return value, False
901 elif isinstance(value, Mapping):
902 return value.__class__(), ItemsView(value)
903 elif isinstance(value, Sequence):
904 return value.__class__(), enumerate(value)
905 elif isinstance(value, Set):
906 return value.__class__(), enumerate(value)
907 else:
908 # files, strings, other iterables, and scalars are not
909 # traversed
910 return value, False
911
912
913 def default_exit(path, key, old_parent, new_parent, new_items):
914 # print('exit(%r, %r, %r, %r, %r)'
915 # % (path, key, old_parent, new_parent, new_items))
916 ret = new_parent
917 if isinstance(new_parent, Mapping):
918 new_parent.update(new_items)
919 elif isinstance(new_parent, Sequence):
920 vals = [v for i, v in new_items]
921 try:
922 new_parent.extend(vals)
923 except AttributeError:
924 ret = new_parent.__class__(vals) # tuples
925 elif isinstance(new_parent, Set):
926 vals = [v for i, v in new_items]
927 try:
928 new_parent.update(vals)
929 except AttributeError:
930 ret = new_parent.__class__(vals) # frozensets
931 else:
932 raise RuntimeError('unexpected iterable type: %r' % type(new_parent))
933 return ret
934
935
936 def remap(root, visit=default_visit, enter=default_enter, exit=default_exit,
937 **kwargs):
938 """The remap ("recursive map") function is used to traverse and
939 transform nested structures. Lists, tuples, sets, and dictionaries
940 are just a few of the data structures nested into heterogenous
941 tree-like structures that are so common in programming.
942 Unfortunately, Python's built-in ways to manipulate collections
943 are almost all flat. List comprehensions may be fast and succinct,
944 but they do not recurse, making it tedious to apply quick changes
945 or complex transforms to real-world data.
946
947 remap goes where list comprehensions cannot.
948
949 Here's an example of removing all Nones from some data:
950
951 >>> from pprint import pprint
952 >>> reviews = {'Star Trek': {'TNG': 10, 'DS9': 8.5, 'ENT': None},
953 ... 'Babylon 5': 6, 'Dr. Who': None}
954 >>> pprint(remap(reviews, lambda p, k, v: v is not None))
955 {'Babylon 5': 6, 'Star Trek': {'DS9': 8.5, 'TNG': 10}}
956
957 Notice how both Nones have been removed despite the nesting in the
958 dictionary. Not bad for a one-liner, and that's just the beginning.
959 See `this remap cookbook`_ for more delicious recipes.
960
961 .. _this remap cookbook: http://sedimental.org/remap.html
962
963 remap takes four main arguments: the object to traverse and three
964 optional callables which determine how the remapped object will be
965 created.
966
967 Args:
968
969 root: The target object to traverse. By default, remap
970 supports iterables like :class:`list`, :class:`tuple`,
971 :class:`dict`, and :class:`set`, but any object traversable by
972 *enter* will work.
973 visit (callable): This function is called on every item in
974 *root*. It must accept three positional arguments, *path*,
975 *key*, and *value*. *path* is simply a tuple of parents'
976 keys. *visit* should return the new key-value pair. It may
977 also return ``True`` as shorthand to keep the old item
978 unmodified, or ``False`` to drop the item from the new
979 structure. *visit* is called after *enter*, on the new parent.
980
981 The *visit* function is called for every item in root,
982 including duplicate items. For traversable values, it is
983 called on the new parent object, after all its children
984 have been visited. The default visit behavior simply
985 returns the key-value pair unmodified.
986 enter (callable): This function controls which items in *root*
987 are traversed. It accepts the same arguments as *visit*: the
988 path, the key, and the value of the current item. It returns a
989 pair of the blank new parent, and an iterator over the items
990 which should be visited. If ``False`` is returned instead of
991 an iterator, the value will not be traversed.
992
993 The *enter* function is only called once per unique value. The
994 default enter behavior support mappings, sequences, and
995 sets. Strings and all other iterables will not be traversed.
996 exit (callable): This function determines how to handle items
997 once they have been visited. It gets the same three
998 arguments as the other functions -- *path*, *key*, *value*
999 -- plus two more: the blank new parent object returned
1000 from *enter*, and a list of the new items, as remapped by
1001 *visit*.
1002
1003 Like *enter*, the *exit* function is only called once per
1004 unique value. The default exit behavior is to simply add
1005 all new items to the new parent, e.g., using
1006 :meth:`list.extend` and :meth:`dict.update` to add to the
1007 new parent. Immutable objects, such as a :class:`tuple` or
1008 :class:`namedtuple`, must be recreated from scratch, but
1009 use the same type as the new parent passed back from the
1010 *enter* function.
1011 reraise_visit (bool): A pragmatic convenience for the *visit*
1012 callable. When set to ``False``, remap ignores any errors
1013 raised by the *visit* callback. Items causing exceptions
1014 are kept. See examples for more details.
1015
1016 remap is designed to cover the majority of cases with just the
1017 *visit* callable. While passing in multiple callables is very
1018 empowering, remap is designed so very few cases should require
1019 passing more than one function.
1020
1021 When passing *enter* and *exit*, it's common and easiest to build
1022 on the default behavior. Simply add ``from boltons.iterutils import
1023 default_enter`` (or ``default_exit``), and have your enter/exit
1024 function call the default behavior before or after your custom
1025 logic. See `this example`_.
1026
1027 Duplicate and self-referential objects (aka reference loops) are
1028 automatically handled internally, `as shown here`_.
1029
1030 .. _this example: http://sedimental.org/remap.html#sort_all_lists
1031 .. _as shown here: http://sedimental.org/remap.html#corner_cases
1032
1033 """
1034 # TODO: improve argument formatting in sphinx doc
1035 # TODO: enter() return (False, items) to continue traverse but cancel copy?
1036 if not callable(visit):
1037 raise TypeError('visit expected callable, not: %r' % visit)
1038 if not callable(enter):
1039 raise TypeError('enter expected callable, not: %r' % enter)
1040 if not callable(exit):
1041 raise TypeError('exit expected callable, not: %r' % exit)
1042 reraise_visit = kwargs.pop('reraise_visit', True)
1043 if kwargs:
1044 raise TypeError('unexpected keyword arguments: %r' % kwargs.keys())
1045
1046 path, registry, stack = (), {}, [(None, root)]
1047 new_items_stack = []
1048 while stack:
1049 key, value = stack.pop()
1050 id_value = id(value)
1051 if key is _REMAP_EXIT:
1052 key, new_parent, old_parent = value
1053 id_value = id(old_parent)
1054 path, new_items = new_items_stack.pop()
1055 value = exit(path, key, old_parent, new_parent, new_items)
1056 registry[id_value] = value
1057 if not new_items_stack:
1058 continue
1059 elif id_value in registry:
1060 value = registry[id_value]
1061 else:
1062 res = enter(path, key, value)
1063 try:
1064 new_parent, new_items = res
1065 except TypeError:
1066 # TODO: handle False?
1067 raise TypeError('enter should return a tuple of (new_parent,'
1068 ' items_iterator), not: %r' % res)
1069 if new_items is not False:
1070 # traverse unless False is explicitly passed
1071 registry[id_value] = new_parent
1072 new_items_stack.append((path, []))
1073 if value is not root:
1074 path += (key,)
1075 stack.append((_REMAP_EXIT, (key, new_parent, value)))
1076 if new_items:
1077 stack.extend(reversed(list(new_items)))
1078 continue
1079 if visit is _orig_default_visit:
1080 # avoid function call overhead by inlining identity operation
1081 visited_item = (key, value)
1082 else:
1083 try:
1084 visited_item = visit(path, key, value)
1085 except Exception:
1086 if reraise_visit:
1087 raise
1088 visited_item = True
1089 if visited_item is False:
1090 continue # drop
1091 elif visited_item is True:
1092 visited_item = (key, value)
1093 # TODO: typecheck?
1094 # raise TypeError('expected (key, value) from visit(),'
1095 # ' not: %r' % visited_item)
1096 try:
1097 new_items_stack[-1][1].append(visited_item)
1098 except IndexError:
1099 raise TypeError('expected remappable root, not: %r' % root)
1100 return value
1101
1102
1103 class PathAccessError(KeyError, IndexError, TypeError):
1104 """An amalgamation of KeyError, IndexError, and TypeError,
1105 representing what can occur when looking up a path in a nested
1106 object.
1107 """
1108 def __init__(self, exc, seg, path):
1109 self.exc = exc
1110 self.seg = seg
1111 self.path = path
1112
1113 def __repr__(self):
1114 cn = self.__class__.__name__
1115 return '%s(%r, %r, %r)' % (cn, self.exc, self.seg, self.path)
1116
1117 def __str__(self):
1118 return ('could not access %r from path %r, got error: %r'
1119 % (self.seg, self.path, self.exc))
1120
1121
1122 def get_path(root, path, default=_UNSET):
1123 """Retrieve a value from a nested object via a tuple representing the
1124 lookup path.
1125
1126 >>> root = {'a': {'b': {'c': [[1], [2], [3]]}}}
1127 >>> get_path(root, ('a', 'b', 'c', 2, 0))
1128 3
1129
1130 The path format is intentionally consistent with that of
1131 :func:`remap`.
1132
1133 One of get_path's chief aims is improved error messaging. EAFP is
1134 great, but the error messages are not.
1135
1136 For instance, ``root['a']['b']['c'][2][1]`` gives back
1137 ``IndexError: list index out of range``
1138
1139 What went out of range where? get_path currently raises
1140 ``PathAccessError: could not access 2 from path ('a', 'b', 'c', 2,
1141 1), got error: IndexError('list index out of range',)``, a
1142 subclass of IndexError and KeyError.
1143
1144 You can also pass a default that covers the entire operation,
1145 should the lookup fail at any level.
1146
1147 Args:
1148 root: The target nesting of dictionaries, lists, or other
1149 objects supporting ``__getitem__``.
1150 path (tuple): A list of strings and integers to be successively
1151 looked up within *root*.
1152 default: The value to be returned should any
1153 ``PathAccessError`` exceptions be raised.
1154 """
1155 if isinstance(path, basestring):
1156 path = path.split('.')
1157 cur = root
1158 try:
1159 for seg in path:
1160 try:
1161 cur = cur[seg]
1162 except (KeyError, IndexError) as exc:
1163 raise PathAccessError(exc, seg, path)
1164 except TypeError as exc:
1165 # either string index in a list, or a parent that
1166 # doesn't support indexing
1167 try:
1168 seg = int(seg)
1169 cur = cur[seg]
1170 except (ValueError, KeyError, IndexError, TypeError):
1171 if not is_iterable(cur):
1172 exc = TypeError('%r object is not indexable'
1173 % type(cur).__name__)
1174 raise PathAccessError(exc, seg, path)
1175 except PathAccessError:
1176 if default is _UNSET:
1177 raise
1178 return default
1179 return cur
1180
1181
1182 def research(root, query=lambda p, k, v: True, reraise=False):
1183 """The :func:`research` function uses :func:`remap` to recurse over
1184 any data nested in *root*, and find values which match a given
1185 criterion, specified by the *query* callable.
1186
1187 Results are returned as a list of ``(path, value)`` pairs. The
1188 paths are tuples in the same format accepted by
1189 :func:`get_path`. This can be useful for comparing values nested
1190 in two or more different structures.
1191
1192 Here's a simple example that finds all integers:
1193
1194 >>> root = {'a': {'b': 1, 'c': (2, 'd', 3)}, 'e': None}
1195 >>> res = research(root, query=lambda p, k, v: isinstance(v, int))
1196 >>> print(sorted(res))
1197 [(('a', 'b'), 1), (('a', 'c', 0), 2), (('a', 'c', 2), 3)]
1198
1199 Note how *query* follows the same, familiar ``path, key, value``
1200 signature as the ``visit`` and ``enter`` functions on
1201 :func:`remap`, and returns a :class:`bool`.
1202
1203 Args:
1204 root: The target object to search. Supports the same types of
1205 objects as :func:`remap`, including :class:`list`,
1206 :class:`tuple`, :class:`dict`, and :class:`set`.
1207 query (callable): The function called on every object to
1208 determine whether to include it in the search results. The
1209 callable must accept three arguments, *path*, *key*, and
1210 *value*, commonly abbreviated *p*, *k*, and *v*, same as
1211 *enter* and *visit* from :func:`remap`.
1212 reraise (bool): Whether to reraise exceptions raised by *query*
1213 or to simply drop the result that caused the error.
1214
1215
1216 With :func:`research` it's easy to inspect the details of a data
1217 structure, like finding values that are at a certain depth (using
1218 ``len(p)``) and much more. If more advanced functionality is
1219 needed, check out the code and make your own :func:`remap`
1220 wrapper, and consider `submitting a patch`_!
1221
1222 .. _submitting a patch: https://github.com/mahmoud/boltons/pulls
1223 """
1224 ret = []
1225
1226 if not callable(query):
1227 raise TypeError('query expected callable, not: %r' % query)
1228
1229 def enter(path, key, value):
1230 try:
1231 if query(path, key, value):
1232 ret.append((path + (key,), value))
1233 except Exception:
1234 if reraise:
1235 raise
1236 return default_enter(path, key, value)
1237
1238 remap(root, enter=enter)
1239 return ret
1240
1241
1242 # TODO: recollect()
1243 # TODO: refilter()
1244 # TODO: reiter()
1245
1246
1247 # GUID iterators: 10x faster and somewhat more compact than uuid.
1248
1249 class GUIDerator(object):
1250 """The GUIDerator is an iterator that yields a globally-unique
1251 identifier (GUID) on every iteration. The GUIDs produced are
1252 hexadecimal strings.
1253
1254 Testing shows it to be around 12x faster than the uuid module. By
1255 default it is also more compact, partly due to its default 96-bit
1256 (24-hexdigit) length. 96 bits of randomness means that there is a
1257 1 in 2 ^ 32 chance of collision after 2 ^ 64 iterations. If more
1258 or less uniqueness is desired, the *size* argument can be adjusted
1259 accordingly.
1260
1261 Args:
1262 size (int): character length of the GUID, defaults to 24. Lengths
1263 between 20 and 36 are considered valid.
1264
1265 The GUIDerator has built-in fork protection that causes it to
1266 detect a fork on next iteration and reseed accordingly.
1267
1268 """
1269 def __init__(self, size=24):
1270 self.size = size
1271 if size < 20 or size > 36:
1272 raise ValueError('expected 20 < size <= 36')
1273 self.count = itertools.count()
1274 self.reseed()
1275
1276 def reseed(self):
1277 self.pid = os.getpid()
1278 self.salt = '-'.join([str(self.pid),
1279 socket.gethostname() or b'<nohostname>',
1280 str(time.time()),
1281 codecs.encode(os.urandom(6),
1282 'hex_codec').decode('ascii')])
1283 # that codecs trick is the best/only way to get a bytes to
1284 # hexbytes in py2/3
1285 return
1286
1287 def __iter__(self):
1288 return self
1289
1290 if _IS_PY3:
1291 def __next__(self):
1292 if os.getpid() != self.pid:
1293 self.reseed()
1294 target_bytes = (self.salt + str(next(self.count))).encode('utf8')
1295 hash_text = hashlib.sha1(target_bytes).hexdigest()[:self.size]
1296 return hash_text
1297 else:
1298 def __next__(self):
1299 if os.getpid() != self.pid:
1300 self.reseed()
1301 return hashlib.sha1(self.salt +
1302 str(next(self.count))).hexdigest()[:self.size]
1303
1304 next = __next__
1305
1306
1307 class SequentialGUIDerator(GUIDerator):
1308 """Much like the standard GUIDerator, the SequentialGUIDerator is an
1309 iterator that yields a globally-unique identifier (GUID) on every
1310 iteration. The GUIDs produced are hexadecimal strings.
1311
1312 The SequentialGUIDerator differs in that it picks a starting GUID
1313 value and increments every iteration. This yields GUIDs which are
1314 of course unique, but also ordered and lexicographically sortable.
1315
1316 The SequentialGUIDerator is around 50% faster than the normal
1317 GUIDerator, making it almost 20x as fast as the built-in uuid
1318 module. By default it is also more compact, partly due to its
1319 96-bit (24-hexdigit) default length. 96 bits of randomness means that
1320 there is a 1 in 2 ^ 32 chance of collision after 2 ^ 64
1321 iterations. If more or less uniqueness is desired, the *size*
1322 argument can be adjusted accordingly.
1323
1324 Args:
1325 size (int): character length of the GUID, defaults to 24.
1326
1327 Note that with SequentialGUIDerator there is a chance of GUIDs
1328 growing larger than the size configured. The SequentialGUIDerator
1329 has built-in fork protection that causes it to detect a fork on
1330 next iteration and reseed accordingly.
1331
1332 """
1333
1334 if _IS_PY3:
1335 def reseed(self):
1336 super(SequentialGUIDerator, self).reseed()
1337 start_str = hashlib.sha1(self.salt.encode('utf8')).hexdigest()
1338 self.start = int(start_str[:self.size], 16)
1339 self.start |= (1 << ((self.size * 4) - 2))
1340 else:
1341 def reseed(self):
1342 super(SequentialGUIDerator, self).reseed()
1343 start_str = hashlib.sha1(self.salt).hexdigest()
1344 self.start = int(start_str[:self.size], 16)
1345 self.start |= (1 << ((self.size * 4) - 2))
1346
1347 def __next__(self):
1348 if os.getpid() != self.pid:
1349 self.reseed()
1350 return '%x' % (next(self.count) + self.start)
1351
1352 next = __next__
1353
1354
1355 guid_iter = GUIDerator()
1356 seq_guid_iter = SequentialGUIDerator()
1357
1358
1359 def soft_sorted(iterable, first=None, last=None, key=None, reverse=False):
1360 """For when you care about the order of some elements, but not about
1361 others.
1362
1363 Use this to float to the top and/or sink to the bottom a specific
1364 ordering, while sorting the rest of the elements according to
1365 normal :func:`sorted` rules.
1366
1367 >>> soft_sorted(['two', 'b', 'one', 'a'], first=['one', 'two'])
1368 ['one', 'two', 'a', 'b']
1369 >>> soft_sorted(range(7), first=[6, 15], last=[2, 4], reverse=True)
1370 [6, 5, 3, 1, 0, 2, 4]
1371 >>> import string
1372 >>> ''.join(soft_sorted(string.hexdigits, first='za1', last='b', key=str.lower))
1373 'aA1023456789cCdDeEfFbB'
1374
1375 Args:
1376 iterable (list): A list or other iterable to sort.
1377 first (list): A sequence to enforce for elements which should
1378 appear at the beginning of the returned list.
1379 last (list): A sequence to enforce for elements which should
1380 appear at the end of the returned list.
1381 key (callable): Callable used to generate a comparable key for
1382 each item to be sorted, same as the key in
1383 :func:`sorted`. Note that entries in *first* and *last*
1384 should be the keys for the items. Defaults to
1385 passthrough/the identity function.
1386 reverse (bool): Whether or not elements not explicitly ordered
1387 by *first* and *last* should be in reverse order or not.
1388
1389 Returns a new list in sorted order.
1390 """
1391 first = first or []
1392 last = last or []
1393 key = key or (lambda x: x)
1394 seq = list(iterable)
1395 other = [x for x in seq if not ((first and key(x) in first) or (last and key(x) in last))]
1396 other.sort(key=key, reverse=reverse)
1397
1398 if first:
1399 first = sorted([x for x in seq if key(x) in first], key=lambda x: first.index(key(x)))
1400 if last:
1401 last = sorted([x for x in seq if key(x) in last], key=lambda x: last.index(key(x)))
1402 return first + other + last
1403
1404
1405 def untyped_sorted(iterable, key=None, reverse=False):
1406 """A version of :func:`sorted` which will happily sort an iterable of
1407 heterogenous types and return a new list, similar to legacy Python's
1408 behavior.
1409
1410 >>> untyped_sorted(['abc', 2.0, 1, 2, 'def'])
1411 [1, 2.0, 2, 'abc', 'def']
1412
1413 Note how mutually orderable types are sorted as expected, as in
1414 the case of the integers and floats above.
1415
1416 .. note::
1417
1418 Results may vary across Python versions and builds, but the
1419 function will produce a sorted list, except in the case of
1420 explicitly unorderable objects.
1421
1422 """
1423 class _Wrapper(object):
1424 slots = ('obj',)
1425
1426 def __init__(self, obj):
1427 self.obj = obj
1428
1429 def __lt__(self, other):
1430 obj = key(self.obj) if key is not None else self.obj
1431 other = key(other.obj) if key is not None else other.obj
1432 try:
1433 ret = obj < other
1434 except TypeError:
1435 ret = ((type(obj).__name__, id(type(obj)), obj)
1436 < (type(other).__name__, id(type(other)), other))
1437 return ret
1438
1439 if key is not None and not callable(key):
1440 raise TypeError('expected function or callable object for key, not: %r'
1441 % key)
1442
1443 return sorted(iterable, key=_Wrapper, reverse=reverse)
1444
1445 """
1446 May actually be faster to do an isinstance check for a str path
1447
1448 $ python -m timeit -s "x = [1]" "x[0]"
1449 10000000 loops, best of 3: 0.0207 usec per loop
1450 $ python -m timeit -s "x = [1]" "try: x[0] \nexcept: pass"
1451 10000000 loops, best of 3: 0.029 usec per loop
1452 $ python -m timeit -s "x = [1]" "try: x[1] \nexcept: pass"
1453 1000000 loops, best of 3: 0.315 usec per loop
1454 # setting up try/except is fast, only around 0.01us
1455 # actually triggering the exception takes almost 10x as long
1456
1457 $ python -m timeit -s "x = [1]" "isinstance(x, basestring)"
1458 10000000 loops, best of 3: 0.141 usec per loop
1459 $ python -m timeit -s "x = [1]" "isinstance(x, str)"
1460 10000000 loops, best of 3: 0.131 usec per loop
1461 $ python -m timeit -s "x = [1]" "try: x.split('.')\n except: pass"
1462 1000000 loops, best of 3: 0.443 usec per loop
1463 $ python -m timeit -s "x = [1]" "try: x.split('.') \nexcept AttributeError: pass"
1464 1000000 loops, best of 3: 0.544 usec per loop
1465 """