]> jfr.im git - dlqueue.git/blob - venv/lib/python3.11/site-packages/pkg_resources/_vendor/more_itertools/more.py
init: venv and flask
[dlqueue.git] / venv / lib / python3.11 / site-packages / pkg_resources / _vendor / more_itertools / more.py
1 import warnings
2
3 from collections import Counter, defaultdict, deque, abc
4 from collections.abc import Sequence
5 from functools import partial, reduce, wraps
6 from heapq import heapify, heapreplace, heappop
7 from itertools import (
8 chain,
9 compress,
10 count,
11 cycle,
12 dropwhile,
13 groupby,
14 islice,
15 repeat,
16 starmap,
17 takewhile,
18 tee,
19 zip_longest,
20 )
21 from math import exp, factorial, floor, log
22 from queue import Empty, Queue
23 from random import random, randrange, uniform
24 from operator import itemgetter, mul, sub, gt, lt, ge, le
25 from sys import hexversion, maxsize
26 from time import monotonic
27
28 from .recipes import (
29 _marker,
30 _zip_equal,
31 UnequalIterablesError,
32 consume,
33 flatten,
34 pairwise,
35 powerset,
36 take,
37 unique_everseen,
38 all_equal,
39 )
40
# Public API of this module: the names exported by ``from ... import *``
# and documented for external use. Keep entries quoted and one per line.
__all__ = [
    'AbortThread',
    'SequenceView',
    'UnequalIterablesError',
    'adjacent',
    'all_unique',
    'always_iterable',
    'always_reversible',
    'bucket',
    'callback_iter',
    'chunked',
    'chunked_even',
    'circular_shifts',
    'collapse',
    'combination_index',
    'consecutive_groups',
    'constrained_batches',
    'consumer',
    'count_cycle',
    'countable',
    'difference',
    'distinct_combinations',
    'distinct_permutations',
    'distribute',
    'divide',
    'duplicates_everseen',
    'duplicates_justseen',
    'exactly_n',
    'filter_except',
    'first',
    'gray_product',
    'groupby_transform',
    'ichunked',
    'iequals',
    'ilen',
    'interleave',
    'interleave_evenly',
    'interleave_longest',
    'intersperse',
    'is_sorted',
    'islice_extended',
    'iterate',
    'last',
    'locate',
    'longest_common_prefix',
    'lstrip',
    'make_decorator',
    'map_except',
    'map_if',
    'map_reduce',
    'mark_ends',
    'minmax',
    'nth_or_last',
    'nth_permutation',
    'nth_product',
    'numeric_range',
    'one',
    'only',
    'padded',
    'partitions',
    'peekable',
    'permutation_index',
    'product_index',
    'raise_',
    'repeat_each',
    'repeat_last',
    'replace',
    'rlocate',
    'rstrip',
    'run_length',
    'sample',
    'seekable',
    'set_partitions',
    'side_effect',
    'sliced',
    'sort_together',
    'split_after',
    'split_at',
    'split_before',
    'split_into',
    'split_when',
    'spy',
    'stagger',
    'strip',
    'strictly_n',
    'substrings',
    'substrings_indexes',
    'time_limited',
    'unique_in_window',
    'unique_to_each',
    'unzip',
    'value_chain',
    'windowed',
    'windowed_complete',
    'with_iter',
    'zip_broadcast',
    'zip_equal',
    'zip_offset',
]
140
141
def chunked(iterable, n, strict=False):
    """Break *iterable* into lists of length *n*:

    >>> list(chunked([1, 2, 3, 4, 5, 6], 3))
    [[1, 2, 3], [4, 5, 6]]

    By default the final list is allowed to be shorter than *n* when the
    length of *iterable* is not evenly divisible by *n*:

    >>> list(chunked([1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[1, 2, 3], [4, 5, 6], [7, 8]]

    To pad the final chunk instead, see the :func:`grouper` recipe.

    With *strict* set to ``True``, a short final chunk causes
    ``ValueError`` to be raised just before it would have been yielded.

    """
    source = iter(iterable)
    # iter()'s two-argument form calls the lambda until it produces the
    # sentinel (an empty list), i.e. until *source* is exhausted.
    iterator = iter(lambda: list(islice(source, n)), [])
    if not strict:
        return iterator

    if n is None:
        raise ValueError('n must not be None when using strict mode.')

    def checked():
        # Re-yield each chunk, verifying its length along the way.
        for chunk in iterator:
            if len(chunk) != n:
                raise ValueError('iterable is not divisible by n.')
            yield chunk

    return iter(checked())
175
176
def first(iterable, default=_marker):
    """Return the first item of *iterable*, or *default* if *iterable* is
    empty.

    >>> first([0, 1, 2, 3])
    0
    >>> first([], 'some default')
    'some default'

    If the iterable is empty and no *default* is given, ``ValueError``
    is raised.

    Handy for grabbing any single value out of a generator of
    expensive-to-compute items; slightly shorter than
    ``next(iter(iterable), default)``.

    """
    try:
        return next(iter(iterable))
    except StopIteration as exc:
        # Empty iterable: fall back to the default, or complain loudly.
        if default is not _marker:
            return default
        raise ValueError(
            'first() was called on an empty iterable, and no '
            'default value was provided.'
        ) from exc
203
204
def last(iterable, default=_marker):
    """Return the last item of *iterable*, or *default* if *iterable* is
    empty.

    >>> last([0, 1, 2, 3])
    3
    >>> last([], 'some default')
    'some default'

    If the iterable is empty and no *default* is given, ``ValueError``
    is raised.
    """
    try:
        if isinstance(iterable, Sequence):
            # Sequences support direct indexing from the right.
            return iterable[-1]
        # Work around https://bugs.python.org/issue38525
        if hasattr(iterable, '__reversed__') and (hexversion != 0x030800F0):
            return next(reversed(iterable))
        # Generic fallback: a one-slot deque keeps only the final item.
        return deque(iterable, maxlen=1)[-1]
    except (IndexError, TypeError, StopIteration):
        if default is not _marker:
            return default
        raise ValueError(
            'last() was called on an empty iterable, and no default was '
            'provided.'
        )
232
233
def nth_or_last(iterable, n, default=_marker):
    """Return the item at index *n* of *iterable*, falling back to the
    final item when the iterable is shorter, or to *default* when it is
    empty.

    >>> nth_or_last([0, 1, 2, 3], 2)
    2
    >>> nth_or_last([0, 1], 2)
    1
    >>> nth_or_last([], 0, 'some default')
    'some default'

    If the iterable is empty and no *default* is given, ``ValueError``
    is raised.
    """
    # Trim to the first n + 1 items; the last of those is the answer.
    head = islice(iterable, n + 1)
    return last(head, default=default)
249
250
class peekable:
    """Wrap an iterator to allow lookahead and prepending elements.

    Call :meth:`peek` on the result to get the value that will be returned
    by :func:`next`. This won't advance the iterator:

    >>> p = peekable(['a', 'b'])
    >>> p.peek()
    'a'
    >>> next(p)
    'a'

    Pass :meth:`peek` a default value to return that instead of raising
    ``StopIteration`` when the iterator is exhausted.

    >>> p = peekable([])
    >>> p.peek('hi')
    'hi'

    peekables also offer a :meth:`prepend` method, which "inserts" items
    at the head of the iterable:

    >>> p = peekable([1, 2, 3])
    >>> p.prepend(10, 11, 12)
    >>> next(p)
    10
    >>> p.peek()
    11
    >>> list(p)
    [11, 12, 1, 2, 3]

    peekables can be indexed. Index 0 is the item that will be returned by
    :func:`next`, index 1 is the item after that, and so on:
    The values up to the given index will be cached.

    >>> p = peekable(['a', 'b', 'c', 'd'])
    >>> p[0]
    'a'
    >>> p[1]
    'b'
    >>> next(p)
    'a'

    Negative indexes are supported, but be aware that they will cache the
    remaining items in the source iterator, which may require significant
    storage.

    To check whether a peekable is exhausted, check its truth value:

    >>> p = peekable(['a', 'b'])
    >>> if p:  # peekable has items
    ...     list(p)
    ['a', 'b']
    >>> if not p:  # peekable is exhausted
    ...     list(p)
    []

    """

    def __init__(self, iterable):
        self._it = iter(iterable)
        # Items pulled off _it (by peeking, indexing, or prepending) that
        # have not yet been consumed via __next__.
        self._cache = deque()

    def __iter__(self):
        return self

    def __bool__(self):
        # Truthy iff at least one more item can be produced.
        try:
            self.peek()
        except StopIteration:
            return False
        return True

    def peek(self, default=_marker):
        """Return the item that will be next returned from ``next()``.

        Return ``default`` if there are no items left. If ``default`` is not
        provided, raise ``StopIteration``.

        """
        # Pull one item into the cache if needed; the cached head is the
        # value next() would return.
        if not self._cache:
            try:
                self._cache.append(next(self._it))
            except StopIteration:
                if default is _marker:
                    raise
                return default
        return self._cache[0]

    def prepend(self, *items):
        """Stack up items to be the next ones returned from ``next()`` or
        ``self.peek()``. The items will be returned in
        first in, first out order::

            >>> p = peekable([1, 2, 3])
            >>> p.prepend(10, 11, 12)
            >>> next(p)
            10
            >>> list(p)
            [11, 12, 1, 2, 3]

        It is possible, by prepending items, to "resurrect" a peekable that
        previously raised ``StopIteration``.

            >>> p = peekable([])
            >>> next(p)
            Traceback (most recent call last):
              ...
            StopIteration
            >>> p.prepend(1)
            >>> next(p)
            1
            >>> next(p)
            Traceback (most recent call last):
              ...
            StopIteration

        """
        # extendleft() reverses its argument, so reverse first to keep
        # *items* in their given order at the head of the cache.
        self._cache.extendleft(reversed(items))

    def __next__(self):
        # Serve cached (peeked/prepended) items before touching the
        # underlying iterator.
        if self._cache:
            return self._cache.popleft()

        return next(self._it)

    def _get_slice(self, index):
        # Normalize the slice's arguments
        step = 1 if (index.step is None) else index.step
        if step > 0:
            start = 0 if (index.start is None) else index.start
            stop = maxsize if (index.stop is None) else index.stop
        elif step < 0:
            start = -1 if (index.start is None) else index.start
            stop = (-maxsize - 1) if (index.stop is None) else index.stop
        else:
            raise ValueError('slice step cannot be zero')

        # If either the start or stop index is negative, we'll need to cache
        # the rest of the iterable in order to slice from the right side.
        if (start < 0) or (stop < 0):
            self._cache.extend(self._it)
        # Otherwise we'll need to find the rightmost index and cache to that
        # point.
        else:
            n = min(max(start, stop) + 1, maxsize)
            cache_len = len(self._cache)
            if n >= cache_len:
                self._cache.extend(islice(self._it, n - cache_len))

        # Slice a list copy of the cache; the cache itself is left intact
        # so the un-consumed items are still returned by next().
        return list(self._cache)[index]

    def __getitem__(self, index):
        if isinstance(index, slice):
            return self._get_slice(index)

        # A negative index requires knowing the total length, so cache
        # everything; otherwise cache just enough to reach *index*.
        cache_len = len(self._cache)
        if index < 0:
            self._cache.extend(self._it)
        elif index >= cache_len:
            self._cache.extend(islice(self._it, index + 1 - cache_len))

        return self._cache[index]
414
415
def consumer(func):
    """Decorator that automatically advances a PEP-342-style "reverse
    iterator" (a generator used via ``send()``) to its first yield point,
    so callers don't have to prime it with ``next()`` themselves.

    >>> @consumer
    ... def tally():
    ...     i = 0
    ...     while True:
    ...         print('Thing number %s is %s.' % (i, (yield)))
    ...         i += 1
    ...
    >>> t = tally()
    >>> t.send('red')
    Thing number 0 is red.
    >>> t.send('fish')
    Thing number 1 is fish.

    Without the decorator, ``next(t)`` would have to be called before
    ``t.send()`` could be used.

    """

    @wraps(func)
    def primed(*args, **kwargs):
        generator = func(*args, **kwargs)
        # Advance to the first yield so send() works immediately.
        next(generator)
        return generator

    return primed
446
447
def ilen(iterable):
    """Return the number of items in *iterable*.

    >>> ilen(x for x in range(1000000) if x % 3 == 0)
    333334

    This consumes the iterable, so handle with care.

    """
    # zip() pairs each item with a value from count() and the zero-length
    # deque discards the pairs, so the whole iterable is exhausted at C
    # speed while the counter keeps the tally. Benchmarks showed this to
    # be the fastest known approach (see GitHub issues #236, #230).
    tally = count()
    deque(zip(iterable, tally), maxlen=0)
    return next(tally)
463
464
def iterate(func, start):
    """Yield ``start``, ``func(start)``, ``func(func(start))``, and so on,
    forever.

    >>> from itertools import islice
    >>> list(islice(iterate(lambda x: 2*x, 1), 10))
    [1, 2, 4, 8, 16, 32, 64, 128, 256, 512]

    """
    value = start
    while True:
        yield value
        value = func(value)
476
477
def with_iter(context_manager):
    """Wrap an iterable in a ``with`` statement, so it closes once exhausted.

    For example, this will close the file when the iterator is exhausted::

        upper_lines = (line.upper() for line in with_iter(open('foo')))

    Any context manager which returns an iterable is a candidate for
    ``with_iter``.

    """
    with context_manager as iterable:
        for item in iterable:
            yield item
491
492
def one(iterable, too_short=None, too_long=None):
    """Return the sole item of *iterable*, raising an exception when the
    iterable is empty or holds more than one item.

    Useful for asserting a single result, e.g. from a database query
    expected to return exactly one row.

    An empty iterable raises ``ValueError`` by default; pass a different
    exception via *too_short*:

    >>> it = []
    >>> one(it)  # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    ...
    ValueError: too many items in iterable (expected 1)'
    >>> too_short = IndexError('too few items')
    >>> one(it, too_short=too_short)  # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    ...
    IndexError: too few items

    An iterable with two or more items also raises ``ValueError`` by
    default; pass a different exception via *too_long*:

    >>> it = ['too', 'many']
    >>> one(it)  # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    ...
    ValueError: Expected exactly one item in iterable, but got 'too',
    'many', and perhaps more.
    >>> too_long = RuntimeError
    >>> one(it, too_long=too_long)  # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    ...
    RuntimeError

    Note that :func:`one` advances *iterable* up to two times, so it is
    destructive; see :func:`spy` or :func:`peekable` for gentler
    alternatives.

    """
    it = iter(iterable)

    # The first next() must succeed...
    try:
        first_value = next(it)
    except StopIteration as e:
        raise (
            too_short or ValueError('too few items in iterable (expected 1)')
        ) from e

    # ...and the second must fail.
    try:
        second_value = next(it)
    except StopIteration:
        return first_value

    msg = (
        f'Expected exactly one item in iterable, but got {first_value!r}, '
        f'{second_value!r}, and perhaps more.'
    )
    raise too_long or ValueError(msg)
558
559
def raise_(exception, *args):
    """Raise *exception* constructed with *args* (an expression-friendly
    helper, e.g. for use inside lambdas)."""
    raise exception(*args)
562
563
def strictly_n(iterable, n, too_short=None, too_long=None):
    """Validate that *iterable* has exactly *n* items and yield them if
    it does. If it has fewer than *n* items, call function *too_short*
    with the number of items found. If it has more than *n* items, call
    function *too_long* with ``n + 1``.

    >>> iterable = ['a', 'b', 'c', 'd']
    >>> n = 4
    >>> list(strictly_n(iterable, n))
    ['a', 'b', 'c', 'd']

    By default, *too_short* and *too_long* raise ``ValueError``:

    >>> list(strictly_n('ab', 3))  # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    ...
    ValueError: too few items in iterable (got 2)

    >>> list(strictly_n('abc', 2))  # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    ...
    ValueError: too many items in iterable (got at least 3)

    Supply your own callables to do something else instead:

    >>> def too_short(item_count):
    ...     raise RuntimeError
    >>> it = strictly_n('abcd', 6, too_short=too_short)
    >>> list(it)  # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    ...
    RuntimeError

    >>> def too_long(item_count):
    ...     print('The boss is going to hear about this')
    >>> it = strictly_n('abcdef', 4, too_long=too_long)
    >>> list(it)
    The boss is going to hear about this
    ['a', 'b', 'c', 'd']

    """
    if too_short is None:

        def too_short(item_count):
            raise ValueError(
                'Too few items in iterable (got {})'.format(item_count)
            )

    if too_long is None:

        def too_long(item_count):
            raise ValueError(
                'Too many items in iterable (got at least {})'.format(
                    item_count
                )
            )

    it = iter(iterable)

    # Yield up to n items, counting how many were actually available.
    sent = 0
    for item in islice(it, n):
        yield item
        sent += 1

    if sent < n:
        too_short(sent)
        return

    # Exactly n items were yielded; any leftover item is one too many.
    try:
        next(it)
    except StopIteration:
        pass
    else:
        too_long(n + 1)
636
637
def distinct_permutations(iterable, r=None):
    """Yield successive distinct permutations of the elements in *iterable*.

    >>> sorted(distinct_permutations([1, 0, 1]))
    [(0, 1, 1), (1, 0, 1), (1, 1, 0)]

    Equivalent to ``set(permutations(iterable))``, except duplicates are not
    generated and thrown away. For larger input sequences this is much more
    efficient.

    Duplicate permutations arise when there are duplicated elements in the
    input iterable. The number of items returned is
    `n! / (x_1! * x_2! * ... * x_n!)`, where `n` is the total number of
    items input, and each `x_i` is the count of a distinct item in the input
    sequence.

    If *r* is given, only the *r*-length permutations are yielded.

    >>> sorted(distinct_permutations([1, 0, 1], r=2))
    [(0, 1), (1, 0), (1, 1)]
    >>> sorted(distinct_permutations(range(3), r=2))
    [(0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)]

    """

    # Algorithm: https://w.wiki/Qai
    # Steps the sorted list A in place through its lexicographic
    # successors ("next permutation"); duplicates are skipped naturally
    # because equal elements never swap past each other.
    def _full(A):
        while True:
            # Yield the permutation we have
            yield tuple(A)

            # Find the largest index i such that A[i] < A[i + 1]
            for i in range(size - 2, -1, -1):
                if A[i] < A[i + 1]:
                    break
            # If no such index exists, this permutation is the last one
            else:
                return

            # Find the largest index j greater than i such that A[i] < A[j]
            for j in range(size - 1, i, -1):
                if A[i] < A[j]:
                    break

            # Swap the value of A[i] with that of A[j], then reverse the
            # sequence from A[i + 1] to form the new permutation
            A[i], A[j] = A[j], A[i]
            A[i + 1 :] = A[: i - size : -1]  # A[i + 1:][::-1]

    # Algorithm: modified from the above
    def _partial(A, r):
        # Split A into the first r items (the permutation prefix that is
        # yielded) and the remaining items (candidates for swapping in)
        head, tail = A[:r], A[r:]
        right_head_indexes = range(r - 1, -1, -1)
        left_tail_indexes = range(len(tail))

        while True:
            # Yield the permutation we have
            yield tuple(head)

            # Starting from the right, find the first index of the head with
            # value smaller than the maximum value of the tail - call it i.
            pivot = tail[-1]
            for i in right_head_indexes:
                if head[i] < pivot:
                    break
                pivot = head[i]
            else:
                return

            # Starting from the left, find the first value of the tail
            # with a value greater than head[i] and swap.
            for j in left_tail_indexes:
                if tail[j] > head[i]:
                    head[i], tail[j] = tail[j], head[i]
                    break
            # If we didn't find one, start from the right and find the first
            # index of the head with a value greater than head[i] and swap.
            else:
                for j in right_head_indexes:
                    if head[j] > head[i]:
                        head[i], head[j] = head[j], head[i]
                        break

            # Reverse head[i + 1:] and swap it with tail[:r - (i + 1)]
            tail += head[: i - r : -1]  # head[i + 1:][::-1]
            i += 1
            head[i:], tail[:] = tail[: r - i], tail[r - i :]

    items = sorted(iterable)

    size = len(items)
    if r is None:
        r = size

    # r must be in (0, size] for any non-trivial output.
    if 0 < r <= size:
        return _full(items) if (r == size) else _partial(items, r)

    # r == 0 yields the single empty permutation; r > size yields nothing.
    return iter(() if r else ((),))
737
738
def intersperse(e, iterable, n=1):
    """Intersperse filler element *e* among the items in *iterable*, leaving
    *n* items between each filler element.

    >>> list(intersperse('!', [1, 2, 3, 4, 5]))
    [1, '!', 2, '!', 3, '!', 4, '!', 5]

    >>> list(intersperse(None, [1, 2, 3, 4, 5], n=2))
    [1, 2, None, 3, 4, None, 5]

    """
    if n == 0:
        raise ValueError('n must be > 0')

    if n == 1:
        # zip pairs each item with a filler: e, x_0, e, x_1, e, x_2, ...
        # Dropping the leading filler gives: x_0, e, x_1, e, x_2, ...
        paired = chain.from_iterable(zip(repeat(e), iterable))
        return islice(paired, 1, None)

    # Group the items into lists of n, alternate them with single-element
    # filler lists, drop the leading filler, and flatten:
    # [x_0, x_1], [e], [x_2, x_3], ... -> x_0, x_1, e, x_2, x_3, ...
    source = iter(iterable)
    groups = iter(lambda: list(islice(source, n)), [])
    alternated = chain.from_iterable(zip(repeat([e]), groups))
    return chain.from_iterable(islice(alternated, 1, None))
763
764
def unique_to_each(*iterables):
    """Return the elements from each of the input iterables that aren't in the
    other input iterables.

    For example, suppose you have a set of packages, each with a set of
    dependencies::

        {'pkg_1': {'A', 'B'}, 'pkg_2': {'B', 'C'}, 'pkg_3': {'B', 'D'}}

    If you remove one package, which dependencies can also be removed?

    If ``pkg_1`` is removed, then ``A`` is no longer necessary - it is not
    associated with ``pkg_2`` or ``pkg_3``. Similarly, ``C`` is only needed
    for ``pkg_2``, and ``D`` is only needed for ``pkg_3``::

        >>> unique_to_each({'A', 'B'}, {'B', 'C'}, {'B', 'D'})
        [['A'], ['C'], ['D']]

    If there are duplicates in one input iterable that aren't in the others
    they will be duplicated in the output. Input order is preserved::

        >>> unique_to_each("mississippi", "missouri")
        [['p', 'p'], ['o', 'u', 'r']]

    It is assumed that the elements of each iterable are hashable.

    """
    # Materialize each input so it can be traversed twice.
    pools = [list(it) for it in iterables]
    # Count in how many distinct inputs each element appears.
    appearances = Counter(chain.from_iterable(map(set, pools)))
    # Elements appearing in exactly one input are "unique to" that input.
    singletons = {elem for elem, n in appearances.items() if n == 1}
    return [[item for item in pool if item in singletons] for pool in pools]
796
797
def windowed(seq, n, fillvalue=None, step=1):
    """Return a sliding window of width *n* over the given iterable.

    >>> all_windows = windowed([1, 2, 3, 4, 5], 3)
    >>> list(all_windows)
    [(1, 2, 3), (2, 3, 4), (3, 4, 5)]

    When the window is larger than the iterable, *fillvalue* is used in place
    of missing values:

    >>> list(windowed([1, 2, 3], 4))
    [(1, 2, 3, None)]

    Each window will advance in increments of *step*:

    >>> list(windowed([1, 2, 3, 4, 5, 6], 3, fillvalue='!', step=2))
    [(1, 2, 3), (3, 4, 5), (5, 6, '!')]

    To slide into the iterable's items, use :func:`chain` to add filler items
    to the left:

    >>> iterable = [1, 2, 3, 4]
    >>> n = 3
    >>> padding = [None] * (n - 1)
    >>> list(windowed(chain(padding, iterable), 3))
    [(None, None, 1), (None, 1, 2), (1, 2, 3), (2, 3, 4)]
    """
    if n < 0:
        raise ValueError('n must be >= 0')
    if n == 0:
        # The single zero-width window.
        yield ()
        return
    if step < 1:
        raise ValueError('step must be >= 1')

    # The deque holds the current window; countdown tracks how many more
    # items must arrive before the next window should be emitted.
    window = deque(maxlen=n)
    countdown = n
    for item in seq:
        window.append(item)
        countdown -= 1
        if countdown == 0:
            countdown = step
            yield tuple(window)

    size = len(window)
    if size == 0:
        return
    if size < n:
        # Fewer items than one full window: pad on the right.
        yield tuple(chain(window, repeat(fillvalue, n - size)))
    elif 0 < countdown < min(step, n):
        # A final partial advance was in progress: pad and emit it.
        window += (fillvalue,) * countdown
        yield tuple(window)
849
850
def substrings(iterable):
    """Yield all of the substrings of *iterable*.

    >>> [''.join(s) for s in substrings('more')]
    ['m', 'o', 'r', 'e', 'mo', 'or', 're', 'mor', 'ore', 'more']

    Note that non-string iterables can also be subdivided.

    >>> list(substrings([0, 1, 2]))
    [(0,), (1,), (2,), (0, 1), (1, 2), (0, 1, 2)]

    """
    # Emit the length-1 substrings while collecting the whole sequence.
    collected = []
    for item in iterable:
        collected.append(item)
        yield (item,)
    collected = tuple(collected)
    total = len(collected)

    # Then emit all longer substrings, shortest first.
    for length in range(2, total + 1):
        for start in range(total - length + 1):
            yield collected[start : start + length]
875
876
def substrings_indexes(seq, reverse=False):
    """Yield all substrings and their positions in *seq*

    The items yielded will be a tuple of the form ``(substr, i, j)``, where
    ``substr == seq[i:j]``.

    This function only works for iterables that support slicing, such as
    ``str`` objects.

    >>> for item in substrings_indexes('more'):
    ...    print(item)
    ('m', 0, 1)
    ('o', 1, 2)
    ('r', 2, 3)
    ('e', 3, 4)
    ('mo', 0, 2)
    ('or', 1, 3)
    ('re', 2, 4)
    ('mor', 0, 3)
    ('ore', 1, 4)
    ('more', 0, 4)

    Set *reverse* to ``True`` to yield the same items in the opposite order.


    """
    # Iterate lengths shortest-first (or longest-first when reversed);
    # for each length, slide the start index left to right.
    lengths = range(1, len(seq) + 1)
    if reverse:
        lengths = reversed(lengths)
    return (
        (seq[i : i + L], i, i + L)
        for L in lengths
        for i in range(len(seq) - L + 1)
    )
909
910
class bucket:
    """Wrap *iterable* and return an object that buckets it iterable into
    child iterables based on a *key* function.

    >>> iterable = ['a1', 'b1', 'c1', 'a2', 'b2', 'c2', 'b3']
    >>> s = bucket(iterable, key=lambda x: x[0])  # Bucket by 1st character
    >>> sorted(list(s))  # Get the keys
    ['a', 'b', 'c']
    >>> a_iterable = s['a']
    >>> next(a_iterable)
    'a1'
    >>> next(a_iterable)
    'a2'
    >>> list(s['b'])
    ['b1', 'b2', 'b3']

    The original iterable will be advanced and its items will be cached until
    they are used by the child iterables. This may require significant storage.

    By default, attempting to select a bucket to which no items belong  will
    exhaust the iterable and cache all values.
    If you specify a *validator* function, selected buckets will instead be
    checked against it.

    >>> from itertools import count
    >>> it = count(1, 2)  # Infinite sequence of odd numbers
    >>> key = lambda x: x % 10  # Bucket by last digit
    >>> validator = lambda x: x in {1, 3, 5, 7, 9}  # Odd digits only
    >>> s = bucket(it, key=key, validator=validator)
    >>> 2 in s
    False
    >>> list(s[2])
    []

    """

    def __init__(self, iterable, key, validator=None):
        self._it = iter(iterable)
        self._key = key
        # Items pulled from _it but not yet consumed, grouped by key value.
        self._cache = defaultdict(deque)
        # With no validator, every key value is considered legitimate.
        self._validator = validator or (lambda x: True)

    def __contains__(self, value):
        if not self._validator(value):
            return False

        # Probe the bucket for one item; push it back if one was found so
        # membership testing doesn't consume it.
        try:
            item = next(self[value])
        except StopIteration:
            return False
        else:
            self._cache[value].appendleft(item)

        return True

    def _get_values(self, value):
        """
        Helper to yield items from the parent iterator that match *value*.
        Items that don't match are stored in the local cache as they
        are encountered.
        """
        while True:
            # If we've cached some items that match the target value, emit
            # the first one and evict it from the cache.
            if self._cache[value]:
                yield self._cache[value].popleft()
            # Otherwise we need to advance the parent iterator to search for
            # a matching item, caching the rest.
            else:
                while True:
                    try:
                        item = next(self._it)
                    except StopIteration:
                        return
                    item_value = self._key(item)
                    if item_value == value:
                        yield item
                        break
                    elif self._validator(item_value):
                        self._cache[item_value].append(item)

    def __iter__(self):
        # Iterating a bucket yields its keys; doing so exhausts the parent
        # iterator, caching every valid item in the process.
        for item in self._it:
            item_value = self._key(item)
            if self._validator(item_value):
                self._cache[item_value].append(item)

        yield from self._cache.keys()

    def __getitem__(self, value):
        # Invalid keys produce an empty child iterable without touching
        # the parent iterator.
        if not self._validator(value):
            return iter(())

        return self._get_values(value)
1005
1006
def spy(iterable, n=1):
    """Return a 2-tuple with a list containing the first *n* elements of
    *iterable*, and an iterator with the same items as *iterable*.
    This allows you to "look ahead" at the items in the iterable without
    advancing it.

    There is one item in the list by default:

    >>> iterable = 'abcdefg'
    >>> head, iterable = spy(iterable)
    >>> head
    ['a']
    >>> list(iterable)
    ['a', 'b', 'c', 'd', 'e', 'f', 'g']

    You may use unpacking to retrieve items instead of lists:

    >>> (head,), iterable = spy('abcdefg')
    >>> head
    'a'
    >>> (first, second), iterable = spy('abcdefg', 2)
    >>> first
    'a'
    >>> second
    'b'

    The number of items requested can be larger than the number of items in
    the iterable:

    >>> iterable = [1, 2, 3, 4, 5]
    >>> head, iterable = spy(iterable, 10)
    >>> head
    [1, 2, 3, 4, 5]
    >>> list(iterable)
    [1, 2, 3, 4, 5]

    """
    it = iter(iterable)
    # Pull off the first n items, then chain them back in front of the
    # remainder so the returned iterator yields everything.
    head = list(islice(it, n))

    # Return a copy of head so callers can't mutate the chained prefix.
    return head[:], chain(head, it)
1048
1049
def interleave(*iterables):
    """Yield one item from each iterable in turn, stopping when the
    shortest is exhausted.

    >>> list(interleave([1, 2, 3], [4, 5], [6, 7, 8]))
    [1, 4, 6, 2, 5, 7]

    For a version that continues after the shortest iterable runs out,
    see :func:`interleave_longest`.

    """
    # zip stops at the shortest input; flatten its rows one at a time.
    for row in zip(*iterables):
        yield from row
1062
1063
def interleave_longest(*iterables):
    """Yield one item from each iterable in turn, skipping any that are
    exhausted, until all are done.

    >>> list(interleave_longest([1, 2, 3], [4, 5], [6, 7, 8]))
    [1, 4, 6, 2, 5, 7, 3, 8]

    This function produces the same output as :func:`roundrobin`, but may
    perform better for some inputs (in particular when the number of
    iterables is large).

    """
    # A private sentinel marks the padding zip_longest inserts for the
    # shorter inputs; those slots are skipped on output.
    missing = object()
    for row in zip_longest(*iterables, fillvalue=missing):
        for item in row:
            if item is not missing:
                yield item
1078
1079
1080 def interleave_evenly(iterables, lengths=None):
1081 """
1082 Interleave multiple iterables so that their elements are evenly distributed
1083 throughout the output sequence.
1084
1085 >>> iterables = [1, 2, 3, 4, 5], ['a', 'b']
1086 >>> list(interleave_evenly(iterables))
1087 [1, 2, 'a', 3, 4, 'b', 5]
1088
1089 >>> iterables = [[1, 2, 3], [4, 5], [6, 7, 8]]
1090 >>> list(interleave_evenly(iterables))
1091 [1, 6, 4, 2, 7, 3, 8, 5]
1092
1093 This function requires iterables of known length. Iterables without
1094 ``__len__()`` can be used by manually specifying lengths with *lengths*:
1095
1096 >>> from itertools import combinations, repeat
1097 >>> iterables = [combinations(range(4), 2), ['a', 'b', 'c']]
1098 >>> lengths = [4 * (4 - 1) // 2, 3]
1099 >>> list(interleave_evenly(iterables, lengths=lengths))
1100 [(0, 1), (0, 2), 'a', (0, 3), (1, 2), 'b', (1, 3), (2, 3), 'c']
1101
1102 Based on Bresenham's algorithm.
1103 """
1104 if lengths is None:
1105 try:
1106 lengths = [len(it) for it in iterables]
1107 except TypeError:
1108 raise ValueError(
1109 'Iterable lengths could not be determined automatically. '
1110 'Specify them with the lengths keyword.'
1111 )
1112 elif len(iterables) != len(lengths):
1113 raise ValueError('Mismatching number of iterables and lengths.')
1114
1115 dims = len(lengths)
1116
1117 # sort iterables by length, descending
1118 lengths_permute = sorted(
1119 range(dims), key=lambda i: lengths[i], reverse=True
1120 )
1121 lengths_desc = [lengths[i] for i in lengths_permute]
1122 iters_desc = [iter(iterables[i]) for i in lengths_permute]
1123
1124 # the longest iterable is the primary one (Bresenham: the longest
1125 # distance along an axis)
1126 delta_primary, deltas_secondary = lengths_desc[0], lengths_desc[1:]
1127 iter_primary, iters_secondary = iters_desc[0], iters_desc[1:]
1128 errors = [delta_primary // dims] * len(deltas_secondary)
1129
1130 to_yield = sum(lengths)
1131 while to_yield:
1132 yield next(iter_primary)
1133 to_yield -= 1
1134 # update errors for each secondary iterable
1135 errors = [e - delta for e, delta in zip(errors, deltas_secondary)]
1136
1137 # those iterables for which the error is negative are yielded
1138 # ("diagonal step" in Bresenham)
1139 for i, e in enumerate(errors):
1140 if e < 0:
1141 yield next(iters_secondary[i])
1142 to_yield -= 1
1143 errors[i] += delta_primary
1144
1145
def collapse(iterable, base_type=None, levels=None):
    """Flatten an arbitrarily nested iterable (e.g. a list of lists of
    tuples) down to its non-iterable leaves.

    >>> iterable = [(1, 2), ([3, 4], [[5], [6]])]
    >>> list(collapse(iterable))
    [1, 2, 3, 4, 5, 6]

    Text and binary strings are treated as leaves and never descended into.

    To keep additional types intact, pass *base_type*:

    >>> iterable = ['ab', ('cd', 'ef'), ['gh', 'ij']]
    >>> list(collapse(iterable, base_type=tuple))
    ['ab', ('cd', 'ef'), 'gh', 'ij']

    Pass *levels* to limit how many layers of nesting are flattened:

    >>> iterable = [('a', ['b']), ('c', ['d'])]
    >>> list(collapse(iterable))  # Fully flattened
    ['a', 'b', 'c', 'd']
    >>> list(collapse(iterable, levels=1))  # Only one level flattened
    ['a', ['b'], 'c', ['d']]

    """

    def _descend(obj, depth):
        # Stop at the depth limit, at strings/bytes, and at instances of
        # the caller-supplied atomic type.
        if (levels is not None) and (depth > levels):
            yield obj
            return
        if isinstance(obj, (str, bytes)):
            yield obj
            return
        if (base_type is not None) and isinstance(obj, base_type):
            yield obj
            return

        try:
            children = iter(obj)
        except TypeError:
            # Not iterable at all: a leaf value.
            yield obj
            return

        for child in children:
            yield from _descend(child, depth + 1)

    yield from _descend(iterable, 0)
1192
1193
def side_effect(func, iterable, chunk_size=None, before=None, after=None):
    """Invoke *func* on each item in *iterable* (or on each *chunk_size* group
    of items) before yielding the item.

    `func` must be a function that takes a single argument. Its return value
    will be discarded.

    *before* and *after* are optional zero-argument callables, run before
    iteration starts and after it ends (including on early exit or error).

    `side_effect` can be used for logging, updating progress bars, or anything
    that is not functionally "pure."

    Emitting a status message:

    >>> from more_itertools import consume
    >>> func = lambda item: print('Received {}'.format(item))
    >>> consume(side_effect(func, range(2)))
    Received 0
    Received 1

    Operating on chunks of items:

    >>> pair_sums = []
    >>> func = lambda chunk: pair_sums.append(sum(chunk))
    >>> list(side_effect(func, [0, 1, 2, 3, 4, 5], 2))
    [0, 1, 2, 3, 4, 5]
    >>> list(pair_sums)
    [1, 5, 9]

    """
    # The try/finally guarantees *after* runs even if *before*, *func*,
    # or the consumer of this generator raises.
    try:
        if before is not None:
            before()

        if chunk_size is not None:
            # Grouped mode: func sees each chunk, items flow through.
            for group in chunked(iterable, chunk_size):
                func(group)
                yield from group
        else:
            # Per-item mode.
            for element in iterable:
                func(element)
                yield element
    finally:
        if after is not None:
            after()
1253
1254
def sliced(seq, n, strict=False):
    """Yield slices of length *n* from the sequence *seq*.

    >>> list(sliced((1, 2, 3, 4, 5, 6), 3))
    [(1, 2, 3), (4, 5, 6)]

    When the length of *seq* is not divisible by *n*, the final slice is
    shorter by default:

    >>> list(sliced((1, 2, 3, 4, 5, 6, 7, 8), 3))
    [(1, 2, 3), (4, 5, 6), (7, 8)]

    With *strict* set to ``True``, a short final slice instead raises
    ``ValueError`` before it is yielded.

    This function will only work for iterables that support slicing.
    For non-sliceable iterables, see :func:`chunked`.

    """
    # Slices keep coming as long as they are non-empty; an empty slice
    # marks the end of the sequence.
    slices = takewhile(len, (seq[start : start + n] for start in count(0, n)))
    if not strict:
        return slices

    def _checked():
        for piece in slices:
            if len(piece) != n:
                raise ValueError("seq is not divisible by n.")
            yield piece

    return iter(_checked())
1287
1288
def split_at(iterable, pred, maxsplit=-1, keep_separator=False):
    """Yield lists of items from *iterable*, delimited by items for which
    callable *pred* returns ``True``.

    >>> list(split_at('abcdcba', lambda x: x == 'b'))
    [['a'], ['c', 'd', 'c'], ['a']]

    >>> list(split_at(range(10), lambda n: n % 2 == 1))
    [[0], [2], [4], [6], [8], []]

    At most *maxsplit* splits are done; -1 (the default) means no limit:

    >>> list(split_at(range(10), lambda n: n % 2 == 1, maxsplit=2))
    [[0], [2], [4, 5, 6, 7, 8, 9]]

    Delimiting items are dropped unless *keep_separator* is ``True``, in
    which case each appears as its own single-item list:

    >>> list(split_at('abcdcba', lambda x: x == 'b', keep_separator=True))
    [['a'], ['b'], ['c', 'd', 'c'], ['b'], ['a']]

    """
    if maxsplit == 0:
        yield list(iterable)
        return

    iterator = iter(iterable)
    group = []
    for element in iterator:
        if not pred(element):
            group.append(element)
            continue
        # Separator found: emit the accumulated group (and optionally the
        # separator itself), then either finish early or start a new group.
        yield group
        if keep_separator:
            yield [element]
        if maxsplit == 1:
            yield list(iterator)
            return
        group = []
        maxsplit -= 1
    yield group
1331
1332
def split_before(iterable, pred, maxsplit=-1):
    """Yield lists of items from *iterable*, where each list ends just before
    an item for which callable *pred* returns ``True``:

    >>> list(split_before('OneTwo', lambda s: s.isupper()))
    [['O', 'n', 'e'], ['T', 'w', 'o']]

    >>> list(split_before(range(10), lambda n: n % 3 == 0))
    [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]

    At most *maxsplit* splits are done; -1 (the default) means no limit:

    >>> list(split_before(range(10), lambda n: n % 3 == 0, maxsplit=2))
    [[0, 1, 2], [3, 4, 5], [6, 7, 8, 9]]
    """
    if maxsplit == 0:
        yield list(iterable)
        return

    iterator = iter(iterable)
    group = []
    for element in iterator:
        # pred is evaluated first so it is called on every item, even the
        # one that starts the output (when the group is still empty).
        if pred(element) and group:
            yield group
            if maxsplit == 1:
                # Last permitted split: the rest forms the final group.
                yield [element] + list(iterator)
                return
            group = []
            maxsplit -= 1
        group.append(element)
    if group:
        yield group
1366
1367
def split_after(iterable, pred, maxsplit=-1):
    """Yield lists of items from *iterable*, where each list ends with an
    item where callable *pred* returns ``True``:

    >>> list(split_after('one1two2', lambda s: s.isdigit()))
    [['o', 'n', 'e', '1'], ['t', 'w', 'o', '2']]

    >>> list(split_after(range(10), lambda n: n % 3 == 0))
    [[0], [1, 2, 3], [4, 5, 6], [7, 8, 9]]

    At most *maxsplit* splits are done; -1 (the default) means no limit:

    >>> list(split_after(range(10), lambda n: n % 3 == 0, maxsplit=2))
    [[0], [1, 2, 3], [4, 5, 6, 7, 8, 9]]

    """
    if maxsplit == 0:
        yield list(iterable)
        return

    iterator = iter(iterable)
    group = []
    for element in iterator:
        group.append(element)
        # The group always holds at least the current item here, so a
        # positive predicate always closes it out.
        if pred(element):
            yield group
            if maxsplit == 1:
                # Last permitted split: drain the rest as one final group,
                # unless nothing is left.
                remainder = list(iterator)
                if remainder:
                    yield remainder
                return
            group = []
            maxsplit -= 1
    if group:
        yield group
1404
1405
def split_when(iterable, pred, maxsplit=-1):
    """Split *iterable* into pieces based on the output of *pred*.
    *pred* should be a function that takes successive pairs of items and
    returns ``True`` if the iterable should be split in between them.

    For example, to find runs of increasing numbers, split the iterable when
    element ``i`` is larger than element ``i + 1``:

    >>> list(split_when([1, 2, 3, 3, 2, 5, 2, 4, 2], lambda x, y: x > y))
    [[1, 2, 3, 3], [2, 5], [2, 4], [2]]

    At most *maxsplit* splits are done; -1 (the default) means no limit:

    >>> list(split_when([1, 2, 3, 3, 2, 5, 2, 4, 2],
    ...                 lambda x, y: x > y, maxsplit=2))
    [[1, 2, 3, 3], [2, 5], [2, 4, 2]]

    """
    if maxsplit == 0:
        yield list(iterable)
        return

    iterator = iter(iterable)
    try:
        previous = next(iterator)
    except StopIteration:
        # Empty input yields nothing at all.
        return

    group = [previous]
    for current in iterator:
        # pred inspects each adjacent pair (previous, current).
        if pred(previous, current):
            yield group
            if maxsplit == 1:
                yield [current] + list(iterator)
                return
            group = []
            maxsplit -= 1

        group.append(current)
        previous = current

    yield group
1449
1450
def split_into(iterable, sizes):
    """Yield a list of sequential items from *iterable* of length 'n' for each
    integer 'n' in *sizes*.

    >>> list(split_into([1,2,3,4,5,6], [1,2,3]))
    [[1], [2, 3], [4, 5, 6]]

    If the sum of *sizes* is smaller than the length of *iterable*, then the
    remaining items of *iterable* will not be returned.

    >>> list(split_into([1,2,3,4,5,6], [2,3]))
    [[1, 2], [3, 4, 5]]

    If the sum of *sizes* is larger than the length of *iterable*, fewer items
    will be returned in the iteration that overruns *iterable* and further
    lists will be empty:

    >>> list(split_into([1,2,3,4], [1,2,3,4]))
    [[1], [2, 3], [4], []]

    When a ``None`` object is encountered in *sizes*, the returned list will
    contain items up to the end of *iterable* the same way that
    :func:`itertools.islice` does:

    >>> list(split_into([1,2,3,4,5,6,7,8,9,0], [2,3,None]))
    [[1, 2], [3, 4, 5], [6, 7, 8, 9, 0]]

    :func:`split_into` can be useful for grouping a series of items where the
    sizes of the groups are not uniform. An example would be where in a row
    from a table, multiple columns represent elements of the same feature
    (e.g. a point represented by x,y,z) but, the format is not the same for
    all columns.
    """
    # Convert the iterable argument into an iterator so its contents can
    # be consumed by islice in case it is a generator.
    it = iter(iterable)

    for size in sizes:
        if size is None:
            # A None size means "everything that's left"; nothing can
            # follow it, so stop iterating over sizes entirely.
            yield list(it)
            return
        yield list(islice(it, size))
1494
1495
def padded(iterable, fillvalue=None, n=None, next_multiple=False):
    """Yield the elements from *iterable*, followed by *fillvalue*, such that
    at least *n* items are emitted.

    >>> list(padded([1, 2, 3], '?', 5))
    [1, 2, 3, '?', '?']

    If *next_multiple* is ``True``, *fillvalue* will be emitted until the
    number of items emitted is a multiple of *n*::

    >>> list(padded([1, 2, 3, 4], n=3, next_multiple=True))
    [1, 2, 3, 4, None, None]

    If *n* is ``None``, *fillvalue* will be emitted indefinitely.

    """
    it = iter(iterable)
    if n is None:
        # No target length: pad forever.
        yield from chain(it, repeat(fillvalue))
        return
    if n < 1:
        raise ValueError('n must be at least 1')

    emitted = 0
    for element in it:
        yield element
        emitted += 1

    # Compute how many fill values are still owed.
    if next_multiple:
        shortfall = (n - emitted) % n
    else:
        shortfall = n - emitted
    for _ in range(shortfall):
        yield fillvalue
1526
1527
def repeat_each(iterable, n=2):
    """Repeat each element in *iterable* *n* times.

    >>> list(repeat_each('ABC', 3))
    ['A', 'A', 'A', 'B', 'B', 'B', 'C', 'C', 'C']
    """
    # Lazily expand every element into an n-long run and join the runs.
    return chain.from_iterable(repeat(element, n) for element in iterable)
1535
1536
def repeat_last(iterable, default=None):
    """After the *iterable* is exhausted, keep yielding its last element.

    >>> list(islice(repeat_last(range(3)), 5))
    [0, 1, 2, 2, 2]

    If the iterable is empty, yield *default* forever::

    >>> list(islice(repeat_last(range(0), 42), 5))
    [42, 42, 42, 42, 42]

    """
    # Sentinel distinguishes "no items were seen" from a last item of None.
    sentinel = object()
    last = sentinel
    for last in iterable:
        yield last
    yield from repeat(default if last is sentinel else last)
1554
1555
def distribute(n, iterable):
    """Distribute the items from *iterable* among *n* smaller iterables.

    >>> group_1, group_2 = distribute(2, [1, 2, 3, 4, 5, 6])
    >>> list(group_1)
    [1, 3, 5]
    >>> list(group_2)
    [2, 4, 6]

    If the length of *iterable* is not evenly divisible by *n*, the returned
    iterables will differ in length:

    >>> children = distribute(3, [1, 2, 3, 4, 5, 6, 7])
    >>> [list(c) for c in children]
    [[1, 4, 7], [2, 5], [3, 6]]

    If the length of *iterable* is smaller than *n*, the trailing iterables
    are empty:

    >>> children = distribute(5, [1, 2, 3])
    >>> [list(c) for c in children]
    [[1], [2], [3], [], []]

    This function uses :func:`itertools.tee` and may require significant
    storage. If you need the order items in the smaller iterables to match the
    original iterable, see :func:`divide`.

    """
    if n < 1:
        raise ValueError('n must be at least 1')

    # Give each child its own copy of the stream, then have child k take
    # every n-th item starting at offset k.
    copies = tee(iterable, n)
    return [islice(copy, start, None, n) for start, copy in enumerate(copies)]
1589
1590
def stagger(iterable, offsets=(-1, 0, 1), longest=False, fillvalue=None):
    """Yield tuples whose elements are offset from *iterable*.
    The amount by which the `i`-th item in each tuple is offset is given by
    the `i`-th item in *offsets*.

    >>> list(stagger([0, 1, 2, 3]))
    [(None, 0, 1), (0, 1, 2), (1, 2, 3)]
    >>> list(stagger(range(8), offsets=(0, 2, 4)))
    [(0, 2, 4), (1, 3, 5), (2, 4, 6), (3, 5, 7)]

    By default, the sequence will end when the final element of a tuple is the
    last item in the iterable. To continue until the first element of a tuple
    is the last item in the iterable, set *longest* to ``True``::

    >>> list(stagger([0, 1, 2, 3], longest=True))
    [(None, 0, 1), (0, 1, 2), (1, 2, 3), (2, 3, None), (3, None, None)]

    By default, ``None`` will be used to replace offsets beyond the end of the
    sequence. Specify *fillvalue* to use some other value.

    """
    # One independent copy of the stream per requested offset; zip_offset
    # does the actual shifting and filling.
    copies = tee(iterable, len(offsets))
    return zip_offset(
        *copies, offsets=offsets, longest=longest, fillvalue=fillvalue
    )
1617
1618
def zip_equal(*iterables):
    """``zip`` the input *iterables* together, but raise
    ``UnequalIterablesError`` if they aren't all the same length.

    >>> it_1 = range(3)
    >>> it_2 = iter('abc')
    >>> list(zip_equal(it_1, it_2))
    [(0, 'a'), (1, 'b'), (2, 'c')]

    >>> it_1 = range(3)
    >>> it_2 = iter('abcd')
    >>> list(zip_equal(it_1, it_2)) # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    ...
    more_itertools.more.UnequalIterablesError: Iterables have different
    lengths

    """
    # Python 3.10a6+ has zip(..., strict=True) built in, so steer users
    # there with a deprecation warning.
    if hexversion >= 0x30A00A6:
        message = (
            'zip_equal will be removed in a future version of '
            'more-itertools. Use the builtin zip function with '
            'strict=True instead.'
        )
        warnings.warn(message, DeprecationWarning)

    return _zip_equal(*iterables)
1648
1649
def zip_offset(*iterables, offsets, longest=False, fillvalue=None):
    """``zip`` the input *iterables* together, but offset the `i`-th iterable
    by the `i`-th item in *offsets*.

    >>> list(zip_offset('0123', 'abcdef', offsets=(0, 1)))
    [('0', 'b'), ('1', 'c'), ('2', 'd'), ('3', 'e')]

    This can be used as a lightweight alternative to SciPy or pandas to analyze
    data sets in which some series have a lead or lag relationship.

    By default, the sequence will end when the shortest iterable is exhausted.
    To continue until the longest iterable is exhausted, set *longest* to
    ``True``.

    >>> list(zip_offset('0123', 'abcdef', offsets=(0, 1), longest=True))
    [('0', 'b'), ('1', 'c'), ('2', 'd'), ('3', 'e'), (None, 'f')]

    By default, ``None`` will be used to replace offsets beyond the end of the
    sequence. Specify *fillvalue* to use some other value.

    """
    if len(iterables) != len(offsets):
        raise ValueError("Number of iterables and offsets didn't match")

    shifted = []
    for iterable, offset in zip(iterables, offsets):
        if offset > 0:
            # Positive offset: skip the first `offset` items.
            shifted.append(islice(iterable, offset, None))
        elif offset < 0:
            # Negative offset: lead with fill values.
            shifted.append(chain(repeat(fillvalue, -offset), iterable))
        else:
            shifted.append(iterable)

    if longest:
        return zip_longest(*shifted, fillvalue=fillvalue)
    return zip(*shifted)
1687
1688
def sort_together(iterables, key_list=(0,), key=None, reverse=False):
    """Return the input iterables sorted together, with *key_list* as the
    priority for sorting. All iterables are trimmed to the length of the
    shortest one.

    This can be used like the sorting function in a spreadsheet. If each
    iterable represents a column of data, the key list determines which
    columns are used for sorting.

    By default, all iterables are sorted using the ``0``-th iterable::

    >>> iterables = [(4, 3, 2, 1), ('a', 'b', 'c', 'd')]
    >>> sort_together(iterables)
    [(1, 2, 3, 4), ('d', 'c', 'b', 'a')]

    Set a different key list to sort according to another iterable.
    Specifying multiple keys dictates how ties are broken::

    >>> iterables = [(3, 1, 2), (0, 1, 0), ('c', 'b', 'a')]
    >>> sort_together(iterables, key_list=(1, 2))
    [(2, 3, 1), (0, 0, 1), ('a', 'c', 'b')]

    To sort by a function of the elements of the iterable, pass a *key*
    function. Its arguments are the elements of the iterables corresponding to
    the key list::

    >>> names = ('a', 'b', 'c')
    >>> lengths = (1, 2, 3)
    >>> widths = (5, 2, 1)
    >>> def area(length, width):
    ...     return length * width
    >>> sort_together([names, lengths, widths], key_list=(1, 2), key=area)
    [('c', 'b', 'a'), (3, 2, 1), (1, 2, 5)]

    Set *reverse* to ``True`` to sort in descending order.

    >>> sort_together([(1, 2, 3), ('c', 'b', 'a')], reverse=True)
    [(3, 2, 1), ('a', 'b', 'c')]

    """
    if key is None:
        # No key function: sort rows directly on the selected columns.
        sort_key = itemgetter(*key_list)
    else:
        # A key function receives the row's selected column values.
        key_list = list(key_list)
        if len(key_list) == 1:
            # Single column: pass that one value as the sole argument.
            offset = key_list[0]

            def sort_key(row):
                return key(row[offset])

        else:
            # Multiple columns: unpack them as positional arguments.
            select = itemgetter(*key_list)

            def sort_key(row):
                return key(*select(row))

    # Transpose to rows, sort, transpose back to columns.
    return list(zip(*sorted(zip(*iterables), key=sort_key, reverse=reverse)))
1753
1754
def unzip(iterable):
    """The inverse of :func:`zip`, this function disaggregates the elements
    of the zipped *iterable*.

    The ``i``-th iterable contains the ``i``-th element from each element
    of the zipped iterable. The first element is used to determine the
    length of the remaining elements.

    >>> iterable = [('a', 1), ('b', 2), ('c', 3), ('d', 4)]
    >>> letters, numbers = unzip(iterable)
    >>> list(letters)
    ['a', 'b', 'c', 'd']
    >>> list(numbers)
    [1, 2, 3, 4]

    This is similar to using ``zip(*iterable)``, but it avoids reading
    *iterable* into memory. Note, however, that this function uses
    :func:`itertools.tee` and thus may require significant storage.

    """
    # Peek at the first element to learn how many streams to produce.
    head, iterable = spy(iter(iterable))
    if not head:
        # Empty input, e.g. zip([], [], []).
        return ()
    width = len(head[0])
    copies = tee(iterable, width)

    def _nth(n):
        def getter(row):
            try:
                return row[n]
            except IndexError:
                # Tolerate "improperly zipped" input such as
                # iter([(1, 2, 3), (4, 5), (6,)]): each unzipped stream
                # simply ends at the first row that is too short.
                raise StopIteration

        return getter

    return tuple(map(_nth(n), copy) for n, copy in enumerate(copies))
1802
1803
def divide(n, iterable):
    """Divide the elements from *iterable* into *n* parts, maintaining
    order.

    >>> group_1, group_2 = divide(2, [1, 2, 3, 4, 5, 6])
    >>> list(group_1)
    [1, 2, 3]
    >>> list(group_2)
    [4, 5, 6]

    If the length of *iterable* is not evenly divisible by *n*, then the
    length of the returned iterables will not be identical:

    >>> children = divide(3, [1, 2, 3, 4, 5, 6, 7])
    >>> [list(c) for c in children]
    [[1, 2, 3], [4, 5], [6, 7]]

    If the length of the iterable is smaller than n, then the last returned
    iterables will be empty:

    >>> children = divide(5, [1, 2, 3])
    >>> [list(c) for c in children]
    [[1], [2], [3], [], []]

    This function will exhaust the iterable before returning and may require
    significant storage. If order is not important, see :func:`distribute`,
    which does not first pull the iterable into memory.

    """
    if n < 1:
        raise ValueError('n must be at least 1')

    # Materialize non-sliceable inputs; sliceable ones are used as-is.
    try:
        iterable[:0]
    except TypeError:
        seq = tuple(iterable)
    else:
        seq = iterable

    # Each part gets q items; the first r parts get one extra.
    q, r = divmod(len(seq), n)

    parts = []
    stop = 0
    for index in range(n):
        start = stop
        stop = start + q + (1 if index < r else 0)
        parts.append(iter(seq[start:stop]))

    return parts
1853
1854
def always_iterable(obj, base_type=(str, bytes)):
    """If *obj* is iterable, return an iterator over its items::

    >>> obj = (1, 2, 3)
    >>> list(always_iterable(obj))
    [1, 2, 3]

    If *obj* is not iterable, return a one-item iterable containing *obj*::

    >>> obj = 1
    >>> list(always_iterable(obj))
    [1]

    If *obj* is ``None``, return an empty iterable:

    >>> obj = None
    >>> list(always_iterable(None))
    []

    By default, binary and text strings are not considered iterable::

    >>> obj = 'foo'
    >>> list(always_iterable(obj))
    ['foo']

    If *base_type* is set, objects for which ``isinstance(obj, base_type)``
    returns ``True`` won't be considered iterable.

    >>> obj = {'a': 1}
    >>> list(always_iterable(obj))  # Iterate over the dict's keys
    ['a']
    >>> list(always_iterable(obj, base_type=dict))  # Treat dicts as a unit
    [{'a': 1}]

    Set *base_type* to ``None`` to avoid any special handling and treat objects
    Python considers iterable as iterable:

    >>> obj = 'foo'
    >>> list(always_iterable(obj, base_type=None))
    ['f', 'o', 'o']
    """
    # None maps to an empty iterator.
    if obj is None:
        return iter(())

    # Objects of the "atomic" base type are wrapped, not iterated.
    if base_type is not None and isinstance(obj, base_type):
        return iter((obj,))

    # EAFP: try iterating; wrap anything that refuses.
    try:
        return iter(obj)
    except TypeError:
        return iter((obj,))
1906
1907
def adjacent(predicate, iterable, distance=1):
    """Return an iterable over `(bool, item)` tuples where the `item` is
    drawn from *iterable* and the `bool` indicates whether
    that item satisfies the *predicate* or is adjacent to an item that does.

    For example, to find whether items are adjacent to a ``3``::

    >>> list(adjacent(lambda x: x == 3, range(6)))
    [(False, 0), (False, 1), (True, 2), (True, 3), (True, 4), (False, 5)]

    Set *distance* to change what counts as adjacent. For example, to find
    whether items are two places away from a ``3``:

    >>> list(adjacent(lambda x: x == 3, range(6), distance=2))
    [(False, 0), (True, 1), (True, 2), (True, 3), (True, 4), (True, 5)]

    This is useful for contextualizing the results of a search function.
    For example, a code comparison tool might want to identify lines that
    have changed, but also surrounding lines to give the viewer of the diff
    context.

    The predicate function will only be called once for each item in the
    iterable.

    See also :func:`groupby_transform`, which can be used with this function
    to group ranges of items with the same `bool` value.

    """
    # Allow distance=0 mainly for testing that it reproduces results with map()
    if distance < 0:
        raise ValueError('distance must be at least 0')

    probe, originals = tee(iterable)
    # Pad the predicate results so the sliding window is centered on each
    # item even at the edges of the sequence.
    pad = [False] * distance
    hits = chain(pad, map(predicate, probe), pad)
    near_hit = map(any, windowed(hits, 2 * distance + 1))
    return zip(near_hit, originals)
1945
1946
def groupby_transform(iterable, keyfunc=None, valuefunc=None, reducefunc=None):
    """An extension of :func:`itertools.groupby` that can apply transformations
    to the grouped data.

    * *keyfunc* is a function computing a key value for each item in *iterable*
    * *valuefunc* is a function that transforms the individual items from
      *iterable* after grouping
    * *reducefunc* is a function that transforms each group of items

    >>> iterable = 'aAAbBBcCC'
    >>> keyfunc = lambda k: k.upper()
    >>> valuefunc = lambda v: v.lower()
    >>> reducefunc = lambda g: ''.join(g)
    >>> list(groupby_transform(iterable, keyfunc, valuefunc, reducefunc))
    [('A', 'aaa'), ('B', 'bbb'), ('C', 'ccc')]

    Each optional argument defaults to an identity function if not specified.

    :func:`groupby_transform` is useful when grouping elements of an iterable
    using a separate iterable as the key. To do this, :func:`zip` the iterables
    and pass a *keyfunc* that extracts the first element and a *valuefunc*
    that extracts the second element::

    >>> from operator import itemgetter
    >>> keys = [0, 0, 1, 1, 1, 2, 2, 2, 3]
    >>> values = 'abcdefghi'
    >>> iterable = zip(keys, values)
    >>> grouper = groupby_transform(iterable, itemgetter(0), itemgetter(1))
    >>> [(k, ''.join(g)) for k, g in grouper]
    [(0, 'ab'), (1, 'cde'), (2, 'fgh'), (3, 'i')]

    Note that the order of items in the iterable is significant.
    Only adjacent items are grouped together, so if you don't want any
    duplicate groups, you should sort the iterable by the key function.

    """
    # Start with a plain groupby, then lazily layer on the transforms.
    result = groupby(iterable, keyfunc)
    if valuefunc:
        result = ((key, map(valuefunc, group)) for key, group in result)
    if reducefunc:
        result = ((key, reducefunc(group)) for key, group in result)

    return result
1990
1991
1992 class numeric_range(abc.Sequence, abc.Hashable):
1993 """An extension of the built-in ``range()`` function whose arguments can
1994 be any orderable numeric type.
1995
1996 With only *stop* specified, *start* defaults to ``0`` and *step*
1997 defaults to ``1``. The output items will match the type of *stop*:
1998
1999 >>> list(numeric_range(3.5))
2000 [0.0, 1.0, 2.0, 3.0]
2001
2002 With only *start* and *stop* specified, *step* defaults to ``1``. The
2003 output items will match the type of *start*:
2004
2005 >>> from decimal import Decimal
2006 >>> start = Decimal('2.1')
2007 >>> stop = Decimal('5.1')
2008 >>> list(numeric_range(start, stop))
2009 [Decimal('2.1'), Decimal('3.1'), Decimal('4.1')]
2010
2011 With *start*, *stop*, and *step* specified the output items will match
2012 the type of ``start + step``:
2013
2014 >>> from fractions import Fraction
2015 >>> start = Fraction(1, 2) # Start at 1/2
2016 >>> stop = Fraction(5, 2) # End at 5/2
2017 >>> step = Fraction(1, 2) # Count by 1/2
2018 >>> list(numeric_range(start, stop, step))
2019 [Fraction(1, 2), Fraction(1, 1), Fraction(3, 2), Fraction(2, 1)]
2020
2021 If *step* is zero, ``ValueError`` is raised. Negative steps are supported:
2022
2023 >>> list(numeric_range(3, -1, -1.0))
2024 [3.0, 2.0, 1.0, 0.0]
2025
2026 Be aware of the limitations of floating point numbers; the representation
2027 of the yielded numbers may be surprising.
2028
2029 ``datetime.datetime`` objects can be used for *start* and *stop*, if *step*
2030 is a ``datetime.timedelta`` object:
2031
2032 >>> import datetime
2033 >>> start = datetime.datetime(2019, 1, 1)
2034 >>> stop = datetime.datetime(2019, 1, 3)
2035 >>> step = datetime.timedelta(days=1)
2036 >>> items = iter(numeric_range(start, stop, step))
2037 >>> next(items)
2038 datetime.datetime(2019, 1, 1, 0, 0)
2039 >>> next(items)
2040 datetime.datetime(2019, 1, 2, 0, 0)
2041
2042 """
2043
2044 _EMPTY_HASH = hash(range(0, 0))
2045
2046 def __init__(self, *args):
2047 argc = len(args)
2048 if argc == 1:
2049 (self._stop,) = args
2050 self._start = type(self._stop)(0)
2051 self._step = type(self._stop - self._start)(1)
2052 elif argc == 2:
2053 self._start, self._stop = args
2054 self._step = type(self._stop - self._start)(1)
2055 elif argc == 3:
2056 self._start, self._stop, self._step = args
2057 elif argc == 0:
2058 raise TypeError(
2059 'numeric_range expected at least '
2060 '1 argument, got {}'.format(argc)
2061 )
2062 else:
2063 raise TypeError(
2064 'numeric_range expected at most '
2065 '3 arguments, got {}'.format(argc)
2066 )
2067
2068 self._zero = type(self._step)(0)
2069 if self._step == self._zero:
2070 raise ValueError('numeric_range() arg 3 must not be zero')
2071 self._growing = self._step > self._zero
2072 self._init_len()
2073
2074 def __bool__(self):
2075 if self._growing:
2076 return self._start < self._stop
2077 else:
2078 return self._start > self._stop
2079
2080 def __contains__(self, elem):
2081 if self._growing:
2082 if self._start <= elem < self._stop:
2083 return (elem - self._start) % self._step == self._zero
2084 else:
2085 if self._start >= elem > self._stop:
2086 return (self._start - elem) % (-self._step) == self._zero
2087
2088 return False
2089
2090 def __eq__(self, other):
2091 if isinstance(other, numeric_range):
2092 empty_self = not bool(self)
2093 empty_other = not bool(other)
2094 if empty_self or empty_other:
2095 return empty_self and empty_other # True if both empty
2096 else:
2097 return (
2098 self._start == other._start
2099 and self._step == other._step
2100 and self._get_by_index(-1) == other._get_by_index(-1)
2101 )
2102 else:
2103 return False
2104
2105 def __getitem__(self, key):
2106 if isinstance(key, int):
2107 return self._get_by_index(key)
2108 elif isinstance(key, slice):
2109 step = self._step if key.step is None else key.step * self._step
2110
2111 if key.start is None or key.start <= -self._len:
2112 start = self._start
2113 elif key.start >= self._len:
2114 start = self._stop
2115 else: # -self._len < key.start < self._len
2116 start = self._get_by_index(key.start)
2117
2118 if key.stop is None or key.stop >= self._len:
2119 stop = self._stop
2120 elif key.stop <= -self._len:
2121 stop = self._start
2122 else: # -self._len < key.stop < self._len
2123 stop = self._get_by_index(key.stop)
2124
2125 return numeric_range(start, stop, step)
2126 else:
2127 raise TypeError(
2128 'numeric range indices must be '
2129 'integers or slices, not {}'.format(type(key).__name__)
2130 )
2131
2132 def __hash__(self):
2133 if self:
2134 return hash((self._start, self._get_by_index(-1), self._step))
2135 else:
2136 return self._EMPTY_HASH
2137
    def __iter__(self):
        # Generate start + n*step for n = 0, 1, 2, ... and cut the stream
        # off at the exclusive stop.  partial(gt, stop)(v) tests
        # ``stop > v`` (growing); partial(lt, stop)(v) tests ``stop < v``.
        values = (self._start + (n * self._step) for n in count())
        if self._growing:
            return takewhile(partial(gt, self._stop), values)
        else:
            return takewhile(partial(lt, self._stop), values)
2144
    def __len__(self):
        # Length is computed once by _init_len() and cached.
        return self._len
2147
    def _init_len(self):
        # Cache the element count.  Normalize to a growing range so that
        # distance and step are both positive, then take the ceiling of
        # distance / step via divmod (works for non-integer numeric
        # types as well).
        if self._growing:
            start = self._start
            stop = self._stop
            step = self._step
        else:
            start = self._stop
            stop = self._start
            step = -self._step
        distance = stop - start
        if distance <= self._zero:
            self._len = 0
        else:  # distance > 0 and step > 0: regular euclidean division
            q, r = divmod(distance, step)
            self._len = int(q) + int(r != self._zero)
2163
    def __reduce__(self):
        # Pickle support: rebuild from the original constructor arguments.
        return numeric_range, (self._start, self._stop, self._step)
2166
    def __repr__(self):
        # Mirror the constructor signature; omit the step when it is 1.
        if self._step == 1:
            return "numeric_range({}, {})".format(
                repr(self._start), repr(self._stop)
            )
        else:
            return "numeric_range({}, {}, {})".format(
                repr(self._start), repr(self._stop), repr(self._step)
            )
2176
    def __reversed__(self):
        # Walk from the last element back to (and including) the start:
        # the reversed range stops one negated step beyond self._start.
        # NOTE(review): _get_by_index(-1) raises IndexError for an empty
        # range -- confirm whether reversed() of an empty numeric_range
        # is expected to raise.
        return iter(
            numeric_range(
                self._get_by_index(-1), self._start - self._step, -self._step
            )
        )
2183
2184 def count(self, value):
2185 return int(value in self)
2186
    def index(self, value):
        # Mirror __contains__: the value must lie in the half-open
        # interval and be an exact step-multiple from the start; the
        # quotient of that division is the index.
        if self._growing:
            if self._start <= value < self._stop:
                q, r = divmod(value - self._start, self._step)
                if r == self._zero:
                    return int(q)
        else:
            if self._start >= value > self._stop:
                q, r = divmod(self._start - value, -self._step)
                if r == self._zero:
                    return int(q)

        raise ValueError("{} is not in numeric range".format(value))
2200
    def _get_by_index(self, i):
        # Translate a (possibly negative) sequence index into a value.
        if i < 0:
            i += self._len
        if i < 0 or i >= self._len:
            raise IndexError("numeric range object index out of range")
        return self._start + i * self._step
2207
2208
def count_cycle(iterable, n=None):
    """Cycle through the items from *iterable* up to *n* times, yielding
    the number of completed cycles along with each item.  If *n* is
    omitted the process repeats indefinitely.

    >>> list(count_cycle('AB', 3))
    [(0, 'A'), (0, 'B'), (1, 'A'), (1, 'B'), (2, 'A'), (2, 'B')]

    """
    # Materialize once: the items are traversed repeatedly.
    items = tuple(iterable)
    if not items:
        return iter(())
    rounds = count() if n is None else range(n)
    # Pair every item of a round with that round's number; lazily chain
    # the rounds together.
    return chain.from_iterable(zip(repeat(i), items) for i in rounds)
2223
2224
def mark_ends(iterable):
    """Yield 3-tuples of the form ``(is_first, is_last, item)``.

    >>> list(mark_ends('ABC'))
    [(True, False, 'A'), (False, False, 'B'), (False, True, 'C')]

    Use this when looping over an iterable to take special action on its
    first and/or last items:

    >>> iterable = ['Header', 100, 200, 'Footer']
    >>> total = 0
    >>> for is_first, is_last, item in mark_ends(iterable):
    ...     if is_first:
    ...         continue  # Skip the header
    ...     if is_last:
    ...         continue  # Skip the footer
    ...     total += item
    >>> print(total)
    300
    """
    iterator = iter(iterable)

    try:
        current = next(iterator)
    except StopIteration:
        # Empty input: yield nothing.
        return

    is_first = True
    for upcoming in iterator:
        # At least one more item follows, so current is not the last.
        yield is_first, False, current
        current = upcoming
        is_first = False

    yield is_first, True, current
2260
2261
def locate(iterable, pred=bool, window_size=None):
    """Yield the index of each item in *iterable* for which *pred*
    returns ``True``.

    *pred* defaults to :func:`bool`, which selects truthy items:

    >>> list(locate([0, 1, 1, 0, 1, 0, 0]))
    [1, 2, 4]

    Set *pred* to a custom function to find particular items:

    >>> list(locate(['a', 'b', 'c', 'b'], lambda x: x == 'b'))
    [1, 3]

    If *window_size* is given, *pred* is called with that many items at
    a time, which enables searching for sub-sequences:

    >>> iterable = [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]
    >>> pred = lambda *args: args == (1, 2, 3)
    >>> list(locate(iterable, pred=pred, window_size=3))
    [1, 5, 9]

    See :func:`rlocate` for a right-to-left version, and :func:`seekable`
    for retrieving the located items afterward.
    """
    if window_size is not None:
        if window_size < 1:
            raise ValueError('window size must be at least 1')
        # Pad the trailing windows so every starting index is covered.
        windows = windowed(iterable, window_size, fillvalue=_marker)
        return compress(count(), starmap(pred, windows))

    # compress() emits the counter value at every position where the
    # mapped predicate is truthy.
    return compress(count(), map(pred, iterable))
2308
2309
def longest_common_prefix(iterables):
    """Yield the elements of the longest common prefix among the given
    *iterables*.

    >>> ''.join(longest_common_prefix(['abcd', 'abc', 'abf']))
    'ab'

    """
    # Walk the iterables in lockstep, keeping columns only while every
    # column holds a single repeated value; emit that value.
    matching_columns = takewhile(all_equal, zip(*iterables))
    return map(itemgetter(0), matching_columns)
2318
2319
def lstrip(iterable, pred):
    """Yield the items from *iterable*, stripping any from the beginning
    for which *pred* returns ``True``.

    For example, to remove a set of items from the start of an iterable:

    >>> iterable = (None, False, None, 1, 2, None, 3, False, None)
    >>> pred = lambda x: x in {None, False, ''}
    >>> list(lstrip(iterable, pred))
    [1, 2, None, 3, False, None]

    This function is analogous to :func:`str.lstrip`, and is essentially
    a wrapper for :func:`itertools.dropwhile`.

    """
    return dropwhile(pred, iterable)
2336
2337
def rstrip(iterable, pred):
    """Yield the items from *iterable*, stripping any from the end for
    which *pred* returns ``True``.

    For example, to remove a set of items from the end of an iterable:

    >>> iterable = (None, False, None, 1, 2, None, 3, False, None)
    >>> pred = lambda x: x in {None, False, ''}
    >>> list(rstrip(iterable, pred))
    [None, False, None, 1, 2, None, 3]

    This function is analogous to :func:`str.rstrip`.

    """
    # Buffer each run of matching items; it is emitted only when a
    # non-matching item follows, so a trailing run is discarded.
    pending = []
    for item in iterable:
        if pred(item):
            pending.append(item)
        else:
            if pending:
                yield from pending
                pending = []
            yield item
2362
2363
def strip(iterable, pred):
    """Yield the items from *iterable*, stripping any from both the
    beginning and the end for which *pred* returns ``True``.

    For example, to remove a set of items from both ends of an iterable:

    >>> iterable = (None, False, None, 1, 2, None, 3, False, None)
    >>> pred = lambda x: x in {None, False, ''}
    >>> list(strip(iterable, pred))
    [1, 2, None, 3]

    This function is analogous to :func:`str.strip`.

    """
    # Trim the front lazily, then the back.
    without_head = lstrip(iterable, pred)
    return rstrip(without_head, pred)
2379
2380
class islice_extended:
    """An extension of :func:`itertools.islice` that supports negative
    values for *stop*, *start*, and *step*.

    >>> iterable = iter('abcdefgh')
    >>> list(islice_extended(iterable, -4, -1))
    ['e', 'f', 'g']

    Slices with negative values require some caching of *iterable*, but
    this class takes care to minimize the amount of memory required.

    A negative step works even with an infinite iterator:

    >>> from itertools import count
    >>> list(islice_extended(count(), 110, 99, -2))
    [110, 108, 106, 104, 102, 100]

    Slice notation may also be used directly:

    >>> iterable = map(str, count())
    >>> it = islice_extended(iterable)[10:20:2]
    >>> list(it)
    ['10', '12', '14', '16', '18']

    """

    def __init__(self, iterable, *args):
        source = iter(iterable)
        if args:
            # Positional args are interpreted exactly like slice(...).
            source = _islice_helper(source, slice(*args))
        self._iterable = source

    def __iter__(self):
        return self

    def __next__(self):
        return next(self._iterable)

    def __getitem__(self, key):
        if not isinstance(key, slice):
            raise TypeError(
                'islice_extended.__getitem__ argument must be a slice'
            )
        return islice_extended(_islice_helper(self._iterable, key))
2425
2426
def _islice_helper(it, s):
    """Yield the items of iterator *it* selected by the slice object *s*,
    supporting negative start/stop/step.  Backs
    :class:`islice_extended`.  Negative slice values require buffering
    part of *it* in a bounded deque.
    """
    start = s.start
    stop = s.stop
    if s.step == 0:
        raise ValueError('step argument must be a non-zero integer or None.')
    step = s.step or 1

    if step > 0:
        start = 0 if (start is None) else start

        if start < 0:
            # Consume all but the last -start items
            # (enumerate from 1 so the last cached pair carries the
            # total item count).
            cache = deque(enumerate(it, 1), maxlen=-start)
            len_iter = cache[-1][0] if cache else 0

            # Adjust start to be positive
            i = max(len_iter + start, 0)

            # Adjust stop to be positive
            if stop is None:
                j = len_iter
            elif stop >= 0:
                j = min(stop, len_iter)
            else:
                j = max(len_iter + stop, 0)

            # Slice the cache
            n = j - i
            if n <= 0:
                return

            for index, item in islice(cache, 0, n, step):
                yield item
        elif (stop is not None) and (stop < 0):
            # Advance to the start position
            next(islice(it, start, start), None)

            # When stop is negative, we have to carry -stop items while
            # iterating
            cache = deque(islice(it, -stop), maxlen=-stop)

            for index, item in enumerate(it):
                cached_item = cache.popleft()
                if index % step == 0:
                    yield cached_item
                cache.append(item)
        else:
            # When both start and stop are positive we have the normal case
            yield from islice(it, start, stop, step)
    else:
        # Negative step: items are produced from a buffered suffix in
        # reverse order.
        start = -1 if (start is None) else start

        if (stop is not None) and (stop < 0):
            # Consume all but the last items
            n = -stop - 1
            cache = deque(enumerate(it, 1), maxlen=n)
            len_iter = cache[-1][0] if cache else 0

            # If start and stop are both negative they are comparable and
            # we can just slice. Otherwise we can adjust start to be negative
            # and then slice.
            if start < 0:
                i, j = start, stop
            else:
                i, j = min(start - len_iter, -1), None

            for index, item in list(cache)[i:j:step]:
                yield item
        else:
            # Advance to the stop position
            if stop is not None:
                m = stop + 1
                next(islice(it, m, m), None)

            # stop is positive, so if start is negative they are not comparable
            # and we need the rest of the items.
            if start < 0:
                i = start
                n = None
            # stop is None and start is positive, so we just need items up to
            # the start index.
            elif stop is None:
                i = None
                n = start + 1
            # Both stop and start are positive, so they are comparable.
            else:
                i = None
                n = start - stop
                if n <= 0:
                    return

            cache = list(islice(it, n))

            yield from cache[i::step]
2521
2522
def always_reversible(iterable):
    """An extension of :func:`reversed` that supports all iterables, not
    just those implementing the ``Reversible`` or ``Sequence`` protocols.

    >>> print(*always_reversible(x for x in range(3)))
    2 1 0

    If the iterable is already reversible, the result of
    :func:`reversed` is returned directly.  Otherwise the remaining
    items are cached in a list first, which may require significant
    storage.
    """
    try:
        return reversed(iterable)
    except TypeError:
        # Not reversible: materialize, then reverse the copy.
        pass
    return reversed(list(iterable))
2539
2540
def consecutive_groups(iterable, ordering=lambda x: x):
    """Yield groups of consecutive items using :func:`itertools.groupby`.
    The *ordering* function maps each item to its position; items whose
    positions differ by exactly 1 are grouped together.

    By default the ordering function is the identity, which finds runs
    of consecutive numbers:

    >>> iterable = [1, 10, 11, 12, 20, 30, 31, 32, 33, 40]
    >>> for group in consecutive_groups(iterable):
    ...     print(list(group))
    [1]
    [10, 11, 12]
    [20]
    [30, 31, 32, 33]
    [40]

    For runs of adjacent letters, use the :meth:`~str.index` method of a
    string of letters as the ordering:

    >>> from string import ascii_lowercase
    >>> for group in consecutive_groups('abcdfgilmnop', ascii_lowercase.index):
    ...     print(list(group))
    ['a', 'b', 'c', 'd']
    ['f', 'g']
    ['i']
    ['l', 'm', 'n', 'o', 'p']

    Each group is an iterator that shares its source with *iterable*:
    advancing to the next group invalidates the previous one, so copy a
    group (e.g. into a ``list``) if you need to keep its elements.
    """

    def _key(pair):
        # Items in one consecutive run all share (index - position).
        index, item = pair
        return index - ordering(item)

    for _, run in groupby(enumerate(iterable), key=_key):
        yield map(itemgetter(1), run)
2587
2588
def difference(iterable, func=sub, *, initial=None):
    """The inverse of :func:`itertools.accumulate`: yield the first item
    of *iterable*, then ``func(B, A)`` for each pair of consecutive
    items.

    >>> from itertools import accumulate
    >>> iterable = accumulate([0, 1, 2, 3, 4])  # produces 0, 1, 3, 6, 10
    >>> list(difference(iterable))
    [0, 1, 2, 3, 4]

    *func* defaults to :func:`operator.sub`, but other binary functions
    can be specified.  They are applied as::

        A, B, C, D, ... --> A, func(B, A), func(C, B), func(D, C), ...

    For example, progressive division:

    >>> list(difference([1, 2, 6, 24, 120], lambda x, y: x // y))
    [1, 2, 3, 4, 5]

    If the *initial* keyword is set, the first element is skipped when
    computing successive differences:

    >>> it = [10, 11, 13, 16]  # from accumulate([1, 2, 3], initial=10)
    >>> list(difference(it, initial=10))
    [1, 2, 3]

    """
    a, b = tee(iterable)
    try:
        first_item = next(b)
    except StopIteration:
        return iter([])

    # When *initial* was given, the first element is an artifact of
    # accumulate's seed and is not yielded.
    prefix = [] if initial is not None else [first_item]
    return chain(prefix, map(func, b, a))
2629
2630
class SequenceView(Sequence):
    """Return a read-only view of the sequence object *target*.

    :class:`SequenceView` objects are analogous to Python's built-in
    "dictionary view" types: they are a dynamic view of a sequence's
    items, so when the sequence updates, so does the view.

    >>> seq = ['0', '1', '2']
    >>> view = SequenceView(seq)
    >>> view
    SequenceView(['0', '1', '2'])
    >>> seq.append('3')
    >>> view
    SequenceView(['0', '1', '2', '3'])

    Sequence views support indexing, slicing, and length queries, but
    not assignment:

    >>> view[1]
    '1'
    >>> view[1:-1]
    ['1', '2']
    >>> len(view)
    4

    They are a storage-cheap alternative to copying.

    """

    def __init__(self, target):
        # Only genuine sequences can be viewed.
        if not isinstance(target, Sequence):
            raise TypeError
        self._target = target

    def __getitem__(self, index):
        return self._target[index]

    def __len__(self):
        return len(self._target)

    def __repr__(self):
        return f'{type(self).__name__}({self._target!r})'
2674
2675
class seekable:
    """Wrap an iterator to allow for seeking backward and forward.

    Consumed items are cached progressively so earlier positions can be
    revisited.  Call :meth:`seek` with an index to move to that position
    in the source iterable; seek to ``0`` to "reset" the iterator:

    >>> from itertools import count
    >>> it = seekable((str(n) for n in count()))
    >>> next(it), next(it), next(it)
    ('0', '1', '2')
    >>> it.seek(0)
    >>> next(it), next(it), next(it)
    ('0', '1', '2')
    >>> next(it)
    '3'

    Seeking forward past the end of the source is not an error; the
    iterator is then exhausted but can still be reset:

    >>> it = seekable((str(n) for n in range(20)))
    >>> it.seek(20)
    >>> list(it)
    []
    >>> it.seek(0)
    >>> next(it), next(it), next(it)
    ('0', '1', '2')

    Call :meth:`peek` to look ahead one item without advancing, with an
    optional *default* for exhausted iterators:

    >>> it = seekable('1234')
    >>> it.peek()
    '1'
    >>> list(it)
    ['1', '2', '3', '4']
    >>> it.peek(default='empty')
    'empty'

    :func:`bool` reports whether items remain, and :meth:`elements`
    returns a :class:`SequenceView` of the cache that updates
    automatically as the source is consumed.

    The cache grows without bound by default, so beware of wrapping very
    large or infinite iterables.  Supply *maxlen* to bound the cache
    (which also limits how far back you can seek):

    >>> from itertools import count
    >>> it = seekable((str(n) for n in count()), maxlen=2)
    >>> next(it), next(it), next(it), next(it)
    ('0', '1', '2', '3')
    >>> list(it.elements())
    ['2', '3']
    >>> it.seek(0)
    >>> next(it), next(it), next(it), next(it)
    ('2', '3', '4', '5')

    """

    def __init__(self, iterable, maxlen=None):
        self._source = iter(iterable)
        # A deque with maxlen gives a bounded cache; a list is unbounded.
        self._cache = [] if maxlen is None else deque([], maxlen)
        # _index is None while reading straight from the source, or a
        # position within the cache after a seek/peek.
        self._index = None

    def __iter__(self):
        return self

    def __next__(self):
        if self._index is not None:
            try:
                result = self._cache[self._index]
            except IndexError:
                # Past the cached region: resume reading the source.
                self._index = None
            else:
                self._index += 1
                return result

        result = next(self._source)
        self._cache.append(result)
        return result

    def __bool__(self):
        try:
            self.peek()
        except StopIteration:
            return False
        return True

    def peek(self, default=_marker):
        try:
            peeked = next(self)
        except StopIteration:
            if default is _marker:
                raise
            return default
        # Step back one position so the peeked item is produced again.
        if self._index is None:
            self._index = len(self._cache)
        self._index -= 1
        return peeked

    def elements(self):
        return SequenceView(self._cache)

    def seek(self, index):
        self._index = index
        # Seeking beyond the cache consumes from the source to catch up.
        shortfall = index - len(self._cache)
        if shortfall > 0:
            consume(self, shortfall)
2814
2815
class run_length:
    """
    :func:`run_length.encode` compresses an iterable with run-length
    encoding: it yields each distinct item paired with the number of
    times it was repeated:

    >>> uncompressed = 'abbcccdddd'
    >>> list(run_length.encode(uncompressed))
    [('a', 1), ('b', 2), ('c', 3), ('d', 4)]

    :func:`run_length.decode` is the inverse: it expands
    ``(item, count)`` pairs back into the original items:

    >>> compressed = [('a', 1), ('b', 2), ('c', 3), ('d', 4)]
    >>> list(run_length.decode(compressed))
    ['a', 'b', 'b', 'c', 'c', 'c', 'd', 'd', 'd', 'd']

    """

    @staticmethod
    def encode(iterable):
        return ((key, ilen(group)) for key, group in groupby(iterable))

    @staticmethod
    def decode(iterable):
        # Each (item, count) pair becomes repeat(item, count).
        return chain.from_iterable(starmap(repeat, iterable))
2843
2844
def exactly_n(iterable, n, predicate=bool):
    """Return ``True`` if exactly ``n`` items in the iterable are ``True``
    according to the *predicate* function.

    >>> exactly_n([True, True, False], 2)
    True
    >>> exactly_n([True, True, False], 1)
    False
    >>> exactly_n([0, 1, 2, 3, 4, 5], 3, lambda x: x < 3)
    True

    The iterable is advanced until ``n + 1`` truthy items are
    encountered, so avoid calling this on infinite iterables.

    """
    # Taking at most n + 1 matches distinguishes "exactly n" from
    # "more than n" without consuming the whole iterable.
    matches = list(islice(filter(predicate, iterable), n + 1))
    return len(matches) == n
2861
2862
def circular_shifts(iterable):
    """Return a list of circular shifts of *iterable*.

    >>> circular_shifts(range(4))
    [(0, 1, 2, 3), (1, 2, 3, 0), (2, 3, 0, 1), (3, 0, 1, 2)]
    """
    items = list(iterable)
    # The i-th shift starts at index i and wraps around to the front.
    return [tuple(items[i:] + items[:i]) for i in range(len(items))]
2871
2872
def make_decorator(wrapping_func, result_index=0):
    """Return a decorator version of *wrapping_func*, which is a
    function that modifies an iterable.  *result_index* is the position
    in that function's signature where the iterable goes.

    This lets you apply itertools-style transforms on the "production
    end," i.e. at function definition, without changing the function's
    code.  For example, a decorator version of :func:`chunked`:

    >>> from more_itertools import chunked
    >>> chunker = make_decorator(chunked, result_index=0)
    >>> @chunker(3)
    ... def iter_range(n):
    ...     return iter(range(n))
    ...
    >>> list(iter_range(9))
    [[0, 1, 2], [3, 4, 5], [6, 7, 8]]

    To only allow truthy items to be returned:

    >>> truth_serum = make_decorator(filter, result_index=1)
    >>> @truth_serum(bool)
    ... def boolean_test():
    ...     return [0, 1, '', ' ', False, True]
    ...
    >>> list(boolean_test())
    [1, ' ', True]

    The :func:`peekable` and :func:`seekable` wrappers also make for
    practical decorators.
    """

    # See https://sites.google.com/site/bbayles/index/decorator_factory for
    # notes on how this works.
    def decorator(*wrapping_args, **wrapping_kwargs):
        def outer_wrapper(f):
            def inner_wrapper(*args, **kwargs):
                result = f(*args, **kwargs)
                # Splice the wrapped function's result into the argument
                # list at result_index.
                before = list(wrapping_args[:result_index])
                after = list(wrapping_args[result_index:])
                return wrapping_func(
                    *before, result, *after, **wrapping_kwargs
                )

            return inner_wrapper

        return outer_wrapper

    return decorator
2937
2938
def map_reduce(iterable, keyfunc, valuefunc=None, reducefunc=None):
    """Return a dictionary that maps the items in *iterable* to
    categories defined by *keyfunc*, transforms them with *valuefunc*,
    and then summarizes them by category with *reducefunc*.

    *valuefunc* defaults to the identity function if it is unspecified.
    If *reducefunc* is unspecified, no summarization takes place:

    >>> keyfunc = lambda x: x.upper()
    >>> result = map_reduce('abbccc', keyfunc)
    >>> sorted(result.items())
    [('A', ['a']), ('B', ['b', 'b']), ('C', ['c', 'c', 'c'])]

    Specifying *valuefunc* transforms the categorized items:

    >>> result = map_reduce('abbccc', keyfunc, lambda x: 1)
    >>> sorted(result.items())
    [('A', [1]), ('B', [1, 1]), ('C', [1, 1, 1])]

    Specifying *reducefunc* summarizes the categorized items:

    >>> result = map_reduce('abbccc', keyfunc, lambda x: 1, sum)
    >>> sorted(result.items())
    [('A', 1), ('B', 2), ('C', 3)]

    Note that all items are gathered into lists before the
    summarization step, which may require significant storage.

    The returned object is a :obj:`collections.defaultdict` with its
    ``default_factory`` set to ``None``, so it behaves like a normal
    dictionary.

    """
    if valuefunc is None:
        valuefunc = lambda x: x

    groups = defaultdict(list)
    for element in iterable:
        groups[keyfunc(element)].append(valuefunc(element))

    if reducefunc is not None:
        # Collapse each category's list in place (keys are unchanged,
        # so mutating during iteration is safe).
        for category in groups:
            groups[category] = reducefunc(groups[category])

    # Disable the default factory so missing keys raise KeyError.
    groups.default_factory = None
    return groups
3004
3005
def rlocate(iterable, pred=bool, window_size=None):
    """Yield the index of each item in *iterable* for which *pred*
    returns ``True``, starting from the right and moving left.

    >>> list(rlocate([0, 1, 1, 0, 1, 0, 0]))  # Truthy at 1, 2, and 4
    [4, 2, 1]

    Set *pred* to a custom function to find particular items:

    >>> list(rlocate(iter('abcb'), lambda x: x == 'b'))
    [3, 1]

    If *window_size* is given, *pred* is called with that many items at
    a time, which enables searching for sub-sequences:

    >>> iterable = [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]
    >>> pred = lambda *args: args == (1, 2, 3)
    >>> list(rlocate(iterable, pred=pred, window_size=3))
    [9, 5, 1]

    Beware: this function won't return anything for infinite iterables.
    If *iterable* is reversible, it is reversed and searched from the
    right; otherwise it is searched from the left and the results are
    returned in reverse order.

    See :func:`locate` for other example applications.

    """
    if window_size is None:
        # Fast path: a sized, reversible iterable can be scanned from
        # the right without materializing the result list.
        try:
            length = len(iterable)
            return (
                length - index - 1
                for index in locate(reversed(iterable), pred)
            )
        except TypeError:
            pass

    return reversed(list(locate(iterable, pred, window_size)))
3047
3048
def replace(iterable, pred, substitutes, count=None, window_size=1):
    """Yield the items from *iterable*, replacing the items for which
    *pred* returns ``True`` with the items from the iterable
    *substitutes*.

    >>> iterable = [1, 1, 0, 1, 1, 0, 1, 1]
    >>> list(replace(iterable, lambda x: x == 0, (2, 3)))
    [1, 1, 2, 3, 1, 1, 2, 3, 1, 1]

    If *count* is given, the number of replacements is limited:

    >>> iterable = [1, 1, 0, 1, 1, 0, 1, 1, 0]
    >>> list(replace(iterable, lambda x: x == 0, [None], count=2))
    [1, 1, None, 1, 1, None, 1, 1, 0]

    Use *window_size* to control how many items are passed to *pred* at
    a time, which allows locating and replacing subsequences:

    >>> iterable = [0, 1, 2, 5, 0, 1, 2, 5]
    >>> pred = lambda *args: args == (0, 1, 2)  # 3 items passed to pred
    >>> list(replace(iterable, pred, [3, 4], window_size=3))
    [3, 4, 5, 3, 4, 5]

    """
    if window_size < 1:
        raise ValueError('window_size must be at least 1')

    # Materialize substitutes: they are emitted once per match.
    substitutes = tuple(substitutes)

    # Pad the input so the number of windows matches the number of
    # input items.
    padded = chain(iterable, [_marker] * (window_size - 1))
    windows = windowed(padded, window_size)

    n_replaced = 0
    for window in windows:
        # On a match (within the replacement budget), splice in the
        # substitutes and drop the windows overlapping the match: for
        # window size 2 and windows (0, 1), (1, 2), (2, 3), a match on
        # (0, 1) must also discard (1, 2).
        if pred(*window) and (count is None or n_replaced < count):
            n_replaced += 1
            yield from substitutes
            consume(windows, window_size - 1)
        # Otherwise, emit the window's first item (skipping padding).
        elif window and window[0] is not _marker:
            yield window[0]
3108
3109
def partitions(iterable):
    """Yield all possible order-preserving partitions of *iterable*.

    >>> iterable = 'abc'
    >>> for part in partitions(iterable):
    ...     print([''.join(p) for p in part])
    ['abc']
    ['a', 'bc']
    ['ab', 'c']
    ['a', 'b', 'c']

    This is unrelated to :func:`partition`.

    """
    sequence = list(iterable)
    n = len(sequence)
    # Every subset of the interior cut points 1..n-1 defines one
    # partition: slice between consecutive cut points.
    for cut_points in powerset(range(1, n)):
        bounds = (0,) + cut_points + (n,)
        yield [sequence[i:j] for i, j in zip(bounds, bounds[1:])]
3128
3129
def set_partitions(iterable, k=None):
    """
    Yield the set partitions of *iterable* into *k* parts.  Set
    partitions are not order-preserving.

    >>> iterable = 'abc'
    >>> for part in set_partitions(iterable, 2):
    ...     print([''.join(p) for p in part])
    ['a', 'bc']
    ['ab', 'c']
    ['b', 'ac']


    If *k* is not given, every set partition is generated.

    >>> iterable = 'abc'
    >>> for part in set_partitions(iterable):
    ...     print([''.join(p) for p in part])
    ['abc']
    ['a', 'bc']
    ['ab', 'c']
    ['b', 'ac']
    ['a', 'b', 'c']

    """
    pool = list(iterable)
    n = len(pool)
    if k is not None:
        if k < 1:
            raise ValueError(
                "Can't partition in a negative or zero number of groups"
            )
        elif k > n:
            # More parts than items: no partitions exist.
            return

    def _helper(items, parts):
        # Recursively partition items into exactly `parts` groups.
        if parts == 1:
            yield [items]
        elif len(items) == parts:
            yield [[item] for item in items]
        else:
            head, *tail = items
            # Either head forms a singleton group...
            for partition in _helper(tail, parts - 1):
                yield [[head], *partition]
            # ...or head joins each group of a smaller partition in turn.
            for partition in _helper(tail, parts):
                for index in range(len(partition)):
                    yield (
                        partition[:index]
                        + [[head] + partition[index]]
                        + partition[index + 1 :]
                    )

    if k is None:
        for size in range(1, n + 1):
            yield from _helper(pool, size)
    else:
        yield from _helper(pool, k)
3184
3185
class time_limited:
    """
    Yield items from *iterable* until *limit_seconds* have passed.  If
    the time limit expires before all items have been yielded, the
    ``timed_out`` attribute is set to ``True``.

    >>> from time import sleep
    >>> def generator():
    ...     yield 1
    ...     yield 2
    ...     sleep(0.2)
    ...     yield 3
    >>> iterable = time_limited(0.1, generator())
    >>> list(iterable)
    [1, 2]
    >>> iterable.timed_out
    True

    The clock is checked after each item is retrieved but before it is
    yielded, so if producing the first item takes longer than the
    limit, the iterator runs for that long and yields nothing.

    """

    def __init__(self, limit_seconds, iterable):
        if limit_seconds < 0:
            raise ValueError('limit_seconds must be positive')
        self.limit_seconds = limit_seconds
        self._iterable = iter(iterable)
        # Monotonic clock: immune to wall-clock adjustments.
        self._start_time = monotonic()
        self.timed_out = False

    def __iter__(self):
        return self

    def __next__(self):
        item = next(self._iterable)
        elapsed = monotonic() - self._start_time
        if elapsed > self.limit_seconds:
            self.timed_out = True
            raise StopIteration

        return item
3229
3230
def only(iterable, default=None, too_long=None):
    """If *iterable* has only one item, return it.
    If it has zero items, return *default*.
    If it has more than one item, raise the exception given by *too_long*,
    which is ``ValueError`` by default.

    >>> only([], default='missing')
    'missing'
    >>> only([1])
    1
    >>> only([1, 2])  # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    ...
    ValueError: Expected exactly one item in iterable, but got 1, 2,
    and perhaps more.'
    >>> only([1, 2], too_long=TypeError)  # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    ...
    TypeError

    Note that :func:`only` attempts to advance *iterable* twice to ensure there
    is only one item. See :func:`spy` or :func:`peekable` to check
    iterable contents less destructively.
    """
    iterator = iter(iterable)
    result = next(iterator, default)

    # Any further item means the iterable had more than one element.
    for extra in iterator:
        msg = (
            'Expected exactly one item in iterable, but got {!r}, {!r}, '
            'and perhaps more.'.format(result, extra)
        )
        raise too_long or ValueError(msg)

    return result
3270
3271
3272 class _IChunk:
3273 def __init__(self, iterable, n):
3274 self._it = islice(iterable, n)
3275 self._cache = deque()
3276
3277 def fill_cache(self):
3278 self._cache.extend(self._it)
3279
3280 def __iter__(self):
3281 return self
3282
3283 def __next__(self):
3284 try:
3285 return next(self._it)
3286 except StopIteration:
3287 if self._cache:
3288 return self._cache.popleft()
3289 else:
3290 raise
3291
3292
def ichunked(iterable, n):
    """Break *iterable* into sub-iterables with *n* elements each.
    :func:`ichunked` is like :func:`chunked`, but it yields iterables
    instead of lists.

    If the sub-iterables are read in order, the elements of *iterable*
    won't be stored in memory.
    If they are read out of order, :func:`itertools.tee` is used to cache
    elements as necessary.

    >>> from itertools import count
    >>> all_chunks = ichunked(count(), 4)
    >>> c_1, c_2, c_3 = next(all_chunks), next(all_chunks), next(all_chunks)
    >>> list(c_2)  # c_1's elements have been cached; c_3's haven't been
    [4, 5, 6, 7]
    >>> list(c_1)
    [0, 1, 2, 3]
    >>> list(c_3)
    [8, 9, 10, 11]

    """
    source = peekable(iter(iterable))
    sentinel = object()
    # Keep producing chunks until peeking shows the source is exhausted.
    while source.peek(sentinel) is not sentinel:
        current = _IChunk(source, n)
        yield current

        # Advance the source past this chunk before handing out the next
        # one, caching any items the consumer has not read yet.
        current.fill_cache()
3327
3328
def iequals(*iterables):
    """Return ``True`` if all given *iterables* are equal to each other,
    which means that they contain the same elements in the same order.

    Useful for comparing iterables of different data types or iterables
    that do not support equality checks.

    >>> iequals("abc", ['a', 'b', 'c'], ('a', 'b', 'c'), iter("abc"))
    True

    >>> iequals("abc", "acb")
    False

    Not to be confused with :func:`all_equal`, which checks whether all
    elements of a single iterable are equal to each other.

    """
    # A unique sentinel pads short iterables, so a length mismatch shows
    # up as an unequal column.
    sentinel = object()
    for column in zip_longest(*iterables, fillvalue=sentinel):
        head = column[0]
        if not all(value == head for value in column[1:]):
            return False
    return True
3347
3348
def distinct_combinations(iterable, r):
    """Yield the distinct combinations of *r* items taken from *iterable*.

    >>> list(distinct_combinations([0, 0, 1], 2))
    [(0, 0), (0, 1)]

    Equivalent to ``set(combinations(iterable))``, except duplicates are not
    generated and thrown away. For larger input sequences this is much more
    efficient.

    """
    if r < 0:
        raise ValueError('r must be non-negative')
    elif r == 0:
        # There is exactly one 0-length combination: the empty tuple.
        yield ()
        return
    pool = tuple(iterable)
    # Depth-first search over a stack of generators. Level i's generator
    # yields (index, item) candidates for slot i of the combination,
    # de-duplicated by item value so equal items are tried only once.
    generators = [unique_everseen(enumerate(pool), key=itemgetter(1))]
    current_combo = [None] * r
    level = 0
    while generators:
        try:
            cur_idx, p = next(generators[-1])
        except StopIteration:
            # No more candidates at this level: backtrack.
            generators.pop()
            level -= 1
            continue
        current_combo[level] = p
        if level + 1 == r:
            # Combination complete; yield a snapshot of the working list.
            yield tuple(current_combo)
        else:
            # Descend: candidates for the next slot come strictly after
            # cur_idx (keeping combinations ordered), again de-duplicated.
            generators.append(
                unique_everseen(
                    enumerate(pool[cur_idx + 1 :], cur_idx + 1),
                    key=itemgetter(1),
                )
            )
            level += 1
3387
3388
def filter_except(validator, iterable, *exceptions):
    """Yield the items from *iterable* for which the *validator* function does
    not raise one of the specified *exceptions*.

    *validator* is called once per item; it should accept one argument and
    raise an exception if the item is invalid.

    >>> iterable = ['1', '2', 'three', '4', None]
    >>> list(filter_except(int, iterable, ValueError, TypeError))
    ['1', '2', '4']

    If an exception other than one given by *exceptions* is raised by
    *validator*, it is raised like normal.
    """
    for candidate in iterable:
        try:
            validator(candidate)
        except exceptions:
            # Validation failed in an expected way: drop the item.
            continue
        yield candidate
3411
3412
def map_except(function, iterable, *exceptions):
    """Transform each item from *iterable* with *function* and yield the
    result, unless *function* raises one of the specified *exceptions*.

    *function* is called once per item and should accept one argument.

    >>> iterable = ['1', '2', 'three', '4', None]
    >>> list(map_except(int, iterable, ValueError, TypeError))
    [1, 2, 4]

    If an exception other than one given by *exceptions* is raised by
    *function*, it is raised like normal.
    """
    for value in iterable:
        try:
            transformed = function(value)
        except exceptions:
            # Transformation failed in an expected way: skip the item.
            continue
        yield transformed
3432
3433
def map_if(iterable, pred, func, func_else=lambda x: x):
    """Evaluate each item from *iterable* using *pred*. If the result is
    equivalent to ``True``, transform the item with *func* and yield it.
    Otherwise, transform the item with *func_else* and yield it.

    *pred*, *func*, and *func_else* should each be functions that accept
    one argument. By default, *func_else* is the identity function.

    >>> from math import sqrt
    >>> iterable = list(range(-5, 5))
    >>> iterable
    [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4]
    >>> list(map_if(iterable, lambda x: x > 3, lambda x: 'toobig'))
    [-5, -4, -3, -2, -1, 0, 1, 2, 3, 'toobig']
    >>> list(map_if(iterable, lambda x: x >= 0,
    ... lambda x: f'{sqrt(x):.2f}', lambda x: None))
    [None, None, None, None, None, '0.00', '1.00', '1.41', '1.73', '2.00']
    """
    for value in iterable:
        if pred(value):
            yield func(value)
        else:
            yield func_else(value)
3454
3455
def _sample_unweighted(iterable, k):
    # Implementation of "Algorithm L" from the 1994 paper by Kim-Hung Li:
    # "Reservoir-Sampling Algorithms of Time Complexity O(n(1+log(N/n)))".
    # Note: the caller (sample) passes an iterator, so `take` consumes the
    # first k items and the loop below resumes from item k.

    # Fill up the reservoir (collection of samples) with the first `k` samples
    reservoir = take(k, iterable)

    # Generate random number that's the largest in a sample of k U(0,1) numbers
    # Largest order statistic: https://en.wikipedia.org/wiki/Order_statistic
    W = exp(log(random()) / k)

    # The number of elements to skip before changing the reservoir is a random
    # number with a geometric distribution. Sample it using random() and logs.
    next_index = k + floor(log(random()) / log(1 - W))

    for index, element in enumerate(iterable, k):
        if index == next_index:
            # Replace a uniformly-chosen reservoir slot with this element.
            reservoir[randrange(k)] = element
            # The new W is the largest in a sample of k U(0, `old_W`) numbers
            W *= exp(log(random()) / k)
            next_index += floor(log(random()) / log(1 - W)) + 1

    return reservoir
3479
3480
def _sample_weighted(iterable, k, weights):
    # Implementation of "A-ExpJ" from the 2006 paper by Efraimidis et al. :
    # "Weighted random sampling with a reservoir".

    # Log-transform for numerical stability for weights that are small/large
    weight_keys = (log(random()) / weight for weight in weights)

    # Fill up the reservoir (collection of samples) with the first `k`
    # weight-keys and elements, then heapify the list.
    reservoir = take(k, zip(weight_keys, iterable))
    heapify(reservoir)

    # The number of jumps before changing the reservoir is a random variable
    # with an exponential distribution. Sample it using random() and logs.
    smallest_weight_key, _ = reservoir[0]
    weights_to_skip = log(random()) / smallest_weight_key

    # Note: zipping `weights` here also advances `weight_keys` consumption
    # indirectly; both views draw from the same underlying weights iterator.
    for weight, element in zip(weights, iterable):
        if weight >= weights_to_skip:
            # Replace the reservoir entry with the smallest weight-key.
            # The notation here is consistent with the paper, but we store
            # the weight-keys in log-space for better numerical stability.
            smallest_weight_key, _ = reservoir[0]
            t_w = exp(weight * smallest_weight_key)
            r_2 = uniform(t_w, 1)  # generate U(t_w, 1)
            weight_key = log(r_2) / weight
            heapreplace(reservoir, (weight_key, element))
            smallest_weight_key, _ = reservoir[0]
            weights_to_skip = log(random()) / smallest_weight_key
        else:
            weights_to_skip -= weight

    # Equivalent to [element for weight_key, element in sorted(reservoir)]
    return [heappop(reservoir)[1] for _ in range(k)]
3514
3515
def sample(iterable, k, weights=None):
    """Return a *k*-length list of elements chosen (without replacement)
    from the *iterable*. Like :func:`random.sample`, but works on iterables
    of unknown length.

    >>> iterable = range(100)
    >>> sample(iterable, 5)  # doctest: +SKIP
    [81, 60, 96, 16, 4]

    An iterable with *weights* may also be given:

    >>> iterable = range(100)
    >>> weights = (i * i + 1 for i in range(100))
    >>> sampled = sample(iterable, 5, weights=weights)  # doctest: +SKIP
    [79, 67, 74, 66, 78]

    The algorithm can also be used to generate weighted random permutations.
    The relative weight of each item determines the probability that it
    appears late in the permutation.

    >>> data = "abcdefgh"
    >>> weights = range(1, len(data) + 1)
    >>> sample(data, k=len(data), weights=weights)  # doctest: +SKIP
    ['c', 'a', 'b', 'e', 'g', 'd', 'h', 'f']
    """
    # A zero-sized sample needs no iteration at all.
    if k == 0:
        return []

    iterator = iter(iterable)
    # Dispatch to the appropriate reservoir-sampling algorithm.
    if weights is None:
        return _sample_unweighted(iterator, k)
    return _sample_weighted(iterator, k, iter(weights))
3550
3551
3552 def is_sorted(iterable, key=None, reverse=False, strict=False):
3553 """Returns ``True`` if the items of iterable are in sorted order, and
3554 ``False`` otherwise. *key* and *reverse* have the same meaning that they do
3555 in the built-in :func:`sorted` function.
3556
3557 >>> is_sorted(['1', '2', '3', '4', '5'], key=int)
3558 True
3559 >>> is_sorted([5, 4, 3, 1, 2], reverse=True)
3560 False
3561
3562 If *strict*, tests for strict sorting, that is, returns ``False`` if equal
3563 elements are found:
3564
3565 >>> is_sorted([1, 2, 2])
3566 True
3567 >>> is_sorted([1, 2, 2], strict=True)
3568 False
3569
3570 The function returns ``False`` after encountering the first out-of-order
3571 item. If there are no out-of-order items, the iterable is exhausted.
3572 """
3573
3574 compare = (le if reverse else ge) if strict else (lt if reverse else gt)
3575 it = iterable if key is None else map(key, iterable)
3576 return not any(starmap(compare, pairwise(it)))
3577
3578
class AbortThread(BaseException):
    """Raised inside a :class:`callback_iter` worker's callback once the
    controlling context manager has exited, so the background thread stops.

    Derives from ``BaseException`` so that a broad ``except Exception`` in
    the wrapped function will not swallow it.
    """

    pass
3581
3582
class callback_iter:
    """Convert a function that uses callbacks to an iterator.

    Let *func* be a function that takes a `callback` keyword argument.
    For example:

    >>> def func(callback=None):
    ...     for i, c in [(1, 'a'), (2, 'b'), (3, 'c')]:
    ...         if callback:
    ...             callback(i, c)
    ...     return 4


    Use ``with callback_iter(func)`` to get an iterator over the parameters
    that are delivered to the callback.

    >>> with callback_iter(func) as it:
    ...     for args, kwargs in it:
    ...         print(args)
    (1, 'a')
    (2, 'b')
    (3, 'c')

    The function will be called in a background thread. The ``done`` property
    indicates whether it has completed execution.

    >>> it.done
    True

    If it completes successfully, its return value will be available
    in the ``result`` property.

    >>> it.result
    4

    Notes:

    * If the function uses some keyword argument besides ``callback``, supply
      *callback_kwd*.
    * If it finished executing, but raised an exception, accessing the
      ``result`` property will raise the same exception.
    * If it hasn't finished executing, accessing the ``result``
      property from within the ``with`` block will raise ``RuntimeError``.
    * If it hasn't finished executing, accessing the ``result`` property from
      outside the ``with`` block will raise a
      ``more_itertools.AbortThread`` exception.
    * Provide *wait_seconds* to adjust how frequently the it is polled for
      output.

    """

    def __init__(self, func, callback_kwd='callback', wait_seconds=0.1):
        self._func = func
        self._callback_kwd = callback_kwd
        self._aborted = False
        self._future = None
        self._wait_seconds = wait_seconds
        # Lazily import concurrent.futures. ``__import__`` returns the
        # top-level ``concurrent`` package, so the submodule is reached via
        # the ``.futures`` attribute. (Previously this called ``__import__``
        # with no arguments, which raises TypeError.)
        self._executor = __import__(
            "concurrent.futures"
        ).futures.ThreadPoolExecutor(max_workers=1)
        self._iterator = self._reader()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Make the next callback invocation raise AbortThread in the worker,
        # then wait for the worker thread to wind down.
        self._aborted = True
        self._executor.shutdown()

    def __iter__(self):
        return self

    def __next__(self):
        return next(self._iterator)

    @property
    def done(self):
        """Whether the background call to *func* has finished."""
        if self._future is None:
            return False
        return self._future.done()

    @property
    def result(self):
        """The return value of *func*.

        Raises ``RuntimeError`` if *func* has not completed; re-raises any
        exception *func* raised.
        """
        if not self.done:
            raise RuntimeError('Function has not yet completed')

        return self._future.result()

    def _reader(self):
        # The callback running in the worker thread pushes its arguments
        # onto this queue; the generator below drains it in the caller's
        # thread.
        q = Queue()

        def callback(*args, **kwargs):
            if self._aborted:
                raise AbortThread('canceled by user')

            q.put((args, kwargs))

        self._future = self._executor.submit(
            self._func, **{self._callback_kwd: callback}
        )

        # Poll the queue until the background call completes.
        while True:
            try:
                item = q.get(timeout=self._wait_seconds)
            except Empty:
                pass
            else:
                q.task_done()
                yield item

            if self._future.done():
                break

        # Items may have been enqueued between the last get() and the
        # future completing; drain them without blocking.
        remaining = []
        while True:
            try:
                item = q.get_nowait()
            except Empty:
                break
            else:
                q.task_done()
                remaining.append(item)
        q.join()
        yield from remaining
3707
3708
def windowed_complete(iterable, n):
    """
    Yield ``(beginning, middle, end)`` tuples, where:

    * Each ``middle`` has *n* items from *iterable*
    * Each ``beginning`` has the items before the ones in ``middle``
    * Each ``end`` has the items after the ones in ``middle``

    >>> iterable = range(7)
    >>> n = 3
    >>> for beginning, middle, end in windowed_complete(iterable, n):
    ...     print(beginning, middle, end)
    () (0, 1, 2) (3, 4, 5, 6)
    (0,) (1, 2, 3) (4, 5, 6)
    (0, 1) (2, 3, 4) (5, 6)
    (0, 1, 2) (3, 4, 5) (6,)
    (0, 1, 2, 3) (4, 5, 6) ()

    Note that *n* must be at least 0 and most equal to the length of
    *iterable*.

    This function will exhaust the iterable and may require significant
    storage.
    """
    if n < 0:
        raise ValueError('n must be >= 0')

    # The whole input must be materialized to slice it from both ends.
    seq = tuple(iterable)
    size = len(seq)

    if n > size:
        raise ValueError('n must be <= len(seq)')

    for start in range(size - n + 1):
        stop = start + n
        yield seq[:start], seq[start:stop], seq[stop:]
3747
3748
def all_unique(iterable, key=None):
    """
    Returns ``True`` if all the elements of *iterable* are unique (no two
    elements are equal).

    >>> all_unique('ABCB')
    False

    If a *key* function is specified, it will be used to make comparisons.

    >>> all_unique('ABCb')
    True
    >>> all_unique('ABCb', str.lower)
    False

    The function returns as soon as the first non-unique element is
    encountered. Iterables with a mix of hashable and unhashable items can
    be used, but the function will be slower for unhashable items.
    """
    # Hashable elements go in a set (O(1) lookups); unhashable ones fall
    # back to a list (O(n) lookups).
    hashable_seen = set()
    unhashable_seen = []
    values = iterable if key is None else map(key, iterable)
    for value in values:
        try:
            if value in hashable_seen:
                return False
            hashable_seen.add(value)
        except TypeError:
            if value in unhashable_seen:
                return False
            unhashable_seen.append(value)
    return True
3782
3783
def nth_product(index, *args):
    """Equivalent to ``list(product(*args))[index]``.

    The products of *args* can be ordered lexicographically.
    :func:`nth_product` computes the product at sort position *index* without
    computing the previous products.

    >>> nth_product(8, range(2), range(2), range(2), range(2))
    (1, 0, 0, 0)

    ``IndexError`` will be raised if the given *index* is invalid.
    """
    # Work from the least-significant (last) pool outward.
    pools = [tuple(pool) for pool in reversed(args)]
    sizes = [len(pool) for pool in pools]

    total = reduce(mul, sizes)

    # Support negative indexing from the end.
    if index < 0:
        index += total

    if not 0 <= index < total:
        raise IndexError

    # Peel off one mixed-radix digit per pool.
    selection = []
    for pool, size in zip(pools, sizes):
        index, digit = divmod(index, size)
        selection.append(pool[digit])

    return tuple(reversed(selection))
3813
3814
def nth_permutation(iterable, r, index):
    """Equivalent to ``list(permutations(iterable, r))[index]```

    The subsequences of *iterable* that are of length *r* where order is
    important can be ordered lexicographically. :func:`nth_permutation`
    computes the subsequence at sort position *index* directly, without
    computing the previous subsequences.

    >>> nth_permutation('ghijk', 2, 5)
    ('h', 'i')

    ``ValueError`` will be raised If *r* is negative or greater than the length
    of *iterable*.
    ``IndexError`` will be raised if the given *index* is invalid.
    """
    pool = list(iterable)
    n = len(pool)

    # c is the total number of r-length permutations of n items.
    if r is None or r == n:
        r, c = n, factorial(n)
    elif not 0 <= r < n:
        raise ValueError
    else:
        c = factorial(n) // factorial(n - r)

    # Support negative indexing from the end, then validate the index.
    if index < 0:
        index += c

    if not 0 <= index < c:
        raise IndexError

    if c == 0:
        return tuple()

    # Express the index in the factorial number system: digit d (counting
    # from the least-significant end) is the remainder modulo d.
    result = [0] * r
    q = index * factorial(n) // c if r < n else index
    for d in range(1, n + 1):
        q, i = divmod(q, d)
        if 0 <= n - d < r:
            result[n - d] = i
        if q == 0:
            break

    # Each digit selects (and removes) an item from the shrinking pool.
    return tuple(map(pool.pop, result))
3859
3860
def value_chain(*args):
    """Yield all arguments passed to the function in the same order in which
    they were passed. If an argument itself is iterable then iterate over its
    values.

    >>> list(value_chain(1, 2, 3, [4, 5, 6]))
    [1, 2, 3, 4, 5, 6]

    Binary and text strings are not considered iterable and are emitted
    as-is:

    >>> list(value_chain('12', '34', ['56', '78']))
    ['12', '34', '56', '78']


    Multiple levels of nesting are not flattened.

    """
    for arg in args:
        # Strings and bytes are deliberately treated as atoms.
        if isinstance(arg, (str, bytes)):
            yield arg
        else:
            try:
                yield from arg
            except TypeError:
                # Not iterable at all: emit the value itself.
                yield arg
3887
3888
3889 def product_index(element, *args):
3890 """Equivalent to ``list(product(*args)).index(element)``
3891
3892 The products of *args* can be ordered lexicographically.
3893 :func:`product_index` computes the first index of *element* without
3894 computing the previous products.
3895
3896 >>> product_index([8, 2], range(10), range(5))
3897 42
3898
3899 ``ValueError`` will be raised if the given *element* isn't in the product
3900 of *args*.
3901 """
3902 index = 0
3903
3904 for x, pool in zip_longest(element, args, fillvalue=_marker):
3905 if x is _marker or pool is _marker:
3906 raise ValueError('element is not a product of args')
3907
3908 pool = tuple(pool)
3909 index = index * len(pool) + pool.index(x)
3910
3911 return index
3912
3913
def combination_index(element, iterable):
    """Equivalent to ``list(combinations(iterable, r)).index(element)``

    The subsequences of *iterable* that are of length *r* can be ordered
    lexicographically. :func:`combination_index` computes the index of the
    first *element*, without computing the previous combinations.

    >>> combination_index('adf', 'abcdefg')
    10

    ``ValueError`` will be raised if the given *element* isn't one of the
    combinations of *iterable*.
    """
    # Walk element alongside the pool, recording the pool position of each
    # matched item. k ends up as the zero-based index of element's last item.
    element = enumerate(element)
    k, y = next(element, (None, None))
    if k is None:
        # An empty element is the first (index 0) combination.
        return 0

    indexes = []
    pool = enumerate(iterable)
    for n, x in pool:
        if x == y:
            indexes.append(n)
            tmp, y = next(element, (None, None))
            if tmp is None:
                # All of element has been matched.
                break
            else:
                k = tmp
    else:
        # The pool was exhausted before element was fully matched.
        raise ValueError('element is not a combination of iterable')

    # Exhaust the pool so n becomes the index of its final item.
    n, _ = last(pool, default=(n, None))

    # Python versions below 3.8 don't have math.comb
    # Sum binomial coefficients C(n - j, i) over the matched positions,
    # then subtract from the total count of (k+1)-combinations of n+1 items.
    index = 1
    for i, j in enumerate(reversed(indexes), start=1):
        j = n - j
        if i <= j:
            index += factorial(j) // (factorial(i) * factorial(j - i))

    return factorial(n + 1) // (factorial(k + 1) * factorial(n - k)) - index
3955
3956
def permutation_index(element, iterable):
    """Equivalent to ``list(permutations(iterable, r)).index(element)```

    The subsequences of *iterable* that are of length *r* where order is
    important can be ordered lexicographically. :func:`permutation_index`
    computes the index of the first *element* directly, without computing
    the previous permutations.

    >>> permutation_index([1, 3, 2], range(5))
    19

    ``ValueError`` will be raised if the given *element* isn't one of the
    permutations of *iterable*.
    """
    pool = list(iterable)
    index = 0
    # Accumulate a factorial-number-system value: the radix shrinks by one
    # for each item of element, since chosen items leave the pool.
    for radix, value in zip(range(len(pool), -1, -1), element):
        position = pool.index(value)
        index = index * radix + position
        del pool[position]

    return index
3979
3980
class countable:
    """Wrap *iterable* and keep a count of how many items have been consumed.

    The ``items_seen`` attribute starts at ``0`` and increments as the iterable
    is consumed:

    >>> iterable = map(str, range(10))
    >>> it = countable(iterable)
    >>> it.items_seen
    0
    >>> next(it), next(it)
    ('0', '1')
    >>> list(it)
    ['2', '3', '4', '5', '6', '7', '8', '9']
    >>> it.items_seen
    10
    """

    def __init__(self, iterable):
        self._source = iter(iterable)
        self.items_seen = 0

    def __iter__(self):
        return self

    def __next__(self):
        # Only count items that were actually produced; StopIteration
        # propagates before the counter is touched.
        value = next(self._source)
        self.items_seen += 1
        return value
4011
4012
def chunked_even(iterable, n):
    """Break *iterable* into lists of approximately length *n*.
    Items are distributed such the lengths of the lists differ by at most
    1 item.

    >>> iterable = [1, 2, 3, 4, 5, 6, 7]
    >>> n = 3
    >>> list(chunked_even(iterable, n))  # List lengths: 3, 2, 2
    [[1, 2, 3], [4, 5], [6, 7]]
    >>> list(chunked(iterable, n))  # List lengths: 3, 3, 1
    [[1, 2, 3], [4, 5, 6], [7]]

    """
    # Sized inputs can be divided exactly; unsized ones need buffering.
    len_method = getattr(iterable, '__len__', None)
    if len_method is not None:
        return _chunked_even_finite(iterable, len_method(), n)
    return _chunked_even_online(iterable, n)
4033
4034
def _chunked_even_online(iterable, n):
    # For an unsized iterable: buffer enough items that a full chunk of n
    # can always be emitted safely, then balance whatever remains.
    buffer = []
    maxbuf = n + (n - 2) * (n - 1)
    for item in iterable:
        buffer.append(item)
        if len(buffer) == maxbuf:
            yield buffer[:n]
            del buffer[:n]
    yield from _chunked_even_finite(buffer, len(buffer), n)
4044
4045
4046 def _chunked_even_finite(iterable, N, n):
4047 if N < 1:
4048 return
4049
4050 # Lists are either size `full_size <= n` or `partial_size = full_size - 1`
4051 q, r = divmod(N, n)
4052 num_lists = q + (1 if r > 0 else 0)
4053 q, r = divmod(N, num_lists)
4054 full_size = q + (1 if r > 0 else 0)
4055 partial_size = full_size - 1
4056 num_full = N - partial_size * num_lists
4057 num_partial = num_lists - num_full
4058
4059 buffer = []
4060 iterator = iter(iterable)
4061
4062 # Yield num_full lists of full_size
4063 for x in iterator:
4064 buffer.append(x)
4065 if len(buffer) == full_size:
4066 yield buffer
4067 buffer = []
4068 num_full -= 1
4069 if num_full <= 0:
4070 break
4071
4072 # Yield num_partial lists of partial_size
4073 for x in iterator:
4074 buffer.append(x)
4075 if len(buffer) == partial_size:
4076 yield buffer
4077 buffer = []
4078 num_partial -= 1
4079
4080
def zip_broadcast(*objects, scalar_types=(str, bytes), strict=False):
    """A version of :func:`zip` that "broadcasts" any scalar
    (i.e., non-iterable) items into output tuples.

    >>> iterable_1 = [1, 2, 3]
    >>> iterable_2 = ['a', 'b', 'c']
    >>> scalar = '_'
    >>> list(zip_broadcast(iterable_1, iterable_2, scalar))
    [(1, 'a', '_'), (2, 'b', '_'), (3, 'c', '_')]

    The *scalar_types* keyword argument determines what types are considered
    scalar. It is set to ``(str, bytes)`` by default. Set it to ``None`` to
    treat strings and byte strings as iterable:

    >>> list(zip_broadcast('abc', 0, 'xyz', scalar_types=None))
    [('a', 0, 'x'), ('b', 0, 'y'), ('c', 0, 'z')]

    If the *strict* keyword argument is ``True``, then
    ``UnequalIterablesError`` will be raised if any of the iterables have
    different lengths.
    """

    def is_scalar(obj):
        # Declared scalar types count as scalar even though str/bytes are
        # iterable; beyond that, anything iter() rejects is a scalar.
        if scalar_types and isinstance(obj, scalar_types):
            return True
        try:
            iter(obj)
        except TypeError:
            return True
        else:
            return False

    size = len(objects)
    if not size:
        return

    # Partition arguments into iterables and scalars, remembering each
    # one's original position so output tuples can be reassembled in order.
    iterables, iterable_positions = [], []
    scalars, scalar_positions = [], []
    for i, obj in enumerate(objects):
        if is_scalar(obj):
            scalars.append(obj)
            scalar_positions.append(i)
        else:
            iterables.append(iter(obj))
            iterable_positions.append(i)

    # All-scalar input produces exactly one output tuple.
    if len(scalars) == size:
        yield tuple(objects)
        return

    zipper = _zip_equal if strict else zip
    for item in zipper(*iterables):
        new_item = [None] * size

        # Place this round's iterable values at their original positions.
        for i, elem in zip(iterable_positions, item):
            new_item[i] = elem

        # Broadcast the scalars into every output tuple.
        for i, elem in zip(scalar_positions, scalars):
            new_item[i] = elem

        yield tuple(new_item)
4142
4143
def unique_in_window(iterable, n, key=None):
    """Yield the items from *iterable* that haven't been seen recently.
    *n* is the size of the lookback window.

    >>> iterable = [0, 1, 0, 2, 3, 0]
    >>> n = 3
    >>> list(unique_in_window(iterable, n))
    [0, 1, 2, 3, 0]

    The *key* function, if provided, will be used to determine uniqueness:

    >>> list(unique_in_window('abAcda', 3, key=lambda x: x.lower()))
    ['a', 'b', 'c', 'd', 'a']

    The items in *iterable* must be hashable.

    """
    if n <= 0:
        raise ValueError('n must be greater than 0')

    # The deque tracks the order of recently-seen keys; the set gives O(1)
    # membership tests over the same keys.
    window = deque(maxlen=n)
    seen = set()

    for item in iterable:
        k = item if key is None else key(item)
        if k in seen:
            continue

        # Evict the oldest key before the deque drops it automatically.
        if len(seen) == n:
            seen.discard(window[0])

        seen.add(k)
        window.append(k)
        yield item
4180
4181
def duplicates_everseen(iterable, key=None):
    """Yield duplicate elements after their first appearance.

    >>> list(duplicates_everseen('mississippi'))
    ['s', 'i', 's', 's', 'i', 'p', 'i']
    >>> list(duplicates_everseen('AaaBbbCccAaa', str.lower))
    ['a', 'a', 'b', 'b', 'c', 'c', 'A', 'a', 'a']

    This function is analagous to :func:`unique_everseen` and is subject to
    the same performance considerations.

    """
    # Hashable keys live in a set (fast); unhashable ones fall back to a
    # list (slow but correct).
    hashable_seen = set()
    unhashable_seen = []

    for item in iterable:
        k = item if key is None else key(item)
        try:
            if k in hashable_seen:
                yield item
            else:
                hashable_seen.add(k)
        except TypeError:
            if k in unhashable_seen:
                yield item
            else:
                unhashable_seen.append(k)
4210
4211
def duplicates_justseen(iterable, key=None):
    """Yields serially-duplicate elements after their first appearance.

    >>> list(duplicates_justseen('mississippi'))
    ['s', 's', 'p']
    >>> list(duplicates_justseen('AaaBbbCccAaa', str.lower))
    ['a', 'a', 'b', 'b', 'c', 'c', 'a', 'a']

    This function is analagous to :func:`unique_justseen`.

    """
    for _, run in groupby(iterable, key):
        # Discard the first member of each run; everything after it is a
        # serial duplicate.
        next(run, None)
        yield from run
4229
4230
def minmax(iterable_or_value, *others, key=None, default=_marker):
    """Returns both the smallest and largest items in an iterable
    or the largest of two or more arguments.

    >>> minmax([3, 1, 5])
    (1, 5)

    >>> minmax(4, 2, 6)
    (2, 6)

    If a *key* function is provided, it will be used to transform the input
    items for comparison.

    >>> minmax([5, 30], key=str)  # '30' sorts before '5'
    (30, 5)

    If a *default* value is provided, it will be returned if there are no
    input items.

    >>> minmax([], default=(0, 0))
    (0, 0)

    Otherwise ``ValueError`` is raised.

    This function is based on the
    `recipe <http://code.activestate.com/recipes/577916/>`__ by
    Raymond Hettinger and takes care to minimize the number of comparisons
    performed.
    """
    # Support both minmax(iterable) and minmax(a, b, ...) call forms.
    iterable = (iterable_or_value, *others) if others else iterable_or_value

    it = iter(iterable)

    try:
        lo = hi = next(it)
    except StopIteration as e:
        if default is _marker:
            raise ValueError(
                '`minmax()` argument is an empty iterable. '
                'Provide a `default` value to suppress this error.'
            ) from e
        return default

    # Different branches depending on the presence of key. This saves a lot
    # of unimportant copies which would slow the "key=None" branch
    # significantly down.
    if key is None:
        # Pull items two at a time (padding the final odd item with lo,
        # which is harmless) and order the pair first; then only the
        # smaller is compared against lo and the larger against hi —
        # 3 comparisons per 2 items instead of 4.
        for x, y in zip_longest(it, it, fillvalue=lo):
            if y < x:
                x, y = y, x
            if x < lo:
                lo = x
            if hi < y:
                hi = y

    else:
        lo_key = hi_key = key(lo)

        # Same pairing strategy, but cache each item's key so key() is
        # called exactly once per item.
        for x, y in zip_longest(it, it, fillvalue=lo):
            x_key, y_key = key(x), key(y)

            if y_key < x_key:
                x, y, x_key, y_key = y, x, y_key, x_key
            if x_key < lo_key:
                lo, lo_key = x, x_key
            if hi_key < y_key:
                hi, hi_key = y, y_key

    return lo, hi
4300
4301
def constrained_batches(
    iterable, max_size, max_count=None, get_len=len, strict=True
):
    """Yield batches of items from *iterable* with a combined size limited by
    *max_size*.

    >>> iterable = [b'12345', b'123', b'12345678', b'1', b'1', b'12', b'1']
    >>> list(constrained_batches(iterable, 10))
    [(b'12345', b'123'), (b'12345678', b'1', b'1'), (b'12', b'1')]

    If a *max_count* is supplied, the number of items per batch is also
    limited:

    >>> iterable = [b'12345', b'123', b'12345678', b'1', b'1', b'12', b'1']
    >>> list(constrained_batches(iterable, 10, max_count = 2))
    [(b'12345', b'123'), (b'12345678', b'1'), (b'1', b'12'), (b'1',)]

    If a *get_len* function is supplied, use that instead of :func:`len` to
    determine item size.

    If *strict* is ``True``, raise ``ValueError`` if any single item is bigger
    than *max_size*. Otherwise, allow single items to exceed *max_size*.
    """
    # Validate eagerly, at call time. If this function were itself a
    # generator, a bad max_size would not raise until the caller first
    # iterated the result, which makes the error hard to trace.
    if max_size <= 0:
        raise ValueError('maximum size must be greater than zero')

    def _batcher():
        # Accumulate items plus a running size/count so each incoming item
        # needs only O(1) work to decide whether to flush the batch.
        batch = []
        batch_size = 0
        batch_count = 0
        for item in iterable:
            item_len = get_len(item)
            if strict and item_len > max_size:
                raise ValueError('item size exceeds maximum size')

            reached_count = batch_count == max_count
            reached_size = item_len + batch_size > max_size
            # Flush only a non-empty batch; with strict=False a single
            # oversized item still forms its own batch.
            if batch_count and (reached_size or reached_count):
                yield tuple(batch)
                batch = []
                batch_size = 0
                batch_count = 0

            batch.append(item)
            batch_size += item_len
            batch_count += 1

        # Emit whatever is left after the input is exhausted.
        if batch:
            yield tuple(batch)

    return _batcher()
4350
4351
def gray_product(*iterables):
    """Like :func:`itertools.product`, but return tuples in an order such
    that only one element in the generated tuple changes from one iteration
    to the next.

        >>> list(gray_product('AB','CD'))
        [('A', 'C'), ('B', 'C'), ('B', 'D'), ('A', 'D')]

    This function consumes all of the input iterables before producing output.
    If any of the input iterables have fewer than two items, ``ValueError``
    is raised.

    For information on the algorithm, see
    `this section <https://www-cs-faculty.stanford.edu/~knuth/fasc2a.ps.gz>`__
    of Donald Knuth's *The Art of Computer Programming*.
    """
    # Materialize every input up front; the algorithm needs random access
    # and the lengths of all pools.
    pools = tuple(tuple(pool) for pool in iterables)
    n = len(pools)
    for pool in pools:
        if len(pool) < 2:
            raise ValueError("each iterable must have two or more items")

    # Knuth's "Algorithm H" from section 7.2.1.1, page 20.
    # indices[k] is the position currently selected within pools[k];
    # focus holds the "focus pointers" and step the +1/-1 "directions".
    indices = [0] * n
    focus = list(range(n + 1))
    step = [1] * n

    while True:
        yield tuple(pool[i] for pool, i in zip(pools, indices))
        j = focus[0]
        focus[0] = 0
        if j == n:
            # Every focus pointer has been exhausted: enumeration complete.
            return
        indices[j] += step[j]
        if indices[j] in (0, len(pools[j]) - 1):
            # Hit either end of pool j: reverse its direction and pass the
            # focus along so a different coordinate moves next time.
            step[j] = -step[j]
            focus[j] = focus[j + 1]
            focus[j + 1] = j + 1