Is it possible to mark an iterator as prematurely exhausted?
Is there an idiomatic way to terminate an iterator early, so that any further next() calls raise StopIteration? (I can think of ugly ways, like abusing itertools.takewhile or discarding values until the thing is exhausted.)
Edit:
My algorithm takes n iterators of unknown, varying length as input. It uses izip_longest() to read one item from each, in n-tuples, until all are exhausted. Sometimes I find I want to stop reading early from one of the iterators according to some run-time criterion, and replace it with the stream of default values provided by izip_longest(). The least intrusive way I can think of is to "end" it somehow.
source to share
From the itertools recipes:
def consume(iterator, n=None):
    """Advance the iterator n steps ahead. If n is None, consume entirely.

    Args:
        iterator: The iterator to advance in place.
        n: Number of items to skip, or None (the default) to exhaust the
            iterator completely. If fewer than n items remain, the
            iterator is simply left exhausted.

    Returns:
        None. The skipped items are discarded.
    """
    # Use functions that consume iterators at C speed.
    if n is None:
        # Feed the entire iterator into a zero-length deque.
        collections.deque(iterator, maxlen=0)
    else:
        # Advance to the empty slice starting at position n.
        next(islice(iterator, n, n), None)
source to share
class MyIter:
    """Iterator wrapper that can be terminated early through a flag.

    Setting ``done`` to True makes every further ``next()`` call raise
    StopIteration, even if the wrapped iterable still has items left.
    """

    def __init__(self, what):
        """Wrap the iterable *what* and start iterating over it."""
        self.what = what
        self.done = False
        self.iter = iter(what)

    def __iter__(self):
        """Restart iteration over the wrapped iterable and return self."""
        self.done = False
        self.iter = iter(self.what)
        # The iterator protocol requires __iter__ to return an iterator;
        # without this, ``for`` loops and list() fail with a TypeError.
        return self

    def next(self):
        """Return the next item, or raise StopIteration once done is set."""
        if self.done:
            raise StopIteration
        return next(self.iter)

    # Python 3 looks up __next__; keep ``next`` for Python 2 callers.
    __next__ = next
# Demo: pull one value, flag the iterator as done, then try to advance it.
x = MyIter(range(100))
print(next(x))  # prints 0, the first value of range(100)
x.done = True
next(x)  # raises StopIteration because the done flag is set
But that sounds like a bad idea overall.
What you should really do is:
# Walk every iterator in turn, abandoning each one as soon as check() says so.
for my_iterator in all_iterators:
    for element in my_iterator:  # iterate over it
        if check(element):  # if whatever condition is true
            break  # then we are done with this iterator, on to the next
For the example given in the comments by @jme, use something like this:
# Process elements of the i-th iterator only while their position j is <= i.
for i, my_iterator in enumerate(all_iterators):
    for j, element in enumerate(my_iterator):  # iterate over it
        if j > i:  # if whatever condition is true
            break  # then we are done with this iterator, on to the next
        else:
            do_something(element)
source to share
In your edit, you give your use case: you want something that behaves like izip_longest, but allows you to disable iterators prematurely. Here's an iterator class that allows this, as well as re-enabling a previously disabled iterator.
class TerminableZipper(object):
    """Zip-longest over several iterators that can be switched off and on.

    While an iterator is disabled, its column is filled with ``fill``
    and the iterator itself is not advanced. Re-enabling it resumes
    from wherever it was left.

    Args:
        iterators: Iterable of hashable iterators, one per output column.
        fill: Value used for exhausted or disabled columns.
    """

    def __init__(self, iterators, fill="n/a"):
        # Maps each iterator to its "active" flag; order fixes the columns.
        self.iterators = collections.OrderedDict((it, True)
                                                 for it in iterators)
        self.fill = fill
        # Build from the stored mapping, not from the argument: the
        # argument may be a one-shot iterable already consumed above.
        self._make_iterators()

    def disable(self, iterator):
        """Stop reading from *iterator*; its column yields the fill value."""
        self.iterators[iterator] = False
        self._make_iterators()

    def enable(self, iterator):
        """Resume reading from a previously disabled *iterator*."""
        self.iterators[iterator] = True
        self._make_iterators()

    def _make_iterators(self):
        """Rebuild the underlying zipper from the current active flags."""
        try:
            zip_longest = itertools.zip_longest  # Python 3 name
        except AttributeError:
            zip_longest = itertools.izip_longest  # Python 2 name
        # A disabled iterator is replaced by an empty one, so zip_longest
        # substitutes the fill value without touching the real iterator.
        effective_iterators = [
            iterator if active else iter(())
            for iterator, active in self.iterators.items()
        ]
        self.zipper = zip_longest(*effective_iterators,
                                  fillvalue=self.fill)

    def __iter__(self):
        return self

    def next(self):
        """Return the next tuple; raise StopIteration when all columns end."""
        return next(self.zipper)

    # Python 3 looks up __next__; keep ``next`` for Python 2 callers.
    __next__ = next
Example:
>>> it_a = itertools.repeat(0)
>>> it_b = iter(["a", "b", "c", "d", "e", "f"])
>>> it_c = iter(["q", "r", "x"])
>>> zipper = TerminableZipper([it_a, it_b, it_c])
>>> next(zipper)
(0, 'a', 'q')
>>> next(zipper)
(0, 'b', 'r')
>>> zipper.disable(it_a)
>>> next(zipper)
('n/a', 'c', 'x')
>>> zipper.enable(it_a)
>>> next(zipper)
(0, 'd', 'n/a')
source to share
Here is another answer, which I decided to post separately because it differs from my other one. I think this might be preferable: store the iterators in an ordered dict, mapping each iterator to a flag (True if the iterator is active, False otherwise). First, we want a function that takes such a dict and calls next on each iterator, substituting the default value and updating the iterator's status if it is exhausted:
import itertools
import collections
def deactivating_zipper(iterators, default):
    """Yield rows drawn from several iterators that can be switched off.

    Args:
        iterators: Mutable mapping of iterator -> bool "active" flag. The
            mapping's order fixes the column order. Callers may clear a
            flag between rows to stop reading from that iterator; this
            function clears the flag itself when an iterator runs out.
        default: Value placed in the column of an inactive or exhausted
            iterator.

    Yields:
        A list with one value per iterator. Generation stops as soon as
        no iterator is active any more.
    """
    while True:
        values = []
        # Iterate over a snapshot: flags in the mapping are rewritten below.
        for iterator, active in list(iterators.items()):
            if not active:
                values.append(default)
                continue
            try:
                values.append(next(iterator))
            except StopIteration:
                values.append(default)
                iterators[iterator] = False
        # Nothing active means this row holds only defaults; drop it and stop.
        if not any(iterators.values()):
            return
        yield values
So, if we have three iterators:
# Three finite iterators of different lengths (5, 8 and 4 items).
it_a = iter(["a", "b", "c", "d", "e"])
it_b = iter(range(1, 9))
it_c = iter(["foo", "bar", "baz", "quux"])
# Flag every iterator as active; insertion order fixes the column order.
iterators = collections.OrderedDict.fromkeys([it_a, it_b, it_c], True)
We can just iterate over them like this:
for a, b, c in deactivating_zipper(iterators, "n/a"):
    # deactivate it_a
    if b == 3:
        iterators[it_a] = False
    # %-formatting prints the same text under Python 2 and Python 3.
    print("%s %s %s" % (a, b, c))
This gives the result:
a 1 foo
b 2 bar
c 3 baz
n/a 4 quux
n/a 5 n/a
n/a 6 n/a
n/a 7 n/a
n/a 8 n/a
source to share
In the end, I settled on itertools.takewhile(). This is a bit more concise than the other answers that use flags to "consume" an iterator in constant time:
from itertools import takewhile, izip_longest
def f(seqs):
    """Demonstrate ending one zipped sequence early via a per-sequence flag.

    Each sequence is wrapped in ``takewhile`` with a predicate that reads
    its entry in ``done``. Setting ``done[i]`` to True makes wrapper ``i``
    stop at its next item, after which zip_longest fills that column
    with None.

    Args:
        seqs: Sized collection of iterables (at least two, each with at
            least three items for this demo).

    Prints the first three zipped tuples, flagging sequence 1 as done
    after the first one.
    """
    from itertools import takewhile
    # izip_longest was renamed zip_longest in Python 3; support both.
    try:
        from itertools import izip_longest as zip_longest
    except ImportError:
        from itertools import zip_longest

    done = [False] * len(seqs)
    # i=i binds the current index now; a plain closure would see the last i.
    iters = [
        takewhile(lambda _, i=i: not done[i], s)
        for i, s in enumerate(seqs)
    ]
    zipped = zip_longest(*iters)
    # for example:
    print(next(zipped))
    done[1] = True
    print(next(zipped))
    print(next(zipped))
# Run the demo on three 3-item sequences; f flags the second one as done
# after printing the first tuple.
f((['a', 'b', 'c'], [1, 2, 3], ['foo', 'bar', 'baz']))
Output:
('a', 1, 'foo')
('b', None, 'bar')
('c', None, 'baz')
source to share