9.7. itertools — Functions creating iterators for efficient looping — Python v2.6.9 documentation (original) (raw)

This section shows recipes for creating an extended toolset using the existing itertools as building blocks.

The extended tools offer the same high performance as the underlying toolset. The superior memory performance is kept by processing elements one at a time rather than bringing the whole iterable into memory all at once. Code volume is kept small by linking the tools together in a functional style which helps eliminate temporary variables. High speed is retained by preferring “vectorized” building blocks over the use of for-loops and generators which incur interpreter overhead.

def take(n, iterable): "Return first n items of the iterable as a list" return list(islice(iterable, n))

def tabulate(function, start=0): "Return function(0), function(1), ..." return imap(function, count(start))

def consume(iterator, n): "Advance the iterator n-steps ahead. If n is none, consume entirely." # The technique uses objects that consume iterators at C speed. if n is None: # feed the entire iterator into a zero-length deque collections.deque(iterator, maxlen=0) else: # advance to the emtpy slice starting at position n next(islice(iterator, n, n), None)

def nth(iterable, n, default=None): "Returns the nth item or a default value" return next(islice(iterable, n, None), default)

def quantify(iterable, pred=bool): "Count how many times the predicate is true" return sum(imap(pred, iterable))

def padnone(iterable): """Returns the sequence elements and then returns None indefinitely.

Useful for emulating the behavior of the built-in map() function.
"""
return chain(iterable, repeat(None))

def ncycles(iterable, n): "Returns the sequence elements n times" return chain.from_iterable(repeat(tuple(iterable), n))

def dotproduct(vec1, vec2): return sum(imap(operator.mul, vec1, vec2))

def flatten(listOfLists): "Flatten one level of nesting" return chain.from_iterable(listOfLists)

def repeatfunc(func, times=None, *args): """Repeat calls to func with specified arguments.

Example:  repeatfunc(random.random)
"""
if times is None:
    return starmap(func, repeat(args))
return starmap(func, repeat(args, times))

def pairwise(iterable): "s -> (s0,s1), (s1,s2), (s2, s3), ..." a, b = tee(iterable) next(b, None) return izip(a, b)

def grouper(n, iterable, fillvalue=None): "grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx" args = [iter(iterable)] * n return izip_longest(fillvalue=fillvalue, *args)

def roundrobin(*iterables): "roundrobin('ABC', 'D', 'EF') --> A D E B F C" # Recipe credited to George Sakkis pending = len(iterables) nexts = cycle(iter(it).next for it in iterables) while pending: try: for next in nexts: yield next() except StopIteration: pending -= 1 nexts = cycle(islice(nexts, pending))

def compress(data, selectors): "compress('ABCDEF', [1,0,1,0,1,1]) --> A C E F" return (d for d, s in izip(data, selectors) if s)

def combinations_with_replacement(iterable, r): "combinations_with_replacement('ABC', 2) --> AA AB AC BB BC CC" # number items returned: (n+r-1)! / r! / (n-1)! pool = tuple(iterable) n = len(pool) if not n and r: return indices = [0] * r yield tuple(pool[i] for i in indices) while True: for i in reversed(range(r)): if indices[i] != n - 1: break else: return indices[i:] = [indices[i] + 1] * (r - i) yield tuple(pool[i] for i in indices)

def powerset(iterable): "powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)" s = list(iterable) return chain.from_iterable(combinations(s, r) for r in range(len(s)+1))

def unique_everseen(iterable, key=None): "List unique elements, preserving order. Remember all elements ever seen." # unique_everseen('AAAABBBCCDAABBB') --> A B C D # unique_everseen('ABBCcAD', str.lower) --> A B C D seen = set() seen_add = seen.add if key is None: for element in ifilterfalse(seen.contains, iterable): seen_add(element) yield element else: for element in iterable: k = key(element) if k not in seen: seen_add(k) yield element

def unique_justseen(iterable, key=None): "List unique elements, preserving order. Remember only the element just seen." # unique_justseen('AAAABBBCCDAABBB') --> A B C D A B # unique_justseen('ABBCcAD', str.lower) --> A B C A D return imap(next, imap(itemgetter(1), groupby(iterable, key)))

def iter_except(func, exception, first=None): """ Call a function repeatedly until an exception is raised.

Converts a call-until-exception interface to an iterator interface.
Like __builtin__.iter(func, sentinel) but uses an exception instead
of a sentinel to end the loop.

Examples:
    bsddbiter = iter_except(db.next, bsddb.error, db.first)
    heapiter = iter_except(functools.partial(heappop, h), IndexError)
    dictiter = iter_except(d.popitem, KeyError)
    dequeiter = iter_except(d.popleft, IndexError)
    queueiter = iter_except(q.get_nowait, Queue.Empty)
    setiter = iter_except(s.pop, KeyError)

"""
try:
    if first is not None:
        yield first()
    while 1:
        yield func()
except exception:
    pass

def random_product(*args, **kwds): "Random selection from itertools.product(*args, **kwds)" pools = map(tuple, args) * kwds.get('repeat', 1) return tuple(random.choice(pool) for pool in pools)

def random_permutation(iterable, r=None): "Random selection from itertools.permutations(iterable, r)" pool = tuple(iterable) r = len(pool) if r is None else r return tuple(random.sample(pool, r))

def random_combination(iterable, r): "Random selection from itertools.combinations(iterable, r)" pool = tuple(iterable) n = len(pool) indices = sorted(random.sample(xrange(n), r)) return tuple(pool[i] for i in indices)

def random_combination_with_replacement(iterable, r): "Random selection from itertools.combinations_with_replacement(iterable, r)" pool = tuple(iterable) n = len(pool) indices = sorted(random.randrange(n) for i in xrange(r)) return tuple(pool[i] for i in indices)

Note, many of the above recipes can be optimized by replacing global lookups with local variables defined as default values. For example, the_dotproduct_ recipe can be written as:

def dotproduct(vec1, vec2, sum=sum, imap=imap, mul=operator.mul): return sum(imap(mul, vec1, vec2))