itertools.flatten()? and copying generators/iterators.

Thu Oct 30 05:28:27 EST 2003

"Peter Otten" <__peter__ at web.de> wrote in message
news:bnqih2$v3c$01$1 at news.t-online.com...
> Francis Avila wrote:
>
> >> for flatten_dict(). So you get some speed up at the cost of uglier
though
> >> still quite readable code and under the assumption that either all or
no
> >> instances of a class are iterable.
> >
> > That's not always a good assumption to make, if we want to decide
whether
> > to iterate based upon some property of the object (which is actually how
I
> > originally discovered I needed a "flatten").
> >
> > Perhaps we can combine both conditions and caching optimizations?
>
> I didn't work this out, but I suppose the way to go is to allow three
states
> (ALWAYS, NEVER, NOCACHE) in the dictionary and only invoke itercond() on
> cache misses and NOCACHE values.
>
> Peter

Actually, now that I think of it, just put functions into the dictionary.
Do a type lookup, if False, don't iterate, if True, iterate, if a callable,
call it with the seq and have it return True/False and the replacement seq.
This corresponds to the four possible states: InstanceIterable,
InstanceNotIterable (dict keys with function values), and  ClassIterable,
ClassNotIterable (dict keys with True/False).

This would eliminate the "isinstance()" that would end up being at the
beginning of every conditional function, and there'd be no need to fret over
when to return None.  But if the issue of type were more complex than
looking at the class, the dict lookup wouldn't be enough.  Perhaps if there
were a default function in the cache dict, called on misses?  That would
eliminate the final argument to flatten_itercond.  Although, what would be
the key for cache dict, such that it never gets overwritten?

I really need to stop thinking about this silly flatten() function, but
here's a quick hack which passes the hastily-modified doctests:

--- Code ---

# Peter Otten's 'toiter' and dict-lookup flatten()s, combined by
# Francis Avila, with function-items in dict.
def flatten_dictcond(iterable, do_descend=None,
                       itercond=lambda seq:(None, seq)):
    """Yield leaves of iterable.

    itercond is the default handler, if there's no type match.
    add types and function handler to do_descend.

    Tests:

    >>> flatten = flatten_dictcond  #for doctest
    >>> tree = [1, 2, ['This', 4, 'is', ['a']], 7, 'funky tree']
    >>> list(flatten(tree))
    [1, 2, 'This', 4, 'is', 'a', 7, 'funky tree']
    >>> # Now lets break up strings into characters.
    >>> def tochars(s):
    ...     # This one works by modifying s.
    ...     # if not a char, split it into chars.
    ...     if len(s) != 1:
    ...         return True, tuple(s)
    ...     else:
    ...         return False, s
    ...
    >>> def stopchars(s):
    ...     #This one works by stopping descent when we reach a char.
    ...     if len(s) == 1:
    ...         return False, s
    ...     else:
    ...         return True, s
    ...
    >>> mres = list(flatten(tree, do_descend={type(''):tochars},))
    >>> sres = list(flatten(tree, {type(''):stopchars}))
    >>> if mres == sres:
    ...     print 'True'
    ...
    True
    >>> mres
    [1, 2, 'T', 'h', 'i', 's', 4, 'i', 's', 'a', 7, 'f', 'u', 'n', 'k', 'y',
' ', 't', 'r', 'e', 'e']
    >>> def dropnum(s):
    ...     return True, ()
    ...
    >>> list(flatten(tree, {type(''):False, type(0):dropnum}))
    ['This', 'is', 'a', 'funky tree']

    >>> flatten=flatten_dictcond #for doctest
    >>> class Node(object):
    ...     def __init__(self, label=None, data=None, children=()):
    ...         self.children = children
    ...         self.label = label
    ...         self.data = data
    ...     def __iter__(self):
    ...         return iter(self.children)
    ...
    >>> leaves = [Node(chr(i+65),i) for i in range(5)]
    >>> branches = [Node(chr(i+65), i, leaves) for i in range(5,10)]
    >>> root = [Node(chr(i+65), i, branches) for i in range(10,15)]
    >>> list(flatten(root))
    []
    >>> def leafdata(n):
    ...     if not n.children:
    ...         return False, (n.label, n.data)
    ...     else:
    ...         return True, n
    ...
    >>> list(flatten(root, {Node:leafdata}))
    [('A', 0), ('B', 1), ('C', 2), ('D', 3), ('E', 4), ('A', 0), ('B', 1),
('C', 2), ('D', 3), ('E', 4), ('A', 0), ('B', 1), ('C', 2), ('D', 3), ('E',
4), ('A', 0), ('B', 1), ('C', 2), ('D', 3), ('E', 4), ('A', 0), ('B', 1),
('C', 2), ('D', 3), ('E', 4), ('A', 0), ('B', 1), ('C', 2), ('D', 3), ('E',
4), ('A', 0), ('B', 1), ('C', 2), ('D', 3), ('E', 4), ('A', 0), ('B', 1),
('C', 2), ('D', 3), ('E', 4), ('A', 0), ('B', 1), ('C', 2), ('D', 3), ('E',
4), ('A', 0), ('B', 1), ('C', 2), ('D', 3), ('E', 4), ('A', 0), ('B', 1),
('C', 2), ('D', 3), ('E', 4), ('A', 0), ('B', 1), ('C', 2), ('D', 3), ('E',
4), ('A', 0), ('B', 1), ('C', 2), ('D', 3), ('E', 4), ('A', 0), ('B', 1),
('C', 2), ('D', 3), ('E', 4), ('A', 0), ('B', 1), ('C', 2), ('D', 3), ('E',
4), ('A', 0), ('B', 1), ('C', 2), ('D', 3), ('E', 4), ('A', 0), ('B', 1),
('C', 2), ('D', 3), ('E', 4), ('A', 0), ('B', 1), ('C', 2), ('D', 3), ('E',
4), ('A', 0), ('B', 1), ('C', 2), ('D', 3), ('E', 4), ('A', 0), ('B', 1),
('C', 2), ('D', 3), ('E', 4), ('A', 0), ('B', 1), ('C', 2), ('D', 3), ('E',
4), ('A', 0), ('B', 1), ('C', 2), ('D', 3), ('E', 4), ('A', 0), ('B', 1),
('C', 2), ('D', 3), ('E', 4), ('A', 0), ('B', 1), ('C', 2), ('D', 3), ('E',
4), ('A', 0), ('B', 1), ('C', 2), ('D', 3), ('E', 4)]

    """
    if do_descend is None:
        do_descend = {type(''):False, type(u''):False}
    try:
        descend = do_descend[iterable.__class__]
    except KeyError:
        # Default handling remains the same until I think about it more.
        descend, iterable = itercond(iterable)
        if not descend is None:
            # If descend *is* None, class has consistent iterability.
            # But we don't know what it is, so we find out in the else
            # block.
            if descend: #InstanceIterable
                iterable = iter(iterable)
            # Else, descend is InstanceNotIterable.
        else:
            t = iterable.__class__
            try:
                iterable = iter(iterable)
            except TypeError:
                # ClassNotIterable
                descend = do_descend[t] = False
            else:
                # ClassIterable
                descend = do_descend[t] = True
    else:
        #Ok, cache hit.
        if callable(descend):
            descend, iterable = descend(iterable)
            # XXX I didn't modify the dict item here, did I?

    if not descend:
        yield iterable
    else:
        for elem in iterable:
            for subelem in flatten_dictcond(elem, do_descend, itercond):
                yield subelem

--
Francis Avila