[Python-Dev] "groupby" iterator

Raymond Hettinger python at rcn.com
Sat Nov 29 01:12:34 EST 2003


[Guido]
> I would make one change: after looking at another use case, I'd like
> to change the outer iterator to produce (key, grouper) tuples.  This
> way, you can write things like
> 
>   totals = {}
>   for key, group in sequence:
>       totals[key] = sum(group)

Here is an implementation that translates readily into C.  It uses
Guido's syntax and meets my requirement that bad things don't happen
when someone runs the outer iterator independently of the inner
iterator.

class groupby(object):
    __slots__ = ('keyfunc', 'it', 'tgtkey', 'currkey', 'currvalue')
    def __init__(self, key, iterable):
        NULL = 1+909.9j    # In C, use the real NULL
        self.keyfunc = key
        self.it = iter(iterable)
        self.tgtkey = NULL
        self.currkey = NULL
        self.currvalue = NULL
    def __iter__(self):
        return self
    def next(self):
        while self.currkey == self.tgtkey:
            self.currvalue = self.it.next() # Exit on StopIteration
            self.currkey = self.keyfunc(self.currvalue)
        self.tgtkey = self.currkey
        return (self.currkey, self._grouper(self.currkey))
    def _grouper(self, tgtkey):
        while self.currkey == tgtkey:
            yield self.currvalue
            self.currvalue = self.it.next() # Exit on StopIteration
            self.currkey = self.keyfunc(self.currvalue)

import unittest

class TestBasicOps(unittest.TestCase):

    def test_groupby(self):
        # Check zero length input
        self.assertEqual([], list(groupby(lambda r:r[0], [])))

        # Check normal input
        s = [(0, 10, 20), (0, 11,21), (0,12,21), (1,13,21), (1,14,22),
             (2,15,22), (3,16,23), (3,17,23)]
        dup = []
        for k, g in groupby(lambda r:r[0], s):
            for elem in g:
                self.assertEqual(k, elem[0])
                dup.append(elem)
        self.assertEqual(s, dup)

        # Check nested case
        dup = []
        for k, g in groupby(lambda r:r[0], s):
            for ik, ig in groupby(lambda r:r[2], g):      
                for elem in ig:
                    self.assertEqual(k, elem[0])
                    self.assertEqual(ik, elem[2])               
                    dup.append(elem)
        self.assertEqual(s, dup)      

        # Check case where inner iterator is not used
        keys = []
        for k, g in groupby(lambda r:r[0], s):
            keys.append(k)
        expectedkeys = set([r[0] for r in s])
        self.assertEqual(set(keys), expectedkeys)
        self.assertEqual(len(keys), len(expectedkeys))

suite = unittest.TestSuite()
suite.addTest(unittest.makeSuite(TestBasicOps))
unittest.TextTestRunner(verbosity=2).run(suite)



Raymond





More information about the Python-Dev mailing list