[Python-Dev] "groupby" iterator

Raymond Hettinger python at rcn.com
Sat Nov 29 01:12:34 EST 2003

```[Guido]
> I would make one change: after looking at another use case, I'd like
> to change the outer iterator to produce (key, grouper) tuples.  This
> way, you can write things like
>
>   totals = {}
>   for key, group in sequence:
>       totals[key] = sum(group)

Here is an implementation that translates readily into C.  It uses
Guido's syntax and meets my requirement that bad things don't happen
when someone runs the outer iterator independently of the inner
iterator.

class groupby(object):
__slots__ = ('keyfunc', 'it', 'tgtkey', 'currkey', 'currvalue')
def __init__(self, key, iterable):
NULL = 1+909.9j    # In C, use the real NULL
self.keyfunc = key
self.it = iter(iterable)
self.tgtkey = NULL
self.currkey = NULL
self.currvalue = NULL
def __iter__(self):
return self
def next(self):
while self.currkey == self.tgtkey:
self.currvalue = self.it.next() # Exit on StopIteration
self.currkey = self.keyfunc(self.currvalue)
self.tgtkey = self.currkey
return (self.currkey, self._grouper(self.currkey))
def _grouper(self, tgtkey):
while self.currkey == tgtkey:
yield self.currvalue
self.currvalue = self.it.next() # Exit on StopIteration
self.currkey = self.keyfunc(self.currvalue)

import unittest

class TestBasicOps(unittest.TestCase):

def test_groupby(self):
# Check zero length input
self.assertEqual([], list(groupby(lambda r:r[0], [])))

# Check normal input
s = [(0, 10, 20), (0, 11,21), (0,12,21), (1,13,21), (1,14,22),
(2,15,22), (3,16,23), (3,17,23)]
dup = []
for k, g in groupby(lambda r:r[0], s):
for elem in g:
self.assertEqual(k, elem[0])
dup.append(elem)
self.assertEqual(s, dup)

# Check nested case
dup = []
for k, g in groupby(lambda r:r[0], s):
for ik, ig in groupby(lambda r:r[2], g):
for elem in ig:
self.assertEqual(k, elem[0])
self.assertEqual(ik, elem[2])
dup.append(elem)
self.assertEqual(s, dup)

# Check case where inner iterator is not used
keys = []
for k, g in groupby(lambda r:r[0], s):
keys.append(k)
expectedkeys = set([r[0] for r in s])
self.assertEqual(set(keys), expectedkeys)
self.assertEqual(len(keys), len(expectedkeys))

suite = unittest.TestSuite()