
In article <000101c3b63f$c7fc4720$e841fea9@oemcomputer>, "Raymond Hettinger" <python@rcn.com> wrote:
Here is an implementation that translates readily into C. It uses Guido's syntax and meets my requirement that bad things don't happen when someone runs the outer iterator independently of the inner iterator.
If I understand your code correctly, running the outer iterator skips over any uniterated values from the inner iterator. I'd be happier with behavior like tee: the inner groups always return the same sequences of items, whether or not the inner iteration happens before the next outer iteration, but the memory cost is only small if you iterate through them in the expected order. E.g., see the "out of order" unit test in the code below. def identity(x): return x def groupby(iterable,key=identity): it = iter(iterable) first = it.next() while 1: group = bygroup(it,first,key) yield key(first),group first = group.nextgroup() class bygroup: """Iterator of items in a single group.""" def __init__(self, iterable, first, key=identity): """Instance variables: - self.lookahead: reversed list of items still to be output - self.groupid: group identity - self.key: func to turn iterated items into group ids - self.it: iterator, or None once we reach another group - self.postfinal: None (only valid once self.it is None) """ self.key = key self.it = iter(iterable) self.lookahead = [first] self.groupid = self.key(first) def __iter__(self): return self def group(self): return self.groupid def next(self): if self.lookahead: return self.lookahead.pop() if self.it is None: raise StopIteration x = self.it.next() if self.key(x) == self.groupid: return x self.postfinal = x self.it = None raise StopIteration def nextgroup(self): """Return first item of next group. Raises StopIteration if there is no next group.""" if self.it is not None: L = list(self) L.reverse() self.lookahead = L if self.it is not None: raise StopIteration return self.postfinal import unittest from sets import Set as set class TestBasicOps(unittest.TestCase): def test_groupby(self): # Check zero length input self.assertEqual([], list(groupby([],lambda r:r[0]))) # Check normal input s = [(0, 10, 20), (0, 11,21), (0,12,21), (1,13,21), (1,14,22), (2,15,22), (3,16,23), (3,17,23)] dup = [] for k, g in groupby(s, lambda r:r[0]): for elem in g: self.assertEqual(k, elem[0]) dup.append(elem) self.assertEqual(s, dup) # Check case where groups are iterated out of order nest1 = [] for k,g in groupby(s, lambda r:r[0]): nest1.append(list(g)) nest2 = [] for k,g in groupby(s, lambda r:r[0]): nest2.append(g) nest2 = [list(g) for g in nest2] self.assertEqual(nest1,nest2) # Check nested case dup = [] for k, g in groupby(s, lambda r:r[0]): for ik, ig in groupby(g, lambda r:r[2]): for elem in ig: self.assertEqual(k, elem[0]) self.assertEqual(ik, elem[2]) dup.append(elem) self.assertEqual(s, dup) # Check case where inner iterator is not used keys = [] for k, g in groupby(s, lambda r:r[0]): keys.append(k) expectedkeys = set([r[0] for r in s]) self.assertEqual(set(keys), expectedkeys) self.assertEqual(len(keys), len(expectedkeys)) suite = unittest.TestSuite() suite.addTest(unittest.makeSuite(TestBasicOps)) unittest.TextTestRunner(verbosity=2).run(suite) -- David Eppstein http://www.ics.uci.edu/~eppstein/ Univ. of California, Irvine, School of Information & Computer Science