# Break up list into groups

James Stroud jstroud at mbi.ucla.edu
Tue Jul 17 01:05:30 CEST 2007

```Paul Rubin wrote:
> See:
>

groupby is damn slow as far as I can tell (the Bates numbering in the
above link assumes more than the OP intended, I assume). It looks like
the author's original algorithm is the fastest Python way, as it bypasses
a lot of lookups, etc.

Here's the output from the script below (doit2 => groupby way):

doit
11.96 usec/pass
doit2
87.14 usec/pass

James

# timer script
from itertools import groupby
from timeit import Timer

# Sample input, laid out one group per line: each value with a high-nibble
# bit set (0xF0, 0xF1, 0xF2) is followed by the plain values that belong to it.
alist = [
    0xF0, 1, 2, 3,
    0xF0, 4, 5, 6,
    0xF1, 7, 8,
    0xF2, 9, 10, 11, 12, 13,
    0xF0, 14,
    0xF1, 15,
]

def doit(alist):
    """Split *alist* into sub-lists, starting a new one at every marker.

    A marker is any item with at least one of the bits in 0xF0 set.
    Each marker opens a new group and the non-marker items that follow
    are appended to that group.

    Items that appear before the first marker are collected into a
    leading group of their own instead of raising IndexError.

    Returns a list of lists; an empty input yields an empty list.
    """
    ary = []
    for i in alist:
        # "not ary" covers input that does not begin with a marker:
        # there is no open group yet, so one has to be started.
        if 0xf0 & i or not ary:
            ary.append([i])
        else:
            ary[-1].append(i)
    # Every group is created with one element, so none can be empty and
    # no filtering pass is needed.
    return ary

def c(x):
    """Return the bits of *x* selected by the mask 0xF0.

    The result is non-zero (truthy) for group markers and 0 for plain
    values, which makes it usable as a ``groupby`` key function.
    """
    return 0xf0 & x

def doit2(alist):
    """Group *alist* with ``itertools.groupby``, using ``c`` as the key.

    ``groupby`` yields alternating runs of marker items and plain items.
    Each run is glued to the run that follows it, and results with fewer
    than two items are dropped.

    Caveats of this approach:
      * the input is expected to begin with a marker run;
      * adjacent markers share the same key, so they land in one run and
        are not split into separate groups.

    Returns a list of lists.
    """
    runs = (list(g) for _, g in groupby(alist, c))
    # next(runs, []) replaces the Python-2-only runs.next() and supplies an
    # empty partner when the input ends on an unpaired run, so StopIteration
    # never escapes.
    paired = (head + next(runs, []) for head in runs)
    return [grp for grp in paired if len(grp) > 1]

# Show the grouping once, then time both implementations.
# print(...) with a single argument produces the same output on Python 2
# and Python 3.
print(doit(alist))

passes = 100000
for name in ('doit', 'doit2'):
    print(name)
    # The setup string imports from __main__, so this has to be run as a
    # script rather than imported as a module.
    t = Timer('%s(alist)' % name,
              'from __main__ import groupby, %s, alist, c' % name)
    # timeit() returns total seconds for all passes; convert to
    # microseconds per pass.
    print("%.2f usec/pass" % (1000000 * t.timeit(number=passes) / passes))

--
James Stroud
UCLA-DOE Institute for Genomics and Proteomics
Box 951570
Los Angeles, CA 90095

http://www.jamesstroud.com/

```