groupby - summing multiple columns in a list of lists

Peter Otten __peter__ at web.de
Tue May 17 14:24:14 EDT 2011


Jackson wrote:

> I'm currently using a function pasted in below. This allows me to sum
> a column (index) in a list of lists.
> 
> So if mylist = [[1, 2, 3], [1, 3, 4], [2, 3, 4], [2, 4, 5]]
> group_results(mylist,[0],1)
> 
> Returns:
> [(1, 5), (2, 7)]
> 
> What I would like to do is allow a tuple/list of index values, rather
> than a single index value to be summed up, so you could say
> group_results(mylist,[0],[1,2]) would return [(1, 5,7), (2, 7,9)] but
> I'm struggling to do so, any thoughts? Cheers
> 
> from itertools import groupby as gb
> from operator import itemgetter as ig
> 
> def group_results(table,keys,value):
>     res = []
>     nkey = ig(*keys)
>     value = ig(value)
>     for k, group in gb(sorted(table,key=ig(*keys)),nkey):
>         res.append((k,sum(value(row) for row in group)))
>     return res

You could write a version of sum() that can cope with tuples:

from itertools import groupby, imap

def itemgetter(keys, rowtype=tuple):
    """Build a function that extracts several items from a row.

    keys    -- iterable of indices (or mapping keys) to look up in each row
    rowtype -- callable used to package the extracted items (default: tuple)

    The returned function takes one row and returns
    rowtype(<row[k] for each k in keys>), so with the default rowtype the
    result is a tuple even when only a single key is given.
    """
    def getitem(value):
        picked = [value[key] for key in keys]
        return rowtype(picked)
    return getitem

def sum_all(rows):
    """Return the column-wise sum of an iterable of rows.

    The first row fixes both the result type (type of that row) and the
    number of columns; every later row is added into it position by
    position.  An empty iterable makes next() raise StopIteration.
    """
    remaining = iter(rows)
    first = next(remaining)
    rowtype = type(first)
    # Accumulate in a list because the rows themselves may be immutable.
    totals = list(first)
    for row in remaining:
        for column, item in enumerate(row):
            totals[column] += item
    return rowtype(totals)

def group_results(table, key, value):
    """Yield one tuple per distinct key: the key columns plus column sums.

    table -- iterable of rows (indexable sequences)
    key   -- list of column indices that identify a group
    value -- list of column indices to sum within each group

    Results are produced lazily, in ascending key order.
    """
    key_of = itemgetter(key)
    values_of = itemgetter(value)
    # groupby() only merges adjacent rows, so sort by the same key first.
    ordered = sorted(table, key=key_of)
    for group_key, members in groupby(ordered, key_of):
        totals = sum_all(values_of(member) for member in members)
        yield group_key + totals

but I'd probably use a dict-based approach:

def group_results(table, key, value):
    """Group rows by the key columns and sum the value columns per group.

    table -- iterable of rows (indexable sequences)
    key   -- list of column indices that identify a group
    value -- list of column indices to sum within each group

    Returns a list of tuples, one per distinct key, each consisting of the
    key columns followed by the summed value columns, sorted by key.
    """
    get_key = itemgetter(key)
    get_value = itemgetter(value)
    grouped = {}
    for row in table:
        # Use fresh local names instead of rebinding the `key` / `value`
        # parameters inside the loop.
        row_key = get_key(row)
        row_value = get_value(row)
        if row_key in grouped:
            grouped[row_key] = tuple(
                a + b for a, b in zip(grouped[row_key], row_value))
        else:
            grouped[row_key] = row_value
    # items() instead of iteritems(): same result, and it also exists on
    # Python 3 where iteritems() was removed.
    return [k + v for k, v in sorted(grouped.items())]

if __name__ == "__main__":
    # Demo: group by column 0 and sum columns 1 and 2.
    # Expected output: [(1, 5, 7), (2, 7, 9)]
    items = [(1, 2, 3), (1, 3, 4), (2, 3, 4), (2, 4, 5)]
    # A parenthesised single-argument print behaves the same as a Python 2
    # print statement and is also valid on Python 3.  list() is needed for
    # the generator-based variant and is harmless for the list-based one.
    print(list(group_results(items, [0], [1, 2])))

Note that the function built with my version of itemgetter() will always 
return a tuple, even for a single key (operator.itemgetter() returns the 
bare item in that case).



More information about the Python-list mailing list