# Grouping pairs - suggested tools

Arnaud Delobelle arnodel at gmail.com
Tue Sep 21 21:34:17 CEST 2010

```On 21 Sep, 11:13, Peter Otten <__pete... at web.de> wrote:
[...]
>
> A straightforward implementation:
>
> \$ cat group_edges.py
> def find_groups(edges):
>     lookup = {} # node --> group
>     groups = {} # id(group) --> group
>     for a, b in edges:
>         if a in lookup:
>             if b in lookup:
>                 ga = lookup[a]
>                 gb = lookup[b]
>                 if ga is not gb:
>                     # merge groups
>                     if len(ga) < len(gb):
>                         # always update the smaller group
>                         ga, gb = gb, ga
>                     del groups[id(gb)]
>                     ga.update(gb)
>                     for k in gb:
>                         lookup[k] = ga
>             else:
>                 # add b to a's group
>                 ga = lookup[a]
>                 lookup[b] = ga
>         elif b in lookup:
>             # add a to b's group
>             gb = lookup[b]
>             lookup[a] = gb
>         else:
>             # create new group
>             g = set((a, b))
>             groups[id(g)] = g
>             lookup[a] = lookup[b] = g
>     return groups.values()
>
> def show_groups(edges):
>     groups = sorted(sorted(g) for g in find_groups(edges))
>     for i, group in enumerate(groups, 1):
>         for node in sorted(group):
>             print i, node
>

I think I would go for the two-step approach of constructing the graph
first and then recursively building connected components.  It sounds
more complicated at first but when you implement it it turns out quite
simple:

from collections import defaultdict
from itertools import count

def build_groups(edges):
neighbors = defaultdict(set)
for x, y in edges:

groups, group_indices = {}, count(1)
def set_group(x, group_index):
groups[x] = group_index
for y in neighbors[x]:
if y not in groups:
set_group(y, group_index)

for x in neighbors:
if x not in groups:
set_group(x, group_indices.next())
return groups

if __name__ == "__main__":
def print_groups(edges):
print " ".join(edges)
groups = build_groups(edges)
for x, i in sorted(groups.items()):
print i, x
print
examples = [
['ab', 'bc', 'ad', 'ef', 'gf', 'hi'],
['ac', 'bd', 'cd'],
]
for edges in examples:
print_groups(edges)

\$ python connected.py
ab bc ad ef gf hi
1 a
1 b
1 c
1 d
2 e
2 f
2 g
3 h
3 i

ac bd cd
1 a
1 b
1 c
1 d

--
Arnaud

```