Grouping pairs - suggested tools
Arnaud Delobelle
arnodel at gmail.com
Tue Sep 21 15:34:17 EDT 2010
On 21 Sep, 11:13, Peter Otten <__pete... at web.de> wrote:
[...]
>
> A straightforward implementation:
>
> $ cat group_edges.py
> def find_groups(edges):
> lookup = {} # node --> group
> groups = {} # id(group) --> group
> for a, b in edges:
> if a in lookup:
> if b in lookup:
> ga = lookup[a]
> gb = lookup[b]
> if ga is not gb:
> # merge groups
> if len(ga) < len(gb):
> # always update the smaller group
> ga, gb = gb, ga
> del groups[id(gb)]
> ga.update(gb)
> for k in gb:
> lookup[k] = ga
> else:
> # add b to a's group
> ga = lookup[a]
> ga.add(b)
> lookup[b] = ga
> elif b in lookup:
> # add a to b's group
> gb = lookup[b]
> gb.add(a)
> lookup[a] = gb
> else:
> # create new group
> g = set((a, b))
> groups[id(g)] = g
> lookup[a] = lookup[b] = g
> return groups.values()
>
> def show_groups(edges):
> groups = sorted(sorted(g) for g in find_groups(edges))
> for i, group in enumerate(groups, 1):
> for node in sorted(group):
> print i, node
>
I think I would go for the two-step approach of constructing the graph
first and then recursively building connected components. It sounds
more complicated at first but when you implement it it turns out quite
simple:
from collections import defaultdict
from itertools import count
def build_groups(edges):
neighbors = defaultdict(set)
for x, y in edges:
neighbors[x].add(y)
neighbors[y].add(x)
groups, group_indices = {}, count(1)
def set_group(x, group_index):
groups[x] = group_index
for y in neighbors[x]:
if y not in groups:
set_group(y, group_index)
for x in neighbors:
if x not in groups:
set_group(x, group_indices.next())
return groups
if __name__ == "__main__":
def print_groups(edges):
print " ".join(edges)
groups = build_groups(edges)
for x, i in sorted(groups.items()):
print i, x
print
examples = [
['ab', 'bc', 'ad', 'ef', 'gf', 'hi'],
['ac', 'bd', 'cd'],
]
for edges in examples:
print_groups(edges)
$ python connected.py
ab bc ad ef gf hi
1 a
1 b
1 c
1 d
2 e
2 f
2 g
3 h
3 i
ac bd cd
1 a
1 b
1 c
1 d
--
Arnaud
More information about the Python-list
mailing list