# TopSort in Python?

Tim Peters tim_one at email.msn.com
Thu Jul 1 11:53:50 EDT 1999

```[Dinu C. Gherman]
> Does anybody have a simple-minded-but-working full-Python
> implementation of topsort, the topological sorting algorithm?
> Or maybe *any* topsort? I remember only one such algorithm...
> python.org/search doesn't reveal any...

Searching for "topological" instead turns up two hits there, a few more on
DejaNews.  Apparently "the std" algorithm I posted years ago predates
DejaNews, though.  So here's a fancier version ...

check-out-aaron's-kjbuckets-too-ly y'rs  - tim

# topsort takes a list of pairs, where each pair (x, y) is taken to
# mean that x <= y wrt some abstract partial ordering.  The return
# value is a list, representing a total ordering that respects all
# the input constraints.
# E.g.,
#    topsort( [(1,2), (3,3)] )
# may return any of (but nothing other than)
#    [3, 1, 2]
#    [1, 3, 2]
#    [1, 2, 3]
# because those are the permutations of the input elements that
# respect the "1 precedes 2" and "3 precedes 3" input constraints.
# Note that a constraint of the form (x, x) is really just a trick
# to make sure x appears *somewhere* in the output list.
#
# If there's a cycle in the constraints, say
#    topsort( [(1,2), (2,1)] )
# then CycleError is raised, and the exception object supports
# many methods to help analyze and break the cycles.  This requires
# a good deal more code than topsort itself!

from exceptions import Exception

class CycleError(Exception):
def __init__(self, sofar, numpreds, succs):
Exception.__init__(self, "cycle in constraints",
sofar, numpreds, succs)
self.preds = None

# return as much of the total ordering as topsort was able to
# find before it hit a cycle
def get_partial(self):
return self

# return remaining elt -> count of predecessors map
def get_pred_counts(self):
return self

# return remaining elt -> list of successors map
def get_succs(self):
return self

# return remaining elements (== those that don't appear in
# get_partial())
def get_elements(self):
return self.get_pred_counts().keys()

# Return a list of pairs representing the full state of what's
# CycleError again, and if you invoke get_pairlist on *that*
# exception object, the result will be isomorphic to *this*
# invocation of get_pairlist).
# The idea is that you can use pick_a_cycle to find a cycle,
# through some means or another pick an (x,y) pair in the cycle
# you no longer want to respect, then remove that pair from the
# output of get_pairlist and try topsort again.
def get_pairlist(self):
succs = self.get_succs()
for x in self.get_elements():
if succs.has_key(x):
for y in succs[x]:
else:
# make sure x appears in topsort's output!

# return remaining elt -> list of predecessors map
def get_preds(self):
if self.preds is not None:
return self.preds
self.preds = preds = {}
remaining_elts = self.get_elements()
for x in remaining_elts:
preds[x] = []
succs = self.get_succs()

for x in remaining_elts:
if succs.has_key(x):
for y in succs[x]:
preds[y].append(x)

if __debug__:
for x in remaining_elts:
assert len(preds[x]) > 0
return preds

# return a cycle [x, ..., x] at random
def pick_a_cycle(self):
remaining_elts = self.get_elements()

# We know that everything in remaining_elts has a predecessor,
# but don't know that everything in it has a successor.  So
# crawling forward over succs may hit a dead end.  Instead we
# crawl backward over the preds until we hit a duplicate, then
# reverse the path.
preds = self.get_preds()
from random import choice
x = choice(remaining_elts)
index = {}
x = choice(preds[x])

def topsort(pairlist):
numpreds = {}   # elt -> # of predecessors
successors = {} # elt -> list of successors
for first, second in pairlist:
# make sure every elt is a key in numpreds
if not numpreds.has_key(first):
numpreds[first] = 0
if not numpreds.has_key(second):
numpreds[second] = 0

# if they're the same, there's no real dependence
if first == second:
continue

# since first < second, second gains a pred ...
numpreds[second] = numpreds[second] + 1

# ... and first gains a succ
if successors.has_key(first):
successors[first].append(second)
else:
successors[first] = [second]

# suck up everything without a predecessor
answer = filter(lambda x, numpreds=numpreds: numpreds[x] == 0,
numpreds.keys())

# for everything in answer, knock down the pred count on
# its successors; note that answer grows *in* the loop
assert numpreds[x] == 0
del numpreds[x]
if successors.has_key(x):
for y in successors[x]:
numpreds[y] = numpreds[y] - 1
if numpreds[y] == 0:
# following "del" isn't needed; just makes
# CycleError details easier to grasp
del successors[x]

if numpreds:
# everything in numpreds has at least one predecessor ->
# there's a cycle
if __debug__:
for x in numpreds.keys():
assert numpreds[x] > 0