# improving a huge double-for cycle

Bruno Desthuilliers bdesth.quelquechose at free.quelquepart.fr
Thu Sep 18 20:43:00 CEST 2008

```Alexzive a écrit :
> Hello there :) ,
>
> I am a python newbie and need to run following code for a task in an
> external simulation programm called "Abaqus" which makes use of python
> to access the mesh (ensamble of nodes with xy coordinates) of a
> certain geometrical model.
>
> [IN is the starting input containing the nodes to be check, there are
> some double nodes with the same x and y coordinates which need to be
> removed. SN is the output containing such double nodes]
>
> Code: Select all
>     for i in range(len(IN)): #scan all elements of the list IN
>       for j in range(len(IN)):
>         if i <> j:
>          if IN[i].coordinates == IN[j].coordinates:
>            if IN[i].coordinates == IN[j].coordinates:
>               SN.append(IN[i].label)
>
>
>
> Unfortunately my len(IN) is about 100.000 and the running time about
> 15h !!!! :(
>
> Any idea to improve it?

A couple ones have been submitted. Harald gets a point about the
redundant tests (even if his solution seems to be broken, cf below) -
your inner loop should have looked like :

for j in xrange(i+1, len(IN))

Now the obvious winner is pruebono - even unoptimized, using sets seems
to be *way* faster than even the most optimized corrected version of

Here's a quick bench - please everyone doublecheck it to make sure it's ok:

from timeit import Timer
import random

class Node(object):
def __init__(self, x, y):
self.coordinates = (x, y)
def __repr__(self):
return repr(self.coordinates)

# how to build src:
# src = [(random.randrange(10),random.randrange(10)) for x in xrange(100)]
src = [
(4, 9), (5, 0), (6, 6), (7, 2), (3, 6), (9, 6), (0, 1), (1, 6),
(0, 5), (1, 2), (8, 9), (5, 4), (1, 6), (7, 6), (9, 1), (7, 6),
(0, 1), (7, 4), (7, 4), (8, 4), (8, 4), (3, 5), (9, 6), (6, 1),
(3, 4), (4, 5), (0, 5), (6, 3), (2, 4), (1, 6), (9, 5), (1, 2),
(5, 8), (8, 5), (3, 1), (9, 4), (9, 4), (3, 3), (4, 8), (9, 7),
(8, 4), (6, 2), (1, 5), (5, 8), (8, 6), (0, 8), (5, 2), (3, 4),
(0, 5), (4, 4), (2, 9), (7, 7), (1, 0), (4, 2), (5, 7), (0, 4),
(2, 5), (0, 8), (7, 3), (9, 1), (0, 4), (5, 0), (4, 9), (0, 6),
(3, 0), (3, 0), (3, 9), (8, 3), (7, 9), (8, 5), (7, 6), (1, 5),
(0, 6), (5, 9), (6, 8), (0, 0), (4, 1), (3, 3), (5, 4), (5, 3),
(6, 1), (5, 4), (4, 5), (5, 8), (4, 1), (3, 6), (1, 9), (0, 5),
(6, 5), (5, 5), (6, 0), (0, 9), (2, 6), (0, 7), (5, 9), (7, 3),
(7, 9), (5, 4), (4, 9), (2, 9)
]
IN = [Node(x, y) for x, y in src]

def doubles0():
SN = []
for i in range(len(IN)): #scan all elements of the list IN
for j in range(len(IN)):
#print i, j
if i <> j:
if IN[i].coordinates == IN[j].coordinates:
if IN[i].coordinates == IN[j].coordinates:
SN.append(IN[i])
return SN

def doubles1():
"first solve an algoritmic problem"
SN = []
for i in range(len(IN)):
for j in range(i+1, len(IN)):
#print i, j
#if i != j:
if IN[i].coordinates == IN[j].coordinates:
if IN[i].coordinates == IN[j].coordinates:
SN.append(IN[i])
return SN

def doubles2():
"then avoid buildin useless lists"
SN = []
for i in xrange(len(IN)):
for j in xrange(i+1, len(IN)):
if IN[i].coordinates == IN[j].coordinates:
if IN[i].coordinates == IN[j].coordinates:
SN.append(IN[i])
return SN

def doubles3():
"then avoid uselessly calling a constant operation"
SN = []
in_len = len(IN)
for i in xrange(in_len):
for j in xrange(i+1, in_len):
if IN[i].coordinates == IN[j].coordinates:
if IN[i].coordinates == IN[j].coordinates:
SN.append(IN[i])
return SN

def doubles4():
"then alias commonly used methods"
SN = []
sn_append = SN.append
in_len = len(IN)
for i in xrange(in_len):
for j in xrange(i+1, in_len):
if IN[i].coordinates == IN[j].coordinates:
if IN[i].coordinates == IN[j].coordinates:
sn_append(IN[i])
return SN

def doubles5():
"then alias a couple other things"
SN = []
sn_append = SN.append
in_len = len(IN)
for i in xrange(in_len):
node_i = IN[i]
coords_i = node_i.coordinates
for j in xrange(i+1, in_len):
if coords_i == IN[j].coordinates:
if coords_i == IN[j].coordinates:
sn_append(node_i)
return SN

def doubles6():
"then simplify tests"
SN = []
sn_append = SN.append
in_len = len(IN)
for i in xrange(in_len):
node_i = IN[i]
coords_i = node_i.coordinates
for j in xrange(i+1, in_len):
if coords_i == IN[j].coordinates:
sn_append(node_i)
return SN

# Harald : uncomment this and run test_results. As far as I can tell, it
# doesn't yields the expected results

## IN7 = IN[:]
## def sortk7(n):
##     return n.coordinates

## def doubles7():
##     "is ordering better ? - Nope Sir, it's broken..."
##     IN7.sort(key=sortk)
##     SN = []
##     sn_append = SN.append
##     in_len = len(IN)
##     for i in xrange(in_len):
##         node_i = IN[i]
##         coords_i = node_i.coordinates
##         for j in xrange(i+1, in_len):
##             if coords_i == IN[j].coordinates:
##                 if coords_i == IN[j].coordinates:
##                     sn_append(node_i)
##             else:
##                 break

##     return SN

def doubles8():
"Using a set ?"
dup = set()
SN = []
for item in IN:
c = item.coordinates
if c in dup:
SN.append(item)
else:
return SN

def doubles9():
"Using a set is way faster - but can still be improved a bit"
dup = set()
SN = []
sn_append = SN.append

for item in IN:
c = item.coordinates
if c in dup:
sn_append(item)
else:
return SN

# need this for tests:
names_funcs = sorted(
(n, f) for n, f in locals().iteritems()
if n.startswith('doubles')
)

def test_results():
"""
make sure all solutions give correct results,
assuming the OP solution is at least correct !-)
"""
results = [
(n, set(o.coordinates for o in f()))
for n, f in names_funcs
]
_, correct = results
ok = True
for n, r in results[1:]:
if r != correct:
print "error on %s ?\n expected %s\n, got %s" \
% (n, correct, r)
ok = False
return ok

def test_times():
" And the winner is..."
results = [
(n, Timer(
'%s()' % n,
'from __main__ import %s' % n
).timeit(100)
)
for n, _ in names_funcs
]
print "\n".join("%s : %s" % r for r in results)

Results here (py2.5, gentoo linux, athlonxp1800, 512 ram):

>>> test_results()
True
>>> test_times()
doubles0 : 1.55667901039
doubles1 : 0.719144105911
doubles2 : 0.703393936157
doubles3 : 0.700654983521
doubles4 : 0.706257104874
doubles5 : 0.528184890747
doubles6 : 0.461633205414
doubles8 : 0.0134379863739
doubles9 : 0.0108540058136
>>>

Not surprisingly, half less iterations makes for half less time.
Aliasing, as often, proves to be a good optimization too. But obviously,
using the correct data structure / algorithm combo is the key : simpler
code, and 115 times faster (143 times with aliasing). If pruebono
solution's is correct (and it as AFAICT), your 15 hours computation
should by now take less than 10 minutes...

```