[Python-checkins] python/nondist/sandbox/set settimings.py, NONE, 1.1
rhettinger@users.sourceforge.net
rhettinger at users.sourceforge.net
Fri Jul 29 21:59:30 CEST 2005
Update of /cvsroot/python/python/nondist/sandbox/set
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv16836
Added Files:
settimings.py
Log Message:
Sandbox a benchmarking tool.
Compares relative performance of dicts vs sets.
Compares set() vs sets.Set().
Compares operations with various input types (sets, dicts, lists, uniqlists).
Also useful for comparing the Py2.4 version to the Py2.5 version.
--- NEW FILE: settimings.py ---
# timings of the new sets module
from timeit import Timer
setup = """
from random import randrange, seed
from sets import Set, ImmutableSet
seed(845957276943582345234523452734241L)
n = 1000
a = [randrange(n) for i in xrange(n)]
b = [randrange(n) for i in xrange(n)]
seta = set(a)
Seta = Set(a)
setb = set(b)
dicta = dict.fromkeys(a, True)
dictb = dict.fromkeys(b, True)
NONE = None
TRUE = True
listseta = list(seta)
listsetb = list(setb)
dac = dicta.__contains__
dah = dicta.has_key
sac = seta.__contains__
sadd = seta.add
setc = set(seta)
for _ in setc: pass
seta.discard(None)
setb.discard(None)
"""
"""
If you don't already have a set,
then passing the list form is always better or as good a making a temp set
If you do have a set or dict, then passing it is better or as good (w/in 20)% as a listset()
The w/20 is because looping over sets is slower than looping over a list.
Need more study on difference (first result) and sym_diff_up (second result)
--------------
set(a).binop(b) vs set(a).binop(set(b))
when =, it means internally b is first converted to a set if not already
when <, it means the binop uses an algo that doesn't make a set first:
diff, diff_up, inter, inter_up, union, union_up
set(a).binop(listsetb) vs set(a).binop(setb)
when >>, it means that the list had to be coerced to a set first
diff, sym_diff_up
when >, it means that faster merge was used instead of iter
union, union_up,
when < 1/6, just means looping over dicts is slower than lists which is ok
inter, inter_up, diff_up
or something else
sym_diff has ?flawed strategy of makingset and passing to sym_up
B:makeset, A:sparseloop A:contains A-m:adds m:deletes
vs. B:makeset, A:sparseloop A:contains A-m:adds
B:sparseloop B:contains B-m:adds
delete B
"""
# set_diff use of iterable vs set
stmts = [
'list(seta)',
'list(dicta)',
'dicta.keys()',
'list(listseta)',
'list(setb)',
'setc.__contains__(None); list(setc)',
'setc.discard(None); list(setc)',
'frozenset(setb)',
'frozenset(listsetb)',
"'< means binop bypassed forcing to set'",
"seta.difference(b)",
"seta.difference(set(b))",
"Seta.difference(b)",
"seta.union(b)",
"seta.union(set(b))",
"Seta.union(b)",
"seta.intersection(b)",
"seta.intersection(set(b))",
"Seta.intersection(b)",
"seta.symmetric_difference(b)",
"seta.symmetric_difference(set(b))",
"Seta.symmetric_difference(b)",
"seta.difference_update(b)",
"seta.difference_update(set(b))",
"Seta.difference_update(b)",
"seta.update(b)",
"seta.update(set(b))",
"Seta.update(b)",
"seta.intersection_update(b)",
"seta.intersection_update(set(b))",
"Seta.intersection_update(b)",
"seta.symmetric_difference_update(b)",
"seta.symmetric_difference_update(set(b))",
"Seta.symmetric_difference_update(b)",
"'>> due to set coercion; > merge faster than iter; < 1/6 dict looping is slow'",
"seta.difference(listsetb)",
"seta.difference(setb)",
"seta.difference(dictb)",
"seta.union(listsetb)",
"seta.union(setb)",
"seta.union(dictb)",
"seta.intersection(listsetb)",
"seta.intersection(setb)",
"seta.intersection(dictb)",
"seta.symmetric_difference(listsetb)",
"seta.symmetric_difference(setb)",
"seta.symmetric_difference(dictb)",
"seta.difference_update(listsetb)",
"seta.difference_update(setb)",
"seta.difference_update(dictb)",
"seta.update(listsetb)",
"seta.update(setb)",
"seta.update(dictb)",
"seta.intersection_update(listsetb)",
"seta.intersection_update(setb)",
"seta.intersection_update(dictb)",
"seta.symmetric_difference_update(listsetb)",
"seta.symmetric_difference_update(setb)",
"seta.symmetric_difference_update(dictb)",
"for i in xrange(500): seta.add(i)",
"for i in xrange(500): Seta.add(i)",
"for i in xrange(500): dicta[i] = NONE",
"for i in xrange(500): sadd(i)",
"set(b)",
"Set(b)",
"dict.fromkeys(b, TRUE)",
"set(listsetb)",
"set(setb)",
"set(dictb)",
"for i in xrange(200): i in seta",
"for i in xrange(200): i in Seta",
"for i in xrange(200): i in dicta",
"for i in xrange(200): dah(i)",
"for i in xrange(200): dac(i)",
"for i in xrange(200): sac(i)",
"for i in xrange(200): seta.discard(i)",
"for i in xrange(20): Seta.discard(i)",
"for i in xrange(200): dicta.pop(i, NONE)",
# Repeat first five to check validity of timings
'list(seta)',
'list(dicta)',
'dicta.keys()',
'list(listseta)',
'list(setb)',
# Uniquification,
'list(set(a))',
'dict.fromkeys(a).keys()',
'list(dict.fromkeys(a))',
'list(set(b))',
'dict.fromkeys(b).keys()',
'list(dict.fromkeys(b))',
]
from time import time
import gc
start = time()
for stmt in stmts:
gc.collect()
print "%f\t%s" % (min(Timer(stmt, setup).repeat(9, 3500)), stmt)
end = time()
print end-start, "! total elapsed"
More information about the Python-checkins
mailing list