[Python-checkins] python/nondist/sandbox/set settimings.py, NONE, 1.1

Fri Jul 29 21:59:30 CEST 2005

Update of /cvsroot/python/python/nondist/sandbox/set
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv16836

Added Files:
	settimings.py 
Log Message:
Sandbox a benchmarking tool.

Compares relative performance of dicts vs sets.
Compares set() vs sets.Set().
Compares operations with various input types (sets, dicts, lists, uniqlists).
Also useful for comparing the Py2.4 version to the Py2.5 version.

--- NEW FILE: settimings.py ---
# timings of the new sets module

from timeit import Timer

setup = """
from random import randrange, seed
from sets import Set, ImmutableSet
seed(845957276943582345234523452734241L)
n = 1000
a = [randrange(n) for i in xrange(n)]
b = [randrange(n) for i in xrange(n)]
seta = set(a)
Seta = Set(a)
setb = set(b)
dicta = dict.fromkeys(a, True)
dictb = dict.fromkeys(b, True)
NONE = None
TRUE = True
listseta = list(seta)
listsetb = list(setb)
dac = dicta.__contains__
dah = dicta.has_key
sac = seta.__contains__
sadd = seta.add
setc = set(seta)
for _ in setc: pass
seta.discard(None)
setb.discard(None)
"""

"""
If you don't already have a set,
   then passing the list form is always better or as good a making a temp set

If you do have a set or dict, then passing it is better or as good (w/in 20)% as a listset()

The w/20 is because looping over sets is slower than looping over a list.

Need more study on difference (first result) and sym_diff_up (second result)

--------------

set(a).binop(b)  vs set(a).binop(set(b))
   when =, it means internally b is first converted to a set if not already
   when <, it means the binop uses an algo that doesn't make a set first:
       diff, diff_up, inter, inter_up, union, union_up

set(a).binop(listsetb) vs set(a).binop(setb)
   when >>, it means that the list had to be coerced to a set first
       diff, sym_diff_up
   when >, it means that faster merge was used instead of iter
       union, union_up,

   when < 1/6, just means looping over dicts is slower than lists which is ok
       inter, inter_up, diff_up
   or something else
       sym_diff has ?flawed strategy of makingset and passing to sym_up
            B:makeset, A:sparseloop  A:contains  A-m:adds m:deletes
        vs. B:makeset, A:sparseloop  A:contains A-m:adds
                       B:sparseloop  B:contains B-m:adds
            delete B

"""

# set_diff use of iterable vs set
stmts = [
    'list(seta)',
    'list(dicta)',
    'dicta.keys()',
    'list(listseta)',
    'list(setb)',
    'setc.__contains__(None); list(setc)',
    'setc.discard(None); list(setc)',
    'frozenset(setb)',
    'frozenset(listsetb)',
    "'< means binop bypassed forcing to set'",
    "seta.difference(b)",
    "seta.difference(set(b))",
    "Seta.difference(b)",
    "seta.union(b)",
    "seta.union(set(b))",
    "Seta.union(b)",
    "seta.intersection(b)",
    "seta.intersection(set(b))",
    "Seta.intersection(b)",
    "seta.symmetric_difference(b)",
    "seta.symmetric_difference(set(b))",
    "Seta.symmetric_difference(b)",

    "seta.difference_update(b)",
    "seta.difference_update(set(b))",
    "Seta.difference_update(b)",
    "seta.update(b)",
    "seta.update(set(b))",
    "Seta.update(b)",
    "seta.intersection_update(b)",
    "seta.intersection_update(set(b))",
    "Seta.intersection_update(b)",
    "seta.symmetric_difference_update(b)",
    "seta.symmetric_difference_update(set(b))",
    "Seta.symmetric_difference_update(b)",

    "'>> due to set coercion; > merge faster than iter; < 1/6 dict looping is slow'",
    "seta.difference(listsetb)",
    "seta.difference(setb)",
    "seta.difference(dictb)",
    "seta.union(listsetb)",
    "seta.union(setb)",
    "seta.union(dictb)",
    "seta.intersection(listsetb)",
    "seta.intersection(setb)",
    "seta.intersection(dictb)",
    "seta.symmetric_difference(listsetb)",
    "seta.symmetric_difference(setb)",
    "seta.symmetric_difference(dictb)",

    "seta.difference_update(listsetb)",
    "seta.difference_update(setb)",
    "seta.difference_update(dictb)",
    "seta.update(listsetb)",
    "seta.update(setb)",
    "seta.update(dictb)",
    "seta.intersection_update(listsetb)",
    "seta.intersection_update(setb)",
    "seta.intersection_update(dictb)",
    "seta.symmetric_difference_update(listsetb)",
    "seta.symmetric_difference_update(setb)",
    "seta.symmetric_difference_update(dictb)",

    "for i in xrange(500): seta.add(i)",
    "for i in xrange(500): Seta.add(i)",
    "for i in xrange(500): dicta[i] = NONE",
    "for i in xrange(500): sadd(i)",
    "set(b)",
    "Set(b)",
    "dict.fromkeys(b, TRUE)",

    "set(listsetb)",
    "set(setb)",
    "set(dictb)",

    "for i in xrange(200): i in seta",
    "for i in xrange(200): i in Seta",
    "for i in xrange(200): i in dicta",

    "for i in xrange(200): dah(i)",
    "for i in xrange(200): dac(i)",
    "for i in xrange(200): sac(i)",

    "for i in xrange(200): seta.discard(i)",
    "for i in xrange(20):  Seta.discard(i)",
    "for i in xrange(200): dicta.pop(i, NONE)",

# Repeat first five to check validity of timings
    'list(seta)',
    'list(dicta)',
    'dicta.keys()',
    'list(listseta)',
    'list(setb)',

# Uniquification,
    'list(set(a))',
    'dict.fromkeys(a).keys()',
    'list(dict.fromkeys(a))',
    'list(set(b))',
    'dict.fromkeys(b).keys()',
    'list(dict.fromkeys(b))',
]

from time import time
import gc
start = time()

for stmt in stmts:
    gc.collect()
    print "%f\t%s" % (min(Timer(stmt, setup).repeat(9, 3500)), stmt)

end = time()
print end-start, "! total elapsed"