[pypy-svn] r17516 - in pypy/dist/pypy/rpython: . test

Tue Sep 13 03:19:56 CEST 2005

Author: tismer
Date: Tue Sep 13 03:19:53 2005
New Revision: 17516

Modified:
   pypy/dist/pypy/rpython/rdict.py
   pypy/dist/pypy/rpython/test/test_rdict.py
Log:
re-added the hash field to the rdict implementation.
This is a waste for many simple types, but so are other fields,
and we need to re-optimize, anyway.
The effect on performance is remarkable:

              abs.richards   abs.pystone     rel.richards   rel.pystone
pypy-c-17439:   40929 ms         637.274      47.8           56.6
pypy-c-17512:   46105 ms         658.1        53.9           54.8
pypy-current:   33937 ms         698.415      39.6           51.7
python2.3.3:      856 ms       36081.6         1.0            1.0

richards naturally benefits much more from faster dict lookup, because it is
all about accessing instance variables.

All measurements were done on a Windows notebook with 512 MB.
Threading disabled, -boehm -t-lowmem

Modified: pypy/dist/pypy/rpython/rdict.py
==============================================================================

--- pypy/dist/pypy/rpython/rdict.py	(original)
+++ pypy/dist/pypy/rpython/rdict.py	Tue Sep 13 03:19:53 2005
@@ -23,6 +23,7 @@
 #        bool valid;      # to mark if the entry is filled
 #        bool everused;   # to mark if the entry is or has ever been filled
 #        DICTVALUE value;
+#        int hash;
 #    }
 #    
 #    struct dicttable {
@@ -95,6 +96,7 @@
             self.DICTVALUE = self.value_repr.lowleveltype
             self.DICTENTRY = lltype.Struct("dictentry", 
                                 ("key", self.DICTKEY),
+                                ("hash", lltype.Signed),
                                 ("valid", lltype.Bool),
                                 ("everused", lltype.Bool),
                                 ("value", self.DICTVALUE))
@@ -305,7 +307,12 @@
     entry.value = value
     if entry.valid:
         return
+    if dictrepr.custom_eq_hash:
+        hash = hlinvoke(dictrepr.r_rdict_hashfn, d.fnkeyhash, key)
+    else:
+        hash = dictrepr.ll_keyhash(key)
     entry.key = key 
+    entry.hash = hash
     entry.valid = True
     d.num_items += 1
     if not entry.everused:
@@ -348,6 +355,7 @@
         if entry.valid:
            new_entry = ll_dict_lookup(d, entry.key, dictrepr)
            new_entry.key = entry.key
+           new_entry.hash = entry.hash
            new_entry.value = entry.value
            new_entry.valid = True
            new_entry.everused = True
@@ -370,16 +378,17 @@
         checkingkey = entry.key
         if checkingkey == key:
             return entry   # found the entry
-        if dictrepr.custom_eq_hash:
-            res = hlinvoke(dictrepr.r_rdict_eqfn, d.fnkeyeq, checkingkey, key)
-            if (entries != d.entries or
-                not entry.valid or entry.key != checkingkey):
-                # the compare did major nasty stuff to the dict: start over
-                return ll_dict_lookup(d, key, dictrepr)
-        else:
-            res = dictrepr.ll_keyeq is not None and dictrepr.ll_keyeq(checkingkey, key)
-        if res:
-            return entry   # found the entry
+        if entry.hash == hash:
+            if dictrepr.custom_eq_hash:
+                res = hlinvoke(dictrepr.r_rdict_eqfn, d.fnkeyeq, checkingkey, key)
+                if (entries != d.entries or
+                    not entry.valid or entry.key != checkingkey):
+                    # the compare did major nasty stuff to the dict: start over
+                    return ll_dict_lookup(d, key, dictrepr)
+            else:
+                res = dictrepr.ll_keyeq is not None and dictrepr.ll_keyeq(checkingkey, key)
+            if res:
+                return entry   # found the entry
         freeslot = lltype.nullptr(lltype.typeOf(entry).TO)
     elif entry.everused:
         freeslot = entry
@@ -398,16 +407,17 @@
             checkingkey = entry.key
             if checkingkey == key:
                 return entry
-            if dictrepr.custom_eq_hash:
-                res = hlinvoke(dictrepr.r_rdict_eqfn, d.fnkeyeq, checkingkey, key)
-                if (entries != d.entries or
-                    not entry.valid or entry.key != checkingkey):
-                    # the compare did major nasty stuff to the dict: start over
-                    return ll_dict_lookup(d, key, dictrepr)
-            else:
-                res = dictrepr.ll_keyeq is not None and dictrepr.ll_keyeq(checkingkey, key)
-            if res:
-                return entry
+            if entry.hash == hash:
+                if dictrepr.custom_eq_hash:
+                    res = hlinvoke(dictrepr.r_rdict_eqfn, d.fnkeyeq, checkingkey, key)
+                    if (entries != d.entries or
+                        not entry.valid or entry.key != checkingkey):
+                        # the compare did major nasty stuff to the dict: start over
+                        return ll_dict_lookup(d, key, dictrepr)
+                else:
+                    res = dictrepr.ll_keyeq is not None and dictrepr.ll_keyeq(checkingkey, key)
+                if res:
+                    return entry
         elif not freeslot:
             freeslot = entry 
         perturb >>= PERTURB_SHIFT
@@ -536,6 +546,7 @@
         d_entry = d.entries[i]
         entry = dict.entries[i]
         d_entry.key = entry.key
+        d_entry.hash = entry.hash
         d_entry.value = entry.value
         d_entry.valid = entry.valid
         d_entry.everused = entry.everused

Modified: pypy/dist/pypy/rpython/test/test_rdict.py
==============================================================================
--- pypy/dist/pypy/rpython/test/test_rdict.py	(original)
+++ pypy/dist/pypy/rpython/test/test_rdict.py	Tue Sep 13 03:19:53 2005
@@ -161,6 +161,20 @@
     res = interpret(func, [1])
     assert len(res.entries) == rdict.DICT_INITSIZE 
 
+def test_dict_valid_resize():
+    # see if we find our keys after resize
+    def func():
+        d = {}
+        # fill it up
+        for i in range(10):
+            d[str(i)] = 0
+        # delete again
+        for i in range(10):
+            del d[str(i)]
+        res = 0
+    # if it does not crash, we are fine. It crashes if you forget the hash field.
+    interpret(func, [])
+
 def test_dict_iteration():
     def func(i, j):
         d = {}