[pypy-svn] pypy default: (cfbolz, l.diekmann): fix dict.setdefault to only do one hash computation and dict lookup

l.diekmann commits-noreply at bitbucket.org
Fri Apr 1 15:17:59 CEST 2011


Author: Lukas Diekmann <lukas.diekmann at uni-duesseldorf.de>
Branch: 
Changeset: r43080:a0f33904910e
Date: 2011-04-01 15:16 +0200
http://bitbucket.org/pypy/pypy/changeset/a0f33904910e/

Log:	(cfbolz, l.diekmann): fix dict.setdefault to only do one hash
	computation and dict lookup

diff --git a/pypy/objspace/std/dictmultiobject.py b/pypy/objspace/std/dictmultiobject.py
--- a/pypy/objspace/std/dictmultiobject.py
+++ b/pypy/objspace/std/dictmultiobject.py
@@ -108,6 +108,11 @@
         #return w_value or None
         return None
 
+    def impl_setdefault(self, w_key, w_default):
+        # here the dict is always empty
+        self._as_rdict().impl_fallback_setitem(w_key, w_default)
+        return w_default
+
     def impl_setitem(self, w_key, w_value):
         self._as_rdict().impl_fallback_setitem(w_key, w_value)
 
@@ -181,6 +186,9 @@
     # _________________________________________________________________
     # fallback implementation methods
 
+    def impl_fallback_setdefault(self, w_key, w_default):
+        return self.r_dict_content.setdefault(w_key, w_default)
+
     def impl_fallback_setitem(self, w_key, w_value):
         self.r_dict_content[w_key] = w_value
 
@@ -227,6 +235,7 @@
     ("length", 0),
     ("setitem_str", 2),
     ("setitem", 2),
+    ("setdefault", 2),
     ("delitem", 1),
     ("iter", 0),
     ("items", 0),
@@ -317,6 +326,14 @@
     def impl_setitem_str(self, key, w_value):
         self.content[key] = w_value
 
+    def impl_setdefault(self, w_key, w_default):
+        space = self.space
+        if space.is_w(space.type(w_key), space.w_str):
+            return self.content.setdefault(space.str_w(w_key), w_default)
+        else:
+            return self._as_rdict().impl_fallback_setdefault(w_key, w_default)
+
+
     def impl_delitem(self, w_key):
         space = self.space
         w_key_type = space.type(w_key)
@@ -788,12 +805,7 @@
 
 def dict_setdefault__DictMulti_ANY_ANY(space, w_dict, w_key, w_default):
     # XXX should be more efficient, with only one dict lookup
-    w_value = w_dict.getitem(w_key)
-    if w_value is not None:
-        return w_value
-    else:
-        w_dict.setitem(w_key, w_default)
-        return w_default
+    return w_dict.setdefault(w_key, w_default)
 
 def dict_pop__DictMulti_ANY(space, w_dict, w_key, defaults_w):
     len_defaults = len(defaults_w)

diff --git a/pypy/objspace/std/mapdict.py b/pypy/objspace/std/mapdict.py
--- a/pypy/objspace/std/mapdict.py
+++ b/pypy/objspace/std/mapdict.py
@@ -604,6 +604,18 @@
         else:
             self._as_rdict().impl_fallback_setitem(w_key, w_value)
 
+    def impl_setdefault(self,  w_key, w_default):
+        space = self.space
+        if space.is_w(space.type(w_key), space.w_str):
+            key = space.str_w(w_key)
+            w_result = self.impl_getitem_str(key)
+            if w_result is not None:
+                return w_result
+            self.impl_setitem_str(key, w_default)
+            return w_default
+        else:
+            return self._as_rdict().impl_fallback_setdefault(w_key, w_default)
+
     def impl_delitem(self, w_key):
         space = self.space
         w_key_type = space.type(w_key)

diff --git a/pypy/objspace/std/test/test_dictmultiobject.py b/pypy/objspace/std/test/test_dictmultiobject.py
--- a/pypy/objspace/std/test/test_dictmultiobject.py
+++ b/pypy/objspace/std/test/test_dictmultiobject.py
@@ -1,3 +1,4 @@
+import sys
 from pypy.interpreter.error import OperationError
 from pypy.objspace.std.dictmultiobject import \
      W_DictMultiObject, setitem__DictMulti_ANY_ANY, getitem__DictMulti_ANY, \
@@ -259,7 +260,22 @@
         d[33] = 99
         assert d == dd
         assert x == 99
-    
+
+    def test_setdefault_fast(self):
+        class Key(object):
+            calls = 0
+            def __hash__(self):
+                self.calls += 1
+                return object.__hash__(self)
+
+        k = Key()
+        d = {}
+        d.setdefault(k, [])
+        assert k.calls == 1
+
+        d.setdefault(k, 1)
+        assert k.calls == 2
+
     def test_update(self):
         d = {1:2, 3:4}
         dd = d.copy()
@@ -704,13 +720,20 @@
 
 
 class FakeString(str):
+    hash_count = 0
     def unwrap(self, space):
         self.unwrapped = True
         return str(self)
 
+    def __hash__(self):
+        self.hash_count += 1
+        return str.__hash__(self)
+
 # the minimal 'space' needed to use a W_DictMultiObject
 class FakeSpace:
+    hash_count = 0
     def hash_w(self, obj):
+        self.hash_count += 1
         return hash(obj)
     def unwrap(self, x):
         return x
@@ -726,6 +749,8 @@
         return []
     DictObjectCls = W_DictMultiObject
     def type(self, w_obj):
+        if isinstance(w_obj, FakeString):
+            return str
         return type(w_obj)
     w_str = str
     def str_w(self, string):
@@ -890,6 +915,19 @@
             impl.setitem(x, x)
         assert impl.r_dict_content is not None
 
+    def test_setdefault_fast(self):
+        on_pypy = "__pypy__" in sys.builtin_module_names
+        impl = self.impl
+        key = FakeString(self.string)
+        x = impl.setdefault(key, 1)
+        assert x == 1
+        if on_pypy:
+            assert key.hash_count == 1
+        x = impl.setdefault(key, 2)
+        assert x == 1
+        if on_pypy:
+            assert key.hash_count == 2
+
 class TestStrDictImplementation(BaseTestRDictImplementation):
     ImplementionClass = StrDictImplementation
 

diff --git a/pypy/objspace/std/celldict.py b/pypy/objspace/std/celldict.py
--- a/pypy/objspace/std/celldict.py
+++ b/pypy/objspace/std/celldict.py
@@ -34,11 +34,7 @@
 
     @jit.purefunction
     def _getcell_makenew(self, key):
-        res = self.content.get(key, None)
-        if res is not None:
-            return res
-        result = self.content[key] = ModuleCell()
-        return result
+        return self.content.setdefault(key, ModuleCell())
 
     def impl_setitem(self, w_key, w_value):
         space = self.space
@@ -50,6 +46,16 @@
     def impl_setitem_str(self, name, w_value):
         self.getcell(name, True).w_value = w_value
 
+    def impl_setdefault(self, w_key, w_default):
+        space = self.space
+        if space.is_w(space.type(w_key), space.w_str):
+            cell = self.getcell(space.str_w(w_key), True)
+            if cell.w_value is None:
+                cell.w_value = w_default
+            return cell.w_value
+        else:
+            return self._as_rdict().impl_fallback_setdefault(w_key, w_default)
+
     def impl_delitem(self, w_key):
         space = self.space
         w_key_type = space.type(w_key)


More information about the Pypy-commit mailing list