[pypy-svn] r39761 - in pypy/dist/pypy: config doc/config objspace/std objspace/std/test

arigo at codespeak.net arigo at codespeak.net
Sat Mar 3 10:05:51 CET 2007


Author: arigo
Date: Sat Mar  3 10:05:48 2007
New Revision: 39761

Added:
   pypy/dist/pypy/doc/config/objspace.std.withbucketdict.txt   (contents, props changed)
   pypy/dist/pypy/objspace/std/dictbucket.py   (contents, props changed)
   pypy/dist/pypy/objspace/std/test/test_dictbucket.py   (contents, props changed)
Modified:
   pypy/dist/pypy/config/pypyoption.py
   pypy/dist/pypy/objspace/std/dictmultiobject.py
   pypy/dist/pypy/objspace/std/objspace.py
   pypy/dist/pypy/objspace/std/test/test_dictmultiobject.py
Log:
Implement general dictionaries as a hash table whose collision
resolution is via chained lists instead of open addressing.

Enable with --objspace-std-withbucketdict.  It replaces the
r_dict implementation for general dictionaries (but doesn't
change string dicts, at least so far).


Modified: pypy/dist/pypy/config/pypyoption.py
==============================================================================
--- pypy/dist/pypy/config/pypyoption.py	(original)
+++ pypy/dist/pypy/config/pypyoption.py	Sat Mar  3 10:05:48 2007
@@ -143,6 +143,12 @@
                    default=False,
                    requires=[("objspace.std.withmultidict", True)]),
 
+        BoolOption("withbucketdict",
+                   "use dictionaries with chained hash tables "
+                   "(default is open addressing)",
+                   default=False,
+                   requires=[("objspace.std.withmultidict", True)]),
+
         BoolOption("withrangelist",
                    "enable special range list implementation that does not "
                    "actually create the full list until the resulting "

Added: pypy/dist/pypy/doc/config/objspace.std.withbucketdict.txt
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/doc/config/objspace.std.withbucketdict.txt	Sat Mar  3 10:05:48 2007
@@ -0,0 +1,2 @@
+Implement general dictionaries as a hash table whose collision
+resolution is via chained lists instead of open addressing.

Added: pypy/dist/pypy/objspace/std/dictbucket.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/objspace/std/dictbucket.py	Sat Mar  3 10:05:48 2007
@@ -0,0 +1,219 @@
+"""Dictionary implementation based on a hash table with chained buckets.
+
+Entries whose keys fall into the same bucket are linked into a
+singly-linked list, instead of being moved to another slot as open
+addressing does.
+"""
+from pypy.objspace.std.dictmultiobject import DictImplementation
+from pypy.objspace.std.dictmultiobject import IteratorImplementation
+
+
+class BucketNode:
+    """One key/value entry of a bucket's linked list."""
+
+    def __init__(self, hash, w_key, w_value, next):
+        self.hash = hash        # hash of w_key, reused by lookups and _resize()
+        self.w_key = w_key
+        self.w_value = w_value
+        self.next = next        # following node of the same bucket, or None
+
+
+# Hashes are multiplied by this constant before being masked into a
+# bucket index.
+DISTRIBUTE = 9
+
+# Size of a fresh table, and lower bound for _resize().  Table sizes must
+# stay powers of two because bucket indexes are computed with a bit mask.
+MIN_TABLE_SIZE = 4
+
+
+class BucketDictImplementation(DictImplementation):
+    """Dictionary storage that resolves hash collisions by chaining.
+
+    'table' is a list of bucket heads (a BucketNode or None) and 'len' is
+    the number of stored entries.  setitem(), setitem_str() and delitem()
+    return the implementation object to use after the operation.
+    """
+
+    def __init__(self, space):
+        self.space = space
+        self.len = 0
+        self.table = [None] * MIN_TABLE_SIZE
+
+    def __repr__(self):
+        # debugging aid: shows the length of the chain of every bucket
+        bs = []
+        for node in self.table:
+            count = 0
+            while node is not None:
+                count += 1
+                node = node.next
+            bs.append(str(count))
+        return "%s<%s>" % (self.__class__.__name__, ', '.join(bs))
+
+    def _index(self, hashvalue):
+        """Return the index in self.table of the bucket for 'hashvalue'."""
+        return (hashvalue * DISTRIBUTE) & (len(self.table) - 1)
+
+    def get(self, w_key):
+        """Return the value stored for w_key, or None if it is missing."""
+        space = self.space
+        hashvalue = space.hash_w(w_key)
+        node = self.table[self._index(hashvalue)]
+        while node is not None:
+            # compare the stored hashes first, before calling space.eq_w()
+            if node.hash == hashvalue and space.eq_w(w_key, node.w_key):
+                return node.w_value
+            node = node.next
+        return None
+
+    def setitem(self, w_key, w_value):
+        """Bind w_key to w_value, replacing any previous value."""
+        space = self.space
+        hashvalue = space.hash_w(w_key)
+        index = self._index(hashvalue)
+        node = head = self.table[index]
+        while node is not None:
+            if node.hash == hashvalue and space.eq_w(w_key, node.w_key):
+                node.w_value = w_value
+                return self
+            node = node.next
+        # new key: insert it at the head of its bucket
+        self.table[index] = BucketNode(hashvalue, w_key, w_value, head)
+        self.len += 1
+        if self.len > len(self.table):
+            # more entries than buckets: grow the table
+            self._resize()
+        return self
+
+    def setitem_str(self, w_key, w_value, shadows_type=True):
+        """Same as setitem(); 'shadows_type' is ignored here."""
+        return self.setitem(w_key, w_value)
+
+    def delitem(self, w_key):
+        """Remove w_key.
+
+        Returns the empty-dict implementation of the space when the last
+        entry goes away, and self otherwise.  Raises KeyError if w_key is
+        not present.
+        """
+        space = self.space
+        hashvalue = space.hash_w(w_key)
+        index = self._index(hashvalue)
+        node = self.table[index]
+        prev = None
+        while node is not None:
+            if node.hash == hashvalue and space.eq_w(w_key, node.w_key):
+                # unlink the node first, so that this object is left in a
+                # consistent state even when it is about to be replaced
+                if prev is None:
+                    self.table[index] = node.next
+                else:
+                    prev.next = node.next
+                self.len -= 1
+                if self.len == 0:
+                    return space.emptydictimpl
+                if self.len < len(self.table) // 2:
+                    self._resize()
+                return self
+            prev = node
+            node = node.next
+        raise KeyError
+
+    def length(self):
+        """Return the number of stored entries."""
+        return self.len
+
+    def _resize(self):
+        """Rehash every node into a table sized for the current length.
+
+        The new size is the smallest power of two that is at least
+        MIN_TABLE_SIZE and not smaller than self.len.
+        """
+        newsize = MIN_TABLE_SIZE
+        while newsize < self.len:
+            newsize *= 2
+        if newsize == len(self.table):
+            # happens when deleting from a table that is already at its
+            # minimal size: rehashing would only be wasted work
+            return
+        newtable = [None] * newsize
+        for node in self.table:
+            while node is not None:
+                newindex = (node.hash * DISTRIBUTE) & (newsize - 1)
+                following = node.next   # read before relinking the node
+                node.next = newtable[newindex]
+                newtable[newindex] = node
+                node = following
+        self.table = newtable
+
+    def iteritems(self):
+        return BucketDictItemIteratorImplementation(self.space, self)
+    def iterkeys(self):
+        return BucketDictKeyIteratorImplementation(self.space, self)
+    def itervalues(self):
+        return BucketDictValueIteratorImplementation(self.space, self)
+
+    def keys(self):
+        """Return a new list of all the keys."""
+        result_w = []
+        for node in self.table:
+            while node is not None:
+                result_w.append(node.w_key)
+                node = node.next
+        return result_w
+
+    def values(self):
+        """Return a new list of all the values."""
+        result_w = []
+        for node in self.table:
+            while node is not None:
+                result_w.append(node.w_value)
+                node = node.next
+        return result_w
+
+    def items(self):
+        """Return a new list of (key, value) tuples built by the space."""
+        space = self.space
+        result_w = []
+        for node in self.table:
+            while node is not None:
+                w_item = space.newtuple([node.w_key, node.w_value])
+                result_w.append(w_item)
+                node = node.next
+        return result_w
+
+
+class BucketDictIteratorImplementation(IteratorImplementation):
+    """Common part of the iterators; subclasses provide get_result()."""
+
+    def __init__(self, space, dictimplementation):
+        IteratorImplementation.__init__(self, space, dictimplementation)
+        self.index = 0      # index of the next bucket to visit
+        self.node = None    # next node of the current chain, or None
+
+    def next_entry(self):
+        """Return the result for the next node, or None at the end."""
+        node = self.node
+        while node is None:
+            # current chain exhausted: move on to the next bucket
+            table = self.dictimplementation.table
+            if self.index >= len(table):
+                return None
+            node = table[self.index]
+            self.index += 1
+        self.node = node.next
+        return self.get_result(node)
+
+
+class BucketDictKeyIteratorImplementation(BucketDictIteratorImplementation):
+    def get_result(self, node):
+        return node.w_key
+
+class BucketDictValueIteratorImplementation(BucketDictIteratorImplementation):
+    def get_result(self, node):
+        return node.w_value
+
+class BucketDictItemIteratorImplementation(BucketDictIteratorImplementation):
+    def get_result(self, node):
+        return self.space.newtuple([node.w_key, node.w_value])

Modified: pypy/dist/pypy/objspace/std/dictmultiobject.py
==============================================================================
--- pypy/dist/pypy/objspace/std/dictmultiobject.py	(original)
+++ pypy/dist/pypy/objspace/std/dictmultiobject.py	Sat Mar  3 10:05:48 2007
@@ -143,7 +143,8 @@
             return StrDictImplementation(self.space).setitem_str(w_key, w_value)
             #return SmallStrDictImplementation(self.space, w_key, w_value)
         else:
-            return RDictImplementation(self.space).setitem(w_key, w_value)
+            space = self.space
+            return space.DefaultDictImpl(space).setitem(w_key, w_value)
         #return SmallDictImplementation(self.space, w_key, w_value)
     def setitem_str(self, w_key, w_value, shadows_type=True):
         return StrDictImplementation(self.space).setitem_str(w_key, w_value)
@@ -205,7 +206,7 @@
             i += 1
 
     def _convert_to_rdict(self):
-        newimpl = RDictImplementation(self.space)
+        newimpl = self.space.DefaultDictImpl(self.space)
         i = 0
         while 1:
             entry = self.entries[i]
@@ -296,13 +297,13 @@
             i += 1
 
     def _convert_to_rdict(self):
-        newimpl = RDictImplementation(self.space)
+        newimpl = self.space.DefaultDictImpl(self.space)
         i = 0
         while 1:
             entry = self.entries[i]
             if entry.w_value is None:
                 break
-            newimpl.content[self.space.wrap(entry.key)] = entry.w_value
+            newimpl.setitem(self.space.wrap(entry.key), entry.w_value)
             i += 1
         return newimpl
 
@@ -450,7 +451,7 @@
 
 
     def _as_rdict(self):
-        newimpl = RDictImplementation(self.space)
+        newimpl = self.space.DefaultDictImpl(self.space)
         for k, w_v in self.content.items():
             newimpl.setitem(self.space.wrap(k), w_v)
         return newimpl
@@ -762,7 +763,7 @@
         if as_strdict:
             newimpl = StrDictImplementation(self.space)
         else:
-            newimpl = RDictImplementation(self.space)
+            newimpl = self.space.DefaultDictImpl(self.space)
         for k, i in self.structure.keys.items():
             if i >= 0:
                 newimpl.setitem_str(self.space.wrap(k), self.entries[i])

Modified: pypy/dist/pypy/objspace/std/objspace.py
==============================================================================
--- pypy/dist/pypy/objspace/std/objspace.py	(original)
+++ pypy/dist/pypy/objspace/std/objspace.py	Sat Mar  3 10:05:48 2007
@@ -127,6 +127,11 @@
             from pypy.objspace.std import dictmultiobject
             self.DictObjectCls = dictmultiobject.W_DictMultiObject
             self.emptydictimpl = dictmultiobject.EmptyDictImplementation(self)
+            if self.config.objspace.std.withbucketdict:
+                from pypy.objspace.std import dictbucket
+                self.DefaultDictImpl = dictbucket.BucketDictImplementation
+            else:
+                self.DefaultDictImpl = dictmultiobject.RDictImplementation
         else:
             from pypy.objspace.std import dictobject
             self.DictObjectCls = dictobject.W_DictObject

Added: pypy/dist/pypy/objspace/std/test/test_dictbucket.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/objspace/std/test/test_dictbucket.py	Sat Mar  3 10:05:48 2007
@@ -0,0 +1,10 @@
+from pypy.objspace.std.dictbucket import BucketDictImplementation
+from pypy.objspace.std.test import test_dictmultiobject
+
+
+Base = test_dictmultiobject.TestRDictImplementation  # test class being reused
+
+class TestBucketDictImplementation(Base):  # only overrides class attributes
+    ImplementionClass = BucketDictImplementation  # (sic) spelling as in Base
+    DevolvedClass     = BucketDictImplementation
+    DefaultDictImpl   = BucketDictImplementation  # presumably -> space.DefaultDictImpl

Modified: pypy/dist/pypy/objspace/std/test/test_dictmultiobject.py
==============================================================================
--- pypy/dist/pypy/objspace/std/test/test_dictmultiobject.py	(original)
+++ pypy/dist/pypy/objspace/std/test/test_dictmultiobject.py	Sat Mar  3 10:05:48 2007
@@ -69,6 +69,7 @@
         self.space = FakeSpace()
         self.space.emptydictimpl = EmptyDictImplementation(self.space)
         self.space.DictObjectCls = W_DictMultiObject
+        self.space.DefaultDictImpl = RDictImplementation
 
     def test_stressdict(self):
         from random import randint
@@ -86,11 +87,13 @@
     ImplementionClass = RDictImplementation
     DevolvedClass = RDictImplementation
     EmptyClass = EmptyDictImplementation
+    DefaultDictImpl = RDictImplementation
 
     def setup_method(self,method):
         self.space = FakeSpace()
         self.space.DictObjectCls = W_DictMultiObject
         self.space.emptydictimpl = EmptyDictImplementation(self.space)
+        self.space.DefaultDictImpl = self.DefaultDictImpl
         self.string = self.space.wrap("fish")
         self.string2 = self.space.wrap("fish2")
         self.impl = self.get_impl()



More information about the Pypy-commit mailing list