[pypy-svn] r39761 - in pypy/dist/pypy: config doc/config objspace/std objspace/std/test
arigo at codespeak.net
arigo at codespeak.net
Sat Mar 3 10:05:51 CET 2007
Author: arigo
Date: Sat Mar 3 10:05:48 2007
New Revision: 39761
Added:
pypy/dist/pypy/doc/config/objspace.std.withbucketdict.txt (contents, props changed)
pypy/dist/pypy/objspace/std/dictbucket.py (contents, props changed)
pypy/dist/pypy/objspace/std/test/test_dictbucket.py (contents, props changed)
Modified:
pypy/dist/pypy/config/pypyoption.py
pypy/dist/pypy/objspace/std/dictmultiobject.py
pypy/dist/pypy/objspace/std/objspace.py
pypy/dist/pypy/objspace/std/test/test_dictmultiobject.py
Log:
Implement general dictionaries as a hash table whose collision
resolution is via chained lists instead of open addressing.
Enable with --objspace-std-withbucketdict. It replaces the
r_dict implementation for general dictionaries (but doesn't
change string dicts, at least so far).
Modified: pypy/dist/pypy/config/pypyoption.py
==============================================================================
--- pypy/dist/pypy/config/pypyoption.py (original)
+++ pypy/dist/pypy/config/pypyoption.py Sat Mar 3 10:05:48 2007
@@ -143,6 +143,12 @@
default=False,
requires=[("objspace.std.withmultidict", True)]),
+ BoolOption("withbucketdict",
+ "use dictionaries with chained hash tables "
+ "(default is open addressing)",
+ default=False,
+ requires=[("objspace.std.withmultidict", True)]),
+
BoolOption("withrangelist",
"enable special range list implementation that does not "
"actually create the full list until the resulting "
Added: pypy/dist/pypy/doc/config/objspace.std.withbucketdict.txt
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/doc/config/objspace.std.withbucketdict.txt Sat Mar 3 10:05:48 2007
@@ -0,0 +1,2 @@
+Implement general dictionaries as a hash table whose collision
+resolution is via chained lists instead of open addressing.
Added: pypy/dist/pypy/objspace/std/dictbucket.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/objspace/std/dictbucket.py Sat Mar 3 10:05:48 2007
@@ -0,0 +1,164 @@
+from pypy.objspace.std.dictmultiobject import DictImplementation
+from pypy.objspace.std.dictmultiobject import IteratorImplementation
+
+
class BucketNode:
    """One entry of a bucket chain: a key/value pair plus its cached hash.

    ``next`` is whatever the caller passes as the following link of the
    chain (another BucketNode, or None at the end of the chain).
    """

    def __init__(self, hash, w_key, w_value, next):
        # Link first, payload second; the four attributes are independent.
        self.next = next
        self.hash = hash
        self.w_key = w_key
        self.w_value = w_value
+
+
# Factor each hash is multiplied by before being masked to a bucket index.
DISTRIBUTE = 9
+
+
class BucketDictImplementation(DictImplementation):
    """Dictionary implementation using a hash table with chained buckets.

    ``self.table`` is a list of chain heads (BucketNode or None) and
    ``self.len`` is the number of stored entries.  The table starts with 4
    slots and _resize() only ever picks 4 doubled some number of times, so
    ``len(self.table) - 1`` can be used as a bit mask.  setitem() and
    delitem() return an implementation object: ``self``, or the space's
    ``emptydictimpl`` once the last entry has been deleted.
    """

    def __init__(self, space):
        self.space = space
        self.table = [None] * 4
        self.len = 0

    def __repr__(self):
        # Show the class name followed by the chain length of every bucket.
        lengths = []
        for head in self.table:
            n = 0
            cursor = head
            while cursor is not None:
                cursor = cursor.next
                n += 1
            lengths.append(str(n))
        return "%s<%s>" % (self.__class__.__name__, ', '.join(lengths))

    def get(self, w_key):
        """Return the value stored under w_key, or None if it is absent."""
        space = self.space
        keyhash = space.hash_w(w_key)
        buckets = self.table
        node = buckets[(keyhash * DISTRIBUTE) & (len(buckets) - 1)]
        while node is not None:
            # Compare the cached hash first; eq_w is only called on a match.
            if node.hash != keyhash or not space.eq_w(w_key, node.w_key):
                node = node.next
                continue
            return node.w_value
        return None

    def setitem(self, w_key, w_value):
        """Store w_value under w_key and return self."""
        space = self.space
        keyhash = space.hash_w(w_key)
        index = (keyhash * DISTRIBUTE) & (len(self.table) - 1)
        head = self.table[index]
        node = head
        while node is not None:
            if node.hash != keyhash or not space.eq_w(w_key, node.w_key):
                node = node.next
                continue
            # Key already present: overwrite in place, size is unchanged.
            node.w_value = w_value
            return self
        # New key: prepend a node in front of the head read before the scan.
        self.table[index] = BucketNode(keyhash, w_key, w_value, head)
        self.len += 1
        if len(self.table) < self.len:
            self._resize()
        return self

    def setitem_str(self, w_key, w_value, shadows_type=True):
        """Same as setitem(); shadows_type is accepted but not used here."""
        return self.setitem(w_key, w_value)

    def delitem(self, w_key):
        """Remove w_key; raise KeyError if it is not present.

        Returns self, or self.space.emptydictimpl when the removed entry
        was the last one.
        """
        space = self.space
        keyhash = space.hash_w(w_key)
        index = (keyhash * DISTRIBUTE) & (len(self.table) - 1)
        previous = None
        node = self.table[index]
        while node is not None:
            if node.hash != keyhash or not space.eq_w(w_key, node.w_key):
                previous = node
                node = node.next
                continue
            self.len -= 1
            if self.len == 0:
                # Last entry: hand back the empty implementation without
                # unlinking the node from this table.
                return self.space.emptydictimpl
            if previous is not None:
                previous.next = node.next
            else:
                self.table[index] = node.next
            if self.len < len(self.table) // 2:
                self._resize()
            return self
        raise KeyError

    def length(self):
        """Return the number of stored entries."""
        return self.len

    def _resize(self):
        # Choose the smallest size >= self.len among 4, 8, 16, ... and
        # relink every existing node into a fresh table of that size.
        size = 4
        while size < self.len:
            size *= 2
        mask = size - 1
        rebuilt = [None] * size
        for head in self.table:
            node = head
            while node is not None:
                following = node.next
                slot = (node.hash * DISTRIBUTE) & mask
                node.next = rebuilt[slot]
                rebuilt[slot] = node
                node = following
        self.table = rebuilt

    def iteritems(self):
        return BucketDictItemIteratorImplementation(self.space, self)

    def iterkeys(self):
        return BucketDictKeyIteratorImplementation(self.space, self)

    def itervalues(self):
        return BucketDictValueIteratorImplementation(self.space, self)

    def keys(self):
        """Return a list of all keys, bucket by bucket in table order."""
        collected_w = []
        for head in self.table:
            cursor = head
            while cursor is not None:
                collected_w.append(cursor.w_key)
                cursor = cursor.next
        return collected_w

    def values(self):
        """Return a list of all values, bucket by bucket in table order."""
        collected_w = []
        for head in self.table:
            cursor = head
            while cursor is not None:
                collected_w.append(cursor.w_value)
                cursor = cursor.next
        return collected_w

    def items(self):
        """Return a list of (key, value) tuples built with space.newtuple."""
        space = self.space
        collected_w = []
        for head in self.table:
            cursor = head
            while cursor is not None:
                pair = [cursor.w_key, cursor.w_value]
                collected_w.append(space.newtuple(pair))
                cursor = cursor.next
        return collected_w
+
+
class BucketDictIteratorImplementation(IteratorImplementation):
    """Shared cursor logic for iterating over a bucket dictionary.

    ``self.index`` is the next table slot to look at and ``self.node`` is
    the next node to yield from the chain currently being walked (None
    when a new chain has to be fetched).  Subclasses provide get_result()
    to turn a node into the value that is handed out.
    """

    def __init__(self, space, dictimplementation):
        IteratorImplementation.__init__(self, space, dictimplementation)
        self.node = None
        self.index = 0

    def next_entry(self):
        """Return get_result() for the next node, or None when exhausted."""
        current = self.node
        while current is None:
            # The current chain is used up: move on to the next table slot.
            buckets = self.dictimplementation.table
            position = self.index
            if position >= len(buckets):
                return None
            self.index = position + 1
            current = buckets[position]
        self.node = current.next
        return self.get_result(current)
+
+
class BucketDictKeyIteratorImplementation(BucketDictIteratorImplementation):
    """Bucket iterator whose results are the nodes' keys."""

    def get_result(self, node):
        w_key = node.w_key
        return w_key
+
class BucketDictValueIteratorImplementation(BucketDictIteratorImplementation):
    """Bucket iterator whose results are the nodes' values."""

    def get_result(self, node):
        w_value = node.w_value
        return w_value
+
class BucketDictItemIteratorImplementation(BucketDictIteratorImplementation):
    """Bucket iterator whose results are (key, value) tuples."""

    def get_result(self, node):
        # Build the pair with the space so the result is a wrapped tuple.
        pair = [node.w_key, node.w_value]
        return self.space.newtuple(pair)
Modified: pypy/dist/pypy/objspace/std/dictmultiobject.py
==============================================================================
--- pypy/dist/pypy/objspace/std/dictmultiobject.py (original)
+++ pypy/dist/pypy/objspace/std/dictmultiobject.py Sat Mar 3 10:05:48 2007
@@ -143,7 +143,8 @@
return StrDictImplementation(self.space).setitem_str(w_key, w_value)
#return SmallStrDictImplementation(self.space, w_key, w_value)
else:
- return RDictImplementation(self.space).setitem(w_key, w_value)
+ space = self.space
+ return space.DefaultDictImpl(space).setitem(w_key, w_value)
#return SmallDictImplementation(self.space, w_key, w_value)
def setitem_str(self, w_key, w_value, shadows_type=True):
return StrDictImplementation(self.space).setitem_str(w_key, w_value)
@@ -205,7 +206,7 @@
i += 1
def _convert_to_rdict(self):
- newimpl = RDictImplementation(self.space)
+ newimpl = self.space.DefaultDictImpl(self.space)
i = 0
while 1:
entry = self.entries[i]
@@ -296,13 +297,13 @@
i += 1
def _convert_to_rdict(self):
- newimpl = RDictImplementation(self.space)
+ newimpl = self.space.DefaultDictImpl(self.space)
i = 0
while 1:
entry = self.entries[i]
if entry.w_value is None:
break
- newimpl.content[self.space.wrap(entry.key)] = entry.w_value
+ newimpl.setitem(self.space.wrap(entry.key), entry.w_value)
i += 1
return newimpl
@@ -450,7 +451,7 @@
def _as_rdict(self):
- newimpl = RDictImplementation(self.space)
+ newimpl = self.space.DefaultDictImpl(self.space)
for k, w_v in self.content.items():
newimpl.setitem(self.space.wrap(k), w_v)
return newimpl
@@ -762,7 +763,7 @@
if as_strdict:
newimpl = StrDictImplementation(self.space)
else:
- newimpl = RDictImplementation(self.space)
+ newimpl = self.space.DefaultDictImpl(self.space)
for k, i in self.structure.keys.items():
if i >= 0:
newimpl.setitem_str(self.space.wrap(k), self.entries[i])
Modified: pypy/dist/pypy/objspace/std/objspace.py
==============================================================================
--- pypy/dist/pypy/objspace/std/objspace.py (original)
+++ pypy/dist/pypy/objspace/std/objspace.py Sat Mar 3 10:05:48 2007
@@ -127,6 +127,11 @@
from pypy.objspace.std import dictmultiobject
self.DictObjectCls = dictmultiobject.W_DictMultiObject
self.emptydictimpl = dictmultiobject.EmptyDictImplementation(self)
+ if self.config.objspace.std.withbucketdict:
+ from pypy.objspace.std import dictbucket
+ self.DefaultDictImpl = dictbucket.BucketDictImplementation
+ else:
+ self.DefaultDictImpl = dictmultiobject.RDictImplementation
else:
from pypy.objspace.std import dictobject
self.DictObjectCls = dictobject.W_DictObject
Added: pypy/dist/pypy/objspace/std/test/test_dictbucket.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/objspace/std/test/test_dictbucket.py Sat Mar 3 10:05:48 2007
@@ -0,0 +1,10 @@
+from pypy.objspace.std.dictbucket import BucketDictImplementation
+from pypy.objspace.std.test import test_dictmultiobject
+
+
+Base = test_dictmultiobject.TestRDictImplementation
+
class TestBucketDictImplementation(Base):
    """Subclass of the RDict implementation test class (Base) with every
    implementation-class hook pointed at BucketDictImplementation."""

    # NOTE(review): presumably these hooks are read by the inherited
    # setup/tests; only the DefaultDictImpl use is visible in this diff.
    DefaultDictImpl = BucketDictImplementation
    DevolvedClass = BucketDictImplementation
    ImplementionClass = BucketDictImplementation
Modified: pypy/dist/pypy/objspace/std/test/test_dictmultiobject.py
==============================================================================
--- pypy/dist/pypy/objspace/std/test/test_dictmultiobject.py (original)
+++ pypy/dist/pypy/objspace/std/test/test_dictmultiobject.py Sat Mar 3 10:05:48 2007
@@ -69,6 +69,7 @@
self.space = FakeSpace()
self.space.emptydictimpl = EmptyDictImplementation(self.space)
self.space.DictObjectCls = W_DictMultiObject
+ self.space.DefaultDictImpl = RDictImplementation
def test_stressdict(self):
from random import randint
@@ -86,11 +87,13 @@
ImplementionClass = RDictImplementation
DevolvedClass = RDictImplementation
EmptyClass = EmptyDictImplementation
+ DefaultDictImpl = RDictImplementation
def setup_method(self,method):
self.space = FakeSpace()
self.space.DictObjectCls = W_DictMultiObject
self.space.emptydictimpl = EmptyDictImplementation(self.space)
+ self.space.DefaultDictImpl = self.DefaultDictImpl
self.string = self.space.wrap("fish")
self.string2 = self.space.wrap("fish2")
self.impl = self.get_impl()
More information about the Pypy-commit
mailing list