[pypy-svn] r35840 - in pypy/dist/pypy: config objspace/std objspace/std/test
cfbolz at codespeak.net
cfbolz at codespeak.net
Sun Dec 17 15:50:44 CET 2006
Author: cfbolz
Date: Sun Dec 17 15:50:40 2006
New Revision: 35840
Modified:
pypy/dist/pypy/config/pypyoption.py
pypy/dist/pypy/objspace/std/dictmultiobject.py
pypy/dist/pypy/objspace/std/test/test_dictmultiobject.py
pypy/dist/pypy/objspace/std/test/test_dictobject.py
Log:
a very experimental dict implementation: try to make string-keyed dicts smaller
by having them share a "structure".
Modified: pypy/dist/pypy/config/pypyoption.py
==============================================================================
--- pypy/dist/pypy/config/pypyoption.py (original)
+++ pypy/dist/pypy/config/pypyoption.py Sun Dec 17 15:50:40 2006
@@ -97,6 +97,11 @@
default=False,
requires=[("objspace.std.withstrdict", False)]),
+ BoolOption("withsharingdict",
+ "use dictionaries that share the keys part",
+ default=False,
+ requires=[("objspace.std.withmultidict", True)]),
+
BoolOption("withdictmeasurement",
"create huge files with masses of information "
"about dictionaries",
Modified: pypy/dist/pypy/objspace/std/dictmultiobject.py
==============================================================================
--- pypy/dist/pypy/objspace/std/dictmultiobject.py (original)
+++ pypy/dist/pypy/objspace/std/dictmultiobject.py Sun Dec 17 15:50:40 2006
@@ -7,6 +7,17 @@
def _is_str(space, w_key):
return space.is_w(space.type(w_key), space.w_str)
+def _is_sane_hash(space, w_lookup_type):
+ """ Tell whether w_lookup_type is a type with a sane hash/eq function.
+ Handles the case of a non string key lookup in a dict implementation
+ that only holds string keys.
+ Types that have a sane hash/eq function should allow us to return True
+ directly to signal that the key is not in the dict in any case, so that
+ the caller can report a miss without converting to a general dict.
+ Only NoneType and int are recognised so far.
+ XXX The types should provide such a flag. """
+
+ # XXX there are many more such types
+ return (space.is_w(w_lookup_type, space.w_NoneType) or
+ space.is_w(w_lookup_type, space.w_int))
+
+
# DictImplementation lattice
# a dictionary starts with an EmptyDictImplementation, and moves down
@@ -192,16 +203,6 @@
self.entries[0].w_value = w_value
self.valid = 1
- def _is_sane_hash(self, w_lookup_type):
- """ Handles the case of a non string key lookup.
- Types that have a sane hash/eq function should allow us to return True
- directly to signal that the key is not in the dict in any case.
- XXX The types should provide such a flag. """
-
- space = self.space
- # XXX there are many more such types
- return space.is_w(w_lookup_type, space.w_NoneType) or space.is_w(w_lookup_type, space.w_int)
-
def _lookup(self, key):
assert isinstance(key, str)
_hash = hash(key)
@@ -270,7 +271,7 @@
return self
else:
raise KeyError
- elif self._is_sane_hash(w_key_type):
+ elif _is_sane_hash(self.space, w_key_type):
raise KeyError
else:
return self._convert_to_rdict().delitem(w_key)
@@ -283,7 +284,7 @@
w_lookup_type = space.type(w_lookup)
if space.is_w(w_lookup_type, space.w_str):
return self._lookup(space.str_w(w_lookup)).w_value
- elif self._is_sane_hash(w_lookup_type):
+ elif _is_sane_hash(self.space, w_lookup_type):
return None
else:
return self._convert_to_rdict().get(w_lookup)
@@ -329,7 +330,7 @@
return self
else:
return space.emptydictimpl
- elif self._is_sane_hash(w_key_type):
+ elif _is_sane_hash(space, w_key_type):
raise KeyError
else:
return self._as_rdict().delitem(w_key)
@@ -342,7 +343,7 @@
w_lookup_type = space.type(w_lookup)
if space.is_w(w_lookup_type, space.w_str):
return self.content.get(space.str_w(w_lookup), None)
- elif self._is_sane_hash(w_lookup_type):
+ elif _is_sane_hash(space, w_lookup_type):
return None
else:
return self._as_rdict().get(w_lookup)
@@ -367,15 +368,6 @@
space = self.space
return [(space.wrap(key), w_value) for (key, w_value) in self.content.iteritems()]
- def _is_sane_hash(self, w_lookup_type):
- """ Handles the case of a non string key lookup.
- Types that have a sane hash/eq function should allow us to return True
- directly to signal that the key is not in the dict in any case.
- XXX The types should provide such a flag. """
-
- space = self.space
- # XXX there are much more types
- return space.is_w(w_lookup_type, space.w_NoneType) or space.is_w(w_lookup_type, space.w_int)
def _as_rdict(self):
newimpl = RDictImplementation(self.space)
@@ -421,6 +413,146 @@
def items(self):
return self.content.items()
+class SharedStructure(object):
+ """ One node in a tree of key layouts.  Several SharedDictImplementation
+ instances can point to the same node; they then share its 'keys' dict
+ and each of them only keeps its own list of values. """
+ def __init__(self, keys=None, length=0,
+ other_structs=None,
+ last_key=None,
+ back_struct=None):
+ # 'keys' maps a key string to an int.  A non-negative int is the index
+ # of the value in the 'entries' list of a dict using this structure.
+ # A negative int i marks a key that is not part of this structure but
+ # leads to the successor structure other_structs[~i].
+ if keys is None:
+ keys = {}
+ self.keys = keys
+ # number of real (non-negative) keys; reported as the dict length
+ self.length = length
+ # the structure this one was derived from (None for a root)
+ self.back_struct = back_struct
+ # successor structures, each one key longer than this structure
+ if other_structs is None:
+ other_structs = []
+ self.other_structs = other_structs
+ # the key whose addition led from back_struct to this structure
+ self.last_key = last_key
+ if last_key is not None:
+ assert back_struct is not None
+ # starts out False; set to True by State for the root and by
+ # SharedDictImplementation.setitem_str when an already existing
+ # transition to this structure is followed
+ self.propagating = False
+
+ def new_structure(self, added_key):
+ """ Create, register and return the successor structure that holds
+ all real keys of this structure plus added_key. """
+ # copy only the real slots, not the transition markers (negative items)
+ keys = {}
+ for key, item in self.keys.iteritems():
+ if item >= 0:
+ keys[key] = item
+ new_structure = SharedStructure(keys, self.length + 1,
+ [], added_key, self)
+ # added_key gets the next free slot, right after the copied keys
+ new_index = len(keys)
+ new_structure.keys[added_key] = new_index
+ # remember the transition: ~position of the successor in other_structs
+ self.keys[added_key] = ~len(self.other_structs)
+ self.other_structs.append(new_structure)
+ return new_structure
+
+
+class State(object):
+ """ Holder of the root (empty) SharedStructure.  SharedDictImplementation
+ fetches it with space.fromcache(State) -- presumably one State per
+ space, TODO confirm the fromcache semantics. """
+ def __init__(self, space):
+ # note: the space argument is not used here
+ self.empty_structure = SharedStructure()
+ # dicts sitting on the root may always move on to a successor
+ # structure (see the 'propagating' check in setitem_str)
+ self.empty_structure.propagating = True
+
+
+class SharedDictImplementation(DictImplementation):
+ """ Dict implementation for string keys that stores only a list of values
+ ('entries') per dict; the mapping from key to index in that list lives
+ in a SharedStructure that several dicts can point to.  Operations that
+ are not handled in place are delegated to a freshly built
+ RDictImplementation or StrDictImplementation (see _as_rdict). """
+
+ def __init__(self, space):
+ self.space = space
+ # every dict starts at the root structure kept in the State object
+ self.structure = space.fromcache(State).empty_structure
+ # the values of this dict, indexed by the non-negative items of
+ # self.structure.keys
+ self.entries = []
+
+ def get(self, w_lookup):
+ """ Return the value stored for w_lookup.  None is returned for a
+ string key that is not present and for keys of a sane-hash type; for
+ other key types the result of the converted dict's get() is returned. """
+ space = self.space
+ w_lookup_type = space.type(w_lookup)
+ if space.is_w(w_lookup_type, space.w_str):
+ lookup = space.str_w(w_lookup)
+ i = self.structure.keys.get(lookup, -1)
+ # negative: unknown key, or a key that only marks the transition to
+ # a successor structure -- either way it is not in this dict
+ if i < 0:
+ return None
+ return self.entries[i]
+ elif _is_sane_hash(space, w_lookup_type):
+ return None
+ else:
+ # other key type: let a general dict do the lookup
+ return self._as_rdict().get(w_lookup)
+
+ def setitem(self, w_key, w_value):
+ """ Store w_value under w_key.  Returns the result of setitem_str for
+ string keys and the result of the converted dict's setitem() otherwise
+ -- presumably the implementation the caller should keep using. """
+ space = self.space
+ if space.is_w(space.type(w_key), space.w_str):
+ return self.setitem_str(w_key, w_value)
+ else:
+ return self._as_rdict().setitem(w_key, w_value)
+
+ def setitem_str(self, w_key, w_value):
+ """ Store w_value under the string key w_key, moving this dict to a
+ successor structure if the key is new. """
+ # m is the marker the next new successor would get; it is not in use
+ # yet, so as a default it means "key unknown to this structure"
+ m = ~len(self.structure.other_structs)
+ key = self.space.str_w(w_key)
+ i = self.structure.keys.get(key, m)
+ if i >= 0:
+ # key already has a slot in this dict: overwrite the value
+ self.entries[i] = w_value
+ return self
+ if not self.structure.propagating:
+ # adding keys from this structure is not allowed (yet): continue
+ # with a StrDictImplementation copy instead
+ return self._as_rdict(as_strdict=True).setitem_str(w_key, w_value)
+ if i == m:
+ # no dict has added this key from this structure before
+ new_structure = self.structure.new_structure(key)
+ else:
+ # follow the existing transition; now that it has been taken again
+ # the successor is marked as propagating
+ new_structure = self.structure.other_structs[~i]
+ new_structure.propagating = True
+ # the new key's slot is the next index, i.e. the end of entries
+ self.entries.append(w_value)
+ assert self.structure.length + 1 == new_structure.length
+ self.structure = new_structure
+ assert self.structure.keys[key] >= 0
+ return self
+
+ def delitem(self, w_key):
+ """ Delete w_key.  Only removing the most recently added string key is
+ done in place; every other string key is handed to a converted dict.
+ Raises KeyError for keys of a sane-hash type. """
+ space = self.space
+ w_key_type = space.type(w_key)
+ if space.is_w(w_key_type, space.w_str):
+ key = space.str_w(w_key)
+ if (self.structure.last_key is not None and
+ key == self.structure.last_key):
+ # undo the last insertion: drop the last value and step back to
+ # the structure we came from
+ self.entries.pop()
+ self.structure = self.structure.back_struct
+ return self
+ # also reached when the key is absent; presumably the converted
+ # dict raises KeyError then -- TODO confirm
+ return self._as_rdict().delitem(w_key)
+ elif _is_sane_hash(space, w_key_type):
+ raise KeyError
+ else:
+ return self._as_rdict().delitem(w_key)
+
+ def length(self):
+ # the number of keys is recorded in the structure
+ return self.structure.length
+
+ # the three iterators below work on a converted copy of this dict
+ def iteritems(self):
+ return self._as_rdict().iteritems()
+
+ def iterkeys(self):
+ return self._as_rdict().iterkeys()
+
+ def itervalues(self):
+ return self._as_rdict().itervalues()
+
+ def keys(self):
+ # wrapped keys that have a real slot, in the iteration order of
+ # self.structure.keys
+ space = self.space
+ return [space.wrap(key)
+ for (key, item) in self.structure.keys.iteritems()
+ if item >= 0]
+
+ def values(self):
+ # NOTE(review): this returns the internal list itself, not a copy, and
+ # in insertion order, whereas keys() and items() follow the iteration
+ # order of self.structure.keys -- confirm that no caller mutates the
+ # result or relies on keys() and values() being in the same order
+ return self.entries
+
+ def items(self):
+ # (wrapped key, value) pairs for all keys that have a real slot
+ space = self.space
+ return [(space.wrap(key), self.entries[item])
+ for (key, item) in self.structure.keys.iteritems()
+ if item >= 0]
+
+ def _as_rdict(self, as_strdict=False):
+ """ Build and return a new StrDictImplementation (as_strdict=True) or
+ RDictImplementation holding a copy of this dict's content.  This
+ object itself is left unchanged. """
+ if as_strdict:
+ newimpl = StrDictImplementation(self.space)
+ else:
+ newimpl = RDictImplementation(self.space)
+ for k, i in self.structure.keys.items():
+ if i >= 0:
+ # the result of setitem_str is ignored; this assumes it keeps
+ # returning newimpl itself -- TODO confirm
+ newimpl.setitem_str(self.space.wrap(k), self.entries[i])
+ return newimpl
+
+
import time, py
class DictInfo(object):
@@ -592,6 +724,8 @@
def __init__(w_self, space):
if space.config.objspace.std.withdictmeasurement:
w_self.implementation = MeasuringDictImplementation(space)
+ elif space.config.objspace.std.withsharingdict:
+ w_self.implementation = SharedDictImplementation(space)
else:
w_self.implementation = space.emptydictimpl
Modified: pypy/dist/pypy/objspace/std/test/test_dictmultiobject.py
==============================================================================
--- pypy/dist/pypy/objspace/std/test/test_dictmultiobject.py (original)
+++ pypy/dist/pypy/objspace/std/test/test_dictmultiobject.py Sun Dec 17 15:50:40 2006
@@ -14,6 +14,16 @@
def setup_class(cls):
cls.space = gettestobjspace(**{"objspace.std.withmultidict": True})
+class TestW_DictObject(test_dictobject.TestW_DictObject):
+ # Runs the inherited test_dictobject.TestW_DictObject tests on an object
+ # space created with objspace.std.withsharingdict switched on.
+ def setup_class(cls):
+ cls.space = gettestobjspace(**{"objspace.std.withsharingdict": True})
+
+class AppTest_DictObject(test_dictobject.AppTest_DictObject):
+ # Runs the inherited test_dictobject.AppTest_DictObject tests on an object
+ # space created with objspace.std.withsharingdict switched on.
+ def setup_class(cls):
+ cls.space = gettestobjspace(**{"objspace.std.withsharingdict": True})
+
+
+
class FakeSpace(test_dictobject.FakeSpace):
def str_w(self, string):
assert isinstance(string, str)
Modified: pypy/dist/pypy/objspace/std/test/test_dictobject.py
==============================================================================
--- pypy/dist/pypy/objspace/std/test/test_dictobject.py (original)
+++ pypy/dist/pypy/objspace/std/test/test_dictobject.py Sun Dec 17 15:50:40 2006
@@ -384,6 +384,7 @@
FakeSpace.config.objspace = Config()
FakeSpace.config.objspace.std = Config()
FakeSpace.config.objspace.std.withdictmeasurement = False
+FakeSpace.config.objspace.std.withsharingdict = False
from pypy.objspace.std.dictobject import getitem__Dict_ANY, setitem__Dict_ANY_ANY
More information about the Pypy-commit
mailing list