[pypy-svn] r17473 - in pypy/dist/pypy: interpreter objspace/std objspace/std/test

arigo at codespeak.net arigo at codespeak.net
Sun Sep 11 20:36:53 CEST 2005


Author: arigo
Date: Sun Sep 11 20:36:51 2005
New Revision: 17473

Modified:
   pypy/dist/pypy/interpreter/baseobjspace.py
   pypy/dist/pypy/objspace/std/dictobject.py
   pypy/dist/pypy/objspace/std/dicttype.py
   pypy/dist/pypy/objspace/std/marshal_impl.py
   pypy/dist/pypy/objspace/std/objspace.py
   pypy/dist/pypy/objspace/std/test/test_dictobject.py
Log:
And eventually...

Got rid of the hand-written hash table logic in dictobject.py;
W_DictObject now keeps its items in an r_dict instead.


Modified: pypy/dist/pypy/interpreter/baseobjspace.py
==============================================================================
--- pypy/dist/pypy/interpreter/baseobjspace.py	(original)
+++ pypy/dist/pypy/interpreter/baseobjspace.py	Sun Sep 11 20:36:51 2005
@@ -369,6 +369,10 @@
         """shortcut for space.is_true(space.is_(w_obj1, w_obj2))"""
         return self.is_true(self.is_(w_obj1, w_obj2))
 
+    def hash_w(self, w_obj):
+        """shortcut for space.int_w(space.hash(w_obj))"""
+        return self.int_w(self.hash(w_obj))
+
     def newbool(self, b):
         if b:
             return self.w_True

Modified: pypy/dist/pypy/objspace/std/dictobject.py
==============================================================================
--- pypy/dist/pypy/objspace/std/dictobject.py	(original)
+++ pypy/dist/pypy/objspace/std/dictobject.py	Sun Sep 11 20:36:51 2005
@@ -9,108 +9,33 @@
 from pypy.interpreter import gateway
 
 from pypy.rpython.rarithmetic import r_uint
+from pypy.rpython.objectmodel import r_dict
 
-class Entry:
-    def __init__(self):
-        self.hash = r_uint(0)
-        self.w_key = None
-        self.w_value = None
-    def __repr__(self):
-        return '<Entry %r,%r,%r>'%(self.hash, self.w_key, self.w_value)
 
 class W_DictObject(W_Object):
     from pypy.objspace.std.dicttype import dict_typedef as typedef
 
-    def __init__(w_self, space, list_pairs_w):
+    def __init__(w_self, space, w_otherdict=None):
         W_Object.__init__(w_self, space)
-        
-        w_self.used = 0
-        w_self.data = []
-        w_self.resize(len(list_pairs_w)*2)
-        w_self.w_dummy = space.newlist([])
+        if w_otherdict is None:
+            w_self.content = r_dict(space.eq_w, space.hash_w)
+        else:
+            w_self.content = w_otherdict.content.copy()
+
+    def initialize_content(w_self, list_pairs_w):
         for w_k, w_v in list_pairs_w:
-            w_self.insert(w_self.hash(w_k), w_k, w_v)
-        
+            w_self.content[w_k] = w_v
+
     def __repr__(w_self):
         """ representation for debugging purposes """
-        return "%s(%s)" % (w_self.__class__.__name__, w_self.data)
-
-    def hash(w_self, w_obj):
-        space = w_self.space
-        return r_uint(space.int_w(space.hash(w_obj)))
-
-    def insert(self, h, w_key, w_value):
-        entry = self.lookdict(h, w_key)
-        if entry.w_value is None:
-            self.used += 1
-            entry.hash = h
-            entry.w_key = w_key
-            entry.w_value = w_value
-        else:
-            entry.w_value = w_value
-
-    def resize(self, minused):
-        newsize = 4
-        while newsize < minused:
-            newsize *= 2
-        od = self.data
-
-        self.used = 0
-        self.data = [Entry() for i in range(newsize)]
-        for entry in od:
-            if entry.w_value is not None:
-                self.insert(entry.hash, entry.w_key, entry.w_value)
-
-    def lookdict(self, lookup_hash, w_lookup):
-        assert isinstance(lookup_hash, r_uint)
-        space = self.space
-        data = self.data
-        mask = len(data) - 1   # len(data) is expected to be a power of 2
-        i = lookup_hash & mask
-
-        entry = data[i]
-        if entry.w_key is None or space.is_w(w_lookup, entry.w_key):
-            return entry
-        if entry.w_key is self.w_dummy:
-            freeslot = entry
-        else:
-            if entry.hash == lookup_hash and space.eq_w(entry.w_key, w_lookup):
-                if self.data is not data:
-                    # the eq_w() modified the dict sufficiently to have it
-                    # switch to another table.  Can't return 'entry', which
-                    # belongs to the old table.  Start over...
-                    return self.lookdict(lookup_hash, w_lookup)
-                return entry
-            freeslot = None
-
-        perturb = lookup_hash
-        while 1:
-            i = (i << 2) + i + perturb + 1
-            entry = data[i & mask]
-            if entry.w_key is None:
-                if freeslot:
-                    return freeslot
-                else:
-                    return entry
-            if entry.hash == lookup_hash and entry.w_key is not self.w_dummy \
-                   and space.eq_w(entry.w_key, w_lookup):
-                if self.data is not data:
-                    # the eq_w() modified the dict sufficiently to have it
-                    # switch to another table.  Can't return 'entry', which
-                    # belongs to the old table.  Start over...
-                    return self.lookdict(lookup_hash, w_lookup)
-                return entry
-            if entry.w_key is self.w_dummy and freeslot is None:
-                freeslot = entry
-            perturb >>= 5
+        return "%s(%s)" % (w_self.__class__.__name__, w_self.content)
 
     def unwrap(w_dict):
         space = w_dict.space
         result = {}
-        for entry in w_dict.data:
-            if entry.w_value is not None:
-                # XXX generic mixed types unwrap
-                result[space.unwrap(entry.w_key)] = space.unwrap(entry.w_value)
+        for w_key, w_value in w_dict.content.items():
+            # generic mixed types unwrap
+            result[space.unwrap(w_key)] = space.unwrap(w_value)
         return result
 
 registerimplementation(W_DictObject)
@@ -119,9 +44,8 @@
 def init__Dict(space, w_dict, __args__):
     w_src, w_kwds = __args__.parse('dict',
                           (['seq_or_map'], None, 'kwargs'), # signature
-                          [W_DictObject(space, [])])        # default argument
-    dict_clear__Dict(space, w_dict)
-    # XXX do dict({...}) with dict_update__Dict_Dict()
+                          [W_DictObject(space)])        # default argument
+    w_dict.content.clear()
     try:
         space.getattr(w_src, space.wrap("keys"))
     except OperationError:
@@ -132,41 +56,35 @@
                 raise OperationError(space.w_ValueError,
                              space.wrap("dict() takes a sequence of pairs"))
             w_k, w_v = pair
-            setitem__Dict_ANY_ANY(space, w_dict, w_k, w_v)
+            w_dict.content[w_k] = w_v
     else:
         if space.is_true(w_src):
             from pypy.objspace.std.dicttype import dict_update__ANY_ANY
             dict_update__ANY_ANY(space, w_dict, w_src)
     if space.is_true(w_kwds):
-        space.call_method(w_dict, 'update', w_kwds)
+        from pypy.objspace.std.dicttype import dict_update__ANY_ANY
+        dict_update__ANY_ANY(space, w_dict, w_kwds)
 
 def getitem__Dict_ANY(space, w_dict, w_lookup):
-    entry = w_dict.lookdict(w_dict.hash(w_lookup), w_lookup)
-    if entry.w_value is not None:
-        return entry.w_value
-    else:
+    try:
+        return w_dict.content[w_lookup]
+    except KeyError:
         raise OperationError(space.w_KeyError, w_lookup)
 
 def setitem__Dict_ANY_ANY(space, w_dict, w_newkey, w_newvalue):
-    w_dict.insert(w_dict.hash(w_newkey), w_newkey, w_newvalue)
-    if 2*w_dict.used > len(w_dict.data):
-        w_dict.resize(2*w_dict.used)
+    w_dict.content[w_newkey] = w_newvalue
 
 def delitem__Dict_ANY(space, w_dict, w_lookup):
-    entry = w_dict.lookdict(w_dict.hash(w_lookup), w_lookup)
-    if entry.w_value is not None:
-        w_dict.used -= 1
-        entry.w_key = w_dict.w_dummy
-        entry.w_value = None
-    else:
+    try:
+        del w_dict.content[w_lookup]
+    except KeyError:
         raise OperationError(space.w_KeyError, w_lookup)
     
 def len__Dict(space, w_dict):
-    return space.wrap(w_dict.used)
+    return space.wrap(len(w_dict.content))
 
 def contains__Dict_ANY(space, w_dict, w_lookup):
-    entry = w_dict.lookdict(w_dict.hash(w_lookup), w_lookup)
-    return space.newbool(entry.w_value is not None)
+    return space.newbool(w_lookup in w_dict.content)
 
 dict_has_key__Dict_ANY = contains__Dict_ANY
 
@@ -177,60 +95,55 @@
     if space.is_true(space.is_(w_left, w_right)):
         return space.w_True
 
-    if w_left.used != w_right.used:
+    if len(w_left.content) != len(w_right.content):
         return space.w_False
-    for entry in w_left.data:
-        w_val = entry.w_value
-        if w_val is None:
-            continue
-        rightentry = w_right.lookdict(entry.hash, entry.w_key)
-        if rightentry.w_value is None:
+    for w_key, w_val in w_left.content.iteritems():
+        try:
+            w_rightval = w_right.content[w_key]
+        except KeyError:
             return space.w_False
-        if not space.eq_w(w_val, rightentry.w_value):
+        if not space.eq_w(w_val, w_rightval):
             return space.w_False
     return space.w_True
 
-def characterize(space, adata, w_b):
+def characterize(space, acontent, bcontent):
     """ (similar to CPython) 
-    returns the smallest key in adata for which b's value is different or absent and this value """
+    returns the smallest key in acontent for which b's value is different or absent and this value """
     w_smallest_diff_a_key = None
     w_its_value = None
-    for entry in adata:
-        w_val = entry.w_value
-        if w_val is None:
-            continue
-        w_key = entry.w_key
+    for w_key, w_val in acontent.iteritems():
         if w_smallest_diff_a_key is None or space.is_true(space.lt(w_key, w_smallest_diff_a_key)):
-            b_entry = w_b.lookdict(entry.hash, w_key)
-            if b_entry.w_value is None:
+            try:
+                w_bvalue = bcontent[w_key]
+            except KeyError:
                 w_its_value = w_val
                 w_smallest_diff_a_key = w_key
             else:
-                if not space.eq_w(w_val, b_entry.w_value):
+                if not space.eq_w(w_val, w_bvalue):
                     w_its_value = w_val
                     w_smallest_diff_a_key = w_key
     return w_smallest_diff_a_key, w_its_value
 
 def lt__Dict_Dict(space, w_left, w_right):
     # Different sizes, no problem
-    if w_left.used < w_right.used:
+    leftcontent = w_left.content
+    rightcontent = w_right.content
+    if len(leftcontent) < len(rightcontent):
         return space.w_True
-    if w_left.used > w_right.used:
+    if len(leftcontent) > len(rightcontent):
         return space.w_False
 
     # Same size
-    w_leftdiff, w_leftval = characterize(space, w_left.data, w_right)
+    w_leftdiff, w_leftval = characterize(space, leftcontent, rightcontent)
     if w_leftdiff is None:
         return space.w_False
-    w_rightdiff, w_rightval = characterize(space, w_right.data, w_left)
-    w_res = space.w_False
+    w_rightdiff, w_rightval = characterize(space, rightcontent, leftcontent)
     if w_rightdiff is None:
         # w_leftdiff is not None, w_rightdiff is None
         return space.w_True 
-    w_isequal = space.eq(w_leftdiff, w_rightdiff)
     w_res = space.lt(w_leftdiff, w_rightdiff)
-    if (space.is_w(w_res, space.w_False) and 
-        space.is_true(w_isequal) and 
+    if (not space.is_true(w_res) and
+        space.eq_w(w_leftdiff, w_rightdiff) and 
         w_rightval is not None):
         w_res = space.lt(w_leftval, w_rightval)
     return w_res
@@ -239,24 +152,17 @@
     raise OperationError(space.w_TypeError,space.wrap("dict objects are unhashable"))
 
 def dict_copy__Dict(space, w_self):
-    return W_DictObject(space, [(entry.w_key,entry.w_value)
-                                for entry in w_self.data
-                                if entry.w_value is not None])
+    return W_DictObject(space, w_self)
 
 def dict_items__Dict(space, w_self):
-    return space.newlist([ space.newtuple([entry.w_key,entry.w_value])
-                           for entry in w_self.data
-                           if entry.w_value is not None])
+    return space.newlist([ space.newtuple([w_key, w_value])
+                           for w_key, w_value in w_self.content.iteritems() ])
 
 def dict_keys__Dict(space, w_self):
-    return space.newlist([ entry.w_key
-                           for entry in w_self.data
-                           if entry.w_value is not None])
+    return space.newlist(w_self.content.keys())
 
 def dict_values__Dict(space, w_self):
-    return space.newlist([ entry.w_value
-                           for entry in w_self.data
-                           if entry.w_value is not None])
+    return space.newlist(w_self.content.values())
 
 def dict_iteritems__Dict(space, w_self):
     return W_DictIter_Items(space, w_self)
@@ -268,15 +174,10 @@
     return W_DictIter_Values(space, w_self)
 
 def dict_clear__Dict(space, w_self):
-    w_self.data = [Entry()]
-    w_self.used = 0
+    w_self.content.clear()
 
 def dict_get__Dict_ANY_ANY(space, w_dict, w_lookup, w_default):
-    entry = w_dict.lookdict(w_dict.hash(w_lookup), w_lookup)
-    if entry.w_value is not None:
-        return entry.w_value
-    else:
-        return w_default
+    return w_dict.content.get(w_lookup, w_default)
 
 app = gateway.applevel('''
     def dictrepr(currently_in_repr, d):
@@ -306,7 +207,7 @@
 dictrepr = app.interphook("dictrepr")
 
 def repr__Dict(space, w_dict):
-    if w_dict.used == 0:
+    if len(w_dict.content) == 0:
         return space.wrap('{}')
     w_currently_in_repr = space.getexecutioncontext()._py_repr
     return dictrepr(space, w_currently_in_repr, w_dict)
@@ -320,58 +221,72 @@
 
     def __init__(w_self, space, w_dictobject):
         W_Object.__init__(w_self, space)
-        w_self.w_dictobject = w_dictobject
-        w_self.len = w_dictobject.used
+        w_self.content = content = w_dictobject.content
+        w_self.len = len(content)
         w_self.pos = 0
-        w_self.datapos = 0
+        w_self.setup_iterator()
 
-    def return_entry(w_self, entry):
+    def return_entry(w_self, w_key, w_value):
         raise NotImplementedError
 
 registerimplementation(W_DictIterObject)
 
 class W_DictIter_Keys(W_DictIterObject):
-    def return_entry(w_self, entry):
-        return entry.w_key
+    def setup_iterator(w_self):
+        w_self.iterator = w_self.content.iterkeys()
+    def next_entry(w_self):
+        # note that this 'for' loop only runs once, at most
+        for w_key in w_self.iterator:
+            return w_key
+        else:
+            return None
 
 class W_DictIter_Values(W_DictIterObject):
-    def return_entry(w_self, entry):
-        return entry.w_value
+    def setup_iterator(w_self):
+        w_self.iterator = w_self.content.itervalues()
+    def next_entry(w_self):
+        # note that this 'for' loop only runs once, at most
+        for w_value in w_self.iterator:
+            return w_value
+        else:
+            return None
 
 class W_DictIter_Items(W_DictIterObject):
-    def return_entry(w_self, entry):
-        return w_self.space.newtuple([entry.w_key, entry.w_value])
+    def setup_iterator(w_self):
+        w_self.iterator = w_self.content.iteritems()
+    def next_entry(w_self):
+        # note that this 'for' loop only runs once, at most
+        for w_key, w_value in w_self.iterator:
+            return w_self.space.newtuple([w_key, w_value])
+        else:
+            return None
 
 
 def iter__DictIterObject(space, w_dictiter):
     return w_dictiter
 
 def next__DictIterObject(space, w_dictiter):
-    w_dict = w_dictiter.w_dictobject
-    if w_dict is not None:
-        if w_dictiter.len != w_dict.used:
+    content = w_dictiter.content
+    if content is not None:
+        if w_dictiter.len != len(content):
             w_dictiter.len = -1   # Make this error state sticky
             raise OperationError(space.w_RuntimeError,
                      space.wrap("dictionary changed size during iteration"))
         # look for the next entry
-        i = w_dictiter.datapos
-        data = w_dict.data
-        while i < len(data):
-            entry = data[i]
-            i += 1
-            if entry.w_value is not None:
-                w_dictiter.pos += 1
-                w_dictiter.datapos = i
-                return w_dictiter.return_entry(entry)
+        w_result = w_dictiter.next_entry()
+        if w_result is not None:
+            w_dictiter.pos += 1
+            return w_result
         # no more entries
-        w_dictiter.w_dictobject = None
+        w_dictiter.content = None
     raise OperationError(space.w_StopIteration, space.w_None)
 
 def len__DictIterObject(space, w_dictiter):
-    w_dict = w_dictiter.w_dictobject
-    if w_dict is None or w_dictiter.len == -1 :
+    content = w_dictiter.content
+    if content is None or w_dictiter.len == -1:
         return space.wrap(0)
     return space.wrap(w_dictiter.len - w_dictiter.pos)
+
 # ____________________________________________________________
 
 from pypy.objspace.std import dicttype

Modified: pypy/dist/pypy/objspace/std/dicttype.py
==============================================================================
--- pypy/dist/pypy/objspace/std/dicttype.py	(original)
+++ pypy/dist/pypy/objspace/std/dicttype.py	Sun Sep 11 20:36:51 2005
@@ -100,7 +100,7 @@
 def descr__new__(space, w_dicttype, __args__):
     from pypy.objspace.std.dictobject import W_DictObject
     w_obj = space.allocate_instance(W_DictObject, w_dicttype)
-    W_DictObject.__init__(w_obj, space, [])
+    W_DictObject.__init__(w_obj, space)
     return w_obj
 
 # ____________________________________________________________

Modified: pypy/dist/pypy/objspace/std/marshal_impl.py
==============================================================================
--- pypy/dist/pypy/objspace/std/marshal_impl.py	(original)
+++ pypy/dist/pypy/objspace/std/marshal_impl.py	Sun Sep 11 20:36:51 2005
@@ -341,17 +341,16 @@
 
 def marshal_w__Dict(space, w_dict, m):
     m.start(TYPE_DICT)
-    for entry in w_dict.data:
-        if entry.w_value is not None:
-            m.put_w_obj(entry.w_key)
-            m.put_w_obj(entry.w_value)
+    for w_key, w_value in w_dict.content.iteritems():
+        m.put_w_obj(w_key)
+        m.put_w_obj(w_value)
     m.atom(TYPE_NULL)
 
 def unmarshal_Dict(space, u, tc):
     # since primitive lists are not optimized and we don't know
     # the dict size in advance, use the dict's setitem instead
     # of building a list of tuples.
-    w_dic = W_DictObject(space, [])
+    w_dic = W_DictObject(space)
     setter = dictobject.setitem__Dict_ANY_ANY
     while 1:
         w_key = u.get_w_obj(True)

Modified: pypy/dist/pypy/objspace/std/objspace.py
==============================================================================
--- pypy/dist/pypy/objspace/std/objspace.py	(original)
+++ pypy/dist/pypy/objspace/std/objspace.py	Sun Sep 11 20:36:51 2005
@@ -248,7 +248,7 @@
             return W_UnicodeObject(self, [unichr(ord(u)) for u in x]) # xxx
         if isinstance(x, dict):
             items_w = [(self.wrap(k), self.wrap(v)) for (k, v) in x.iteritems()]
-            return W_DictObject(self, items_w)
+            return self.newdict(items_w)
         if isinstance(x, float):
             return W_FloatObject(self, x)
         if isinstance(x, tuple):
@@ -324,7 +324,9 @@
         return W_ListObject(self, list_w)
 
     def newdict(self, list_pairs_w):
-        return W_DictObject(self, list_pairs_w)
+        w_result = W_DictObject(self)
+        w_result.initialize_content(list_pairs_w)
+        return w_result
 
     def newslice(self, w_start, w_end, w_step):
         return W_SliceObject(self, w_start, w_end, w_step)
@@ -395,7 +397,7 @@
     def is_true(self, w_obj):
         # XXX don't look!
         if isinstance(w_obj, W_DictObject):
-            return not not w_obj.used
+            return len(w_obj.content) != 0
         else:
             return DescrOperation.is_true(self, w_obj)
 

Modified: pypy/dist/pypy/objspace/std/test/test_dictobject.py
==============================================================================
--- pypy/dist/pypy/objspace/std/test/test_dictobject.py	(original)
+++ pypy/dist/pypy/objspace/std/test/test_dictobject.py	Sun Sep 11 20:36:51 2005
@@ -7,13 +7,14 @@
 
     def test_empty(self):
         space = self.space
-        d = W_DictObject(space, [])
+        d = W_DictObject(space)
         assert not self.space.is_true(d)
 
     def test_nonempty(self):
         space = self.space
         wNone = space.w_None
-        d = W_DictObject(space, [(wNone, wNone)])
+        d = W_DictObject(space)
+        d.initialize_content([(wNone, wNone)])
         assert space.is_true(d)
         i = space.getitem(d, wNone)
         equal = space.eq(i, wNone)
@@ -23,7 +24,8 @@
         space = self.space
         wk1 = space.wrap('key')
         wone = space.wrap(1)
-        d = W_DictObject(space, [(space.wrap('zero'),space.wrap(0))])
+        d = W_DictObject(space)
+        d.initialize_content([(space.wrap('zero'),space.wrap(0))])
         space.setitem(d,wk1,wone)
         wback = space.getitem(d,wk1)
         assert self.space.eq_w(wback,wone)
@@ -31,8 +33,8 @@
     def test_delitem(self):
         space = self.space
         wk1 = space.wrap('key')
-        d = W_DictObject(space,
-                              [(space.wrap('zero'),space.wrap(0)),
+        d = W_DictObject(space)
+        d.initialize_content( [(space.wrap('zero'),space.wrap(0)),
                                (space.wrap('one'),space.wrap(1)),
                                (space.wrap('two'),space.wrap(2))])
         space.delitem(d,space.wrap('one'))
@@ -338,12 +340,10 @@
 
 # the minimal 'space' needed to use a W_DictObject
 class FakeSpace:
-    def hash(self, obj):
+    def hash_w(self, obj):
         return hash(obj)
     def unwrap(self, x):
         return x
-    def int_w(self, x):
-        return x
     def is_true(self, x):
         return x
     def is_(self, x, y):
@@ -364,7 +364,7 @@
 
     def test_stressdict(self):
         from random import randint
-        d = W_DictObject(self.space, [])
+        d = W_DictObject(self.space)
         N = 10000
         pydict = {}
         for i in range(N):



More information about the Pypy-commit mailing list