[pypy-svn] r54501 - in pypy/branch/gc-tweak/pypy/rpython/memory: . gc test

arigo at codespeak.net arigo at codespeak.net
Tue May 6 22:33:01 CEST 2008


Author: arigo
Date: Tue May  6 22:33:01 2008
New Revision: 54501

Modified:
   pypy/branch/gc-tweak/pypy/rpython/memory/gc/base.py
   pypy/branch/gc-tweak/pypy/rpython/memory/gc/generation.py
   pypy/branch/gc-tweak/pypy/rpython/memory/gc/hybrid.py
   pypy/branch/gc-tweak/pypy/rpython/memory/gcwrapper.py
   pypy/branch/gc-tweak/pypy/rpython/memory/support.py
   pypy/branch/gc-tweak/pypy/rpython/memory/test/test_support.py
Log:
Mostly playing around.  This is all candidate for being
archived in some branch instead of getting to the trunk:
a tree-based low-level AddressDict implementation.


Modified: pypy/branch/gc-tweak/pypy/rpython/memory/gc/base.py
==============================================================================
--- pypy/branch/gc-tweak/pypy/rpython/memory/gc/base.py	(original)
+++ pypy/branch/gc-tweak/pypy/rpython/memory/gc/base.py	Tue May  6 22:33:01 2008
@@ -162,7 +162,8 @@
         """
         if self.DEBUG:
             from pypy.rlib.objectmodel import we_are_translated
-            self._debug_seen = self.AddressStack()
+            from pypy.rpython.memory.support import AddressDict
+            self._debug_seen = AddressDict()
             self._debug_pending = self.AddressStack()
             if not we_are_translated():
                 self.root_walker._walk_prebuilt_gc(self._debug_record)
@@ -179,7 +180,7 @@
     def _debug_record(self, obj):
         seen = self._debug_seen
         if not seen.contains(obj):
-            seen.append(obj)
+            seen.add(obj)
             self._debug_pending.append(obj)
     def _debug_callback(self, root):
         obj = root.address[0]

Modified: pypy/branch/gc-tweak/pypy/rpython/memory/gc/generation.py
==============================================================================
--- pypy/branch/gc-tweak/pypy/rpython/memory/gc/generation.py	(original)
+++ pypy/branch/gc-tweak/pypy/rpython/memory/gc/generation.py	Tue May  6 22:33:01 2008
@@ -453,14 +453,14 @@
                       "nursery object with GCFLAG_NO_YOUNG_PTRS")
             self.trace(obj, self._debug_no_nursery_pointer, None)
         elif not self.is_in_nursery(obj):
-            ll_assert(self.old_objects_pointing_to_young.contains(obj),
+            ll_assert(self._d_oopty.contains(obj),
                       "missing from old_objects_pointing_to_young")
         if tid & GCFLAG_NO_HEAP_PTRS:
             ll_assert(self.is_last_generation(obj),
                       "GCFLAG_NO_HEAP_PTRS on non-3rd-generation object")
             self.trace(obj, self._debug_no_gen1or2_pointer, None)
         elif self.is_last_generation(obj):
-            ll_assert(self.last_generation_root_objects.contains(obj),
+            ll_assert(self._d_lgro.contains(obj),
                       "missing from last_generation_root_objects")
 
     def _debug_no_nursery_pointer(self, root, ignored):
@@ -473,7 +473,11 @@
 
     def debug_check_consistency(self):
         if self.DEBUG:
+            self._d_oopty = self.old_objects_pointing_to_young.stack2dict()
+            self._d_lgro = self.last_generation_root_objects.stack2dict()
             SemiSpaceGC.debug_check_consistency(self)
+            self._d_oopty.delete()
+            self._d_lgro.delete()
             self.old_objects_pointing_to_young.foreach(
                 self._debug_check_flag_1, None)
             self.last_generation_root_objects.foreach(

Modified: pypy/branch/gc-tweak/pypy/rpython/memory/gc/hybrid.py
==============================================================================
--- pypy/branch/gc-tweak/pypy/rpython/memory/gc/hybrid.py	(original)
+++ pypy/branch/gc-tweak/pypy/rpython/memory/gc/hybrid.py	Tue May  6 22:33:01 2008
@@ -470,12 +470,14 @@
         GenerationGC.debug_check_object(self, obj)
         tid = self.header(obj).tid
         if tid & GCFLAG_UNVISITED:
-            ll_assert(self.gen2_rawmalloced_objects.contains(obj),
+            ll_assert(self._d_gen2ro.contains(obj),
                       "GCFLAG_UNVISITED on non-gen2 object")
 
     def debug_check_consistency(self):
         if self.DEBUG:
+            self._d_gen2ro = self.gen2_rawmalloced_objects.stack2dict()
             GenerationGC.debug_check_consistency(self)
+            self._d_gen2ro.delete()
             self.gen2_rawmalloced_objects.foreach(self._debug_check_gen2, None)
             self.gen3_rawmalloced_objects.foreach(self._debug_check_gen3, None)
 

Modified: pypy/branch/gc-tweak/pypy/rpython/memory/gcwrapper.py
==============================================================================
--- pypy/branch/gc-tweak/pypy/rpython/memory/gcwrapper.py	(original)
+++ pypy/branch/gc-tweak/pypy/rpython/memory/gcwrapper.py	Tue May  6 22:33:01 2008
@@ -10,7 +10,7 @@
     def __init__(self, llinterp, flowgraphs, gc_class, GC_PARAMS={}):
         self.gc = gc_class(chunk_size = 10, **GC_PARAMS)
         self.gc.set_root_walker(LLInterpRootWalker(self))
-        #self.gc.DEBUG = True   # slows down test_gc.py a lot
+        self.gc.DEBUG = True
         self.llinterp = llinterp
         self.prepare_graphs(flowgraphs)
         self.gc.setup()

Modified: pypy/branch/gc-tweak/pypy/rpython/memory/support.py
==============================================================================
--- pypy/branch/gc-tweak/pypy/rpython/memory/support.py	(original)
+++ pypy/branch/gc-tweak/pypy/rpython/memory/support.py	Tue May  6 22:33:01 2008
@@ -1,5 +1,6 @@
 from pypy.rpython.lltypesystem import lltype, llmemory
 from pypy.rlib.objectmodel import free_non_gc_object, we_are_translated
+from pypy.rlib.rarithmetic import r_uint, LONG_BIT
 from pypy.rlib.debug import ll_assert
 
 DEFAULT_CHUNK_SIZE = 1019
@@ -124,19 +125,10 @@
                 count = chunk_size
         foreach._annspecialcase_ = 'specialize:arg(1)'
 
-        def contains(self, addr):
-            """Check if 'addr' is in the list.  That's just a linear scan,
-            so it's slow!"""
-            chunk = self.chunk
-            count = self.used_in_last_chunk
-            while chunk:
-                while count > 0:
-                    count -= 1
-                    if chunk.items[count] == addr:
-                        return True
-                chunk = chunk.next
-                count = chunk_size
-            return False
+        def stack2dict(self):
+            result = AddressDict()
+            self.foreach(result.setitem, llmemory.NULL)
+            return result
 
     cache[chunk_size] = AddressStack
     return AddressStack
@@ -207,3 +199,152 @@
 
     cache[chunk_size] = AddressDeque
     return AddressDeque
+
+
+def AddressDict():
+    if we_are_translated():
+        return LLAddressDict()
+    else:
+        return BasicAddressDict()
+
+class BasicAddressDict(object):
+    def __init__(self):
+        self.data = {}
+    def _key(self, addr):
+        return addr._fixup().ptr._obj
+    def delete(self):
+        pass
+    def contains(self, keyaddr):
+        return self._key(keyaddr) in self.data
+    def get(self, keyaddr, default=llmemory.NULL):
+        return self.data.get(self._key(keyaddr), default)
+    def setitem(self, keyaddr, valueaddr):
+        assert keyaddr
+        self.data[self._key(keyaddr)] = valueaddr
+    def add(self, keyaddr):
+        self.setitem(keyaddr, llmemory.NULL)
+
+# if diff_bit == -1, the node is a key/value pair (key=left/value=right)
+# if diff_bit >= 0, then the node is the root of a subtree where:
+#   * the keys have all exactly the same bits > diff_bit
+#   * the keys whose 'diff_bit' is 0 are in the 'left' subtree
+#   * the keys whose 'diff_bit' is 1 are in the 'right' subtree
+ADDRDICTNODE = lltype.Struct('AddrDictNode',
+                             ('diff_bit', lltype.Signed),
+                             ('left', llmemory.Address),
+                             ('right', llmemory.Address))
+
+class LLAddressDict(object):
+    _alloc_flavor_ = "raw"
+
+    def __init__(self):
+        self.root = lltype.malloc(ADDRDICTNODE, flavor='raw')
+        self.root.diff_bit = -1
+        self.root.left = llmemory.NULL
+
+    def delete(self):
+        node = self.root
+        parent = lltype.nullptr(ADDRDICTNODE)
+        while True:
+            if node.diff_bit >= 0:
+                next = _node_reveal(node.left)
+                node.left = _node_hide(parent)
+                parent = node
+                node = next
+            else:
+                lltype.free(node, flavor='raw')
+                if not parent:
+                    break
+                node = _node_reveal(parent.right)
+                grandparent = _node_reveal(parent.left)
+                lltype.free(parent, flavor='raw')
+                parent = grandparent
+        free_non_gc_object(self)
+
+    def contains(self, keyaddr):
+        if keyaddr:
+            node = self._lookup(keyaddr)
+            return keyaddr == node.left
+        else:
+            return False
+
+    def get(self, keyaddr, default=llmemory.NULL):
+        if keyaddr:
+            node = self._lookup(keyaddr)
+            if keyaddr == node.left:
+                return node.right
+        return default
+
+    def setitem(self, keyaddr, valueaddr):
+        ll_assert(bool(keyaddr), "cannot store NULL in an AddressDict")
+        node = self._lookup(keyaddr)
+        if node.left == llmemory.NULL or node.left == keyaddr:
+            node.left = keyaddr
+            node.right = valueaddr
+        else:
+            number1 = r_uint(llmemory.cast_adr_to_int(keyaddr))
+            number2 = r_uint(llmemory.cast_adr_to_int(node.left))
+            diff = number1 ^ number2
+            parentnode = self._lookup(keyaddr, difflimit = diff >> 1)
+            # all subnodes of parentnode have a key that is equal to
+            # 'keyaddr' for all bits in range(0, msb(diff)), and differs
+            # from 'keyaddr' exactly at bit msb(diff).
+            # At this point, parentnode.diff_bit < msb(diff).
+            nextbit = parentnode.diff_bit
+            copynode = lltype.malloc(ADDRDICTNODE, flavor='raw')
+            copynode.diff_bit = nextbit
+            copynode.left = parentnode.left
+            copynode.right = parentnode.right
+            bit = self._msb(diff, nextbit + 1)
+            newnode = lltype.malloc(ADDRDICTNODE, flavor='raw')
+            parentnode.diff_bit = bit
+            ll_assert(number1 & (r_uint(1) << bit) !=
+                      number2 & (r_uint(1) << bit), "setitem: bad 'bit'")
+            if number1 & (r_uint(1) << bit):
+                parentnode.left = _node_hide(copynode)
+                parentnode.right = _node_hide(newnode)
+            else:
+                parentnode.left = _node_hide(newnode)
+                parentnode.right = _node_hide(copynode)
+            newnode.diff_bit = -1
+            newnode.left = keyaddr
+            newnode.right = valueaddr
+        if not we_are_translated():
+            assert self.contains(keyaddr)
+
+    def add(self, keyaddr):
+        self.setitem(keyaddr, llmemory.NULL)
+
+    def _msb(self, value, lowerbound=0):
+        # Most Significant Bit: '(1<<result)' is the highest bit set in 'value'
+        ll_assert(value >= (r_uint(1) << lowerbound),
+                  "msb: bad value or lowerbound")
+        if value >= (r_uint(1) << (LONG_BIT-1)):
+            return LONG_BIT-1    # most significant possible bit
+        bit = lowerbound
+        while (value >> bit) > r_uint(1):
+            bit += 1
+        return bit
+
+    def _lookup(self, addr, difflimit=r_uint(0)):
+        # * with difflimit == 0, find and return the leaf node whose key is
+        #   equal to or closest from 'addr'.
+        # * with difflimit > 0, look for the node N closest to the root such
+        #   that all the keys of the subtree starting at node N are equal to
+        #   the given 'addr' at least for all bits > msb(difflimit).
+        number = r_uint(llmemory.cast_adr_to_int(addr))
+        node = self.root
+        while node.diff_bit >= 0:
+            mask = r_uint(1) << node.diff_bit
+            if mask <= difflimit:
+                return node
+            if number & mask:
+                node = _node_reveal(node.right)
+            else:
+                node = _node_reveal(node.left)
+        return node
+
+_node_hide = llmemory.cast_ptr_to_adr
+
+def _node_reveal(nodeaddr):
+    return llmemory.cast_adr_to_ptr(nodeaddr, lltype.Ptr(ADDRDICTNODE))

Modified: pypy/branch/gc-tweak/pypy/rpython/memory/test/test_support.py
==============================================================================
--- pypy/branch/gc-tweak/pypy/rpython/memory/test/test_support.py	(original)
+++ pypy/branch/gc-tweak/pypy/rpython/memory/test/test_support.py	Tue May  6 22:33:01 2008
@@ -1,6 +1,8 @@
+import sys
 from pypy.rlib.objectmodel import free_non_gc_object
 from pypy.rpython.memory.support import get_address_stack
 from pypy.rpython.memory.support import get_address_deque
+from pypy.rpython.memory.support import LLAddressDict
 
 from pypy.rpython.test.test_llinterp import interpret
 from pypy.rpython.lltypesystem import lltype, llmemory
@@ -139,3 +141,44 @@
     AddressStack = get_address_stack()
     res = interpret(f, [], malloc_check=False)
     assert res
+
+
+def test_LLAddressDict():
+    import random
+
+    class intaddr(object):
+        _TYPE = llmemory.Address
+        def __init__(self, intval):
+            self.intval = intval
+        def _cast_to_int(self):
+            return self.intval
+        def __repr__(self):
+            return '<intaddr 0x%x>' % (self.intval & (sys.maxint*2+1),)
+        def __eq__(self, other):
+            return isinstance(other, intaddr) and self.intval == other.intval
+        def __ne__(self, other):
+            return not self.__eq__(other)
+
+    for i in range(8) + range(8, 80, 10):
+        examples = {}
+        lst = []
+        for j in range(i):
+            if j % 17 == 13:
+                intval = random.choice(lst)
+            else:
+                intval = random.randrange(-sys.maxint, sys.maxint) or 1
+            lst.append(intval)
+            examples[intval] = True
+
+        d = LLAddressDict()
+        for intval in lst:
+            d.setitem(intaddr(intval), intaddr(-intval))
+        for intval in lst:
+            assert d.contains(intaddr(intval))
+            assert d.get(intaddr(intval), "???").intval == -intval
+        for intval in lst:
+            for j in range(intval-5, intval+5):
+                if j not in examples:
+                    assert not d.contains(intaddr(j))
+        assert not d.contains(llmemory.NULL)
+        d.delete()



More information about the Pypy-commit mailing list