[pypy-svn] r54587 - in pypy/branch/gc-tweak/pypy/rpython/memory: . gc gc/test test

arigo at codespeak.net arigo at codespeak.net
Fri May 9 13:37:21 CEST 2008


Author: arigo
Date: Fri May  9 13:37:18 2008
New Revision: 54587

Modified:
   pypy/branch/gc-tweak/pypy/rpython/memory/gc/base.py
   pypy/branch/gc-tweak/pypy/rpython/memory/gc/generation.py
   pypy/branch/gc-tweak/pypy/rpython/memory/gc/hybrid.py
   pypy/branch/gc-tweak/pypy/rpython/memory/gc/semispace.py
   pypy/branch/gc-tweak/pypy/rpython/memory/gc/test/test_direct.py
   pypy/branch/gc-tweak/pypy/rpython/memory/lldict.py
   pypy/branch/gc-tweak/pypy/rpython/memory/support.py
   pypy/branch/gc-tweak/pypy/rpython/memory/test/test_lldict.py
Log:
(likely in progress)

Kill kill kill the obscure id() logic, and replace it
with something custom but sane in each GC class.


Modified: pypy/branch/gc-tweak/pypy/rpython/memory/gc/base.py
==============================================================================
--- pypy/branch/gc-tweak/pypy/rpython/memory/gc/base.py	(original)
+++ pypy/branch/gc-tweak/pypy/rpython/memory/gc/base.py	Fri May  9 13:37:18 2008
@@ -198,110 +198,6 @@
 class MovingGCBase(GCBase):
     moving_gc = True
 
-    def __init__(self):
-        # WaRnInG!  Putting GC objects as fields of the GC itself is
-        # basically *not* working in general!  When running tests with
-        # the gcwrapper, there is no way they can be returned from
-        # get_roots_from_llinterp().  When the whole GC goes through the
-        # gctransformer, though, it works if the fields are read-only
-        # (and thus only ever reference a prebuilt list or dict).  These
-        # prebuilt lists or dicts themselves can be mutated and point to
-        # more non-prebuild GC objects; this is fine because the
-        # internal GC ptr in the prebuilt list or dict is found by
-        # gctypelayout and listed in addresses_of_static_ptrs.
-
-        # XXX I'm not sure any more about the warning above.  The fields
-        # of 'self' are found by gctypelayout and added to
-        # addresses_of_static_ptrs_in_nongc, so in principle they could
-        # be mutated and still be found by collect().
-
-        self.wr_to_objects_with_id = []
-        self.object_id_dict = {}
-        self.object_id_dict_ends_at = 0
-
-    def id(self, ptr):
-        self.disable_finalizers()
-        try:
-            return self._compute_id(ptr)
-        finally:
-            self.enable_finalizers()
-
-    def _compute_id(self, ptr):
-        # XXX this may explode if --no-translation-rweakref is specified
-        # ----------------------------------------------------------------
-        # Basic logic: the list item wr_to_objects_with_id[i] contains a
-        # weakref to the object whose id is i + 1.  The object_id_dict is
-        # an optimization that tries to reduce the number of linear
-        # searches in this list.
-        # ----------------------------------------------------------------
-        # Invariant: if object_id_dict_ends_at >= 0, then object_id_dict
-        # contains all pairs {address: id}, for the addresses
-        # of all objects that are the targets of the weakrefs of the
-        # following slice: wr_to_objects_with_id[:object_id_dict_ends_at].
-        # ----------------------------------------------------------------
-        # Essential: as long as notify_objects_just_moved() is not called,
-        # we assume that the objects' addresses did not change.  We also
-        # assume that the address of a live object cannot be reused for
-        # another object without an intervening notify_objects_just_moved()
-        # call, but this could be fixed easily if needed.
-        # ----------------------------------------------------------------
-        # First check the dictionary
-        i = self.object_id_dict_ends_at
-        if i < 0:
-            self.object_id_dict.clear()      # dictionary invalid
-            self.object_id_dict_ends_at = 0
-            i = 0
-        else:
-            adr = llmemory.cast_ptr_to_adr(ptr)
-            try:
-                i = self.object_id_dict[adr]
-            except KeyError:
-                pass
-            else:
-                # double-check that the answer we got is correct
-                lst = self.wr_to_objects_with_id
-                target = llmemory.weakref_deref(llmemory.GCREF, lst[i])
-                ll_assert(target == ptr, "bogus object_id_dict")
-                return i + 1     # found via the dict
-        # Walk the tail of the list, where entries are not also in the dict
-        lst = self.wr_to_objects_with_id
-        end = len(lst)
-        freeentry = -1
-        while i < end:
-            target = llmemory.weakref_deref(llmemory.GCREF, lst[i])
-            if not target:
-                freeentry = i
-            else:
-                ll_assert(self.get_type_id(llmemory.cast_ptr_to_adr(target))
-                             > 0, "bogus weakref in compute_id()")
-                # record this entry in the dict
-                adr = llmemory.cast_ptr_to_adr(target)
-                self.object_id_dict[adr] = i
-                if target == ptr:
-                    break               # found
-            i += 1
-        else:
-            # not found
-            wr = llmemory.weakref_create(ptr)
-            if freeentry < 0:
-                ll_assert(end == len(lst), "unexpected lst growth in gc_id")
-                i = end
-                lst.append(wr)
-            else:
-                i = freeentry       # reuse the id() of a dead object
-                lst[i] = wr
-            adr = llmemory.cast_ptr_to_adr(ptr)
-            self.object_id_dict[adr] = i
-        # all entries up to and including index 'i' are now valid in the dict
-        # unless a collection occurred while we were working, in which case
-        # the object_id_dict is bogus anyway
-        if self.object_id_dict_ends_at >= 0:
-            self.object_id_dict_ends_at = i + 1
-        return i + 1       # this produces id() values 1, 2, 3, 4...
-
-    def notify_objects_just_moved(self):
-        self.object_id_dict_ends_at = -1
-
 
 def choose_gc_from_config(config):
     """Return a (GCClass, GC_PARAMS) from the given config object.

Modified: pypy/branch/gc-tweak/pypy/rpython/memory/gc/generation.py
==============================================================================
--- pypy/branch/gc-tweak/pypy/rpython/memory/gc/generation.py	(original)
+++ pypy/branch/gc-tweak/pypy/rpython/memory/gc/generation.py	Fri May  9 13:37:18 2008
@@ -62,6 +62,7 @@
         # it lists exactly the old and static objects whose
         # GCFLAG_NO_YOUNG_PTRS bit is not set.
         self.young_objects_with_weakrefs = self.AddressStack()
+        self.young_objects_with_id = self.AddressDict()
         self.reset_nursery()
 
         # compute the constant lower bounds for the attributes
@@ -249,6 +250,7 @@
     def semispace_collect(self, size_changing=False):
         self.reset_young_gcflags() # we are doing a full collection anyway
         self.weakrefs_grow_older()
+        self.ids_grow_older()
         self.reset_nursery()
         if DEBUG_PRINT:
             llop.debug_print(lltype.Void, "major collect, size changing", size_changing)
@@ -343,7 +345,8 @@
             # GCFLAG_NO_YOUNG_PTRS set again by trace_and_drag_out_of_nursery
             if self.young_objects_with_weakrefs.non_empty():
                 self.invalidate_young_weakrefs()
-            self.notify_objects_just_moved()
+            if self.young_objects_with_id.length() > 0:
+                self.update_young_objects_with_id()
             # mark the nursery as free and fill it with zeroes again
             llarena.arena_reset(self.nursery, self.nursery_size, True)
             if DEBUG_PRINT:
@@ -454,6 +457,28 @@
         # overridden by HybridGC
         return (self.header(obj).tid & GCFLAG_EXTERNAL) != 0
 
+    def _compute_id(self, obj):
+        if self.is_in_nursery(obj):
+            result = self.young_objects_with_id.get(obj)
+            if not result:
+                result = self._next_id()
+                self.young_objects_with_id.setitem(obj, result)
+            return result
+        else:
+            return SemiSpaceGC._compute_id(self, obj)
+
+    def update_young_objects_with_id(self):
+        self.young_objects_with_id.foreach(self._update_object_id,
+                                           self.objects_with_id)
+        self.young_objects_with_id.clear()
+
+    def ids_grow_older(self):
+        self.young_objects_with_id.foreach(self._id_grow_older, None)
+        self.young_objects_with_id.clear()
+
+    def _id_grow_older(self, obj, id, ignored):
+        self.objects_with_id.setitem(obj, id)
+
     def debug_check_object(self, obj):
         """Check the invariants about 'obj' that should be true
         between collections."""

Modified: pypy/branch/gc-tweak/pypy/rpython/memory/gc/hybrid.py
==============================================================================
--- pypy/branch/gc-tweak/pypy/rpython/memory/gc/hybrid.py	(original)
+++ pypy/branch/gc-tweak/pypy/rpython/memory/gc/hybrid.py	Fri May  9 13:37:18 2008
@@ -466,6 +466,22 @@
                              dead_size, "bytes in",
                              dead_count, "objs")
 
+    def _compute_id_for_external(self, obj):
+        # the base classes make the assumption that all external objects
+        # have an id equal to their address.  This is wrong if the object
+        # is a generation 3 rawmalloced object that initially lived in
+        # the semispaces.
+        if self.is_last_generation(obj):
+            # in this case, we still need to check if the object had its
+            # id taken before.  If not, we can use its address as its id.
+            return self.objects_with_id.get(obj, obj)
+        else:
+            # a generation 2 external object was never non-external in
+            # the past, so it cannot be listed in self.objects_with_id.
+            return obj
+        # XXX a possible optimization would be to use three dicts, one
+        # for each generation, instead of mixing gen2 and gen3 objects.
+
     def debug_check_object(self, obj):
         """Check the invariants about 'obj' that should be true
         between collections."""

Modified: pypy/branch/gc-tweak/pypy/rpython/memory/gc/semispace.py
==============================================================================
--- pypy/branch/gc-tweak/pypy/rpython/memory/gc/semispace.py	(original)
+++ pypy/branch/gc-tweak/pypy/rpython/memory/gc/semispace.py	Fri May  9 13:37:18 2008
@@ -3,6 +3,7 @@
 from pypy.rpython.lltypesystem.llmemory import NULL, raw_malloc_usage
 from pypy.rpython.memory.support import DEFAULT_CHUNK_SIZE
 from pypy.rpython.memory.support import get_address_stack, get_address_deque
+from pypy.rpython.memory.support import AddressDict
 from pypy.rpython.memory.gcheader import GCHeaderBuilder
 from pypy.rpython.lltypesystem import lltype, llmemory, llarena
 from pypy.rlib.objectmodel import free_non_gc_object
@@ -50,8 +51,11 @@
         self.gcheaderbuilder = GCHeaderBuilder(self.HDR)
         self.AddressStack = get_address_stack(chunk_size)
         self.AddressDeque = get_address_deque(chunk_size)
+        self.AddressDict = AddressDict
         self.finalizer_lock_count = 0
         self.red_zone = 0
+        self.id_free_list = self.AddressStack()
+        self.next_free_id = 1
 
     def setup(self):
         if DEBUG_PRINT:
@@ -66,6 +70,7 @@
         self.objects_with_finalizers = self.AddressDeque()
         self.run_finalizers = self.AddressDeque()
         self.objects_with_weakrefs = self.AddressStack()
+        self.objects_with_id = self.AddressDict()
 
     def disable_finalizers(self):
         self.finalizer_lock_count += 1
@@ -235,9 +240,9 @@
             scan = self.deal_with_objects_with_finalizers(scan)
         if self.objects_with_weakrefs.non_empty():
             self.invalidate_weakrefs()
+        self.update_objects_with_id()
         self.finished_full_collect()
         self.debug_check_consistency()
-        self.notify_objects_just_moved()
         if not size_changing:
             llarena.arena_reset(fromspace, self.space_size, True)
             self.record_red_zone()
@@ -573,6 +578,62 @@
         finally:
             self.finalizer_lock_count -= 1
 
+    def id(self, ptr):
+        obj = llmemory.cast_ptr_to_adr(ptr)
+        if self.header(obj).tid & GCFLAG_EXTERNAL:
+            result = self._compute_id_for_external(obj)
+        else:
+            result = self._compute_id(obj)
+        return llmemory.cast_adr_to_int(result)
+
+    def _next_id(self):
+        # return an id not currently in use (as an address instead of an int)
+        if self.id_free_list.non_empty():
+            result = self.id_free_list.pop()    # reuse a dead id
+        else:
+            # make up a fresh id number
+            result = llmemory.cast_int_to_adr(self.next_free_id)
+            self.next_free_id += 2    # only odd numbers, to make lltype
+                                      # and llmemory happy and to avoid
+                                      # clashes with real addresses
+        return result
+
+    def _compute_id(self, obj):
+        # look if the object is listed in objects_with_id
+        result = self.objects_with_id.get(obj)
+        if not result:
+            result = self._next_id()
+            self.objects_with_id.setitem(obj, result)
+        return result
+
+    def _compute_id_for_external(self, obj):
+        # For prebuilt objects, we can simply return their address.
+        # This method is overridden by the HybridGC.
+        return obj
+
+    def update_objects_with_id(self):
+        old = self.objects_with_id
+        new_objects_with_id = self.AddressDict(old.length())
+        old.foreach(self._update_object_id_FAST, new_objects_with_id)
+        old.delete()
+        self.objects_with_id = new_objects_with_id
+
+    def _update_object_id(self, obj, id, new_objects_with_id):
+        # safe version (used by subclasses)
+        if self.surviving(obj):
+            newobj = self.get_forwarding_address(obj)
+            new_objects_with_id.setitem(newobj, id)
+        else:
+            self.id_free_list.append(id)
+
+    def _update_object_id_FAST(self, obj, id, new_objects_with_id):
+        # unsafe version, assumes that the new_objects_with_id is large enough
+        if self.surviving(obj):
+            newobj = self.get_forwarding_address(obj)
+            new_objects_with_id.insertclean(newobj, id)
+        else:
+            self.id_free_list.append(id)
+
     def debug_check_object(self, obj):
         """Check the invariants about 'obj' that should be true
         between collections."""

Modified: pypy/branch/gc-tweak/pypy/rpython/memory/gc/test/test_direct.py
==============================================================================
--- pypy/branch/gc-tweak/pypy/rpython/memory/gc/test/test_direct.py	(original)
+++ pypy/branch/gc-tweak/pypy/rpython/memory/gc/test/test_direct.py	Fri May  9 13:37:18 2008
@@ -238,6 +238,32 @@
         self.gc.collect()
         verify()
 
+    def test_id(self):
+        ids = {}
+        def allocate_bunch(count=50):
+            base = len(self.stackroots)
+            for i in range(count):
+                p = self.malloc(S)
+                self.stackroots.append(p)
+            for i in range(count):
+                j = base + (i*1291) % count
+                pid = self.gc.id(self.stackroots[j])
+                assert isinstance(pid, int)
+                ids[j] = pid
+        def verify():
+            for j, expected in ids.items():
+                assert self.gc.id(self.stackroots[j]) == expected
+        allocate_bunch(5)
+        verify()
+        allocate_bunch(75)
+        verify()
+        allocate_bunch(5)
+        verify()
+        self.gc.collect()
+        verify()
+        self.gc.collect()
+        verify()
+
 
 class TestSemiSpaceGC(DirectGCTest):
     from pypy.rpython.memory.gc.semispace import SemiSpaceGC as GCClass

Modified: pypy/branch/gc-tweak/pypy/rpython/memory/lldict.py
==============================================================================
--- pypy/branch/gc-tweak/pypy/rpython/memory/lldict.py	(original)
+++ pypy/branch/gc-tweak/pypy/rpython/memory/lldict.py	Fri May  9 13:37:18 2008
@@ -57,6 +57,9 @@
 def dict_add(d, key):
     rdict.ll_dict_setitem(d, key, llmemory.NULL)
 
+def dict_insertclean(d, key, value):
+    rdict.ll_dict_insertclean(d, key, value, _hash(key))
+
 def dict_foreach(d, callback, arg):
     entries = d.entries
     i = len(entries) - 1
@@ -87,6 +90,8 @@
                          'setitem': rdict.ll_dict_setitem,
                          'get': dict_get,
                          'add': dict_add,
+                         'insertclean': dict_insertclean,
+                         'clear': rdict.ll_clear,
                          'foreach': dict_foreach,
                          'keyhash': dict_keyhash,
                          'keyeq': None,

Modified: pypy/branch/gc-tweak/pypy/rpython/memory/support.py
==============================================================================
--- pypy/branch/gc-tweak/pypy/rpython/memory/support.py	(original)
+++ pypy/branch/gc-tweak/pypy/rpython/memory/support.py	Fri May  9 13:37:18 2008
@@ -228,6 +228,9 @@
     def _key(self, addr):
         return addr._fixup().ptr._obj
 
+    def _wrapkey(self, obj):
+        return llmemory.cast_ptr_to_adr(obj._as_ptr())
+
     def delete(self):
         pass
 
@@ -244,14 +247,23 @@
         assert keyaddr
         self.data[self._key(keyaddr)] = valueaddr
 
+    def insertclean(self, keyaddr, valueaddr):
+        assert keyaddr
+        key = self._key(keyaddr)
+        assert key not in self.data
+        self.data[key] = valueaddr
+
     def add(self, keyaddr):
         self.setitem(keyaddr, llmemory.NULL)
 
+    def clear(self):
+        self.data.clear()
+
     def foreach(self, callback, arg):
         """Invoke 'callback(key, value, arg)' for all items in the dict.
         Typically, 'callback' is a bound method and 'arg' can be None."""
         for key, value in self.data.iteritems():
-            callback(key, value, arg)
+            callback(self._wrapkey(key), value, arg)
 
 
 def copy_and_update(dict, surviving, updated_address):

Modified: pypy/branch/gc-tweak/pypy/rpython/memory/test/test_lldict.py
==============================================================================
--- pypy/branch/gc-tweak/pypy/rpython/memory/test/test_lldict.py	(original)
+++ pypy/branch/gc-tweak/pypy/rpython/memory/test/test_lldict.py	Fri May  9 13:37:18 2008
@@ -42,9 +42,9 @@
         assert lldict.alloc_count == 0
 
     def test_copy_and_update(self):
-        d = lldict.newdict()
+        d = lldict.newdict(3)
         d.setitem(intaddr(41), intaddr(44))
-        d.setitem(intaddr(42), intaddr(45))
+        d.insertclean(intaddr(42), intaddr(45))
         d.setitem(intaddr(43), intaddr(46))
         def surviving(key):
             return key.intval != 41
@@ -60,6 +60,15 @@
         d2.delete()
         assert lldict.alloc_count == 0
 
+    def test_clear(self):
+        d = lldict.newdict()
+        d.setitem(intaddr(41), intaddr(42))
+        d.clear()
+        assert d.length() == 0
+        assert not d.contains(intaddr(41))
+        d.delete()
+        assert lldict.alloc_count == 0
+
     def test_random(self):
         for i in range(8) + range(8, 80, 10):
             examples = {}



More information about the Pypy-commit mailing list