[pypy-svn] r47453 - in pypy/dist/pypy: rpython/lltypesystem/test rpython/memory/gc rpython/memory/test translator/c/test

arigo at codespeak.net arigo at codespeak.net
Sun Oct 14 22:54:04 CEST 2007


Author: arigo
Date: Sun Oct 14 22:54:02 2007
New Revision: 47453

Modified:
   pypy/dist/pypy/rpython/lltypesystem/test/test_llarena.py
   pypy/dist/pypy/rpython/memory/gc/base.py
   pypy/dist/pypy/rpython/memory/gc/semispace.py
   pypy/dist/pypy/rpython/memory/test/test_gc.py
   pypy/dist/pypy/rpython/memory/test/test_transformed_gc.py
   pypy/dist/pypy/translator/c/test/test_newgc.py
Log:
Implement some cleverness and safety in the id() for moving collectors.
Sadly, this is based on address trickery that makes testing on top of
the llinterp a bit annoying.  So this check-in contains many new tests,
most of them skipped :-/  At least, the genc test passes.


Modified: pypy/dist/pypy/rpython/lltypesystem/test/test_llarena.py
==============================================================================
--- pypy/dist/pypy/rpython/lltypesystem/test/test_llarena.py	(original)
+++ pypy/dist/pypy/rpython/lltypesystem/test/test_llarena.py	Sun Oct 14 22:54:02 2007
@@ -114,10 +114,10 @@
 
 
 SX = lltype.Struct('S', ('x',lltype.Signed))
+SPTR = lltype.Ptr(SX)
 precomputed_size = round_up_for_allocation(llmemory.sizeof(SX))
 
 def test_look_inside_object():
-    SPTR = lltype.Ptr(SX)
     myarenasize = 50
     a = arena_malloc(myarenasize, False)
     b = a + round_up_for_allocation(llmemory.sizeof(lltype.Char))
@@ -132,6 +132,19 @@
     arena_free(a)
     return 42
 
+def test_address_eq_as_int():
+    a = arena_malloc(50, False)
+    arena_reserve(a, precomputed_size)
+    p = llmemory.cast_adr_to_ptr(a, SPTR)
+    a1 = llmemory.cast_ptr_to_adr(p)
+    assert a == a1
+    assert not (a != a1)
+    assert (a+1) != a1
+    assert not ((a+1) == a1)
+    py.test.skip("cast_adr_to_int() is hard to get consistent")
+    assert llmemory.cast_adr_to_int(a) == llmemory.cast_adr_to_int(a1)
+    assert llmemory.cast_adr_to_int(a+1) == llmemory.cast_adr_to_int(a1) + 1
+
 
 def test_llinterpreted():
     from pypy.rpython.test.test_llinterp import interpret

Modified: pypy/dist/pypy/rpython/memory/gc/base.py
==============================================================================
--- pypy/dist/pypy/rpython/memory/gc/base.py	(original)
+++ pypy/dist/pypy/rpython/memory/gc/base.py	Sun Oct 14 22:54:02 2007
@@ -1,4 +1,5 @@
 from pypy.rpython.lltypesystem import lltype, llmemory
+from pypy.rlib.objectmodel import debug_assert
 
 class GCBase(object):
     _alloc_flavor_ = "raw"
@@ -86,32 +87,87 @@
 
     def __init__(self):
         self.wr_to_objects_with_id = []
+        self.object_id_dict = {}
+        self.object_id_dict_ends_at = 0
 
     def id(self, ptr):
-        # XXX linear search! this is probably too slow to be reasonable :-(
-        # On the other hand, it punishes you for using 'id', so that's good :-)
+        self.disable_finalizers()
+        try:
+            return self._compute_id(ptr)
+        finally:
+            self.enable_finalizers()
+
+    def _compute_id(self, ptr):
         # XXX this may explode if --no-translation-rweakref is specified
+        # ----------------------------------------------------------------
+        # Basic logic: the list item wr_to_objects_with_id[i] contains a
+        # weakref to the object whose id is i + 1.  The object_id_dict is
+        # an optimization that tries to reduce the number of linear
+        # searches in this list.
+        # ----------------------------------------------------------------
+        # Invariant: if object_id_dict_ends_at >= 0, then object_id_dict
+        # contains (at least) all pairs {address: id}, for the addresses
+        # of all objects that are the targets of the weakrefs of the
+        # following slice: wr_to_objects_with_id[:object_id_dict_ends_at].
+        # ----------------------------------------------------------------
+        # Essential: as long as notify_objects_just_moved() is not called,
+        # we assume that the objects' addresses did not change.
+        # ----------------------------------------------------------------
+        # First check the dictionary
+        i = self.object_id_dict_ends_at
+        if i < 0:
+            self.object_id_dict.clear()      # dictionary invalid
+            self.object_id_dict_ends_at = 0
+            i = 0
+        else:
+            adr = llmemory.cast_ptr_to_adr(ptr)
+            try:
+                i = self.object_id_dict[adr]
+            except KeyError:
+                pass
+            else:
+                # double-check that the answer we got is correct
+                lst = self.wr_to_objects_with_id
+                target = llmemory.weakref_deref(llmemory.GCREF, lst[i])
+                debug_assert(target == ptr, "bogus object_id_dict")
+                return i + 1     # found via the dict
+        # Walk the tail of the list, where entries are not also in the dict
         lst = self.wr_to_objects_with_id
-        i = len(lst)
+        end = len(lst)
         freeentry = -1
-        while i > 0:
-            i -= 1
+        while i < end:
             target = llmemory.weakref_deref(llmemory.GCREF, lst[i])
             if not target:
                 freeentry = i
-            elif target == ptr:
-                break               # found
+            else:
+                # record this entry in the dict
+                adr = llmemory.cast_ptr_to_adr(target)
+                self.object_id_dict[adr] = i
+                if target == ptr:
+                    break               # found
+            i += 1
         else:
             # not found
             wr = llmemory.weakref_create(ptr)
-            if freeentry == -1:
-                i = len(lst)
+            if freeentry < 0:
+                debug_assert(end == len(lst), "unexpected lst growth in gc_id")
+                i = end
                 lst.append(wr)
             else:
                 i = freeentry       # reuse the id() of a dead object
                 lst[i] = wr
+            adr = llmemory.cast_ptr_to_adr(ptr)
+            self.object_id_dict[adr] = i
+        # all entries up to and including index 'i' are now valid in the dict
+        # unless a collection occurred while we were working, in which case
+        # the object_id_dict is bogus anyway
+        if self.object_id_dict_ends_at >= 0:
+            self.object_id_dict_ends_at = i + 1
         return i + 1       # this produces id() values 1, 2, 3, 4...
 
+    def notify_objects_just_moved(self):
+        self.object_id_dict_ends_at = -1
+
 
 def choose_gc_from_config(config):
     """Return a (GCClass, GC_PARAMS) from the given config object.

Modified: pypy/dist/pypy/rpython/memory/gc/semispace.py
==============================================================================
--- pypy/dist/pypy/rpython/memory/gc/semispace.py	(original)
+++ pypy/dist/pypy/rpython/memory/gc/semispace.py	Sun Oct 14 22:54:02 2007
@@ -40,8 +40,16 @@
         self.free = self.tospace
         self.objects_with_finalizers = self.AddressLinkedList()
         self.run_finalizers = self.AddressLinkedList()
-        self.executing_finalizers = False
         self.objects_with_weakrefs = self.AddressLinkedList()
+        self.finalizer_lock_count = 0
+
+    def disable_finalizers(self):
+        self.finalizer_lock_count += 1
+
+    def enable_finalizers(self):
+        self.finalizer_lock_count -= 1
+        if self.run_finalizers.non_empty():
+            self.execute_finalizers()
 
     def malloc_fixedsize(self, typeid, size, can_collect, has_finalizer=False,
                          contains_weakptr=False):
@@ -171,13 +179,14 @@
         scan = self.free = tospace
         self.collect_roots()
         scan = self.scan_copied(scan)
-        if self.objects_with_weakrefs.non_empty():
-            self.invalidate_weakrefs()
         if self.run_finalizers.non_empty():
             self.update_run_finalizers()
         if self.objects_with_finalizers.non_empty():
             self.deal_with_objects_with_finalizers()
         scan = self.scan_copied(scan)
+        if self.objects_with_weakrefs.non_empty():
+            self.invalidate_weakrefs()
+        self.notify_objects_just_moved()
         if not size_changing:
             llarena.arena_reset(fromspace, self.space_size, True)
             self.execute_finalizers()
@@ -326,9 +335,9 @@
         self.run_finalizers = new_run_finalizer
 
     def execute_finalizers(self):
-        if self.executing_finalizers:
+        if self.finalizer_lock_count > 0:
             return    # the outer invocation of execute_finalizers() will do it
-        self.executing_finalizers = True
+        self.finalizer_lock_count = 1
         try:
             while self.run_finalizers.non_empty():
                 obj = self.run_finalizers.pop()
@@ -336,7 +345,7 @@
                 finalizer = self.getfinalizer(hdr.typeid)
                 finalizer(obj)
         finally:
-            self.executing_finalizers = False
+            self.finalizer_lock_count = 0
 
     STATISTICS_NUMBERS = 0
 

Modified: pypy/dist/pypy/rpython/memory/test/test_gc.py
==============================================================================
--- pypy/dist/pypy/rpython/memory/test/test_gc.py	(original)
+++ pypy/dist/pypy/rpython/memory/test/test_gc.py	Sun Oct 14 22:54:02 2007
@@ -252,6 +252,10 @@
         assert res
 
     def test_id(self):
+        py.test.skip("the MovingGCBase.id() logic can't be directly run")
+        # XXX ^^^ the problem is that the MovingGCBase instance holds
+        # references to GC objects - a list of weakrefs and a dict - and
+        # there is no way we can return these from get_roots_from_llinterp().
         class A(object):
             pass
         a1 = A()
@@ -270,26 +274,6 @@
         res = self.interpret(f, [])
         assert res == 0
 
-    def test_many_ids(self):
-        class A(object):
-            pass
-        def f():
-            from pypy.rpython.lltypesystem import lltype, rffi
-            alist = [A() for i in range(500)]
-            idarray = lltype.malloc(rffi.INTP.TO, len(alist), flavor='raw')
-            # Compute the id of all elements of the list.  The goal is
-            # to not allocate memory, so that if the GC needs memory to
-            # remember the ids, it will trigger some collections itself
-            for i in range(len(alist)):
-                idarray[i] = id(alist[i])
-            for j in range(2):
-                if j == 1:     # allocate some stuff between the two iterations
-                    [A() for i in range(200)]
-                for i in range(len(alist)):
-                    assert idarray[i] == id(alist[i])
-            lltype.free(idarray, flavor='raw')
-        self.interpret(f, [])
-
 
 class TestMarkSweepGC(GCTest):
     from pypy.rpython.memory.gc.marksweep import MarkSweepGC as GCClass

Modified: pypy/dist/pypy/rpython/memory/test/test_transformed_gc.py
==============================================================================
--- pypy/dist/pypy/rpython/memory/test/test_transformed_gc.py	(original)
+++ pypy/dist/pypy/rpython/memory/test/test_transformed_gc.py	Sun Oct 14 22:54:02 2007
@@ -663,3 +663,31 @@
             from pypy.rpython.memory.gc.semispace import SemiSpaceGC as GCClass
             GC_PARAMS = {'space_size': 2048}
             root_stack_depth = 200
+
+    def test_many_ids(self):
+        py.test.skip("fails for bad reasons in lltype.py :-(")
+        class A(object):
+            pass
+        def f():
+            from pypy.rpython.lltypesystem import lltype, rffi
+            alist = [A() for i in range(50)]
+            idarray = lltype.malloc(rffi.INTP.TO, len(alist), flavor='raw')
+            # Compute the id of all the elements of the list.  The goal is
+            # to not allocate memory, so that if the GC needs memory to
+            # remember the ids, it will trigger some collections itself
+            i = 0
+            while i < len(alist):
+                idarray[i] = id(alist[i])
+                i += 1
+            j = 0
+            while j < 2:
+                if j == 1:     # allocate some stuff between the two iterations
+                    [A() for i in range(20)]
+                i = 0
+                while i < len(alist):
+                    assert idarray[i] == id(alist[i])
+                    i += 1
+                j += 1
+            lltype.free(idarray, flavor='raw')
+        run = self.runner(f)
+        run([])

Modified: pypy/dist/pypy/translator/c/test/test_newgc.py
==============================================================================
--- pypy/dist/pypy/translator/c/test/test_newgc.py	(original)
+++ pypy/dist/pypy/translator/c/test/test_newgc.py	Sun Oct 14 22:54:02 2007
@@ -827,3 +827,33 @@
 class TestSemiSpaceGC(TestUsingFramework):
     frameworkgc = "semispace"
     should_be_moving = True
+
+    def test_many_ids(self):
+        class A(object):
+            pass
+        def f():
+            from pypy.rpython.lltypesystem import lltype, rffi
+            alist = [A() for i in range(50000)]
+            idarray = lltype.malloc(rffi.INTP.TO, len(alist), flavor='raw')
+            # Compute the id of all elements of the list.  The goal is
+            # to not allocate memory, so that if the GC needs memory to
+            # remember the ids, it will trigger some collections itself
+            i = 0
+            while i < len(alist):
+                idarray[i] = id(alist[i])
+                i += 1
+            j = 0
+            while j < 2:
+                if j == 1:     # allocate some stuff between the two iterations
+                    [A() for i in range(20000)]
+                i = 0
+                while i < len(alist):
+                    if idarray[i] != id(alist[i]):
+                        return j * 1000000 + i
+                    i += 1
+                j += 1
+            lltype.free(idarray, flavor='raw')
+            return -2
+        fn = self.getcompiled(f)
+        res = fn()
+        assert res == -2



More information about the Pypy-commit mailing list