[pypy-commit] pypy stm-gc: Intermediate check-in.

arigo noreply at buildbot.pypy.org
Fri Apr 13 16:06:26 CEST 2012


Author: Armin Rigo <arigo at tunes.org>
Branch: stm-gc
Changeset: r54332:2abd27c473de
Date: 2012-04-13 15:21 +0200
http://bitbucket.org/pypy/pypy/changeset/2abd27c473de/

Log:	Intermediate check-in.

diff --git a/pypy/rpython/memory/gc/stmgc.py b/pypy/rpython/memory/gc/stmgc.py
--- a/pypy/rpython/memory/gc/stmgc.py
+++ b/pypy/rpython/memory/gc/stmgc.py
@@ -33,7 +33,10 @@
 #   - The LOCAL objects might be YOUNG or OLD depending on whether they
 #     already survived a collection.  YOUNG LOCAL objects are either in
 #     the nursery or, if they are big, raw-malloced.  OLD LOCAL objects
-#     are in the shared area.
+#     are in the shared area.  Getting the write barrier right for both
+#     this and the general STM mechanisms is tricky, so for now this GC
+#     is not actually generational (slow when running long transactions
+#     or before running transactions at all).
 #
 GCFLAG_GLOBAL     = first_gcflag << 0     # keep in sync with et.c
 GCFLAG_WAS_COPIED = first_gcflag << 1     # keep in sync with et.c
@@ -65,7 +68,7 @@
 
     TRANSLATION_PARAMS = {
         'stm_operations': 'use_real_one',
-        'nursery_size': 4*1024*1024,            # 4 MB
+        'nursery_size': 32*1024*1024,           # 32 MB
 
         "page_size": 1024*WORD,                 # copied from minimark.py
         "arena_size": 65536*WORD,               # copied from minimark.py
@@ -120,32 +123,20 @@
         self.main_thread_tls = StmGCTLS(self, in_main_thread=True)
         self.main_thread_tls.start_transaction()
 
+    @always_inline
     def get_tls(self):
         from pypy.rpython.memory.gc.stmtls import StmGCTLS
         tls = self.stm_operations.get_tls()
         return StmGCTLS.cast_address_to_tls_object(tls)
 
+    def enter_transactional_mode(self):
+        self.main_thread_tls.enter_transactional_mode()
+
+    def leave_transactional_mode(self):
+        self.main_thread_tls.leave_transactional_mode()
+
     # ----------
 
-    @always_inline
-    def allocate_bump_pointer(self, size):
-        tls = self.collector.get_tls()
-        free = tls.nursery_free
-        top  = tls.nursery_top
-        if (top - free) < llmemory.raw_malloc_usage(size):
-            free = self.local_collection(size)
-        tls.nursery_free = free + size
-        return free
-
-    @dont_inline
-    def local_collection(self, size):
-        tls = self.collector.get_tls()
-        if not tls.nursery_free:
-            fatalerror("malloc in a non-main thread but outside a transaction")
-        #...
-        xxxxxxxxx
-
-
     def malloc_fixedsize_clear(self, typeid, size,
                                needs_finalizer=False,
                                is_finalizer_light=False,
@@ -159,7 +150,7 @@
         # Get the memory from the nursery.
         size_gc_header = self.gcheaderbuilder.size_gc_header
         totalsize = size_gc_header + size
-        result = self.allocate_bump_pointer(totalsize)
+        result = self.get_tls().allocate_bump_pointer(totalsize)
         #
         # Build the object.
         llarena.arena_reserve(result, totalsize)
@@ -180,7 +171,7 @@
         nonvarsize = size_gc_header + size
         totalsize = nonvarsize + itemsize * length
         totalsize = llarena.round_up_for_allocation(totalsize)
-        result = self.allocate_bump_pointer(totalsize)
+        result = self.get_tls().allocate_bump_pointer(totalsize)
         llarena.arena_reserve(result, totalsize)
         obj = result + size_gc_header
         self.init_gc_object(result, typeid, flags=0)
@@ -315,7 +306,7 @@
         def _stm_write_barrier_global(obj):
             if not stm_operations.in_transaction():
                 return obj
-            # we need to find of make a local copy
+            # we need to find or make a local copy
             hdr = self.header(obj)
             if hdr.tid & GCFLAG_WAS_COPIED == 0:
                 # in this case, we are sure that we don't have a copy
diff --git a/pypy/rpython/memory/gc/stmtls.py b/pypy/rpython/memory/gc/stmtls.py
--- a/pypy/rpython/memory/gc/stmtls.py
+++ b/pypy/rpython/memory/gc/stmtls.py
@@ -3,9 +3,11 @@
 from pypy.rpython.annlowlevel import cast_base_ptr_to_instance, base_ptr_lltype
 from pypy.rlib.objectmodel import we_are_translated, free_non_gc_object
 from pypy.rlib.rarithmetic import r_uint
-from pypy.rlib.debug import ll_assert
+from pypy.rlib.debug import ll_assert, debug_start, debug_stop, fatalerror
 
 from pypy.rpython.memory.gc.stmgc import WORD, NULL
+from pypy.rpython.memory.gc.stmgc import always_inline, dont_inline
+from pypy.rpython.memory.gc.stmgc import GCFLAG_GLOBAL
 
 
 class StmGCTLS(object):
@@ -34,21 +36,18 @@
         self.nursery_size  = self.gc.nursery_size
         self.nursery_start = self._alloc_nursery(self.nursery_size)
         #
-        # --- the local raw-malloced objects, young and old
-        self.rawmalloced_young_objects = self.null_address_dict()
-        self.rawmalloced_old_objects = None
-        self.rawmalloced_total_size = r_uint(0)
-        # --- the local objects with weakrefs, young and old
-        self.young_objects_with_weakrefs = self.AddressStack()
-        self.old_objects_with_weakrefs = self.AddressStack()
-        # --- support for id and identityhash: maps nursery objects with
-        #     GCFLAG_HAS_SHADOW to their future location at the next
-        #     local collection
-        self.nursery_objects_shadows = self.AddressDict()
+        # --- the local raw-malloced objects (chained list via hdr.version)
+        self.rawmalloced_objects = NULL
+        # --- the local "normal" old objects (chained list via hdr.version)
+        self.old_objects = NULL
+        # --- the local objects with weakrefs (chained list via hdr.version)
+        #self.young_objects_with_weakrefs = NULL
+        #self.old_objects_with_weakrefs = NULL
         #
         self._register_with_C_code()
 
     def teardown_thread(self):
+        self._cleanup_state()
         self._unregister_with_C_code()
         self._free_nursery(self.nursery_start)
         free_non_gc_object(self)
@@ -67,9 +66,9 @@
             tls = cast_instance_to_base_ptr(self)
             tlsaddr = llmemory.cast_ptr_to_adr(tls)
         else:
-            n = 10000 + len(self.nontranslated_dict)
+            n = 10000 + len(StmGCTLS.nontranslated_dict)
             tlsaddr = rffi.cast(llmemory.Address, n)
-            self.nontranslated_dict[n] = self
+            StmGCTLS.nontranslated_dict[n] = self
         self.stm_operations.set_tls(tlsaddr, int(self.in_main_thread))
 
     def _unregister_with_C_code(self):
@@ -78,16 +77,40 @@
         self.stm_operations.del_tls()
 
     @staticmethod
-    def cast_address_to_tls_object(self, tlsaddr):
+    @always_inline
+    def cast_address_to_tls_object(tlsaddr):
         if we_are_translated():
             tls = llmemory.cast_adr_to_ptr(tlsaddr, base_ptr_lltype())
             return cast_base_ptr_to_instance(tls)
         else:
             n = rffi.cast(lltype.Signed, tlsaddr)
-            return self.nontranslated_dict[n]
+            return StmGCTLS.nontranslated_dict[n]
+
+    def _disable_mallocs(self):
+        ll_assert(bool(self.nursery_free), "disable_mallocs: already disabled")
+        self.nursery_pending_clear = self.nursery_free - self.nursery_start
+        self.nursery_free = NULL
+        self.nursery_top  = NULL
 
     # ------------------------------------------------------------
 
+    def enter_transactional_mode(self):
+        """Called on the main thread, just before spawning the other
+        threads."""
+        self.local_collection()
+        if not self.local_nursery_is_empty():
+            self.local_collection(run_finalizers=False)
+        self._promote_locals_to_globals()
+        self._disable_mallocs()
+
+    def leave_transactional_mode(self):
+        """Restart using the main thread for mallocs."""
+        if not we_are_translated():
+            for key, value in StmGCTLS.nontranslated_dict.items():
+                if value is not self:
+                    del StmGCTLS.nontranslated_dict[key]
+        self.start_transaction()
+
     def start_transaction(self):
         """Enter a thread: performs any pending cleanups, and set
         up a fresh state for allocating.  Called at the start of
@@ -96,29 +119,88 @@
         # end_of_transaction_collection() are not balanced: if a
         # transaction is aborted, the latter might never be called.
         # Be ready here to clean up any state.
+        self._cleanup_state()
         if self.nursery_free:
             clear_size = self.nursery_free - self.nursery_start
         else:
             clear_size = self.nursery_pending_clear
+        self.nursery_pending_clear = 0
         if clear_size > 0:
             llarena.arena_reset(self.nursery_start, clear_size, 2)
-            self.nursery_pending_clear = 0
-        if self.rawmalloced_young_objects:
-            xxx
-        if self.rawmalloced_old_objects:
-            xxx
         self.nursery_free = self.nursery_start
         self.nursery_top  = self.nursery_start + self.nursery_size
 
+    def local_nursery_is_empty(self):
+        ll_assert(self.nursery_free, "local_nursery_is_empty: gc not running")
+        return self.nursery_free == self.nursery_start
+
     # ------------------------------------------------------------
 
-    def local_collection(self):
+    def local_collection(self, run_finalizers=True):
         """Do a local collection.  Finds all surviving young objects
         and make them old.  Also looks for roots from the stack.
         The flag GCFLAG_WAS_COPIED is kept and the C tree is updated
         if the local young object moves.
         """
-        xxx
+        #
+        debug_start("gc-local")
+        #
+        # First, find the roots that point to young objects.  All nursery
+        # objects found are copied out of the nursery, and the occasional
+        # young raw-malloced object is flagged with GCFLAG_VISITED.
+        # Note that during this step, we ignore references to further
+        # young objects; only objects directly referenced by roots
+        # are copied out or flagged.  They are also added to the list
+        # 'old_objects_pointing_to_young'.
+        self.collect_roots_in_nursery()
+        #
+        while True:
+            # If we are using card marking, do a partial trace of the arrays
+            # that are flagged with GCFLAG_CARDS_SET.
+            if self.card_page_indices > 0:
+                self.collect_cardrefs_to_nursery()
+            #
+            # Now trace objects from 'old_objects_pointing_to_young'.
+            # All nursery objects they reference are copied out of the
+            # nursery, and again added to 'old_objects_pointing_to_young'.
+            # All young raw-malloced objects found are flagged GCFLAG_VISITED.
+            # We proceed until 'old_objects_pointing_to_young' is empty.
+            self.collect_oldrefs_to_nursery()
+            #
+            # We have to loop back if collect_oldrefs_to_nursery caused
+            # new objects to show up in old_objects_with_cards_set
+            if self.card_page_indices > 0:
+                if self.old_objects_with_cards_set.non_empty():
+                    continue
+            break
+        #
+        # Now all live nursery objects should be out.  Update the young
+        # weakrefs' targets.
+        if self.young_objects_with_weakrefs.non_empty():
+            self.invalidate_young_weakrefs()
+        if self.young_objects_with_light_finalizers.non_empty():
+            self.deal_with_young_objects_with_finalizers()
+        #
+        # Clear this mapping.
+        if self.nursery_objects_shadows.length() > 0:
+            self.nursery_objects_shadows.clear()
+        #
+        # Walk the list of young raw-malloced objects, and either free
+        # them or make them old.
+        if self.young_rawmalloced_objects:
+            self.free_young_rawmalloced_objects()
+        #
+        # All live nursery objects are out, and the rest dies.  Fill
+        # the whole nursery with zero and reset the current nursery pointer.
+        llarena.arena_reset(self.nursery, self.nursery_size, 2)
+        self.debug_rotate_nursery()
+        self.nursery_free = self.nursery
+        #
+        debug_print("minor collect, total memory used:",
+                    self.get_total_memory_used())
+        if self.DEBUG >= 2:
+            self.debug_check_consistency()     # expensive!
+        debug_stop("gc-minor")
 
     def end_of_transaction_collection(self):
         """Do an end-of-transaction collection.  Finds all surviving
@@ -133,3 +215,49 @@
         xxx
 
     # ------------------------------------------------------------
+
+    @always_inline
+    def allocate_bump_pointer(self, size):
+        free = self.nursery_free
+        top  = self.nursery_top
+        if (top - free) < llmemory.raw_malloc_usage(size):
+            free = self.allocate_object_of_size(size)
+        self.nursery_free = free + size
+        return free
+
+    @dont_inline
+    def allocate_object_of_size(self, size):
+        if not self.nursery_free:
+            fatalerror("malloc in a non-main thread but outside a transaction")
+        if size > self.nursery_size:
+            fatalerror("object too large to ever fit in the nursery")
+        while True:
+            self.local_collection()
+            free = self.nursery_free
+            top  = self.nursery_top
+            if (top - free) < llmemory.raw_malloc_usage(size):
+                continue         # try again
+            return free
+
+    # ------------------------------------------------------------
+
+    def _promote_locals_to_globals(self):
+        ll_assert(self.local_nursery_is_empty(), "nursery must be empty [1]")
+        #
+        obj = self.old_objects
+        self.old_objects = NULL
+        while obj:
+            hdr = self.header(obj)
+            hdr.tid |= GCFLAG_GLOBAL
+            obj = hdr.version
+        #
+        obj = self.rawmalloced_objects
+        self.rawmalloced_objects = NULL
+        while obj:
+            hdr = self.header(obj)
+            hdr.tid |= GCFLAG_GLOBAL
+            obj = hdr.version
+
+    def _cleanup_state(self):
+        if self.rawmalloced_objects:
+            xxx     # free the rawmalloced_objects still around
diff --git a/pypy/rpython/memory/gc/test/test_stmgc.py b/pypy/rpython/memory/gc/test/test_stmgc.py
--- a/pypy/rpython/memory/gc/test/test_stmgc.py
+++ b/pypy/rpython/memory/gc/test/test_stmgc.py
@@ -133,7 +133,7 @@
     return llmemory.offsetof(WR, 'wadr')
 
 
-class TestBasic:
+class StmGCTests:
     GCClass = StmGC
 
     def setup_method(self, meth):
@@ -155,6 +155,7 @@
             if key != 0:
                 self.gc.stm_operations.threadnum = key
                 self.gc.teardown_thread()
+        self.gc.stm_operations.threadnum = 0
 
     # ----------
     # test helpers
@@ -187,14 +188,18 @@
         meth = getattr(self.gc, 'read_int%d' % WORD)
         return meth(obj, offset)
 
+
+class TestBasic(StmGCTests):
+
     def test_gc_creation_works(self):
         pass
 
     def test_allocate_bump_pointer(self):
-        a3 = self.gc.allocate_bump_pointer(3)
-        a4 = self.gc.allocate_bump_pointer(4)
-        a5 = self.gc.allocate_bump_pointer(5)
-        a6 = self.gc.allocate_bump_pointer(6)
+        tls = self.gc.main_thread_tls
+        a3 = tls.allocate_bump_pointer(3)
+        a4 = tls.allocate_bump_pointer(4)
+        a5 = tls.allocate_bump_pointer(5)
+        a6 = tls.allocate_bump_pointer(6)
         assert a4 - a3 == 3
         assert a5 - a4 == 4
         assert a6 - a5 == 5
@@ -210,7 +215,7 @@
     def test_malloc_main_vs_thread(self):
         gcref = self.gc.malloc_fixedsize_clear(123, llmemory.sizeof(S))
         obj = llmemory.cast_ptr_to_adr(gcref)
-        assert self.gc.header(obj).tid & GCFLAG_GLOBAL != 0
+        assert self.gc.header(obj).tid & GCFLAG_GLOBAL == 0
         #
         self.select_thread(1)
         gcref = self.gc.malloc_fixedsize_clear(123, llmemory.sizeof(S))
@@ -588,7 +593,3 @@
         assert a == sr1_adr
         a = self.gc.stm_normalize_global(tr1_adr)
         assert a == sr1_adr
-
-    def test_alloc_a_lot_from_main_thread(self):
-        for i in range(1000):
-            sr1, sr1_adr = self.malloc(SR)
diff --git a/pypy/rpython/memory/gc/test/test_stmtls.py b/pypy/rpython/memory/gc/test/test_stmtls.py
new file mode 100644
--- /dev/null
+++ b/pypy/rpython/memory/gc/test/test_stmtls.py
@@ -0,0 +1,50 @@
+import py
+from pypy.rpython.lltypesystem import lltype, llmemory, llarena, llgroup, rffi
+from pypy.rpython.memory.gc.stmtls import StmGCTLS, WORD
+from pypy.rpython.memory.gc.test.test_stmgc import StmGCTests
+
+
+S = lltype.GcStruct('S', ('a', lltype.Signed), ('b', lltype.Signed),
+                         ('c', lltype.Signed))
+
+
+class TestStmGCTLS(StmGCTests):
+    current_stack = ()
+
+    def stack_add(self, p):
+        if self.current_stack == ():
+            self.current_stack = []
+        self.current_stack.append(p)
+
+    def stack_pop(self):
+        return self.current_stack.pop()
+
+    # ----------
+
+    def test_creation_works(self):
+        pass
+
+    def test_allocate_bump_pointer(self):
+        tls = self.gc.main_thread_tls
+        a3 = tls.allocate_bump_pointer(3)
+        a4 = tls.allocate_bump_pointer(4)
+        a5 = tls.allocate_bump_pointer(5)
+        a6 = tls.allocate_bump_pointer(6)
+        assert a4 - a3 == 3
+        assert a5 - a4 == 4
+        assert a6 - a5 == 5
+
+    def test_local_collection(self):
+        s1, _ = self.malloc(S); s1.a = 111
+        s2, _ = self.malloc(S); s2.a = 222
+        self.stack_add(s2)
+        self.gc.main_thread_tls.local_collection()
+        s3 = self.stack_pop()
+        assert s3.a == 222
+        xxxx # raises...
+        s1.a
+        s2.a
+
+    def test_alloc_a_lot(self):
+        for i in range(1000):
+            sr1, sr1_adr = self.malloc(SR)


More information about the pypy-commit mailing list