[pypy-commit] pypy stmgc-c7: import stmgc/bde7c7634878 (branch card-marking)

arigo noreply at buildbot.pypy.org
Mon Jun 30 13:04:53 CEST 2014


Author: Armin Rigo <arigo at tunes.org>
Branch: stmgc-c7
Changeset: r72282:6bd3b22ee3f6
Date: 2014-06-30 12:57 +0200
http://bitbucket.org/pypy/pypy/changeset/6bd3b22ee3f6/

Log:	import stmgc/bde7c7634878 (branch card-marking)

diff --git a/rpython/translator/stm/src_stm/revision b/rpython/translator/stm/src_stm/revision
--- a/rpython/translator/stm/src_stm/revision
+++ b/rpython/translator/stm/src_stm/revision
@@ -1,1 +1,1 @@
-70c403598485
+bde7c7634878
diff --git a/rpython/translator/stm/src_stm/stm/contention.c b/rpython/translator/stm/src_stm/stm/contention.c
--- a/rpython/translator/stm/src_stm/stm/contention.c
+++ b/rpython/translator/stm/src_stm/stm/contention.c
@@ -195,7 +195,7 @@
         /* tell the other to commit ASAP, since it causes aborts */
         signal_other_to_commit_soon(contmgr.other_pseg);
 
-        dprintf(("abort in contention\n"));
+        dprintf(("abort in contention: kind %d\n", kind));
         STM_SEGMENT->nursery_end = abort_category;
         marker_contention(kind, false, other_segment_num, obj);
         abort_with_mutex();
diff --git a/rpython/translator/stm/src_stm/stm/core.c b/rpython/translator/stm/src_stm/stm/core.c
--- a/rpython/translator/stm/src_stm/stm/core.c
+++ b/rpython/translator/stm/src_stm/stm/core.c
@@ -41,26 +41,66 @@
 #endif
 }
 
-void _stm_write_slowpath(object_t *obj)
+__attribute__((always_inline))
+static void write_slowpath_overflow_obj(object_t *obj, bool mark_card)
+{
+    /* An overflow object is an object from the same transaction, but
+       outside the nursery.  More precisely, it is no longer young,
+       i.e. it comes from before the most recent minor collection.
+    */
+    assert(STM_PSEGMENT->objects_pointing_to_nursery != NULL);
+
+    assert(obj->stm_flags & GCFLAG_WRITE_BARRIER);
+    if (!mark_card) {
+        /* The basic case, with no card marking.  We append the object
+           into 'objects_pointing_to_nursery', and remove the flag so
+           that the write_slowpath will not be called again until the
+           next minor collection. */
+        if (obj->stm_flags & GCFLAG_CARDS_SET) {
+            /* if we clear this flag, we also need to clear the cards */
+            _reset_object_cards(get_priv_segment(STM_SEGMENT->segment_num),
+                                obj, CARD_CLEAR, false);
+        }
+        obj->stm_flags &= ~(GCFLAG_WRITE_BARRIER | GCFLAG_CARDS_SET);
+        LIST_APPEND(STM_PSEGMENT->objects_pointing_to_nursery, obj);
+    }
+    else {
+        /* Card marking.  Don't remove GCFLAG_WRITE_BARRIER because we
+           need to come back to _stm_write_slowpath_card() for every
+           card to mark.  Add GCFLAG_CARDS_SET. */
+        obj->stm_flags |= GCFLAG_CARDS_SET;
+        assert(STM_PSEGMENT->old_objects_with_cards);
+        LIST_APPEND(STM_PSEGMENT->old_objects_with_cards, obj);
+    }
+}
+
+__attribute__((always_inline))
+static void write_slowpath_common(object_t *obj, bool mark_card)
 {
     assert(_seems_to_be_running_transaction());
     assert(!_is_young(obj));
     assert(obj->stm_flags & GCFLAG_WRITE_BARRIER);
 
-    /* is this an object from the same transaction, outside the nursery? */
-    if ((obj->stm_flags & -GCFLAG_OVERFLOW_NUMBER_bit0) ==
-            STM_PSEGMENT->overflow_number) {
+    uintptr_t base_lock_idx = get_write_lock_idx((uintptr_t)obj);
 
-        dprintf_test(("write_slowpath %p -> ovf obj_to_nurs\n", obj));
-        obj->stm_flags &= ~GCFLAG_WRITE_BARRIER;
-        assert(STM_PSEGMENT->objects_pointing_to_nursery != NULL);
-        LIST_APPEND(STM_PSEGMENT->objects_pointing_to_nursery, obj);
+    if (IS_OVERFLOW_OBJ(STM_PSEGMENT, obj)) {
+        assert(write_locks[base_lock_idx] == 0);
+        write_slowpath_overflow_obj(obj, mark_card);
         return;
     }
+    /* Else, it's an old object and we need to privatise it.
+       Do a read-barrier now.  Note that this must occur before the
+       safepoints that may be issued in write_write_contention_management().
+    */
+    stm_read(obj);
 
-    /* do a read-barrier now.  Note that this must occur before the
-       safepoints that may be issued in write_write_contention_management(). */
-    stm_read(obj);
+    /* Take the segment's own lock number */
+    uint8_t lock_num = STM_PSEGMENT->write_lock_num;
+
+    /* If CARDS_SET, we entered here at least once already, so we
+       already own the write_lock */
+    assert(IMPLY(obj->stm_flags & GCFLAG_CARDS_SET,
+                 write_locks[base_lock_idx] == lock_num));
 
     /* XXX XXX XXX make the logic of write-locking objects optional! */
 
@@ -69,16 +109,14 @@
        'modified_old_objects' (but, because it had GCFLAG_WRITE_BARRIER,
        not in 'objects_pointing_to_nursery').  We'll detect this case
        by finding that we already own the write-lock. */
-    uintptr_t lock_idx = (((uintptr_t)obj) >> 4) - WRITELOCK_START;
-    uint8_t lock_num = STM_PSEGMENT->write_lock_num;
-    assert(lock_idx < sizeof(write_locks));
+
  retry:
-    if (write_locks[lock_idx] == 0) {
+    if (write_locks[base_lock_idx] == 0) {
         /* A lock to prevent reading garbage from
            lookup_other_thread_recorded_marker() */
         acquire_marker_lock(STM_SEGMENT->segment_base);
 
-        if (UNLIKELY(!__sync_bool_compare_and_swap(&write_locks[lock_idx],
+        if (UNLIKELY(!__sync_bool_compare_and_swap(&write_locks[base_lock_idx],
                                                    0, lock_num))) {
             release_marker_lock(STM_SEGMENT->segment_base);
             goto retry;
@@ -120,16 +158,15 @@
             realobj = REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
             obj_size = stmcb_size_rounded_up((struct object_s *)realobj);
 
-            /* that's the page *following* the last page with the object */
-            end_page = (((uintptr_t)obj) + obj_size + 4095) / 4096UL;
+            /* get the last page containing data from the object */
+            end_page = (((uintptr_t)obj) + obj_size - 1) / 4096UL;
 
-            for (i = first_page; i < end_page; i++) {
+            for (i = first_page; i <= end_page; i++) {
                 page_privatize(i);
             }
         }
     }
-    else if (write_locks[lock_idx] == lock_num) {
-        OPT_ASSERT(STM_PSEGMENT->objects_pointing_to_nursery != NULL);
+    else if (write_locks[base_lock_idx] == lock_num) {
 #ifdef STM_TESTS
         bool found = false;
         LIST_FOREACH_R(STM_PSEGMENT->modified_old_objects, object_t *,
@@ -140,17 +177,10 @@
     else {
         /* call the contention manager, and then retry (unless we were
            aborted). */
-        write_write_contention_management(lock_idx, obj);
+        write_write_contention_management(base_lock_idx, obj);
         goto retry;
     }
 
-    /* A common case for write_locks[] that was either 0 or lock_num:
-       we need to add the object to 'objects_pointing_to_nursery'
-       if there is such a list. */
-    if (STM_PSEGMENT->objects_pointing_to_nursery != NULL) {
-        dprintf_test(("write_slowpath %p -> old obj_to_nurs\n", obj));
-        LIST_APPEND(STM_PSEGMENT->objects_pointing_to_nursery, obj);
-    }
 
     /* check that we really have a private page */
     assert(is_private_page(STM_SEGMENT->segment_num,
@@ -159,16 +189,104 @@
     /* check that so far all copies of the object have the flag */
     check_flag_write_barrier(obj);
 
-    /* remove GCFLAG_WRITE_BARRIER, but only if we succeeded in
-       getting the write-lock */
     assert(obj->stm_flags & GCFLAG_WRITE_BARRIER);
-    obj->stm_flags &= ~GCFLAG_WRITE_BARRIER;
+    if (!mark_card) {
+        /* A common case for write_locks[] that was either 0 or lock_num:
+           we need to add the object to the appropriate list if there is one.
+        */
+        if (STM_PSEGMENT->objects_pointing_to_nursery != NULL) {
+            dprintf_test(("write_slowpath %p -> old obj_to_nurs\n", obj));
+            LIST_APPEND(STM_PSEGMENT->objects_pointing_to_nursery, obj);
+        }
+
+        if (obj->stm_flags & GCFLAG_CARDS_SET) {
+            /* if we clear this flag, we have to tell sync_old_objs that
+               everything needs to be synced */
+            _reset_object_cards(get_priv_segment(STM_SEGMENT->segment_num),
+                                obj, CARD_MARKED_OLD, true); /* mark all */
+        }
+
+        /* remove GCFLAG_WRITE_BARRIER if we succeeded in getting the base
+           write-lock (not for card marking). */
+        obj->stm_flags &= ~(GCFLAG_WRITE_BARRIER | GCFLAG_CARDS_SET);
+    }
+    else {
+        /* don't remove WRITE_BARRIER, but add CARDS_SET */
+        obj->stm_flags |= GCFLAG_CARDS_SET;
+        assert(STM_PSEGMENT->old_objects_with_cards);
+        LIST_APPEND(STM_PSEGMENT->old_objects_with_cards, obj);
+    }
 
     /* for sanity, check again that all other segment copies of this
        object still have the flag (so privatization worked) */
     check_flag_write_barrier(obj);
 }
 
+void _stm_write_slowpath(object_t *obj)
+{
+    write_slowpath_common(obj, /*mark_card=*/false);
+}
+
+static bool obj_should_use_cards(object_t *obj)
+{
+    struct object_s *realobj = (struct object_s *)
+        REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
+    size_t size = stmcb_size_rounded_up(realobj);
+
+    return (size >= _STM_MIN_CARD_OBJ_SIZE);
+}
+
+void _stm_write_slowpath_card(object_t *obj, uintptr_t index)
+{
+    /* If CARDS_SET is not set so far, issue a normal write barrier.
+       If the object is large enough, ask it to set up the object for
+       card marking instead.
+    */
+    if (!(obj->stm_flags & GCFLAG_CARDS_SET)) {
+        bool mark_card = obj_should_use_cards(obj);
+        write_slowpath_common(obj, mark_card);
+        if (!mark_card)
+            return;
+    }
+
+    dprintf_test(("write_slowpath_card %p -> index:%lu\n",
+                  obj, index));
+
+    /* We reach this point if we have to mark the card.
+     */
+    assert(obj->stm_flags & GCFLAG_WRITE_BARRIER);
+    assert(obj->stm_flags & GCFLAG_CARDS_SET);
+    assert(!(obj->stm_flags & GCFLAG_SMALL_UNIFORM)); /* not supported/tested */
+
+#ifndef NDEBUG
+    struct object_s *realobj = (struct object_s *)
+        REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
+    size_t size = stmcb_size_rounded_up(realobj);
+    /* we need at least one lock in addition to the STM-reserved object
+       write-lock */
+    assert(size >= 32);
+    /* the 'index' must be in range(length-of-obj), but we don't have
+       a direct way to know the length.  We know that it is smaller
+       than the size in bytes. */
+    assert(index < size);
+#endif
+
+    /* Write into the card's lock.  This is used by the next minor
+       collection to know what parts of the big object may have changed.
+       We already own the object here or it is an overflow obj. */
+    uintptr_t base_lock_idx = get_write_lock_idx((uintptr_t)obj);
+    uintptr_t card_lock_idx = base_lock_idx + get_index_to_card_index(index);
+    write_locks[card_lock_idx] = CARD_MARKED;
+
+    /* More debug checks */
+    dprintf(("mark %p index %lu, card:%lu with %d\n",
+             obj, index, get_index_to_card_index(index), CARD_MARKED));
+    assert(IMPLY(IS_OVERFLOW_OBJ(STM_PSEGMENT, obj),
+                 write_locks[base_lock_idx] == 0));
+    assert(IMPLY(!IS_OVERFLOW_OBJ(STM_PSEGMENT, obj),
+                 write_locks[base_lock_idx] == STM_PSEGMENT->write_lock_num));
+}
+
 static void reset_transaction_read_version(void)
 {
     /* force-reset all read markers to 0 */
@@ -285,6 +403,8 @@
             ({
                 if (was_read_remote(remote_base, item, remote_version)) {
                     /* A write-read conflict! */
+                    dprintf(("write-read conflict on %p, our seg: %d, other: %ld\n",
+                             item, STM_SEGMENT->segment_num, i));
                     if (write_read_contention_management(i, item)) {
                         /* If we reach this point, we didn't abort, but we
                            had to wait for the other thread to commit.  If we
@@ -356,7 +476,214 @@
     }
 }
 
-static void synchronize_object_now(object_t *obj)
+static void _page_wise_synchronize_object_now(object_t *obj)
+{
+    uintptr_t start = (uintptr_t)obj;
+    uintptr_t first_page = start / 4096UL;
+
+    char *realobj = REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
+    ssize_t obj_size = stmcb_size_rounded_up((struct object_s *)realobj);
+    assert(obj_size >= 16);
+    uintptr_t end = start + obj_size;
+    uintptr_t last_page = (end - 1) / 4096UL;
+    long i, myself = STM_SEGMENT->segment_num;
+
+    for (; first_page <= last_page; first_page++) {
+
+        uintptr_t copy_size;
+        if (first_page == last_page) {
+            /* this is the final fragment */
+            copy_size = end - start;
+        }
+        else {
+            /* this is a non-final fragment, going up to the
+               page's end */
+            copy_size = 4096 - (start & 4095);
+        }
+        /* double-check that the result fits in one page */
+        assert(copy_size > 0);
+        assert(copy_size + (start & 4095) <= 4096);
+
+        /* First copy the object into the shared page, if needed */
+        char *src = REAL_ADDRESS(STM_SEGMENT->segment_base, start);
+        char *dst = REAL_ADDRESS(stm_object_pages, start);
+        if (is_private_page(myself, first_page)) {
+            if (copy_size == 4096)
+                pagecopy(dst, src);
+            else
+                memcpy(dst, src, copy_size);
+        }
+        else {
+            assert(memcmp(dst, src, copy_size) == 0);  /* same page */
+        }
+
+        for (i = 1; i <= NB_SEGMENTS; i++) {
+            if (i == myself)
+                continue;
+
+            /* src = REAL_ADDRESS(stm_object_pages, start); */
+            dst = REAL_ADDRESS(get_segment_base(i), start);
+            if (is_private_page(i, first_page)) {
+                /* The page is a private page.  We need to diffuse this
+                   fragment of object from the shared page to this private
+                   page. */
+                if (copy_size == 4096)
+                    pagecopy(dst, src);
+                else
+                    memcpy(dst, src, copy_size);
+            }
+            else {
+                assert(!memcmp(dst, src, copy_size));  /* same page */
+            }
+        }
+
+        start = (start + 4096) & ~4095;
+    }
+}
+
+static inline bool _has_private_page_in_range(
+    long seg_num, uintptr_t start, uintptr_t size)
+{
+    uintptr_t first_page = start / 4096UL;
+    uintptr_t last_page = (start + size) / 4096UL;
+    for (; first_page <= last_page; first_page++)
+        if (is_private_page(seg_num, first_page))
+            return true;
+    return false;
+}
+
+static void _card_wise_synchronize_object_now(object_t *obj)
+{
+    assert(obj_should_use_cards(obj));
+    assert(!(obj->stm_flags & GCFLAG_CARDS_SET));
+    assert(!IS_OVERFLOW_OBJ(STM_PSEGMENT, obj));
+
+    struct object_s *realobj = (struct object_s *)REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
+    size_t obj_size = stmcb_size_rounded_up(realobj);
+    assert(obj_size >= 32);
+
+    uintptr_t first_card_index = get_write_lock_idx((uintptr_t)obj);
+    uintptr_t card_index = 1;
+    uintptr_t last_card_index = get_index_to_card_index(obj_size - 1); /* max valid index */
+    long i, myself = STM_SEGMENT->segment_num;
+
+    /* simple heuristic to check if probably the whole object is
+       marked anyway so we should do page-wise synchronize */
+    if (write_locks[first_card_index + 1] == CARD_MARKED_OLD
+        && write_locks[first_card_index + last_card_index] == CARD_MARKED_OLD
+        && write_locks[first_card_index + (last_card_index >> 1) + 1] == CARD_MARKED_OLD) {
+
+        dprintf(("card_wise_sync assumes %p,size:%lu is fully marked\n", obj, obj_size));
+        _reset_object_cards(get_priv_segment(STM_SEGMENT->segment_num),
+                            obj, CARD_CLEAR, false);
+        _page_wise_synchronize_object_now(obj);
+        return;
+    }
+
+    dprintf(("card_wise_sync syncs %p,size:%lu card-wise\n", obj, obj_size));
+
+    /* Combine multiple marked cards and do a memcpy for them. We don't
+       try yet to use page_copy() or otherwise take into account privatization
+       of pages (except _has_private_page_in_range) */
+    uintptr_t offset_itemsize[2];
+    bool all_cards_were_cleared = true;
+
+    uintptr_t start_card_index = -1;
+    while (card_index <= last_card_index) {
+        uintptr_t card_lock_idx = first_card_index + card_index;
+        uint8_t card_value = write_locks[card_lock_idx];
+
+        if (card_value == CARD_MARKED_OLD) {
+            write_locks[card_lock_idx] = CARD_CLEAR;
+
+            if (start_card_index == -1) {   /* first marked card */
+                start_card_index = card_index;
+                /* start = (uintptr_t)obj + stmcb_index_to_byte_offset( */
+                /*     realobj, get_card_index_to_index(card_index)); */
+                if (all_cards_were_cleared) {
+                    all_cards_were_cleared = false;
+                    stmcb_get_card_base_itemsize(realobj, offset_itemsize);
+                }
+            }
+        }
+        else {
+            OPT_ASSERT(card_value == CARD_CLEAR);
+        }
+
+        if (start_card_index != -1                    /* something to copy */
+            && (card_value != CARD_MARKED_OLD         /* found non-marked card */
+                || card_index == last_card_index)) {  /* this is the last card */
+            /* do the copying: */
+            uintptr_t start, copy_size;
+            uintptr_t next_card_offset;
+            uintptr_t start_card_offset;
+            uintptr_t next_card_index = card_index;
+
+            if (card_value == CARD_MARKED_OLD) {
+                /* card_index is the last card of the object, but we need
+                   to go one further to get the right offset */
+                next_card_index++;
+            }
+
+            start_card_offset = offset_itemsize[0] +
+                get_card_index_to_index(start_card_index) * offset_itemsize[1];
+
+            next_card_offset = offset_itemsize[0] +
+                get_card_index_to_index(next_card_index) * offset_itemsize[1];
+
+            if (next_card_offset > obj_size)
+                next_card_offset = obj_size;
+
+            start = (uintptr_t)obj + start_card_offset;
+            copy_size = next_card_offset - start_card_offset;
+            OPT_ASSERT(copy_size > 0);
+
+            /* dprintf(("copy %lu bytes\n", copy_size)); */
+
+            /* since we have marked cards, at least one page here must be private */
+            assert(_has_private_page_in_range(myself, start, copy_size));
+
+            /* copy to shared segment: */
+            char *src = REAL_ADDRESS(STM_SEGMENT->segment_base, start);
+            char *dst = REAL_ADDRESS(stm_object_pages, start);
+            memcpy(dst, src, copy_size);
+
+            /* copy to other segments */
+            for (i = 1; i <= NB_SEGMENTS; i++) {
+                if (i == myself)
+                    continue;
+                if (!_has_private_page_in_range(i, start, copy_size))
+                    continue;
+                /* src = REAL_ADDRESS(stm_object_pages, start); */
+                dst = REAL_ADDRESS(get_segment_base(i), start);
+                memcpy(dst, src, copy_size);
+            }
+
+            start_card_index = -1;
+        }
+
+        card_index++;
+    }
+
+    if (all_cards_were_cleared) {
+        /* well, seems like we never called stm_write_card() on it, so actually
+           we need to fall back to synchronize the whole object */
+        _page_wise_synchronize_object_now(obj);
+        return;
+    }
+
+#ifndef NDEBUG
+    char *src = REAL_ADDRESS(stm_object_pages, (uintptr_t)obj);
+    char *dst;
+    for (i = 1; i <= NB_SEGMENTS; i++) {
+        dst = REAL_ADDRESS(get_segment_base(i), (uintptr_t)obj);
+        assert(memcmp(dst, src, obj_size) == 0);
+    }
+#endif
+}
+
+
+static void synchronize_object_now(object_t *obj, bool ignore_cards)
 {
     /* Copy around the version of 'obj' that lives in our own segment.
        It is first copied into the shared pages, and then into other
@@ -368,72 +695,16 @@
     assert(obj->stm_flags & GCFLAG_WRITE_BARRIER);
     assert(STM_PSEGMENT->privatization_lock == 1);
 
-    uintptr_t start = (uintptr_t)obj;
-    uintptr_t first_page = start / 4096UL;
+    if (obj->stm_flags & GCFLAG_SMALL_UNIFORM) {
+        assert(!(obj->stm_flags & GCFLAG_CARDS_SET));
+        abort();//XXX WRITE THE FAST CASE
+    } else if (ignore_cards || !obj_should_use_cards(obj)) {
+        _page_wise_synchronize_object_now(obj);
+    } else {
+        _card_wise_synchronize_object_now(obj);
+    }
 
-    if (obj->stm_flags & GCFLAG_SMALL_UNIFORM) {
-        abort();//XXX WRITE THE FAST CASE
-    }
-    else {
-        char *realobj = REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
-        ssize_t obj_size = stmcb_size_rounded_up((struct object_s *)realobj);
-        assert(obj_size >= 16);
-        uintptr_t end = start + obj_size;
-        uintptr_t last_page = (end - 1) / 4096UL;
-        long i, myself = STM_SEGMENT->segment_num;
-
-        for (; first_page <= last_page; first_page++) {
-
-            uintptr_t copy_size;
-            if (first_page == last_page) {
-                /* this is the final fragment */
-                copy_size = end - start;
-            }
-            else {
-                /* this is a non-final fragment, going up to the
-                   page's end */
-                copy_size = 4096 - (start & 4095);
-            }
-            /* double-check that the result fits in one page */
-            assert(copy_size > 0);
-            assert(copy_size + (start & 4095) <= 4096);
-
-            /* First copy the object into the shared page, if needed */
-            char *src = REAL_ADDRESS(STM_SEGMENT->segment_base, start);
-            char *dst = REAL_ADDRESS(stm_object_pages, start);
-            if (is_private_page(myself, first_page)) {
-                if (copy_size == 4096)
-                    pagecopy(dst, src);
-                else
-                    memcpy(dst, src, copy_size);
-            }
-            else {
-                assert(memcmp(dst, src, copy_size) == 0);  /* same page */
-            }
-
-            for (i = 1; i <= NB_SEGMENTS; i++) {
-                if (i == myself)
-                    continue;
-
-                src = REAL_ADDRESS(stm_object_pages, start);
-                dst = REAL_ADDRESS(get_segment_base(i), start);
-                if (is_private_page(i, first_page)) {
-                    /* The page is a private page.  We need to diffuse this
-                       fragment of object from the shared page to this private
-                       page. */
-                    if (copy_size == 4096)
-                        pagecopy(dst, src);
-                    else
-                        memcpy(dst, src, copy_size);
-                }
-                else {
-                    assert(!memcmp(dst, src, copy_size));  /* same page */
-                }
-            }
-
-            start = (start + 4096) & ~4095;
-        }
-    }
+    _cards_cleared_in_object(get_priv_segment(STM_SEGMENT->segment_num), obj);
 }
 
 static void push_overflow_objects_from_privatized_pages(void)
@@ -443,7 +714,7 @@
 
     acquire_privatization_lock();
     LIST_FOREACH_R(STM_PSEGMENT->large_overflow_objects, object_t *,
-                   synchronize_object_now(item));
+                   synchronize_object_now(item, true /*ignore_cards*/));
     release_privatization_lock();
 }
 
@@ -467,7 +738,7 @@
 
             /* copy the object to the shared page, and to the other
                private pages as needed */
-            synchronize_object_now(item);
+            synchronize_object_now(item, false); /* don't ignore_cards */
         }));
     release_privatization_lock();
 
@@ -484,7 +755,9 @@
     STM_PSEGMENT->marker_inev[1] = 0;
 
     /* reset these lists to NULL for the next transaction */
+    _verify_cards_cleared_in_all_lists(get_priv_segment(STM_SEGMENT->segment_num));
     LIST_FREE(STM_PSEGMENT->objects_pointing_to_nursery);
+    list_clear(STM_PSEGMENT->old_objects_with_cards);
     LIST_FREE(STM_PSEGMENT->large_overflow_objects);
 
     timing_end_transaction(attribute_to);
@@ -535,6 +808,7 @@
 
     /* synchronize modified old objects to other threads */
     push_modified_to_other_segments();
+    _verify_cards_cleared_in_all_lists(get_priv_segment(STM_SEGMENT->segment_num));
 
     /* update 'overflow_number' if needed */
     if (STM_PSEGMENT->overflow_number_has_been_used) {
@@ -594,6 +868,9 @@
             ssize_t size = stmcb_size_rounded_up((struct object_s *)src);
             memcpy(dst, src, size);
 
+            if (obj_should_use_cards(item))
+                _reset_object_cards(pseg, item, CARD_CLEAR, false);
+
             /* objects in 'modified_old_objects' usually have the
                WRITE_BARRIER flag, unless they have been modified
                recently.  Ignore the old flag; after copying from the
@@ -622,6 +899,10 @@
 
 static void abort_data_structures_from_segment_num(int segment_num)
 {
+#pragma push_macro("STM_PSEGMENT")
+#pragma push_macro("STM_SEGMENT")
+#undef STM_PSEGMENT
+#undef STM_SEGMENT
     /* This function clears the content of the given segment undergoing
        an abort.  It is called from abort_with_mutex(), but also sometimes
        from other threads that figure out that this segment should abort.
@@ -651,6 +932,7 @@
 
     /* reset all the modified objects (incl. re-adding GCFLAG_WRITE_BARRIER) */
     reset_modified_from_other_segments(segment_num);
+    _verify_cards_cleared_in_all_lists(pseg);
 
     /* reset the tl->shadowstack and thread_local_obj to their original
        value before the transaction start */
@@ -663,8 +945,11 @@
 
     /* reset these lists to NULL too on abort */
     LIST_FREE(pseg->objects_pointing_to_nursery);
+    list_clear(pseg->old_objects_with_cards);
     LIST_FREE(pseg->large_overflow_objects);
     list_clear(pseg->young_weakrefs);
+#pragma pop_macro("STM_SEGMENT")
+#pragma pop_macro("STM_PSEGMENT")
 }
 
 static void abort_with_mutex(void)
diff --git a/rpython/translator/stm/src_stm/stm/core.h b/rpython/translator/stm/src_stm/stm/core.h
--- a/rpython/translator/stm/src_stm/stm/core.h
+++ b/rpython/translator/stm/src_stm/stm/core.h
@@ -36,6 +36,8 @@
 #define WRITELOCK_START       ((END_NURSERY_PAGE * 4096UL) >> 4)
 #define WRITELOCK_END         READMARKER_END
 
+#define CARD_SIZE   _STM_CARD_SIZE
+
 enum /* stm_flags */ {
     /* This flag is set on non-nursery objects.  It forces stm_write()
        to call _stm_write_slowpath().
@@ -55,6 +57,12 @@
        after the object. */
     GCFLAG_HAS_SHADOW = 0x04,
 
+    /* Set on objects that are large enough (_STM_MIN_CARD_OBJ_SIZE)
+       to have multiple cards (at least _STM_MIN_CARD_COUNT), and that
+       have at least one card marked.  This flag implies
+       GCFLAG_WRITE_BARRIER. */
+    GCFLAG_CARDS_SET = _STM_GCFLAG_CARDS_SET,
+
     /* All remaining bits of the 32-bit 'stm_flags' field are taken by
        the "overflow number".  This is a number that identifies the
        "overflow objects" from the current transaction among all old
@@ -62,7 +70,7 @@
        current transaction that have been flushed out of the nursery,
        which occurs if the same transaction allocates too many objects.
     */
-    GCFLAG_OVERFLOW_NUMBER_bit0 = 0x8   /* must be last */
+    GCFLAG_OVERFLOW_NUMBER_bit0 = 0x10   /* must be last */
 };
 
 
@@ -97,6 +105,10 @@
        understood as meaning implicitly "this is the same as
        'modified_old_objects'". */
     struct list_s *objects_pointing_to_nursery;
+    /* Like objects_pointing_to_nursery it holds the old objects that
+       we did a stm_write_card() on. Objects can be in both lists.
+       It is NULL iff objects_pointing_to_nursery is NULL. */
+    struct list_s *old_objects_with_cards;
 
     /* List of all large, overflowed objects.  Only non-NULL after the
        current transaction spanned a minor collection. */
@@ -213,9 +225,34 @@
 
 static uint8_t write_locks[WRITELOCK_END - WRITELOCK_START];
 
+enum /* card values for write_locks */ {
+    CARD_CLEAR = 0,             /* card not used at all */
+    CARD_MARKED = 100,            /* card marked for tracing in the next gc */
+    CARD_MARKED_OLD = 101,        /* card was marked before, but cleared
+                                     in a GC */
+};
+
 
 #define REAL_ADDRESS(segment_base, src)   ((segment_base) + (uintptr_t)(src))
 
+#define IS_OVERFLOW_OBJ(pseg, obj) (((obj)->stm_flags & -GCFLAG_OVERFLOW_NUMBER_bit0) \
+                                    == (pseg)->overflow_number)
+
+static inline uintptr_t get_index_to_card_index(uintptr_t index) {
+    return (index / CARD_SIZE) + 1;
+}
+
+static inline uintptr_t get_card_index_to_index(uintptr_t card_index) {
+    return (card_index - 1) * CARD_SIZE;
+}
+
+
+static inline uintptr_t get_write_lock_idx(uintptr_t obj) {
+    uintptr_t res = (obj >> 4) - WRITELOCK_START;
+    assert(res < sizeof(write_locks));
+    return res;
+}
+
 static inline char *get_segment_base(long segment_num) {
     return stm_object_pages + segment_num * (NB_PAGES * 4096UL);
 }
@@ -258,7 +295,7 @@
 }
 
 static void copy_object_to_shared(object_t *obj, int source_segment_num);
-static void synchronize_object_now(object_t *obj);
+static void synchronize_object_now(object_t *obj, bool ignore_cards);
 
 static inline void acquire_privatization_lock(void)
 {
diff --git a/rpython/translator/stm/src_stm/stm/gcpage.c b/rpython/translator/stm/src_stm/stm/gcpage.c
--- a/rpython/translator/stm/src_stm/stm/gcpage.c
+++ b/rpython/translator/stm/src_stm/stm/gcpage.c
@@ -167,7 +167,7 @@
 
 static inline uintptr_t mark_loc(object_t *obj)
 {
-    uintptr_t lock_idx = (((uintptr_t)obj) >> 4) - WRITELOCK_START;
+    uintptr_t lock_idx = get_write_lock_idx((uintptr_t)obj);
     assert(lock_idx < sizeof(write_locks));
     return lock_idx;
 }
@@ -441,6 +441,11 @@
 
 static void clean_up_segment_lists(void)
 {
+#pragma push_macro("STM_PSEGMENT")
+#pragma push_macro("STM_SEGMENT")
+#undef STM_PSEGMENT
+#undef STM_SEGMENT
+
     long i;
     for (i = 1; i <= NB_SEGMENTS; i++) {
         struct stm_priv_segment_info_s *pseg = get_priv_segment(i);
@@ -451,21 +456,54 @@
            written to but don't actually point to the nursery.  Clear
            it up and set GCFLAG_WRITE_BARRIER again on the objects.
            This is the case for transactions where
-               MINOR_NOTHING_TO_DO() == false
+               MINOR_NOTHING_TO_DO() == true
            but they still did write-barriers on objects
         */
         lst = pseg->objects_pointing_to_nursery;
         if (lst != NULL) {
-            LIST_FOREACH_R(lst, uintptr_t /*item*/,
+            LIST_FOREACH_R(lst, object_t* /*item*/,
                 ({
                     struct object_s *realobj = (struct object_s *)
-                        REAL_ADDRESS(pseg->pub.segment_base, item);
+                        REAL_ADDRESS(pseg->pub.segment_base, (uintptr_t)item);
+
                     assert(!(realobj->stm_flags & GCFLAG_WRITE_BARRIER));
+                    OPT_ASSERT(!(realobj->stm_flags & GCFLAG_CARDS_SET));
+
                     realobj->stm_flags |= GCFLAG_WRITE_BARRIER;
+
+                    if (realobj->stm_flags & GCFLAG_CARDS_SET) {
+                        /* we called a normal WB on this object, so all cards
+                           need to be marked OLD */
+                        if (!IS_OVERFLOW_OBJ(pseg, realobj)) {
+                            _reset_object_cards(pseg, item, CARD_MARKED_OLD, true); /* mark all */
+                        } else {
+                            /* simply clear overflow */
+                            _reset_object_cards(pseg, item, CARD_CLEAR, false);
+                        }
+                    }
                 }));
             list_clear(lst);
+        } else {
+            /* if here MINOR_NOTHING_TO_DO() was true before, it's like
+               we "didn't do a collection" at all. So nothing to do on
+               modified_old_objs. */
         }
 
+        lst = pseg->old_objects_with_cards;
+        LIST_FOREACH_R(lst, object_t* /*item*/,
+            ({
+                struct object_s *realobj = (struct object_s *)
+                    REAL_ADDRESS(pseg->pub.segment_base, item);
+                OPT_ASSERT(realobj->stm_flags & GCFLAG_CARDS_SET);
+                OPT_ASSERT(realobj->stm_flags & GCFLAG_WRITE_BARRIER);
+
+                /* clear cards if overflow, or mark marked cards as old otherwise */
+                uint8_t mark_value = IS_OVERFLOW_OBJ(pseg, realobj) ?
+                    CARD_CLEAR : CARD_MARKED_OLD;
+                _reset_object_cards(pseg, item, mark_value, false);
+            }));
+        list_clear(lst);
+
         /* Remove from 'large_overflow_objects' all objects that die */
         lst = pseg->large_overflow_objects;
         if (lst != NULL) {
@@ -478,6 +516,8 @@
             }
         }
     }
+#pragma pop_macro("STM_SEGMENT")
+#pragma pop_macro("STM_PSEGMENT")
 }
 
 static inline bool largemalloc_keep_object_at(char *data)
@@ -506,6 +546,20 @@
     _stm_largemalloc_sweep();
 }
 
+static void assert_cleared_locks(size_t n)
+{
+#ifndef NDEBUG
+    size_t i;
+    uint8_t *s = write_locks;
+# ifndef STM_TESTS
+    if (n > 5000) n = 5000;
+# endif
+    for (i = 0; i < n; i++)
+        assert(s[i] == CARD_CLEAR || s[i] == CARD_MARKED
+               || s[i] == CARD_MARKED_OLD);
+#endif
+}
+
 static void clean_write_locks(void)
 {
     /* the write_locks array, containing the visit marker during
@@ -515,7 +569,7 @@
     object_t *loc2 = (object_t *)(uninitialized_page_stop - stm_object_pages);
     uintptr_t lock2_idx = mark_loc(loc2 - 1) + 1;
 
-    assert_memset_zero(write_locks, lock2_idx);
+    assert_cleared_locks(lock2_idx);
     memset(write_locks + lock2_idx, 0, sizeof(write_locks) - lock2_idx);
 }
 
diff --git a/rpython/translator/stm/src_stm/stm/misc.c b/rpython/translator/stm/src_stm/stm/misc.c
--- a/rpython/translator/stm/src_stm/stm/misc.c
+++ b/rpython/translator/stm/src_stm/stm/misc.c
@@ -41,6 +41,12 @@
     return (obj->stm_flags & _STM_GCFLAG_WRITE_BARRIER) == 0;
 }
 
+
+bool _stm_was_written_card(object_t *obj)
+{
+    return obj->stm_flags & _STM_GCFLAG_CARDS_SET;
+}
+
 #ifdef STM_TESTS
 uintptr_t _stm_get_private_page(uintptr_t pagenum)
 {
@@ -62,6 +68,13 @@
     return list_count(STM_PSEGMENT->objects_pointing_to_nursery);
 }
 
+long _stm_count_old_objects_with_cards(void)
+{
+    if (STM_PSEGMENT->old_objects_with_cards == NULL)
+        return -1;
+    return list_count(STM_PSEGMENT->old_objects_with_cards);
+}
+
 object_t *_stm_enum_modified_old_objects(long index)
 {
     return (object_t *)list_item(
@@ -74,6 +87,12 @@
         STM_PSEGMENT->objects_pointing_to_nursery, index);
 }
 
+object_t *_stm_enum_old_objects_with_cards(long index)
+{
+    return (object_t *)list_item(
+        STM_PSEGMENT->old_objects_with_cards, index);
+}
+
 uint64_t _stm_total_allocated(void)
 {
     return increment_total_allocated(0);
diff --git a/rpython/translator/stm/src_stm/stm/nursery.c b/rpython/translator/stm/src_stm/stm/nursery.c
--- a/rpython/translator/stm/src_stm/stm/nursery.c
+++ b/rpython/translator/stm/src_stm/stm/nursery.c
@@ -66,6 +66,8 @@
     object_t *obj = *pobj;
     object_t *nobj;
     uintptr_t nobj_sync_now;
+    char *realobj;
+    size_t size;
 
     if (obj == NULL)
         return;
@@ -76,8 +78,6 @@
            to GCWORD_MOVED.  In that case, the forwarding location, i.e.
            where the object moved to, is stored in the second word in 'obj'. */
         object_t *TLPREFIX *pforwarded_array = (object_t *TLPREFIX *)obj;
-        char *realobj;
-        size_t size;
 
         if (obj->stm_flags & GCFLAG_HAS_SHADOW) {
             /* ^^ the single check above detects both already-moved objects
@@ -150,6 +150,7 @@
 
     /* Must trace the object later */
     LIST_APPEND(STM_PSEGMENT->objects_pointing_to_nursery, nobj_sync_now);
+    _cards_cleared_in_object(get_priv_segment(STM_SEGMENT->segment_num), nobj);
 }
 
 static void collect_roots_in_nursery(void)
@@ -184,30 +185,217 @@
     minor_trace_if_young(&tl->thread_local_obj);
 }
 
-static inline void _collect_now(object_t *obj)
+static void _cards_cleared_in_object(struct stm_priv_segment_info_s *pseg, object_t *obj)
+{
+#ifndef NDEBUG
+    struct object_s *realobj = (struct object_s *)REAL_ADDRESS(pseg->pub.segment_base, obj);
+    size_t size = stmcb_size_rounded_up(realobj);
+
+    if (size < _STM_MIN_CARD_OBJ_SIZE)
+        return;                 /* too small for cards */
+
+    uintptr_t first_card_index = get_write_lock_idx((uintptr_t)obj);
+    uintptr_t card_index = 1;
+    uintptr_t last_card_index = get_index_to_card_index(size - 1); /* max valid index */
+
+    OPT_ASSERT(write_locks[first_card_index] <= NB_SEGMENTS_MAX
+               || write_locks[first_card_index] == 255); /* see gcpage.c */
+    while (card_index <= last_card_index) {
+        uintptr_t card_lock_idx = first_card_index + card_index;
+        assert(write_locks[card_lock_idx] == CARD_CLEAR);
+        card_index++;
+    }
+
+    assert(!(realobj->stm_flags & GCFLAG_CARDS_SET));
+#endif
+}
+
+static void _verify_cards_cleared_in_all_lists(struct stm_priv_segment_info_s *pseg)
+{
+#ifndef NDEBUG
+    LIST_FOREACH_R(
+        pseg->modified_old_objects, object_t * /*item*/,
+        _cards_cleared_in_object(pseg, item));
+
+    if (pseg->large_overflow_objects) {
+        LIST_FOREACH_R(
+            pseg->large_overflow_objects, object_t * /*item*/,
+            _cards_cleared_in_object(pseg, item));
+    }
+    if (pseg->objects_pointing_to_nursery) {
+        LIST_FOREACH_R(
+            pseg->objects_pointing_to_nursery, object_t * /*item*/,
+            _cards_cleared_in_object(pseg, item));
+    }
+    LIST_FOREACH_R(
+        pseg->old_objects_with_cards, object_t * /*item*/,
+        _cards_cleared_in_object(pseg, item));
+#endif
+}
+
+static void _reset_object_cards(struct stm_priv_segment_info_s *pseg,
+                                object_t *obj, uint8_t mark_value,
+                                bool mark_all)
+{
+#pragma push_macro("STM_PSEGMENT")
+#pragma push_macro("STM_SEGMENT")
+#undef STM_PSEGMENT
+#undef STM_SEGMENT
+    struct object_s *realobj = (struct object_s *)REAL_ADDRESS(pseg->pub.segment_base, obj);
+    size_t size = stmcb_size_rounded_up(realobj);
+
+    OPT_ASSERT(size >= _STM_MIN_CARD_OBJ_SIZE);
+    assert(IMPLY(mark_value == CARD_CLEAR, !mark_all)); /* not necessary */
+    assert(IMPLY(mark_all, mark_value == CARD_MARKED_OLD)); /* set *all* to OLD */
+    assert(IMPLY(IS_OVERFLOW_OBJ(pseg, realobj),
+                 mark_value == CARD_CLEAR)); /* overflows are always CLEARed */
+
+    uintptr_t first_card_index = get_write_lock_idx((uintptr_t)obj);
+    uintptr_t card_index = 1;
+    uintptr_t last_card_index = get_index_to_card_index(size - 1); /* max valid index */
+
+    OPT_ASSERT(write_locks[first_card_index] <= NB_SEGMENTS
+               || write_locks[first_card_index] == 255); /* see gcpage.c */
+
+    dprintf(("mark cards of %p, size %lu with %d, all: %d\n",
+             obj, size, mark_value, mark_all));
+    dprintf(("obj has %lu cards\n", last_card_index));
+    while (card_index <= last_card_index) {
+        uintptr_t card_lock_idx = first_card_index + card_index;
+
+        if (mark_all || write_locks[card_lock_idx] != CARD_CLEAR) {
+            /* dprintf(("mark card %lu,wl:%lu of %p with %d\n", */
+            /*          card_index, card_lock_idx, obj, mark_value)); */
+            write_locks[card_lock_idx] = mark_value;
+        }
+        card_index++;
+    }
+
+    realobj->stm_flags &= ~GCFLAG_CARDS_SET;
+
+#pragma pop_macro("STM_SEGMENT")
+#pragma pop_macro("STM_PSEGMENT")
+}
+
+
+static void _trace_card_object(object_t *obj)
+{
+    assert(!_is_in_nursery(obj));
+    assert(obj->stm_flags & GCFLAG_CARDS_SET);
+    assert(obj->stm_flags & GCFLAG_WRITE_BARRIER);
+
+    dprintf(("_trace_card_object(%p)\n", obj));
+    bool obj_is_overflow = IS_OVERFLOW_OBJ(STM_PSEGMENT, obj);
+    uint8_t mark_value = obj_is_overflow ? CARD_CLEAR : CARD_MARKED_OLD;
+
+    struct object_s *realobj = (struct object_s *)REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
+    size_t size = stmcb_size_rounded_up(realobj);
+
+    uintptr_t first_card_index = get_write_lock_idx((uintptr_t)obj);
+    uintptr_t card_index = 1;
+    uintptr_t last_card_index = get_index_to_card_index(size - 1); /* max valid index */
+
+    OPT_ASSERT(write_locks[first_card_index] <= NB_SEGMENTS_MAX
+               || write_locks[first_card_index] == 255); /* see gcpage.c */
+
+    /* XXX: merge ranges */
+    while (card_index <= last_card_index) {
+        uintptr_t card_lock_idx = first_card_index + card_index;
+        if (write_locks[card_lock_idx] == CARD_MARKED) {
+            /* clear or set to old: */
+            write_locks[card_lock_idx] = mark_value;
+
+            uintptr_t start = get_card_index_to_index(card_index);
+            uintptr_t stop = get_card_index_to_index(card_index + 1);
+
+            dprintf(("trace_cards on %p with start:%lu stop:%lu\n",
+                     obj, start, stop));
+            stmcb_trace_cards(realobj, &minor_trace_if_young,
+                              start, stop);
+
+        }
+
+        /* all cards should be cleared on overflow objs */
+        assert(IMPLY(obj_is_overflow,
+                     write_locks[card_lock_idx] == CARD_CLEAR));
+
+        card_index++;
+    }
+    obj->stm_flags &= ~GCFLAG_CARDS_SET;
+}
+
+
+
+static inline void _collect_now(object_t *obj, bool was_definitely_young)
 {
     assert(!_is_young(obj));
 
-    /* We must not have GCFLAG_WRITE_BARRIER so far.  Add it now. */
-    assert(!(obj->stm_flags & GCFLAG_WRITE_BARRIER));
-    obj->stm_flags |= GCFLAG_WRITE_BARRIER;
+    dprintf(("_collect_now: %p\n", obj));
 
-    /* Trace the 'obj' to replace pointers to nursery with pointers
-       outside the nursery, possibly forcing nursery objects out and
-       adding them to 'objects_pointing_to_nursery' as well. */
-    char *realobj = REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
-    stmcb_trace((struct object_s *)realobj, &minor_trace_if_young);
+    if (!(obj->stm_flags & GCFLAG_WRITE_BARRIER)) {
+        /* Trace the 'obj' to replace pointers to nursery with pointers
+           outside the nursery, possibly forcing nursery objects out and
+           adding them to 'objects_pointing_to_nursery' as well. */
+        char *realobj = REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
+        stmcb_trace((struct object_s *)realobj, &minor_trace_if_young);
+
+        obj->stm_flags |= GCFLAG_WRITE_BARRIER;
+        if (obj->stm_flags & GCFLAG_CARDS_SET) {
+            /* all objects that had WB cleared need to be fully synchronised
+               on commit, so we have to mark all their cards */
+            struct stm_priv_segment_info_s *pseg = get_priv_segment(
+                STM_SEGMENT->segment_num);
+
+            /* stm_wb-slowpath should never have triggered for young objs */
+            assert(!was_definitely_young);
+
+            if (!IS_OVERFLOW_OBJ(STM_PSEGMENT, obj)) {
+                _reset_object_cards(pseg, obj, CARD_MARKED_OLD, true); /* mark all */
+            } else {
+                /* simply clear overflow */
+                _reset_object_cards(pseg, obj, CARD_CLEAR, false);
+            }
+        }
+    }
+    /* else traced in collect_cardrefs_to_nursery if necessary */
+}
+
+
+static void collect_cardrefs_to_nursery(void)
+{
+    dprintf(("collect_cardrefs_to_nursery\n"));
+    struct list_s *lst = STM_PSEGMENT->old_objects_with_cards;
+
+    while (!list_is_empty(lst)) {
+        object_t *obj = (object_t*)list_pop_item(lst);
+
+        assert(!_is_young(obj));
+
+        if (!(obj->stm_flags & GCFLAG_CARDS_SET)) {
+            /* handled in _collect_now() */
+            continue;
+        }
+
+        /* traces cards, clears marked cards or marks them old if
+           necessary */
+        _trace_card_object(obj);
+
+        assert(!(obj->stm_flags & GCFLAG_CARDS_SET));
+    }
 }
 
 static void collect_oldrefs_to_nursery(void)
 {
+    dprintf(("collect_oldrefs_to_nursery\n"));
     struct list_s *lst = STM_PSEGMENT->objects_pointing_to_nursery;
 
     while (!list_is_empty(lst)) {
         uintptr_t obj_sync_now = list_pop_item(lst);
         object_t *obj = (object_t *)(obj_sync_now & ~FLAG_SYNC_LARGE);
 
-        _collect_now(obj);
+        bool was_definitely_young = (obj_sync_now & FLAG_SYNC_LARGE);
+        _collect_now(obj, was_definitely_young);
+        assert(!(obj->stm_flags & GCFLAG_CARDS_SET));
 
         if (obj_sync_now & FLAG_SYNC_LARGE) {
             /* this was a large object.  We must either synchronize the
@@ -215,13 +403,15 @@
                WRITE_BARRIER flag and traced into it to fix its
                content); or add the object to 'large_overflow_objects'.
             */
+            struct stm_priv_segment_info_s *pseg = get_priv_segment(STM_SEGMENT->segment_num);
             if (STM_PSEGMENT->minor_collect_will_commit_now) {
                 acquire_privatization_lock();
-                synchronize_object_now(obj);
+                synchronize_object_now(obj, true); /* ignore cards! */
                 release_privatization_lock();
             } else {
                 LIST_APPEND(STM_PSEGMENT->large_overflow_objects, obj);
             }
+            _cards_cleared_in_object(pseg, obj);
         }
 
         /* the list could have moved while appending */
@@ -231,12 +421,15 @@
 
 static void collect_modified_old_objects(void)
 {
-    LIST_FOREACH_R(STM_PSEGMENT->modified_old_objects, object_t * /*item*/,
-                   _collect_now(item));
+    dprintf(("collect_modified_old_objects\n"));
+    LIST_FOREACH_R(
+        STM_PSEGMENT->modified_old_objects, object_t * /*item*/,
+        _collect_now(item, false));
 }
 
 static void collect_roots_from_markers(uintptr_t num_old)
 {
+    dprintf(("collect_roots_from_markers\n"));
     /* visit the marker objects */
     struct list_s *mlst = STM_PSEGMENT->modified_old_objects_markers;
     STM_PSEGMENT->modified_old_objects_markers_num_old = list_count(mlst);
@@ -255,6 +448,11 @@
 
 static size_t throw_away_nursery(struct stm_priv_segment_info_s *pseg)
 {
+#pragma push_macro("STM_PSEGMENT")
+#pragma push_macro("STM_SEGMENT")
+#undef STM_PSEGMENT
+#undef STM_SEGMENT
+    dprintf(("throw_away_nursery\n"));
     /* reset the nursery by zeroing it */
     size_t nursery_used;
     char *realnursery;
@@ -280,10 +478,11 @@
         wlog_t *item;
 
         TREE_LOOP_FORWARD(*pseg->young_outside_nursery, item) {
-            assert(!_is_in_nursery((object_t *)item->addr));
+            object_t *obj = (object_t*)item->addr;
+
             /* mark slot as unread */
             ((struct stm_read_marker_s *)
-             (pseg->pub.segment_base + (item->addr >> 4)))->rm = 0;
+             (pseg->pub.segment_base + (((uintptr_t)obj) >> 4)))->rm = 0;
 
             _stm_large_free(stm_object_pages + item->addr);
         } TREE_LOOP_END;
@@ -292,7 +491,29 @@
     }
 
     tree_clear(pseg->nursery_objects_shadows);
+
+
+    /* modified_old_objects' cards get cleared in push_modified_to_other_segments
+       or reset_modified_from_other_segments. Objs in old_objs_with_cards but not
+       in modified_old_objs are overflow objects and handled here: */
+    if (pseg->large_overflow_objects != NULL) {
+        /* some overflow objects may have cards when aborting, clear them too */
+        LIST_FOREACH_R(pseg->large_overflow_objects, object_t * /*item*/,
+            {
+                struct object_s *realobj = (struct object_s *)
+                    REAL_ADDRESS(pseg->pub.segment_base, item);
+
+                if (realobj->stm_flags & GCFLAG_CARDS_SET) {
+                    /* CARDS_SET is enough since other HAS_CARDS objs
+                       are already cleared */
+                    _reset_object_cards(pseg, item, CARD_CLEAR, false);
+                }
+            });
+    }
+
     return nursery_used;
+#pragma pop_macro("STM_SEGMENT")
+#pragma pop_macro("STM_PSEGMENT")
 }
 
 #define MINOR_NOTHING_TO_DO(pseg)                                       \
@@ -354,7 +575,9 @@
 
     collect_roots_in_nursery();
 
+    collect_cardrefs_to_nursery();
     collect_oldrefs_to_nursery();
+    assert(list_is_empty(STM_PSEGMENT->old_objects_with_cards));
 
     /* now all surviving nursery objects have been moved out */
     stm_move_young_weakrefs();
@@ -428,6 +651,7 @@
 
     char *result = allocate_outside_nursery_large(size_rounded_up);
     object_t *o = (object_t *)(result - stm_object_pages);
+
     tree_insert(STM_PSEGMENT->young_outside_nursery, (uintptr_t)o, 0);
 
     memset(REAL_ADDRESS(STM_SEGMENT->segment_base, o), 0, size_rounded_up);
@@ -529,6 +753,7 @@
     memcpy(realnobj, realobj, size);
 
     obj->stm_flags |= GCFLAG_HAS_SHADOW;
+
     tree_insert(STM_PSEGMENT->nursery_objects_shadows,
                 (uintptr_t)obj, (uintptr_t)nobj);
     return nobj;
diff --git a/rpython/translator/stm/src_stm/stm/nursery.h b/rpython/translator/stm/src_stm/stm/nursery.h
--- a/rpython/translator/stm/src_stm/stm/nursery.h
+++ b/rpython/translator/stm/src_stm/stm/nursery.h
@@ -7,6 +7,10 @@
 
 static uint32_t highest_overflow_number;
 
+static void _cards_cleared_in_object(struct stm_priv_segment_info_s *pseg, object_t *obj);
+static void _reset_object_cards(struct stm_priv_segment_info_s *pseg,
+                                object_t *obj, uint8_t mark_value,
+                                bool mark_all);
 static void minor_collection(bool commit);
 static void check_nursery_at_transaction_start(void);
 static size_t throw_away_nursery(struct stm_priv_segment_info_s *pseg);
diff --git a/rpython/translator/stm/src_stm/stm/setup.c b/rpython/translator/stm/src_stm/stm/setup.c
--- a/rpython/translator/stm/src_stm/stm/setup.c
+++ b/rpython/translator/stm/src_stm/stm/setup.c
@@ -84,6 +84,7 @@
 {
     /* Check that some values are acceptable */
     assert(NB_SEGMENTS <= NB_SEGMENTS_MAX);
+    assert(CARD_SIZE >= 32 && CARD_SIZE % 16 == 0);
     assert(4096 <= ((uintptr_t)STM_SEGMENT));
     assert((uintptr_t)STM_SEGMENT == (uintptr_t)STM_PSEGMENT);
     assert(((uintptr_t)STM_PSEGMENT) + sizeof(*STM_PSEGMENT) <= 8192);
@@ -118,6 +119,7 @@
         pr->pub.segment_num = i;
         pr->pub.segment_base = segment_base;
         pr->objects_pointing_to_nursery = NULL;
+        pr->old_objects_with_cards = list_create();
         pr->large_overflow_objects = NULL;
         pr->modified_old_objects = list_create();
         pr->modified_old_objects_markers = list_create();
@@ -157,6 +159,7 @@
     for (i = 1; i <= NB_SEGMENTS; i++) {
         struct stm_priv_segment_info_s *pr = get_priv_segment(i);
         assert(pr->objects_pointing_to_nursery == NULL);
+        list_free(pr->old_objects_with_cards);
         assert(pr->large_overflow_objects == NULL);
         list_free(pr->modified_old_objects);
         list_free(pr->modified_old_objects_markers);
diff --git a/rpython/translator/stm/src_stm/stmgc.h b/rpython/translator/stm/src_stm/stmgc.h
--- a/rpython/translator/stm/src_stm/stmgc.h
+++ b/rpython/translator/stm/src_stm/stmgc.h
@@ -108,6 +108,7 @@
 /* this should use llvm's coldcc calling convention,
    but it's not exposed to C code so far */
 void _stm_write_slowpath(object_t *);
+void _stm_write_slowpath_card(object_t *, uintptr_t);
 object_t *_stm_allocate_slowpath(ssize_t);
 object_t *_stm_allocate_external(ssize_t);
 void _stm_become_inevitable(const char*);
@@ -121,6 +122,7 @@
 #include <stdbool.h>
 bool _stm_was_read(object_t *obj);
 bool _stm_was_written(object_t *obj);
+bool _stm_was_written_card(object_t *obj);
 uintptr_t _stm_get_private_page(uintptr_t pagenum);
 bool _stm_in_transaction(stm_thread_local_t *tl);
 char *_stm_get_segment_base(long index);
@@ -138,12 +140,18 @@
 void _stm_set_nursery_free_count(uint64_t free_count);
 long _stm_count_modified_old_objects(void);
 long _stm_count_objects_pointing_to_nursery(void);
+long _stm_count_old_objects_with_cards(void);
 object_t *_stm_enum_modified_old_objects(long index);
 object_t *_stm_enum_objects_pointing_to_nursery(long index);
+object_t *_stm_enum_old_objects_with_cards(long index);
 uint64_t _stm_total_allocated(void);
 #endif
 
 #define _STM_GCFLAG_WRITE_BARRIER      0x01
+#define _STM_GCFLAG_CARDS_SET          0x08
+#define _STM_CARD_SIZE                 32     /* must be >= 32 */
+#define _STM_MIN_CARD_COUNT            17
+#define _STM_MIN_CARD_OBJ_SIZE         (_STM_CARD_SIZE * _STM_MIN_CARD_COUNT)
 #define _STM_NSE_SIGNAL_MAX     _STM_TIME_N
 #define _STM_FAST_ALLOC           (66*1024)
 
@@ -214,6 +222,20 @@
         _stm_write_slowpath(obj);
 }
 
+/* The following is a GC-optimized barrier that works on the granularity
+   of CARD_SIZE.  It can be used on any array object, but it is only
+   useful with those that were internally marked with GCFLAG_HAS_CARDS.
+   It has the same purpose as stm_write() for TM.
+   'index' is the array-item-based position within the object, which
+   is measured in units returned by stmcb_get_card_base_itemsize().
+*/
+__attribute__((always_inline))
+static inline void stm_write_card(object_t *obj, uintptr_t index)
+{
+    if (UNLIKELY((obj->stm_flags & _STM_GCFLAG_WRITE_BARRIER) != 0))
+        _stm_write_slowpath_card(obj, index);
+}
+
 /* Must be provided by the user of this library.
    The "size rounded up" must be a multiple of 8 and at least 16.
    "Tracing" an object means enumerating all GC references in it,
@@ -224,6 +246,16 @@
 */
 extern ssize_t stmcb_size_rounded_up(struct object_s *);
 extern void stmcb_trace(struct object_s *, void (object_t **));
+/* a special trace-callback that is only called for the marked
+   ranges of indices (using stm_write_card(o, index)) */
+extern void stmcb_trace_cards(struct object_s *, void (object_t **),
+                              uintptr_t start, uintptr_t stop);
+/* this function will be called on objects that support cards.
+   It returns the base_offset (in bytes) inside the object from
+   where the indices start, and item_size (in bytes) for the size of
+   one item */
+extern void stmcb_get_card_base_itemsize(struct object_s *,
+                                         uintptr_t offset_itemsize[2]);
 extern void stmcb_commit_soon(void);
 
 
@@ -249,6 +281,7 @@
     return (object_t *)p;
 }
 
+
 /* Allocate a weakref object. Weakref objects have a
    reference to an object at the byte-offset
        stmcb_size_rounded_up(obj) - sizeof(void*)


More information about the pypy-commit mailing list