[pypy-commit] pypy stmgc-c7: import stmgc/4bde66e3b621 (branch "marker")

arigo noreply at buildbot.pypy.org
Mon Apr 28 19:09:28 CEST 2014


Author: Armin Rigo <arigo at tunes.org>
Branch: stmgc-c7
Changeset: r71045:73611e136820
Date: 2014-04-28 18:45 +0200
http://bitbucket.org/pypy/pypy/changeset/73611e136820/

Log:	import stmgc/4bde66e3b621 (branch "marker")

diff --git a/rpython/translator/stm/src_stm/revision b/rpython/translator/stm/src_stm/revision
--- a/rpython/translator/stm/src_stm/revision
+++ b/rpython/translator/stm/src_stm/revision
@@ -1,1 +1,1 @@
-889897f2f5ef
+4bde66e3b621
diff --git a/rpython/translator/stm/src_stm/stm/contention.c b/rpython/translator/stm/src_stm/stm/contention.c
--- a/rpython/translator/stm/src_stm/stm/contention.c
+++ b/rpython/translator/stm/src_stm/stm/contention.c
@@ -100,7 +100,8 @@
 
 
 static void contention_management(uint8_t other_segment_num,
-                                  enum contention_kind_e kind)
+                                  enum contention_kind_e kind,
+                                  object_t *obj)
 {
     assert(_has_mutex());
     assert(other_segment_num != STM_SEGMENT->segment_num);
@@ -162,10 +163,12 @@
              itself already paused here.
         */
         contmgr.other_pseg->signal_when_done = true;
+        marker_contention(kind, false, other_segment_num, obj);
 
         change_timing_state(wait_category);
 
-        /* XXX should also tell other_pseg "please commit soon" */
+        /* tell the other to commit ASAP */
+        signal_other_to_commit_soon(contmgr.other_pseg);
 
         dprintf(("pausing...\n"));
         cond_signal(C_AT_SAFE_POINT);
@@ -177,12 +180,22 @@
         if (must_abort())
             abort_with_mutex();
 
-        change_timing_state(STM_TIME_RUN_CURRENT);
+        struct stm_priv_segment_info_s *pseg =
+            get_priv_segment(STM_SEGMENT->segment_num);
+        double elapsed =
+            change_timing_state_tl(pseg->pub.running_thread,
+                                   STM_TIME_RUN_CURRENT);
+        marker_copy(pseg->pub.running_thread, pseg,
+                    wait_category, elapsed);
     }
 
     else if (!contmgr.abort_other) {
+        /* tell the other to commit ASAP, since it causes aborts */
+        signal_other_to_commit_soon(contmgr.other_pseg);
+
         dprintf(("abort in contention\n"));
         STM_SEGMENT->nursery_end = abort_category;
+        marker_contention(kind, false, other_segment_num, obj);
         abort_with_mutex();
     }
 
@@ -190,6 +203,7 @@
         /* We have to signal the other thread to abort, and wait until
            it does. */
         contmgr.other_pseg->pub.nursery_end = abort_category;
+        marker_contention(kind, true, other_segment_num, obj);
 
         int sp = contmgr.other_pseg->safe_point;
         switch (sp) {
@@ -257,10 +271,18 @@
             abort_data_structures_from_segment_num(other_segment_num);
         }
         dprintf(("killed other thread\n"));
+
+        /* we should commit soon, since we caused an abort */
+        //signal_other_to_commit_soon(get_priv_segment(STM_SEGMENT->segment_num));
+        if (!STM_PSEGMENT->signalled_to_commit_soon) {
+            STM_PSEGMENT->signalled_to_commit_soon = true;
+            stmcb_commit_soon();
+        }
     }
 }
 
-static void write_write_contention_management(uintptr_t lock_idx)
+static void write_write_contention_management(uintptr_t lock_idx,
+                                              object_t *obj)
 {
     s_mutex_lock();
 
@@ -271,7 +293,7 @@
         assert(get_priv_segment(other_segment_num)->write_lock_num ==
                prev_owner);
 
-        contention_management(other_segment_num, WRITE_WRITE_CONTENTION);
+        contention_management(other_segment_num, WRITE_WRITE_CONTENTION, obj);
 
         /* now we return into _stm_write_slowpath() and will try again
            to acquire the write lock on our object. */
@@ -280,12 +302,13 @@
     s_mutex_unlock();
 }
 
-static void write_read_contention_management(uint8_t other_segment_num)
+static void write_read_contention_management(uint8_t other_segment_num,
+                                             object_t *obj)
 {
-    contention_management(other_segment_num, WRITE_READ_CONTENTION);
+    contention_management(other_segment_num, WRITE_READ_CONTENTION, obj);
 }
 
 static void inevitable_contention_management(uint8_t other_segment_num)
 {
-    contention_management(other_segment_num, INEVITABLE_CONTENTION);
+    contention_management(other_segment_num, INEVITABLE_CONTENTION, NULL);
 }
diff --git a/rpython/translator/stm/src_stm/stm/contention.h b/rpython/translator/stm/src_stm/stm/contention.h
--- a/rpython/translator/stm/src_stm/stm/contention.h
+++ b/rpython/translator/stm/src_stm/stm/contention.h
@@ -1,11 +1,14 @@
 /* Imported by rpython/translator/stm/import_stmgc.py */
 
-static void write_write_contention_management(uintptr_t lock_idx);
-static void write_read_contention_management(uint8_t other_segment_num);
+static void write_write_contention_management(uintptr_t lock_idx,
+                                              object_t *obj);
+static void write_read_contention_management(uint8_t other_segment_num,
+                                             object_t *obj);
 static void inevitable_contention_management(uint8_t other_segment_num);
 
 static inline bool is_abort(uintptr_t nursery_end) {
-    return (nursery_end <= _STM_NSE_SIGNAL_MAX && nursery_end != NSE_SIGPAUSE);
+    return (nursery_end <= _STM_NSE_SIGNAL_MAX && nursery_end != NSE_SIGPAUSE
+            && nursery_end != NSE_SIGCOMMITSOON);
 }
 
 static inline bool is_aborting_now(uint8_t other_segment_num) {
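
Note that 'nursery_end' doubles as a per-segment signal word: any value up
to _STM_NSE_SIGNAL_MAX encodes a request rather than a real nursery limit,
and this commit adds NSE_SIGCOMMITSOON as a new non-abort request.  A
minimal sketch of the resulting dispatch, using only names from this diff
(the helper classify_nse() itself is hypothetical):

    /* sketch only: classify the signal encoded in 'nursery_end' */
    static const char *classify_nse(uintptr_t nursery_end)
    {
        if (nursery_end > _STM_NSE_SIGNAL_MAX)
            return "no signal, real nursery limit";
        if (nursery_end == NSE_SIGPAUSE)
            return "pause at the next safe point";
        if (nursery_end == NSE_SIGCOMMITSOON)
            return "commit as soon as possible";   /* new here */
        return "abort";                            /* is_abort() case */
    }
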
diff --git a/rpython/translator/stm/src_stm/stm/core.c b/rpython/translator/stm/src_stm/stm/core.c
--- a/rpython/translator/stm/src_stm/stm/core.c
+++ b/rpython/translator/stm/src_stm/stm/core.c
@@ -15,13 +15,10 @@
 #define EVENTUALLY(condition)                                   \
     {                                                           \
         if (!(condition)) {                                     \
-            int _i;                                             \
-            for (_i = 1; _i <= NB_SEGMENTS; _i++)               \
-                spinlock_acquire(lock_pages_privatizing[_i]);   \
+            acquire_privatization_lock();                       \
             if (!(condition))                                   \
                 stm_fatalerror("fails: " #condition);           \
-            for (_i = 1; _i <= NB_SEGMENTS; _i++)               \
-                spinlock_release(lock_pages_privatizing[_i]);   \
+            release_privatization_lock();                       \
         }                                                       \
     }
 #endif
@@ -77,9 +74,15 @@
     assert(lock_idx < sizeof(write_locks));
  retry:
     if (write_locks[lock_idx] == 0) {
+        /* A lock to prevent a concurrent marker_fetch_obj_write()
+           from reading garbage out of our lists */
+        acquire_marker_lock(STM_SEGMENT->segment_base);
+
         if (UNLIKELY(!__sync_bool_compare_and_swap(&write_locks[lock_idx],
-                                                   0, lock_num)))
+                                                   0, lock_num))) {
+            release_marker_lock(STM_SEGMENT->segment_base);
             goto retry;
+        }
 
         dprintf_test(("write_slowpath %p -> mod_old\n", obj));
 
@@ -87,6 +90,15 @@
            Add it to the list 'modified_old_objects'. */
         LIST_APPEND(STM_PSEGMENT->modified_old_objects, obj);
 
+        /* Add the current marker, recording where we wrote to this object */
+        uintptr_t marker[2];
+        marker_fetch(STM_SEGMENT->running_thread, marker);
+        STM_PSEGMENT->modified_old_objects_markers =
+            list_append2(STM_PSEGMENT->modified_old_objects_markers,
+                         marker[0], marker[1]);
+
+        release_marker_lock(STM_SEGMENT->segment_base);
+
         /* We need to privatize the pages containing the object, if they
            are still SHARED_PAGE.  The common case is that there is only
            one page in total. */
@@ -128,7 +140,7 @@
     else {
         /* call the contention manager, and then retry (unless we were
            aborted). */
-        write_write_contention_management(lock_idx);
+        write_write_contention_management(lock_idx, obj);
         goto retry;
     }
 
@@ -196,7 +208,13 @@
     assert(STM_PSEGMENT->transaction_state == TS_NONE);
     change_timing_state(STM_TIME_RUN_CURRENT);
     STM_PSEGMENT->start_time = tl->_timing_cur_start;
+    STM_PSEGMENT->signalled_to_commit_soon = false;
     STM_PSEGMENT->safe_point = SP_RUNNING;
+#ifndef NDEBUG
+    STM_PSEGMENT->marker_inev[1] = 99999999999999999L;
+#endif
+    if (jmpbuf == NULL)
+        marker_fetch_inev();
     STM_PSEGMENT->transaction_state = (jmpbuf != NULL ? TS_REGULAR
                                                       : TS_INEVITABLE);
     STM_SEGMENT->jmpbuf_ptr = jmpbuf;
@@ -224,12 +242,17 @@
     }
 
     assert(list_is_empty(STM_PSEGMENT->modified_old_objects));
+    assert(list_is_empty(STM_PSEGMENT->modified_old_objects_markers));
     assert(list_is_empty(STM_PSEGMENT->young_weakrefs));
     assert(tree_is_cleared(STM_PSEGMENT->young_outside_nursery));
     assert(tree_is_cleared(STM_PSEGMENT->nursery_objects_shadows));
     assert(tree_is_cleared(STM_PSEGMENT->callbacks_on_abort));
     assert(STM_PSEGMENT->objects_pointing_to_nursery == NULL);
     assert(STM_PSEGMENT->large_overflow_objects == NULL);
+#ifndef NDEBUG
+    /* this should not be used when objects_pointing_to_nursery == NULL */
+    STM_PSEGMENT->modified_old_objects_markers_num_old = 99999999999999999L;
+#endif
 
     check_nursery_at_transaction_start();
 }
@@ -264,7 +287,7 @@
             ({
                 if (was_read_remote(remote_base, item, remote_version)) {
                     /* A write-read conflict! */
-                    write_read_contention_management(i);
+                    write_read_contention_management(i, item);
 
                     /* If we reach this point, we didn't abort, but maybe we
                        had to wait for the other thread to commit.  If we
@@ -338,9 +361,12 @@
     /* Copy around the version of 'obj' that lives in our own segment.
        It is first copied into the shared pages, and then into other
        segments' own private pages.
+
+       Must be called with the privatization lock acquired.
     */
     assert(!_is_young(obj));
     assert(obj->stm_flags & GCFLAG_WRITE_BARRIER);
+    assert(STM_PSEGMENT->privatization_lock == 1);
 
     uintptr_t start = (uintptr_t)obj;
     uintptr_t first_page = start / 4096UL;
@@ -382,26 +408,9 @@
                     memcpy(dst, src, copy_size);
             }
             else {
-                EVENTUALLY(memcmp(dst, src, copy_size) == 0);  /* same page */
+                assert(memcmp(dst, src, copy_size) == 0);  /* same page */
             }
 
-            /* Do a full memory barrier.  We must make sure that other
-               CPUs see the changes we did to the shared page ("S",
-               above) before we check the other segments below with
-               is_private_page().  Otherwise, we risk the following:
-               this CPU writes "S" but the writes are not visible yet;
-               then it checks is_private_page() and gets false, and does
-               nothing more; just afterwards another CPU sets its own
-               private_page bit and copies the page; but it risks doing
-               so before seeing the "S" writes.
-
-               XXX what is the cost of this?  If it's high, then we
-               should reorganize the code so that we buffer the second
-               parts and do them by bunch of N, after just one call to
-               __sync_synchronize()...
-            */
-            __sync_synchronize();
-
             for (i = 1; i <= NB_SEGMENTS; i++) {
                 if (i == myself)
                     continue;
@@ -418,7 +427,7 @@
                         memcpy(dst, src, copy_size);
                 }
                 else {
-                    EVENTUALLY(!memcmp(dst, src, copy_size));  /* same page */
+                    assert(!memcmp(dst, src, copy_size));  /* same page */
                 }
             }
 
@@ -432,12 +441,15 @@
     if (STM_PSEGMENT->large_overflow_objects == NULL)
         return;
 
+    acquire_privatization_lock();
     LIST_FOREACH_R(STM_PSEGMENT->large_overflow_objects, object_t *,
                    synchronize_object_now(item));
+    release_privatization_lock();
 }
 
 static void push_modified_to_other_segments(void)
 {
+    acquire_privatization_lock();
     LIST_FOREACH_R(
         STM_PSEGMENT->modified_old_objects,
         object_t * /*item*/,
@@ -457,8 +469,10 @@
                private pages as needed */
             synchronize_object_now(item);
         }));
+    release_privatization_lock();
 
     list_clear(STM_PSEGMENT->modified_old_objects);
+    list_clear(STM_PSEGMENT->modified_old_objects_markers);
 }
 
 static void _finish_transaction(int attribute_to)
@@ -597,6 +611,7 @@
         }));
 
     list_clear(pseg->modified_old_objects);
+    list_clear(pseg->modified_old_objects_markers);
 }
 
 static void abort_data_structures_from_segment_num(int segment_num)
@@ -621,8 +636,9 @@
                        (int)pseg->transaction_state);
     }
 
-    /* look up and preserve the marker information as a string */
-    marker_fetch_expand(pseg);
+    /* if we don't have marker information already, look up and preserve
+       the marker information from the shadowstack as a string */
+    marker_default_for_abort(pseg);
 
     /* throw away the content of the nursery */
     long bytes_in_nursery = throw_away_nursery(pseg);
@@ -706,6 +722,7 @@
     if (STM_PSEGMENT->transaction_state == TS_REGULAR) {
         dprintf(("become_inevitable: %s\n", msg));
 
+        marker_fetch_inev();
         wait_for_end_of_inevitable_transaction(NULL);
         STM_PSEGMENT->transaction_state = TS_INEVITABLE;
         STM_SEGMENT->jmpbuf_ptr = NULL;
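
The marker fetched by marker_fetch() in the write slowpath above is a
two-word entry on the shadow stack: an odd integer (never a valid object
pointer) followed by an object.  A hedged sketch of how an interpreter
would push and pop such an entry; the helper names are hypothetical, only
the layout follows what marker_fetch() expects:

    /* sketch: push an (odd number, object) marker pair */
    static void push_marker(stm_thread_local_t *tl,
                            uintptr_t odd_number, object_t *following)
    {
        assert(odd_number & 1);    /* must be odd to be found later */
        tl->shadowstack[0].ss = (object_t *)odd_number;
        tl->shadowstack[1].ss = following;
        tl->shadowstack += 2;
    }

    static void pop_marker(stm_thread_local_t *tl)
    {
        tl->shadowstack -= 2;      /* drop the pair again */
    }
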
diff --git a/rpython/translator/stm/src_stm/stm/core.h b/rpython/translator/stm/src_stm/stm/core.h
--- a/rpython/translator/stm/src_stm/stm/core.h
+++ b/rpython/translator/stm/src_stm/stm/core.h
@@ -79,9 +79,17 @@
     /* List of old objects (older than the current transaction) that the
        current transaction attempts to modify.  This is used to track
        the STM status: they are old objects that were written to and
-       that need to be copied to other segments upon commit. */
+       that need to be copied to other segments upon commit.  Note that
+       every object takes three list items: the object, and two words for
+       the location marker. */
     struct list_s *modified_old_objects;
 
+    /* For each entry in 'modified_old_objects', we have two entries
+       in the following list, which give the marker at the time we added
+       the entry to modified_old_objects. */
+    struct list_s *modified_old_objects_markers;
+    uintptr_t modified_old_objects_markers_num_old;
+
     /* List of out-of-nursery objects that may contain pointers to
        nursery objects.  This is used to track the GC status: they are
        all objects outside the nursery on which an stm_write() occurred
@@ -149,12 +157,31 @@
     /* For sleeping contention management */
     bool signal_when_done;
 
+    /* This lock is acquired when that segment calls synchronize_object_now.
+       On the rare event of a page_privatize(), the latter will acquire
+       all the locks in all segments.  Otherwise, for the common case,
+       it's cheap.  (The set of all 'privatization_lock' in all segments
+       works like one single read-write lock, with page_privatize() acquiring
+       the write lock; but this variant is more efficient for the case of
+       many reads / rare writes.) */
+    uint8_t privatization_lock;
+
+    /* This lock is acquired when we mutate 'modified_old_objects' but
+       we don't have the global mutex.  It is also acquired during minor
+       collection.  It protects against a different thread that tries to
+       get this segment's marker corresponding to some object, or to
+       expand the marker into a full description. */
+    uint8_t marker_lock;
+
     /* In case of abort, we restore the 'shadowstack' field and the
        'thread_local_obj' field. */
     struct stm_shadowentry_s *shadowstack_at_start_of_transaction;
     object_t *threadlocal_at_start_of_transaction;
     struct stm_shadowentry_s *shadowstack_at_abort;
 
+    /* Already signalled to commit soon: */
+    bool signalled_to_commit_soon;
+
     /* For debugging */
 #ifndef NDEBUG
     pthread_t running_pthread;
@@ -162,6 +189,8 @@
 
     /* Temporarily stores the marker information */
     char marker_self[_STM_MARKER_LEN];
+    char marker_other[_STM_MARKER_LEN];
+    uintptr_t marker_inev[2];  /* marker where this thread became inevitable */
 };
 
 enum /* safe_point */ {
@@ -231,3 +260,31 @@
 
 static void copy_object_to_shared(object_t *obj, int source_segment_num);
 static void synchronize_object_now(object_t *obj);
+
+static inline void acquire_privatization_lock(void)
+{
+    uint8_t *lock = (uint8_t *)REAL_ADDRESS(STM_SEGMENT->segment_base,
+                                            &STM_PSEGMENT->privatization_lock);
+    spinlock_acquire(*lock);
+}
+
+static inline void release_privatization_lock(void)
+{
+    uint8_t *lock = (uint8_t *)REAL_ADDRESS(STM_SEGMENT->segment_base,
+                                            &STM_PSEGMENT->privatization_lock);
+    spinlock_release(*lock);
+}
+
+static inline void acquire_marker_lock(char *segment_base)
+{
+    uint8_t *lock = (uint8_t *)REAL_ADDRESS(segment_base,
+                                            &STM_PSEGMENT->marker_lock);
+    spinlock_acquire(*lock);
+}
+
+static inline void release_marker_lock(char *segment_base)
+{
+    uint8_t *lock = (uint8_t *)REAL_ADDRESS(segment_base,
+                                            &STM_PSEGMENT->marker_lock);
+    spinlock_release(*lock);
+}
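
The per-segment 'privatization_lock' bytes together behave like a single
read-write lock: the frequent operation, synchronize_object_now(), takes
only its own segment's byte, while the rare page_privatize() takes all of
them and thereby excludes every reader at once.  Condensed from the
core.c, nursery.c and pages.c hunks of this diff:

    /* reader side: own byte only (cheap, normally uncontended) */
    acquire_privatization_lock();
    synchronize_object_now(obj);
    release_privatization_lock();

    /* writer side, in page_privatize(): every segment's byte */
    long i;
    for (i = 1; i <= NB_SEGMENTS; i++)
        spinlock_acquire(get_priv_segment(i)->privatization_lock);
    /* ...flip the page to private and copy its contents... */
    for (i = NB_SEGMENTS; i >= 1; i--)
        spinlock_release(get_priv_segment(i)->privatization_lock);
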
diff --git a/rpython/translator/stm/src_stm/stm/gcpage.c b/rpython/translator/stm/src_stm/stm/gcpage.c
--- a/rpython/translator/stm/src_stm/stm/gcpage.c
+++ b/rpython/translator/stm/src_stm/stm/gcpage.c
@@ -93,17 +93,20 @@
     /* uncommon case: need to initialize some more pages */
     spinlock_acquire(lock_growth_large);
 
-    if (addr + size > uninitialized_page_start) {
+    char *start = uninitialized_page_start;
+    if (addr + size > start) {
         uintptr_t npages;
-        npages = (addr + size - uninitialized_page_start) / 4096UL;
+        npages = (addr + size - start) / 4096UL;
         npages += GCPAGE_NUM_PAGES;
-        if (uninitialized_page_stop - uninitialized_page_start <
-                npages * 4096UL) {
+        if (uninitialized_page_stop - start < npages * 4096UL) {
             stm_fatalerror("out of memory!");   /* XXX */
         }
-        setup_N_pages(uninitialized_page_start, npages);
-        __sync_synchronize();
-        uninitialized_page_start += npages * 4096UL;
+        setup_N_pages(start, npages);
+        if (!__sync_bool_compare_and_swap(&uninitialized_page_start,
+                                          start,
+                                          start + npages * 4096UL)) {
+            stm_fatalerror("uninitialized_page_start changed?");
+        }
     }
     spinlock_release(lock_growth_large);
     return addr;
@@ -419,6 +422,23 @@
     }
 }
 
+static void mark_visit_from_markers(void)
+{
+    long j;
+    for (j = 1; j <= NB_SEGMENTS; j++) {
+        char *base = get_segment_base(j);
+        struct list_s *lst = get_priv_segment(j)->modified_old_objects_markers;
+        uintptr_t i;
+        for (i = list_count(lst); i > 0; i -= 2) {
+            mark_visit_object((object_t *)list_item(lst, i - 1), base);
+        }
+        if (get_priv_segment(j)->transaction_state == TS_INEVITABLE) {
+            uintptr_t marker_inev_obj = get_priv_segment(j)->marker_inev[1];
+            mark_visit_object((object_t *)marker_inev_obj, base);
+        }
+    }
+}
+
 static void clean_up_segment_lists(void)
 {
     long i;
@@ -521,6 +541,7 @@
     /* marking */
     LIST_CREATE(mark_objects_to_trace);
     mark_visit_from_modified_objects();
+    mark_visit_from_markers();
     mark_visit_from_roots();
     LIST_FREE(mark_objects_to_trace);
 
diff --git a/rpython/translator/stm/src_stm/stm/largemalloc.c b/rpython/translator/stm/src_stm/stm/largemalloc.c
--- a/rpython/translator/stm/src_stm/stm/largemalloc.c
+++ b/rpython/translator/stm/src_stm/stm/largemalloc.c
@@ -354,6 +354,9 @@
     mscan->size = request_size;
     mscan->prev_size = BOTH_CHUNKS_USED;
     increment_total_allocated(request_size + LARGE_MALLOC_OVERHEAD);
+#ifndef NDEBUG
+    memset((char *)&mscan->d, 0xda, request_size);
+#endif
 
     lm_unlock();
 
diff --git a/rpython/translator/stm/src_stm/stm/list.h b/rpython/translator/stm/src_stm/stm/list.h
--- a/rpython/translator/stm/src_stm/stm/list.h
+++ b/rpython/translator/stm/src_stm/stm/list.h
@@ -34,6 +34,18 @@
 
 #define LIST_APPEND(lst, e)   ((lst) = list_append((lst), (uintptr_t)(e)))
 
+static inline struct list_s *list_append2(struct list_s *lst,
+                                          uintptr_t item0, uintptr_t item1)
+{
+    uintptr_t index = lst->count;
+    lst->count += 2;
+    if (UNLIKELY(index >= lst->last_allocated))
+        lst = _list_grow(lst, index + 1);
+    lst->items[index + 0] = item0;
+    lst->items[index + 1] = item1;
+    return lst;
+}
+
 
 static inline void list_clear(struct list_s *lst)
 {
@@ -67,6 +79,11 @@
     lst->items[index] = newitem;
 }
 
+static inline uintptr_t *list_ptr_to_item(struct list_s *lst, uintptr_t index)
+{
+    return &lst->items[index];
+}
+
 #define LIST_FOREACH_R(lst, TYPE, CODE)         \
     do {                                        \
         struct list_s *_lst = (lst);            \
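
list_append2() exists so that the markers list grows by exactly one pair
for every object appended to 'modified_old_objects', preserving the 2:1
length invariant that marker_fetch_obj_write() asserts.  The pattern, as
used in _stm_write_slowpath() above:

    LIST_APPEND(STM_PSEGMENT->modified_old_objects, obj);

    uintptr_t marker[2];
    marker_fetch(STM_SEGMENT->running_thread, marker);
    STM_PSEGMENT->modified_old_objects_markers =
        list_append2(STM_PSEGMENT->modified_old_objects_markers,
                     marker[0], marker[1]);

    /* the invariant relied upon when looking up a marker by object */
    assert(list_count(STM_PSEGMENT->modified_old_objects_markers) ==
           2 * list_count(STM_PSEGMENT->modified_old_objects));
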
diff --git a/rpython/translator/stm/src_stm/stm/marker.c b/rpython/translator/stm/src_stm/stm/marker.c
--- a/rpython/translator/stm/src_stm/stm/marker.c
+++ b/rpython/translator/stm/src_stm/stm/marker.c
@@ -12,38 +12,73 @@
                           const char *marker);
 
 
-static void marker_fetch_expand(struct stm_priv_segment_info_s *pseg)
+static void marker_fetch(stm_thread_local_t *tl, uintptr_t marker[2])
+{
+    /* fetch the current marker from the tl's shadow stack,
+       and return it in 'marker[2]'. */
+    struct stm_shadowentry_s *current = tl->shadowstack - 1;
+    struct stm_shadowentry_s *base = tl->shadowstack_base;
+
+    /* The shadowstack_base contains STM_STACK_MARKER_OLD, which is
+       a convenient stopper for the loop below but which shouldn't
+       be returned. */
+    assert(base->ss == (object_t *)STM_STACK_MARKER_OLD);
+
+    while (!(((uintptr_t)current->ss) & 1)) {
+        current--;
+        assert(current >= base);
+    }
+    if (current != base) {
+        /* found the odd marker */
+        marker[0] = (uintptr_t)current[0].ss;
+        marker[1] = (uintptr_t)current[1].ss;
+    }
+    else {
+        /* no marker found */
+        marker[0] = 0;
+        marker[1] = 0;
+    }
+}
+
+static void marker_expand(uintptr_t marker[2], char *segment_base,
+                          char *outmarker)
+{
+    /* Expand the marker given by 'marker[2]' into a full string.  This
+       works assuming that the marker was produced inside the segment
+       given by 'segment_base'.  If that's from a different thread, you
+       must first acquire the corresponding 'marker_lock'. */
+    assert(_has_mutex());
+    outmarker[0] = 0;
+    if (marker[0] == 0)
+        return;   /* no marker entry found */
+    if (stmcb_expand_marker != NULL) {
+        stmcb_expand_marker(segment_base, marker[0], (object_t *)marker[1],
+                            outmarker, _STM_MARKER_LEN);
+    }
+}
+
+static void marker_default_for_abort(struct stm_priv_segment_info_s *pseg)
 {
     if (pseg->marker_self[0] != 0)
         return;   /* already collected an entry */
 
-    if (stmcb_expand_marker != NULL) {
-        stm_thread_local_t *tl = pseg->pub.running_thread;
-        struct stm_shadowentry_s *current = tl->shadowstack - 1;
-        struct stm_shadowentry_s *base = tl->shadowstack_base;
-        /* stop walking just before shadowstack_base, which contains
-           STM_STACK_MARKER_OLD which shouldn't be expanded */
-        while (--current > base) {
-            uintptr_t x = (uintptr_t)current->ss;
-            if (x & 1) {
-                /* the stack entry is an odd number */
-                stmcb_expand_marker(pseg->pub.segment_base, x, current[1].ss,
-                                    pseg->marker_self, _STM_MARKER_LEN);
-
-                if (pseg->marker_self[0] != 0)
-                    break;
-            }
-        }
-    }
+    uintptr_t marker[2];
+    marker_fetch(pseg->pub.running_thread, marker);
+    marker_expand(marker, pseg->pub.segment_base, pseg->marker_self);
+    pseg->marker_other[0] = 0;
 }
 
 char *_stm_expand_marker(void)
 {
-    struct stm_priv_segment_info_s *pseg =
-        get_priv_segment(STM_SEGMENT->segment_num);
-    pseg->marker_self[0] = 0;
-    marker_fetch_expand(pseg);
-    return pseg->marker_self;
+    /* for tests only! */
+    static char _result[_STM_MARKER_LEN];
+    uintptr_t marker[2];
+    _result[0] = 0;
+    s_mutex_lock();
+    marker_fetch(STM_SEGMENT->running_thread, marker);
+    marker_expand(marker, STM_SEGMENT->segment_base, _result);
+    s_mutex_unlock();
+    return _result;
 }
 
 static void marker_copy(stm_thread_local_t *tl,
@@ -65,6 +100,105 @@
         tl->longest_marker_state = attribute_to;
         tl->longest_marker_time = time;
         memcpy(tl->longest_marker_self, pseg->marker_self, _STM_MARKER_LEN);
+        memcpy(tl->longest_marker_other, pseg->marker_other, _STM_MARKER_LEN);
     }
     pseg->marker_self[0] = 0;
+    pseg->marker_other[0] = 0;
 }
+
+static void marker_fetch_obj_write(uint8_t in_segment_num, object_t *obj,
+                                   uintptr_t marker[2])
+{
+    assert(_has_mutex());
+
+    /* here, the caller has acquired the other thread's marker_lock,
+       which means that:
+
+       (1) it has finished filling 'modified_old_objects' after it sets
+           up the write_locks[] value that we're conflicting with
+
+       (2) it is not mutating 'modified_old_objects' right now (we have
+           the global mutex_lock at this point too).
+    */
+    long i;
+    struct stm_priv_segment_info_s *pseg = get_priv_segment(in_segment_num);
+    struct list_s *mlst = pseg->modified_old_objects;
+    struct list_s *mlstm = pseg->modified_old_objects_markers;
+    for (i = list_count(mlst); --i >= 0; ) {
+        if (list_item(mlst, i) == (uintptr_t)obj) {
+            assert(list_count(mlstm) == 2 * list_count(mlst));
+            marker[0] = list_item(mlstm, i * 2 + 0);
+            marker[1] = list_item(mlstm, i * 2 + 1);
+            return;
+        }
+    }
+    marker[0] = 0;
+    marker[1] = 0;
+}
+
+static void marker_contention(int kind, bool abort_other,
+                              uint8_t other_segment_num, object_t *obj)
+{
+    uintptr_t self_marker[2];
+    uintptr_t other_marker[2];
+    struct stm_priv_segment_info_s *my_pseg, *other_pseg;
+
+    my_pseg = get_priv_segment(STM_SEGMENT->segment_num);
+    other_pseg = get_priv_segment(other_segment_num);
+
+    char *my_segment_base = STM_SEGMENT->segment_base;
+    char *other_segment_base = get_segment_base(other_segment_num);
+
+    acquire_marker_lock(other_segment_base);
+
+    /* Collect the location for myself.  It's usually the current
+       location, except in a write-read abort, in which case it's the
+       older location of the write. */
+    if (kind == WRITE_READ_CONTENTION)
+        marker_fetch_obj_write(my_pseg->pub.segment_num, obj, self_marker);
+    else
+        marker_fetch(my_pseg->pub.running_thread, self_marker);
+
+    /* Expand this location into either my_pseg->marker_self or
+       other_pseg->marker_other, depending on who aborts. */
+    marker_expand(self_marker, my_segment_base,
+                  abort_other ? other_pseg->marker_other
+                              : my_pseg->marker_self);
+
+    /* For some categories, we can also collect the relevant information
+       for the other segment. */
+    switch (kind) {
+    case WRITE_WRITE_CONTENTION:
+        marker_fetch_obj_write(other_segment_num, obj, other_marker);
+        break;
+    case INEVITABLE_CONTENTION:
+        assert(abort_other == false);
+        other_marker[0] = other_pseg->marker_inev[0];
+        other_marker[1] = other_pseg->marker_inev[1];
+        break;
+    default:
+        other_marker[0] = 0;
+        other_marker[1] = 0;
+        break;
+    }
+
+    marker_expand(other_marker, other_segment_base,
+                  abort_other ? other_pseg->marker_self
+                              : my_pseg->marker_other);
+
+    if (abort_other && other_pseg->marker_self[0] == 0) {
+        if (kind == WRITE_READ_CONTENTION)
+            strcpy(other_pseg->marker_self, "<read at unknown location>");
+        else
+            strcpy(other_pseg->marker_self, "<no location information>");
+    }
+
+    release_marker_lock(other_segment_base);
+}
+
+static void marker_fetch_inev(void)
+{
+    uintptr_t marker[2];
+    marker_fetch(STM_SEGMENT->running_thread, marker);
+    STM_PSEGMENT->marker_inev[0] = marker[0];
+    STM_PSEGMENT->marker_inev[1] = marker[1];
+}
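
stmcb_expand_marker is the user-supplied hook that turns the raw
(odd number, object) pair into text; the NULL checks above suggest it may
be left unset.  A purely illustrative implementation, assuming an
interpreter that encodes a bytecode position as 2*pos+1 and passes its
code object as the second word:

    #include <stdio.h>

    /* hypothetical expander: decode 2*pos+1 back into a position */
    static void expand_marker_example(char *segment_base,
                                      uintptr_t odd_number,
                                      object_t *following_object,
                                      char *outputbuf, size_t outputbufsize)
    {
        uintptr_t pos = odd_number >> 1;   /* segment_base unused here */
        snprintf(outputbuf, outputbufsize,
                 "bytecode #%lu in code object %p",
                 (unsigned long)pos, (void *)following_object);
    }
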
diff --git a/rpython/translator/stm/src_stm/stm/marker.h b/rpython/translator/stm/src_stm/stm/marker.h
--- a/rpython/translator/stm/src_stm/stm/marker.h
+++ b/rpython/translator/stm/src_stm/stm/marker.h
@@ -1,6 +1,13 @@
 /* Imported by rpython/translator/stm/import_stmgc.py */
 
-static void marker_fetch_expand(struct stm_priv_segment_info_s *pseg);
+static void marker_fetch(stm_thread_local_t *tl, uintptr_t marker[2]);
+static void marker_fetch_inev(void);
+static void marker_expand(uintptr_t marker[2], char *segment_base,
+                          char *outmarker);
+static void marker_default_for_abort(struct stm_priv_segment_info_s *pseg);
 static void marker_copy(stm_thread_local_t *tl,
                         struct stm_priv_segment_info_s *pseg,
                         enum stm_time_e attribute_to, double time);
+
+static void marker_contention(int kind, bool abort_other,
+                              uint8_t other_segment_num, object_t *obj);
diff --git a/rpython/translator/stm/src_stm/stm/nursery.c b/rpython/translator/stm/src_stm/stm/nursery.c
--- a/rpython/translator/stm/src_stm/stm/nursery.c
+++ b/rpython/translator/stm/src_stm/stm/nursery.c
@@ -216,7 +216,9 @@
                content); or add the object to 'large_overflow_objects'.
             */
             if (STM_PSEGMENT->minor_collect_will_commit_now) {
+                acquire_privatization_lock();
                 synchronize_object_now(obj);
+                release_privatization_lock();
             }
             else
                 LIST_APPEND(STM_PSEGMENT->large_overflow_objects, obj);
@@ -233,6 +235,18 @@
                    _collect_now(item));
 }
 
+static void collect_roots_from_markers(uintptr_t num_old)
+{
+    /* visit the marker objects */
+    struct list_s *mlst = STM_PSEGMENT->modified_old_objects_markers;
+    STM_PSEGMENT->modified_old_objects_markers_num_old = list_count(mlst);
+    uintptr_t i, total = list_count(mlst);
+    assert((total & 1) == 0);
+    for (i = num_old + 1; i < total; i += 2) {
+        minor_trace_if_young((object_t **)list_ptr_to_item(mlst, i));
+    }
+}
+
 static size_t throw_away_nursery(struct stm_priv_segment_info_s *pseg)
 {
     /* reset the nursery by zeroing it */
@@ -282,6 +296,8 @@
 
     dprintf(("minor_collection commit=%d\n", (int)commit));
 
+    acquire_marker_lock(STM_SEGMENT->segment_base);
+
     STM_PSEGMENT->minor_collect_will_commit_now = commit;
     if (!commit) {
         /* 'STM_PSEGMENT->overflow_number' is used now by this collection,
@@ -297,6 +313,7 @@
     /* All the objects we move out of the nursery become "overflow"
        objects.  We use the list 'objects_pointing_to_nursery'
        to hold the ones we didn't trace so far. */
+    uintptr_t num_old;
     if (STM_PSEGMENT->objects_pointing_to_nursery == NULL) {
         STM_PSEGMENT->objects_pointing_to_nursery = list_create();
 
@@ -306,7 +323,12 @@
            into objects_pointing_to_nursery, but instead we use the
            following shortcut */
         collect_modified_old_objects();
+        num_old = 0;
     }
+    else
+        num_old = STM_PSEGMENT->modified_old_objects_markers_num_old;
+
+    collect_roots_from_markers(num_old);
 
     collect_roots_in_nursery();
 
@@ -319,6 +341,8 @@
 
     assert(MINOR_NOTHING_TO_DO(STM_PSEGMENT));
     assert(list_is_empty(STM_PSEGMENT->objects_pointing_to_nursery));
+
+    release_marker_lock(STM_SEGMENT->segment_base);
 }
 
 static void minor_collection(bool commit)
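
collect_roots_from_markers() steps by 2 starting from an odd index because
the markers list interleaves pairs: the even slots hold odd integers,
which are not pointers and can never be young, and only the odd slots hold
real object references.  The first 'num_old' entries were already traced
by an earlier minor collection of the same transaction.

    /* markers list layout:
       index:    0            1        2            3
       content:  odd_number0  object0  odd_number1  object1  ...
       only indices 1, 3, 5, ... are GC references to trace */
    uintptr_t i, total = list_count(mlst);
    for (i = num_old + 1; i < total; i += 2)
        minor_trace_if_young((object_t **)list_ptr_to_item(mlst, i));
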
diff --git a/rpython/translator/stm/src_stm/stm/nursery.h b/rpython/translator/stm/src_stm/stm/nursery.h
--- a/rpython/translator/stm/src_stm/stm/nursery.h
+++ b/rpython/translator/stm/src_stm/stm/nursery.h
@@ -2,6 +2,7 @@
 
 /* '_stm_nursery_section_end' is either NURSERY_END or NSE_SIGxxx */
 #define NSE_SIGPAUSE   STM_TIME_WAIT_OTHER
+#define NSE_SIGCOMMITSOON   STM_TIME_SYNC_COMMIT_SOON
 
 
 static uint32_t highest_overflow_number;
diff --git a/rpython/translator/stm/src_stm/stm/pages.c b/rpython/translator/stm/src_stm/stm/pages.c
--- a/rpython/translator/stm/src_stm/stm/pages.c
+++ b/rpython/translator/stm/src_stm/stm/pages.c
@@ -109,18 +109,20 @@
 {
     /* check this thread's 'pages_privatized' bit */
     uint64_t bitmask = 1UL << (STM_SEGMENT->segment_num - 1);
-    struct page_shared_s *ps = &pages_privatized[pagenum - PAGE_FLAG_START];
+    volatile struct page_shared_s *ps = (volatile struct page_shared_s *)
+        &pages_privatized[pagenum - PAGE_FLAG_START];
     if (ps->by_segment & bitmask) {
         /* the page is already privatized; nothing to do */
         return;
     }
 
-#ifndef NDEBUG
-    spinlock_acquire(lock_pages_privatizing[STM_SEGMENT->segment_num]);
-#endif
+    long i;
+    for (i = 1; i <= NB_SEGMENTS; i++) {
+        spinlock_acquire(get_priv_segment(i)->privatization_lock);
+    }
 
     /* add this thread's 'pages_privatized' bit */
-    __sync_fetch_and_add(&ps->by_segment, bitmask);
+    ps->by_segment |= bitmask;
 
     /* "unmaps" the page to make the address space location correspond
        again to its underlying file offset (XXX later we should again
@@ -134,9 +136,9 @@
     /* copy the content from the shared (segment 0) source */
     pagecopy(new_page, stm_object_pages + pagenum * 4096UL);
 
-#ifndef NDEBUG
-    spinlock_release(lock_pages_privatizing[STM_SEGMENT->segment_num]);
-#endif
+    for (i = NB_SEGMENTS; i >= 1; i--) {
+        spinlock_release(get_priv_segment(i)->privatization_lock);
+    }
 }
 
 static void _page_do_reshare(long segnum, uintptr_t pagenum)
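
Because page_privatize() now runs with every segment's privatization lock
held, setting the 'pages_privatized' bit no longer needs an atomic
read-modify-write.  The two versions, for contrast:

    /* before: lock-free readers, so the bit was set atomically */
    __sync_fetch_and_add(&ps->by_segment, bitmask);

    /* after: all privatization locks held, a plain store suffices */
    ps->by_segment |= bitmask;
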
diff --git a/rpython/translator/stm/src_stm/stm/pages.h b/rpython/translator/stm/src_stm/stm/pages.h
--- a/rpython/translator/stm/src_stm/stm/pages.h
+++ b/rpython/translator/stm/src_stm/stm/pages.h
@@ -35,20 +35,6 @@
 };
 
 static struct page_shared_s pages_privatized[PAGE_FLAG_END - PAGE_FLAG_START];
-/* Rules for concurrent access to this array, possibly with is_private_page():
-
-   - we clear bits only during major collection, when all threads are
-     synchronized anyway
-
-   - we set only the bit corresponding to our segment number, using
-     an atomic addition; and we do it _before_ we actually make the
-     page private.
-
-   - concurrently, other threads checking the bits might (rarely)
-     get the answer 'true' to is_private_page() even though it is not
-     actually private yet.  This inconsistency is in the direction
-     that we want for synchronize_object_now().
-*/
 
 static void pages_initialize_shared(uintptr_t pagenum, uintptr_t count);
 static void page_privatize(uintptr_t pagenum);
@@ -73,7 +59,3 @@
     if (pages_privatized[pagenum - PAGE_FLAG_START].by_segment != 0)
         page_reshare(pagenum);
 }
-
-#ifndef NDEBUG
-static char lock_pages_privatizing[NB_SEGMENTS + 1] = { 0 };
-#endif
diff --git a/rpython/translator/stm/src_stm/stm/setup.c b/rpython/translator/stm/src_stm/stm/setup.c
--- a/rpython/translator/stm/src_stm/stm/setup.c
+++ b/rpython/translator/stm/src_stm/stm/setup.c
@@ -79,6 +79,7 @@
         pr->objects_pointing_to_nursery = NULL;
         pr->large_overflow_objects = NULL;
         pr->modified_old_objects = list_create();
+        pr->modified_old_objects_markers = list_create();
         pr->young_weakrefs = list_create();
         pr->old_weakrefs = list_create();
         pr->young_outside_nursery = tree_create();
@@ -116,6 +117,7 @@
         assert(pr->objects_pointing_to_nursery == NULL);
         assert(pr->large_overflow_objects == NULL);
         list_free(pr->modified_old_objects);
+        list_free(pr->modified_old_objects_markers);
         list_free(pr->young_weakrefs);
         list_free(pr->old_weakrefs);
         tree_free(pr->young_outside_nursery);
diff --git a/rpython/translator/stm/src_stm/stm/sync.c b/rpython/translator/stm/src_stm/stm/sync.c
--- a/rpython/translator/stm/src_stm/stm/sync.c
+++ b/rpython/translator/stm/src_stm/stm/sync.c
@@ -3,6 +3,10 @@
 #include <sys/prctl.h>
 #include <asm/prctl.h>
 
+#ifndef _STM_CORE_H_
+# error "must be compiled via stmgc.c"
+#endif
+
 
 /* Each segment can be in one of three possible states, described by
    the segment variable 'safe_point':
@@ -261,6 +265,18 @@
 static bool _safe_points_requested = false;
 #endif
 
+static void signal_other_to_commit_soon(struct stm_priv_segment_info_s *other_pseg)
+{
+    assert(_has_mutex());
+    /* never overwrite abort signals or safepoint requests
+       (too messy to deal with) */
+    if (!other_pseg->signalled_to_commit_soon
+        && !is_abort(other_pseg->pub.nursery_end)
+        && !pause_signalled) {
+        other_pseg->pub.nursery_end = NSE_SIGCOMMITSOON;
+    }
+}
+
 static void signal_everybody_to_pause_running(void)
 {
     assert(_safe_points_requested == false);
@@ -324,7 +340,21 @@
         if (STM_SEGMENT->nursery_end == NURSERY_END)
             break;    /* no safe point requested */
 
+        if (STM_SEGMENT->nursery_end == NSE_SIGCOMMITSOON) {
+            if (previous_state == -1) {
+                previous_state = change_timing_state(STM_TIME_SYNC_COMMIT_SOON);
+            }
+
+            STM_PSEGMENT->signalled_to_commit_soon = true;
+            stmcb_commit_soon();
+            if (!pause_signalled) {
+                STM_SEGMENT->nursery_end = NURSERY_END;
+                break;
+            }
+            STM_SEGMENT->nursery_end = NSE_SIGPAUSE;
+        }
         assert(STM_SEGMENT->nursery_end == NSE_SIGPAUSE);
+        assert(pause_signalled);
 
         /* If we are requested to enter a safe-point, we cannot proceed now.
            Wait until the safe-point request is removed for us. */
diff --git a/rpython/translator/stm/src_stm/stm/timing.c b/rpython/translator/stm/src_stm/stm/timing.c
--- a/rpython/translator/stm/src_stm/stm/timing.c
+++ b/rpython/translator/stm/src_stm/stm/timing.c
@@ -26,10 +26,11 @@
     return oldstate;
 }
 
-static void change_timing_state_tl(stm_thread_local_t *tl,
-                                   enum stm_time_e newstate)
+static double change_timing_state_tl(stm_thread_local_t *tl,
+                                     enum stm_time_e newstate)
 {
     TIMING_CHANGE(tl, newstate);
+    return elasped;
 }
 
 static void timing_end_transaction(enum stm_time_e attribute_to)
@@ -59,6 +60,7 @@
     "wait write read",
     "wait inevitable",
     "wait other",
+    "sync commit soon",
     "bookkeeping",
     "minor gc",
     "major gc",
diff --git a/rpython/translator/stm/src_stm/stm/timing.h b/rpython/translator/stm/src_stm/stm/timing.h
--- a/rpython/translator/stm/src_stm/stm/timing.h
+++ b/rpython/translator/stm/src_stm/stm/timing.h
@@ -9,7 +9,7 @@
 }
 
 static enum stm_time_e change_timing_state(enum stm_time_e newstate);
-static void change_timing_state_tl(stm_thread_local_t *tl,
-                                   enum stm_time_e newstate);
+static double change_timing_state_tl(stm_thread_local_t *tl,
+                                     enum stm_time_e newstate);
 
 static void timing_end_transaction(enum stm_time_e attribute_to);
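
change_timing_state_tl() now returns the time spent in the state being
left, which is what lets contention.c attribute a contention pause to the
marker preserved for it; as used in the contention.c hunk above:

    double elapsed =
        change_timing_state_tl(pseg->pub.running_thread,
                               STM_TIME_RUN_CURRENT);
    marker_copy(pseg->pub.running_thread, pseg, wait_category, elapsed);
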
diff --git a/rpython/translator/stm/src_stm/stmgc.h b/rpython/translator/stm/src_stm/stmgc.h
--- a/rpython/translator/stm/src_stm/stmgc.h
+++ b/rpython/translator/stm/src_stm/stmgc.h
@@ -67,6 +67,7 @@
     STM_TIME_WAIT_WRITE_READ,
     STM_TIME_WAIT_INEVITABLE,
     STM_TIME_WAIT_OTHER,
+    STM_TIME_SYNC_COMMIT_SOON,
     STM_TIME_BOOKKEEPING,
     STM_TIME_MINOR_GC,
     STM_TIME_MAJOR_GC,
@@ -217,9 +218,13 @@
    The "size rounded up" must be a multiple of 8 and at least 16.
    "Tracing" an object means enumerating all GC references in it,
    by invoking the callback passed as argument.
+   stmcb_commit_soon() is called when the current transaction is
+   advised to commit as soon as possible, in order to avoid
+   conflicts or to improve performance in general.
 */
 extern ssize_t stmcb_size_rounded_up(struct object_s *);
 extern void stmcb_trace(struct object_s *, void (object_t **));
+extern void stmcb_commit_soon(void);
 
 
 /* Allocate an object of the given size, which must be a multiple
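
stmcb_commit_soon() is a new callback that every user of the library must
now provide, like stmcb_size_rounded_up() and stmcb_trace().  A hedged
example of a trivial implementation: record the hint in a flag and act on
it at the next convenient point (the flag and the commit sequence below
are assumptions, not part of this API):

    #include <stdbool.h>

    static volatile bool commit_requested;   /* hypothetical user flag */

    void stmcb_commit_soon(void)
    {
        commit_requested = true;             /* just remember the hint */
    }

    /* ...checked later, e.g. between bytecodes:
           if (commit_requested) {
               commit_requested = false;
               stm_commit_transaction();     // then start a new one
           }
    */
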

