[pypy-commit] stmgc default: fix a race between doing shadow stack snapshots and a concurrently running major

Raemi noreply at buildbot.pypy.org
Wed Aug 13 18:11:33 CEST 2014


Author: Remi Meier <remi.meier at inf.ethz.ch>
Branch: 
Changeset: r1311:b7f132d1afba
Date: 2014-08-13 18:12 +0200
http://bitbucket.org/pypy/stmgc/changeset/b7f132d1afba/

Log:	fix a race between doing shadow stack snapshots and a concurrently
	running major collection. Also add more comments

diff --git a/c7/demo/demo_random2.c b/c7/demo/demo_random2.c
--- a/c7/demo/demo_random2.c
+++ b/c7/demo/demo_random2.c
@@ -16,7 +16,7 @@
 #define FORKS 3
 
 #define ACTIVE_ROOTS_SET_SIZE 100 // max num of roots created/alive in one transaction
-
+#define MAX_ROOTS_ON_SS 1000 // max on shadow stack
 
 // SUPPORT
 struct node_s;
@@ -127,12 +127,17 @@
 {
     int i;
     long to_push = td.active_roots_num;
+    long not_pushed = 0;
     for (i = to_push - 1; i >= 0; i--) {
-        STM_PUSH_ROOT(stm_thread_local, td.active_roots_set[i]);
-        td.roots_on_ss++;
         td.active_roots_num--;
+        if (td.roots_on_ss < MAX_ROOTS_ON_SS) {
+            STM_PUSH_ROOT(stm_thread_local, td.active_roots_set[i]);
+            td.roots_on_ss++;
+        } else {
+            not_pushed++;
+        }
     }
-    return to_push;
+    return to_push - not_pushed;
 }
 
 void add_root(objptr_t r);
@@ -206,6 +211,7 @@
 objptr_t simple_events(objptr_t p, objptr_t _r)
 {
     int k = get_rand(10);
+    long pushed;
 
     switch (k) {
     case 0: // remove a root
@@ -221,8 +227,7 @@
             p = _r;
         break;
     case 3: // allocate fresh 'p'
-        ;
-        long pushed = push_roots();
+        pushed = push_roots();
         size_t sizes[4] = {sizeof(struct node_s),
                            sizeof(struct node_s) + (get_rand(100000) & ~15),
                            sizeof(struct node_s) + 4096,
@@ -281,7 +286,6 @@
     return p;
 }
 
-
 void frame_loop();
 objptr_t do_step(objptr_t p)
 {
@@ -306,28 +310,28 @@
         td.roots_on_ss = td.roots_on_ss_at_tr_start;
         td.active_roots_num = 0;
         pop_roots(pushed);
-        return NULL;
+        p = NULL;
     } else if (get_rand(10) == 1) {
         long pushed = push_roots();
         /* leaving our frame */
         frame_loop();
         /* back in our frame */
         pop_roots(pushed);
-        return NULL;
+        p = NULL;
     } else if (get_rand(20) == 1) {
         long pushed = push_roots();
         stm_become_inevitable(&stm_thread_local, "please");
         assert(stm_is_inevitable());
         pop_roots(pushed);
-        return NULL;
+        p= NULL;
     } else if (get_rand(20) == 1) {
-        return (objptr_t)-1; // possibly fork
+        p = (objptr_t)-1; // possibly fork
     } else if (get_rand(20) == 1) {
         long pushed = push_roots();
         stm_become_globally_unique_transaction(&stm_thread_local, "really");
         fprintf(stderr, "[GUT/%d]", (int)STM_SEGMENT->segment_num);
         pop_roots(pushed);
-        return NULL;
+        p = NULL;
     }
     return p;
 }
@@ -338,7 +342,9 @@
     rewind_jmp_buf rjbuf;
 
     stm_rewind_jmp_enterframe(&stm_thread_local, &rjbuf);
-    volatile long roots_on_ss = td.roots_on_ss;
+    //fprintf(stderr,"%p F: %p\n", STM_SEGMENT->running_thread,  __builtin_frame_address(0));
+
+    long roots_on_ss = td.roots_on_ss;
     /* "interpreter main loop": this is one "application-frame" */
     while (td.steps_left-->0 && get_rand(10) != 0) {
         if (td.steps_left % 8 == 0)
@@ -348,6 +354,7 @@
 
         p = do_step(p);
 
+
         if (p == (objptr_t)-1) {
             p = NULL;
 
diff --git a/c7/stm/core.c b/c7/stm/core.c
--- a/c7/stm/core.c
+++ b/c7/stm/core.c
@@ -997,6 +997,9 @@
     /* NB. careful, this function might be called more than once to
        abort a given segment.  Make sure that
        stm_rewind_jmp_restore_shadowstack() is idempotent. */
+    /* we need to do this here and not directly in rewind_longjmp() because
+       that is called when we already released everything (safe point)
+       and a concurrent major GC could mess things up. */
     stm_rewind_jmp_restore_shadowstack(tl);
     assert(tl->shadowstack == pseg->shadowstack_at_start_of_transaction);
 #endif
diff --git a/c7/stm/rewind_setjmp.c b/c7/stm/rewind_setjmp.c
--- a/c7/stm/rewind_setjmp.c
+++ b/c7/stm/rewind_setjmp.c
@@ -4,6 +4,12 @@
 #include <assert.h>
 #include <alloca.h>
 
+#ifndef _STM_CORE_H_
+long _has_mutex() {return 1;}
+void s_mutex_lock() {}
+void s_mutex_unlock() {}
+#endif
+
 
 struct _rewind_jmp_moved_s {
     struct _rewind_jmp_moved_s *next;
@@ -23,26 +29,39 @@
 
 static void copy_stack(rewind_jmp_thread *rjthread, char *base, void *ssbase)
 {
-    /* Copy away part of the stack and shadowstack.
+    /* Copy away part of the stack and shadowstack. Sets moved_off_base to
+       the current frame_base.
+
        The stack is copied between 'base' (lower limit, i.e. newest bytes)
        and 'rjthread->head->frame_base' (upper limit, i.e. oldest bytes).
        The shadowstack is copied between 'ssbase' (upper limit, newest)
        and 'rjthread->head->shadowstack_base' (lower limit, oldest).
     */
+    struct _rewind_jmp_moved_s *next;
+    char *stop;
+    void *ssstop;
+    size_t stack_size, ssstack_size;
+
+    assert(_has_mutex());
+
     assert(rjthread->head != NULL);
-    char *stop = rjthread->head->frame_base;
+    stop = rjthread->head->frame_base;
+    ssstop = rjthread->head->shadowstack_base;
     assert(stop >= base);
-    void *ssstop = rjthread->head->shadowstack_base;
     assert(ssstop <= ssbase);
-    struct _rewind_jmp_moved_s *next = (struct _rewind_jmp_moved_s *)
-        rj_malloc(RJM_HEADER + (stop - base) + (ssbase - ssstop));
+    stack_size = stop - base;
+    ssstack_size = ssbase - ssstop;
+
+    next = (struct _rewind_jmp_moved_s *)
+        rj_malloc(RJM_HEADER + stack_size + ssstack_size);
     assert(next != NULL);    /* XXX out of memory */
     next->next = rjthread->moved_off;
-    next->stack_size = stop - base;
-    next->shadowstack_size = ssbase - ssstop;
-    memcpy(((char *)next) + RJM_HEADER, base, stop - base);
-    memcpy(((char *)next) + RJM_HEADER + (stop - base), ssstop,
-           ssbase - ssstop);
+    next->stack_size = stack_size;
+    next->shadowstack_size = ssstack_size;
+
+    memcpy(((char *)next) + RJM_HEADER, base, stack_size);
+    memcpy(((char *)next) + RJM_HEADER + stack_size, ssstop,
+           ssstack_size);
 
     rjthread->moved_off_base = stop;
     rjthread->moved_off_ssbase = ssstop;
@@ -52,7 +71,12 @@
 __attribute__((noinline))
 long rewind_jmp_setjmp(rewind_jmp_thread *rjthread, void *ss)
 {
+    /* saves the current stack frame to the list of slices and
+       calls setjmp(). It returns the number of times a longjmp()
+       jumped back to this setjmp() */
     if (rjthread->moved_off) {
+        /* old stack slices are not needed anymore (next longjmp()
+           will restore only to this setjmp()) */
         _rewind_jmp_free_stack_slices(rjthread);
     }
     /* all locals of this function that need to be saved and restored
@@ -72,22 +96,36 @@
         result = rjthread->repeat_count + 1;
     }
     rjthread->repeat_count = result;
+
+    /* snapshot of top frame: needed every time because longjmp() frees
+       the previous one. Need to have mutex locked otherwise a concurrent
+       GC may get garbage while saving shadow stack */
+    s_mutex_lock();
     copy_stack(rjthread, (char *)&saved, saved.ss1);
+    s_mutex_unlock();
+
     return result;
 }
 
 __attribute__((noinline, noreturn))
 static void do_longjmp(rewind_jmp_thread *rjthread, char *stack_free)
 {
+    /* go through list of copied stack-slices and copy them back to the
+       current stack, expanding it if necessary. The shadowstack should
+       already be restored at this point (restore_shadowstack()) */
     assert(rjthread->moved_off_base != NULL);
+    s_mutex_lock();
 
     while (rjthread->moved_off) {
         struct _rewind_jmp_moved_s *p = rjthread->moved_off;
         char *target = rjthread->moved_off_base;
+        /* CPU stack grows downwards: */
         target -= p->stack_size;
         if (target < stack_free) {
             /* need more stack space! */
+            s_mutex_unlock();
             do_longjmp(rjthread, alloca(stack_free - target));
+            abort();            /* unreachable */
         }
         memcpy(target, ((char *)p) + RJM_HEADER, p->stack_size);
 
@@ -95,6 +133,8 @@
         rjthread->moved_off = p->next;
         rj_free(p);
     }
+
+    s_mutex_unlock();
     __builtin_longjmp(rjthread->jmpbuf, 1);
 }
 
@@ -109,19 +149,25 @@
 char *rewind_jmp_enum_shadowstack(rewind_jmp_thread *rjthread,
                                   void *callback(void *, const void *, size_t))
 {
+    /* enumerate all saved shadow-stack slices */
     struct _rewind_jmp_moved_s *p = rjthread->moved_off;
     char *sstarget = rjthread->moved_off_ssbase;
 
+    assert(_has_mutex());
+
     while (p) {
-        char *ssend = sstarget + p->shadowstack_size;
-        callback(sstarget, ((char *)p) + RJM_HEADER + p->stack_size,
-                 p->shadowstack_size);
-        sstarget = ssend;
+        if (p->shadowstack_size) {
+            void *ss_slice = ((char *)p) + RJM_HEADER + p->stack_size;
+            callback(sstarget, ss_slice, p->shadowstack_size);
+
+            sstarget += p->shadowstack_size;
+        }
         p = p->next;
     }
     return sstarget;
 }
 
+
 char *rewind_jmp_restore_shadowstack(rewind_jmp_thread *rjthread)
 {
     return rewind_jmp_enum_shadowstack(rjthread, memcpy);
@@ -130,16 +176,23 @@
 __attribute__((noinline))
 void _rewind_jmp_copy_stack_slice(rewind_jmp_thread *rjthread)
 {
+    /* called when leaving a frame. copies the now-current frame
+       to the list of stack-slices */
+    s_mutex_lock();
     if (rjthread->head == NULL) {
         _rewind_jmp_free_stack_slices(rjthread);
+        s_mutex_unlock();
         return;
     }
     assert(rjthread->moved_off_base < (char *)rjthread->head);
     copy_stack(rjthread, rjthread->moved_off_base, rjthread->moved_off_ssbase);
+    s_mutex_unlock();
 }
 
 void _rewind_jmp_free_stack_slices(rewind_jmp_thread *rjthread)
 {
+    /* frees all saved stack copies */
+    assert(_has_mutex());
     struct _rewind_jmp_moved_s *p = rjthread->moved_off;
     struct _rewind_jmp_moved_s *pnext;
     while (p) {
diff --git a/c7/stm/rewind_setjmp.h b/c7/stm/rewind_setjmp.h
--- a/c7/stm/rewind_setjmp.h
+++ b/c7/stm/rewind_setjmp.h
@@ -1,9 +1,20 @@
 #ifndef _REWIND_SETJMP_H_
 #define _REWIND_SETJMP_H_
 
+
 #include <stddef.h>
 
 /************************************************************
+There is a singly-linked list of frames in each thread
+rjthread->head->prev->prev->prev
+
+Another singly-linked list is the list of copied stack-slices.
+When doing a setjmp(), we copy the top-frame, free all old
+stack-slices, and link it to the top-frame->moved_off.
+When returning from the top-frame while moved_off still points
+to a slice, we also need to copy the top-frame->prev frame/slice
+and add it to this list (pointed to by moved_off).
+--------------------------------------------------------------
 
            :                   :       ^^^^^
            |-------------------|    older frames in the stack
@@ -58,6 +69,7 @@
 } rewind_jmp_thread;
 
 
+/* remember the current stack and ss_stack positions */
 #define rewind_jmp_enterframe(rjthread, rjbuf, ss)   do {  \
     (rjbuf)->frame_base = __builtin_frame_address(0);      \
     (rjbuf)->shadowstack_base = (char *)(ss);              \
@@ -65,6 +77,8 @@
     (rjthread)->head = (rjbuf);                            \
 } while (0)
 
+/* go up one frame. if there was a setjmp call in this frame,
+ */
 #define rewind_jmp_leaveframe(rjthread, rjbuf, ss)   do {    \
     assert((rjbuf)->shadowstack_base == (char *)(ss));       \
     (rjthread)->head = (rjbuf)->prev;                        \


More information about the pypy-commit mailing list