[pypy-commit] pypy fast-gil: Move the GIL logic to a new thread_gil.c. Use simple names for the
arigo
noreply at buildbot.pypy.org
Tue Jun 24 19:09:36 CEST 2014
Author: Armin Rigo <arigo at tunes.org>
Branch: fast-gil
Changeset: r72198:50a199f1b79d
Date: 2014-06-24 19:09 +0200
http://bitbucket.org/pypy/pypy/changeset/50a199f1b79d/
Log: Move the GIL logic to a new thread_gil.c. Use simple names for the
basic operations and define these in thread_pthread and thread_nt as
appropriate.
diff --git a/rpython/translator/c/src/thread_gil.c b/rpython/translator/c/src/thread_gil.c
new file mode 100644
--- /dev/null
+++ b/rpython/translator/c/src/thread_gil.c
@@ -0,0 +1,160 @@
+
+/* Idea:
+
+ - "The GIL" is a composite concept. There are two locks, and "the
+ GIL is locked" when both are locked.
+
+ - The first lock is a simple global variable 'rpy_fastgil'. With
+ shadowstack, we use the most portable definition: 0 means unlocked
+ and != 0 means locked. With asmgcc, 0 means unlocked but only 1
+ means locked. A different value means unlocked too, but the value
+ is used by the JIT to contain the stack top for stack root scanning.
+
+ - The second lock is a regular mutex. In the fast path, it is never
+ unlocked. Remember that "the GIL is unlocked" means that either
+ the first or the second lock is unlocked. It should never be the
+ case that both are unlocked at the same time.
+
+ - Let's call "thread 1" the thread with the GIL. Whenever it does an
+ external function call, it sets 'rpy_fastgil' to 0 (unlocked).
+ This is the cheapest way to release the GIL. When it returns from
+ the function call, this thread attempts to atomically change
+ 'rpy_fastgil' to 1. In the common case where it works, thread 1
+ has got the GIL back and so continues to run.
+
+ - Say "thread 2" is eagerly waiting for thread 1 to become blocked in
+ some long-running call. Regularly, it checks if 'rpy_fastgil' is 0
+ and tries to atomically change it to 1. If it succeeds, it means
+ that the GIL was not previously locked. Thread 2 has now got the GIL.
+
+ - If there are more than 2 threads, the rest is really sleeping by
+ waiting on the 'mutex_gil_stealer' held by thread 2.
+
+ - An additional mechanism is used for when thread 1 wants to
+ explicitly yield the GIL to thread 2: it does so by releasing
+ 'mutex_gil' (which is otherwise not released) but keeping the
+ value of 'rpy_fastgil' at 1.
+*/
+
+long rpy_fastgil = 1;
+long rpy_waiting_threads = -1;
+static mutex_t mutex_gil_stealer;
+static mutex_t mutex_gil;
+
+void RPyGilAllocate(void)
+{
+ assert(RPY_FASTGIL_LOCKED(rpy_fastgil));
+ mutex_init(&mutex_gil_stealer);
+ mutex_init(&mutex_gil);
+ mutex_lock(&mutex_gil);
+ rpy_waiting_threads = 0;
+}
+
+void RPyGilAcquire(void)
+{
+ /* Acquires the GIL. Note: this function saves and restores 'errno'.
+ */
+ long old_fastgil = lock_test_and_set(&rpy_fastgil, 1);
+
+ if (!RPY_FASTGIL_LOCKED(old_fastgil)) {
+ /* The fastgil was not previously locked: success.
+ 'mutex_gil' should still be locked at this point.
+ */
+ }
+ else {
+ /* Otherwise, another thread is busy with the GIL. */
+ int old_errno = errno;
+
+ /* Register me as one of the threads that is actively waiting
+ for the GIL. The number of such threads is found in
+ rpy_waiting_threads. */
+ assert(rpy_waiting_threads >= 0);
+ atomic_increment(&rpy_waiting_threads);
+
+ /* Enter the waiting queue from the end. Assuming a roughly
+ first-in-first-out order, this will nicely give the threads
+ a round-robin chance.
+ */
+ mutex_lock(&mutex_gil_stealer);
+
+ /* We are now the stealer thread. Steals! */
+ while (1) {
+ /* Sleep for one interval of time. We may be woken up earlier
+ if 'mutex_gil' is released.
+ */
+ if (mutex_lock_timeout(&mutex_gil, 0.001)) { /* 1 ms... */
+ /* We arrive here if 'mutex_gil' was recently released
+ and we just relocked it.
+ */
+ old_fastgil = 0;
+ break;
+ }
+
+ /* Busy-looping here. Try to look again if 'rpy_fastgil' is
+ released.
+ */
+ if (!RPY_FASTGIL_LOCKED(rpy_fastgil)) {
+ old_fastgil = lock_test_and_set(&rpy_fastgil, 1);
+ if (!RPY_FASTGIL_LOCKED(old_fastgil))
+ /* yes, got a non-held value! Now we hold it. */
+ break;
+ }
+ /* Otherwise, loop back. */
+ }
+ atomic_decrement(&rpy_waiting_threads);
+ mutex_unlock(&mutex_gil_stealer);
+
+ errno = old_errno;
+ }
+
+#ifdef PYPY_USE_ASMGCC
+ if (old_fastgil != 0) {
+ /* this case only occurs from the JIT compiler */
+ struct pypy_ASM_FRAMEDATA_HEAD0 *new =
+ (struct pypy_ASM_FRAMEDATA_HEAD0 *)old_fastgil;
+ struct pypy_ASM_FRAMEDATA_HEAD0 *root = &pypy_g_ASM_FRAMEDATA_HEAD;
+ struct pypy_ASM_FRAMEDATA_HEAD0 *next = root->as_next;
+ new->as_next = next;
+ new->as_prev = root;
+ root->as_next = new;
+ next->as_prev = new;
+ }
+#else
+ assert(old_fastgil == 0);
+#endif
+ assert(RPY_FASTGIL_LOCKED(rpy_fastgil));
+ return;
+}
+
+/*
+void RPyGilRelease(void)
+{
+ Releases the GIL in order to do an external function call.
+ We assume that the common case is that the function call is
+ actually very short, and optimize accordingly.
+
+ Note: this function is defined as a 'static inline' in thread.h.
+}
+*/
+
+long RPyGilYieldThread(void)
+{
+ /* can be called even before RPyGilAllocate(), but in this case,
+ 'rpy_waiting_threads' will be -1. */
+ assert(RPY_FASTGIL_LOCKED(rpy_fastgil));
+ if (rpy_waiting_threads <= 0)
+ return 0;
+
+ /* Explicitly release the 'mutex_gil'.
+ */
+ mutex_unlock(&mutex_gil);
+
+ /* Now nobody has got the GIL, because 'mutex_gil' is released (but
+ rpy_fastgil is still locked). Call RPyGilAcquire(). It will
+ enqueue ourselves at the end of the 'mutex_gil_stealer' queue.
+ If there is no other waiting thread, it will fall through both
+ its mutex_lock() and mutex_lock_timeout() now.
+ */
+ RPyGilAcquire();
+ return 1;
+}
diff --git a/rpython/translator/c/src/thread_nt.c b/rpython/translator/c/src/thread_nt.c
--- a/rpython/translator/c/src/thread_nt.c
+++ b/rpython/translator/c/src/thread_nt.c
@@ -196,52 +196,35 @@
/* GIL code */
/************************************************************/
-static volatile LONG pending_acquires = -1;
-static CRITICAL_SECTION mutex_gil;
-static HANDLE cond_gil;
+typedef HANDLE mutex_t;
-long RPyGilAllocate(void)
-{
- pending_acquires = 0;
- InitializeCriticalSection(&mutex_gil);
- EnterCriticalSection(&mutex_gil);
- cond_gil = CreateEvent (NULL, FALSE, FALSE, NULL);
- return 1;
+static void gil_fatal(const char *msg) {
+ fprintf(stderr, "Fatal error in the GIL: %s\n", msg);
+ abort();
}
-long RPyGilYieldThread(void)
-{
- /* can be called even before RPyGilAllocate(), but in this case,
- pending_acquires will be -1 */
- if (pending_acquires <= 0)
- return 0;
- InterlockedIncrement(&pending_acquires);
- PulseEvent(cond_gil);
-
- /* hack: the three following lines do a pthread_cond_wait(), and
- normally specifying a timeout of INFINITE would be fine. But the
- first and second operations are not done atomically, so there is a
- (small) risk that PulseEvent misses the WaitForSingleObject().
- In this case the process will just sleep a few milliseconds. */
- LeaveCriticalSection(&mutex_gil);
- WaitForSingleObject(cond_gil, 15);
- EnterCriticalSection(&mutex_gil);
-
- InterlockedDecrement(&pending_acquires);
- return 1;
+static inline void mutex_init(mutex_t *mutex) {
+ *mutex = CreateMutex(NULL, 0, NULL);
+ if (*mutex == NULL)
+ gil_fatal("CreateMutex failed");
}
-void RPyGilRelease(void)
-{
- LeaveCriticalSection(&mutex_gil);
- PulseEvent(cond_gil);
+static inline void mutex_lock(mutex_t *mutex) {
+ WaitForSingleObject(*mutex, INFINITE);
}
-void RPyGilAcquire(void)
-{
- InterlockedIncrement(&pending_acquires);
- EnterCriticalSection(&mutex_gil);
- InterlockedDecrement(&pending_acquires);
+static inline void mutex_unlock(mutex_t *mutex) {
+ ReleaseMutex(*mutex);
}
-# error "XXX implement me"
+static inline int mutex_lock_timeout(mutex_t *mutex, double delay)
+{
+ DWORD result = WaitForSingleObject(*mutex, (DWORD)(delay * 1000.0 + 0.9));
+ return (result != WAIT_TIMEOUT);
+}
+
+#define lock_test_and_set(ptr, value) InterlockedExchangeAcquire(ptr, value)
+#define atomic_increment(ptr) InterlockedIncrement(ptr)
+#define atomic_decrement(ptr) InterlockedDecrement(ptr)
+
+#include "src/thread_gil.c"
diff --git a/rpython/translator/c/src/thread_pthread.c b/rpython/translator/c/src/thread_pthread.c
--- a/rpython/translator/c/src/thread_pthread.c
+++ b/rpython/translator/c/src/thread_pthread.c
@@ -472,71 +472,18 @@
/* GIL code */
/************************************************************/
-
#include <time.h>
-
#define ASSERT_STATUS(call) \
if (call != 0) { \
fprintf(stderr, "Fatal error: " #call "\n"); \
abort(); \
}
-/* Idea:
-
- - "The GIL" is a composite concept. There are two locks, and "the
- GIL is locked" when both are locked.
-
- - The first lock is a simple global variable 'rpy_fastgil'. With
- shadowstack, we use the most portable definition: 0 means unlocked
- and != 0 means locked. With asmgcc, 0 means unlocked but only 1
- means locked. A different value means unlocked too, but the value
- is used by the JIT to contain the stack top for stack root scanning.
-
- - The second lock is a regular mutex. In the fast path, it is never
- unlocked. Remember that "the GIL is unlocked" means that either
- the first or the second lock is unlocked. It should never be the
- case that both are unlocked at the same time.
-
- - Let's call "thread 1" the thread with the GIL. Whenever it does an
- external function call, it sets 'rpy_fastgil' to 0 (unlocked).
- This is the cheapest way to release the GIL. When it returns from
- the function call, this thread attempts to atomically change
- 'rpy_fastgil' to 1. In the common case where it works, thread 1
- has got the GIL back and so continues to run.
-
- - Say "thread 2" is eagerly waiting for thread 1 to become blocked in
- some long-running call. Regularly, it checks if 'rpy_fastgil' is 0
- and tries to atomically change it to 1. If it succeeds, it means
- that the GIL was not previously locked. Thread 2 has now got the GIL.
-
- - If there are more than 2 threads, the rest is really sleeping by
- waiting on the 'mutex_gil_stealer' held by thread 2.
-
- - An additional mechanism is used for when thread 1 wants to
- explicitly yield the GIL to thread 2: it does so by releasing
- 'mutex_gil' (which is otherwise not released) but keeping the
- value of 'rpy_fastgil' to 1.
-*/
-
-long rpy_fastgil = 1;
-static pthread_mutex_t mutex_gil_stealer;
-static pthread_mutex_t mutex_gil;
-long rpy_lock_ready = 0;
-
-void RPyGilAllocate(void)
+static inline void timespec_add(struct timespec *t, double incr)
{
- assert(RPY_FASTGIL_LOCKED(rpy_fastgil));
- ASSERT_STATUS(pthread_mutex_init(&mutex_gil_stealer,
- pthread_mutexattr_default));
- ASSERT_STATUS(pthread_mutex_init(&mutex_gil, pthread_mutexattr_default));
- ASSERT_STATUS(pthread_mutex_lock(&mutex_gil));
- rpy_lock_ready = 1;
-}
-
-static inline void timespec_add(struct timespec *t, long incr)
-{
- long nsec = t->tv_nsec + incr;
+ /* assumes that "incr" is not too large, less than 1 second */
+ long nsec = t->tv_nsec + (long)(incr * 1000000000.0);
if (nsec >= 1000000000) {
t->tv_sec += 1;
nsec -= 1000000000;
@@ -545,112 +492,29 @@
t->tv_nsec = nsec;
}
-void RPyGilAcquire(void)
-{
- /* Acquires the GIL. Note: this function saves and restores 'errno'.
- */
- long old_fastgil = __sync_lock_test_and_set(&rpy_fastgil, 1);
+typedef pthread_mutex_t mutex_t;
- if (!RPY_FASTGIL_LOCKED(old_fastgil)) {
- /* The fastgil was not previously locked: success.
- 'mutex_gil' should still be locked at this point.
- */
- }
- else {
- /* Otherwise, another thread is busy with the GIL. */
- int old_errno = errno;
-
- /* Enter the waiting queue from the end. Assuming a roughly
- first-in-first-out order, this will nicely give the threads
- a round-robin chance.
- */
- assert(rpy_lock_ready);
- ASSERT_STATUS(pthread_mutex_lock(&mutex_gil_stealer));
-
- /* We are now the stealer thread. Steals! */
- while (1) {
- int delay = 1000000; /* 1 ms... */
- struct timespec t;
-
- /* Sleep for one interval of time. We may be woken up earlier
- if 'mutex_gil' is released.
- */
- clock_gettime(CLOCK_REALTIME, &t);
- timespec_add(&t, delay);
- int error_from_timedlock = pthread_mutex_timedlock(&mutex_gil, &t);
-
- if (error_from_timedlock != ETIMEDOUT) {
- ASSERT_STATUS(error_from_timedlock);
-
- /* We arrive here if 'mutex_gil' was recently released
- and we just relocked it.
- */
- old_fastgil = 0;
- break;
- }
-
- /* Busy-looping here. Try to look again if 'rpy_fastgil' is
- released.
- */
- if (!RPY_FASTGIL_LOCKED(rpy_fastgil)) {
- old_fastgil = __sync_lock_test_and_set(&rpy_fastgil, 1);
- if (!RPY_FASTGIL_LOCKED(old_fastgil))
- /* yes, got a non-held value! Now we hold it. */
- break;
- }
- /* Otherwise, loop back. */
- }
- ASSERT_STATUS(pthread_mutex_unlock(&mutex_gil_stealer));
-
- errno = old_errno;
- }
-
-#ifdef PYPY_USE_ASMGCC
- if (old_fastgil != 0) {
- /* this case only occurs from the JIT compiler */
- struct pypy_ASM_FRAMEDATA_HEAD0 *new =
- (struct pypy_ASM_FRAMEDATA_HEAD0 *)old_fastgil;
- struct pypy_ASM_FRAMEDATA_HEAD0 *root = &pypy_g_ASM_FRAMEDATA_HEAD;
- struct pypy_ASM_FRAMEDATA_HEAD0 *next = root->as_next;
- new->as_next = next;
- new->as_prev = root;
- root->as_next = new;
- next->as_prev = new;
- }
-#else
- assert(old_fastgil == 0);
-#endif
- assert(RPY_FASTGIL_LOCKED(rpy_fastgil));
- return;
+static inline void mutex_init(mutex_t *mutex) {
+ ASSERT_STATUS(pthread_mutex_init(mutex, pthread_mutexattr_default));
}
-
-/*
-void RPyGilRelease(void)
-{
- Releases the GIL in order to do an external function call.
- We assume that the common case is that the function call is
- actually very short, and optimize accordingly.
-
- Note: this function is defined as a 'static inline' in thread.h.
+static inline void mutex_lock(mutex_t *mutex) {
+ ASSERT_STATUS(pthread_mutex_lock(mutex));
}
-*/
-
-long RPyGilYieldThread(void)
-{
- assert(RPY_FASTGIL_LOCKED(rpy_fastgil));
- if (!rpy_lock_ready)
+static inline void mutex_unlock(mutex_t *mutex) {
+ ASSERT_STATUS(pthread_mutex_unlock(mutex));
+}
+static inline int mutex_lock_timeout(mutex_t *mutex, double delay) {
+ struct timespec t;
+ clock_gettime(CLOCK_REALTIME, &t);
+ timespec_add(&t, delay);
+ int error_from_timedlock = pthread_mutex_timedlock(mutex, &t);
+ if (error_from_timedlock == ETIMEDOUT)
return 0;
-
- /* Explicitly release the 'mutex_gil'.
- */
- ASSERT_STATUS(pthread_mutex_unlock(&mutex_gil));
-
- /* Now nobody has got the GIL, because 'mutex_gil' is released (but
- rpy_fastgil is still locked). Call RPyGilAcquire(). It will
- enqueue ourselves at the end of the 'mutex_gil_stealer' queue.
- If there is no other waiting thread, it will fall through both
- its pthread_mutex_lock() and pthread_mutex_timedlock() now.
- */
- RPyGilAcquire();
+ ASSERT_STATUS(error_from_timedlock);
return 1;
}
+#define lock_test_and_set(ptr, value) __sync_lock_test_and_set(ptr, value)
+#define atomic_increment(ptr) __sync_fetch_and_add(ptr, 1)
+#define atomic_decrement(ptr) __sync_fetch_and_sub(ptr, 1)
+
+#include "src/thread_gil.c"
More information about the pypy-commit
mailing list