[Python-Dev] Re: Signal-resistant code (was: Two random and nearly unrelated ideas)

Zack Weinberg zack@codesourcery.com
Fri, 6 Sep 2002 13:52:31 -0700


On Fri, Sep 06, 2002 at 01:53:22PM -0400, Guido van Rossum wrote:
> > Would this be an appropriate place to complain about how
> > KeyboardInterrupt won't wake up a thread stuck waiting on a Queue?
> 
> No, unless you have a real proposal on how to fix it (not just a vague
> idea -- we've all had those, and they don't work).  Working code or
> shut up. :-)

Fair enough.

The underlying problem is that KeyboardInterrupt does not abort
acquire() called on a thread lock.  This is only noticeable when it
was the main thread that called acquire -- if it's some other thread,
the KeyboardInterrupt will still be delivered to the main thread.
Compare the behavior of these two test programs:

-- test1.py --
import time, thread

lock = thread.allocate_lock()
lock.acquire()

def child_thread():
        print "Acquiring lock"
        lock.acquire()
        print "Have lock (can't happen)"
        lock.release()

thread.start_new_thread(child_thread, ())

print "Hit ^C now"
time.sleep(3600)

-- test2.py --
import time, thread

lock = thread.allocate_lock()

def child_thread():
        print "Acquiring lock"
        lock.acquire()
        print "Have lock"
        time.sleep(3600)
        lock.release()

thread.start_new_thread(child_thread, ())

time.sleep(1) # give child a chance to acquire lock
print "Hit ^C now"
lock.acquire()

I'm going to look only at the pthread-based thread support; presumably
similar changes to the ones I will propose, need to be made to the
others.

There are two cases of PyThread_acquire_lock in thread_pthread.h: using
semaphores, and using condition variables.  Let's look at the
condition variable one first:

                /* mut must be locked by me -- part of the condition
                 * protocol */
                status = pthread_mutex_lock( &thelock->mut );
                CHECK_STATUS("pthread_mutex_lock[2]");
                while ( thelock->locked ) {
                        status = pthread_cond_wait(&thelock->lock_released,
                                                   &thelock->mut);
                        CHECK_STATUS("pthread_cond_wait");
                }
                thelock->locked = 1;
                status = pthread_mutex_unlock( &thelock->mut );

Naively, we'd like to shove a check of PyOS_InterruptOccurred in that
loop so we can bail out if it's true.  It is part of the spec for
pthread_cond_wait that any signal which is handled (as SIGINT is) will
not interrupt its execution.  So in order to get a chance to check for
interrupts we need to change this to a repeated timed wait, like so:

                while ( thelock->locked && !interrupted ) {
                        timeout.tv_sec = time(0) + 1;
                        status = pthread_cond_timedwait(&thelock->lock_released,
                                                        &thelock->mut,
                                                        &timeout);
                        if (status != ETIMEDOUT)
                                CHECK_STATUS("pthread_cond_wait");
                        interrupted = PyOS_InterruptOccurred();
                }
                thelock->locked = 1;
                status = pthread_mutex_unlock( &thelock->mut );

Then we do a bit of fiddling in the return path to reset the interrupt
flag and make sure the caller sees a failure.

In the semaphore case, life is theoretically simpler: there is no
mutex, and sem_wait is interrupted by a handled signal, assuming
SA_RESTART was not set for that signal (which it isn't, in Python).

        do {
                if (waitflag)
                        status = fix_status(sem_wait(thelock));
                else
                        status = fix_status(sem_trywait(thelock));
        } while (status == EINTR); /* Retry if interrupted by a signal */

becomes

        do {
                if (waitflag)
                        status = fix_status(sem_wait(thelock));
                else
                        status = fix_status(sem_trywait(thelock));
                if (status == EINTR && PyOS_InterruptOccurred())
                        goto interrupted;
        } while (status == EINTR); /* Retry if interrupted by a signal */

...

 interrupted:
        PyErr_SetInterrupt();
        dprintf(("PyThread_acquire_lock(%p, %d) interrupted by user\n",
                 lock, waitflag));
        return 0;

However, the Linux semaphore implementation is buggy and will not
actually return EINTR from sem_wait, ever.  I'll take this up with the
libc maintainers; at the Python level, the thing to do is assume it
works.  Hence, the appended patch.

(While I was at it I fixed CHECK_STATUS so that it actually prints the
relevant system error, instead of whatever junk happens to be in
errno.)

zw

===================================================================
Index: thread_pthread.h
--- thread_pthread.h	17 Mar 2002 17:19:00 -0000	2.40
+++ thread_pthread.h	6 Sep 2002 20:51:31 -0000
@@ -128,7 +128,12 @@ typedef struct {
 	pthread_mutex_t  mut;
 } pthread_lock;
 
-#define CHECK_STATUS(name)  if (status != 0) { perror(name); error = 1; }
+#define CHECK_STATUS(name)  do { 					\
+	if (status != 0) {						\
+		fprintf(stderr, "%s: %s\n", name, strerror(status));	\
+		error = 1;						\
+	}								\
+} while (0) 
 
 /*
  * Initialization.
@@ -387,6 +392,8 @@ PyThread_acquire_lock(PyThread_type_lock
 			status = fix_status(sem_wait(thelock));
 		else
 			status = fix_status(sem_trywait(thelock));
+		if (status == EINTR && PyOS_InterruptOccurred())
+			goto interrupted;
 	} while (status == EINTR); /* Retry if interrupted by a signal */
 
 	if (waitflag) {
@@ -399,6 +406,12 @@ PyThread_acquire_lock(PyThread_type_lock
 
 	dprintf(("PyThread_acquire_lock(%p, %d) -> %d\n", lock, waitflag, success));
 	return success;
+
+ interrupted:
+	PyErr_SetInterrupt();
+	dprintf(("PyThread_acquire_lock(%p, %d) interrupted by user\n",
+		 lock, waitflag));
+	return 0;
 }
 
 void 
@@ -472,8 +485,10 @@ int 
 PyThread_acquire_lock(PyThread_type_lock lock, int waitflag)
 {
 	int success;
+	int interrupted = 0;
 	pthread_lock *thelock = (pthread_lock *)lock;
 	int status, error = 0;
+	struct timespec timeout;
 
 	dprintf(("PyThread_acquire_lock(%p, %d) called\n", lock, waitflag));
 
@@ -491,10 +506,15 @@ PyThread_acquire_lock(PyThread_type_lock
 		 * protocol */
 		status = pthread_mutex_lock( &thelock->mut );
 		CHECK_STATUS("pthread_mutex_lock[2]");
-		while ( thelock->locked ) {
-			status = pthread_cond_wait(&thelock->lock_released,
-						   &thelock->mut);
-			CHECK_STATUS("pthread_cond_wait");
+		timeout.tv_nsec = 0;
+		while ( thelock->locked && !interrupted ) {
+			timeout.tv_sec = time(0) + 1;
+			status = pthread_cond_timedwait(&thelock->lock_released,
+							&thelock->mut,
+							&timeout);
+			if (status != ETIMEDOUT)
+				CHECK_STATUS("pthread_cond_wait");
+			interrupted = PyOS_InterruptOccurred();
 		}
 		thelock->locked = 1;
 		status = pthread_mutex_unlock( &thelock->mut );
@@ -502,6 +522,10 @@ PyThread_acquire_lock(PyThread_type_lock
 		success = 1;
 	}
 	if (error) success = 0;
+	if (interrupted) {
+		PyErr_SetInterrupt();
+		success = 0;
+	}
 	dprintf(("PyThread_acquire_lock(%p, %d) -> %d\n", lock, waitflag, success));
 	return success;
 }