Re: subprocess and EINTR errnos
On Wed, 10 Nov 2004, John P Speno wrote: Hi, sorry for the delayed response.
While using subprocess (aka popen5), I came across one potential gotcha. I've had exceptions ending like this:
File "test.py", line 5, in test cmd = popen5.Popen(args, stdout=PIPE) File "popen5.py", line 577, in __init__ data = os.read(errpipe_read, 1048576) # Exceptions limited to 1 MB OSError: [Errno 4] Interrupted system call
(on Solaris 9)
Would it make sense for subprocess to use a more robust read() function which can handle these cases, i.e. when the parent's read on the pipe to the child's stderr is interrupted by a system call, and returns EINTR? I imagine it could catch EINTR and EAGAIN and retry the failed read().
I assume you are using signals in your application? The os.read above is not the only system call that can fail with EINTR. subprocess.py is full of other system calls that can fail, and I suspect that many other Python modules are as well. I've made a patch (attached) to subprocess.py (and test_subprocess.py) that should guard against EINTR, but I haven't committed it yet. It's quite large. Are Python modules supposed to handle EINTR? Why not let the C code handle this? Or, perhaps the signal module should provide a sigaction function, so that users can use SA_RESTART. Index: subprocess.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/subprocess.py,v retrieving revision 1.8 diff -u -r1.8 subprocess.py --- subprocess.py 7 Nov 2004 14:30:34 -0000 1.8 +++ subprocess.py 17 Nov 2004 19:42:30 -0000 @@ -888,6 +888,50 @@ pass + def _read_no_intr(self, fd, buffersize): + """Like os.read, but retries on EINTR""" + while True: + try: + return os.read(fd, buffersize) + except OSError, e: + if e.errno == errno.EINTR: + continue + else: + raise + + + def _read_all(self, fd, buffersize): + """Like os.read, but retries on EINTR, and reads until EOF""" + all = "" + while True: + data = self._read_no_intr(fd, buffersize) + all += data + if data == "": + return all + + + def _write_no_intr(self, fd, s): + """Like os.write, but retries on EINTR""" + while True: + try: + return os.write(fd, s) + except OSError, e: + if e.errno == errno.EINTR: + continue + else: + raise + + def _waitpid_no_intr(self, pid, options): + """Like os.waitpid, but retries on EINTR""" + while True: + try: + return os.waitpid(pid, options) + except OSError, e: + if e.errno == errno.EINTR: + continue + else: + raise + def _execute_child(self, args, executable, preexec_fn, close_fds, cwd, env, universal_newlines, startupinfo, creationflags, shell, @@ -963,7 +1007,7 @@ exc_value, tb) exc_value.child_traceback = ''.join(exc_lines) - os.write(errpipe_write, pickle.dumps(exc_value)) + self._write_no_intr(errpipe_write, pickle.dumps(exc_value)) # This exitcode won't be reported to applications, so it # really doesn't matter what we return. @@ -979,7 +1023,7 @@ os.close(errwrite) # Wait for exec to fail or succeed; possibly raising exception - data = os.read(errpipe_read, 1048576) # Exceptions limited to 1 MB + data = self._read_all(errpipe_read, 1048576) # Exceptions limited to 1 MB os.close(errpipe_read) if data != "": child_exception = pickle.loads(data) @@ -1003,7 +1047,7 @@ attribute.""" if self.returncode == None: try: - pid, sts = os.waitpid(self.pid, os.WNOHANG) + pid, sts = self._waitpid_no_intr(self.pid, os.WNOHANG) if pid == self.pid: self._handle_exitstatus(sts) except os.error: @@ -1015,7 +1059,7 @@ """Wait for child process to terminate. Returns returncode attribute.""" if self.returncode == None: - pid, sts = os.waitpid(self.pid, 0) + pid, sts = self._waitpid_no_intr(self.pid, 0) self._handle_exitstatus(sts) return self.returncode @@ -1049,27 +1093,33 @@ stderr = [] while read_set or write_set: - rlist, wlist, xlist = select.select(read_set, write_set, []) + try: + rlist, wlist, xlist = select.select(read_set, write_set, []) + except select.error, e: + if e[0] == errno.EINTR: + continue + else: + raise if self.stdin in wlist: # When select has indicated that the file is writable, # we can write up to PIPE_BUF bytes without risk # blocking. POSIX defines PIPE_BUF >= 512 - bytes_written = os.write(self.stdin.fileno(), input[:512]) + bytes_written = self._write_no_intr(self.stdin.fileno(), input[:512]) input = input[bytes_written:] if not input: self.stdin.close() write_set.remove(self.stdin) if self.stdout in rlist: - data = os.read(self.stdout.fileno(), 1024) + data = self._read_no_intr(self.stdout.fileno(), 1024) if data == "": self.stdout.close() read_set.remove(self.stdout) stdout.append(data) if self.stderr in rlist: - data = os.read(self.stderr.fileno(), 1024) + data = self._read_no_intr(self.stderr.fileno(), 1024) if data == "": self.stderr.close() read_set.remove(self.stderr) Index: test/test_subprocess.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_subprocess.py,v retrieving revision 1.14 diff -u -r1.14 test_subprocess.py --- test/test_subprocess.py 12 Nov 2004 15:51:48 -0000 1.14 +++ test/test_subprocess.py 17 Nov 2004 19:42:30 -0000 @@ -7,6 +7,7 @@ import tempfile import time import re +import errno mswindows = (sys.platform == "win32") @@ -35,6 +36,16 @@ fname = tempfile.mktemp() return os.open(fname, os.O_RDWR|os.O_CREAT), fname + def read_no_intr(self, obj): + while True: + try: + return obj.read() + except IOError, e: + if e.errno == errno.EINTR: + continue + else: + raise + # # Generic tests # @@ -123,7 +134,7 @@ p = subprocess.Popen([sys.executable, "-c", 'import sys; sys.stdout.write("orange")'], stdout=subprocess.PIPE) - self.assertEqual(p.stdout.read(), "orange") + self.assertEqual(self.read_no_intr(p.stdout), "orange") def test_stdout_filedes(self): # stdout is set to open file descriptor @@ -151,7 +162,7 @@ p = subprocess.Popen([sys.executable, "-c", 'import sys; sys.stderr.write("strawberry")'], stderr=subprocess.PIPE) - self.assertEqual(remove_stderr_debug_decorations(p.stderr.read()), + self.assertEqual(remove_stderr_debug_decorations(self.read_no_intr(p.stderr)), "strawberry") def test_stderr_filedes(self): @@ -186,7 +197,7 @@ 'sys.stderr.write("orange")'], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - output = p.stdout.read() + output = self.read_no_intr(p.stdout) stripped = remove_stderr_debug_decorations(output) self.assertEqual(stripped, "appleorange") @@ -220,7 +231,7 @@ stdout=subprocess.PIPE, cwd=tmpdir) normcase = os.path.normcase - self.assertEqual(normcase(p.stdout.read()), normcase(tmpdir)) + self.assertEqual(normcase(self.read_no_intr(p.stdout)), normcase(tmpdir)) def test_env(self): newenv = os.environ.copy() @@ -230,7 +241,7 @@ 'sys.stdout.write(os.getenv("FRUIT"))'], stdout=subprocess.PIPE, env=newenv) - self.assertEqual(p.stdout.read(), "orange") + self.assertEqual(self.read_no_intr(p.stdout), "orange") def test_communicate(self): p = subprocess.Popen([sys.executable, "-c", @@ -305,7 +316,8 @@ 'sys.stdout.write("\\nline6");'], stdout=subprocess.PIPE, universal_newlines=1) - stdout = p.stdout.read() + + stdout = self.read_no_intr(p.stdout) if hasattr(open, 'newlines'): # Interpreter with universal newline support self.assertEqual(stdout, @@ -343,7 +355,7 @@ def test_no_leaking(self): # Make sure we leak no resources - max_handles = 1026 # too much for most UNIX systems + max_handles = 10 # too much for most UNIX systems if mswindows: max_handles = 65 # a full test is too slow on Windows for i in range(max_handles): @@ -424,7 +436,7 @@ 'sys.stdout.write(os.getenv("FRUIT"))'], stdout=subprocess.PIPE, preexec_fn=lambda: os.putenv("FRUIT", "apple")) - self.assertEqual(p.stdout.read(), "apple") + self.assertEqual(self.read_no_intr(p.stdout), "apple") def test_args_string(self): # args is a string @@ -457,7 +469,7 @@ p = subprocess.Popen(["echo $FRUIT"], shell=1, stdout=subprocess.PIPE, env=newenv) - self.assertEqual(p.stdout.read().strip(), "apple") + self.assertEqual(self.read_no_intr(p.stdout).strip(), "apple") def test_shell_string(self): # Run command through the shell (string) @@ -466,7 +478,7 @@ p = subprocess.Popen("echo $FRUIT", shell=1, stdout=subprocess.PIPE, env=newenv) - self.assertEqual(p.stdout.read().strip(), "apple") + self.assertEqual(self.read_no_intr(p.stdout).strip(), "apple") def test_call_string(self): # call() function with string argument on UNIX @@ -525,7 +537,7 @@ p = subprocess.Popen(["set"], shell=1, stdout=subprocess.PIPE, env=newenv) - self.assertNotEqual(p.stdout.read().find("physalis"), -1) + self.assertNotEqual(self.read_no_intr(p.stdout).find("physalis"), -1) def test_shell_string(self): # Run command through the shell (string) @@ -534,7 +546,7 @@ p = subprocess.Popen("set", shell=1, stdout=subprocess.PIPE, env=newenv) - self.assertNotEqual(p.stdout.read().find("physalis"), -1) + self.assertNotEqual(self.read_no_intr(p.stdout).find("physalis"), -1) def test_call_string(self): # call() function with string argument on Windows /Peter Åstrand <astrand@lysator.liu.se>
On Nov 17, 2004, at 2:53 PM, Peter Astrand wrote:
I assume you are using signals in your application? The os.read above is not the only system call that can fail with EINTR. subprocess.py is full of other system calls that can fail, and I suspect that many other Python modules are as well.
I've made a patch (attached) to subprocess.py (and test_subprocess.py) that should guard against EINTR, but I haven't committed it yet. It's quite large.
Are Python modules supposed to handle EINTR? Why not let the C code handle this? Or, perhaps the signal module should provide a sigaction function, so that users can use SA_RESTART.
Here's a simple way to demonstrate the same problem (without subprocess). I think any solution should not be at the subprocess.py level.
import os,sys,signal signal.signal(signal.SIGCHLD, lambda x,y: sys.stderr.write("SIGCHLD!\n")) 0 f=os.popen("sleep 5; echo 'Foo'"); print f.read() SIGCHLD! Traceback (most recent call last): File "<stdin>", line 1, in ? IOError: [Errno 4] Interrupted system call
This is an issue that Twisted has run into. Because Twisted installs its own SIGCHLD handler, other code which uses popen and then reads/writes the pipe *will* fail, because the SIGCHLD interrupts the final read. The proposed solution for twisted (which no one has implemented yet) is to make a small C module which simply adds the SA_RESTART flag to an installed signal handler. This has a disadvantage: because python queues up signals to run during normal interpreter execution, NOT at interrupt time, the handler won't be called until after the read finally returns. In our particular case, that's good enough, as the SIGCHLD handler is not particularly timing critical. As long as it gets called at some point, that's fine. However, a more general solution would be to emulate the SA_RESTART functionality at Python level: - Add an optional argument to signal.signal to specify whether you want it to restart interrupted calls after handling the signal. This would *not* set SA_RESTART, but would instead set a flag for the python read/write/etc wrappers to look at. - Make the python wrapper for read/write/etc handle EINTR internally: first dispatch to the waiting signal handler, then call the syscall again, if the sighandler was installed with the restart argument. From my documentation for SA_RESTART: """The affected system calls include open(2), read(2), write(2), sendto(2), recvfrom(2), sendmsg(2) and recvmsg(2) on a communications channel or a slow device (such as a terminal, but not a regular file) and during a wait(2) or ioctl(2).""" So, to emulate SA_RESTART, all those should be wrapped to restart in appropriate conditions. James
Peter Astrand wrote:
I've made a patch (attached) to subprocess.py (and test_subprocess.py) that should guard against EINTR, but I haven't committed it yet. It's quite large.
This is probably outside the bounds of the "conservative" checkins Anthony has requested prior to the release of 2.4. Given also that it may affect more than just subprocess.py, this looks like a 2.5 problem. Cheers, Nick. -- Nick Coghlan | Brisbane, Australia Email: ncoghlan@email.com | Mobile: +61 409 573 268
On Wed, 17 Nov 2004, Peter Astrand wrote,
I've made a patch (attached) to subprocess.py (and test_subprocess.py) that should guard against EINTR, but I haven't committed it yet. It's quite large.
Are Python modules supposed to handle EINTR? Why not let the C code handle this? Or, perhaps the signal module should provide a sigaction function, so that users can use SA_RESTART.
This is a snippet from a email sent in 2004, I was wondering if there was any update on this issue. Are these issues supposed to be handled on a per application basis, or will a fix go into Python in the near future? -- Hatem Nassrat Refrences: http://mail.python.org/pipermail/python-dev/2004-November/049983.html
On Mon, Jul 06, 2009, Hatem Nassrat wrote:
On Wed, 17 Nov 2004, Peter Astrand wrote,
I've made a patch (attached) to subprocess.py (and test_subprocess.py) that should guard against EINTR, but I haven't committed it yet. It's quite large.
Are Python modules supposed to handle EINTR? Why not let the C code handle this? Or, perhaps the signal module should provide a sigaction function, so that users can use SA_RESTART.
This is a snippet from a email sent in 2004, I was wondering if there was any update on this issue. Are these issues supposed to be handled on a per application basis, or will a fix go into Python in the near future?
For starters, if there is to be any progress, there needs to be an open issue on bugs.python.org -- have you searched to see if one already exists and created one if it doesn't? -- Aahz (aahz@pythoncraft.com) <*> http://www.pythoncraft.com/ "as long as we like the same operating system, things are cool." --piranha
On Wed, 17 Nov 2004, Peter Astrand wrote,
I've made a patch (attached) to subprocess.py (and test_subprocess.py) that should guard against EINTR, but I haven't committed it yet. It's quite large.
Are Python modules supposed to handle EINTR? Why not let the C code handle this? Or, perhaps the signal module should provide a sigaction function, so that users can use SA_RESTART.
This is a snippet from a email sent in 2004, I was wondering if there was any update on this issue. Are these issues supposed to be handled on a per application basis, or will a fix go into Python in the near future? -- Hatem Nassrat Refrences: http://mail.python.org/pipermail/python-dev/2004-November/049983.html
participants (5)
-
Aahz
-
Hatem Nassrat
-
James Y Knight
-
Nick Coghlan
-
Peter Astrand