[Python-checkins] cpython (3.4): Issue #21491: socketserver: Fix a race condition in child processes reaping.

charles-francois.natali python-checkins at python.org
Fri Jun 20 23:42:39 CEST 2014


http://hg.python.org/cpython/rev/2a7375bd09f9
changeset:   91294:2a7375bd09f9
branch:      3.4
parent:      91291:ec0aae4df38b
user:        Charles-François Natali <cf.natali at gmail.com>
date:        Fri Jun 20 22:37:35 2014 +0100
summary:
  Issue #21491: socketserver: Fix a race condition in child processes reaping.

files:
  Lib/socketserver.py |  54 +++++++++++++++++---------------
  Misc/NEWS           |   2 +
  2 files changed, 31 insertions(+), 25 deletions(-)


diff --git a/Lib/socketserver.py b/Lib/socketserver.py
--- a/Lib/socketserver.py
+++ b/Lib/socketserver.py
@@ -523,35 +523,39 @@
 
     def collect_children(self):
         """Internal routine to wait for children that have exited."""
-        if self.active_children is None: return
+        if self.active_children is None:
+            return
+
+        # If we're above the max number of children, wait and reap them until
+        # we go back below threshold. Note that we use waitpid(-1) below to be
+        # able to collect children in size(<defunct children>) syscalls instead
+        # of size(<children>): the downside is that this might reap children
+        # which we didn't spawn, which is why we only resort to this when we're
+        # above max_children.
         while len(self.active_children) >= self.max_children:
-            # XXX: This will wait for any child process, not just ones
-            # spawned by this library. This could confuse other
-            # libraries that expect to be able to wait for their own
-            # children.
             try:
-                pid, status = os.waitpid(0, 0)
+                pid, _ = os.waitpid(-1, 0)
+                self.active_children.discard(pid)
+            except InterruptedError:
+                pass
+            except ChildProcessError:
+                # we don't have any children, we're done
+                self.active_children.clear()
             except OSError:
-                pid = None
-            if pid not in self.active_children: continue
-            self.active_children.remove(pid)
+                break
 
-        # XXX: This loop runs more system calls than it ought
-        # to. There should be a way to put the active_children into a
-        # process group and then use os.waitpid(-pgid) to wait for any
-        # of that set, but I couldn't find a way to allocate pgids
-        # that couldn't collide.
-        for child in self.active_children:
+        # Now reap all defunct children.
+        for pid in self.active_children.copy():
             try:
-                pid, status = os.waitpid(child, os.WNOHANG)
+                pid, _ = os.waitpid(pid, os.WNOHANG)
+                # if the child hasn't exited yet, pid will be 0 and ignored by
+                # discard() below
+                self.active_children.discard(pid)
+            except ChildProcessError:
+                # someone else reaped it
+                self.active_children.discard(pid)
             except OSError:
-                pid = None
-            if not pid: continue
-            try:
-                self.active_children.remove(pid)
-            except ValueError as e:
-                raise ValueError('%s. x=%d and list=%r' % (e.message, pid,
-                                                           self.active_children))
+                pass
 
     def handle_timeout(self):
         """Wait for zombies after self.timeout seconds of inactivity.
@@ -573,8 +577,8 @@
         if pid:
             # Parent process
             if self.active_children is None:
-                self.active_children = []
-            self.active_children.append(pid)
+                self.active_children = set()
+            self.active_children.add(pid)
             self.close_request(request)
             return
         else:
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -27,6 +27,8 @@
 Library
 -------
 
+- Issue #21491: socketserver: Fix a race condition in child processes reaping.
+
 - Issue #21722: The distutils "upload" command now exits with a non-zero
   return code when uploading fails.  Patch by Martin Dengler.
 

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list