Python-checkins
Threads by month
- ----- 2024 -----
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2023 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2022 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2021 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2020 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2019 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2018 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2017 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2016 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2015 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2014 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2013 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2012 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2011 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2010 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2009 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2008 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2007 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2006 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2005 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2004 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2003 -----
- December
- November
- October
- September
- August
June 2019
- 2 participants
- 665 discussions
bpo-37199: Fix test failures when IPv6 is unavailable or disabled (GH-14480)
by Miss Islington (bot) 30 Jun '19
by Miss Islington (bot) 30 Jun '19
30 Jun '19
https://github.com/python/cpython/commit/c2684c6d62978e9ce8256c3c7744d0332a…
commit: c2684c6d62978e9ce8256c3c7744d0332a2abe4c
branch: 3.8
author: Miss Islington (bot) <31488909+miss-islington(a)users.noreply.github.com>
committer: GitHub <noreply(a)github.com>
date: 2019-06-30T08:42:22-07:00
summary:
bpo-37199: Fix test failures when IPv6 is unavailable or disabled (GH-14480)
(cherry picked from commit c2cda638d63b98f5cf9a8ef13e15aace2b7e3f0b)
Co-authored-by: Zackery Spytz <zspytz(a)gmail.com>
files:
A Misc/NEWS.d/next/Tests/2019-06-29-23-56-28.bpo-37199.FHDsLf.rst
M Lib/test/support/__init__.py
M Lib/test/test_asyncio/test_base_events.py
M Lib/test/test_socket.py
M Lib/test/test_ssl.py
diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py
index ef623db87e04..31b0dc8fc2ca 100644
--- a/Lib/test/support/__init__.py
+++ b/Lib/test/support/__init__.py
@@ -1491,6 +1491,8 @@ def get_socket_conn_refused_errs():
# bpo-31910: socket.create_connection() fails randomly
# with EADDRNOTAVAIL on Travis CI
errors.append(errno.EADDRNOTAVAIL)
+ if not IPV6_ENABLED:
+ errors.append(errno.EAFNOSUPPORT)
return errors
diff --git a/Lib/test/test_asyncio/test_base_events.py b/Lib/test/test_asyncio/test_base_events.py
index 811b37425dd2..08d4792fa726 100644
--- a/Lib/test/test_asyncio/test_base_events.py
+++ b/Lib/test/test_asyncio/test_base_events.py
@@ -91,6 +91,9 @@ def test_ipaddr_info(self):
self.assertIsNone(
base_events._ipaddr_info('1.2.3.4', 1, UNSPEC, 0, 0))
+ if not support.IPV6_ENABLED:
+ return
+
# IPv4 address with family IPv6.
self.assertIsNone(
base_events._ipaddr_info('1.2.3.4', 1, INET6, STREAM, TCP))
@@ -1149,7 +1152,7 @@ def test_create_server_stream_bittype(self):
srv.close()
self.loop.run_until_complete(srv.wait_closed())
- @unittest.skipUnless(hasattr(socket, 'AF_INET6'), 'no IPv6 support')
+ @unittest.skipUnless(support.IPV6_ENABLED, 'no IPv6 support')
def test_create_server_ipv6(self):
async def main():
with self.assertWarns(DeprecationWarning):
@@ -1281,6 +1284,9 @@ def _test_create_connection_ip_addr(self, m_socket, allow_inet_pton):
t.close()
test_utils.run_briefly(self.loop) # allow transport to close
+ if not support.IPV6_ENABLED:
+ return
+
sock.family = socket.AF_INET6
coro = self.loop.create_connection(asyncio.Protocol, '::1', 80)
t, p = self.loop.run_until_complete(coro)
@@ -1298,6 +1304,7 @@ def _test_create_connection_ip_addr(self, m_socket, allow_inet_pton):
t.close()
test_utils.run_briefly(self.loop) # allow transport to close
+ @unittest.skipUnless(support.IPV6_ENABLED, 'no IPv6 support')
@unittest.skipIf(sys.platform.startswith('aix'),
"bpo-25545: IPv6 scope id and getaddrinfo() behave differently on AIX")
@patch_socket
diff --git a/Lib/test/test_socket.py b/Lib/test/test_socket.py
index 74662cfeb327..db525642d6af 100644
--- a/Lib/test/test_socket.py
+++ b/Lib/test/test_socket.py
@@ -4814,8 +4814,15 @@ def test_create_connection_timeout(self):
# Issue #9792: create_connection() should not recast timeout errors
# as generic socket errors.
with self.mocked_socket_module():
- with self.assertRaises(socket.timeout):
+ try:
socket.create_connection((HOST, 1234))
+ except socket.timeout:
+ pass
+ except OSError as exc:
+ if support.IPV6_ENABLED or exc.errno != errno.EAFNOSUPPORT:
+ raise
+ else:
+ self.fail('socket.timeout not raised')
class NetworkConnectionAttributesTest(SocketTCPTest, ThreadableTest):
diff --git a/Lib/test/test_ssl.py b/Lib/test/test_ssl.py
index 7ba8156eef5d..38fdf3f375cc 100644
--- a/Lib/test/test_ssl.py
+++ b/Lib/test/test_ssl.py
@@ -673,7 +673,7 @@ def fail(cert, hostname):
fail(cert, 'example.net')
# -- IPv6 matching --
- if hasattr(socket, 'AF_INET6'):
+ if support.IPV6_ENABLED:
cert = {'subject': ((('commonName', 'example.com'),),),
'subjectAltName': (
('DNS', 'example.com'),
@@ -754,7 +754,7 @@ def fail(cert, hostname):
ssl._inet_paton(invalid)
for ipaddr in ['127.0.0.1', '192.168.0.1']:
self.assertTrue(ssl._inet_paton(ipaddr))
- if hasattr(socket, 'AF_INET6'):
+ if support.IPV6_ENABLED:
for ipaddr in ['::1', '2001:db8:85a3::8a2e:370:7334']:
self.assertTrue(ssl._inet_paton(ipaddr))
diff --git a/Misc/NEWS.d/next/Tests/2019-06-29-23-56-28.bpo-37199.FHDsLf.rst b/Misc/NEWS.d/next/Tests/2019-06-29-23-56-28.bpo-37199.FHDsLf.rst
new file mode 100644
index 000000000000..b05209159cc8
--- /dev/null
+++ b/Misc/NEWS.d/next/Tests/2019-06-29-23-56-28.bpo-37199.FHDsLf.rst
@@ -0,0 +1 @@
+Fix test failures when IPv6 is unavailable or disabled.
1
0
bpo-37199: Fix test failures when IPv6 is unavailable or disabled (#14480)
by Andrew Svetlov 30 Jun '19
by Andrew Svetlov 30 Jun '19
30 Jun '19
https://github.com/python/cpython/commit/c2cda638d63b98f5cf9a8ef13e15aace2b…
commit: c2cda638d63b98f5cf9a8ef13e15aace2b7e3f0b
branch: master
author: Zackery Spytz <zspytz(a)gmail.com>
committer: Andrew Svetlov <andrew.svetlov(a)gmail.com>
date: 2019-06-30T18:24:43+03:00
summary:
bpo-37199: Fix test failures when IPv6 is unavailable or disabled (#14480)
files:
A Misc/NEWS.d/next/Tests/2019-06-29-23-56-28.bpo-37199.FHDsLf.rst
M Lib/test/support/__init__.py
M Lib/test/test_asyncio/test_base_events.py
M Lib/test/test_socket.py
M Lib/test/test_ssl.py
diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py
index 19ea9764e96c..a65de4a5abe8 100644
--- a/Lib/test/support/__init__.py
+++ b/Lib/test/support/__init__.py
@@ -1493,6 +1493,8 @@ def get_socket_conn_refused_errs():
# bpo-31910: socket.create_connection() fails randomly
# with EADDRNOTAVAIL on Travis CI
errors.append(errno.EADDRNOTAVAIL)
+ if not IPV6_ENABLED:
+ errors.append(errno.EAFNOSUPPORT)
return errors
diff --git a/Lib/test/test_asyncio/test_base_events.py b/Lib/test/test_asyncio/test_base_events.py
index 811b37425dd2..08d4792fa726 100644
--- a/Lib/test/test_asyncio/test_base_events.py
+++ b/Lib/test/test_asyncio/test_base_events.py
@@ -91,6 +91,9 @@ def test_ipaddr_info(self):
self.assertIsNone(
base_events._ipaddr_info('1.2.3.4', 1, UNSPEC, 0, 0))
+ if not support.IPV6_ENABLED:
+ return
+
# IPv4 address with family IPv6.
self.assertIsNone(
base_events._ipaddr_info('1.2.3.4', 1, INET6, STREAM, TCP))
@@ -1149,7 +1152,7 @@ def test_create_server_stream_bittype(self):
srv.close()
self.loop.run_until_complete(srv.wait_closed())
- @unittest.skipUnless(hasattr(socket, 'AF_INET6'), 'no IPv6 support')
+ @unittest.skipUnless(support.IPV6_ENABLED, 'no IPv6 support')
def test_create_server_ipv6(self):
async def main():
with self.assertWarns(DeprecationWarning):
@@ -1281,6 +1284,9 @@ def _test_create_connection_ip_addr(self, m_socket, allow_inet_pton):
t.close()
test_utils.run_briefly(self.loop) # allow transport to close
+ if not support.IPV6_ENABLED:
+ return
+
sock.family = socket.AF_INET6
coro = self.loop.create_connection(asyncio.Protocol, '::1', 80)
t, p = self.loop.run_until_complete(coro)
@@ -1298,6 +1304,7 @@ def _test_create_connection_ip_addr(self, m_socket, allow_inet_pton):
t.close()
test_utils.run_briefly(self.loop) # allow transport to close
+ @unittest.skipUnless(support.IPV6_ENABLED, 'no IPv6 support')
@unittest.skipIf(sys.platform.startswith('aix'),
"bpo-25545: IPv6 scope id and getaddrinfo() behave differently on AIX")
@patch_socket
diff --git a/Lib/test/test_socket.py b/Lib/test/test_socket.py
index 50094de58bf1..e92f871880a9 100644
--- a/Lib/test/test_socket.py
+++ b/Lib/test/test_socket.py
@@ -4964,8 +4964,15 @@ def test_create_connection_timeout(self):
# Issue #9792: create_connection() should not recast timeout errors
# as generic socket errors.
with self.mocked_socket_module():
- with self.assertRaises(socket.timeout):
+ try:
socket.create_connection((HOST, 1234))
+ except socket.timeout:
+ pass
+ except OSError as exc:
+ if support.IPV6_ENABLED or exc.errno != errno.EAFNOSUPPORT:
+ raise
+ else:
+ self.fail('socket.timeout not raised')
class NetworkConnectionAttributesTest(SocketTCPTest, ThreadableTest):
diff --git a/Lib/test/test_ssl.py b/Lib/test/test_ssl.py
index c72a85710d5c..064f0e8d4de6 100644
--- a/Lib/test/test_ssl.py
+++ b/Lib/test/test_ssl.py
@@ -676,7 +676,7 @@ def fail(cert, hostname):
fail(cert, 'example.net')
# -- IPv6 matching --
- if hasattr(socket, 'AF_INET6'):
+ if support.IPV6_ENABLED:
cert = {'subject': ((('commonName', 'example.com'),),),
'subjectAltName': (
('DNS', 'example.com'),
@@ -757,7 +757,7 @@ def fail(cert, hostname):
ssl._inet_paton(invalid)
for ipaddr in ['127.0.0.1', '192.168.0.1']:
self.assertTrue(ssl._inet_paton(ipaddr))
- if hasattr(socket, 'AF_INET6'):
+ if support.IPV6_ENABLED:
for ipaddr in ['::1', '2001:db8:85a3::8a2e:370:7334']:
self.assertTrue(ssl._inet_paton(ipaddr))
diff --git a/Misc/NEWS.d/next/Tests/2019-06-29-23-56-28.bpo-37199.FHDsLf.rst b/Misc/NEWS.d/next/Tests/2019-06-29-23-56-28.bpo-37199.FHDsLf.rst
new file mode 100644
index 000000000000..b05209159cc8
--- /dev/null
+++ b/Misc/NEWS.d/next/Tests/2019-06-29-23-56-28.bpo-37199.FHDsLf.rst
@@ -0,0 +1 @@
+Fix test failures when IPv6 is unavailable or disabled.
1
0
bpo-35621: Support running subprocesses in asyncio when loop is executed in non-main thread (GH-14344)
by Miss Islington (bot) 30 Jun '19
by Miss Islington (bot) 30 Jun '19
30 Jun '19
https://github.com/python/cpython/commit/bf8cb31803558f1105efb15b0ee4bd184f…
commit: bf8cb31803558f1105efb15b0ee4bd184f3218c8
branch: 3.8
author: Miss Islington (bot) <31488909+miss-islington(a)users.noreply.github.com>
committer: GitHub <noreply(a)github.com>
date: 2019-06-30T03:22:34-07:00
summary:
bpo-35621: Support running subprocesses in asyncio when loop is executed in non-main thread (GH-14344)
(cherry picked from commit 0d671c04c39b52e44597491b893eb0b6c86b3d45)
Co-authored-by: Andrew Svetlov <andrew.svetlov(a)gmail.com>
files:
A Misc/NEWS.d/next/Library/2019-05-28-19-03-46.bpo-35621.Abc1lf.rst
M Doc/library/asyncio-policy.rst
M Doc/library/asyncio-subprocess.rst
M Lib/asyncio/unix_events.py
M Lib/test/test_asyncio/test_subprocess.py
M Lib/test/test_asyncio/test_unix_events.py
M Lib/test/test_asyncio/utils.py
diff --git a/Doc/library/asyncio-policy.rst b/Doc/library/asyncio-policy.rst
index 6212df85dbc1..aa8f8f13eae0 100644
--- a/Doc/library/asyncio-policy.rst
+++ b/Doc/library/asyncio-policy.rst
@@ -117,6 +117,7 @@ asyncio ships with the following built-in policies:
.. availability:: Windows.
+.. _asyncio-watchers:
Process Watchers
================
@@ -129,10 +130,11 @@ In asyncio, child processes are created with
:func:`create_subprocess_exec` and :meth:`loop.subprocess_exec`
functions.
-asyncio defines the :class:`AbstractChildWatcher` abstract base class,
-which child watchers should implement, and has two different
-implementations: :class:`SafeChildWatcher` (configured to be used
-by default) and :class:`FastChildWatcher`.
+asyncio defines the :class:`AbstractChildWatcher` abstract base class, which child
+watchers should implement, and has four different implementations:
+:class:`ThreadedChildWatcher` (configured to be used by default),
+:class:`MultiLoopChildWatcher`, :class:`SafeChildWatcher`, and
+:class:`FastChildWatcher`.
See also the :ref:`Subprocess and Threads <asyncio-subprocess-threads>`
section.
@@ -184,6 +186,15 @@ implementation used by the asyncio event loop:
Note: loop may be ``None``.
+ .. method:: is_active()
+
+ Return ``True`` if the watcher is ready to use.
+
+ Spawning a subprocess with *inactive* current child watcher raises
+ :exc:`RuntimeError`.
+
+ .. versionadded:: 3.8
+
.. method:: close()
Close the watcher.
@@ -191,16 +202,48 @@ implementation used by the asyncio event loop:
This method has to be called to ensure that underlying
resources are cleaned-up.
-.. class:: SafeChildWatcher
+.. class:: ThreadedChildWatcher
+
+ This implementation starts a new waiting thread for every subprocess spawn.
+
+ It works reliably even when the asyncio event loop is run in a non-main OS thread.
+
+ There is no noticeable overhead when handling a big number of children (*O(1)* each
+ time a child terminates), but starting a thread per process requires extra memory.
+
+ This watcher is used by default.
+
+ .. versionadded:: 3.8
- This implementation avoids disrupting other code spawning processes
+.. class:: MultiLoopChildWatcher
+
+ This implementation registers a :py:data:`SIGCHLD` signal handler on
+ instantiation. That can break third-party code that installs a custom handler for
+ the :py:data:`SIGCHLD` signal.
+
+ The watcher avoids disrupting other code spawning processes
by polling every process explicitly on a :py:data:`SIGCHLD` signal.
- This is a safe solution but it has a significant overhead when
+ There is no limitation for running subprocesses from different threads once the
+ watcher is installed.
+
+ The solution is safe but it has a significant overhead when
handling a big number of processes (*O(n)* each time a
:py:data:`SIGCHLD` is received).
- asyncio uses this safe implementation by default.
+ .. versionadded:: 3.8
+
+.. class:: SafeChildWatcher
+
+ This implementation uses active event loop from the main thread to handle
+ :py:data:`SIGCHLD` signal. If the main thread has no running event loop another
+ thread cannot spawn a subprocess (:exc:`RuntimeError` is raised).
+
+ The watcher avoids disrupting other code spawning processes
+ by polling every process explicitly on a :py:data:`SIGCHLD` signal.
+
+ This solution is as safe as :class:`MultiLoopChildWatcher` and has the same *O(N)*
+ complexity but requires a running event loop in the main thread to work.
.. class:: FastChildWatcher
@@ -211,6 +254,9 @@ implementation used by the asyncio event loop:
There is no noticeable overhead when handling a big number of
children (*O(1)* each time a child terminates).
+ This solution requires a running event loop in the main thread to work, as
+ :class:`SafeChildWatcher`.
+
Custom Policies
===============
diff --git a/Doc/library/asyncio-subprocess.rst b/Doc/library/asyncio-subprocess.rst
index 00dc66c48b21..444fb6361b5e 100644
--- a/Doc/library/asyncio-subprocess.rst
+++ b/Doc/library/asyncio-subprocess.rst
@@ -293,18 +293,26 @@ their completion.
Subprocess and Threads
----------------------
-Standard asyncio event loop supports running subprocesses from
-different threads, but there are limitations:
+Standard asyncio event loop supports running subprocesses from different threads by
+default.
-* An event loop must run in the main thread.
+On Windows subprocesses are provided by :class:`ProactorEventLoop` only (default),
+:class:`SelectorEventLoop` has no subprocess support.
-* The child watcher must be instantiated in the main thread
- before executing subprocesses from other threads. Call the
- :func:`get_child_watcher` function in the main thread to instantiate
- the child watcher.
+On UNIX *child watchers* are used for subprocess finish waiting, see
+:ref:`asyncio-watchers` for more info.
-Note that alternative event loop implementations might not share
-the above limitations; please refer to their documentation.
+
+.. versionchanged:: 3.8
+
+ UNIX switched to use :class:`ThreadedChildWatcher` for spawning subprocesses from
+ different threads without any limitation.
+
+ Spawning a subprocess with *inactive* current child watcher raises
+ :exc:`RuntimeError`.
+
+Note that alternative event loop implementations might have their own limitations;
+please refer to their documentation.
.. seealso::
diff --git a/Lib/asyncio/unix_events.py b/Lib/asyncio/unix_events.py
index 28128d2977df..d7a4af86f71b 100644
--- a/Lib/asyncio/unix_events.py
+++ b/Lib/asyncio/unix_events.py
@@ -2,6 +2,7 @@
import errno
import io
+import itertools
import os
import selectors
import signal
@@ -12,7 +13,6 @@
import threading
import warnings
-
from . import base_events
from . import base_subprocess
from . import constants
@@ -29,7 +29,9 @@
__all__ = (
'SelectorEventLoop',
'AbstractChildWatcher', 'SafeChildWatcher',
- 'FastChildWatcher', 'DefaultEventLoopPolicy',
+ 'FastChildWatcher',
+ 'MultiLoopChildWatcher', 'ThreadedChildWatcher',
+ 'DefaultEventLoopPolicy',
)
@@ -184,6 +186,13 @@ def _make_write_pipe_transport(self, pipe, protocol, waiter=None,
stdin, stdout, stderr, bufsize,
extra=None, **kwargs):
with events.get_child_watcher() as watcher:
+ if not watcher.is_active():
+ # Check early.
+ # Raising exception before process creation
+ # prevents subprocess execution if the watcher
+ # is not ready to handle it.
+ raise RuntimeError("asyncio.get_child_watcher() is not activated, "
+ "subprocess support is not installed.")
waiter = self.create_future()
transp = _UnixSubprocessTransport(self, protocol, args, shell,
stdin, stdout, stderr, bufsize,
@@ -838,6 +847,15 @@ def close(self):
"""
raise NotImplementedError()
+ def is_active(self):
+ """Return ``True`` if the watcher is active and is used by the event loop.
+
+ Return True if the watcher is installed and ready to handle process exit
+ notifications.
+
+ """
+ raise NotImplementedError()
+
def __enter__(self):
"""Enter the watcher's context and allow starting new processes
@@ -849,6 +867,20 @@ def __exit__(self, a, b, c):
raise NotImplementedError()
+def _compute_returncode(status):
+ if os.WIFSIGNALED(status):
+ # The child process died because of a signal.
+ return -os.WTERMSIG(status)
+ elif os.WIFEXITED(status):
+ # The child process exited (e.g sys.exit()).
+ return os.WEXITSTATUS(status)
+ else:
+ # The child exited, but we don't understand its status.
+ # This shouldn't happen, but if it does, let's just
+ # return that status; perhaps that helps debug it.
+ return status
+
+
class BaseChildWatcher(AbstractChildWatcher):
def __init__(self):
@@ -858,6 +890,9 @@ def __init__(self):
def close(self):
self.attach_loop(None)
+ def is_active(self):
+ return self._loop is not None and self._loop.is_running()
+
def _do_waitpid(self, expected_pid):
raise NotImplementedError()
@@ -898,19 +933,6 @@ def _sig_chld(self):
'exception': exc,
})
- def _compute_returncode(self, status):
- if os.WIFSIGNALED(status):
- # The child process died because of a signal.
- return -os.WTERMSIG(status)
- elif os.WIFEXITED(status):
- # The child process exited (e.g sys.exit()).
- return os.WEXITSTATUS(status)
- else:
- # The child exited, but we don't understand its status.
- # This shouldn't happen, but if it does, let's just
- # return that status; perhaps that helps debug it.
- return status
-
class SafeChildWatcher(BaseChildWatcher):
"""'Safe' child watcher implementation.
@@ -934,11 +956,6 @@ def __exit__(self, a, b, c):
pass
def add_child_handler(self, pid, callback, *args):
- if self._loop is None:
- raise RuntimeError(
- "Cannot add child handler, "
- "the child watcher does not have a loop attached")
-
self._callbacks[pid] = (callback, args)
# Prevent a race condition in case the child is already terminated.
@@ -974,7 +991,7 @@ def _do_waitpid(self, expected_pid):
# The child process is still alive.
return
- returncode = self._compute_returncode(status)
+ returncode = _compute_returncode(status)
if self._loop.get_debug():
logger.debug('process %s exited with returncode %s',
expected_pid, returncode)
@@ -1035,11 +1052,6 @@ def __exit__(self, a, b, c):
def add_child_handler(self, pid, callback, *args):
assert self._forks, "Must use the context manager"
- if self._loop is None:
- raise RuntimeError(
- "Cannot add child handler, "
- "the child watcher does not have a loop attached")
-
with self._lock:
try:
returncode = self._zombies.pop(pid)
@@ -1072,7 +1084,7 @@ def _do_waitpid_all(self):
# A child process is still alive.
return
- returncode = self._compute_returncode(status)
+ returncode = _compute_returncode(status)
with self._lock:
try:
@@ -1101,6 +1113,209 @@ def _do_waitpid_all(self):
callback(pid, returncode, *args)
+class MultiLoopChildWatcher(AbstractChildWatcher):
+ """A watcher that doesn't require running loop in the main thread.
+
+ This implementation registers a SIGCHLD signal handler on
+ instantiation (which may conflict with other code that
+ installs its own handler for this signal).
+
+ The solution is safe but it has a significant overhead when
+ handling a big number of processes (*O(n)* each time a
+ SIGCHLD is received).
+ """
+
+ # Implementation note:
+ # The class keeps compatibility with AbstractChildWatcher ABC
+ # To achieve this it has empty attach_loop() method
+ # and doesn't accept explicit loop argument
+ # for add_child_handler()/remove_child_handler()
+ # but retrieves the current loop by get_running_loop()
+
+ def __init__(self):
+ self._callbacks = {}
+ self._saved_sighandler = None
+
+ def is_active(self):
+ return self._saved_sighandler is not None
+
+ def close(self):
+ self._callbacks.clear()
+ if self._saved_sighandler is not None:
+ handler = signal.getsignal(signal.SIGCHLD)
+ if handler != self._sig_chld:
+ logger.warning("SIGCHLD handler was changed by outside code")
+ else:
+ signal.signal(signal.SIGCHLD, self._saved_sighandler)
+ self._saved_sighandler = None
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ pass
+
+ def add_child_handler(self, pid, callback, *args):
+ loop = events.get_running_loop()
+ self._callbacks[pid] = (loop, callback, args)
+
+ # Prevent a race condition in case the child is already terminated.
+ self._do_waitpid(pid)
+
+ def remove_child_handler(self, pid):
+ try:
+ del self._callbacks[pid]
+ return True
+ except KeyError:
+ return False
+
+ def attach_loop(self, loop):
+ # Don't save the loop but initialize itself if called first time
+ # The reason to do it here is that attach_loop() is called from
+ # unix policy only for the main thread.
+ # Main thread is required for subscription on SIGCHLD signal
+ if self._saved_sighandler is None:
+ self._saved_sighandler = signal.signal(signal.SIGCHLD, self._sig_chld)
+ if self._saved_sighandler is None:
+ logger.warning("Previous SIGCHLD handler was set by non-Python code, "
+ "restore to default handler on watcher close.")
+ self._saved_sighandler = signal.SIG_DFL
+
+ # Set SA_RESTART to limit EINTR occurrences.
+ signal.siginterrupt(signal.SIGCHLD, False)
+
+ def _do_waitpid_all(self):
+ for pid in list(self._callbacks):
+ self._do_waitpid(pid)
+
+ def _do_waitpid(self, expected_pid):
+ assert expected_pid > 0
+
+ try:
+ pid, status = os.waitpid(expected_pid, os.WNOHANG)
+ except ChildProcessError:
+ # The child process is already reaped
+ # (may happen if waitpid() is called elsewhere).
+ pid = expected_pid
+ returncode = 255
+ logger.warning(
+ "Unknown child process pid %d, will report returncode 255",
+ pid)
+ debug_log = False
+ else:
+ if pid == 0:
+ # The child process is still alive.
+ return
+
+ returncode = _compute_returncode(status)
+ debug_log = True
+ try:
+ loop, callback, args = self._callbacks.pop(pid)
+ except KeyError: # pragma: no cover
+ # May happen if .remove_child_handler() is called
+ # after os.waitpid() returns.
+ logger.warning("Child watcher got an unexpected pid: %r",
+ pid, exc_info=True)
+ else:
+ if loop.is_closed():
+ logger.warning("Loop %r that handles pid %r is closed", loop, pid)
+ else:
+ if debug_log and loop.get_debug():
+ logger.debug('process %s exited with returncode %s',
+ expected_pid, returncode)
+ loop.call_soon_threadsafe(callback, pid, returncode, *args)
+
+ def _sig_chld(self, signum, frame):
+ try:
+ self._do_waitpid_all()
+ except (SystemExit, KeyboardInterrupt):
+ raise
+ except BaseException:
+ logger.warning('Unknown exception in SIGCHLD handler', exc_info=True)
+
+
+class ThreadedChildWatcher(AbstractChildWatcher):
+ """Threaded child watcher implementation.
+
+ The watcher uses a thread per process
+ for waiting for the process finish.
+
+ It doesn't require subscription on POSIX signal
+ but a thread creation is not free.
+
+ The watcher has O(1) complexity, its performance doesn't depend
+ on the number of spawned processes.
+ """
+
+ def __init__(self):
+ self._pid_counter = itertools.count(0)
+ self._threads = {}
+
+ def is_active(self):
+ return True
+
+ def close(self):
+ pass
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ pass
+
+ def __del__(self, _warn=warnings.warn):
+ threads = [thread for thread in list(self._threads.values())
+ if thread.is_alive()]
+ if threads:
+ _warn(f"{self.__class__} has registered but not finished child processes",
+ ResourceWarning,
+ source=self)
+
+ def add_child_handler(self, pid, callback, *args):
+ loop = events.get_running_loop()
+ thread = threading.Thread(target=self._do_waitpid,
+ name=f"waitpid-{next(self._pid_counter)}",
+ args=(loop, pid, callback, args),
+ daemon=True)
+ self._threads[pid] = thread
+ thread.start()
+
+ def remove_child_handler(self, pid):
+ # asyncio never calls remove_child_handler() !!!
+ # The method is no-op but is implemented because
+ # abstract base class requires it
+ return True
+
+ def attach_loop(self, loop):
+ pass
+
+ def _do_waitpid(self, loop, expected_pid, callback, args):
+ assert expected_pid > 0
+
+ try:
+ pid, status = os.waitpid(expected_pid, 0)
+ except ChildProcessError:
+ # The child process is already reaped
+ # (may happen if waitpid() is called elsewhere).
+ pid = expected_pid
+ returncode = 255
+ logger.warning(
+ "Unknown child process pid %d, will report returncode 255",
+ pid)
+ else:
+ returncode = _compute_returncode(status)
+ if loop.get_debug():
+ logger.debug('process %s exited with returncode %s',
+ expected_pid, returncode)
+
+ if loop.is_closed():
+ logger.warning("Loop %r that handles pid %r is closed", loop, pid)
+ else:
+ loop.call_soon_threadsafe(callback, pid, returncode, *args)
+
+ self._threads.pop(expected_pid)
+
+
class _UnixDefaultEventLoopPolicy(events.BaseDefaultEventLoopPolicy):
"""UNIX event loop policy with a watcher for child processes."""
_loop_factory = _UnixSelectorEventLoop
@@ -1112,7 +1327,7 @@ def __init__(self):
def _init_watcher(self):
with events._lock:
if self._watcher is None: # pragma: no branch
- self._watcher = SafeChildWatcher()
+ self._watcher = ThreadedChildWatcher()
if isinstance(threading.current_thread(),
threading._MainThread):
self._watcher.attach_loop(self._local._loop)
@@ -1134,7 +1349,7 @@ def set_event_loop(self, loop):
def get_child_watcher(self):
"""Get the watcher for child processes.
- If not yet set, a SafeChildWatcher object is automatically created.
+ If not yet set, a ThreadedChildWatcher object is automatically created.
"""
if self._watcher is None:
self._init_watcher()
diff --git a/Lib/test/test_asyncio/test_subprocess.py b/Lib/test/test_asyncio/test_subprocess.py
index e9a9e50430c3..b9578b2866c0 100644
--- a/Lib/test/test_asyncio/test_subprocess.py
+++ b/Lib/test/test_asyncio/test_subprocess.py
@@ -633,6 +633,7 @@ def test_create_subprocess_exec_with_path(self):
self.assertIsNone(self.loop.run_until_complete(execute()))
+
if sys.platform != 'win32':
# Unix
class SubprocessWatcherMixin(SubprocessMixin):
@@ -648,7 +649,24 @@ def setUp(self):
watcher = self.Watcher()
watcher.attach_loop(self.loop)
policy.set_child_watcher(watcher)
- self.addCleanup(policy.set_child_watcher, None)
+
+ def tearDown(self):
+ super().tearDown()
+ policy = asyncio.get_event_loop_policy()
+ watcher = policy.get_child_watcher()
+ policy.set_child_watcher(None)
+ watcher.attach_loop(None)
+ watcher.close()
+
+ class SubprocessThreadedWatcherTests(SubprocessWatcherMixin,
+ test_utils.TestCase):
+
+ Watcher = unix_events.ThreadedChildWatcher
+
+ class SubprocessMultiLoopWatcherTests(SubprocessWatcherMixin,
+ test_utils.TestCase):
+
+ Watcher = unix_events.MultiLoopChildWatcher
class SubprocessSafeWatcherTests(SubprocessWatcherMixin,
test_utils.TestCase):
@@ -670,5 +688,25 @@ def setUp(self):
self.set_event_loop(self.loop)
+class GenericWatcherTests:
+
+ def test_create_subprocess_fails_with_inactive_watcher(self):
+
+ async def execute():
+ watcher = mock.create_authspec(asyncio.AbstractChildWatcher)
+ watcher.is_active.return_value = False
+ asyncio.set_child_watcher(watcher)
+
+ with self.assertRaises(RuntimeError):
+ await subprocess.create_subprocess_exec(
+ support.FakePath(sys.executable), '-c', 'pass')
+
+ watcher.add_child_handler.assert_not_called()
+
+ self.assertIsNone(self.loop.run_until_complete(execute()))
+
+
+
+
if __name__ == '__main__':
unittest.main()
diff --git a/Lib/test/test_asyncio/test_unix_events.py b/Lib/test/test_asyncio/test_unix_events.py
index 5c610cdd67ba..462a8b3c7859 100644
--- a/Lib/test/test_asyncio/test_unix_events.py
+++ b/Lib/test/test_asyncio/test_unix_events.py
@@ -1082,6 +1082,8 @@ def test_not_implemented(self):
NotImplementedError, watcher.attach_loop, f)
self.assertRaises(
NotImplementedError, watcher.close)
+ self.assertRaises(
+ NotImplementedError, watcher.is_active)
self.assertRaises(
NotImplementedError, watcher.__enter__)
self.assertRaises(
@@ -1784,15 +1786,6 @@ def test_close(self, m):
if isinstance(self.watcher, asyncio.FastChildWatcher):
self.assertFalse(self.watcher._zombies)
- @waitpid_mocks
- def test_add_child_handler_with_no_loop_attached(self, m):
- callback = mock.Mock()
- with self.create_watcher() as watcher:
- with self.assertRaisesRegex(
- RuntimeError,
- 'the child watcher does not have a loop attached'):
- watcher.add_child_handler(100, callback)
-
class SafeChildWatcherTests (ChildWatcherTestsMixin, test_utils.TestCase):
def create_watcher(self):
@@ -1809,17 +1802,16 @@ class PolicyTests(unittest.TestCase):
def create_policy(self):
return asyncio.DefaultEventLoopPolicy()
- def test_get_child_watcher(self):
+ def test_get_default_child_watcher(self):
policy = self.create_policy()
self.assertIsNone(policy._watcher)
watcher = policy.get_child_watcher()
- self.assertIsInstance(watcher, asyncio.SafeChildWatcher)
+ self.assertIsInstance(watcher, asyncio.ThreadedChildWatcher)
self.assertIs(policy._watcher, watcher)
self.assertIs(watcher, policy.get_child_watcher())
- self.assertIsNone(watcher._loop)
def test_get_child_watcher_after_set(self):
policy = self.create_policy()
@@ -1829,18 +1821,6 @@ def test_get_child_watcher_after_set(self):
self.assertIs(policy._watcher, watcher)
self.assertIs(watcher, policy.get_child_watcher())
- def test_get_child_watcher_with_mainloop_existing(self):
- policy = self.create_policy()
- loop = policy.get_event_loop()
-
- self.assertIsNone(policy._watcher)
- watcher = policy.get_child_watcher()
-
- self.assertIsInstance(watcher, asyncio.SafeChildWatcher)
- self.assertIs(watcher._loop, loop)
-
- loop.close()
-
def test_get_child_watcher_thread(self):
def f():
@@ -1866,7 +1846,11 @@ def test_child_watcher_replace_mainloop_existing(self):
policy = self.create_policy()
loop = policy.get_event_loop()
- watcher = policy.get_child_watcher()
+ # Explicitly setup SafeChildWatcher,
+ # default ThreadedChildWatcher has no _loop property
+ watcher = asyncio.SafeChildWatcher()
+ policy.set_child_watcher(watcher)
+ watcher.attach_loop(loop)
self.assertIs(watcher._loop, loop)
diff --git a/Lib/test/test_asyncio/utils.py b/Lib/test/test_asyncio/utils.py
index cb373d544f41..5b4bb123a9ec 100644
--- a/Lib/test/test_asyncio/utils.py
+++ b/Lib/test/test_asyncio/utils.py
@@ -1,5 +1,6 @@
"""Utilities shared by tests."""
+import asyncio
import collections
import contextlib
import io
@@ -512,6 +513,18 @@ def close_loop(loop):
if executor is not None:
executor.shutdown(wait=True)
loop.close()
+ policy = support.maybe_get_event_loop_policy()
+ if policy is not None:
+ try:
+ watcher = policy.get_child_watcher()
+ except NotImplementedError:
+ # watcher is not implemented by EventLoopPolicy, e.g. Windows
+ pass
+ else:
+ if isinstance(watcher, asyncio.ThreadedChildWatcher):
+ threads = list(watcher._threads.values())
+ for thread in threads:
+ thread.join()
def set_event_loop(self, loop, *, cleanup=True):
assert loop is not None
diff --git a/Misc/NEWS.d/next/Library/2019-05-28-19-03-46.bpo-35621.Abc1lf.rst b/Misc/NEWS.d/next/Library/2019-05-28-19-03-46.bpo-35621.Abc1lf.rst
new file mode 100644
index 000000000000..c492e1de6d5c
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2019-05-28-19-03-46.bpo-35621.Abc1lf.rst
@@ -0,0 +1,2 @@
+Support running asyncio subprocesses when the event loop is executed in a
+non-main thread on UNIX.
1
0
bpo-35621: Support running subprocesses in asyncio when loop is executed in non-main thread (GH-14344)
by Andrew Svetlov 30 Jun '19
by Andrew Svetlov 30 Jun '19
30 Jun '19
https://github.com/python/cpython/commit/0d671c04c39b52e44597491b893eb0b6c8…
commit: 0d671c04c39b52e44597491b893eb0b6c86b3d45
branch: master
author: Andrew Svetlov <andrew.svetlov(a)gmail.com>
committer: GitHub <noreply(a)github.com>
date: 2019-06-30T12:54:59+03:00
summary:
bpo-35621: Support running subprocesses in asyncio when loop is executed in non-main thread (GH-14344)
files:
A Misc/NEWS.d/next/Library/2019-05-28-19-03-46.bpo-35621.Abc1lf.rst
M Doc/library/asyncio-policy.rst
M Doc/library/asyncio-subprocess.rst
M Lib/asyncio/unix_events.py
M Lib/test/test_asyncio/test_subprocess.py
M Lib/test/test_asyncio/test_unix_events.py
M Lib/test/test_asyncio/utils.py
diff --git a/Doc/library/asyncio-policy.rst b/Doc/library/asyncio-policy.rst
index 6212df85dbc1..aa8f8f13eae0 100644
--- a/Doc/library/asyncio-policy.rst
+++ b/Doc/library/asyncio-policy.rst
@@ -117,6 +117,7 @@ asyncio ships with the following built-in policies:
.. availability:: Windows.
+.. _asyncio-watchers:
Process Watchers
================
@@ -129,10 +130,11 @@ In asyncio, child processes are created with
:func:`create_subprocess_exec` and :meth:`loop.subprocess_exec`
functions.
-asyncio defines the :class:`AbstractChildWatcher` abstract base class,
-which child watchers should implement, and has two different
-implementations: :class:`SafeChildWatcher` (configured to be used
-by default) and :class:`FastChildWatcher`.
+asyncio defines the :class:`AbstractChildWatcher` abstract base class, which child
+watchers should implement, and has four different implementations:
+:class:`ThreadedChildWatcher` (configured to be used by default),
+:class:`MultiLoopChildWatcher`, :class:`SafeChildWatcher`, and
+:class:`FastChildWatcher`.
See also the :ref:`Subprocess and Threads <asyncio-subprocess-threads>`
section.
@@ -184,6 +186,15 @@ implementation used by the asyncio event loop:
Note: loop may be ``None``.
+ .. method:: is_active()
+
+ Return ``True`` if the watcher is ready to use.
+
+ Spawning a subprocess with an *inactive* current child watcher raises
+ :exc:`RuntimeError`.
+
+ .. versionadded:: 3.8
+
.. method:: close()
Close the watcher.
@@ -191,16 +202,48 @@ implementation used by the asyncio event loop:
This method has to be called to ensure that underlying
resources are cleaned-up.
-.. class:: SafeChildWatcher
+.. class:: ThreadedChildWatcher
+
+ This implementation starts a new waiting thread for every subprocess spawn.
+
+ It works reliably even when the asyncio event loop is run in a non-main OS thread.
+
+ There is no noticeable overhead when handling a big number of children (*O(1)* each
+ time a child terminates), but starting a thread per process requires extra memory.
+
+ This watcher is used by default.
+
+ .. versionadded:: 3.8
- This implementation avoids disrupting other code spawning processes
+.. class:: MultiLoopChildWatcher
+
+ This implementation registers a :py:data:`SIGCHLD` signal handler on
+ instantiation. That can break third-party code that installs a custom handler for
+ the :py:data:`SIGCHLD` signal.
+
+ The watcher avoids disrupting other code spawning processes
by polling every process explicitly on a :py:data:`SIGCHLD` signal.
- This is a safe solution but it has a significant overhead when
+ There is no limitation for running subprocesses from different threads once the
+ watcher is installed.
+
+ The solution is safe but it has a significant overhead when
handling a big number of processes (*O(n)* each time a
:py:data:`SIGCHLD` is received).
- asyncio uses this safe implementation by default.
+ .. versionadded:: 3.8
+
+.. class:: SafeChildWatcher
+
+ This implementation uses the active event loop from the main thread to handle the
+ :py:data:`SIGCHLD` signal. If the main thread has no running event loop, another
+ thread cannot spawn a subprocess (:exc:`RuntimeError` is raised).
+
+ The watcher avoids disrupting other code spawning processes
+ by polling every process explicitly on a :py:data:`SIGCHLD` signal.
+
+ This solution is as safe as :class:`MultiLoopChildWatcher` and has the same *O(N)*
+ complexity but requires a running event loop in the main thread to work.
.. class:: FastChildWatcher
@@ -211,6 +254,9 @@ implementation used by the asyncio event loop:
There is no noticeable overhead when handling a big number of
children (*O(1)* each time a child terminates).
+ This solution requires a running event loop in the main thread to work, just like
+ :class:`SafeChildWatcher`.
+
Custom Policies
===============
diff --git a/Doc/library/asyncio-subprocess.rst b/Doc/library/asyncio-subprocess.rst
index 00dc66c48b21..444fb6361b5e 100644
--- a/Doc/library/asyncio-subprocess.rst
+++ b/Doc/library/asyncio-subprocess.rst
@@ -293,18 +293,26 @@ their completion.
Subprocess and Threads
----------------------
-Standard asyncio event loop supports running subprocesses from
-different threads, but there are limitations:
+Standard asyncio event loop supports running subprocesses from different threads by
+default.
-* An event loop must run in the main thread.
+On Windows subprocesses are provided by :class:`ProactorEventLoop` only (default),
+:class:`SelectorEventLoop` has no subprocess support.
-* The child watcher must be instantiated in the main thread
- before executing subprocesses from other threads. Call the
- :func:`get_child_watcher` function in the main thread to instantiate
- the child watcher.
+On UNIX, *child watchers* are used to wait for subprocess termination; see
+:ref:`asyncio-watchers` for more info.
-Note that alternative event loop implementations might not share
-the above limitations; please refer to their documentation.
+
+.. versionchanged:: 3.8
+
+ UNIX switched to use :class:`ThreadedChildWatcher` for spawning subprocesses from
+ different threads without any limitation.
+
+ Spawning a subprocess with *inactive* current child watcher raises
+ :exc:`RuntimeError`.
+
+Note that alternative event loop implementations might have their own limitations;
+please refer to their documentation.
.. seealso::
diff --git a/Lib/asyncio/unix_events.py b/Lib/asyncio/unix_events.py
index 28128d2977df..d7a4af86f71b 100644
--- a/Lib/asyncio/unix_events.py
+++ b/Lib/asyncio/unix_events.py
@@ -2,6 +2,7 @@
import errno
import io
+import itertools
import os
import selectors
import signal
@@ -12,7 +13,6 @@
import threading
import warnings
-
from . import base_events
from . import base_subprocess
from . import constants
@@ -29,7 +29,9 @@
__all__ = (
'SelectorEventLoop',
'AbstractChildWatcher', 'SafeChildWatcher',
- 'FastChildWatcher', 'DefaultEventLoopPolicy',
+ 'FastChildWatcher',
+ 'MultiLoopChildWatcher', 'ThreadedChildWatcher',
+ 'DefaultEventLoopPolicy',
)
@@ -184,6 +186,13 @@ def _make_write_pipe_transport(self, pipe, protocol, waiter=None,
stdin, stdout, stderr, bufsize,
extra=None, **kwargs):
with events.get_child_watcher() as watcher:
+ if not watcher.is_active():
+ # Check early.
+ # Raising exception before process creation
+ # prevents subprocess execution if the watcher
+ # is not ready to handle it.
+ raise RuntimeError("asyncio.get_child_watcher() is not activated, "
+ "subprocess support is not installed.")
waiter = self.create_future()
transp = _UnixSubprocessTransport(self, protocol, args, shell,
stdin, stdout, stderr, bufsize,
@@ -838,6 +847,15 @@ def close(self):
"""
raise NotImplementedError()
+ def is_active(self):
+ """Return ``True`` if the watcher is active and is used by the event loop.
+
+ Return True if the watcher is installed and ready to handle process exit
+ notifications.
+
+ """
+ raise NotImplementedError()
+
def __enter__(self):
"""Enter the watcher's context and allow starting new processes
@@ -849,6 +867,20 @@ def __exit__(self, a, b, c):
raise NotImplementedError()
+def _compute_returncode(status):
+ if os.WIFSIGNALED(status):
+ # The child process died because of a signal.
+ return -os.WTERMSIG(status)
+ elif os.WIFEXITED(status):
+ # The child process exited (e.g sys.exit()).
+ return os.WEXITSTATUS(status)
+ else:
+ # The child exited, but we don't understand its status.
+ # This shouldn't happen, but if it does, let's just
+ # return that status; perhaps that helps debug it.
+ return status
+
+
class BaseChildWatcher(AbstractChildWatcher):
def __init__(self):
@@ -858,6 +890,9 @@ def __init__(self):
def close(self):
self.attach_loop(None)
+ def is_active(self):
+ return self._loop is not None and self._loop.is_running()
+
def _do_waitpid(self, expected_pid):
raise NotImplementedError()
@@ -898,19 +933,6 @@ def _sig_chld(self):
'exception': exc,
})
- def _compute_returncode(self, status):
- if os.WIFSIGNALED(status):
- # The child process died because of a signal.
- return -os.WTERMSIG(status)
- elif os.WIFEXITED(status):
- # The child process exited (e.g sys.exit()).
- return os.WEXITSTATUS(status)
- else:
- # The child exited, but we don't understand its status.
- # This shouldn't happen, but if it does, let's just
- # return that status; perhaps that helps debug it.
- return status
-
class SafeChildWatcher(BaseChildWatcher):
"""'Safe' child watcher implementation.
@@ -934,11 +956,6 @@ def __exit__(self, a, b, c):
pass
def add_child_handler(self, pid, callback, *args):
- if self._loop is None:
- raise RuntimeError(
- "Cannot add child handler, "
- "the child watcher does not have a loop attached")
-
self._callbacks[pid] = (callback, args)
# Prevent a race condition in case the child is already terminated.
@@ -974,7 +991,7 @@ def _do_waitpid(self, expected_pid):
# The child process is still alive.
return
- returncode = self._compute_returncode(status)
+ returncode = _compute_returncode(status)
if self._loop.get_debug():
logger.debug('process %s exited with returncode %s',
expected_pid, returncode)
@@ -1035,11 +1052,6 @@ def __exit__(self, a, b, c):
def add_child_handler(self, pid, callback, *args):
assert self._forks, "Must use the context manager"
- if self._loop is None:
- raise RuntimeError(
- "Cannot add child handler, "
- "the child watcher does not have a loop attached")
-
with self._lock:
try:
returncode = self._zombies.pop(pid)
@@ -1072,7 +1084,7 @@ def _do_waitpid_all(self):
# A child process is still alive.
return
- returncode = self._compute_returncode(status)
+ returncode = _compute_returncode(status)
with self._lock:
try:
@@ -1101,6 +1113,209 @@ def _do_waitpid_all(self):
callback(pid, returncode, *args)
+class MultiLoopChildWatcher(AbstractChildWatcher):
+ """A watcher that doesn't require running loop in the main thread.
+
+ This implementation registers a SIGCHLD signal handler on
+ instantiation (which may conflict with other code that
+ installs its own handler for this signal).
+
+ The solution is safe but it has a significant overhead when
+ handling a big number of processes (*O(n)* each time a
+ SIGCHLD is received).
+ """
+
+ # Implementation note:
+ # The class keeps compatibility with AbstractChildWatcher ABC
+ # To achieve this it has empty attach_loop() method
+ # and doesn't accept explicit loop argument
+ # for add_child_handler()/remove_child_handler()
+ # but retrieves the current loop by get_running_loop()
+
+ def __init__(self):
+ self._callbacks = {}
+ self._saved_sighandler = None
+
+ def is_active(self):
+ return self._saved_sighandler is not None
+
+ def close(self):
+ self._callbacks.clear()
+ if self._saved_sighandler is not None:
+ handler = signal.getsignal(signal.SIGCHLD)
+ if handler != self._sig_chld:
+ logger.warning("SIGCHLD handler was changed by outside code")
+ else:
+ signal.signal(signal.SIGCHLD, self._saved_sighandler)
+ self._saved_sighandler = None
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ pass
+
+ def add_child_handler(self, pid, callback, *args):
+ loop = events.get_running_loop()
+ self._callbacks[pid] = (loop, callback, args)
+
+ # Prevent a race condition in case the child is already terminated.
+ self._do_waitpid(pid)
+
+ def remove_child_handler(self, pid):
+ try:
+ del self._callbacks[pid]
+ return True
+ except KeyError:
+ return False
+
+ def attach_loop(self, loop):
+ # Don't save the loop but initialize itself if called first time
+ # The reason to do it here is that attach_loop() is called from
+ # unix policy only for the main thread.
+ # Main thread is required for subscription on SIGCHLD signal
+ if self._saved_sighandler is None:
+ self._saved_sighandler = signal.signal(signal.SIGCHLD, self._sig_chld)
+ if self._saved_sighandler is None:
+ logger.warning("Previous SIGCHLD handler was set by non-Python code, "
+ "restore to default handler on watcher close.")
+ self._saved_sighandler = signal.SIG_DFL
+
+ # Set SA_RESTART to limit EINTR occurrences.
+ signal.siginterrupt(signal.SIGCHLD, False)
+
+ def _do_waitpid_all(self):
+ for pid in list(self._callbacks):
+ self._do_waitpid(pid)
+
+ def _do_waitpid(self, expected_pid):
+ assert expected_pid > 0
+
+ try:
+ pid, status = os.waitpid(expected_pid, os.WNOHANG)
+ except ChildProcessError:
+ # The child process is already reaped
+ # (may happen if waitpid() is called elsewhere).
+ pid = expected_pid
+ returncode = 255
+ logger.warning(
+ "Unknown child process pid %d, will report returncode 255",
+ pid)
+ debug_log = False
+ else:
+ if pid == 0:
+ # The child process is still alive.
+ return
+
+ returncode = _compute_returncode(status)
+ debug_log = True
+ try:
+ loop, callback, args = self._callbacks.pop(pid)
+ except KeyError: # pragma: no cover
+ # May happen if .remove_child_handler() is called
+ # after os.waitpid() returns.
+ logger.warning("Child watcher got an unexpected pid: %r",
+ pid, exc_info=True)
+ else:
+ if loop.is_closed():
+ logger.warning("Loop %r that handles pid %r is closed", loop, pid)
+ else:
+ if debug_log and loop.get_debug():
+ logger.debug('process %s exited with returncode %s',
+ expected_pid, returncode)
+ loop.call_soon_threadsafe(callback, pid, returncode, *args)
+
+ def _sig_chld(self, signum, frame):
+ try:
+ self._do_waitpid_all()
+ except (SystemExit, KeyboardInterrupt):
+ raise
+ except BaseException:
+ logger.warning('Unknown exception in SIGCHLD handler', exc_info=True)
+
+
+class ThreadedChildWatcher(AbstractChildWatcher):
+ """Threaded child watcher implementation.
+
+ The watcher uses a thread per process
+ to wait for the process to finish.
+
+ It doesn't require subscription on POSIX signal
+ but a thread creation is not free.
+
+ The watcher has O(1) complexity, its performance doesn't depend
+ on the number of spawned processes.
+ """
+
+ def __init__(self):
+ self._pid_counter = itertools.count(0)
+ self._threads = {}
+
+ def is_active(self):
+ return True
+
+ def close(self):
+ pass
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ pass
+
+ def __del__(self, _warn=warnings.warn):
+ threads = [thread for thread in list(self._threads.values())
+ if thread.is_alive()]
+ if threads:
+ _warn(f"{self.__class__} has registered but not finished child processes",
+ ResourceWarning,
+ source=self)
+
+ def add_child_handler(self, pid, callback, *args):
+ loop = events.get_running_loop()
+ thread = threading.Thread(target=self._do_waitpid,
+ name=f"waitpid-{next(self._pid_counter)}",
+ args=(loop, pid, callback, args),
+ daemon=True)
+ self._threads[pid] = thread
+ thread.start()
+
+ def remove_child_handler(self, pid):
+ # asyncio never calls remove_child_handler() !!!
+ # The method is no-op but is implemented because
+ # abstract base class requires it
+ return True
+
+ def attach_loop(self, loop):
+ pass
+
+ def _do_waitpid(self, loop, expected_pid, callback, args):
+ assert expected_pid > 0
+
+ try:
+ pid, status = os.waitpid(expected_pid, 0)
+ except ChildProcessError:
+ # The child process is already reaped
+ # (may happen if waitpid() is called elsewhere).
+ pid = expected_pid
+ returncode = 255
+ logger.warning(
+ "Unknown child process pid %d, will report returncode 255",
+ pid)
+ else:
+ returncode = _compute_returncode(status)
+ if loop.get_debug():
+ logger.debug('process %s exited with returncode %s',
+ expected_pid, returncode)
+
+ if loop.is_closed():
+ logger.warning("Loop %r that handles pid %r is closed", loop, pid)
+ else:
+ loop.call_soon_threadsafe(callback, pid, returncode, *args)
+
+ self._threads.pop(expected_pid)
+
+
class _UnixDefaultEventLoopPolicy(events.BaseDefaultEventLoopPolicy):
"""UNIX event loop policy with a watcher for child processes."""
_loop_factory = _UnixSelectorEventLoop
@@ -1112,7 +1327,7 @@ def __init__(self):
def _init_watcher(self):
with events._lock:
if self._watcher is None: # pragma: no branch
- self._watcher = SafeChildWatcher()
+ self._watcher = ThreadedChildWatcher()
if isinstance(threading.current_thread(),
threading._MainThread):
self._watcher.attach_loop(self._local._loop)
@@ -1134,7 +1349,7 @@ def set_event_loop(self, loop):
def get_child_watcher(self):
"""Get the watcher for child processes.
- If not yet set, a SafeChildWatcher object is automatically created.
+ If not yet set, a ThreadedChildWatcher object is automatically created.
"""
if self._watcher is None:
self._init_watcher()
diff --git a/Lib/test/test_asyncio/test_subprocess.py b/Lib/test/test_asyncio/test_subprocess.py
index e9a9e50430c3..b9578b2866c0 100644
--- a/Lib/test/test_asyncio/test_subprocess.py
+++ b/Lib/test/test_asyncio/test_subprocess.py
@@ -633,6 +633,7 @@ def test_create_subprocess_exec_with_path(self):
self.assertIsNone(self.loop.run_until_complete(execute()))
+
if sys.platform != 'win32':
# Unix
class SubprocessWatcherMixin(SubprocessMixin):
@@ -648,7 +649,24 @@ def setUp(self):
watcher = self.Watcher()
watcher.attach_loop(self.loop)
policy.set_child_watcher(watcher)
- self.addCleanup(policy.set_child_watcher, None)
+
+ def tearDown(self):
+ super().tearDown()
+ policy = asyncio.get_event_loop_policy()
+ watcher = policy.get_child_watcher()
+ policy.set_child_watcher(None)
+ watcher.attach_loop(None)
+ watcher.close()
+
+ class SubprocessThreadedWatcherTests(SubprocessWatcherMixin,
+ test_utils.TestCase):
+
+ Watcher = unix_events.ThreadedChildWatcher
+
+ class SubprocessMultiLoopWatcherTests(SubprocessWatcherMixin,
+ test_utils.TestCase):
+
+ Watcher = unix_events.MultiLoopChildWatcher
class SubprocessSafeWatcherTests(SubprocessWatcherMixin,
test_utils.TestCase):
@@ -670,5 +688,25 @@ def setUp(self):
self.set_event_loop(self.loop)
+class GenericWatcherTests:
+
+ def test_create_subprocess_fails_with_inactive_watcher(self):
+
+ async def execute():
+ watcher = mock.create_authspec(asyncio.AbstractChildWatcher)
+ watcher.is_active.return_value = False
+ asyncio.set_child_watcher(watcher)
+
+ with self.assertRaises(RuntimeError):
+ await subprocess.create_subprocess_exec(
+ support.FakePath(sys.executable), '-c', 'pass')
+
+ watcher.add_child_handler.assert_not_called()
+
+ self.assertIsNone(self.loop.run_until_complete(execute()))
+
+
+
+
if __name__ == '__main__':
unittest.main()
diff --git a/Lib/test/test_asyncio/test_unix_events.py b/Lib/test/test_asyncio/test_unix_events.py
index 5c610cdd67ba..462a8b3c7859 100644
--- a/Lib/test/test_asyncio/test_unix_events.py
+++ b/Lib/test/test_asyncio/test_unix_events.py
@@ -1082,6 +1082,8 @@ def test_not_implemented(self):
NotImplementedError, watcher.attach_loop, f)
self.assertRaises(
NotImplementedError, watcher.close)
+ self.assertRaises(
+ NotImplementedError, watcher.is_active)
self.assertRaises(
NotImplementedError, watcher.__enter__)
self.assertRaises(
@@ -1784,15 +1786,6 @@ def test_close(self, m):
if isinstance(self.watcher, asyncio.FastChildWatcher):
self.assertFalse(self.watcher._zombies)
- @waitpid_mocks
- def test_add_child_handler_with_no_loop_attached(self, m):
- callback = mock.Mock()
- with self.create_watcher() as watcher:
- with self.assertRaisesRegex(
- RuntimeError,
- 'the child watcher does not have a loop attached'):
- watcher.add_child_handler(100, callback)
-
class SafeChildWatcherTests (ChildWatcherTestsMixin, test_utils.TestCase):
def create_watcher(self):
@@ -1809,17 +1802,16 @@ class PolicyTests(unittest.TestCase):
def create_policy(self):
return asyncio.DefaultEventLoopPolicy()
- def test_get_child_watcher(self):
+ def test_get_default_child_watcher(self):
policy = self.create_policy()
self.assertIsNone(policy._watcher)
watcher = policy.get_child_watcher()
- self.assertIsInstance(watcher, asyncio.SafeChildWatcher)
+ self.assertIsInstance(watcher, asyncio.ThreadedChildWatcher)
self.assertIs(policy._watcher, watcher)
self.assertIs(watcher, policy.get_child_watcher())
- self.assertIsNone(watcher._loop)
def test_get_child_watcher_after_set(self):
policy = self.create_policy()
@@ -1829,18 +1821,6 @@ def test_get_child_watcher_after_set(self):
self.assertIs(policy._watcher, watcher)
self.assertIs(watcher, policy.get_child_watcher())
- def test_get_child_watcher_with_mainloop_existing(self):
- policy = self.create_policy()
- loop = policy.get_event_loop()
-
- self.assertIsNone(policy._watcher)
- watcher = policy.get_child_watcher()
-
- self.assertIsInstance(watcher, asyncio.SafeChildWatcher)
- self.assertIs(watcher._loop, loop)
-
- loop.close()
-
def test_get_child_watcher_thread(self):
def f():
@@ -1866,7 +1846,11 @@ def test_child_watcher_replace_mainloop_existing(self):
policy = self.create_policy()
loop = policy.get_event_loop()
- watcher = policy.get_child_watcher()
+ # Explicitly setup SafeChildWatcher,
+ # default ThreadedChildWatcher has no _loop property
+ watcher = asyncio.SafeChildWatcher()
+ policy.set_child_watcher(watcher)
+ watcher.attach_loop(loop)
self.assertIs(watcher._loop, loop)
diff --git a/Lib/test/test_asyncio/utils.py b/Lib/test/test_asyncio/utils.py
index cb373d544f41..5b4bb123a9ec 100644
--- a/Lib/test/test_asyncio/utils.py
+++ b/Lib/test/test_asyncio/utils.py
@@ -1,5 +1,6 @@
"""Utilities shared by tests."""
+import asyncio
import collections
import contextlib
import io
@@ -512,6 +513,18 @@ def close_loop(loop):
if executor is not None:
executor.shutdown(wait=True)
loop.close()
+ policy = support.maybe_get_event_loop_policy()
+ if policy is not None:
+ try:
+ watcher = policy.get_child_watcher()
+ except NotImplementedError:
+ # watcher is not implemented by EventLoopPolicy, e.g. Windows
+ pass
+ else:
+ if isinstance(watcher, asyncio.ThreadedChildWatcher):
+ threads = list(watcher._threads.values())
+ for thread in threads:
+ thread.join()
def set_event_loop(self, loop, *, cleanup=True):
assert loop is not None
diff --git a/Misc/NEWS.d/next/Library/2019-05-28-19-03-46.bpo-35621.Abc1lf.rst b/Misc/NEWS.d/next/Library/2019-05-28-19-03-46.bpo-35621.Abc1lf.rst
new file mode 100644
index 000000000000..c492e1de6d5c
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2019-05-28-19-03-46.bpo-35621.Abc1lf.rst
@@ -0,0 +1,2 @@
+Support running asyncio subprocesses when the event loop is executed in a
+non-main thread on UNIX.
1
0
bpo-29505: Add more fuzzing for re.compile, re.load and csv.reader (GH-14255)
by Miss Islington (bot) 29 Jun '19
by Miss Islington (bot) 29 Jun '19
29 Jun '19
https://github.com/python/cpython/commit/ffcc161c753a72e7c4237c1e3c433d47b0…
commit: ffcc161c753a72e7c4237c1e3c433d47b020978e
branch: 3.8
author: Miss Islington (bot) <31488909+miss-islington(a)users.noreply.github.com>
committer: GitHub <noreply(a)github.com>
date: 2019-06-29T23:13:18-07:00
summary:
bpo-29505: Add more fuzzing for re.compile, re.load and csv.reader (GH-14255)
Add more fuzz testing for re.compile, re.load and csv.reader
(cherry picked from commit 5cbbbd73a6acb6f96f5d6646aa7498d3dfb1706d)
Co-authored-by: Ammar Askar <ammar(a)ammaraskar.com>
files:
A Modules/_xxtestfuzz/dictionaries/fuzz_sre_compile.dict
A Modules/_xxtestfuzz/fuzz_csv_reader_corpus/test.csv
A Modules/_xxtestfuzz/fuzz_sre_compile_corpus/anchor_links
A Modules/_xxtestfuzz/fuzz_sre_compile_corpus/characters
A Modules/_xxtestfuzz/fuzz_sre_compile_corpus/isbn
A Modules/_xxtestfuzz/fuzz_sre_compile_corpus/phone_number
M Lib/test/test_xxtestfuzz.py
M Modules/_xxtestfuzz/fuzz_tests.txt
M Modules/_xxtestfuzz/fuzzer.c
diff --git a/Lib/test/test_xxtestfuzz.py b/Lib/test/test_xxtestfuzz.py
index 532f5fe72aa5..15924aaeff38 100644
--- a/Lib/test/test_xxtestfuzz.py
+++ b/Lib/test/test_xxtestfuzz.py
@@ -16,6 +16,8 @@ def test_sample_input_smoke_test(self):
_xxtestfuzz.run(b" ")
_xxtestfuzz.run(b"x")
_xxtestfuzz.run(b"1")
+ _xxtestfuzz.run(b"AAAAAAA")
+ _xxtestfuzz.run(b"AAAAAA\0")
if __name__ == "__main__":
diff --git a/Modules/_xxtestfuzz/dictionaries/fuzz_sre_compile.dict b/Modules/_xxtestfuzz/dictionaries/fuzz_sre_compile.dict
new file mode 100644
index 000000000000..961306a87901
--- /dev/null
+++ b/Modules/_xxtestfuzz/dictionaries/fuzz_sre_compile.dict
@@ -0,0 +1,219 @@
+"?"
+"abc"
+"()"
+"[]"
+"abc|def"
+"abc|def|ghi"
+"^xxx$"
+"ab\\b\\d\\bcd"
+"\\w|\\d"
+"a*?"
+"abc+"
+"abc+?"
+"xyz?"
+"xyz??"
+"xyz{0,1}"
+"xyz{0,1}?"
+"xyz{93}"
+"xyz{1,32}"
+"xyz{1,32}?"
+"xyz{1,}"
+"xyz{1,}?"
+"a\\fb\\nc\\rd\\te\\vf"
+"a\\nb\\bc"
+"(?:foo)"
+"(?: foo )"
+"foo|(bar|baz)|quux"
+"foo(?=bar)baz"
+"foo(?!bar)baz"
+"foo(?<=bar)baz"
+"foo(?<!bar)baz"
+"()"
+"(?=)"
+"[]"
+"[x]"
+"[xyz]"
+"[a-zA-Z0-9]"
+"[-123]"
+"[^123]"
+"]"
+"}"
+"[a-b-c]"
+"[x\\dz]"
+"[\\d-z]"
+"[\\d-\\d]"
+"[z-\\d]"
+"\\cj\\cJ\\ci\\cI\\ck\\cK"
+"\\c!"
+"\\c_"
+"\\c~"
+"[\\c!]"
+"[\\c_]"
+"[\\c~]"
+"[\\ca]"
+"[\\cz]"
+"[\\cA]"
+"[\\cZ]"
+"[\\c1]"
+"\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ "
+"[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]"
+"\\8"
+"\\9"
+"\\11"
+"\\11a"
+"\\011"
+"\\118"
+"\\111"
+"\\1111"
+"(x)(x)(x)\\1"
+"(x)(x)(x)\\2"
+"(x)(x)(x)\\3"
+"(x)(x)(x)\\4"
+"(x)(x)(x)\\1*"
+"(x)(x)(x)\\3*"
+"(x)(x)(x)\\4*"
+"(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10"
+"(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11"
+"(a)\\1"
+"(a\\1)"
+"(\\1a)"
+"(\\2)(\\1)"
+"(?=a){0,10}a"
+"(?=a){1,10}a"
+"(?=a){9,10}a"
+"(?!a)?a"
+"\\1(a)"
+"(?!(a))\\1"
+"(?!\\1(a\\1)\\1)\\1"
+"\\1\\2(a(?:\\1(b\\1\\2))\\2)\\1"
+"[\\0]"
+"[\\11]"
+"[\\11a]"
+"[\\011]"
+"[\\00011]"
+"[\\118]"
+"[\\111]"
+"[\\1111]"
+"\\x60"
+"\\x3z"
+"\\c"
+"\\u0034"
+"\\u003z"
+"foo[z]*"
+"\\u{12345}"
+"\\u{12345}\\u{23456}"
+"\\u{12345}{3}"
+"\\u{12345}*"
+"\\ud808\\udf45*"
+"[\\ud808\\udf45-\\ud809\\udccc]"
+"a"
+"a|b"
+"a\\n"
+"a$"
+"a\\b!"
+"a\\Bb"
+"a*?"
+"a?"
+"a??"
+"a{0,1}?"
+"a{1,2}?"
+"a+?"
+"(a)"
+"(a)\\1"
+"(\\1a)"
+"\\1(a)"
+"a\\s"
+"a\\S"
+"a\\D"
+"a\\w"
+"a\\W"
+"a."
+"a\\q"
+"a[a]"
+"a[^a]"
+"a[a-z]"
+"a(?:b)"
+"a(?=b)"
+"a(?!b)"
+"\\x60"
+"\\u0060"
+"\\cA"
+"\\q"
+"\\1112"
+"(a)\\1"
+"(?!a)?a\\1"
+"(?:(?=a))a\\1"
+"a{}"
+"a{,}"
+"a{"
+"a{z}"
+"a{12z}"
+"a{12,"
+"a{12,3b"
+"{}"
+"{,}"
+"{"
+"{z}"
+"{1z}"
+"{12,"
+"{12,3b"
+"a"
+"abc"
+"a[bc]d"
+"a|bc"
+"ab|c"
+"a||bc"
+"(?:ab)"
+"(?:ab|cde)"
+"(?:ab)|cde"
+"(ab)"
+"(ab|cde)"
+"(ab)\\1"
+"(ab|cde)\\1"
+"(?:ab)?"
+"(?:ab)+"
+"a?"
+"a+"
+"a??"
+"a*?"
+"a+?"
+"(?:a?)?"
+"(?:a+)?"
+"(?:a?)+"
+"(?:a*)+"
+"(?:a+)+"
+"(?:a?)*"
+"(?:a*)*"
+"(?:a+)*"
+"a{0}"
+"(?:a+){0,0}"
+"a*b"
+"a+b"
+"a*b|c"
+"a+b|c"
+"(?:a{5,1000000}){3,1000000}"
+"(?:ab){4,7}"
+"a\\bc"
+"a\\sc"
+"a\\Sc"
+"a(?=b)c"
+"a(?=bbb|bb)c"
+"a(?!bbb|bb)c"
+"\xe2\x81\xa3"
+"[\xe2\x81\xa3]"
+"\xed\xb0\x80"
+"\xed\xa0\x80"
+"(\xed\xb0\x80)\x01"
+"((\xed\xa0\x80))\x02"
+"\xf0\x9f\x92\xa9"
+"\x01"
+"\x0f"
+"[-\xf0\x9f\x92\xa9]+"
+"[\xf0\x9f\x92\xa9-\xf4\x8f\xbf\xbf]"
+"(?<=)"
+"(?<=a)"
+"(?<!)"
+"(?<!a)"
+"(?<a>)"
+"(?<a>.)"
+"(?<a>.)\\k<a>"
diff --git a/Modules/_xxtestfuzz/fuzz_csv_reader_corpus/test.csv b/Modules/_xxtestfuzz/fuzz_csv_reader_corpus/test.csv
new file mode 100644
index 000000000000..8b7887d0f1d2
Binary files /dev/null and b/Modules/_xxtestfuzz/fuzz_csv_reader_corpus/test.csv differ
diff --git a/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/anchor_links b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/anchor_links
new file mode 100644
index 000000000000..d99247ccadfd
--- /dev/null
+++ b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/anchor_links
@@ -0,0 +1 @@
+XX<a\s*href=(.*?)[\s|>]
diff --git a/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/characters b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/characters
new file mode 100644
index 000000000000..0c67ee7dfc1b
--- /dev/null
+++ b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/characters
@@ -0,0 +1 @@
+XX^(Tim|Robert)\s+the\s+(Enchanter|Shrubber)$
diff --git a/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/isbn b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/isbn
new file mode 100644
index 000000000000..cce8919e7285
--- /dev/null
+++ b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/isbn
@@ -0,0 +1 @@
+XX/((978[\--– ])?[0-9][0-9\--– ]{10}[\--– ][0-9xX])|((978)?[0-9]{9}[0-9Xx])/
diff --git a/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/phone_number b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/phone_number
new file mode 100644
index 000000000000..1e2efc51103b
--- /dev/null
+++ b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/phone_number
@@ -0,0 +1 @@
+XX(\+1|1)?[ \-\.]?\(?(?<areacode>[0-9]{3})\)?[ \-\.]?(?<prefix>[0-9]{3})[ \-\.]?(?<number>[0-9]{4})[ \.]*(ext|x)?[ \.]*(?<extension>[0-9]{0,5})
diff --git a/Modules/_xxtestfuzz/fuzz_tests.txt b/Modules/_xxtestfuzz/fuzz_tests.txt
index f0121291eaa0..9d330a668ee8 100644
--- a/Modules/_xxtestfuzz/fuzz_tests.txt
+++ b/Modules/_xxtestfuzz/fuzz_tests.txt
@@ -2,3 +2,6 @@ fuzz_builtin_float
fuzz_builtin_int
fuzz_builtin_unicode
fuzz_json_loads
+fuzz_sre_compile
+fuzz_sre_match
+fuzz_csv_reader
diff --git a/Modules/_xxtestfuzz/fuzzer.c b/Modules/_xxtestfuzz/fuzzer.c
index e862a99cfb34..16104e492ab1 100644
--- a/Modules/_xxtestfuzz/fuzzer.c
+++ b/Modules/_xxtestfuzz/fuzzer.c
@@ -81,8 +81,17 @@ static int fuzz_builtin_unicode(const char* data, size_t size) {
#define MAX_JSON_TEST_SIZE 0x10000
-/* Initialized in LLVMFuzzerTestOneInput */
PyObject* json_loads_method = NULL;
+/* Called by LLVMFuzzerTestOneInput for initialization */
+static int init_json_loads() {
+ /* Import json.loads */
+ PyObject* json_module = PyImport_ImportModule("json");
+ if (json_module == NULL) {
+ return 0;
+ }
+ json_loads_method = PyObject_GetAttrString(json_module, "loads");
+ return json_loads_method != NULL;
+}
/* Fuzz json.loads(x) */
static int fuzz_json_loads(const char* data, size_t size) {
/* Since python supports arbitrarily large ints in JSON,
@@ -96,22 +105,227 @@ static int fuzz_json_loads(const char* data, size_t size) {
return 0;
}
PyObject* parsed = PyObject_CallFunctionObjArgs(json_loads_method, input_bytes, NULL);
+ if (parsed == NULL) {
+ /* Ignore ValueError as the fuzzer will more than likely
+ generate some invalid json and values */
+ if (PyErr_ExceptionMatches(PyExc_ValueError) ||
+ /* Ignore RecursionError as the fuzzer generates long sequences of
+ arrays such as `[[[...` */
+ PyErr_ExceptionMatches(PyExc_RecursionError) ||
+ /* Ignore unicode errors, invalid byte sequences are common */
+ PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)
+ ) {
+ PyErr_Clear();
+ }
+ }
+ Py_DECREF(input_bytes);
+ Py_XDECREF(parsed);
+ return 0;
+}
+
+#define MAX_RE_TEST_SIZE 0x10000
+
+PyObject* sre_compile_method = NULL;
+PyObject* sre_error_exception = NULL;
+int SRE_FLAG_DEBUG = 0;
+/* Called by LLVMFuzzerTestOneInput for initialization */
+static int init_sre_compile() {
+ /* Import sre_compile.compile and sre.error */
+ PyObject* sre_compile_module = PyImport_ImportModule("sre_compile");
+ if (sre_compile_module == NULL) {
+ return 0;
+ }
+ sre_compile_method = PyObject_GetAttrString(sre_compile_module, "compile");
+ if (sre_compile_method == NULL) {
+ return 0;
+ }
+
+ PyObject* sre_constants = PyImport_ImportModule("sre_constants");
+ if (sre_constants == NULL) {
+ return 0;
+ }
+ sre_error_exception = PyObject_GetAttrString(sre_constants, "error");
+ if (sre_error_exception == NULL) {
+ return 0;
+ }
+ PyObject* debug_flag = PyObject_GetAttrString(sre_constants, "SRE_FLAG_DEBUG");
+ if (debug_flag == NULL) {
+ return 0;
+ }
+ SRE_FLAG_DEBUG = PyLong_AsLong(debug_flag);
+ return 1;
+}
+/* Fuzz _sre.compile(x) */
+static int fuzz_sre_compile(const char* data, size_t size) {
+ /* Ignore really long regex patterns that will timeout the fuzzer */
+ if (size > MAX_RE_TEST_SIZE) {
+ return 0;
+ }
+ /* We treat the first 2 bytes of the input as a number for the flags */
+ if (size < 2) {
+ return 0;
+ }
+ uint16_t flags = ((uint16_t*) data)[0];
+ /* We remove the SRE_FLAG_DEBUG if present. This is because it
+ prints to stdout which greatly decreases fuzzing speed */
+ flags &= ~SRE_FLAG_DEBUG;
+
+ /* Pull the pattern from the remaining bytes */
+ PyObject* pattern_bytes = PyBytes_FromStringAndSize(data + 2, size - 2);
+ if (pattern_bytes == NULL) {
+ return 0;
+ }
+ PyObject* flags_obj = PyLong_FromUnsignedLong(flags);
+ if (flags_obj == NULL) {
+ Py_DECREF(pattern_bytes);
+ return 0;
+ }
+
+ /* compiled = _sre.compile(data[2:], data[0:2]) */
+ PyObject* compiled = PyObject_CallFunctionObjArgs(
+ sre_compile_method, pattern_bytes, flags_obj, NULL);
/* Ignore ValueError as the fuzzer will more than likely
- generate some invalid json and values */
- if (parsed == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) {
+ generate some invalid combination of flags */
+ if (compiled == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) {
PyErr_Clear();
}
- /* Ignore RecursionError as the fuzzer generates long sequences of
- arrays such as `[[[...` */
- if (parsed == NULL && PyErr_ExceptionMatches(PyExc_RecursionError)) {
+ /* Ignore some common errors thrown by sre_parse:
+ Overflow, Assertion and Index */
+ if (compiled == NULL && (PyErr_ExceptionMatches(PyExc_OverflowError) ||
+ PyErr_ExceptionMatches(PyExc_AssertionError) ||
+ PyErr_ExceptionMatches(PyExc_IndexError))
+ ) {
PyErr_Clear();
}
- /* Ignore unicode errors, invalid byte sequences are common */
- if (parsed == NULL && PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
+ /* Ignore re.error */
+ if (compiled == NULL && PyErr_ExceptionMatches(sre_error_exception)) {
PyErr_Clear();
}
- Py_DECREF(input_bytes);
- Py_XDECREF(parsed);
+
+ Py_DECREF(pattern_bytes);
+ Py_DECREF(flags_obj);
+ Py_XDECREF(compiled);
+ return 0;
+}
+
+/* Some random patterns used to test re.match.
+ Be careful not to add catastrophically slow regexes here, we want to
+ exercise the matching code without causing timeouts.*/
+static const char* regex_patterns[] = {
+ ".", "^", "abc", "abc|def", "^xxx$", "\\b", "()", "[a-zA-Z0-9]",
+ "abc+", "[^A-Z]", "[x]", "(?=)", "a{z}", "a+b", "a*?", "a??", "a+?",
+ "{}", "a{,}", "{", "}", "^\\(*\\d{3}\\)*( |-)*\\d{3}( |-)*\\d{4}$",
+ "(?:a*)*", "a{1,2}?"
+};
+const size_t NUM_PATTERNS = sizeof(regex_patterns) / sizeof(regex_patterns[0]);
+PyObject** compiled_patterns = NULL;
+/* Called by LLVMFuzzerTestOneInput for initialization */
+static int init_sre_match() {
+ PyObject* re_module = PyImport_ImportModule("re");
+ if (re_module == NULL) {
+ return 0;
+ }
+ compiled_patterns = (PyObject**) PyMem_RawMalloc(
+ sizeof(PyObject*) * NUM_PATTERNS);
+ if (compiled_patterns == NULL) {
+ PyErr_NoMemory();
+ return 0;
+ }
+
+ /* Precompile all the regex patterns on the first run for faster fuzzing */
+ for (size_t i = 0; i < NUM_PATTERNS; i++) {
+ PyObject* compiled = PyObject_CallMethod(
+ re_module, "compile", "y", regex_patterns[i]);
+ /* Bail if any of the patterns fail to compile */
+ if (compiled == NULL) {
+ return 0;
+ }
+ compiled_patterns[i] = compiled;
+ }
+ return 1;
+}
+/* Fuzz re.match(x) */
+static int fuzz_sre_match(const char* data, size_t size) {
+ if (size < 1 || size > MAX_RE_TEST_SIZE) {
+ return 0;
+ }
+ /* Use the first byte as a uint8_t specifying the index of the
+ regex to use */
+ unsigned char idx = (unsigned char) data[0];
+ idx = idx % NUM_PATTERNS;
+
+ /* Pull the string to match from the remaining bytes */
+ PyObject* to_match = PyBytes_FromStringAndSize(data + 1, size - 1);
+ if (to_match == NULL) {
+ return 0;
+ }
+
+ PyObject* pattern = compiled_patterns[idx];
+ PyObject* match_callable = PyObject_GetAttrString(pattern, "match");
+
+ PyObject* matches = PyObject_CallFunctionObjArgs(match_callable, to_match, NULL);
+
+ Py_XDECREF(matches);
+ Py_DECREF(match_callable);
+ Py_DECREF(to_match);
+ return 0;
+}
+
+#define MAX_CSV_TEST_SIZE 0x10000
+PyObject* csv_module = NULL;
+PyObject* csv_error = NULL;
+/* Called by LLVMFuzzerTestOneInput for initialization */
+static int init_csv_reader() {
+ /* Import csv and csv.Error */
+ csv_module = PyImport_ImportModule("csv");
+ if (csv_module == NULL) {
+ return 0;
+ }
+ csv_error = PyObject_GetAttrString(csv_module, "Error");
+ return csv_error != NULL;
+}
+/* Fuzz csv.reader([x]) */
+static int fuzz_csv_reader(const char* data, size_t size) {
+ if (size < 1 || size > MAX_CSV_TEST_SIZE) {
+ return 0;
+ }
+ /* Ignore non null-terminated strings since _csv can't handle
+ embedded nulls */
+ if (memchr(data, '\0', size) == NULL) {
+ return 0;
+ }
+
+ PyObject* s = PyUnicode_FromString(data);
+ /* Ignore exceptions until we have a valid string */
+ if (s == NULL) {
+ PyErr_Clear();
+ return 0;
+ }
+
+ /* Split on \n so we can test multiple lines */
+ PyObject* lines = PyObject_CallMethod(s, "split", "s", "\n");
+ if (lines == NULL) {
+ Py_DECREF(s);
+ return 0;
+ }
+
+ PyObject* reader = PyObject_CallMethod(csv_module, "reader", "N", lines);
+ if (reader) {
+ /* Consume all of the reader as an iterator */
+ PyObject* parsed_line;
+ while ((parsed_line = PyIter_Next(reader))) {
+ Py_DECREF(parsed_line);
+ }
+ }
+
+ /* Ignore csv.Error because we're probably going to generate
+ some bad files (embedded new-lines, unterminated quotes etc) */
+ if (PyErr_ExceptionMatches(csv_error)) {
+ PyErr_Clear();
+ }
+
+ Py_XDECREF(reader);
+ Py_DECREF(s);
return 0;
}
@@ -152,12 +366,6 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
initialize CPython ourselves on the first run. */
Py_InitializeEx(0);
}
-#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_json_loads)
- if (json_loads_method == NULL) {
- PyObject* json_module = PyImport_ImportModule("json");
- json_loads_method = PyObject_GetAttrString(json_module, "loads");
- }
-#endif
int rv = 0;
@@ -171,7 +379,48 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
rv |= _run_fuzz(data, size, fuzz_builtin_unicode);
#endif
#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_json_loads)
+ static int JSON_LOADS_INITIALIZED = 0;
+ if (!JSON_LOADS_INITIALIZED && !init_json_loads()) {
+ PyErr_Print();
+ abort();
+ } else {
+ JSON_LOADS_INITIALIZED = 1;
+ }
+
rv |= _run_fuzz(data, size, fuzz_json_loads);
+#endif
+#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_sre_compile)
+ static int SRE_COMPILE_INITIALIZED = 0;
+ if (!SRE_COMPILE_INITIALIZED && !init_sre_compile()) {
+ PyErr_Print();
+ abort();
+ } else {
+ SRE_COMPILE_INITIALIZED = 1;
+ }
+
+ rv |= _run_fuzz(data, size, fuzz_sre_compile);
+#endif
+#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_sre_match)
+ static int SRE_MATCH_INITIALIZED = 0;
+ if (!SRE_MATCH_INITIALIZED && !init_sre_match()) {
+ PyErr_Print();
+ abort();
+ } else {
+ SRE_MATCH_INITIALIZED = 1;
+ }
+
+ rv |= _run_fuzz(data, size, fuzz_sre_match);
+#endif
+#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_csv_reader)
+ static int CSV_READER_INITIALIZED = 0;
+ if (!CSV_READER_INITIALIZED && !init_csv_reader()) {
+ PyErr_Print();
+ abort();
+ } else {
+ CSV_READER_INITIALIZED = 1;
+ }
+
+ rv |= _run_fuzz(data, size, fuzz_csv_reader);
#endif
return rv;
}
1
0
bpo-29505: Add more fuzzing for re.compile, re.load and csv.reader (GH-14255)
by Gregory P. Smith 29 Jun '19
by Gregory P. Smith 29 Jun '19
29 Jun '19
https://github.com/python/cpython/commit/5cbbbd73a6acb6f96f5d6646aa7498d3df…
commit: 5cbbbd73a6acb6f96f5d6646aa7498d3dfb1706d
branch: master
author: Ammar Askar <ammar(a)ammaraskar.com>
committer: Gregory P. Smith <greg(a)krypto.org>
date: 2019-06-29T22:54:42-07:00
summary:
bpo-29505: Add more fuzzing for re.compile, re.load and csv.reader (GH-14255)
Add more fuzz testing for re.compile, re.load and csv.reader
files:
A Modules/_xxtestfuzz/dictionaries/fuzz_sre_compile.dict
A Modules/_xxtestfuzz/fuzz_csv_reader_corpus/test.csv
A Modules/_xxtestfuzz/fuzz_sre_compile_corpus/anchor_links
A Modules/_xxtestfuzz/fuzz_sre_compile_corpus/characters
A Modules/_xxtestfuzz/fuzz_sre_compile_corpus/isbn
A Modules/_xxtestfuzz/fuzz_sre_compile_corpus/phone_number
M Lib/test/test_xxtestfuzz.py
M Modules/_xxtestfuzz/fuzz_tests.txt
M Modules/_xxtestfuzz/fuzzer.c
diff --git a/Lib/test/test_xxtestfuzz.py b/Lib/test/test_xxtestfuzz.py
index 532f5fe72aa5..15924aaeff38 100644
--- a/Lib/test/test_xxtestfuzz.py
+++ b/Lib/test/test_xxtestfuzz.py
@@ -16,6 +16,8 @@ def test_sample_input_smoke_test(self):
_xxtestfuzz.run(b" ")
_xxtestfuzz.run(b"x")
_xxtestfuzz.run(b"1")
+ _xxtestfuzz.run(b"AAAAAAA")
+ _xxtestfuzz.run(b"AAAAAA\0")
if __name__ == "__main__":
diff --git a/Modules/_xxtestfuzz/dictionaries/fuzz_sre_compile.dict b/Modules/_xxtestfuzz/dictionaries/fuzz_sre_compile.dict
new file mode 100644
index 000000000000..961306a87901
--- /dev/null
+++ b/Modules/_xxtestfuzz/dictionaries/fuzz_sre_compile.dict
@@ -0,0 +1,219 @@
+"?"
+"abc"
+"()"
+"[]"
+"abc|def"
+"abc|def|ghi"
+"^xxx$"
+"ab\\b\\d\\bcd"
+"\\w|\\d"
+"a*?"
+"abc+"
+"abc+?"
+"xyz?"
+"xyz??"
+"xyz{0,1}"
+"xyz{0,1}?"
+"xyz{93}"
+"xyz{1,32}"
+"xyz{1,32}?"
+"xyz{1,}"
+"xyz{1,}?"
+"a\\fb\\nc\\rd\\te\\vf"
+"a\\nb\\bc"
+"(?:foo)"
+"(?: foo )"
+"foo|(bar|baz)|quux"
+"foo(?=bar)baz"
+"foo(?!bar)baz"
+"foo(?<=bar)baz"
+"foo(?<!bar)baz"
+"()"
+"(?=)"
+"[]"
+"[x]"
+"[xyz]"
+"[a-zA-Z0-9]"
+"[-123]"
+"[^123]"
+"]"
+"}"
+"[a-b-c]"
+"[x\\dz]"
+"[\\d-z]"
+"[\\d-\\d]"
+"[z-\\d]"
+"\\cj\\cJ\\ci\\cI\\ck\\cK"
+"\\c!"
+"\\c_"
+"\\c~"
+"[\\c!]"
+"[\\c_]"
+"[\\c~]"
+"[\\ca]"
+"[\\cz]"
+"[\\cA]"
+"[\\cZ]"
+"[\\c1]"
+"\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ "
+"[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]"
+"\\8"
+"\\9"
+"\\11"
+"\\11a"
+"\\011"
+"\\118"
+"\\111"
+"\\1111"
+"(x)(x)(x)\\1"
+"(x)(x)(x)\\2"
+"(x)(x)(x)\\3"
+"(x)(x)(x)\\4"
+"(x)(x)(x)\\1*"
+"(x)(x)(x)\\3*"
+"(x)(x)(x)\\4*"
+"(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10"
+"(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11"
+"(a)\\1"
+"(a\\1)"
+"(\\1a)"
+"(\\2)(\\1)"
+"(?=a){0,10}a"
+"(?=a){1,10}a"
+"(?=a){9,10}a"
+"(?!a)?a"
+"\\1(a)"
+"(?!(a))\\1"
+"(?!\\1(a\\1)\\1)\\1"
+"\\1\\2(a(?:\\1(b\\1\\2))\\2)\\1"
+"[\\0]"
+"[\\11]"
+"[\\11a]"
+"[\\011]"
+"[\\00011]"
+"[\\118]"
+"[\\111]"
+"[\\1111]"
+"\\x60"
+"\\x3z"
+"\\c"
+"\\u0034"
+"\\u003z"
+"foo[z]*"
+"\\u{12345}"
+"\\u{12345}\\u{23456}"
+"\\u{12345}{3}"
+"\\u{12345}*"
+"\\ud808\\udf45*"
+"[\\ud808\\udf45-\\ud809\\udccc]"
+"a"
+"a|b"
+"a\\n"
+"a$"
+"a\\b!"
+"a\\Bb"
+"a*?"
+"a?"
+"a??"
+"a{0,1}?"
+"a{1,2}?"
+"a+?"
+"(a)"
+"(a)\\1"
+"(\\1a)"
+"\\1(a)"
+"a\\s"
+"a\\S"
+"a\\D"
+"a\\w"
+"a\\W"
+"a."
+"a\\q"
+"a[a]"
+"a[^a]"
+"a[a-z]"
+"a(?:b)"
+"a(?=b)"
+"a(?!b)"
+"\\x60"
+"\\u0060"
+"\\cA"
+"\\q"
+"\\1112"
+"(a)\\1"
+"(?!a)?a\\1"
+"(?:(?=a))a\\1"
+"a{}"
+"a{,}"
+"a{"
+"a{z}"
+"a{12z}"
+"a{12,"
+"a{12,3b"
+"{}"
+"{,}"
+"{"
+"{z}"
+"{1z}"
+"{12,"
+"{12,3b"
+"a"
+"abc"
+"a[bc]d"
+"a|bc"
+"ab|c"
+"a||bc"
+"(?:ab)"
+"(?:ab|cde)"
+"(?:ab)|cde"
+"(ab)"
+"(ab|cde)"
+"(ab)\\1"
+"(ab|cde)\\1"
+"(?:ab)?"
+"(?:ab)+"
+"a?"
+"a+"
+"a??"
+"a*?"
+"a+?"
+"(?:a?)?"
+"(?:a+)?"
+"(?:a?)+"
+"(?:a*)+"
+"(?:a+)+"
+"(?:a?)*"
+"(?:a*)*"
+"(?:a+)*"
+"a{0}"
+"(?:a+){0,0}"
+"a*b"
+"a+b"
+"a*b|c"
+"a+b|c"
+"(?:a{5,1000000}){3,1000000}"
+"(?:ab){4,7}"
+"a\\bc"
+"a\\sc"
+"a\\Sc"
+"a(?=b)c"
+"a(?=bbb|bb)c"
+"a(?!bbb|bb)c"
+"\xe2\x81\xa3"
+"[\xe2\x81\xa3]"
+"\xed\xb0\x80"
+"\xed\xa0\x80"
+"(\xed\xb0\x80)\x01"
+"((\xed\xa0\x80))\x02"
+"\xf0\x9f\x92\xa9"
+"\x01"
+"\x0f"
+"[-\xf0\x9f\x92\xa9]+"
+"[\xf0\x9f\x92\xa9-\xf4\x8f\xbf\xbf]"
+"(?<=)"
+"(?<=a)"
+"(?<!)"
+"(?<!a)"
+"(?<a>)"
+"(?<a>.)"
+"(?<a>.)\\k<a>"
diff --git a/Modules/_xxtestfuzz/fuzz_csv_reader_corpus/test.csv b/Modules/_xxtestfuzz/fuzz_csv_reader_corpus/test.csv
new file mode 100644
index 000000000000..8b7887d0f1d2
Binary files /dev/null and b/Modules/_xxtestfuzz/fuzz_csv_reader_corpus/test.csv differ
diff --git a/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/anchor_links b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/anchor_links
new file mode 100644
index 000000000000..d99247ccadfd
--- /dev/null
+++ b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/anchor_links
@@ -0,0 +1 @@
+XX<a\s*href=(.*?)[\s|>]
diff --git a/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/characters b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/characters
new file mode 100644
index 000000000000..0c67ee7dfc1b
--- /dev/null
+++ b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/characters
@@ -0,0 +1 @@
+XX^(Tim|Robert)\s+the\s+(Enchanter|Shrubber)$
diff --git a/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/isbn b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/isbn
new file mode 100644
index 000000000000..cce8919e7285
--- /dev/null
+++ b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/isbn
@@ -0,0 +1 @@
+XX/((978[\--– ])?[0-9][0-9\--– ]{10}[\--– ][0-9xX])|((978)?[0-9]{9}[0-9Xx])/
diff --git a/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/phone_number b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/phone_number
new file mode 100644
index 000000000000..1e2efc51103b
--- /dev/null
+++ b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/phone_number
@@ -0,0 +1 @@
+XX(\+1|1)?[ \-\.]?\(?(?<areacode>[0-9]{3})\)?[ \-\.]?(?<prefix>[0-9]{3})[ \-\.]?(?<number>[0-9]{4})[ \.]*(ext|x)?[ \.]*(?<extension>[0-9]{0,5})
diff --git a/Modules/_xxtestfuzz/fuzz_tests.txt b/Modules/_xxtestfuzz/fuzz_tests.txt
index f0121291eaa0..9d330a668ee8 100644
--- a/Modules/_xxtestfuzz/fuzz_tests.txt
+++ b/Modules/_xxtestfuzz/fuzz_tests.txt
@@ -2,3 +2,6 @@ fuzz_builtin_float
fuzz_builtin_int
fuzz_builtin_unicode
fuzz_json_loads
+fuzz_sre_compile
+fuzz_sre_match
+fuzz_csv_reader
diff --git a/Modules/_xxtestfuzz/fuzzer.c b/Modules/_xxtestfuzz/fuzzer.c
index e862a99cfb34..16104e492ab1 100644
--- a/Modules/_xxtestfuzz/fuzzer.c
+++ b/Modules/_xxtestfuzz/fuzzer.c
@@ -81,8 +81,17 @@ static int fuzz_builtin_unicode(const char* data, size_t size) {
#define MAX_JSON_TEST_SIZE 0x10000
-/* Initialized in LLVMFuzzerTestOneInput */
PyObject* json_loads_method = NULL;
+/* Called by LLVMFuzzerTestOneInput for initialization */
+static int init_json_loads() {
+ /* Import json.loads */
+ PyObject* json_module = PyImport_ImportModule("json");
+ if (json_module == NULL) {
+ return 0;
+ }
+ json_loads_method = PyObject_GetAttrString(json_module, "loads");
+ return json_loads_method != NULL;
+}
/* Fuzz json.loads(x) */
static int fuzz_json_loads(const char* data, size_t size) {
/* Since python supports arbitrarily large ints in JSON,
@@ -96,22 +105,227 @@ static int fuzz_json_loads(const char* data, size_t size) {
return 0;
}
PyObject* parsed = PyObject_CallFunctionObjArgs(json_loads_method, input_bytes, NULL);
+ if (parsed == NULL) {
+ /* Ignore ValueError as the fuzzer will more than likely
+ generate some invalid json and values */
+ if (PyErr_ExceptionMatches(PyExc_ValueError) ||
+ /* Ignore RecursionError as the fuzzer generates long sequences of
+ arrays such as `[[[...` */
+ PyErr_ExceptionMatches(PyExc_RecursionError) ||
+ /* Ignore unicode errors, invalid byte sequences are common */
+ PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)
+ ) {
+ PyErr_Clear();
+ }
+ }
+ Py_DECREF(input_bytes);
+ Py_XDECREF(parsed);
+ return 0;
+}
+
+#define MAX_RE_TEST_SIZE 0x10000
+
+PyObject* sre_compile_method = NULL;
+PyObject* sre_error_exception = NULL;
+int SRE_FLAG_DEBUG = 0;
+/* Called by LLVMFuzzerTestOneInput for initialization */
+static int init_sre_compile() {
+ /* Import sre_compile.compile and sre.error */
+ PyObject* sre_compile_module = PyImport_ImportModule("sre_compile");
+ if (sre_compile_module == NULL) {
+ return 0;
+ }
+ sre_compile_method = PyObject_GetAttrString(sre_compile_module, "compile");
+ if (sre_compile_method == NULL) {
+ return 0;
+ }
+
+ PyObject* sre_constants = PyImport_ImportModule("sre_constants");
+ if (sre_constants == NULL) {
+ return 0;
+ }
+ sre_error_exception = PyObject_GetAttrString(sre_constants, "error");
+ if (sre_error_exception == NULL) {
+ return 0;
+ }
+ PyObject* debug_flag = PyObject_GetAttrString(sre_constants, "SRE_FLAG_DEBUG");
+ if (debug_flag == NULL) {
+ return 0;
+ }
+ SRE_FLAG_DEBUG = PyLong_AsLong(debug_flag);
+ return 1;
+}
+/* Fuzz _sre.compile(x) */
+static int fuzz_sre_compile(const char* data, size_t size) {
+ /* Ignore really long regex patterns that will timeout the fuzzer */
+ if (size > MAX_RE_TEST_SIZE) {
+ return 0;
+ }
+ /* We treat the first 2 bytes of the input as a number for the flags */
+ if (size < 2) {
+ return 0;
+ }
+ uint16_t flags = ((uint16_t*) data)[0];
+ /* We remove the SRE_FLAG_DEBUG if present. This is because it
+ prints to stdout which greatly decreases fuzzing speed */
+ flags &= ~SRE_FLAG_DEBUG;
+
+ /* Pull the pattern from the remaining bytes */
+ PyObject* pattern_bytes = PyBytes_FromStringAndSize(data + 2, size - 2);
+ if (pattern_bytes == NULL) {
+ return 0;
+ }
+ PyObject* flags_obj = PyLong_FromUnsignedLong(flags);
+ if (flags_obj == NULL) {
+ Py_DECREF(pattern_bytes);
+ return 0;
+ }
+
+ /* compiled = _sre.compile(data[2:], data[0:2]) */
+ PyObject* compiled = PyObject_CallFunctionObjArgs(
+ sre_compile_method, pattern_bytes, flags_obj, NULL);
/* Ignore ValueError as the fuzzer will more than likely
- generate some invalid json and values */
- if (parsed == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) {
+ generate some invalid combination of flags */
+ if (compiled == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) {
PyErr_Clear();
}
- /* Ignore RecursionError as the fuzzer generates long sequences of
- arrays such as `[[[...` */
- if (parsed == NULL && PyErr_ExceptionMatches(PyExc_RecursionError)) {
+ /* Ignore some common errors thrown by sre_parse:
+ Overflow, Assertion and Index */
+ if (compiled == NULL && (PyErr_ExceptionMatches(PyExc_OverflowError) ||
+ PyErr_ExceptionMatches(PyExc_AssertionError) ||
+ PyErr_ExceptionMatches(PyExc_IndexError))
+ ) {
PyErr_Clear();
}
- /* Ignore unicode errors, invalid byte sequences are common */
- if (parsed == NULL && PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
+ /* Ignore re.error */
+ if (compiled == NULL && PyErr_ExceptionMatches(sre_error_exception)) {
PyErr_Clear();
}
- Py_DECREF(input_bytes);
- Py_XDECREF(parsed);
+
+ Py_DECREF(pattern_bytes);
+ Py_DECREF(flags_obj);
+ Py_XDECREF(compiled);
+ return 0;
+}
+
+/* Some random patterns used to test re.match.
+ Be careful not to add catastrophically slow regexes here, we want to
+ exercise the matching code without causing timeouts.*/
+static const char* regex_patterns[] = {
+ ".", "^", "abc", "abc|def", "^xxx$", "\\b", "()", "[a-zA-Z0-9]",
+ "abc+", "[^A-Z]", "[x]", "(?=)", "a{z}", "a+b", "a*?", "a??", "a+?",
+ "{}", "a{,}", "{", "}", "^\\(*\\d{3}\\)*( |-)*\\d{3}( |-)*\\d{4}$",
+ "(?:a*)*", "a{1,2}?"
+};
+const size_t NUM_PATTERNS = sizeof(regex_patterns) / sizeof(regex_patterns[0]);
+PyObject** compiled_patterns = NULL;
+/* Called by LLVMFuzzerTestOneInput for initialization */
+static int init_sre_match() {
+ PyObject* re_module = PyImport_ImportModule("re");
+ if (re_module == NULL) {
+ return 0;
+ }
+ compiled_patterns = (PyObject**) PyMem_RawMalloc(
+ sizeof(PyObject*) * NUM_PATTERNS);
+ if (compiled_patterns == NULL) {
+ PyErr_NoMemory();
+ return 0;
+ }
+
+ /* Precompile all the regex patterns on the first run for faster fuzzing */
+ for (size_t i = 0; i < NUM_PATTERNS; i++) {
+ PyObject* compiled = PyObject_CallMethod(
+ re_module, "compile", "y", regex_patterns[i]);
+ /* Bail if any of the patterns fail to compile */
+ if (compiled == NULL) {
+ return 0;
+ }
+ compiled_patterns[i] = compiled;
+ }
+ return 1;
+}
+/* Fuzz re.match(x) */
+static int fuzz_sre_match(const char* data, size_t size) {
+ if (size < 1 || size > MAX_RE_TEST_SIZE) {
+ return 0;
+ }
+ /* Use the first byte as a uint8_t specifying the index of the
+ regex to use */
+ unsigned char idx = (unsigned char) data[0];
+ idx = idx % NUM_PATTERNS;
+
+ /* Pull the string to match from the remaining bytes */
+ PyObject* to_match = PyBytes_FromStringAndSize(data + 1, size - 1);
+ if (to_match == NULL) {
+ return 0;
+ }
+
+ PyObject* pattern = compiled_patterns[idx];
+ PyObject* match_callable = PyObject_GetAttrString(pattern, "match");
+
+ PyObject* matches = PyObject_CallFunctionObjArgs(match_callable, to_match, NULL);
+
+ Py_XDECREF(matches);
+ Py_DECREF(match_callable);
+ Py_DECREF(to_match);
+ return 0;
+}
+
+#define MAX_CSV_TEST_SIZE 0x10000
+PyObject* csv_module = NULL;
+PyObject* csv_error = NULL;
+/* Called by LLVMFuzzerTestOneInput for initialization */
+static int init_csv_reader() {
+ /* Import csv and csv.Error */
+ csv_module = PyImport_ImportModule("csv");
+ if (csv_module == NULL) {
+ return 0;
+ }
+ csv_error = PyObject_GetAttrString(csv_module, "Error");
+ return csv_error != NULL;
+}
+/* Fuzz csv.reader([x]) */
+static int fuzz_csv_reader(const char* data, size_t size) {
+ if (size < 1 || size > MAX_CSV_TEST_SIZE) {
+ return 0;
+ }
+ /* Ignore non null-terminated strings since _csv can't handle
+ embedded nulls */
+ if (memchr(data, '\0', size) == NULL) {
+ return 0;
+ }
+
+ PyObject* s = PyUnicode_FromString(data);
+ /* Ignore exceptions until we have a valid string */
+ if (s == NULL) {
+ PyErr_Clear();
+ return 0;
+ }
+
+ /* Split on \n so we can test multiple lines */
+ PyObject* lines = PyObject_CallMethod(s, "split", "s", "\n");
+ if (lines == NULL) {
+ Py_DECREF(s);
+ return 0;
+ }
+
+ PyObject* reader = PyObject_CallMethod(csv_module, "reader", "N", lines);
+ if (reader) {
+ /* Consume all of the reader as an iterator */
+ PyObject* parsed_line;
+ while ((parsed_line = PyIter_Next(reader))) {
+ Py_DECREF(parsed_line);
+ }
+ }
+
+ /* Ignore csv.Error because we're probably going to generate
+ some bad files (embedded new-lines, unterminated quotes etc) */
+ if (PyErr_ExceptionMatches(csv_error)) {
+ PyErr_Clear();
+ }
+
+ Py_XDECREF(reader);
+ Py_DECREF(s);
return 0;
}
@@ -152,12 +366,6 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
initialize CPython ourselves on the first run. */
Py_InitializeEx(0);
}
-#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_json_loads)
- if (json_loads_method == NULL) {
- PyObject* json_module = PyImport_ImportModule("json");
- json_loads_method = PyObject_GetAttrString(json_module, "loads");
- }
-#endif
int rv = 0;
@@ -171,7 +379,48 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
rv |= _run_fuzz(data, size, fuzz_builtin_unicode);
#endif
#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_json_loads)
+ static int JSON_LOADS_INITIALIZED = 0;
+ if (!JSON_LOADS_INITIALIZED && !init_json_loads()) {
+ PyErr_Print();
+ abort();
+ } else {
+ JSON_LOADS_INITIALIZED = 1;
+ }
+
rv |= _run_fuzz(data, size, fuzz_json_loads);
+#endif
+#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_sre_compile)
+ static int SRE_COMPILE_INITIALIZED = 0;
+ if (!SRE_COMPILE_INITIALIZED && !init_sre_compile()) {
+ PyErr_Print();
+ abort();
+ } else {
+ SRE_COMPILE_INITIALIZED = 1;
+ }
+
+ rv |= _run_fuzz(data, size, fuzz_sre_compile);
+#endif
+#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_sre_match)
+ static int SRE_MATCH_INITIALIZED = 0;
+ if (!SRE_MATCH_INITIALIZED && !init_sre_match()) {
+ PyErr_Print();
+ abort();
+ } else {
+ SRE_MATCH_INITIALIZED = 1;
+ }
+
+ rv |= _run_fuzz(data, size, fuzz_sre_match);
+#endif
+#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_csv_reader)
+ static int CSV_READER_INITIALIZED = 0;
+ if (!CSV_READER_INITIALIZED && !init_csv_reader()) {
+ PyErr_Print();
+ abort();
+ } else {
+ CSV_READER_INITIALIZED = 1;
+ }
+
+ rv |= _run_fuzz(data, size, fuzz_csv_reader);
#endif
return rv;
}
1
0
[2.7] bpo-30754: Document textwrap.dedent blank line behavior. (GH-14469) (GH-14475)
by Miss Islington (bot) 29 Jun '19
by Miss Islington (bot) 29 Jun '19
29 Jun '19
https://github.com/python/cpython/commit/bc60c47169d1cb33f6fbe1ed64c09a536e…
commit: bc60c47169d1cb33f6fbe1ed64c09a536e82e1c3
branch: 2.7
author: Miss Islington (bot) <31488909+miss-islington(a)users.noreply.github.com>
committer: GitHub <noreply(a)github.com>
date: 2019-06-29T21:41:55-07:00
summary:
[2.7] bpo-30754: Document textwrap.dedent blank line behavior. (GH-14469) (GH-14475)
* Added documentation for textwrap.dedent behavior.
(cherry picked from commit eb97b9211e7c99841d6cae8c63893b3525d5a401)
Co-authored-by: tmblweed <tmblweed(a)users.noreply.github.com>
https://bugs.python.org/issue30754
files:
M Doc/library/textwrap.rst
M Lib/textwrap.py
diff --git a/Doc/library/textwrap.rst b/Doc/library/textwrap.rst
index a50600e464dc..6b0decb5a67f 100644
--- a/Doc/library/textwrap.rst
+++ b/Doc/library/textwrap.rst
@@ -66,6 +66,9 @@ indentation from strings that have unwanted whitespace to the left of the text.
of this module incorrectly expanded tabs before searching for common leading
whitespace.)
+ Lines containing only whitespace are ignored in the input and normalized to a
+ single newline character in the output.
+
For example::
def test():
diff --git a/Lib/textwrap.py b/Lib/textwrap.py
index 5c2e4fa5237c..8d91ffa08176 100644
--- a/Lib/textwrap.py
+++ b/Lib/textwrap.py
@@ -383,6 +383,8 @@ def dedent(text):
considered to have no common leading whitespace. (This behaviour is
new in Python 2.5; older versions of this module incorrectly
expanded tabs before searching for common leading whitespace.)
+
+ Entirely blank lines are normalized to a newline character.
"""
# Look for the longest leading string of spaces and tabs common to
# all lines.
1
0
bpo-30754: Document textwrap.dedent blank line behavior. (GH-14469)
by Miss Islington (bot) 29 Jun '19
by Miss Islington (bot) 29 Jun '19
29 Jun '19
https://github.com/python/cpython/commit/3e133c401a51f08404b68f11d921f0b406…
commit: 3e133c401a51f08404b68f11d921f0b406283741
branch: 3.8
author: Miss Islington (bot) <31488909+miss-islington(a)users.noreply.github.com>
committer: GitHub <noreply(a)github.com>
date: 2019-06-29T21:40:41-07:00
summary:
bpo-30754: Document textwrap.dedent blank line behavior. (GH-14469)
* Added documentation for textwrap.dedent behavior.
* Remove an obsolete note about pre-2.5 behavior from the docstring.
(cherry picked from commit eb97b9211e7c99841d6cae8c63893b3525d5a401)
Co-authored-by: tmblweed <tmblweed(a)users.noreply.github.com>
files:
M Doc/library/textwrap.rst
M Lib/textwrap.py
diff --git a/Doc/library/textwrap.rst b/Doc/library/textwrap.rst
index d254466c9a32..0f11ef401569 100644
--- a/Doc/library/textwrap.rst
+++ b/Doc/library/textwrap.rst
@@ -77,6 +77,9 @@ functions should be good enough; otherwise, you should use an instance of
equal: the lines ``" hello"`` and ``"\thello"`` are considered to have no
common leading whitespace.
+ Lines containing only whitespace are ignored in the input and normalized to a
+ single newline character in the output.
+
For example::
def test():
diff --git a/Lib/textwrap.py b/Lib/textwrap.py
index 8103f347452d..30e693c8de03 100644
--- a/Lib/textwrap.py
+++ b/Lib/textwrap.py
@@ -420,9 +420,9 @@ def dedent(text):
Note that tabs and spaces are both treated as whitespace, but they
are not equal: the lines " hello" and "\\thello" are
- considered to have no common leading whitespace. (This behaviour is
- new in Python 2.5; older versions of this module incorrectly
- expanded tabs before searching for common leading whitespace.)
+ considered to have no common leading whitespace.
+
+ Entirely blank lines are normalized to a newline character.
"""
# Look for the longest leading string of spaces and tabs common to
# all lines.
1
0
bpo-30754: Document textwrap.dedent blank line behavior. (GH-14469)
by Miss Islington (bot) 29 Jun '19
by Miss Islington (bot) 29 Jun '19
29 Jun '19
https://github.com/python/cpython/commit/e2e41cd114ae761fbfee4e7c6539f5df5c…
commit: e2e41cd114ae761fbfee4e7c6539f5df5c8c7116
branch: 3.7
author: Miss Islington (bot) <31488909+miss-islington(a)users.noreply.github.com>
committer: GitHub <noreply(a)github.com>
date: 2019-06-29T21:38:11-07:00
summary:
bpo-30754: Document textwrap.dedent blank line behavior. (GH-14469)
* Added documentation for textwrap.dedent behavior.
* Remove an obsolete note about pre-2.5 behavior from the docstring.
(cherry picked from commit eb97b9211e7c99841d6cae8c63893b3525d5a401)
Co-authored-by: tmblweed <tmblweed(a)users.noreply.github.com>
files:
M Doc/library/textwrap.rst
M Lib/textwrap.py
diff --git a/Doc/library/textwrap.rst b/Doc/library/textwrap.rst
index d254466c9a32..0f11ef401569 100644
--- a/Doc/library/textwrap.rst
+++ b/Doc/library/textwrap.rst
@@ -77,6 +77,9 @@ functions should be good enough; otherwise, you should use an instance of
equal: the lines ``" hello"`` and ``"\thello"`` are considered to have no
common leading whitespace.
+ Lines containing only whitespace are ignored in the input and normalized to a
+ single newline character in the output.
+
For example::
def test():
diff --git a/Lib/textwrap.py b/Lib/textwrap.py
index 8103f347452d..30e693c8de03 100644
--- a/Lib/textwrap.py
+++ b/Lib/textwrap.py
@@ -420,9 +420,9 @@ def dedent(text):
Note that tabs and spaces are both treated as whitespace, but they
are not equal: the lines " hello" and "\\thello" are
- considered to have no common leading whitespace. (This behaviour is
- new in Python 2.5; older versions of this module incorrectly
- expanded tabs before searching for common leading whitespace.)
+ considered to have no common leading whitespace.
+
+ Entirely blank lines are normalized to a newline character.
"""
# Look for the longest leading string of spaces and tabs common to
# all lines.
1
0
29 Jun '19
https://github.com/python/cpython/commit/eb97b9211e7c99841d6cae8c63893b3525d5a401
commit: eb97b9211e7c99841d6cae8c63893b3525d5a401
branch: master
author: tmblweed <tmblweed(a)users.noreply.github.com>
committer: Gregory P. Smith <greg(a)krypto.org>
date: 2019-06-29T21:20:03-07:00
summary:
bpo-30754: Document textwrap.dedent blank line behavior. (GH-14469)
* Added documentation for textwrap.dedent behavior.
* Remove an obsolete note about pre-2.5 behavior from the docstring.
files:
M Doc/library/textwrap.rst
M Lib/textwrap.py
diff --git a/Doc/library/textwrap.rst b/Doc/library/textwrap.rst
index d254466c9a32..0f11ef401569 100644
--- a/Doc/library/textwrap.rst
+++ b/Doc/library/textwrap.rst
@@ -77,6 +77,9 @@ functions should be good enough; otherwise, you should use an instance of
equal: the lines ``" hello"`` and ``"\thello"`` are considered to have no
common leading whitespace.
+ Lines containing only whitespace are ignored in the input and normalized to a
+ single newline character in the output.
+
For example::
def test():
diff --git a/Lib/textwrap.py b/Lib/textwrap.py
index 8103f347452d..30e693c8de03 100644
--- a/Lib/textwrap.py
+++ b/Lib/textwrap.py
@@ -420,9 +420,9 @@ def dedent(text):
Note that tabs and spaces are both treated as whitespace, but they
are not equal: the lines " hello" and "\\thello" are
- considered to have no common leading whitespace. (This behaviour is
- new in Python 2.5; older versions of this module incorrectly
- expanded tabs before searching for common leading whitespace.)
+ considered to have no common leading whitespace.
+
+ Entirely blank lines are normalized to a newline character.
"""
# Look for the longest leading string of spaces and tabs common to
# all lines.
1
0