[3.7] bpo-34485: stdout uses surrogateescape on POSIX locale (GH-8986) (GH-8987)
![](https://secure.gravatar.com/avatar/cc7737cd64a84f1b5c61a160798e97ee.jpg?s=120&d=mm&r=g)
https://github.com/python/cpython/commit/0b9ea4b211b24464c7d38f63e45e51c275c52dcd commit: 0b9ea4b211b24464c7d38f63e45e51c275c52dcd branch: 3.7 author: Victor Stinner <vstinner@redhat.com> committer: GitHub <noreply@github.com> date: 2018-08-29T11:01:33+02:00 summary: [3.7] bpo-34485: stdout uses surrogateescape on POSIX locale (GH-8986) (GH-8987) * bpo-34485: stdout uses surrogateescape on POSIX locale (GH-8986) Standard streams like sys.stdout now use the "surrogateescape" error handler, instead of "strict", on the POSIX locale (when the C locale is not coerced and the UTF-8 Mode is disabled). Add tests on sys.stdout.errors with LC_ALL=POSIX. Fix the error handler of standard streams like sys.stdout: PYTHONIOENCODING=":" is now ignored instead of setting the error handler to "strict". (cherry picked from commit 315877dc361d554bec34b4b62c270479ad36a1be) files: A Misc/NEWS.d/next/Core and Builtins/2018-08-28-23-01-14.bpo-34485.dq1Kqk.rst A Misc/NEWS.d/next/Core and Builtins/2018-08-29-09-27-47.bpo-34485.5aJCmw.rst M Lib/test/test_sys.py M Lib/test/test_utf8_mode.py M Python/pylifecycle.c diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 336ae447a8de..27f75901c63f 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -654,10 +654,10 @@ def test_getfilesystemencoding(self): expected = None self.check_fsencoding(fs_encoding, expected) - def c_locale_get_error_handler(self, isolated=False, encoding=None): + def c_locale_get_error_handler(self, locale, isolated=False, encoding=None): # Force the POSIX locale env = os.environ.copy() - env["LC_ALL"] = "C" + env["LC_ALL"] = locale env["PYTHONCOERCECLOCALE"] = "0" code = '\n'.join(( 'import sys', @@ -683,44 +683,50 @@ def c_locale_get_error_handler(self, isolated=False, encoding=None): stdout, stderr = p.communicate() return stdout - def test_c_locale_surrogateescape(self): - out = self.c_locale_get_error_handler(isolated=True) + def check_locale_surrogateescape(self, locale): + out = 
self.c_locale_get_error_handler(locale, isolated=True) self.assertEqual(out, 'stdin: surrogateescape\n' 'stdout: surrogateescape\n' 'stderr: backslashreplace\n') # replace the default error handler - out = self.c_locale_get_error_handler(encoding=':ignore') + out = self.c_locale_get_error_handler(locale, encoding=':ignore') self.assertEqual(out, 'stdin: ignore\n' 'stdout: ignore\n' 'stderr: backslashreplace\n') # force the encoding - out = self.c_locale_get_error_handler(encoding='iso8859-1') + out = self.c_locale_get_error_handler(locale, encoding='iso8859-1') self.assertEqual(out, 'stdin: strict\n' 'stdout: strict\n' 'stderr: backslashreplace\n') - out = self.c_locale_get_error_handler(encoding='iso8859-1:') + out = self.c_locale_get_error_handler(locale, encoding='iso8859-1:') self.assertEqual(out, 'stdin: strict\n' 'stdout: strict\n' 'stderr: backslashreplace\n') # have no any effect - out = self.c_locale_get_error_handler(encoding=':') + out = self.c_locale_get_error_handler(locale, encoding=':') self.assertEqual(out, - 'stdin: strict\n' - 'stdout: strict\n' + 'stdin: surrogateescape\n' + 'stdout: surrogateescape\n' 'stderr: backslashreplace\n') - out = self.c_locale_get_error_handler(encoding='') + out = self.c_locale_get_error_handler(locale, encoding='') self.assertEqual(out, 'stdin: surrogateescape\n' 'stdout: surrogateescape\n' 'stderr: backslashreplace\n') + def test_c_locale_surrogateescape(self): + self.check_locale_surrogateescape('C') + + def test_posix_locale_surrogateescape(self): + self.check_locale_surrogateescape('POSIX') + def test_implementation(self): # This test applies to all implementations equally. 
diff --git a/Lib/test/test_utf8_mode.py b/Lib/test/test_utf8_mode.py index 4a16b7304689..554abfab3163 100644 --- a/Lib/test/test_utf8_mode.py +++ b/Lib/test/test_utf8_mode.py @@ -146,9 +146,9 @@ def test_stdio(self): out = self.get_output('-X', 'utf8', '-c', code, PYTHONIOENCODING=":namereplace") self.assertEqual(out.splitlines(), - ['stdin: UTF-8/namereplace', - 'stdout: UTF-8/namereplace', - 'stderr: UTF-8/backslashreplace']) + ['stdin: utf-8/namereplace', + 'stdout: utf-8/namereplace', + 'stderr: utf-8/backslashreplace']) def test_io(self): code = textwrap.dedent(''' diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-08-28-23-01-14.bpo-34485.dq1Kqk.rst b/Misc/NEWS.d/next/Core and Builtins/2018-08-28-23-01-14.bpo-34485.dq1Kqk.rst new file mode 100644 index 000000000000..5ca373aeab6d --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2018-08-28-23-01-14.bpo-34485.dq1Kqk.rst @@ -0,0 +1,3 @@ +Fix the error handler of standard streams like sys.stdout: +PYTHONIOENCODING=":" is now ignored instead of setting the error handler to +"strict". diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-08-29-09-27-47.bpo-34485.5aJCmw.rst b/Misc/NEWS.d/next/Core and Builtins/2018-08-29-09-27-47.bpo-34485.5aJCmw.rst new file mode 100644 index 000000000000..893e4f573f16 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2018-08-29-09-27-47.bpo-34485.5aJCmw.rst @@ -0,0 +1,3 @@ +Standard streams like sys.stdout now use the "surrogateescape" error +handler, instead of "strict", on the POSIX locale (when the C locale is not +coerced and the UTF-8 Mode is disabled). 
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index fc4ee06f144f..539d62a2f0f4 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -423,13 +423,13 @@ get_default_standard_stream_error_handler(void) { const char *ctype_loc = setlocale(LC_CTYPE, NULL); if (ctype_loc != NULL) { - /* "surrogateescape" is the default in the legacy C locale */ - if (strcmp(ctype_loc, "C") == 0) { + /* surrogateescape is the default in the legacy C and POSIX locales */ + if (strcmp(ctype_loc, "C") == 0 || strcmp(ctype_loc, "POSIX") == 0) { return "surrogateescape"; } #ifdef PY_COERCE_C_LOCALE - /* "surrogateescape" is the default in locale coercion target locales */ + /* surrogateescape is the default in locale coercion target locales */ const _LocaleCoercionTarget *target = NULL; for (target = _TARGET_LOCALES; target->locale_name; target++) { if (strcmp(ctype_loc, target->locale_name) == 0) { @@ -440,7 +440,7 @@ get_default_standard_stream_error_handler(void) } /* Otherwise return NULL to request the typical default error handler */ - return NULL; + return "strict"; } #ifdef PY_COERCE_C_LOCALE @@ -1851,20 +1851,42 @@ init_sys_streams(PyInterpreterState *interp) if (err) { *err = '\0'; err++; - if (*err && !errors) { - errors = err; + if (!err[0]) { + err = NULL; } } - if (*pythonioencoding && !encoding) { - encoding = pythonioencoding; + + /* Does PYTHONIOENCODING contain an encoding? */ + if (pythonioencoding[0]) { + if (!encoding) { + encoding = pythonioencoding; + } + + /* If the encoding is set but not the error handler, + use "strict" error handler by default. + PYTHONIOENCODING=latin1 behaves as + PYTHONIOENCODING=latin1:strict. 
*/ + if (!err) { + err = "strict"; + } + } + + if (!errors && err != NULL) { + errors = err; } } - else if (interp->core_config.utf8_mode) { - encoding = "utf-8"; - errors = "surrogateescape"; + + if (interp->core_config.utf8_mode) { + if (!encoding) { + encoding = "utf-8"; + } + if (!errors) { + errors = "surrogateescape"; + } } - if (!errors && !pythonioencoding) { + + if (!errors) { /* Choose the default error handler based on the current locale */ errors = get_default_standard_stream_error_handler(); }
Participants (1):
- Victor Stinner