[Python-checkins] bpo-34527: POSIX locale enables the UTF-8 Mode (GH-8972) (GH-8974)
Victor Stinner
webhook-mailer at python.org
Tue Aug 28 07:51:25 EDT 2018
https://github.com/python/cpython/commit/65ef7425a32ee411d8047a4fad0fc6bb9ff733b1
commit: 65ef7425a32ee411d8047a4fad0fc6bb9ff733b1
branch: 3.7
author: Victor Stinner <vstinner at redhat.com>
committer: GitHub <noreply at github.com>
date: 2018-08-28T13:51:20+02:00
summary:
bpo-34527: POSIX locale enables the UTF-8 Mode (GH-8972) (GH-8974)
* The UTF-8 Mode is now also enabled by the "POSIX" locale, not only
by the "C" locale.
* On FreeBSD, Py_DecodeLocale() and Py_EncodeLocale() now also force
the ASCII encoding if the LC_CTYPE locale is "POSIX", not only if
the LC_CTYPE locale is "C".
* test_utf8_mode.test_cmd_line() checks also that the command line
arguments are decoded from UTF-8 when the UTF-8 Mode is enabled
with POSIX locale or C locale.
(cherry picked from commit 5cb258950ce9b69b1f65646431c464c0c17b1510)
files:
A Misc/NEWS.d/next/Core and Builtins/2018-08-28-11-52-13.bpo-34527.sh5MQJ.rst
A Misc/NEWS.d/next/Core and Builtins/2018-08-28-11-53-39.bpo-34527.aBEX9b.rst
M Lib/test/test_utf8_mode.py
M Modules/main.c
M Python/fileutils.c
diff --git a/Lib/test/test_utf8_mode.py b/Lib/test/test_utf8_mode.py
index 26e2e13ec533..4a16b7304689 100644
--- a/Lib/test/test_utf8_mode.py
+++ b/Lib/test/test_utf8_mode.py
@@ -12,6 +12,7 @@
MS_WINDOWS = (sys.platform == 'win32')
+POSIX_LOCALES = ('C', 'POSIX')
class UTF8ModeTests(unittest.TestCase):
@@ -23,7 +24,7 @@ class UTF8ModeTests(unittest.TestCase):
def posix_locale(self):
loc = locale.setlocale(locale.LC_CTYPE, None)
- return (loc == 'C')
+ return (loc in POSIX_LOCALES)
def get_output(self, *args, failure=False, **kw):
kw = dict(self.DEFAULT_ENV, **kw)
@@ -39,8 +40,10 @@ def get_output(self, *args, failure=False, **kw):
def test_posix_locale(self):
code = 'import sys; print(sys.flags.utf8_mode)'
- out = self.get_output('-c', code, LC_ALL='C')
- self.assertEqual(out, '1')
+ for loc in POSIX_LOCALES:
+ with self.subTest(LC_ALL=loc):
+ out = self.get_output('-c', code, LC_ALL=loc)
+ self.assertEqual(out, '1')
def test_xoption(self):
code = 'import sys; print(sys.flags.utf8_mode)'
@@ -201,8 +204,10 @@ def test_locale_getpreferredencoding(self):
out = self.get_output('-X', 'utf8', '-c', code)
self.assertEqual(out, 'UTF-8 UTF-8')
- out = self.get_output('-X', 'utf8', '-c', code, LC_ALL='C')
- self.assertEqual(out, 'UTF-8 UTF-8')
+ for loc in POSIX_LOCALES:
+ with self.subTest(LC_ALL=loc):
+ out = self.get_output('-X', 'utf8', '-c', code, LC_ALL=loc)
+ self.assertEqual(out, 'UTF-8 UTF-8')
@unittest.skipIf(MS_WINDOWS, 'test specific to Unix')
def test_cmd_line(self):
@@ -217,11 +222,17 @@ def check(utf8_opt, expected, **kw):
self.assertEqual(args, ascii(expected), out)
check('utf8', [arg_utf8])
+ for loc in POSIX_LOCALES:
+ with self.subTest(LC_ALL=loc):
+ check('utf8', [arg_utf8], LC_ALL=loc)
+
if sys.platform == 'darwin' or support.is_android:
c_arg = arg_utf8
else:
c_arg = arg_ascii
- check('utf8=0', [c_arg], LC_ALL='C')
+ for loc in POSIX_LOCALES:
+ with self.subTest(LC_ALL=loc):
+ check('utf8=0', [c_arg], LC_ALL=loc)
def test_optim_level(self):
# CPython: check that Py_Main() doesn't increment Py_OptimizeFlag
diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-08-28-11-52-13.bpo-34527.sh5MQJ.rst b/Misc/NEWS.d/next/Core and Builtins/2018-08-28-11-52-13.bpo-34527.sh5MQJ.rst
new file mode 100644
index 000000000000..280a8922edc3
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2018-08-28-11-52-13.bpo-34527.sh5MQJ.rst
@@ -0,0 +1,2 @@
+The UTF-8 Mode is now also enabled by the "POSIX" locale, not only by the "C"
+locale.
diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-08-28-11-53-39.bpo-34527.aBEX9b.rst b/Misc/NEWS.d/next/Core and Builtins/2018-08-28-11-53-39.bpo-34527.aBEX9b.rst
new file mode 100644
index 000000000000..9fce794305cc
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2018-08-28-11-53-39.bpo-34527.aBEX9b.rst
@@ -0,0 +1,3 @@
+On FreeBSD, Py_DecodeLocale() and Py_EncodeLocale() now also forces the
+ASCII encoding if the LC_CTYPE locale is "POSIX", not only if the LC_CTYPE
+locale is "C".
diff --git a/Modules/main.c b/Modules/main.c
index 45148dc98391..856f92c9d9bb 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -2102,15 +2102,25 @@ pymain_read_conf(_PyMain *pymain, _Py_CommandLineDetails *cmdline)
static void
config_init_locale(_PyCoreConfig *config)
{
- if (_Py_LegacyLocaleDetected()) {
- /* POSIX locale: enable C locale coercion and UTF-8 Mode */
- if (config->utf8_mode < 0) {
- config->utf8_mode = 1;
- }
- if (config->coerce_c_locale < 0) {
+ if (config->coerce_c_locale < 0) {
+ /* The C locale enables the C locale coercion (PEP 538) */
+ if (_Py_LegacyLocaleDetected()) {
config->coerce_c_locale = 1;
}
}
+
+#ifndef MS_WINDOWS
+ if (config->utf8_mode < 0) {
+ /* The C locale and the POSIX locale enable the UTF-8 Mode (PEP 540) */
+ const char *ctype_loc = setlocale(LC_CTYPE, NULL);
+ if (ctype_loc != NULL
+ && (strcmp(ctype_loc, "C") == 0
+ || strcmp(ctype_loc, "POSIX") == 0))
+ {
+ config->utf8_mode = 1;
+ }
+ }
+#endif
}
diff --git a/Python/fileutils.c b/Python/fileutils.c
index 35869c81ac9f..b413f4e1e682 100644
--- a/Python/fileutils.c
+++ b/Python/fileutils.c
@@ -128,7 +128,7 @@ check_force_ascii(void)
loc = setlocale(LC_CTYPE, NULL);
if (loc == NULL)
goto error;
- if (strcmp(loc, "C") != 0) {
+ if (strcmp(loc, "C") != 0 && strcmp(loc, "POSIX") != 0) {
/* the LC_CTYPE locale is different than C */
return 0;
}
More information about the Python-checkins
mailing list