[Python-checkins] bpo-34589: Add -X coerce_c_locale command line option (GH-9378)

Victor Stinner webhook-mailer at python.org
Mon Sep 17 20:19:31 EDT 2018


https://github.com/python/cpython/commit/dbdee0073cf0b88fe541980ace1f650900f455cc
commit: dbdee0073cf0b88fe541980ace1f650900f455cc
branch: master
author: Victor Stinner <vstinner at redhat.com>
committer: GitHub <noreply at github.com>
date: 2018-09-17T17:19:26-07:00
summary:

bpo-34589: Add -X coerce_c_locale command line option (GH-9378)

Add a new -X coerce_c_locale command line option to control C locale
coercion (PEP 538).

files:
A Misc/NEWS.d/next/Core and Builtins/2018-09-18-01-41-33.bpo-34589.lLVTYc.rst
M Doc/using/cmdline.rst
M Doc/whatsnew/3.7.rst
M Lib/test/test_c_locale_coercion.py
M Lib/test/test_cmd_line.py
M Lib/test/test_sys.py
M Lib/test/test_utf8_mode.py
M Python/coreconfig.c

diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst
index b61df8a4b77d..cd3b2410c84d 100644
--- a/Doc/using/cmdline.rst
+++ b/Doc/using/cmdline.rst
@@ -438,13 +438,22 @@ Miscellaneous options
      * Set the :attr:`~sys.flags.dev_mode` attribute of :attr:`sys.flags` to
        ``True``
 
-   * ``-X utf8`` enables UTF-8 mode for operating system interfaces, overriding
+   * ``-X utf8`` enables UTF-8 mode (:pep:`540`) for operating system interfaces, overriding
      the default locale-aware mode. ``-X utf8=0`` explicitly disables UTF-8
      mode (even when it would otherwise activate automatically).
      See :envvar:`PYTHONUTF8` for more details.
    * ``-X pycache_prefix=PATH`` enables writing ``.pyc`` files to a parallel
      tree rooted at the given directory instead of to the code tree. See also
      :envvar:`PYTHONPYCACHEPREFIX`.
+   * ``-X coerce_c_locale`` or ``-X coerce_c_locale=1`` tries to coerce the C
+     locale (:pep:`538`).
+     ``-X coerce_c_locale=0`` skips coercing the legacy ASCII-based C and POSIX
+     locales to a more capable UTF-8 based alternative.
+     ``-X coerce_c_locale=warn`` will cause Python to emit warning messages on
+     ``stderr`` if either the locale coercion activates, or else if a locale
+     that *would* have triggered coercion is still active when the Python
+     runtime is initialized.
+     See :envvar:`PYTHONCOERCECLOCALE` for more details.
 
    It also allows passing arbitrary values and retrieving them through the
    :data:`sys._xoptions` dictionary.
@@ -464,6 +473,9 @@ Miscellaneous options
    .. versionadded:: 3.7
       The ``-X importtime``, ``-X dev`` and ``-X utf8`` options.
 
+   .. versionadded:: 3.7.1
+      The ``-X coerce_c_locale`` option.
+
    .. versionadded:: 3.8
       The ``-X pycache_prefix`` option.
 
@@ -850,6 +862,8 @@ conflict.
    order to force the interpreter to use ``ASCII`` instead of ``UTF-8`` for
    system interfaces.
 
+   Also available as the :option:`-X` ``coerce_c_locale`` option.
+
    Availability: \*nix
 
    .. versionadded:: 3.7
diff --git a/Doc/whatsnew/3.7.rst b/Doc/whatsnew/3.7.rst
index f53a0268738a..6cd9d46a42b0 100644
--- a/Doc/whatsnew/3.7.rst
+++ b/Doc/whatsnew/3.7.rst
@@ -2494,3 +2494,10 @@ versions, it respected an ill-defined subset of those environment variables,
 while in Python 3.7.0 it didn't read any of them due to :issue:`34247`). If
 this behavior is unwanted, set :c:data:`Py_IgnoreEnvironmentFlag` to 1 before
 calling :c:func:`Py_Initialize`.
+
+:c:func:`Py_Initialize` and :c:func:`Py_Main` cannot enable the C locale
+coercion (:pep:`538`) anymore: it is always disabled. It can now only be
+enabled by the Python program ("python3").
+
+New :option:`-X` ``coerce_c_locale`` command line option to control C locale
+coercion (:pep:`538`).
diff --git a/Lib/test/test_c_locale_coercion.py b/Lib/test/test_c_locale_coercion.py
index 1db293b9c373..f62208ab2006 100644
--- a/Lib/test/test_c_locale_coercion.py
+++ b/Lib/test/test_c_locale_coercion.py
@@ -139,7 +139,7 @@ def _handle_output_variations(data):
         return data
 
     @classmethod
-    def get_child_details(cls, env_vars):
+    def get_child_details(cls, env_vars, xoption=None):
         """Retrieves fsencoding and standard stream details from a child process
 
         Returns (encoding_details, stderr_lines):
@@ -150,10 +150,11 @@ def get_child_details(cls, env_vars):
         The child is run in isolated mode if the current interpreter supports
         that.
         """
-        result, py_cmd = run_python_until_end(
-            "-X", "utf8=0", "-c", cls.CHILD_PROCESS_SCRIPT,
-            **env_vars
-        )
+        args = []
+        if xoption:
+            args.extend(("-X", f"coerce_c_locale={xoption}"))
+        args.extend(("-X", "utf8=0", "-c", cls.CHILD_PROCESS_SCRIPT))
+        result, py_cmd = run_python_until_end(*args, **env_vars)
         if not result.rc == 0:
             result.fail(py_cmd)
         # All subprocess outputs in this test case should be pure ASCII
@@ -212,7 +213,8 @@ def _check_child_encoding_details(self,
                                       expected_fs_encoding,
                                       expected_stream_encoding,
                                       expected_warnings,
-                                      coercion_expected):
+                                      coercion_expected,
+                                      xoption=None):
         """Check the C locale handling for the given process environment
 
         Parameters:
@@ -220,7 +222,7 @@ def _check_child_encoding_details(self,
             expected_stream_encoding: expected encoding for standard streams
             expected_warning: stderr output to expect (if any)
         """
-        result = EncodingDetails.get_child_details(env_vars)
+        result = EncodingDetails.get_child_details(env_vars, xoption)
         encoding_details, stderr_lines = result
         expected_details = EncodingDetails.get_expected_details(
             coercion_expected,
@@ -290,6 +292,7 @@ def _check_c_locale_coercion(self,
                                  coerce_c_locale,
                                  expected_warnings=None,
                                  coercion_expected=True,
+                                 use_xoption=False,
                                  **extra_vars):
         """Check the C locale handling for various configurations
 
@@ -319,8 +322,12 @@ def _check_c_locale_coercion(self,
             "PYTHONCOERCECLOCALE": "",
         }
         base_var_dict.update(extra_vars)
+        xoption = None
         if coerce_c_locale is not None:
-            base_var_dict["PYTHONCOERCECLOCALE"] = coerce_c_locale
+            if use_xoption:
+                xoption = coerce_c_locale
+            else:
+                base_var_dict["PYTHONCOERCECLOCALE"] = coerce_c_locale
 
         # Check behaviour for the default locale
         with self.subTest(default_locale=True,
@@ -342,7 +349,8 @@ def _check_c_locale_coercion(self,
                                                fs_encoding,
                                                stream_encoding,
                                                _expected_warnings,
-                                               _coercion_expected)
+                                               _coercion_expected,
+                                               xoption=xoption)
 
         # Check behaviour for explicitly configured locales
         for locale_to_set in EXPECTED_C_LOCALE_EQUIVALENTS:
@@ -357,7 +365,8 @@ def _check_c_locale_coercion(self,
                                                        fs_encoding,
                                                        stream_encoding,
                                                        expected_warnings,
-                                                       coercion_expected)
+                                                       coercion_expected,
+                                                       xoption=xoption)
 
     def test_PYTHONCOERCECLOCALE_not_set(self):
         # This should coerce to the first available target locale by default
@@ -404,6 +413,32 @@ def test_LC_ALL_set_to_C(self):
                                       expected_warnings=[LEGACY_LOCALE_WARNING],
                                       coercion_expected=False)
 
+    def test_xoption_set_to_1(self):
+        self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale="1",
+                                      use_xoption=True)
+
+    def test_xoption_set_to_zero(self):
+        # The setting "0" should result in the locale coercion being disabled
+        self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING,
+                                      EXPECTED_C_LOCALE_STREAM_ENCODING,
+                                      coerce_c_locale="0",
+                                      coercion_expected=False,
+                                      use_xoption=True)
+        # Setting LC_ALL=C shouldn't make any difference to the behaviour
+        self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING,
+                                      EXPECTED_C_LOCALE_STREAM_ENCODING,
+                                      coerce_c_locale="0",
+                                      LC_ALL="C",
+                                      coercion_expected=False,
+                                      use_xoption=True)
+
+    def test_xoption_set_to_warn(self):
+        # -X coerce_c_locale=warn enables runtime warnings for legacy locales
+        self._check_c_locale_coercion("utf-8", "utf-8",
+                                      coerce_c_locale="warn",
+                                      expected_warnings=[CLI_COERCION_WARNING],
+                                      use_xoption=True)
+
 def test_main():
     test.support.run_unittest(
         LocaleConfigurationTests,
diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py
index 21511b896cad..7e967b20ab88 100644
--- a/Lib/test/test_cmd_line.py
+++ b/Lib/test/test_cmd_line.py
@@ -159,13 +159,16 @@ def test_undecodable_code(self):
         env = os.environ.copy()
         # Use C locale to get ascii for the locale encoding
         env['LC_ALL'] = 'C'
-        env['PYTHONCOERCECLOCALE'] = '0'
         code = (
             b'import locale; '
             b'print(ascii("' + undecodable + b'"), '
                 b'locale.getpreferredencoding())')
         p = subprocess.Popen(
-            [sys.executable, "-c", code],
+            [sys.executable,
+             # Disable C locale coercion and UTF-8 Mode to not use UTF-8
+             "-X", "coerce_c_locale=0",
+             "-X", "utf8=0",
+             "-c", code],
             stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
             env=env)
         stdout, stderr = p.communicate()
diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py
index b90366d81445..a7f292827130 100644
--- a/Lib/test/test_sys.py
+++ b/Lib/test/test_sys.py
@@ -656,9 +656,8 @@ def test_getfilesystemencoding(self):
 
     def c_locale_get_error_handler(self, locale, isolated=False, encoding=None):
         # Force the POSIX locale
-        env = os.environ.copy()
+        env = dict(os.environ)
         env["LC_ALL"] = locale
-        env["PYTHONCOERCECLOCALE"] = "0"
         code = '\n'.join((
             'import sys',
             'def dump(name):',
@@ -668,7 +667,10 @@ def c_locale_get_error_handler(self, locale, isolated=False, encoding=None):
             'dump("stdout")',
             'dump("stderr")',
         ))
-        args = [sys.executable, "-X", "utf8=0", "-c", code]
+        args = [sys.executable,
+                "-X", "utf8=0",
+                "-X", "coerce_c_locale=0",
+                "-c", code]
         if isolated:
             args.append("-I")
         if encoding is not None:
diff --git a/Lib/test/test_utf8_mode.py b/Lib/test/test_utf8_mode.py
index 7280ce77ef82..c3cbb49060e7 100644
--- a/Lib/test/test_utf8_mode.py
+++ b/Lib/test/test_utf8_mode.py
@@ -27,6 +27,8 @@ def posix_locale(self):
         return (loc in POSIX_LOCALES)
 
     def get_output(self, *args, failure=False, **kw):
+        # Always disable the C locale coercion (PEP 538)
+        args = ('-X', 'coerce_c_locale=0', *args)
         kw = dict(self.DEFAULT_ENV, **kw)
         if failure:
             out = assert_python_failure(*args, **kw)
@@ -116,7 +118,6 @@ def test_filesystemencoding(self):
             # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 mode
             # and has the priority over -X utf8 and PYTHONUTF8
             out = self.get_output('-X', 'utf8', '-c', code,
-                                  PYTHONUTF8='strict',
                                   PYTHONLEGACYWINDOWSFSENCODING='1')
             self.assertEqual(out, 'mbcs/replace')
 
diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-09-18-01-41-33.bpo-34589.lLVTYc.rst b/Misc/NEWS.d/next/Core and Builtins/2018-09-18-01-41-33.bpo-34589.lLVTYc.rst
new file mode 100644
index 000000000000..618092d192c4
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2018-09-18-01-41-33.bpo-34589.lLVTYc.rst	
@@ -0,0 +1,2 @@
+Add a new :option:`-X` ``coerce_c_locale`` command line option to control C
+locale coercion (:pep:`538`).
diff --git a/Python/coreconfig.c b/Python/coreconfig.c
index 131a043ff280..b2459dca57b0 100644
--- a/Python/coreconfig.c
+++ b/Python/coreconfig.c
@@ -705,6 +705,17 @@ config_init_utf8_mode(_PyCoreConfig *config)
         return _Py_INIT_OK();
     }
 
+#ifndef MS_WINDOWS
+    /* The C locale and the POSIX locale enable the UTF-8 Mode (PEP 540) */
+    const char *ctype_loc = setlocale(LC_CTYPE, NULL);
+    if (ctype_loc != NULL
+        && (strcmp(ctype_loc, "C") == 0 || strcmp(ctype_loc, "POSIX") == 0))
+    {
+        config->utf8_mode = 1;
+        return _Py_INIT_OK();
+    }
+#endif
+
     return _Py_INIT_OK();
 }
 
@@ -808,25 +819,6 @@ config_read_env_vars(_PyCoreConfig *config)
         config->malloc_stats = 1;
     }
 
-    const char *env = _PyCoreConfig_GetEnv(config, "PYTHONCOERCECLOCALE");
-    if (env) {
-        if (strcmp(env, "0") == 0) {
-            if (config->_coerce_c_locale < 0) {
-                config->_coerce_c_locale = 0;
-            }
-        }
-        else if (strcmp(env, "warn") == 0) {
-            if (config->_coerce_c_locale_warn < 0) {
-                config->_coerce_c_locale_warn = 1;
-            }
-        }
-        else {
-            if (config->_coerce_c_locale < 0) {
-                config->_coerce_c_locale = 1;
-            }
-        }
-    }
-
     wchar_t *path;
     int res = _PyCoreConfig_GetEnvDup(config, &path,
                                       L"PYTHONPATH", "PYTHONPATH");
@@ -966,28 +958,76 @@ config_read_complex_options(_PyCoreConfig *config)
 }
 
 
-static void
-config_init_locale(_PyCoreConfig *config)
+static _PyInitError
+config_init_coerce_c_locale(_PyCoreConfig *config)
 {
+    const wchar_t *xopt = config_get_xoption(config, L"coerce_c_locale");
+    if (xopt) {
+        wchar_t *sep = wcschr(xopt, L'=');
+        if (sep) {
+            xopt = sep + 1;
+            if (wcscmp(xopt, L"1") == 0) {
+                if (config->_coerce_c_locale < 0) {
+                    config->_coerce_c_locale = 1;
+                }
+            }
+            else if (wcscmp(xopt, L"0") == 0) {
+                if (config->_coerce_c_locale < 0) {
+                    config->_coerce_c_locale = 0;
+                }
+            }
+            else if (wcscmp(xopt, L"warn") == 0) {
+                if (config->_coerce_c_locale_warn < 0) {
+                    config->_coerce_c_locale_warn = 1;
+                }
+            }
+            else {
+                return _Py_INIT_USER_ERR("invalid -X coerce_c_locale option value");
+            }
+        }
+        else {
+            if (config->_coerce_c_locale < 0) {
+                config->_coerce_c_locale = 1;
+            }
+        }
+
+        if (config->_coerce_c_locale_warn < 0) {
+            config->_coerce_c_locale_warn = 0;
+        }
+    }
+
+    const char *env = _PyCoreConfig_GetEnv(config, "PYTHONCOERCECLOCALE");
+    if (env) {
+        if (strcmp(env, "0") == 0) {
+            if (config->_coerce_c_locale < 0) {
+                config->_coerce_c_locale = 0;
+            }
+        }
+        else if (strcmp(env, "warn") == 0) {
+            if (config->_coerce_c_locale_warn < 0) {
+                config->_coerce_c_locale_warn = 1;
+            }
+        }
+        else {
+            if (config->_coerce_c_locale < 0) {
+                config->_coerce_c_locale = 1;
+            }
+        }
+
+        if (config->_coerce_c_locale_warn < 0) {
+            config->_coerce_c_locale_warn = 0;
+        }
+    }
+
     if (config->_coerce_c_locale < 0) {
         /* The C locale enables the C locale coercion (PEP 538) */
         if (_Py_LegacyLocaleDetected()) {
             config->_coerce_c_locale = 1;
+            return _Py_INIT_OK();
         }
     }
 
-#ifndef MS_WINDOWS
-    if (config->utf8_mode < 0) {
-        /* The C locale and the POSIX locale enable the UTF-8 Mode (PEP 540) */
-        const char *ctype_loc = setlocale(LC_CTYPE, NULL);
-        if (ctype_loc != NULL
-           && (strcmp(ctype_loc, "C") == 0
-               || strcmp(ctype_loc, "POSIX") == 0))
-        {
-            config->utf8_mode = 1;
-        }
-    }
-#endif
+    return _Py_INIT_OK();
 }
 
 
@@ -1293,8 +1333,11 @@ _PyCoreConfig_Read(_PyCoreConfig *config)
         }
     }
 
-    if (config->utf8_mode < 0 || config->_coerce_c_locale < 0) {
-        config_init_locale(config);
+    if (config->_coerce_c_locale < 0 || config->_coerce_c_locale_warn < 0) {
+        err = config_init_coerce_c_locale(config);
+        if (_Py_INIT_FAILED(err)) {
+            return err;
+        }
     }
 
     if (config->_install_importlib) {
@@ -1349,6 +1392,7 @@ _PyCoreConfig_Read(_PyCoreConfig *config)
     }
 
     assert(config->_coerce_c_locale >= 0);
+    assert(config->_coerce_c_locale_warn >= 0);
     assert(config->use_environment >= 0);
     assert(config->filesystem_encoding != NULL);
     assert(config->filesystem_errors != NULL);



More information about the Python-checkins mailing list