[Python-checkins] bpo-29240, bpo-32030: Py_Main() re-reads config if encoding changes (#4899)

Victor Stinner webhook-mailer at python.org
Fri Dec 15 22:54:26 EST 2017


https://github.com/python/cpython/commit/9454060e84a669dde63824d9e2fcaf295e34f687
commit: 9454060e84a669dde63824d9e2fcaf295e34f687
branch: master
author: Victor Stinner <victor.stinner at gmail.com>
committer: GitHub <noreply at github.com>
date: 2017-12-16T04:54:22+01:00
summary:

bpo-29240, bpo-32030: Py_Main() re-reads config if encoding changes (#4899)

bpo-29240, bpo-32030: If the encoding changes (C locale coerced or
UTF-8 Mode changed), Py_Main() now reads the configuration again with
the new encoding.

Changes:

* Add _Py_UnixMain() called by main().
* Rename pymain_free_pymain() to pymain_clear_pymain(), it can now be
  called multiple times.
* Rename pymain_parse_cmdline_envvars() to pymain_read_conf().
* Py_Main() now clears orig_argc and orig_argv at exit.
* Remove argv_copy2, Py_Main() doesn't modify argv anymore. There is
  no longer any need for two copies of the wchar_t** argv.
* _PyCoreConfig: add coerce_c_locale and coerce_c_locale_warn.
* Py_UTF8Mode is now initialized to -1.
* Locale coercion (PEP 538) now respects -I and -E options.

files:
M Doc/using/cmdline.rst
M Include/pylifecycle.h
M Include/pystate.h
M Lib/test/test_c_locale_coercion.py
M Lib/test/test_cmd_line.py
M Lib/test/test_utf8_mode.py
M Modules/getpath.c
M Modules/main.c
M Programs/python.c
M Python/bltinmodule.c
M Python/fileutils.c
M Python/pylifecycle.c

diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst
index 3915f336a5a..b1bd47fa6b9 100644
--- a/Doc/using/cmdline.rst
+++ b/Doc/using/cmdline.rst
@@ -779,9 +779,7 @@ conflict.
 
    If set to the value ``0``, causes the main Python command line application
    to skip coercing the legacy ASCII-based C locale to a more capable UTF-8
-   based alternative. Note that this setting is checked even when the
-   :option:`-E` or :option:`-I` options are used, as it is handled prior to
-   the processing of command line options.
+   based alternative.
 
    If this variable is *not* set, or is set to a value other than ``0``, and
    the current locale reported for the ``LC_CTYPE`` category is the default
diff --git a/Include/pylifecycle.h b/Include/pylifecycle.h
index dcb7fcb8495..3ea8ad65088 100644
--- a/Include/pylifecycle.h
+++ b/Include/pylifecycle.h
@@ -105,6 +105,9 @@ PyAPI_FUNC(int) Py_FdIsInteractive(FILE *, const char *);
 
 /* Bootstrap __main__ (defined in Modules/main.c) */
 PyAPI_FUNC(int) Py_Main(int argc, wchar_t **argv);
+#ifdef Py_BUILD_CORE
+PyAPI_FUNC(int) _Py_UnixMain(int argc, char **argv);
+#endif
 
 /* In getpath.c */
 PyAPI_FUNC(wchar_t *) Py_GetProgramFullPath(void);
@@ -194,7 +197,7 @@ PyAPI_FUNC(int) _PyOS_URandomNonblock(void *buffer, Py_ssize_t size);
 
 /* Legacy locale support */
 #ifndef Py_LIMITED_API
-PyAPI_FUNC(void) _Py_CoerceLegacyLocale(void);
+PyAPI_FUNC(void) _Py_CoerceLegacyLocale(const _PyCoreConfig *config);
 PyAPI_FUNC(int) _Py_LegacyLocaleDetected(void);
 PyAPI_FUNC(char *) _Py_SetLocaleFromEnv(int category);
 #endif
diff --git a/Include/pystate.h b/Include/pystate.h
index a56c9b4ea6c..fff134a4970 100644
--- a/Include/pystate.h
+++ b/Include/pystate.h
@@ -38,7 +38,10 @@ typedef struct {
     int show_alloc_count;   /* -X showalloccount */
     int dump_refs;          /* PYTHONDUMPREFS */
     int malloc_stats;       /* PYTHONMALLOCSTATS */
-    int utf8_mode;          /* -X utf8 or PYTHONUTF8 environment variable */
+    int coerce_c_locale;    /* PYTHONCOERCECLOCALE, -1 means unknown */
+    int coerce_c_locale_warn; /* PYTHONCOERCECLOCALE=warn */
+    int utf8_mode;          /* -X utf8 or PYTHONUTF8 environment variable,
+                               -1 means unknown */
 
     wchar_t *module_search_path_env; /* PYTHONPATH environment variable */
     wchar_t *home;          /* PYTHONHOME environment variable,
@@ -46,7 +49,8 @@ typedef struct {
     wchar_t *program_name;  /* Program name, see also Py_GetProgramName() */
 } _PyCoreConfig;
 
-#define _PyCoreConfig_INIT (_PyCoreConfig){.use_hash_seed = -1}
+#define _PyCoreConfig_INIT \
+    (_PyCoreConfig){.use_hash_seed = -1, .coerce_c_locale = -1, .utf8_mode = -1}
 /* Note: _PyCoreConfig_INIT sets other fields to 0/NULL */
 
 /* Placeholders while working on the new configuration API
diff --git a/Lib/test/test_c_locale_coercion.py b/Lib/test/test_c_locale_coercion.py
index c0845d75a29..37dd834781c 100644
--- a/Lib/test/test_c_locale_coercion.py
+++ b/Lib/test/test_c_locale_coercion.py
@@ -65,7 +65,7 @@ def _set_locale_in_subprocess(locale_name):
         # If there's no valid CODESET, we expect coercion to be skipped
         cmd_fmt += "; import sys; sys.exit(not locale.nl_langinfo(locale.CODESET))"
     cmd = cmd_fmt.format(locale_name)
-    result, py_cmd = run_python_until_end("-c", cmd, __isolated=True)
+    result, py_cmd = run_python_until_end("-c", cmd, PYTHONCOERCECLOCALE='')
     return result.rc == 0
 
 
@@ -131,7 +131,6 @@ def get_child_details(cls, env_vars):
         """
         result, py_cmd = run_python_until_end(
             "-X", "utf8=0", "-c", cls.CHILD_PROCESS_SCRIPT,
-            __isolated=True,
             **env_vars
         )
         if not result.rc == 0:
@@ -236,6 +235,7 @@ def test_external_target_locale_configuration(self):
             "LANG": "",
             "LC_CTYPE": "",
             "LC_ALL": "",
+            "PYTHONCOERCECLOCALE": "",
         }
         for env_var in ("LANG", "LC_CTYPE"):
             for locale_to_set in AVAILABLE_TARGETS:
@@ -294,6 +294,7 @@ def _check_c_locale_coercion(self,
             "LANG": "",
             "LC_CTYPE": "",
             "LC_ALL": "",
+            "PYTHONCOERCECLOCALE": "",
         }
         base_var_dict.update(extra_vars)
         for env_var in ("LANG", "LC_CTYPE"):
diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py
index 2b14c301c7d..54ea3773a06 100644
--- a/Lib/test/test_cmd_line.py
+++ b/Lib/test/test_cmd_line.py
@@ -551,7 +551,7 @@ def test_xdev(self):
         self.assertEqual(out, "True")
 
         # Warnings
-        code = ("import sys, warnings; "
+        code = ("import warnings; "
                 "print(' '.join('%s::%s' % (f[0], f[2].__name__) "
                                 "for f in warnings.filters))")
         if Py_DEBUG:
diff --git a/Lib/test/test_utf8_mode.py b/Lib/test/test_utf8_mode.py
index 275a6ea8ed6..73d1bd424ca 100644
--- a/Lib/test/test_utf8_mode.py
+++ b/Lib/test/test_utf8_mode.py
@@ -7,6 +7,7 @@
 import sys
 import textwrap
 import unittest
+from test import support
 from test.support.script_helper import assert_python_ok, assert_python_failure
 
 
@@ -14,9 +15,11 @@
 
 
 class UTF8ModeTests(unittest.TestCase):
-    # Override PYTHONUTF8 and PYTHONLEGACYWINDOWSFSENCODING environment
-    # variables by default
-    DEFAULT_ENV = {'PYTHONUTF8': '', 'PYTHONLEGACYWINDOWSFSENCODING': ''}
+    DEFAULT_ENV = {
+        'PYTHONUTF8': '',
+        'PYTHONLEGACYWINDOWSFSENCODING': '',
+        'PYTHONCOERCECLOCALE': '0',
+    }
 
     def posix_locale(self):
         loc = locale.setlocale(locale.LC_CTYPE, None)
@@ -53,7 +56,7 @@ def test_xoption(self):
         self.assertEqual(out, '0')
 
         if MS_WINDOWS:
-            # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8
+            # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 Mode
             # and has the priority over -X utf8
             out = self.get_output('-X', 'utf8', '-c', code,
                                   PYTHONLEGACYWINDOWSFSENCODING='1')
@@ -201,6 +204,25 @@ def test_locale_getpreferredencoding(self):
         out = self.get_output('-X', 'utf8', '-c', code, LC_ALL='C')
         self.assertEqual(out, 'UTF-8 UTF-8')
 
+    @unittest.skipIf(MS_WINDOWS, 'test specific to Unix')
+    def test_cmd_line(self):
+        arg = 'h\xe9\u20ac'.encode('utf-8')
+        arg_utf8 = arg.decode('utf-8')
+        arg_ascii = arg.decode('ascii', 'surrogateescape')
+        code = 'import locale, sys; print("%s:%s" % (locale.getpreferredencoding(), ascii(sys.argv[1:])))'
+
+        def check(utf8_opt, expected, **kw):
+            out = self.get_output('-X', utf8_opt, '-c', code, arg, **kw)
+            args = out.partition(':')[2].rstrip()
+            self.assertEqual(args, ascii(expected), out)
+
+        check('utf8', [arg_utf8])
+        if sys.platform == 'darwin' or support.is_android:
+            c_arg = arg_utf8
+        else:
+            c_arg = arg_ascii
+        check('utf8=0', [c_arg], LC_ALL='C')
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/Modules/getpath.c b/Modules/getpath.c
index 6208a17f02e..b4b33437b6f 100644
--- a/Modules/getpath.c
+++ b/Modules/getpath.c
@@ -112,7 +112,7 @@ extern "C" {
 
 #define DECODE_LOCALE_ERR(NAME, LEN) \
     ((LEN) == (size_t)-2) \
-     ? _Py_INIT_USER_ERR("cannot decode " #NAME) \
+     ? _Py_INIT_USER_ERR("cannot decode " NAME) \
      : _Py_INIT_NO_MEMORY()
 
 typedef struct {
diff --git a/Modules/main.c b/Modules/main.c
index 6b602cf9b77..00de7f0d181 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -38,14 +38,14 @@ extern "C" {
 
 #define DECODE_LOCALE_ERR(NAME, LEN) \
     (((LEN) == -2) \
-     ? _Py_INIT_USER_ERR("cannot decode " #NAME) \
+     ? _Py_INIT_USER_ERR("cannot decode " NAME) \
      : _Py_INIT_NO_MEMORY())
 
 
 #define SET_DECODE_ERROR(NAME, LEN) \
     do { \
         if ((LEN) == (size_t)-2) { \
-            pymain->err = _Py_INIT_USER_ERR("cannot decode " #NAME); \
+            pymain->err = _Py_INIT_USER_ERR("cannot decode " NAME); \
         } \
         else { \
             pymain->err = _Py_INIT_NO_MEMORY(); \
@@ -53,8 +53,8 @@ extern "C" {
     } while (0)
 
 /* For Py_GetArgcArgv(); set by main() */
-static wchar_t **orig_argv;
-static int orig_argc;
+static wchar_t **orig_argv = NULL;
+static int orig_argc = 0;
 
 /* command line options */
 #define BASE_OPTS L"bBc:dEhiIJm:OqRsStuvVW:xX:?"
@@ -427,8 +427,11 @@ typedef struct {
     _PyInitError err;
     /* PYTHONWARNINGS env var */
     _Py_OptList env_warning_options;
+
     int argc;
     wchar_t **argv;
+    int use_bytes_argv;
+    char **bytes_argv;
 
     int sys_argc;
     wchar_t **sys_argv;
@@ -466,7 +469,6 @@ pymain_free_globals(_PyMain *pymain)
 {
     _PyPathConfig_Clear(&_Py_path_config);
     _PyImport_Fini2();
-    _PyCoreConfig_Clear(&pymain->core_config);
 
 #ifdef __INSURE__
     /* Insure++ is a memory analysis tool that aids in discovering
@@ -483,22 +485,69 @@ pymain_free_globals(_PyMain *pymain)
 }
 
 
+/* Clear argv allocated by pymain_decode_bytes_argv() */
+static void
+pymain_clear_bytes_argv(_PyMain *pymain, int argc)
+{
+    if (pymain->use_bytes_argv && pymain->argv != NULL) {
+        for (int i = 0; i < argc; i++) {
+            PyMem_RawFree(pymain->argv[i]);
+        }
+        PyMem_RawFree(pymain->argv);
+        pymain->argv = NULL;
+    }
+}
+
+
+static int
+pymain_decode_bytes_argv(_PyMain *pymain)
+{
+    assert(pymain->argv == NULL);
+
+    /* +1 for a the NULL terminator */
+    size_t size = sizeof(wchar_t*) * (pymain->argc + 1);
+    pymain->argv = (wchar_t **)PyMem_RawMalloc(size);
+    if (pymain->argv == NULL) {
+        pymain->err = _Py_INIT_NO_MEMORY();
+        return -1;
+    }
+
+    for (int i = 0; i < pymain->argc; i++) {
+        size_t len;
+        pymain->argv[i] = Py_DecodeLocale(pymain->bytes_argv[i], &len);
+        if (pymain->argv[i] == NULL) {
+            pymain_clear_bytes_argv(pymain, i);
+            pymain->err = DECODE_LOCALE_ERR("command line arguments",
+                                            (Py_ssize_t)len);
+            return -1;
+        }
+    }
+    pymain->argv[pymain->argc] = NULL;
+    return 0;
+}
+
+
 static void
-pymain_free_pymain(_PyMain *pymain)
+pymain_clear_pymain(_PyMain *pymain)
 {
     _Py_CommandLineDetails *cmdline = &pymain->cmdline;
     pymain_optlist_clear(&cmdline->warning_options);
     pymain_optlist_clear(&cmdline->xoptions);
     PyMem_RawFree(cmdline->command);
+    cmdline->command = NULL;
 
     PyMem_RawFree(pymain->sys_argv);
+    pymain->sys_argv = NULL;
     pymain_optlist_clear(&pymain->env_warning_options);
+    pymain_clear_bytes_argv(pymain, pymain->argc);
+
+    _PyCoreConfig_Clear(&pymain->core_config);
 }
 
 
 /* Clear Python ojects */
 static void
-pymain_free_python(_PyMain *pymain)
+pymain_clear_python(_PyMain *pymain)
 {
     Py_CLEAR(pymain->main_importer_path);
 
@@ -509,12 +558,12 @@ pymain_free_python(_PyMain *pymain)
 static void
 pymain_free(_PyMain *pymain)
 {
-    /* Force the allocator used by pymain_parse_cmdline_envvars() */
+    /* Force the allocator used by pymain_read_conf() */
     PyMemAllocatorEx old_alloc;
     _PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
 
-    pymain_free_python(pymain);
-    pymain_free_pymain(pymain);
+    pymain_clear_python(pymain);
+    pymain_clear_pymain(pymain);
     pymain_free_globals(pymain);
 
     PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
@@ -743,6 +792,9 @@ pymain_parse_cmdline_impl(_PyMain *pymain)
         cmdline->filename = pymain->argv[_PyOS_optind];
     }
 
+    pymain->run_code = (cmdline->command != NULL || cmdline->filename != NULL
+                        || cmdline->module != NULL);
+
     /* -c and -m options are exclusive */
     assert(!(cmdline->command != NULL && cmdline->module != NULL));
 
@@ -1434,8 +1486,6 @@ pymain_repl(_PyMain *pymain)
 static int
 pymain_parse_cmdline(_PyMain *pymain)
 {
-    _Py_CommandLineDetails *cmdline = &pymain->cmdline;
-
     int res = pymain_parse_cmdline_impl(pymain);
     if (res < 0) {
         return -1;
@@ -1446,21 +1496,6 @@ pymain_parse_cmdline(_PyMain *pymain)
         return 1;
     }
 
-    if (cmdline->print_help) {
-        pymain_usage(0, pymain->argv[0]);
-        pymain->status = 0;
-        return 1;
-    }
-
-    if (cmdline->print_version) {
-        printf("Python %s\n",
-               (cmdline->print_version >= 2) ? Py_GetVersion() : PY_VERSION);
-        return 1;
-    }
-
-    pymain->run_code = (cmdline->command != NULL || cmdline->filename != NULL
-                        || cmdline->module != NULL);
-
     return 0;
 }
 
@@ -1852,6 +1887,19 @@ pymain_parse_envvars(_PyMain *pymain)
         pymain->core_config.malloc_stats = 1;
     }
 
+    const char* env = pymain_get_env_var("PYTHONCOERCECLOCALE");
+    if (env) {
+        if (strcmp(env, "0") == 0) {
+            pymain->core_config.coerce_c_locale = 0;
+        }
+        else if (strcmp(env, "warn") == 0) {
+            pymain->core_config.coerce_c_locale_warn = 1;
+        }
+        else {
+            pymain->core_config.coerce_c_locale = 1;
+        }
+    }
+
     if (pymain_init_utf8_mode(pymain) < 0) {
         return -1;
     }
@@ -1867,23 +1915,19 @@ pymain_parse_envvars(_PyMain *pymain)
    Return 1 if Python is done and must exit.
    Set pymain->err and return -1 on error. */
 static int
-pymain_parse_cmdline_envvars_impl(_PyMain *pymain)
+pymain_read_conf_impl(_PyMain *pymain)
 {
     int res = pymain_parse_cmdline(pymain);
-    if (res < 0) {
-        return -1;
-    }
-    if (res > 0) {
-        return 1;
+    if (res != 0) {
+        return res;
     }
 
-    /* Set Py_IgnoreEnvironmentFlag needed by Py_GETENV() */
-    pymain_set_global_config(pymain);
+    /* Set Py_IgnoreEnvironmentFlag for Py_GETENV() */
+    Py_IgnoreEnvironmentFlag = pymain->core_config.ignore_environment;
 
     if (pymain_parse_envvars(pymain) < 0) {
         return -1;
     }
-    /* FIXME: if utf8_mode value changed, parse again cmdline */
 
     if (pymain_init_sys_argv(pymain) < 0) {
         return -1;
@@ -1899,14 +1943,101 @@ pymain_parse_cmdline_envvars_impl(_PyMain *pymain)
 
 
 static int
-pymain_parse_cmdline_envvars(_PyMain *pymain)
+pymain_read_conf(_PyMain *pymain)
 {
+    int res = -1;
+
     /* Force default allocator, since pymain_free() must use the same allocator
        than this function. */
     PyMemAllocatorEx old_alloc;
     _PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
 
-    int res = pymain_parse_cmdline_envvars_impl(pymain);
+    char *oldloc = _PyMem_RawStrdup(setlocale(LC_ALL, NULL));
+    if (oldloc == NULL) {
+        pymain->err = _Py_INIT_NO_MEMORY();
+        goto done;
+    }
+
+    /* Reconfigure the locale to the default for this process */
+    _Py_SetLocaleFromEnv(LC_ALL);
+
+    int locale_coerced = 0;
+    int loops = 0;
+    int init_ignore_env = pymain->core_config.ignore_environment;
+
+    while (1) {
+        int utf8_mode = pymain->core_config.utf8_mode;
+        int encoding_changed = 0;
+
+        /* Watchdog to prevent an infinite loop */
+        loops++;
+        if (loops == 3) {
+            pymain->err = _Py_INIT_ERR("Encoding changed twice while "
+                                       "reading the configuration");
+            goto done;
+        }
+
+        if (pymain->use_bytes_argv) {
+            if (pymain_decode_bytes_argv(pymain) < 0) {
+                goto done;
+            }
+        }
+
+        res = pymain_read_conf_impl(pymain);
+        if (res != 0) {
+            goto done;
+        }
+
+        /* The legacy C locale assumes ASCII as the default text encoding, which
+         * causes problems not only for the CPython runtime, but also other
+         * components like GNU readline.
+         *
+         * Accordingly, when the CLI detects it, it attempts to coerce it to a
+         * more capable UTF-8 based alternative.
+         *
+         * See the documentation of the PYTHONCOERCECLOCALE setting for more
+         * details.
+         */
+        if (pymain->core_config.coerce_c_locale == 1 && !locale_coerced) {
+            locale_coerced = 1;
+            _Py_CoerceLegacyLocale(&pymain->core_config);
+            encoding_changed = 1;
+        }
+
+        if (utf8_mode == -1) {
+            if (pymain->core_config.utf8_mode == 1) {
+                /* UTF-8 Mode enabled */
+                encoding_changed = 1;
+            }
+        }
+        else {
+            if (pymain->core_config.utf8_mode != utf8_mode) {
+                encoding_changed = 1;
+            }
+        }
+
+        if (!encoding_changed) {
+            break;
+        }
+
+        /* Reset the configuration, except UTF-8 Mode. Set Py_UTF8Mode for
+           Py_DecodeLocale(). Reset Py_IgnoreEnvironmentFlag, modified by
+           pymain_read_conf_impl(). */
+        Py_UTF8Mode = pymain->core_config.utf8_mode;
+        Py_IgnoreEnvironmentFlag = init_ignore_env;
+        pymain_clear_pymain(pymain);
+        pymain_get_global_config(pymain);
+
+        /* The encoding changed: read again the configuration
+           with the new encoding */
+    }
+    res = 0;
+
+done:
+    if (oldloc != NULL) {
+        setlocale(LC_ALL, oldloc);
+        PyMem_RawFree(oldloc);
+    }
 
     PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
     return res;
@@ -1940,6 +2071,24 @@ _PyCoreConfig_Read(_PyCoreConfig *config)
         }
     }
 
+    if (config->utf8_mode < 0 || config->coerce_c_locale < 0) {
+        if (_Py_LegacyLocaleDetected()) {
+            if (config->utf8_mode < 0) {
+                config->utf8_mode = 1;
+            }
+            if (config->coerce_c_locale < 0) {
+                config->coerce_c_locale = 1;
+            }
+        }
+
+        if (config->coerce_c_locale < 0) {
+            config->coerce_c_locale = 0;
+        }
+        if (config->utf8_mode < 0) {
+            config->utf8_mode = 0;
+        }
+    }
+
     return _Py_INIT_OK();
 }
 
@@ -2247,17 +2396,24 @@ pymain_run_python(_PyMain *pymain)
 static int
 pymain_init(_PyMain *pymain)
 {
+    /* 754 requires that FP exceptions run in "no stop" mode by default,
+     * and until C vendors implement C99's ways to control FP exceptions,
+     * Python requires non-stop mode.  Alas, some platforms enable FP
+     * exceptions by default.  Here we disable them.
+     */
+#ifdef __FreeBSD__
+    fedisableexcept(FE_OVERFLOW);
+#endif
+
     pymain->err = _PyRuntime_Initialize();
     if (_Py_INIT_FAILED(pymain->err)) {
         return -1;
     }
 
-    pymain->core_config.utf8_mode = Py_UTF8Mode;
     pymain->core_config._disable_importlib = 0;
     pymain->config.install_signal_handlers = 1;
 
-    orig_argc = pymain->argc;           /* For Py_GetArgcArgv() */
-    orig_argv = pymain->argv;
+    pymain_get_global_config(pymain);
     return 0;
 }
 
@@ -2265,14 +2421,13 @@ pymain_init(_PyMain *pymain)
 static int
 pymain_impl(_PyMain *pymain)
 {
-    int res = pymain_init(pymain);
-    if (res < 0) {
+    if (pymain_init(pymain) < 0) {
         return -1;
     }
 
-    pymain_get_global_config(pymain);
-
-    res = pymain_parse_cmdline_envvars(pymain);
+    /* Read the configuration, but initialize also the LC_CTYPE locale:
+       enable UTF-8 mode (PEP 540) and/or coerce the C locale (PEP 538) */
+    int res = pymain_read_conf(pymain);
     if (res < 0) {
         return -1;
     }
@@ -2281,6 +2436,21 @@ pymain_impl(_PyMain *pymain)
         return 0;
     }
 
+    _Py_CommandLineDetails *cmdline = &pymain->cmdline;
+    if (cmdline->print_help) {
+        pymain_usage(0, pymain->argv[0]);
+        return 0;
+    }
+
+    if (cmdline->print_version) {
+        printf("Python %s\n",
+               (cmdline->print_version >= 2) ? Py_GetVersion() : PY_VERSION);
+        return 0;
+    }
+
+    orig_argc = pymain->argc;           /* For Py_GetArgcArgv() */
+    orig_argv = pymain->argv;
+
     res = pymain_init_python_core(pymain);
     if (res < 0) {
         return -1;
@@ -2293,7 +2463,7 @@ pymain_impl(_PyMain *pymain)
 
     pymain_run_python(pymain);
 
-    pymain_free_python(pymain);
+    pymain_clear_python(pymain);
 
     if (Py_FinalizeEx() < 0) {
         /* Value unlikely to be confused with a non-error exit status or
@@ -2304,22 +2474,46 @@ pymain_impl(_PyMain *pymain)
 }
 
 
+static int
+pymain_main(_PyMain *pymain)
+{
+    memset(&pymain->cmdline, 0, sizeof(pymain->cmdline));
+
+    if (pymain_impl(pymain) < 0) {
+        _Py_FatalInitError(pymain->err);
+    }
+    pymain_free(pymain);
+
+    orig_argc = 0;
+    orig_argv = NULL;
+
+    return pymain->status;
+}
+
+
 int
 Py_Main(int argc, wchar_t **argv)
 {
     _PyMain pymain = _PyMain_INIT;
-    memset(&pymain.cmdline, 0, sizeof(pymain.cmdline));
     pymain.argc = argc;
     pymain.argv = argv;
 
-    if (pymain_impl(&pymain) < 0) {
-        _Py_FatalInitError(pymain.err);
-    }
-    pymain_free(&pymain);
+    return pymain_main(&pymain);
+}
+
+
+int
+_Py_UnixMain(int argc, char **argv)
+{
+    _PyMain pymain = _PyMain_INIT;
+    pymain.argc = argc;
+    pymain.use_bytes_argv = 1;
+    pymain.bytes_argv = argv;
 
-    return pymain.status;
+    return pymain_main(&pymain);
 }
 
+
 /* this is gonna seem *real weird*, but if you put some other code between
    Py_Main() and Py_GetArgcArgv() you will need to adjust the test in the
    while statement in Misc/gdbinit:ppystack */
diff --git a/Programs/python.c b/Programs/python.c
index aef7122517a..a295486d73f 100644
--- a/Programs/python.c
+++ b/Programs/python.c
@@ -17,98 +17,9 @@ wmain(int argc, wchar_t **argv)
 #else
 
 
-static void _Py_NO_RETURN
-fatal_error(const char *msg)
-{
-    fprintf(stderr, "Fatal Python error: %s\n", msg);
-    fflush(stderr);
-    exit(1);
-}
-
-
 int
 main(int argc, char **argv)
 {
-    wchar_t **argv_copy;
-    /* We need a second copy, as Python might modify the first one. */
-    wchar_t **argv_copy2;
-    int i, status;
-    char *oldloc;
-
-    _PyInitError err = _PyRuntime_Initialize();
-    if (_Py_INIT_FAILED(err)) {
-        fatal_error(err.msg);
-    }
-
-    /* Force default allocator, to be able to release memory above
-       with a known allocator. */
-    _PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, NULL);
-
-    argv_copy = (wchar_t **)PyMem_RawMalloc(sizeof(wchar_t*) * (argc+1));
-    argv_copy2 = (wchar_t **)PyMem_RawMalloc(sizeof(wchar_t*) * (argc+1));
-    if (!argv_copy || !argv_copy2) {
-        fatal_error("out of memory");
-        return 1;
-    }
-
-    /* 754 requires that FP exceptions run in "no stop" mode by default,
-     * and until C vendors implement C99's ways to control FP exceptions,
-     * Python requires non-stop mode.  Alas, some platforms enable FP
-     * exceptions by default.  Here we disable them.
-     */
-#ifdef __FreeBSD__
-    fedisableexcept(FE_OVERFLOW);
-#endif
-
-    oldloc = _PyMem_RawStrdup(setlocale(LC_ALL, NULL));
-    if (!oldloc) {
-        fatal_error("out of memory");
-        return 1;
-    }
-
-    /* Reconfigure the locale to the default for this process */
-    _Py_SetLocaleFromEnv(LC_ALL);
-
-    /* The legacy C locale assumes ASCII as the default text encoding, which
-     * causes problems not only for the CPython runtime, but also other
-     * components like GNU readline.
-     *
-     * Accordingly, when the CLI detects it, it attempts to coerce it to a
-     * more capable UTF-8 based alternative.
-     *
-     * See the documentation of the PYTHONCOERCECLOCALE setting for more
-     * details.
-     */
-    if (_Py_LegacyLocaleDetected()) {
-        Py_UTF8Mode = 1;
-        _Py_CoerceLegacyLocale();
-    }
-
-    /* Convert from char to wchar_t based on the locale settings */
-    for (i = 0; i < argc; i++) {
-        argv_copy[i] = Py_DecodeLocale(argv[i], NULL);
-        if (!argv_copy[i]) {
-            PyMem_RawFree(oldloc);
-            fatal_error("unable to decode the command line arguments");
-        }
-        argv_copy2[i] = argv_copy[i];
-    }
-    argv_copy2[argc] = argv_copy[argc] = NULL;
-
-    setlocale(LC_ALL, oldloc);
-    PyMem_RawFree(oldloc);
-
-    status = Py_Main(argc, argv_copy);
-
-    /* Py_Main() can change PyMem_RawMalloc() allocator, so restore the default
-       to release memory blocks allocated before Py_Main() */
-    _PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, NULL);
-
-    for (i = 0; i < argc; i++) {
-        PyMem_RawFree(argv_copy2[i]);
-    }
-    PyMem_RawFree(argv_copy);
-    PyMem_RawFree(argv_copy2);
-    return status;
+    return _Py_UnixMain(argc, argv);
 }
 #endif
diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c
index 604493dc5e2..e702f7c6e9e 100644
--- a/Python/bltinmodule.c
+++ b/Python/bltinmodule.c
@@ -29,9 +29,10 @@ const char *Py_FileSystemDefaultEncoding = NULL; /* set by initfsencoding() */
 int Py_HasFileSystemDefaultEncoding = 0;
 #endif
 const char *Py_FileSystemDefaultEncodeErrors = "surrogateescape";
-/* UTF-8 mode (PEP 540): if non-zero, use the UTF-8 encoding, and change stdin
-   and stdout error handler to "surrogateescape". */
-int Py_UTF8Mode = 0;
+/* UTF-8 mode (PEP 540): if equals to 1, use the UTF-8 encoding, and change
+   stdin and stdout error handler to "surrogateescape". It is equal to
+   -1 by default: unknown, will be set by Py_Main() */
+int Py_UTF8Mode = -1;
 
 _Py_IDENTIFIER(__builtins__);
 _Py_IDENTIFIER(__dict__);
diff --git a/Python/fileutils.c b/Python/fileutils.c
index 4b69049ce58..c4d495d0d63 100644
--- a/Python/fileutils.c
+++ b/Python/fileutils.c
@@ -393,7 +393,7 @@ Py_DecodeLocale(const char* arg, size_t *size)
 #if defined(__APPLE__) || defined(__ANDROID__)
     return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size);
 #else
-    if (Py_UTF8Mode) {
+    if (Py_UTF8Mode == 1) {
         return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size);
     }
 
@@ -539,7 +539,7 @@ Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
 #if defined(__APPLE__) || defined(__ANDROID__)
     return _Py_EncodeLocaleUTF8(text, error_pos);
 #else   /* __APPLE__ */
-    if (Py_UTF8Mode) {
+    if (Py_UTF8Mode == 1) {
         return _Py_EncodeLocaleUTF8(text, error_pos);
     }
 
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index 8c626075d5d..6500995ee24 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -385,18 +385,10 @@ static const char *_C_LOCALE_WARNING =
     "C.utf8, or UTF-8 (if available) as alternative Unicode-compatible "
     "locales is recommended.\n";
 
-static int
-_legacy_locale_warnings_enabled(void)
-{
-    const char *coerce_c_locale = getenv("PYTHONCOERCECLOCALE");
-    return (coerce_c_locale != NULL &&
-            strncmp(coerce_c_locale, "warn", 5) == 0);
-}
-
 static void
-_emit_stderr_warning_for_legacy_locale(void)
+_emit_stderr_warning_for_legacy_locale(const _PyCoreConfig *core_config)
 {
-    if (_legacy_locale_warnings_enabled()) {
+    if (core_config->coerce_c_locale_warn) {
         if (_Py_LegacyLocaleDetected()) {
             fprintf(stderr, "%s", _C_LOCALE_WARNING);
         }
@@ -440,12 +432,12 @@ get_default_standard_stream_error_handler(void)
 }
 
 #ifdef PY_COERCE_C_LOCALE
-static const char _C_LOCALE_COERCION_WARNING[] =
+static const char C_LOCALE_COERCION_WARNING[] =
     "Python detected LC_CTYPE=C: LC_CTYPE coerced to %.20s (set another locale "
     "or PYTHONCOERCECLOCALE=0 to disable this locale coercion behavior).\n";
 
 static void
-_coerce_default_locale_settings(const _LocaleCoercionTarget *target)
+_coerce_default_locale_settings(const _PyCoreConfig *config, const _LocaleCoercionTarget *target)
 {
     const char *newloc = target->locale_name;
 
@@ -458,8 +450,8 @@ _coerce_default_locale_settings(const _LocaleCoercionTarget *target)
                 "Error setting LC_CTYPE, skipping C locale coercion\n");
         return;
     }
-    if (_legacy_locale_warnings_enabled()) {
-        fprintf(stderr, _C_LOCALE_COERCION_WARNING, newloc);
+    if (config->coerce_c_locale_warn) {
+        fprintf(stderr, C_LOCALE_COERCION_WARNING, newloc);
     }
 
     /* Reconfigure with the overridden environment variables */
@@ -468,47 +460,31 @@ _coerce_default_locale_settings(const _LocaleCoercionTarget *target)
 #endif
 
 void
-_Py_CoerceLegacyLocale(void)
+_Py_CoerceLegacyLocale(const _PyCoreConfig *config)
 {
 #ifdef PY_COERCE_C_LOCALE
-    /* We ignore the Python -E and -I flags here, as the CLI needs to sort out
-     * the locale settings *before* we try to do anything with the command
-     * line arguments. For cross-platform debugging purposes, we also need
-     * to give end users a way to force even scripts that are otherwise
-     * isolated from their environment to use the legacy ASCII-centric C
-     * locale.
-     *
-     * Ignoring -E and -I is safe from a security perspective, as we only use
-     * the setting to turn *off* the implicit locale coercion, and anyone with
-     * access to the process environment already has the ability to set
-     * `LC_ALL=C` to override the C level locale settings anyway.
-     */
-    const char *coerce_c_locale = getenv("PYTHONCOERCECLOCALE");
-    if (coerce_c_locale == NULL || strncmp(coerce_c_locale, "0", 2) != 0) {
-        /* PYTHONCOERCECLOCALE is not set, or is set to something other than "0" */
-        const char *locale_override = getenv("LC_ALL");
-        if (locale_override == NULL || *locale_override == '\0') {
-            /* LC_ALL is also not set (or is set to an empty string) */
-            const _LocaleCoercionTarget *target = NULL;
-            for (target = _TARGET_LOCALES; target->locale_name; target++) {
-                const char *new_locale = setlocale(LC_CTYPE,
-                                                   target->locale_name);
-                if (new_locale != NULL) {
+    const char *locale_override = getenv("LC_ALL");
+    if (locale_override == NULL || *locale_override == '\0') {
+        /* LC_ALL is also not set (or is set to an empty string) */
+        const _LocaleCoercionTarget *target = NULL;
+        for (target = _TARGET_LOCALES; target->locale_name; target++) {
+            const char *new_locale = setlocale(LC_CTYPE,
+                                               target->locale_name);
+            if (new_locale != NULL) {
 #if !defined(__APPLE__) && !defined(__ANDROID__) && \
-    defined(HAVE_LANGINFO_H) && defined(CODESET)
-                    /* Also ensure that nl_langinfo works in this locale */
-                    char *codeset = nl_langinfo(CODESET);
-                    if (!codeset || *codeset == '\0') {
-                        /* CODESET is not set or empty, so skip coercion */
-                        new_locale = NULL;
-                        _Py_SetLocaleFromEnv(LC_CTYPE);
-                        continue;
-                    }
-#endif
-                    /* Successfully configured locale, so make it the default */
-                    _coerce_default_locale_settings(target);
-                    return;
+defined(HAVE_LANGINFO_H) && defined(CODESET)
+                /* Also ensure that nl_langinfo works in this locale */
+                char *codeset = nl_langinfo(CODESET);
+                if (!codeset || *codeset == '\0') {
+                    /* CODESET is not set or empty, so skip coercion */
+                    new_locale = NULL;
+                    _Py_SetLocaleFromEnv(LC_CTYPE);
+                    continue;
                 }
+#endif
+                /* Successfully configured locale, so make it the default */
+                _coerce_default_locale_settings(config, target);
+                return;
             }
         }
     }
@@ -648,7 +624,7 @@ _Py_InitializeCore(const _PyCoreConfig *core_config)
        the locale's charset without having to switch
        locales. */
     _Py_SetLocaleFromEnv(LC_CTYPE);
-    _emit_stderr_warning_for_legacy_locale();
+    _emit_stderr_warning_for_legacy_locale(core_config);
 #endif
 
     err = _Py_HashRandomization_Init(core_config);



More information about the Python-checkins mailing list