[Python-checkins] bpo-32002: Refactor C locale coercion tests (GH-4369)

Nick Coghlan webhook-mailer at python.org
Sat Dec 16 03:51:24 EST 2017


https://github.com/python/cpython/commit/9c19b020249c451891affd81751947321a1e6957
commit: 9c19b020249c451891affd81751947321a1e6957
branch: master
author: Nick Coghlan <ncoghlan at gmail.com>
committer: GitHub <noreply at github.com>
date: 2017-12-16T21:51:19+13:00
summary:

bpo-32002: Refactor C locale coercion tests (GH-4369)

Exactly which locale requests end up giving you the "C" locale
is platform-dependent.

A blank locale and "POSIX" will translate to "C" on most Linux
distros, but may not do so on other platforms, so this adjusts
the way the tests are structured to better account for that.

This is an initial step towards fixing the current test failure
on Cygwin (hence the bpo-32002 issue reference).

files:
A Misc/NEWS.d/next/Tests/2017-11-11-16-35-18.bpo-32002.itDxIo.rst
M Lib/test/test_c_locale_coercion.py

diff --git a/Lib/test/test_c_locale_coercion.py b/Lib/test/test_c_locale_coercion.py
index 37dd834781c..1db293b9c37 100644
--- a/Lib/test/test_c_locale_coercion.py
+++ b/Lib/test/test_c_locale_coercion.py
@@ -14,30 +14,51 @@
     interpreter_requires_environment,
 )
 
+# Set the list of ways we expect to be able to ask for the "C" locale
+EXPECTED_C_LOCALE_EQUIVALENTS = ["C", "invalid.ascii"]
+
 # Set our expectation for the default encoding used in the C locale
 # for the filesystem encoding and the standard streams
-
-# While most *nix platforms default to ASCII in the C locale, some use a
-# different encoding.
-if sys.platform.startswith("aix"):
-    C_LOCALE_STREAM_ENCODING = "iso8859-1"
-elif test.support.is_android:
-    C_LOCALE_STREAM_ENCODING = "utf-8"
-else:
-    C_LOCALE_STREAM_ENCODING = "ascii"
-
-# FS encoding is UTF-8 on macOS, other *nix platforms use the locale encoding
-if sys.platform == "darwin":
-    C_LOCALE_FS_ENCODING = "utf-8"
-else:
-    C_LOCALE_FS_ENCODING = C_LOCALE_STREAM_ENCODING
-
-# Note that the above is probably still wrong in some cases, such as:
+EXPECTED_C_LOCALE_STREAM_ENCODING = "ascii"
+EXPECTED_C_LOCALE_FS_ENCODING = "ascii"
+
+# Set our expectation for the default locale used when none is specified
+EXPECT_COERCION_IN_DEFAULT_LOCALE = True
+
+# Apply some platform dependent overrides
+if sys.platform.startswith("linux"):
+    if test.support.is_android:
+        # Android defaults to using UTF-8 for all system interfaces
+        EXPECTED_C_LOCALE_STREAM_ENCODING = "utf-8"
+        EXPECTED_C_LOCALE_FS_ENCODING = "utf-8"
+    else:
+        # Linux distros typically alias the POSIX locale directly to the C
+        # locale.
+        # TODO: Once https://bugs.python.org/issue30672 is addressed, we'll be
+        #       able to check this case unconditionally
+        EXPECTED_C_LOCALE_EQUIVALENTS.append("POSIX")
+elif sys.platform.startswith("aix"):
+    # AIX uses iso8859-1 in the C locale, other *nix platforms use ASCII
+    EXPECTED_C_LOCALE_STREAM_ENCODING = "iso8859-1"
+    EXPECTED_C_LOCALE_FS_ENCODING = "iso8859-1"
+elif sys.platform == "darwin":
+    # FS encoding is UTF-8 on macOS
+    EXPECTED_C_LOCALE_FS_ENCODING = "utf-8"
+elif sys.platform == "cygwin":
+    # Cygwin defaults to using C.UTF-8
+    # TODO: Work out a robust dynamic test for this that doesn't rely on
+    #       CPython's own locale handling machinery
+    EXPECT_COERCION_IN_DEFAULT_LOCALE = False
+
+# Note that the above expectations are still wrong in some cases, such as:
 # * Windows when PYTHONLEGACYWINDOWSFSENCODING is set
-# * AIX and any other platforms that use latin-1 in the C locale
+# * Any platform other than AIX that uses latin-1 in the C locale
+# * Any Linux distro where POSIX isn't a simple alias for the C locale
+# * Any Linux distro where the default locale is something other than "C"
 #
 # Options for dealing with this:
-# * Don't set PYTHON_COERCE_C_LOCALE on such platforms (e.g. Windows doesn't)
+# * Don't set the PY_COERCE_C_LOCALE preprocessor definition on
+#   such platforms (e.g. it isn't set on Windows)
 # * Fix the test expectations to match the actual platform behaviour
 
 # In order to get the warning messages to match up as expected, the candidate
@@ -47,7 +68,7 @@
 # There's no reliable cross-platform way of checking locale alias
 # lists, so the only way of knowing which of these locales will work
 # is to try them with locale.setlocale(). We do that in a subprocess
-# to avoid altering the locale of the test runner.
+# in setUpModule() below to avoid altering the locale of the test runner.
 #
 # If the relevant locale module attributes exist, and we're not on a platform
 # where we expect it to always succeed, we also check that
@@ -216,8 +237,9 @@ def _check_child_encoding_details(self,
 class LocaleConfigurationTests(_LocaleHandlingTestCase):
     # Test explicit external configuration via the process environment
 
-    def setUpClass():
-        # This relies on setupModule() having been run, so it can't be
+    @classmethod
+    def setUpClass(cls):
+        # This relies on setUpModule() having been run, so it can't be
         # handled via the @unittest.skipUnless decorator
         if not AVAILABLE_TARGETS:
             raise unittest.SkipTest("No C-with-UTF-8 locale available")
@@ -284,8 +306,8 @@ def _check_c_locale_coercion(self,
 
         if not AVAILABLE_TARGETS:
             # Locale coercion is disabled when there aren't any target locales
-            fs_encoding = C_LOCALE_FS_ENCODING
-            stream_encoding = C_LOCALE_STREAM_ENCODING
+            fs_encoding = EXPECTED_C_LOCALE_FS_ENCODING
+            stream_encoding = EXPECTED_C_LOCALE_STREAM_ENCODING
             coercion_expected = False
             if expected_warnings:
                 expected_warnings = [LEGACY_LOCALE_WARNING]
@@ -297,41 +319,47 @@ def _check_c_locale_coercion(self,
             "PYTHONCOERCECLOCALE": "",
         }
         base_var_dict.update(extra_vars)
-        for env_var in ("LANG", "LC_CTYPE"):
-            for locale_to_set in ("", "C", "POSIX", "invalid.ascii"):
-                # XXX (ncoghlan): *BSD platforms don't behave as expected in the
-                #                 POSIX locale, so we skip that for now
-                # See https://bugs.python.org/issue30672 for discussion
-                if locale_to_set == "POSIX":
-                    continue
+        if coerce_c_locale is not None:
+            base_var_dict["PYTHONCOERCECLOCALE"] = coerce_c_locale
 
-                # Platforms using UTF-8 in the C locale do not print
-                # CLI_COERCION_WARNING when all the locale envt variables are
-                # not set or set to the empty string.
+        # Check behaviour for the default locale
+        with self.subTest(default_locale=True,
+                          PYTHONCOERCECLOCALE=coerce_c_locale):
+            if EXPECT_COERCION_IN_DEFAULT_LOCALE:
                 _expected_warnings = expected_warnings
-                for _env_var in base_var_dict:
-                    if base_var_dict[_env_var]:
-                        break
-                else:
-                    if (C_LOCALE_STREAM_ENCODING == "utf-8" and
-                           locale_to_set == "" and coerce_c_locale == "warn"):
-                        _expected_warnings = None
-
+                _coercion_expected = coercion_expected
+            else:
+                _expected_warnings = None
+                _coercion_expected = False
+            # On Android CLI_COERCION_WARNING is not printed when all the
+            # locale environment variables are undefined or empty. When
+            # this code path is run with environ['LC_ALL'] == 'C', then
+            # LEGACY_LOCALE_WARNING is printed.
+            if (test.support.is_android and
+                    _expected_warnings == [CLI_COERCION_WARNING]):
+                _expected_warnings = None
+            self._check_child_encoding_details(base_var_dict,
+                                               fs_encoding,
+                                               stream_encoding,
+                                               _expected_warnings,
+                                               _coercion_expected)
+
+        # Check behaviour for explicitly configured locales
+        for locale_to_set in EXPECTED_C_LOCALE_EQUIVALENTS:
+            for env_var in ("LANG", "LC_CTYPE"):
                 with self.subTest(env_var=env_var,
                                   nominal_locale=locale_to_set,
                                   PYTHONCOERCECLOCALE=coerce_c_locale):
                     var_dict = base_var_dict.copy()
                     var_dict[env_var] = locale_to_set
-                    if coerce_c_locale is not None:
-                        var_dict["PYTHONCOERCECLOCALE"] = coerce_c_locale
                     # Check behaviour on successful coercion
                     self._check_child_encoding_details(var_dict,
                                                        fs_encoding,
                                                        stream_encoding,
-                                                       _expected_warnings,
+                                                       expected_warnings,
                                                        coercion_expected)
 
-    def test_test_PYTHONCOERCECLOCALE_not_set(self):
+    def test_PYTHONCOERCECLOCALE_not_set(self):
         # This should coerce to the first available target locale by default
         self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale=None)
 
@@ -350,27 +378,27 @@ def test_PYTHONCOERCECLOCALE_set_to_warn(self):
 
     def test_PYTHONCOERCECLOCALE_set_to_zero(self):
         # The setting "0" should result in the locale coercion being disabled
-        self._check_c_locale_coercion(C_LOCALE_FS_ENCODING,
-                                      C_LOCALE_STREAM_ENCODING,
+        self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING,
+                                      EXPECTED_C_LOCALE_STREAM_ENCODING,
                                       coerce_c_locale="0",
                                       coercion_expected=False)
         # Setting LC_ALL=C shouldn't make any difference to the behaviour
-        self._check_c_locale_coercion(C_LOCALE_FS_ENCODING,
-                                      C_LOCALE_STREAM_ENCODING,
+        self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING,
+                                      EXPECTED_C_LOCALE_STREAM_ENCODING,
                                       coerce_c_locale="0",
                                       LC_ALL="C",
                                       coercion_expected=False)
 
     def test_LC_ALL_set_to_C(self):
         # Setting LC_ALL should render the locale coercion ineffective
-        self._check_c_locale_coercion(C_LOCALE_FS_ENCODING,
-                                      C_LOCALE_STREAM_ENCODING,
+        self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING,
+                                      EXPECTED_C_LOCALE_STREAM_ENCODING,
                                       coerce_c_locale=None,
                                       LC_ALL="C",
                                       coercion_expected=False)
         # And result in a warning about a lack of locale compatibility
-        self._check_c_locale_coercion(C_LOCALE_FS_ENCODING,
-                                      C_LOCALE_STREAM_ENCODING,
+        self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING,
+                                      EXPECTED_C_LOCALE_STREAM_ENCODING,
                                       coerce_c_locale="warn",
                                       LC_ALL="C",
                                       expected_warnings=[LEGACY_LOCALE_WARNING],
diff --git a/Misc/NEWS.d/next/Tests/2017-11-11-16-35-18.bpo-32002.itDxIo.rst b/Misc/NEWS.d/next/Tests/2017-11-11-16-35-18.bpo-32002.itDxIo.rst
new file mode 100644
index 00000000000..9cc7ec22c45
--- /dev/null
+++ b/Misc/NEWS.d/next/Tests/2017-11-11-16-35-18.bpo-32002.itDxIo.rst
@@ -0,0 +1,2 @@
+Adjust C locale coercion testing for the empty locale and POSIX locale
+cases to more readily adjust to platform dependent behaviour.



More information about the Python-checkins mailing list