[Python-checkins] bpo-18378: Recognize "UTF-8" as a valid name in locale._parse_localename (GH-14736)

Miss Islington (bot) webhook-mailer at python.org
Thu Aug 29 00:52:46 EDT 2019


https://github.com/python/cpython/commit/554143ebc2546e0b8b722dfafe397c0316f29980
commit: 554143ebc2546e0b8b722dfafe397c0316f29980
branch: 3.7
author: Miss Islington (bot) <31488909+miss-islington at users.noreply.github.com>
committer: GitHub <noreply at github.com>
date: 2019-08-28T21:52:43-07:00
summary:

bpo-18378: Recognize "UTF-8" as a valid name in locale._parse_localename (GH-14736)

(cherry picked from commit b0caf329815120acf50287e29858093d328b0e3c)

Co-authored-by: Ronald Oussoren <ronaldoussoren at mac.com>

files:
A Misc/NEWS.d/next/Library/2019-07-13-13-40-12.bpo-18378.NHcojp.rst
M Lib/locale.py
M Lib/test/test_locale.py

diff --git a/Lib/locale.py b/Lib/locale.py
index f3d3973d038c..dd8a08524a01 100644
--- a/Lib/locale.py
+++ b/Lib/locale.py
@@ -492,6 +492,10 @@ def _parse_localename(localename):
         return tuple(code.split('.')[:2])
     elif code == 'C':
         return None, None
+    elif code == 'UTF-8':
+        # On macOS "LC_CTYPE=UTF-8" is a valid locale setting
+        # for getting UTF-8 handling for text.
+        return None, 'UTF-8'
     raise ValueError('unknown locale: %s' % localename)
 
 def _build_localename(localetuple):
diff --git a/Lib/test/test_locale.py b/Lib/test/test_locale.py
index e2c2178ae6cc..9a05029b42a4 100644
--- a/Lib/test/test_locale.py
+++ b/Lib/test/test_locale.py
@@ -493,6 +493,42 @@ def test_japanese(self):
 
 
 class TestMiscellaneous(unittest.TestCase):
+    def test_defaults_UTF8(self):
+        # Issue #18378: on (at least) macOS setting LC_CTYPE to "UTF-8" is
+        # valid. Furthermore, LC_CTYPE=UTF-8 is used by the UTF-8 locale
+        # coercion during interpreter startup (on macOS).
+        import _locale
+        import os
+
+        self.assertEqual(locale._parse_localename('UTF-8'), (None, 'UTF-8'))
+
+        if hasattr(_locale, '_getdefaultlocale'):
+            orig_getlocale = _locale._getdefaultlocale
+            del _locale._getdefaultlocale
+        else:
+            orig_getlocale = None
+
+        orig_env = {}
+        try:
+            for key in ('LC_ALL', 'LC_CTYPE', 'LANG', 'LANGUAGE'):
+                if key in os.environ:
+                    orig_env[key] = os.environ[key]
+                    del os.environ[key]
+
+            os.environ['LC_CTYPE'] = 'UTF-8'
+
+            self.assertEqual(locale.getdefaultlocale(), (None, 'UTF-8'))
+
+        finally:
+            for k in orig_env:
+                os.environ[k] = orig_env[k]
+
+            if 'LC_CTYPE' not in orig_env:
+                del os.environ['LC_CTYPE']
+
+            if orig_getlocale is not None:
+                _locale._getdefaultlocale = orig_getlocale
+
     def test_getpreferredencoding(self):
         # Invoke getpreferredencoding to make sure it does not cause exceptions.
         enc = locale.getpreferredencoding()
diff --git a/Misc/NEWS.d/next/Library/2019-07-13-13-40-12.bpo-18378.NHcojp.rst b/Misc/NEWS.d/next/Library/2019-07-13-13-40-12.bpo-18378.NHcojp.rst
new file mode 100644
index 000000000000..6dda8abf15d5
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2019-07-13-13-40-12.bpo-18378.NHcojp.rst
@@ -0,0 +1 @@
+Recognize "UTF-8" as a valid value for LC_CTYPE in locale._parse_localename.



More information about the Python-checkins mailing list