[Python-checkins] bpo-42236: os.device_encoding() respects UTF-8 Mode (GH-23119)

vstinner webhook-mailer at python.org
Wed Nov 4 05:20:20 EST 2020


https://github.com/python/cpython/commit/3529718925f40d14ed48d281d809187bc7314a14
commit: 3529718925f40d14ed48d281d809187bc7314a14
branch: master
author: Victor Stinner <vstinner at python.org>
committer: vstinner <vstinner at python.org>
date: 2020-11-04T11:20:10+01:00
summary:

bpo-42236: os.device_encoding() respects UTF-8 Mode (GH-23119)

On Unix, the os.device_encoding() function now returns 'UTF-8' rather
than the device encoding if the Python UTF-8 Mode is enabled.

files:
A Misc/NEWS.d/next/Library/2020-11-02-23-05-17.bpo-42236.aJ6ZBR.rst
M Doc/library/os.rst
M Lib/test/test_utf8_mode.py
M Python/fileutils.c
M Python/initconfig.c

diff --git a/Doc/library/os.rst b/Doc/library/os.rst
index f9f35b3124360..3ffcfa04ffa75 100644
--- a/Doc/library/os.rst
+++ b/Doc/library/os.rst
@@ -113,6 +113,8 @@ of the UTF-8 encoding:
   :ref:`error handler <error-handlers>` being enabled for :data:`sys.stdin`
   and :data:`sys.stdout` (:data:`sys.stderr` continues to use
   ``backslashreplace`` as it does in the default locale-aware mode)
+* On Unix, :func:`os.device_encoding` returns ``'UTF-8'`` rather than the
+  device encoding.
 
 Note that the standard stream settings in UTF-8 mode can be overridden by
 :envvar:`PYTHONIOENCODING` (just as they can be in the default locale-aware
@@ -808,6 +810,12 @@ as internal buffering of data.
    Return a string describing the encoding of the device associated with *fd*
    if it is connected to a terminal; else return :const:`None`.
 
+   On Unix, if the :ref:`Python UTF-8 Mode <utf8-mode>` is enabled, return
+   ``'UTF-8'`` rather than the device encoding.
+
+   .. versionchanged:: 3.10
+      On Unix, the function now implements the Python UTF-8 Mode.
+
 
 .. function:: dup(fd)
 
diff --git a/Lib/test/test_utf8_mode.py b/Lib/test/test_utf8_mode.py
index bdb93457cfc40..8b6332ee22771 100644
--- a/Lib/test/test_utf8_mode.py
+++ b/Lib/test/test_utf8_mode.py
@@ -3,11 +3,13 @@
 """
 
 import locale
+import subprocess
 import sys
 import textwrap
 import unittest
 from test import support
 from test.support.script_helper import assert_python_ok, assert_python_failure
+from test.support import os_helper
 
 
 MS_WINDOWS = (sys.platform == 'win32')
@@ -250,6 +252,31 @@ def test_optim_level(self):
         out = self.get_output('-X', 'utf8', '-E', '-c', code)
         self.assertEqual(out, '1')
 
+    @unittest.skipIf(MS_WINDOWS,
+                     "os.device_encoding() doesn't implement "
+                     "the UTF-8 Mode on Windows")
+    def test_device_encoding(self):
+        # Use stdout as TTY
+        if not sys.stdout.isatty():
+            self.skipTest("sys.stdout is not a TTY")
+
+        filename = 'out.txt'
+        self.addCleanup(os_helper.unlink, filename)
+
+        code = (f'import os, sys; fd = sys.stdout.fileno(); '
+                f'out = open({filename!r}, "w", encoding="utf-8"); '
+                f'print(os.isatty(fd), os.device_encoding(fd), file=out); '
+                f'out.close()')
+        cmd = [sys.executable, '-X', 'utf8', '-c', code]
+        # The stdout TTY is inherited by the child process
+        proc = subprocess.run(cmd, text=True)
+        self.assertEqual(proc.returncode, 0, proc)
+
+        # In UTF-8 Mode, device_encoding(fd) returns "UTF-8" if fd is a TTY
+        with open(filename, encoding="utf8") as fp:
+            out = fp.read().rstrip()
+        self.assertEqual(out, 'True UTF-8')
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/Misc/NEWS.d/next/Library/2020-11-02-23-05-17.bpo-42236.aJ6ZBR.rst b/Misc/NEWS.d/next/Library/2020-11-02-23-05-17.bpo-42236.aJ6ZBR.rst
new file mode 100644
index 0000000000000..15e2620366556
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2020-11-02-23-05-17.bpo-42236.aJ6ZBR.rst
@@ -0,0 +1,3 @@
+On Unix, the :func:`os.device_encoding` function now returns ``'UTF-8'`` rather
+than the device encoding if the :ref:`Python UTF-8 Mode <utf8-mode>` is
+enabled.
diff --git a/Python/fileutils.c b/Python/fileutils.c
index 5177b3728824c..b589d7390d46d 100644
--- a/Python/fileutils.c
+++ b/Python/fileutils.c
@@ -55,9 +55,6 @@ get_surrogateescape(_Py_error_handler errors, int *surrogateescape)
 PyObject *
 _Py_device_encoding(int fd)
 {
-#if defined(MS_WINDOWS)
-    UINT cp;
-#endif
     int valid;
     _Py_BEGIN_SUPPRESS_IPH
     valid = isatty(fd);
@@ -66,6 +63,7 @@ _Py_device_encoding(int fd)
         Py_RETURN_NONE;
 
 #if defined(MS_WINDOWS)
+    UINT cp;
     if (fd == 0)
         cp = GetConsoleCP();
     else if (fd == 1 || fd == 2)
@@ -74,16 +72,14 @@ _Py_device_encoding(int fd)
         cp = 0;
     /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
        has no console */
-    if (cp != 0)
-        return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
-#elif defined(CODESET)
-    {
-        char *codeset = nl_langinfo(CODESET);
-        if (codeset != NULL && codeset[0] != 0)
-            return PyUnicode_FromString(codeset);
+    if (cp == 0) {
+        Py_RETURN_NONE;
     }
+
+    return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
+#else
+    return _Py_GetLocaleEncodingObject();
 #endif
-    Py_RETURN_NONE;
 }
 
 #if !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS)
diff --git a/Python/initconfig.c b/Python/initconfig.c
index 7bb28ed01f164..15fb3e4d2877d 100644
--- a/Python/initconfig.c
+++ b/Python/initconfig.c
@@ -1515,8 +1515,8 @@ config_init_stdio_encoding(PyConfig *config,
 {
     PyStatus status;
 
-    /* If Py_SetStandardStreamEncoding() have been called, use these
-        parameters. */
+    /* If Py_SetStandardStreamEncoding() has been called, use its
+        arguments if they are not NULL. */
     if (config->stdio_encoding == NULL && _Py_StandardStreamEncoding != NULL) {
         status = CONFIG_SET_BYTES_STR(config, &config->stdio_encoding,
                                       _Py_StandardStreamEncoding,
@@ -1535,6 +1535,7 @@ config_init_stdio_encoding(PyConfig *config,
         }
     }
 
+    // Exit if encoding and errors are defined
     if (config->stdio_encoding != NULL && config->stdio_errors != NULL) {
         return _PyStatus_OK();
     }
@@ -1634,12 +1635,12 @@ config_get_fs_encoding(PyConfig *config, const PyPreConfig *preconfig,
     if (preconfig->utf8_mode) {
         return PyConfig_SetString(config, fs_encoding, L"utf-8");
     }
-    else if (_Py_GetForceASCII()) {
+
+    if (_Py_GetForceASCII()) {
         return PyConfig_SetString(config, fs_encoding, L"ascii");
     }
-    else {
-        return config_get_locale_encoding(config, preconfig, fs_encoding);
-    }
+
+    return config_get_locale_encoding(config, preconfig, fs_encoding);
 #endif  // !MS_WINDOWS
 }
 



More information about the Python-checkins mailing list