[Python-checkins] bpo-47000: Make `io.text_encoding()` respect UTF-8 mode (GH-32003)

methane webhook-mailer at python.org
Sun Apr 3 22:47:06 EDT 2022


https://github.com/python/cpython/commit/4216dce04b7d3f329beaaafc82a77c4ac6cf4d57
commit: 4216dce04b7d3f329beaaafc82a77c4ac6cf4d57
branch: main
author: Inada Naoki <songofacandy at gmail.com>
committer: methane <songofacandy at gmail.com>
date: 2022-04-04T11:46:57+09:00
summary:

bpo-47000: Make `io.text_encoding()` respect UTF-8 mode (GH-32003)

Co-authored-by: Eric Snow <ericsnowcurrently at gmail.com>

files:
A Misc/NEWS.d/next/Library/2022-03-20-13-00-08.bpo-47000.p8HpG0.rst
M Doc/library/io.rst
M Include/internal/pycore_global_strings.h
M Include/internal/pycore_runtime_init.h
M Lib/_pyio.py
M Lib/test/test_io.py
M Lib/test/test_utf8_mode.py
M Modules/_io/_iomodule.c
M Modules/_io/clinic/_iomodule.c.h
M Python/sysmodule.c

diff --git a/Doc/library/io.rst b/Doc/library/io.rst
index d5123348195bd..80107d539505c 100644
--- a/Doc/library/io.rst
+++ b/Doc/library/io.rst
@@ -198,12 +198,13 @@ High-level Module Interface
    This is a helper function for callables that use :func:`open` or
    :class:`TextIOWrapper` and have an ``encoding=None`` parameter.
 
-   This function returns *encoding* if it is not ``None`` and ``"locale"`` if
-   *encoding* is ``None``.
+   This function returns *encoding* if it is not ``None``.
+   Otherwise, it returns ``"locale"`` or ``"utf-8"`` depending on
+   :ref:`UTF-8 Mode <utf8-mode>`.
 
    This function emits an :class:`EncodingWarning` if
    :data:`sys.flags.warn_default_encoding <sys.flags>` is true and *encoding*
-   is None. *stacklevel* specifies where the warning is emitted.
+   is ``None``. *stacklevel* specifies where the warning is emitted.
    For example::
 
       def read_text(path, encoding=None):
@@ -218,6 +219,10 @@ High-level Module Interface
 
    .. versionadded:: 3.10
 
+   .. versionchanged:: 3.11
+      :func:`text_encoding` returns "utf-8" when UTF-8 mode is enabled and
+      *encoding* is ``None``.
+
 
 .. exception:: BlockingIOError
 
diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h
index 3e533fd16509f..833ff2710a787 100644
--- a/Include/internal/pycore_global_strings.h
+++ b/Include/internal/pycore_global_strings.h
@@ -48,6 +48,7 @@ struct _Py_global_strings {
         STRUCT_FOR_STR(newline, "\n")
         STRUCT_FOR_STR(open_br, "{")
         STRUCT_FOR_STR(percent, "%")
+        STRUCT_FOR_STR(utf_8, "utf-8")
     } literals;
 
     struct {
diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h
index d5690d83a0482..fd925b3e060df 100644
--- a/Include/internal/pycore_runtime_init.h
+++ b/Include/internal/pycore_runtime_init.h
@@ -672,6 +672,7 @@ extern "C" {
                 INIT_STR(newline, "\n"), \
                 INIT_STR(open_br, "{"), \
                 INIT_STR(percent, "%"), \
+                INIT_STR(utf_8, "utf-8"), \
             }, \
             .identifiers = { \
                 INIT_ID(False), \
diff --git a/Lib/_pyio.py b/Lib/_pyio.py
index fd00d6536c076..e3ff59eb1adb1 100644
--- a/Lib/_pyio.py
+++ b/Lib/_pyio.py
@@ -44,8 +44,9 @@ def text_encoding(encoding, stacklevel=2):
     """
     A helper function to choose the text encoding.
 
-    When encoding is not None, just return it.
-    Otherwise, return the default text encoding (i.e. "locale").
+    When encoding is not None, this function returns it.
+    Otherwise, this function returns the default text encoding
+    (i.e. "locale" or "utf-8" depends on UTF-8 mode).
 
     This function emits an EncodingWarning if *encoding* is None and
     sys.flags.warn_default_encoding is true.
@@ -55,7 +56,10 @@ def text_encoding(encoding, stacklevel=2):
     However, please consider using encoding="utf-8" for new APIs.
     """
     if encoding is None:
-        encoding = "locale"
+        if sys.flags.utf8_mode:
+            encoding = "utf-8"
+        else:
+            encoding = "locale"
         if sys.flags.warn_default_encoding:
             import warnings
             warnings.warn("'encoding' argument not specified.",
diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py
index 2d0ca878788f2..67be108d2526f 100644
--- a/Lib/test/test_io.py
+++ b/Lib/test/test_io.py
@@ -4289,6 +4289,17 @@ def test_check_encoding_warning(self):
         self.assertTrue(
             warnings[1].startswith(b"<string>:8: EncodingWarning: "))
 
+    def test_text_encoding(self):
+        # PEP 597, bpo-47000. io.text_encoding() returns "locale" or "utf-8"
+        # based on sys.flags.utf8_mode
+        code = "import io; print(io.text_encoding(None))"
+
+        proc = assert_python_ok('-X', 'utf8=0', '-c', code)
+        self.assertEqual(b"locale", proc.out.strip())
+
+        proc = assert_python_ok('-X', 'utf8=1', '-c', code)
+        self.assertEqual(b"utf-8", proc.out.strip())
+
     @support.cpython_only
     # Depending if OpenWrapper was already created or not, the warning is
     # emitted or not. For example, the attribute is already created when this
diff --git a/Lib/test/test_utf8_mode.py b/Lib/test/test_utf8_mode.py
index 2b96f76df305f..308e8e8aea6c2 100644
--- a/Lib/test/test_utf8_mode.py
+++ b/Lib/test/test_utf8_mode.py
@@ -161,7 +161,7 @@ def test_io(self):
         filename = __file__
 
         out = self.get_output('-c', code, filename, PYTHONUTF8='1')
-        self.assertEqual(out, 'UTF-8/strict')
+        self.assertEqual(out.lower(), 'utf-8/strict')
 
     def _check_io_encoding(self, module, encoding=None, errors=None):
         filename = __file__
@@ -183,10 +183,10 @@ def _check_io_encoding(self, module, encoding=None, errors=None):
                               PYTHONUTF8='1')
 
         if not encoding:
-            encoding = 'UTF-8'
+            encoding = 'utf-8'
         if not errors:
             errors = 'strict'
-        self.assertEqual(out, f'{encoding}/{errors}')
+        self.assertEqual(out.lower(), f'{encoding}/{errors}')
 
     def check_io_encoding(self, module):
         self._check_io_encoding(module, encoding="latin1")
diff --git a/Misc/NEWS.d/next/Library/2022-03-20-13-00-08.bpo-47000.p8HpG0.rst b/Misc/NEWS.d/next/Library/2022-03-20-13-00-08.bpo-47000.p8HpG0.rst
new file mode 100644
index 0000000000000..f96b6e627ed11
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-03-20-13-00-08.bpo-47000.p8HpG0.rst
@@ -0,0 +1 @@
+Make :func:`io.text_encoding` return "utf-8" when UTF-8 mode is enabled.
diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c
index 7f029f26078b8..065f5e29c315b 100644
--- a/Modules/_io/_iomodule.c
+++ b/Modules/_io/_iomodule.c
@@ -457,8 +457,9 @@ _io.text_encoding
 
 A helper function to choose the text encoding.
 
-When encoding is not None, just return it.
-Otherwise, return the default text encoding (i.e. "locale").
+When encoding is not None, this function returns it.
+Otherwise, this function returns the default text encoding
+(i.e. "locale" or "utf-8" depends on UTF-8 mode).
 
 This function emits an EncodingWarning if encoding is None and
 sys.flags.warn_default_encoding is true.
@@ -469,7 +470,7 @@ However, please consider using encoding="utf-8" for new APIs.
 
 static PyObject *
 _io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel)
-/*[clinic end generated code: output=91b2cfea6934cc0c input=bf70231213e2a7b4]*/
+/*[clinic end generated code: output=91b2cfea6934cc0c input=4999aa8b3d90f3d4]*/
 {
     if (encoding == NULL || encoding == Py_None) {
         PyInterpreterState *interp = _PyInterpreterState_GET();
@@ -479,7 +480,14 @@ _io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel)
                 return NULL;
             }
         }
-        return &_Py_ID(locale);
+        const PyPreConfig *preconfig = &_PyRuntime.preconfig;
+        if (preconfig->utf8_mode) {
+            _Py_DECLARE_STR(utf_8, "utf-8");
+            encoding = &_Py_STR(utf_8);
+        }
+        else {
+            encoding = &_Py_ID(locale);
+        }
     }
     Py_INCREF(encoding);
     return encoding;
diff --git a/Modules/_io/clinic/_iomodule.c.h b/Modules/_io/clinic/_iomodule.c.h
index d5fb176eb66be..e4a6b8c42e1d8 100644
--- a/Modules/_io/clinic/_iomodule.c.h
+++ b/Modules/_io/clinic/_iomodule.c.h
@@ -273,8 +273,9 @@ PyDoc_STRVAR(_io_text_encoding__doc__,
 "\n"
 "A helper function to choose the text encoding.\n"
 "\n"
-"When encoding is not None, just return it.\n"
-"Otherwise, return the default text encoding (i.e. \"locale\").\n"
+"When encoding is not None, this function returns it.\n"
+"Otherwise, this function returns the default text encoding\n"
+"(i.e. \"locale\" or \"utf-8\" depends on UTF-8 mode).\n"
 "\n"
 "This function emits an EncodingWarning if encoding is None and\n"
 "sys.flags.warn_default_encoding is true.\n"
@@ -354,4 +355,4 @@ _io_open_code(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObjec
 exit:
     return return_value;
 }
-/*[clinic end generated code: output=6ea315343f6a94ba input=a9049054013a1b77]*/
+/*[clinic end generated code: output=1a7fd7755c9a9609 input=a9049054013a1b77]*/
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
index 5765e9ef6577c..de4e10a7e110c 100644
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -841,7 +841,10 @@ static PyObject *
 sys_getdefaultencoding_impl(PyObject *module)
 /*[clinic end generated code: output=256d19dfcc0711e6 input=d416856ddbef6909]*/
 {
-    return PyUnicode_FromString(PyUnicode_GetDefaultEncoding());
+    _Py_DECLARE_STR(utf_8, "utf-8");
+    PyObject *ret = &_Py_STR(utf_8);
+    Py_INCREF(ret);
+    return ret;
 }
 
 /*[clinic input]



More information about the Python-checkins mailing list