[Python-checkins] bpo-43510: Implement PEP 597 opt-in EncodingWarning. (GH-19481)

methane webhook-mailer at python.org
Sun Mar 28 23:28:23 EDT 2021


https://github.com/python/cpython/commit/4827483f47906fecee6b5d9097df2a69a293a85c
commit: 4827483f47906fecee6b5d9097df2a69a293a85c
branch: master
author: Inada Naoki <songofacandy at gmail.com>
committer: methane <songofacandy at gmail.com>
date: 2021-03-29T12:28:14+09:00
summary:

bpo-43510: Implement PEP 597 opt-in EncodingWarning. (GH-19481)

See [PEP 597](https://www.python.org/dev/peps/pep-0597/).

* Add `-X warn_default_encoding` and `PYTHONWARNDEFAULTENCODING`.
* Add EncodingWarning
* Add io.text_encoding()
* open() and TextIOWrapper() emit EncodingWarning when encoding is omitted and warn_default_encoding is enabled.
* _pyio.TextIOWrapper() falls back to UTF-8 as the default encoding when importing the locale module fails (which can happen while building Python).
* bz2, configparser, gzip, lzma, pathlib, tempfile modules use io.text_encoding().
* What's new entry

files:
A Misc/NEWS.d/next/Library/2021-03-16-17-20-33.bpo-43510.-BeQH_.rst
M Doc/c-api/init_config.rst
M Doc/library/exceptions.rst
M Doc/library/io.rst
M Doc/using/cmdline.rst
M Doc/whatsnew/3.10.rst
M Include/cpython/initconfig.h
M Include/internal/pycore_initconfig.h
M Include/pyerrors.h
M Lib/_pyio.py
M Lib/bz2.py
M Lib/configparser.py
M Lib/gzip.py
M Lib/io.py
M Lib/lzma.py
M Lib/pathlib.py
M Lib/site.py
M Lib/subprocess.py
M Lib/tempfile.py
M Lib/test/exception_hierarchy.txt
M Lib/test/test_embed.py
M Lib/test/test_io.py
M Lib/test/test_pickle.py
M Lib/test/test_sys.py
M Modules/_io/_iomodule.c
M Modules/_io/clinic/_iomodule.c.h
M Modules/_io/textio.c
M Objects/exceptions.c
M PC/python3dll.c
M Python/initconfig.c
M Python/preconfig.c
M Python/sysmodule.c

diff --git a/Doc/c-api/init_config.rst b/Doc/c-api/init_config.rst
index db7c1f4376578..29fbb68195b34 100644
--- a/Doc/c-api/init_config.rst
+++ b/Doc/c-api/init_config.rst
@@ -583,6 +583,15 @@ PyConfig
 
       Default: ``0``.
 
+   .. c:member:: int warn_default_encoding
+
+      If non-zero, emit an :exc:`EncodingWarning` warning when :class:`io.TextIOWrapper`
+      uses its default encoding. See :ref:`io-encoding-warning` for details.
+
+      Default: ``0``.
+
+      .. versionadded:: 3.10
+
    .. c:member:: wchar_t* check_hash_pycs_mode
 
       Control the validation behavior of hash-based ``.pyc`` files:
diff --git a/Doc/library/exceptions.rst b/Doc/library/exceptions.rst
index 1028213699d63..40ccde72d07cc 100644
--- a/Doc/library/exceptions.rst
+++ b/Doc/library/exceptions.rst
@@ -741,6 +741,15 @@ The following exceptions are used as warning categories; see the
    Base class for warnings related to Unicode.
 
 
+.. exception:: EncodingWarning
+
+   Base class for warnings related to encodings.
+
+   See :ref:`io-encoding-warning` for details.
+
+   .. versionadded:: 3.10
+
+
 .. exception:: BytesWarning
 
    Base class for warnings related to :class:`bytes` and :class:`bytearray`.
diff --git a/Doc/library/io.rst b/Doc/library/io.rst
index 96e02e839ae65..f9ffc19fac489 100644
--- a/Doc/library/io.rst
+++ b/Doc/library/io.rst
@@ -106,6 +106,56 @@ stream by opening a file in binary mode with buffering disabled::
 The raw stream API is described in detail in the docs of :class:`RawIOBase`.
 
 
+.. _io-text-encoding:
+
+Text Encoding
+-------------
+
+The default encoding of :class:`TextIOWrapper` and :func:`open` is
+locale-specific (:func:`locale.getpreferredencoding(False) <locale.getpreferredencoding>`).
+
+However, many developers forget to specify the encoding when opening text files
+encoded in UTF-8 (e.g. JSON, TOML, Markdown, etc...) since most Unix
+platforms use UTF-8 locale by default. This causes bugs because the locale
+encoding is not UTF-8 for most Windows users. For example::
+
+   # May not work on Windows when the file contains non-ASCII characters.
+   with open("README.md") as f:
+       long_description = f.read()
+
+Additionally, while there is no concrete plan as of yet, Python may change
+the default text file encoding to UTF-8 in the future.
+
+Accordingly, it is highly recommended that you specify the encoding
+explicitly when opening text files. If you want to use UTF-8, pass
+``encoding="utf-8"``. To use the current locale encoding,
+``encoding="locale"`` is supported in Python 3.10.
+
+When you need to run existing code on Windows that attempts to open
+UTF-8 files using the default locale encoding, you can enable the UTF-8
+mode. See :ref:`UTF-8 mode on Windows <win-utf8-mode>`.
+
+.. _io-encoding-warning:
+
+Opt-in EncodingWarning
+^^^^^^^^^^^^^^^^^^^^^^
+
+.. versionadded:: 3.10
+   See :pep:`597` for more details.
+
+To find where the default locale encoding is used, you can enable
+the ``-X warn_default_encoding`` command line option or set the
+:envvar:`PYTHONWARNDEFAULTENCODING` environment variable, which will
+emit an :exc:`EncodingWarning` when the default encoding is used.
+
+If you are providing an API that uses :func:`open` or
+:class:`TextIOWrapper` and passes ``encoding=None`` as a parameter, you
+can use :func:`text_encoding` so that callers of the API will emit an
+:exc:`EncodingWarning` if they don't pass an ``encoding``. However,
+please consider using UTF-8 by default (i.e. ``encoding="utf-8"``) for
+new APIs.
+
+
 High-level Module Interface
 ---------------------------
 
@@ -143,6 +193,32 @@ High-level Module Interface
    .. versionadded:: 3.8
 
 
+.. function:: text_encoding(encoding, stacklevel=2)
+
+   This is a helper function for callables that use :func:`open` or
+   :class:`TextIOWrapper` and have an ``encoding=None`` parameter.
+
+   This function returns *encoding* if it is not ``None`` and ``"locale"`` if
+   *encoding* is ``None``.
+
+   This function emits an :class:`EncodingWarning` if
+   :data:`sys.flags.warn_default_encoding <sys.flags>` is true and *encoding*
+   is ``None``. *stacklevel* specifies where the warning is emitted.
+   For example::
+
+      def read_text(path, encoding=None):
+          encoding = io.text_encoding(encoding)  # stacklevel=2
+          with open(path, encoding=encoding) as f:
+              return f.read()
+
+   In this example, an :class:`EncodingWarning` is emitted for the caller of
+   ``read_text()``.
+
+   See :ref:`io-text-encoding` for more information.
+
+   .. versionadded:: 3.10
+
+
 .. exception:: BlockingIOError
 
    This is a compatibility alias for the builtin :exc:`BlockingIOError`
@@ -869,6 +945,8 @@ Text I/O
    *encoding* gives the name of the encoding that the stream will be decoded or
    encoded with.  It defaults to
    :func:`locale.getpreferredencoding(False) <locale.getpreferredencoding>`.
+   ``encoding="locale"`` can be used to specify the current locale's encoding
+   explicitly. See :ref:`io-text-encoding` for more information.
 
    *errors* is an optional string that specifies how encoding and decoding
    errors are to be handled.  Pass ``'strict'`` to raise a :exc:`ValueError`
@@ -920,6 +998,9 @@ Text I/O
       locale encoding using :func:`locale.setlocale`, use the current locale
       encoding instead of the user preferred encoding.
 
+   .. versionchanged:: 3.10
+      The *encoding* argument now supports the ``"locale"`` dummy encoding name.
+
    :class:`TextIOWrapper` provides these data attributes and methods in
    addition to those from :class:`TextIOBase` and :class:`IOBase`:
 
diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst
index 04e0f3267dbe7..1493c7c901754 100644
--- a/Doc/using/cmdline.rst
+++ b/Doc/using/cmdline.rst
@@ -453,6 +453,9 @@ Miscellaneous options
    * ``-X pycache_prefix=PATH`` enables writing ``.pyc`` files to a parallel
      tree rooted at the given directory instead of to the code tree. See also
      :envvar:`PYTHONPYCACHEPREFIX`.
+   * ``-X warn_default_encoding`` issues an :class:`EncodingWarning` when the
+     locale-specific default encoding is used for opening files.
+     See also :envvar:`PYTHONWARNDEFAULTENCODING`.
 
    It also allows passing arbitrary values and retrieving them through the
    :data:`sys._xoptions` dictionary.
@@ -482,6 +485,9 @@ Miscellaneous options
 
       The ``-X showalloccount`` option has been removed.
 
+   .. versionadded:: 3.10
+      The ``-X warn_default_encoding`` option.
+
    .. deprecated-removed:: 3.9 3.10
       The ``-X oldparser`` option.
 
@@ -907,6 +913,15 @@ conflict.
 
    .. versionadded:: 3.7
 
+.. envvar:: PYTHONWARNDEFAULTENCODING
+
+   If this environment variable is set to a non-empty string, issue an
+   :class:`EncodingWarning` when the locale-specific default encoding is used.
+
+   See :ref:`io-encoding-warning` for details.
+
+   .. versionadded:: 3.10
+
 
 Debug-mode variables
 ~~~~~~~~~~~~~~~~~~~~
diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst
index 1c4e5c47fc681..3a563c10282c8 100644
--- a/Doc/whatsnew/3.10.rst
+++ b/Doc/whatsnew/3.10.rst
@@ -454,6 +454,30 @@ For the full specification see :pep:`634`.  Motivation and rationale
 are in :pep:`635`, and a longer tutorial is in :pep:`636`.
 
 
+.. _whatsnew310-pep597:
+
+Optional ``EncodingWarning`` and ``encoding="locale"`` option
+-------------------------------------------------------------
+
+The default encoding of :class:`TextIOWrapper` and :func:`open` is
+platform and locale dependent. Since UTF-8 is used on most Unix
+platforms, omitting the ``encoding`` option when opening UTF-8 files
+(e.g. JSON, YAML, TOML, Markdown) is a very common bug. For example::
+
+   # BUG: "rb" mode or encoding="utf-8" should be used.
+   with open("data.json") as f:
+       data = json.load(f)
+
+To find this type of bug, an optional ``EncodingWarning`` has been added.
+It is emitted when :data:`sys.flags.warn_default_encoding <sys.flags>`
+is true and the locale-specific default encoding is used.
+
+The ``-X warn_default_encoding`` option and the :envvar:`PYTHONWARNDEFAULTENCODING`
+environment variable have been added to enable the warning.
+
+See :ref:`io-text-encoding` for more information.
+
+
 New Features Related to Type Annotations
 ========================================
 
diff --git a/Include/cpython/initconfig.h b/Include/cpython/initconfig.h
index 666c1e419ca24..09f9a2947efef 100644
--- a/Include/cpython/initconfig.h
+++ b/Include/cpython/initconfig.h
@@ -153,6 +153,7 @@ typedef struct PyConfig {
     PyWideStringList warnoptions;
     int site_import;
     int bytes_warning;
+    int warn_default_encoding;
     int inspect;
     int interactive;
     int optimization_level;
diff --git a/Include/internal/pycore_initconfig.h b/Include/internal/pycore_initconfig.h
index 28cd57030e218..4b009e816b492 100644
--- a/Include/internal/pycore_initconfig.h
+++ b/Include/internal/pycore_initconfig.h
@@ -102,6 +102,7 @@ typedef struct {
     int isolated;             /* -I option */
     int use_environment;      /* -E option */
     int dev_mode;             /* -X dev and PYTHONDEVMODE */
+    int warn_default_encoding;     /* -X warn_default_encoding and PYTHONWARNDEFAULTENCODING */
 } _PyPreCmdline;
 
 #define _PyPreCmdline_INIT \
diff --git a/Include/pyerrors.h b/Include/pyerrors.h
index 14129d3533cbe..f5d1c71157718 100644
--- a/Include/pyerrors.h
+++ b/Include/pyerrors.h
@@ -146,6 +146,7 @@ PyAPI_DATA(PyObject *) PyExc_FutureWarning;
 PyAPI_DATA(PyObject *) PyExc_ImportWarning;
 PyAPI_DATA(PyObject *) PyExc_UnicodeWarning;
 PyAPI_DATA(PyObject *) PyExc_BytesWarning;
+PyAPI_DATA(PyObject *) PyExc_EncodingWarning;
 PyAPI_DATA(PyObject *) PyExc_ResourceWarning;
 
 
diff --git a/Lib/_pyio.py b/Lib/_pyio.py
index 4804ed27cd14d..0f182d4240206 100644
--- a/Lib/_pyio.py
+++ b/Lib/_pyio.py
@@ -40,6 +40,29 @@
 _CHECK_ERRORS = _IOBASE_EMITS_UNRAISABLE
 
 
+def text_encoding(encoding, stacklevel=2):
+    """
+    A helper function to choose the text encoding.
+
+    When encoding is not None, just return it.
+    Otherwise, return the default text encoding (i.e. "locale").
+
+    This function emits an EncodingWarning if *encoding* is None and
+    sys.flags.warn_default_encoding is true.
+
+    This can be used in APIs with an encoding=None parameter
+    that pass it to TextIOWrapper or open.
+    However, please consider using encoding="utf-8" for new APIs.
+    """
+    if encoding is None:
+        encoding = "locale"
+        if sys.flags.warn_default_encoding:
+            import warnings
+            warnings.warn("'encoding' argument not specified.",
+                          EncodingWarning, stacklevel + 1)
+    return encoding
+
+
 def open(file, mode="r", buffering=-1, encoding=None, errors=None,
          newline=None, closefd=True, opener=None):
 
@@ -248,6 +271,7 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None,
         result = buffer
         if binary:
             return result
+        encoding = text_encoding(encoding)
         text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
         result = text
         text.mode = mode
@@ -2004,19 +2028,22 @@ class TextIOWrapper(TextIOBase):
     def __init__(self, buffer, encoding=None, errors=None, newline=None,
                  line_buffering=False, write_through=False):
         self._check_newline(newline)
-        if encoding is None:
+        encoding = text_encoding(encoding)
+
+        if encoding == "locale":
             try:
-                encoding = os.device_encoding(buffer.fileno())
+                encoding = os.device_encoding(buffer.fileno()) or "locale"
             except (AttributeError, UnsupportedOperation):
                 pass
-            if encoding is None:
-                try:
-                    import locale
-                except ImportError:
-                    # Importing locale may fail if Python is being built
-                    encoding = "ascii"
-                else:
-                    encoding = locale.getpreferredencoding(False)
+
+        if encoding == "locale":
+            try:
+                import locale
+            except ImportError:
+                # Importing locale may fail if Python is being built
+                encoding = "utf-8"
+            else:
+                encoding = locale.getpreferredencoding(False)
 
         if not isinstance(encoding, str):
             raise ValueError("invalid encoding: %r" % encoding)
diff --git a/Lib/bz2.py b/Lib/bz2.py
index ce07ebeb142d9..1da3ce65c81b7 100644
--- a/Lib/bz2.py
+++ b/Lib/bz2.py
@@ -311,6 +311,7 @@ def open(filename, mode="rb", compresslevel=9,
     binary_file = BZ2File(filename, bz_mode, compresslevel=compresslevel)
 
     if "t" in mode:
+        encoding = io.text_encoding(encoding)
         return io.TextIOWrapper(binary_file, encoding, errors, newline)
     else:
         return binary_file
diff --git a/Lib/configparser.py b/Lib/configparser.py
index 924cc56a3f150..3b4cb5e6b2407 100644
--- a/Lib/configparser.py
+++ b/Lib/configparser.py
@@ -690,6 +690,7 @@ def read(self, filenames, encoding=None):
         """
         if isinstance(filenames, (str, bytes, os.PathLike)):
             filenames = [filenames]
+        encoding = io.text_encoding(encoding)
         read_ok = []
         for filename in filenames:
             try:
diff --git a/Lib/gzip.py b/Lib/gzip.py
index 136915725ab4f..0a8993ba35471 100644
--- a/Lib/gzip.py
+++ b/Lib/gzip.py
@@ -62,6 +62,7 @@ def open(filename, mode="rb", compresslevel=_COMPRESS_LEVEL_BEST,
         raise TypeError("filename must be a str or bytes object, or a file")
 
     if "t" in mode:
+        encoding = io.text_encoding(encoding)
         return io.TextIOWrapper(binary_file, encoding, errors, newline)
     else:
         return binary_file
diff --git a/Lib/io.py b/Lib/io.py
index fbce6efc010c0..01f1df80ded29 100644
--- a/Lib/io.py
+++ b/Lib/io.py
@@ -54,7 +54,7 @@
 from _io import (DEFAULT_BUFFER_SIZE, BlockingIOError, UnsupportedOperation,
                  open, open_code, FileIO, BytesIO, StringIO, BufferedReader,
                  BufferedWriter, BufferedRWPair, BufferedRandom,
-                 IncrementalNewlineDecoder, TextIOWrapper)
+                 IncrementalNewlineDecoder, text_encoding, TextIOWrapper)
 
 OpenWrapper = _io.open # for compatibility with _pyio
 
diff --git a/Lib/lzma.py b/Lib/lzma.py
index 0817b872d2019..c8b197055cddc 100644
--- a/Lib/lzma.py
+++ b/Lib/lzma.py
@@ -302,6 +302,7 @@ def open(filename, mode="rb", *,
                            preset=preset, filters=filters)
 
     if "t" in mode:
+        encoding = io.text_encoding(encoding)
         return io.TextIOWrapper(binary_file, encoding, errors, newline)
     else:
         return binary_file
diff --git a/Lib/pathlib.py b/Lib/pathlib.py
index 531a699a40df4..5c9284b331a32 100644
--- a/Lib/pathlib.py
+++ b/Lib/pathlib.py
@@ -1241,6 +1241,8 @@ def open(self, mode='r', buffering=-1, encoding=None,
         Open the file pointed by this path and return a file object, as
         the built-in open() function does.
         """
+        if "b" not in mode:
+            encoding = io.text_encoding(encoding)
         return io.open(self, mode, buffering, encoding, errors, newline,
                        opener=self._opener)
 
@@ -1255,6 +1257,7 @@ def read_text(self, encoding=None, errors=None):
         """
         Open the file in text mode, read it, and close the file.
         """
+        encoding = io.text_encoding(encoding)
         with self.open(mode='r', encoding=encoding, errors=errors) as f:
             return f.read()
 
@@ -1274,6 +1277,7 @@ def write_text(self, data, encoding=None, errors=None, newline=None):
         if not isinstance(data, str):
             raise TypeError('data must be str, not %s' %
                             data.__class__.__name__)
+        encoding = io.text_encoding(encoding)
         with self.open(mode='w', encoding=encoding, errors=errors, newline=newline) as f:
             return f.write(data)
 
diff --git a/Lib/site.py b/Lib/site.py
index 5f1b31e73d90a..939893eb5ee93 100644
--- a/Lib/site.py
+++ b/Lib/site.py
@@ -170,7 +170,9 @@ def addpackage(sitedir, name, known_paths):
     fullname = os.path.join(sitedir, name)
     _trace(f"Processing .pth file: {fullname!r}")
     try:
-        f = io.TextIOWrapper(io.open_code(fullname))
+        # locale encoding is not ideal especially on Windows. But we have used
+        # it for a long time. setuptools uses the locale encoding too.
+        f = io.TextIOWrapper(io.open_code(fullname), encoding="locale")
     except OSError:
         return
     with f:
diff --git a/Lib/subprocess.py b/Lib/subprocess.py
index 4b011e4ce5579..2b785496e4f5f 100644
--- a/Lib/subprocess.py
+++ b/Lib/subprocess.py
@@ -693,7 +693,7 @@ def _use_posix_spawn():
 _USE_POSIX_SPAWN = _use_posix_spawn()
 
 
-class Popen(object):
+class Popen:
     """ Execute a child program in a new process.
 
     For a complete description of the arguments see the Python documentation.
@@ -844,6 +844,13 @@ def __init__(self, args, bufsize=-1, executable=None,
 
         self.text_mode = encoding or errors or text or universal_newlines
 
+        # PEP 597: We suppress the EncodingWarning in subprocess module
+        # for now (at Python 3.10), because we focus on files for now.
+        # This will be changed to encoding = io.text_encoding(encoding)
+        # in the future.
+        if self.text_mode and encoding is None:
+            self.encoding = encoding = "locale"
+
         # How long to resume waiting on a child after the first ^C.
         # There is no right value for this.  The purpose is to be polite
         # yet remain good for interactive users trying to exit a tool.
diff --git a/Lib/tempfile.py b/Lib/tempfile.py
index 4b2547c98f1c7..efcf7a7fb3bbc 100644
--- a/Lib/tempfile.py
+++ b/Lib/tempfile.py
@@ -543,6 +543,9 @@ def NamedTemporaryFile(mode='w+b', buffering=-1, encoding=None,
     if _os.name == 'nt' and delete:
         flags |= _os.O_TEMPORARY
 
+    if "b" not in mode:
+        encoding = _io.text_encoding(encoding)
+
     (fd, name) = _mkstemp_inner(dir, prefix, suffix, flags, output_type)
     try:
         file = _io.open(fd, mode, buffering=buffering,
@@ -583,6 +586,9 @@ def TemporaryFile(mode='w+b', buffering=-1, encoding=None,
         """
         global _O_TMPFILE_WORKS
 
+        if "b" not in mode:
+            encoding = _io.text_encoding(encoding)
+
         prefix, suffix, dir, output_type = _sanitize_params(prefix, suffix, dir)
 
         flags = _bin_openflags
@@ -638,6 +644,7 @@ def __init__(self, max_size=0, mode='w+b', buffering=-1,
         if 'b' in mode:
             self._file = _io.BytesIO()
         else:
+            encoding = _io.text_encoding(encoding)
             self._file = _io.TextIOWrapper(_io.BytesIO(),
                             encoding=encoding, errors=errors,
                             newline=newline)
diff --git a/Lib/test/exception_hierarchy.txt b/Lib/test/exception_hierarchy.txt
index 763a6c899b48e..6c5e82139105b 100644
--- a/Lib/test/exception_hierarchy.txt
+++ b/Lib/test/exception_hierarchy.txt
@@ -61,4 +61,5 @@ BaseException
            +-- ImportWarning
            +-- UnicodeWarning
            +-- BytesWarning
+           +-- EncodingWarning
            +-- ResourceWarning
diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py
index 6833b2540d67d..646cd0632edd8 100644
--- a/Lib/test/test_embed.py
+++ b/Lib/test/test_embed.py
@@ -389,6 +389,7 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
 
         'site_import': 1,
         'bytes_warning': 0,
+        'warn_default_encoding': 0,
         'inspect': 0,
         'interactive': 0,
         'optimization_level': 0,
diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py
index 3768b625516f4..c731302a9f22f 100644
--- a/Lib/test/test_io.py
+++ b/Lib/test/test_io.py
@@ -4249,6 +4249,29 @@ def test_check_encoding_errors(self):
         proc = assert_python_failure('-X', 'dev', '-c', code)
         self.assertEqual(proc.rc, 10, proc)
 
+    def test_check_encoding_warning(self):
+        # PEP 597: Raise warning when encoding is not specified
+        # and sys.flags.warn_default_encoding is set.
+        mod = self.io.__name__
+        filename = __file__
+        code = textwrap.dedent(f'''\
+            import sys
+            from {mod} import open, TextIOWrapper
+            import pathlib
+
+            with open({filename!r}) as f:           # line 5
+                pass
+
+            pathlib.Path({filename!r}).read_text()  # line 8
+        ''')
+        proc = assert_python_ok('-X', 'warn_default_encoding', '-c', code)
+        warnings = proc.err.splitlines()
+        self.assertEqual(len(warnings), 2)
+        self.assertTrue(
+            warnings[0].startswith(b"<string>:5: EncodingWarning: "))
+        self.assertTrue(
+            warnings[1].startswith(b"<string>:8: EncodingWarning: "))
+
 
 class CMiscIOTest(MiscIOTest):
     io = io
diff --git a/Lib/test/test_pickle.py b/Lib/test/test_pickle.py
index 1f5cb103933e0..23c7bd261e85c 100644
--- a/Lib/test/test_pickle.py
+++ b/Lib/test/test_pickle.py
@@ -483,7 +483,8 @@ def test_exceptions(self):
                 if exc in (BlockingIOError,
                            ResourceWarning,
                            StopAsyncIteration,
-                           RecursionError):
+                           RecursionError,
+                           EncodingWarning):
                     continue
                 if exc is not OSError and issubclass(exc, OSError):
                     self.assertEqual(reverse_mapping('builtins', name),
diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py
index fca05e6f88f30..5b004c2b52da8 100644
--- a/Lib/test/test_sys.py
+++ b/Lib/test/test_sys.py
@@ -591,7 +591,8 @@ def test_sys_flags(self):
                  "inspect", "interactive", "optimize",
                  "dont_write_bytecode", "no_user_site", "no_site",
                  "ignore_environment", "verbose", "bytes_warning", "quiet",
-                 "hash_randomization", "isolated", "dev_mode", "utf8_mode")
+                 "hash_randomization", "isolated", "dev_mode", "utf8_mode",
+                 "warn_default_encoding")
         for attr in attrs:
             self.assertTrue(hasattr(sys.flags, attr), attr)
             attr_type = bool if attr == "dev_mode" else int
diff --git a/Misc/NEWS.d/next/Library/2021-03-16-17-20-33.bpo-43510.-BeQH_.rst b/Misc/NEWS.d/next/Library/2021-03-16-17-20-33.bpo-43510.-BeQH_.rst
new file mode 100644
index 0000000000000..b79a49c881bcc
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2021-03-16-17-20-33.bpo-43510.-BeQH_.rst
@@ -0,0 +1,3 @@
+Implement :pep:`597`: Add ``EncodingWarning`` warning, ``-X
+warn_default_encoding`` option, :envvar:`PYTHONWARNDEFAULTENCODING`
+environment variable and ``encoding="locale"`` argument value.
diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c
index 9147648b243be..652c2ce5b0d61 100644
--- a/Modules/_io/_iomodule.c
+++ b/Modules/_io/_iomodule.c
@@ -10,6 +10,7 @@
 #define PY_SSIZE_T_CLEAN
 #include "Python.h"
 #include "_iomodule.h"
+#include "pycore_pystate.h"       // _PyInterpreterState_GET()
 
 #ifdef HAVE_SYS_TYPES_H
 #include <sys/types.h>
@@ -33,6 +34,7 @@ PyObject *_PyIO_str_fileno = NULL;
 PyObject *_PyIO_str_flush = NULL;
 PyObject *_PyIO_str_getstate = NULL;
 PyObject *_PyIO_str_isatty = NULL;
+PyObject *_PyIO_str_locale = NULL;
 PyObject *_PyIO_str_newlines = NULL;
 PyObject *_PyIO_str_nl = NULL;
 PyObject *_PyIO_str_peek = NULL;
@@ -504,6 +506,43 @@ _io_open_impl(PyObject *module, PyObject *file, const char *mode,
     return NULL;
 }
 
+
+/*[clinic input]
+_io.text_encoding
+    encoding: object
+    stacklevel: int = 2
+    /
+
+A helper function to choose the text encoding.
+
+When encoding is not None, just return it.
+Otherwise, return the default text encoding (i.e. "locale").
+
+This function emits an EncodingWarning if encoding is None and
+sys.flags.warn_default_encoding is true.
+
+This can be used in APIs with an encoding=None parameter.
+However, please consider using encoding="utf-8" for new APIs.
+[clinic start generated code]*/
+
+static PyObject *
+_io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel)
+/*[clinic end generated code: output=91b2cfea6934cc0c input=bf70231213e2a7b4]*/
+{
+    if (encoding == NULL || encoding == Py_None) {
+        PyInterpreterState *interp = _PyInterpreterState_GET();
+        if (_PyInterpreterState_GetConfig(interp)->warn_default_encoding) {
+            PyErr_WarnEx(PyExc_EncodingWarning,
+                         "'encoding' argument not specified", stacklevel);
+        }
+        Py_INCREF(_PyIO_str_locale);
+        return _PyIO_str_locale;
+    }
+    Py_INCREF(encoding);
+    return encoding;
+}
+
+
 /*[clinic input]
 _io.open_code
 
@@ -629,6 +668,7 @@ iomodule_free(PyObject *mod) {
 
 static PyMethodDef module_methods[] = {
     _IO_OPEN_METHODDEF
+    _IO_TEXT_ENCODING_METHODDEF
     _IO_OPEN_CODE_METHODDEF
     {NULL, NULL}
 };
@@ -747,6 +787,7 @@ PyInit__io(void)
     ADD_INTERNED(flush)
     ADD_INTERNED(getstate)
     ADD_INTERNED(isatty)
+    ADD_INTERNED(locale)
     ADD_INTERNED(newlines)
     ADD_INTERNED(peek)
     ADD_INTERNED(read)
diff --git a/Modules/_io/clinic/_iomodule.c.h b/Modules/_io/clinic/_iomodule.c.h
index dc7b5ff243a78..91c55b1816cd8 100644
--- a/Modules/_io/clinic/_iomodule.c.h
+++ b/Modules/_io/clinic/_iomodule.c.h
@@ -272,6 +272,52 @@ _io_open(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kw
     return return_value;
 }
 
+PyDoc_STRVAR(_io_text_encoding__doc__,
+"text_encoding($module, encoding, stacklevel=2, /)\n"
+"--\n"
+"\n"
+"A helper function to choose the text encoding.\n"
+"\n"
+"When encoding is not None, just return it.\n"
+"Otherwise, return the default text encoding (i.e. \"locale\").\n"
+"\n"
+"This function emits an EncodingWarning if encoding is None and\n"
+"sys.flags.warn_default_encoding is true.\n"
+"\n"
+"This can be used in APIs with an encoding=None parameter.\n"
+"However, please consider using encoding=\"utf-8\" for new APIs.");
+
+#define _IO_TEXT_ENCODING_METHODDEF    \
+    {"text_encoding", (PyCFunction)(void(*)(void))_io_text_encoding, METH_FASTCALL, _io_text_encoding__doc__},
+
+static PyObject *
+_io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel);
+
+static PyObject *
+_io_text_encoding(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
+{
+    PyObject *return_value = NULL;
+    PyObject *encoding;
+    int stacklevel = 2;
+
+    if (!_PyArg_CheckPositional("text_encoding", nargs, 1, 2)) {
+        goto exit;
+    }
+    encoding = args[0];
+    if (nargs < 2) {
+        goto skip_optional;
+    }
+    stacklevel = _PyLong_AsInt(args[1]);
+    if (stacklevel == -1 && PyErr_Occurred()) {
+        goto exit;
+    }
+skip_optional:
+    return_value = _io_text_encoding_impl(module, encoding, stacklevel);
+
+exit:
+    return return_value;
+}
+
 PyDoc_STRVAR(_io_open_code__doc__,
 "open_code($module, /, path)\n"
 "--\n"
@@ -313,4 +359,4 @@ _io_open_code(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObjec
 exit:
     return return_value;
 }
-/*[clinic end generated code: output=5c0dd7a262c30ebc input=a9049054013a1b77]*/
+/*[clinic end generated code: output=06e055d1d80b835d input=a9049054013a1b77]*/
diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c
index 03001ecb0a5b3..6f89a879c9c2b 100644
--- a/Modules/_io/textio.c
+++ b/Modules/_io/textio.c
@@ -1123,6 +1123,17 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
     self->encodefunc = NULL;
     self->b2cratio = 0.0;
 
+    if (encoding == NULL) {
+        PyInterpreterState *interp = _PyInterpreterState_GET();
+        if (_PyInterpreterState_GetConfig(interp)->warn_default_encoding) {
+            PyErr_WarnEx(PyExc_EncodingWarning,
+                         "'encoding' argument not specified", 1);
+        }
+    }
+    else if (strcmp(encoding, "locale") == 0) {
+        encoding = NULL;
+    }
+
     if (encoding == NULL) {
         /* Try os.device_encoding(fileno) */
         PyObject *fileno;
diff --git a/Objects/exceptions.c b/Objects/exceptions.c
index 88e2287b14354..dfa069e01d960 100644
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -2464,6 +2464,13 @@ SimpleExtendsException(PyExc_Warning, BytesWarning,
     "related to conversion from str or comparing to str.");
 
 
+/*
+ *    EncodingWarning extends Warning
+ */
+SimpleExtendsException(PyExc_Warning, EncodingWarning,
+    "Base class for warnings about encodings.");
+
+
 /*
  *    ResourceWarning extends Warning
  */
@@ -2592,6 +2599,7 @@ _PyExc_Init(PyInterpreterState *interp)
     PRE_INIT(BufferError);
     PRE_INIT(Warning);
     PRE_INIT(UserWarning);
+    PRE_INIT(EncodingWarning);
     PRE_INIT(DeprecationWarning);
     PRE_INIT(PendingDeprecationWarning);
     PRE_INIT(SyntaxWarning);
@@ -2731,6 +2739,7 @@ _PyBuiltins_AddExceptions(PyObject *bltinmod)
     POST_INIT(BufferError);
     POST_INIT(Warning);
     POST_INIT(UserWarning);
+    POST_INIT(EncodingWarning);
     POST_INIT(DeprecationWarning);
     POST_INIT(PendingDeprecationWarning);
     POST_INIT(SyntaxWarning);
diff --git a/PC/python3dll.c b/PC/python3dll.c
index ddbd1b1e8e422..1567ac159168a 100644
--- a/PC/python3dll.c
+++ b/PC/python3dll.c
@@ -724,6 +724,7 @@ EXPORT_DATA(PyExc_BlockingIOError)
 EXPORT_DATA(PyExc_BrokenPipeError)
 EXPORT_DATA(PyExc_BufferError)
 EXPORT_DATA(PyExc_BytesWarning)
+EXPORT_DATA(PyExc_EncodingWarning)
 EXPORT_DATA(PyExc_ChildProcessError)
 EXPORT_DATA(PyExc_ConnectionAbortedError)
 EXPORT_DATA(PyExc_ConnectionError)
diff --git a/Python/initconfig.c b/Python/initconfig.c
index 7886d09f7a027..27ae48dd3c97c 100644
--- a/Python/initconfig.c
+++ b/Python/initconfig.c
@@ -94,6 +94,7 @@ static const char usage_3[] = "\
              otherwise activate automatically)\n\
          -X pycache_prefix=PATH: enable writing .pyc files to a parallel tree rooted at the\n\
              given directory instead of to the code tree\n\
+         -X warn_default_encoding: enable opt-in EncodingWarning for 'encoding=None'\n\
 \n\
 --check-hash-based-pycs always|default|never:\n\
     control how Python invalidates hash-based .pyc files\n\
@@ -129,7 +130,8 @@ static const char usage_6[] =
 "PYTHONBREAKPOINT: if this variable is set to 0, it disables the default\n"
 "   debugger. It can be set to the callable of your debugger of choice.\n"
 "PYTHONDEVMODE: enable the development mode.\n"
-"PYTHONPYCACHEPREFIX: root directory for bytecode cache (pyc) files.\n";
+"PYTHONPYCACHEPREFIX: root directory for bytecode cache (pyc) files.\n"
+"PYTHONWARNDEFAULTENCODING: enable opt-in EncodingWarning for 'encoding=None'.\n";
 
 #if defined(MS_WINDOWS)
 #  define PYTHONHOMEHELP "<prefix>\\python{major}{minor}"
@@ -600,6 +602,7 @@ config_check_consistency(const PyConfig *config)
     assert(config->malloc_stats >= 0);
     assert(config->site_import >= 0);
     assert(config->bytes_warning >= 0);
+    assert(config->warn_default_encoding >= 0);
     assert(config->inspect >= 0);
     assert(config->interactive >= 0);
     assert(config->optimization_level >= 0);
@@ -698,6 +701,7 @@ _PyConfig_InitCompatConfig(PyConfig *config)
     config->parse_argv = 0;
     config->site_import = -1;
     config->bytes_warning = -1;
+    config->warn_default_encoding = 0;
     config->inspect = -1;
     config->interactive = -1;
     config->optimization_level = -1;
@@ -906,6 +910,7 @@ _PyConfig_Copy(PyConfig *config, const PyConfig *config2)
 
     COPY_ATTR(site_import);
     COPY_ATTR(bytes_warning);
+    COPY_ATTR(warn_default_encoding);
     COPY_ATTR(inspect);
     COPY_ATTR(interactive);
     COPY_ATTR(optimization_level);
@@ -1007,6 +1012,7 @@ _PyConfig_AsDict(const PyConfig *config)
     SET_ITEM_WSTR(platlibdir);
     SET_ITEM_INT(site_import);
     SET_ITEM_INT(bytes_warning);
+    SET_ITEM_INT(warn_default_encoding);
     SET_ITEM_INT(inspect);
     SET_ITEM_INT(interactive);
     SET_ITEM_INT(optimization_level);
@@ -1271,6 +1277,7 @@ _PyConfig_FromDict(PyConfig *config, PyObject *dict)
     GET_WSTRLIST(warnoptions);
     GET_UINT(site_import);
     GET_UINT(bytes_warning);
+    GET_UINT(warn_default_encoding);
     GET_UINT(inspect);
     GET_UINT(interactive);
     GET_UINT(optimization_level);
diff --git a/Python/preconfig.c b/Python/preconfig.c
index b8b0c3a0775ca..ae1cc3f90fca7 100644
--- a/Python/preconfig.c
+++ b/Python/preconfig.c
@@ -169,6 +169,7 @@ _PyPreCmdline_SetConfig(const _PyPreCmdline *cmdline, PyConfig *config)
     COPY_ATTR(isolated);
     COPY_ATTR(use_environment);
     COPY_ATTR(dev_mode);
+    COPY_ATTR(warn_default_encoding);
     return _PyStatus_OK();
 
 #undef COPY_ATTR
@@ -257,9 +258,17 @@ _PyPreCmdline_Read(_PyPreCmdline *cmdline, const PyPreConfig *preconfig)
         cmdline->dev_mode = 0;
     }
 
+    // warn_default_encoding
+    if (_Py_get_xoption(&cmdline->xoptions, L"warn_default_encoding")
+            || _Py_GetEnv(cmdline->use_environment, "PYTHONWARNDEFAULTENCODING"))
+    {
+        cmdline->warn_default_encoding = 1;
+    }
+
     assert(cmdline->use_environment >= 0);
     assert(cmdline->isolated >= 0);
     assert(cmdline->dev_mode >= 0);
+    assert(cmdline->warn_default_encoding >= 0);
 
     return _PyStatus_OK();
 }
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
index 686b6cae3b294..54d70ef056975 100644
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -2514,6 +2514,7 @@ static PyStructSequence_Field flags_fields[] = {
     {"isolated",                "-I"},
     {"dev_mode",                "-X dev"},
     {"utf8_mode",               "-X utf8"},
+    {"warn_default_encoding",   "-X warn_default_encoding"},
     {0}
 };
 
@@ -2521,7 +2522,7 @@ static PyStructSequence_Desc flags_desc = {
     "sys.flags",        /* name */
     flags__doc__,       /* doc */
     flags_fields,       /* fields */
-    15
+    16
 };
 
 static int
@@ -2560,6 +2561,7 @@ set_flags_from_config(PyInterpreterState *interp, PyObject *flags)
     SetFlag(config->isolated);
     SetFlagObj(PyBool_FromLong(config->dev_mode));
     SetFlag(preconfig->utf8_mode);
+    SetFlag(config->warn_default_encoding);
 #undef SetFlagObj
 #undef SetFlag
     return 0;



More information about the Python-checkins mailing list