[Python-checkins] bpo-44349: Fix edge case when displaying text from files with encoding in syntax errors (GH-26611)

pablogsal webhook-mailer at python.org
Tue Jun 8 19:54:37 EDT 2021


https://github.com/python/cpython/commit/9fd21f649d66dcb10108ee395fd68ed32c8239cd
commit: 9fd21f649d66dcb10108ee395fd68ed32c8239cd
branch: main
author: Pablo Galindo <Pablogsal at gmail.com>
committer: pablogsal <Pablogsal at gmail.com>
date: 2021-06-09T00:54:29+01:00
summary:

bpo-44349: Fix edge case when displaying text from files with encoding in syntax errors (GH-26611)

files:
A Misc/NEWS.d/next/Core and Builtins/2021-06-08-22-49-06.bpo-44349.xgEgeA.rst
M Lib/test/test_exceptions.py
M Parser/pegen.c

diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py
index df5778d7e5f6a..b242c082f8568 100644
--- a/Lib/test/test_exceptions.py
+++ b/Lib/test/test_exceptions.py
@@ -2105,6 +2105,22 @@ def test_range_of_offsets(self):
                         sys.__excepthook__(*sys.exc_info())
                     the_exception = exc
 
+    def test_encodings(self):
+        source = (
+            '# -*- coding: cp437 -*-\n'
+            '"¢¢¢¢¢¢" + f(4, x for x in range(1))\n'
+        )
+        try:
+            with open(TESTFN, 'w', encoding='cp437') as testfile:
+                testfile.write(source)
+            rc, out, err = script_helper.assert_python_failure('-Wd', '-X', 'utf8', TESTFN)
+            err = err.decode('utf-8').splitlines()
+
+            self.assertEqual(err[-3], '    "¢¢¢¢¢¢" + f(4, x for x in range(1))')
+            self.assertEqual(err[-2], '                          ^^^^^^^^^^^^^^^^^^^')
+        finally:
+            unlink(TESTFN)
+
     def test_attributes_new_constructor(self):
         args = ("bad.py", 1, 2, "abcdefg", 1, 100)
         the_exception = SyntaxError("bad bad", args)
diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-06-08-22-49-06.bpo-44349.xgEgeA.rst b/Misc/NEWS.d/next/Core and Builtins/2021-06-08-22-49-06.bpo-44349.xgEgeA.rst
new file mode 100644
index 0000000000000..b386a8ed2c846
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2021-06-08-22-49-06.bpo-44349.xgEgeA.rst	
@@ -0,0 +1 @@
+Fix an edge case when displaying the source text in syntax errors for files that use a non-UTF-8 source encoding. Patch by Pablo Galindo.
\ No newline at end of file
diff --git a/Parser/pegen.c b/Parser/pegen.c
index 42a992251da97..e6518198eca07 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -456,10 +456,13 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
         goto error;
     }
 
+    // PyErr_ProgramTextObject assumes that the text is utf-8, so we cannot call it on a file
+    // that uses any other encoding; otherwise we could get badly decoded text.
+    int uses_utf8_codec = (!p->tok->encoding || strcmp(p->tok->encoding, "utf-8") == 0);
     if (p->tok->fp_interactive) {
         error_line = get_error_line(p, lineno);
     }
-    else if (p->start_rule == Py_file_input) {
+    else if (uses_utf8_codec && p->start_rule == Py_file_input) {
         error_line = PyErr_ProgramTextObject(p->tok->filename, (int) lineno);
     }
 
@@ -471,7 +474,7 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
            we're actually parsing from a file, which has an E_EOF SyntaxError and in that case
            `PyErr_ProgramTextObject` fails because lineno points to last_file_line + 1, which
            does not physically exist */
-        assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF);
+        assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF || !uses_utf8_codec);
 
         if (p->tok->lineno <= lineno) {
             Py_ssize_t size = p->tok->inp - p->tok->buf;



More information about the Python-checkins mailing list