[Python-checkins] bpo-43591: Fix error location in interactive mode for errors at the end of the line (GH-24973)

pablogsal webhook-mailer at python.org
Mon Mar 22 12:24:57 EDT 2021


https://github.com/python/cpython/commit/123ff266cda9ad279106f20dca06ba114f6a9b8a
commit: 123ff266cda9ad279106f20dca06ba114f6a9b8a
branch: master
author: Pablo Galindo <Pablogsal at gmail.com>
committer: pablogsal <Pablogsal at gmail.com>
date: 2021-03-22T16:24:39Z
summary:

bpo-43591: Fix error location in interactive mode for errors at the end of the line (GH-24973)

Co-authored-by: Erlend Egeberg Aasland

files:
M Lib/test/test_cmd_line.py
M Parser/pegen.c

diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py
index e87eede0c2676..25d3eec40c13d 100644
--- a/Lib/test/test_cmd_line.py
+++ b/Lib/test/test_cmd_line.py
@@ -851,13 +851,19 @@ def test_sys_flags_not_set(self):
         )
 
 class SyntaxErrorTests(unittest.TestCase):
-    def test_tokenizer_error_with_stdin(self):
-        proc = subprocess.run([sys.executable, "-"], input = b"(1+2+3",
+    def check_string(self, code):
+        proc = subprocess.run([sys.executable, "-"], input=code,
                               stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         self.assertNotEqual(proc.returncode, 0)
         self.assertNotEqual(proc.stderr, None)
         self.assertIn(b"\nSyntaxError", proc.stderr)
 
+    def test_tokenizer_error_with_stdin(self):
+        self.check_string(b"(1+2+3")
+
+    def test_decoding_error_at_the_end_of_the_line(self):
+        self.check_string(b"'\u1f'")
+
 def test_main():
     support.run_unittest(CmdLineTest, IgnoreEnvironmentTest, SyntaxErrorTests)
     support.reap_children()
diff --git a/Parser/pegen.c b/Parser/pegen.c
index 24aa3af336c34..953480df6ab05 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -147,7 +147,11 @@ byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset)
     if (!str) {
         return 0;
     }
-    assert(col_offset >= 0 && (unsigned long)col_offset <= strlen(str));
+    Py_ssize_t len = strlen(str);
+    if (col_offset > len) {
+        col_offset = len;
+    }
+    assert(col_offset >= 0);
     PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, "replace");
     if (!text) {
         return 0;
@@ -392,10 +396,10 @@ _PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
 static PyObject *
 get_error_line(Parser *p, Py_ssize_t lineno)
 {
-    /* If p->tok->fp == NULL, then we're parsing from a string, which means that
-       the whole source is stored in p->tok->str. If not, then we're parsing
-       from the REPL, so the source lines of the current (multi-line) statement
-       are stored in p->tok->stdin_content */
+    /* If the file descriptor is interactive, the source lines of the current
+     * (multi-line) statement are stored in p->tok->interactive_src_start.
+     * If not, we're parsing from a string, which means that the whole source
+     * is stored in p->tok->str. */
     assert(p->tok->fp == NULL || p->tok->fp == stdin);
 
     char *cur_line = p->tok->fp_interactive ? p->tok->interactive_src_start : p->tok->str;



More information about the Python-checkins mailing list