[Python-checkins] bpo-40176: Improve error messages for unclosed string literals (GH-19346)

miss-islington webhook-mailer at python.org
Wed Jan 20 16:38:58 EST 2021


https://github.com/python/cpython/commit/a698d52c3975c80b45b139b2f08402ec514dce75
commit: a698d52c3975c80b45b139b2f08402ec514dce75
branch: master
author: Batuhan Taskaya <isidentical at gmail.com>
committer: miss-islington <31488909+miss-islington at users.noreply.github.com>
date: 2021-01-20T13:38:47-08:00
summary:

bpo-40176: Improve error messages for unclosed string literals (GH-19346)



Automerge-Triggered-By: GH:isidentical

files:
A Misc/NEWS.d/next/Core and Builtins/2021-01-20-22-31-01.bpo-40176.anjyWw.rst
M Include/errcode.h
M Lib/test/test_eof.py
M Lib/test/test_exceptions.py
M Lib/test/test_fstring.py
M Parser/pegen.c
M Parser/tokenizer.c

diff --git a/Include/errcode.h b/Include/errcode.h
index 790518b8b7730..f2671d6c9b30b 100644
--- a/Include/errcode.h
+++ b/Include/errcode.h
@@ -26,8 +26,6 @@ extern "C" {
 #define E_TOODEEP       20      /* Too many indentation levels */
 #define E_DEDENT        21      /* No matching outer block for dedent */
 #define E_DECODE        22      /* Error in decoding into Unicode */
-#define E_EOFS          23      /* EOF in triple-quoted string */
-#define E_EOLS          24      /* EOL in single-quoted string */
 #define E_LINECONT      25      /* Unexpected characters after a line continuation */
 #define E_BADSINGLE     27      /* Ill-formed single statement input */
 
diff --git a/Lib/test/test_eof.py b/Lib/test/test_eof.py
index 2cf263d27463c..b370e27161cee 100644
--- a/Lib/test/test_eof.py
+++ b/Lib/test/test_eof.py
@@ -7,23 +7,25 @@
 import unittest
 
 class EOFTestCase(unittest.TestCase):
-    def test_EOFC(self):
-        expect = "EOL while scanning string literal (<string>, line 1)"
-        try:
-            eval("""'this is a test\
-            """)
-        except SyntaxError as msg:
-            self.assertEqual(str(msg), expect)
-        else:
-            raise support.TestFailed
+    def test_EOF_single_quote(self):
+        expect = "unterminated string literal (detected at line 1) (<string>, line 1)"
+        for quote in ("'", "\""):
+            try:
+                eval(f"""{quote}this is a test\
+                """)
+            except SyntaxError as msg:
+                self.assertEqual(str(msg), expect)
+                self.assertEqual(msg.offset, 1)
+            else:
+                raise support.TestFailed
 
     def test_EOFS(self):
-        expect = ("EOF while scanning triple-quoted string literal "
-                  "(<string>, line 1)")
+        expect = ("unterminated triple-quoted string literal (detected at line 1) (<string>, line 1)")
         try:
             eval("""'''this is a test""")
         except SyntaxError as msg:
             self.assertEqual(str(msg), expect)
+            self.assertEqual(msg.offset, 1)
         else:
             raise support.TestFailed
 
diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py
index eb70d7b4e4972..21878c39f4fec 100644
--- a/Lib/test/test_exceptions.py
+++ b/Lib/test/test_exceptions.py
@@ -206,7 +206,7 @@ def testSyntaxErrorOffset(self):
         check(b'# -*- coding: cp1251 -*-\nPython = "\xcf\xb3\xf2\xee\xed" +',
               2, 19, encoding='cp1251')
         check(b'Python = "\xcf\xb3\xf2\xee\xed" +', 1, 18)
-        check('x = "a', 1, 7)
+        check('x = "a', 1, 5)
         check('lambda x: x = 2', 1, 1)
         check('f{a + b + c}', 1, 2)
         check('[file for str(file) in []\n])', 1, 11)
@@ -238,7 +238,7 @@ def bar():
 
             def baz():
                 '''quux'''
-            """, 9, 20)
+            """, 9, 24)
         check("pass\npass\npass\n(1+)\npass\npass\npass", 4, 4)
         check("(1+)", 1, 4)
 
diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py
index 2345832abce62..7ca1512ebbf1b 100644
--- a/Lib/test/test_fstring.py
+++ b/Lib/test/test_fstring.py
@@ -661,7 +661,7 @@ def test_parens_in_expressions(self):
                             ["f'{3)+(4}'",
                              ])
 
-        self.assertAllRaise(SyntaxError, 'EOL while scanning string literal',
+        self.assertAllRaise(SyntaxError, 'unterminated string literal',
                             ["f'{\n}'",
                              ])
 
diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-01-20-22-31-01.bpo-40176.anjyWw.rst b/Misc/NEWS.d/next/Core and Builtins/2021-01-20-22-31-01.bpo-40176.anjyWw.rst
new file mode 100644
index 0000000000000..df7de3bdf37bc
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2021-01-20-22-31-01.bpo-40176.anjyWw.rst	
@@ -0,0 +1,2 @@
+Syntax errors for unterminated string literals now point to the start
+of the string instead of reporting EOF/EOL.
diff --git a/Parser/pegen.c b/Parser/pegen.c
index 0d39030ea6ed1..0e7f86bc99e45 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -327,12 +327,6 @@ tokenizer_error(Parser *p)
         case E_TOKEN:
             msg = "invalid token";
             break;
-        case E_EOFS:
-            RAISE_SYNTAX_ERROR("EOF while scanning triple-quoted string literal");
-            return -1;
-        case E_EOLS:
-            RAISE_SYNTAX_ERROR("EOL while scanning string literal");
-            return -1;
         case E_EOF:
             if (p->tok->level) {
                 raise_unclosed_parentheses_error(p);
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index d3e846c0a5a12..d9334aaf148ba 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -1739,20 +1739,26 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
         /* Get rest of string */
         while (end_quote_size != quote_size) {
             c = tok_nextc(tok);
-            if (c == EOF) {
+            if (c == EOF || (quote_size == 1 && c == '\n')) {
+                // shift the tok_state's location to
+                // the start of the string, and report the error
+                // from the initial quote character
+                tok->cur = (char *)tok->start;
+                tok->cur++;
+                tok->line_start = tok->multi_line_start;
+                int start = tok->lineno;
+                tok->lineno = tok->first_lineno;
+
                 if (quote_size == 3) {
-                    tok->done = E_EOFS;
+                    return syntaxerror(tok,
+                                       "unterminated triple-quoted string literal"
+                                       " (detected at line %d)", start);
                 }
                 else {
-                    tok->done = E_EOLS;
+                    return syntaxerror(tok,
+                                       "unterminated string literal (detected at"
+                                       " line %d)", start);
                 }
-                tok->cur = tok->inp;
-                return ERRORTOKEN;
-            }
-            if (quote_size == 1 && c == '\n') {
-                tok->done = E_EOLS;
-                tok->cur = tok->inp;
-                return ERRORTOKEN;
             }
             if (c == quote) {
                 end_quote_size += 1;



More information about the Python-checkins mailing list