[Python-checkins] bpo-34683: Make SyntaxError column offsets consistently 1-indexed (gh-9338)

Guido van Rossum webhook-mailer at python.org
Mon Sep 24 17:12:54 EDT 2018


https://github.com/python/cpython/commit/025eb98dc0c1dc27404df6c544fc2944e0fa9f3a
commit: 025eb98dc0c1dc27404df6c544fc2944e0fa9f3a
branch: master
author: Ammar Askar <ammar_askar at hotmail.com>
committer: Guido van Rossum <guido at python.org>
date: 2018-09-24T14:12:49-07:00
summary:

bpo-34683: Make SyntaxError column offsets consistently 1-indexed (gh-9338)

Also point to start of tokens in parsing errors.

Fixes bpo-34683

files:
A Misc/NEWS.d/next/Core and Builtins/2018-09-15-19-32-34.bpo-34683.msCiQE.rst
M Lib/test/test_exceptions.py
M Lib/test/test_future.py
M Lib/test/test_global.py
M Lib/test/test_support.py
M Lib/test/test_symtable.py
M Parser/parsetok.c
M Python/ast.c
M Python/compile.c
M Python/future.c
M Python/symtable.c

diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py
index 2a9ec706467f..535324880089 100644
--- a/Lib/test/test_exceptions.py
+++ b/Lib/test/test_exceptions.py
@@ -187,6 +187,45 @@ def check(src, lineno, offset):
         check('def spam():\n  print(1)\n print(2)', 3, 10)
         check('Python = "Python" +', 1, 20)
         check('Python = "\u1e54\xfd\u0163\u0125\xf2\xf1" +', 1, 20)
+        check('x = "a', 1, 7)
+        check('lambda x: x = 2', 1, 1)
+
+        # Errors thrown by compile.c
+        check('class foo:return 1', 1, 11)
+        check('def f():\n  continue', 2, 3)
+        check('def f():\n  break', 2, 3)
+        check('try:\n  pass\nexcept:\n  pass\nexcept ValueError:\n  pass', 2, 3)
+
+        # Errors thrown by tokenizer.c
+        check('(0x+1)', 1, 3)
+        check('x = 0xI', 1, 6)
+        check('0010 + 2', 1, 4)
+        check('x = 32e-+4', 1, 8)
+        check('x = 0o9', 1, 6)
+
+        # Errors thrown by symtable.c
+        check('x = [(yield i) for i in range(3)]', 1, 6)
+        check('def f():\n  from _ import *', 1, 1)
+        check('def f(x, x):\n  pass', 1, 1)
+        check('def f(x):\n  nonlocal x', 2, 3)
+        check('def f(x):\n  x = 1\n  global x', 3, 3)
+        check('nonlocal x', 1, 1)
+        check('def f():\n  global x\n  nonlocal x', 2, 3)
+
+        # Errors thrown by ast.c
+        check('for 1 in []: pass', 1, 5)
+        check('def f(*):\n  pass', 1, 7)
+        check('[*x for x in xs]', 1, 2)
+        check('def f():\n  x, y: int', 2, 3)
+        check('(yield i) = 2', 1, 1)
+        check('foo(x for x in range(10), 100)', 1, 5)
+        check('foo(1=2)', 1, 5)
+
+        # Errors thrown by future.c
+        check('from __future__ import doesnt_exist', 1, 1)
+        check('from __future__ import braces', 1, 1)
+        check('x=1\nfrom __future__ import division', 2, 1)
+
 
     @cpython_only
     def testSettingException(self):
diff --git a/Lib/test/test_future.py b/Lib/test/test_future.py
index 61cd63479d85..660701b4b3d8 100644
--- a/Lib/test/test_future.py
+++ b/Lib/test/test_future.py
@@ -15,7 +15,7 @@ def get_error_location(msg):
 
 class FutureTest(unittest.TestCase):
 
-    def check_syntax_error(self, err, basename, lineno, offset=0):
+    def check_syntax_error(self, err, basename, lineno, offset=1):
         self.assertIn('%s.py, line %d' % (basename, lineno), str(err))
         self.assertEqual(os.path.basename(err.filename), basename + '.py')
         self.assertEqual(err.lineno, lineno)
@@ -68,12 +68,12 @@ def test_badfuture8(self):
     def test_badfuture9(self):
         with self.assertRaises(SyntaxError) as cm:
             from test import badsyntax_future9
-        self.check_syntax_error(cm.exception, "badsyntax_future9", 3, 0)
+        self.check_syntax_error(cm.exception, "badsyntax_future9", 3)
 
     def test_badfuture10(self):
         with self.assertRaises(SyntaxError) as cm:
             from test import badsyntax_future10
-        self.check_syntax_error(cm.exception, "badsyntax_future10", 3, 0)
+        self.check_syntax_error(cm.exception, "badsyntax_future10", 3)
 
     def test_parserhack(self):
         # test that the parser.c::future_hack function works as expected
diff --git a/Lib/test/test_global.py b/Lib/test/test_global.py
index 9eeccf12506f..8159602be98e 100644
--- a/Lib/test/test_global.py
+++ b/Lib/test/test_global.py
@@ -24,7 +24,7 @@ def wrong1():
     global a
     global b
 """
-        check_syntax_error(self, prog_text_1, lineno=4, offset=4)
+        check_syntax_error(self, prog_text_1, lineno=4, offset=5)
 
     def test2(self):
         prog_text_2 = """\
@@ -32,7 +32,7 @@ def wrong2():
     print(x)
     global x
 """
-        check_syntax_error(self, prog_text_2, lineno=3, offset=4)
+        check_syntax_error(self, prog_text_2, lineno=3, offset=5)
 
     def test3(self):
         prog_text_3 = """\
@@ -41,7 +41,7 @@ def wrong3():
     x = 2
     global x
 """
-        check_syntax_error(self, prog_text_3, lineno=4, offset=4)
+        check_syntax_error(self, prog_text_3, lineno=4, offset=5)
 
     def test4(self):
         prog_text_4 = """\
diff --git a/Lib/test/test_support.py b/Lib/test/test_support.py
index 7870e940a46e..171e28aaa802 100644
--- a/Lib/test/test_support.py
+++ b/Lib/test/test_support.py
@@ -286,7 +286,7 @@ def test_make_bad_fd(self):
         self.assertEqual(cm.exception.errno, errno.EBADF)
 
     def test_check_syntax_error(self):
-        support.check_syntax_error(self, "def class", lineno=1, offset=9)
+        support.check_syntax_error(self, "def class", lineno=1, offset=5)
         with self.assertRaises(AssertionError):
             support.check_syntax_error(self, "x=1")
 
diff --git a/Lib/test/test_symtable.py b/Lib/test/test_symtable.py
index dfaee173ef72..2cd735bdc508 100644
--- a/Lib/test/test_symtable.py
+++ b/Lib/test/test_symtable.py
@@ -169,7 +169,7 @@ def checkfilename(brokencode, offset):
             else:
                 self.fail("no SyntaxError for %r" % (brokencode,))
         checkfilename("def f(x): foo)(", 14)  # parse-time
-        checkfilename("def f(x): global x", 10)  # symtable-build-time
+        checkfilename("def f(x): global x", 11)  # symtable-build-time
         symtable.symtable("pass", b"spam", "exec")
         with self.assertWarns(DeprecationWarning), \
              self.assertRaises(TypeError):
diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-09-15-19-32-34.bpo-34683.msCiQE.rst b/Misc/NEWS.d/next/Core and Builtins/2018-09-15-19-32-34.bpo-34683.msCiQE.rst
new file mode 100644
index 000000000000..43bf61c5b86b
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2018-09-15-19-32-34.bpo-34683.msCiQE.rst	
@@ -0,0 +1 @@
+Fixed a bug where some SyntaxError error pointed to locations that were off-by-one.
diff --git a/Parser/parsetok.c b/Parser/parsetok.c
index a1580e6751e4..fc878d89d563 100644
--- a/Parser/parsetok.c
+++ b/Parser/parsetok.c
@@ -187,6 +187,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
     parser_state *ps;
     node *n;
     int started = 0;
+    int col_offset;
 
     if ((ps = PyParser_New(g, start)) == NULL) {
         err_ret->error = E_NOMEM;
@@ -203,7 +204,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
         int type;
         size_t len;
         char *str;
-        int col_offset;
+        col_offset = -1;
 
         type = PyTokenizer_Get(tok, &a, &b);
         if (type == ERRORTOKEN) {
@@ -321,7 +322,10 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
         if (tok->buf != NULL) {
             size_t len;
             assert(tok->cur - tok->buf < INT_MAX);
-            err_ret->offset = (int)(tok->cur - tok->buf);
+            /* if we've managed to parse a token, point the offset to its start,
+             * else use the current reading position of the tokenizer
+             */
+            err_ret->offset = col_offset != -1 ? col_offset + 1 : ((int)(tok->cur - tok->buf));
             len = tok->inp - tok->buf;
             err_ret->text = (char *) PyObject_MALLOC(len + 1);
             if (err_ret->text != NULL) {
diff --git a/Python/ast.c b/Python/ast.c
index b93eb88dae7f..b2fcb219752d 100644
--- a/Python/ast.c
+++ b/Python/ast.c
@@ -683,7 +683,7 @@ ast_error(struct compiling *c, const node *n, const char *errmsg)
         Py_INCREF(Py_None);
         loc = Py_None;
     }
-    tmp = Py_BuildValue("(OiiN)", c->c_filename, LINENO(n), n->n_col_offset, loc);
+    tmp = Py_BuildValue("(OiiN)", c->c_filename, LINENO(n), n->n_col_offset + 1, loc);
     if (!tmp)
         return 0;
     errstr = PyUnicode_FromString(errmsg);
diff --git a/Python/compile.c b/Python/compile.c
index 707da79ab662..3a45804580ed 100644
--- a/Python/compile.c
+++ b/Python/compile.c
@@ -4830,7 +4830,7 @@ compiler_error(struct compiler *c, const char *errstr)
         loc = Py_None;
     }
     u = Py_BuildValue("(OiiO)", c->c_filename, c->u->u_lineno,
-                      c->u->u_col_offset, loc);
+                      c->u->u_col_offset + 1, loc);
     if (!u)
         goto exit;
     v = Py_BuildValue("(zO)", errstr, u);
diff --git a/Python/future.c b/Python/future.c
index 4ea6827723bf..1663a38a6fdb 100644
--- a/Python/future.c
+++ b/Python/future.c
@@ -48,12 +48,12 @@ future_check_features(PyFutureFeatures *ff, stmt_ty s, PyObject *filename)
         } else if (strcmp(feature, "braces") == 0) {
             PyErr_SetString(PyExc_SyntaxError,
                             "not a chance");
-            PyErr_SyntaxLocationObject(filename, s->lineno, s->col_offset);
+            PyErr_SyntaxLocationObject(filename, s->lineno, s->col_offset + 1);
             return 0;
         } else {
             PyErr_Format(PyExc_SyntaxError,
                          UNDEFINED_FUTURE_FEATURE, feature);
-            PyErr_SyntaxLocationObject(filename, s->lineno, s->col_offset);
+            PyErr_SyntaxLocationObject(filename, s->lineno, s->col_offset + 1);
             return 0;
         }
     }
diff --git a/Python/symtable.c b/Python/symtable.c
index 3e8c6f5dae30..e7216147a8a4 100644
--- a/Python/symtable.c
+++ b/Python/symtable.c
@@ -390,7 +390,7 @@ error_at_directive(PySTEntryObject *ste, PyObject *name)
         if (PyUnicode_Compare(PyTuple_GET_ITEM(data, 0), name) == 0) {
             PyErr_SyntaxLocationObject(ste->ste_table->st_filename,
                                        PyLong_AsLong(PyTuple_GET_ITEM(data, 1)),
-                                       PyLong_AsLong(PyTuple_GET_ITEM(data, 2)));
+                                       PyLong_AsLong(PyTuple_GET_ITEM(data, 2)) + 1);
 
             return 0;
         }
@@ -990,7 +990,7 @@ symtable_add_def(struct symtable *st, PyObject *name, int flag)
             PyErr_Format(PyExc_SyntaxError, DUPLICATE_ARGUMENT, name);
             PyErr_SyntaxLocationObject(st->st_filename,
                                        st->st_cur->ste_lineno,
-                                       st->st_cur->ste_col_offset);
+                                       st->st_cur->ste_col_offset + 1);
             goto error;
         }
         val |= flag;
@@ -1179,7 +1179,7 @@ symtable_visit_stmt(struct symtable *st, stmt_ty s)
                              e_name->v.Name.id);
                 PyErr_SyntaxLocationObject(st->st_filename,
                                            s->lineno,
-                                           s->col_offset);
+                                           s->col_offset + 1);
                 VISIT_QUIT(st, 0);
             }
             if (s->v.AnnAssign.simple &&
@@ -1274,7 +1274,7 @@ symtable_visit_stmt(struct symtable *st, stmt_ty s)
                              msg, name);
                 PyErr_SyntaxLocationObject(st->st_filename,
                                            s->lineno,
-                                           s->col_offset);
+                                           s->col_offset + 1);
                 VISIT_QUIT(st, 0);
             }
             if (!symtable_add_def(st, name, DEF_GLOBAL))
@@ -1306,7 +1306,7 @@ symtable_visit_stmt(struct symtable *st, stmt_ty s)
                 PyErr_Format(PyExc_SyntaxError, msg, name);
                 PyErr_SyntaxLocationObject(st->st_filename,
                                            s->lineno,
-                                           s->col_offset);
+                                           s->col_offset + 1);
                 VISIT_QUIT(st, 0);
             }
             if (!symtable_add_def(st, name, DEF_NONLOCAL))
@@ -1645,7 +1645,7 @@ symtable_visit_alias(struct symtable *st, alias_ty a)
             int lineno = st->st_cur->ste_lineno;
             int col_offset = st->st_cur->ste_col_offset;
             PyErr_SetString(PyExc_SyntaxError, IMPORT_STAR_WARNING);
-            PyErr_SyntaxLocationObject(st->st_filename, lineno, col_offset);
+            PyErr_SyntaxLocationObject(st->st_filename, lineno, col_offset + 1);
             Py_DECREF(store_name);
             return 0;
         }
@@ -1736,7 +1736,7 @@ symtable_handle_comprehension(struct symtable *st, expr_ty e,
             "'yield' inside generator expression");
         PyErr_SyntaxLocationObject(st->st_filename,
                                    st->st_cur->ste_lineno,
-                                   st->st_cur->ste_col_offset);
+                                   st->st_cur->ste_col_offset + 1);
         symtable_exit_block(st, (void *)e);
         return 0;
     }



More information about the Python-checkins mailing list