[Python-checkins] cpython (3.3): Issue #2382: SyntaxError cursor "^" now is written at correct position in most

serhiy.storchaka python-checkins at python.org
Tue Jan 21 21:30:34 CET 2014


http://hg.python.org/cpython/rev/eb7565c212f1
changeset:   88615:eb7565c212f1
branch:      3.3
parent:      88611:da35a4e724e5
user:        Serhiy Storchaka <storchaka at gmail.com>
date:        Tue Jan 21 22:26:52 2014 +0200
summary:
  Issue #2382: SyntaxError cursor "^" now is written at correct position in most
cases when multibyte characters are in line (before "^").  This still not
works correctly with wide East Asian characters.

files:
  Lib/test/test_exceptions.py |  13 +++++++++++++
  Lib/test/test_traceback.py  |   9 +++++++++
  Misc/NEWS                   |   4 ++++
  Python/pythonrun.c          |  14 ++++++++++++--
  4 files changed, 38 insertions(+), 2 deletions(-)


diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py
--- a/Lib/test/test_exceptions.py
+++ b/Lib/test/test_exceptions.py
@@ -148,6 +148,19 @@
         ckmsg(s, "'continue' not properly in loop")
         ckmsg("continue\n", "'continue' not properly in loop")
 
+    def testSyntaxErrorOffset(self):
+        def check(src, lineno, offset):
+            with self.assertRaises(SyntaxError) as cm:
+                compile(src, '<fragment>', 'exec')
+            self.assertEqual(cm.exception.lineno, lineno)
+            self.assertEqual(cm.exception.offset, offset)
+
+        check('def fact(x):\n\treturn x!\n', 2, 10)
+        check('1 +\n', 1, 4)
+        check('def spam():\n  print(1)\n print(2)', 3, 10)
+        check('Python = "Python" +', 1, 20)
+        check('Python = "\u1e54\xfd\u0163\u0125\xf2\xf1" +', 1, 20)
+
     @cpython_only
     def testSettingException(self):
         # test that setting an exception at the C level works even if the
diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py
--- a/Lib/test/test_traceback.py
+++ b/Lib/test/test_traceback.py
@@ -32,6 +32,9 @@
     def syntax_error_bad_indentation(self):
         compile("def spam():\n  print(1)\n print(2)", "?", "exec")
 
+    def syntax_error_with_caret_non_ascii(self):
+        compile('Python = "\u1e54\xfd\u0163\u0125\xf2\xf1" +', "?", "exec")
+
     def test_caret(self):
         err = self.get_exception_format(self.syntax_error_with_caret,
                                         SyntaxError)
@@ -46,6 +49,12 @@
         self.assertTrue(err[2].count('\n') == 1) # and no additional newline
         self.assertTrue(err[1].find("+") == err[2].find("^")) # in the right place
 
+        err = self.get_exception_format(self.syntax_error_with_caret_non_ascii,
+                                        SyntaxError)
+        self.assertIn("^", err[2]) # third line has caret
+        self.assertTrue(err[2].count('\n') == 1) # and no additional newline
+        self.assertTrue(err[1].find("+") == err[2].find("^")) # in the right place
+
     def test_nocaret(self):
         exc = SyntaxError("error", ("x.py", 23, None, "bad syntax"))
         err = traceback.format_exception_only(SyntaxError, exc)
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,10 @@
 Core and Builtins
 -----------------
 
+- Issue #2382: SyntaxError cursor "^" is now written at correct position in most
+  cases when multibyte characters are in line (before "^").  This still not
+  works correctly with wide East Asian characters.
+
 - Issue #18960: The first line of Python script could be executed twice when
   the source encoding was specified on the second line.  Now the source encoding
   declaration on the second line isn't effective if the first line contains
diff --git a/Python/pythonrun.c b/Python/pythonrun.c
--- a/Python/pythonrun.c
+++ b/Python/pythonrun.c
@@ -2226,6 +2226,7 @@
     PyObject *v, *w, *errtype, *errtext;
     PyObject *msg_obj = NULL;
     char *msg = NULL;
+    int offset = err->offset;
 
     errtype = PyExc_SyntaxError;
     switch (err->error) {
@@ -2310,11 +2311,20 @@
         errtext = Py_None;
         Py_INCREF(Py_None);
     } else {
-        errtext = PyUnicode_DecodeUTF8(err->text, strlen(err->text),
+        errtext = PyUnicode_DecodeUTF8(err->text, err->offset,
                                        "replace");
+        if (errtext != NULL) {
+            Py_ssize_t len = strlen(err->text);
+            offset = (int)PyUnicode_GET_LENGTH(errtext);
+            if (len != err->offset) {
+                Py_DECREF(errtext);
+                errtext = PyUnicode_DecodeUTF8(err->text, len,
+                                               "replace");
+            }
+        }
     }
     v = Py_BuildValue("(OiiN)", err->filename,
-                      err->lineno, err->offset, errtext);
+                      err->lineno, offset, errtext);
     if (v != NULL) {
         if (msg_obj)
             w = Py_BuildValue("(OO)", msg_obj, v);

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list