[Python-checkins] cpython (merge 3.2 -> 3.3): Issue #12983: Bytes literals with invalid \x escape now raise a SyntaxError

serhiy.storchaka python-checkins at python.org
Sun Feb 10 16:45:35 CET 2013


http://hg.python.org/cpython/rev/d5b731446a91
changeset:   82137:d5b731446a91
branch:      3.3
parent:      82131:e9b4f2927412
parent:      82136:305210a08fc9
user:        Serhiy Storchaka <storchaka at gmail.com>
date:        Sun Feb 10 17:42:01 2013 +0200
summary:
  Issue #12983: Bytes literals with an invalid \x escape now raise a SyntaxError
with a full traceback that includes the line number.

files:
  Lib/test/test_strlit.py |  34 +++++++++++++++++++++++++++++
  Misc/NEWS               |   3 ++
  Objects/bytesobject.c   |   5 ++-
  Python/ast.c            |  18 +++++++++-----
  4 files changed, 51 insertions(+), 9 deletions(-)


diff --git a/Lib/test/test_strlit.py b/Lib/test/test_strlit.py
--- a/Lib/test/test_strlit.py
+++ b/Lib/test/test_strlit.py
@@ -50,6 +50,10 @@
 assert ord(f) == 0x1881
 g = r'\u1881'
 assert list(map(ord, g)) == [92, 117, 49, 56, 56, 49]
+h = '\U0001d120'
+assert ord(h) == 0x1d120
+i = r'\U0001d120'
+assert list(map(ord, i)) == [92, 85, 48, 48, 48, 49, 100, 49, 50, 48]
 """
 
 
@@ -82,6 +86,24 @@
         self.assertEqual(eval(""" '\x81' """), chr(0x81))
         self.assertEqual(eval(r""" '\u1881' """), chr(0x1881))
         self.assertEqual(eval(""" '\u1881' """), chr(0x1881))
+        self.assertEqual(eval(r""" '\U0001d120' """), chr(0x1d120))
+        self.assertEqual(eval(""" '\U0001d120' """), chr(0x1d120))
+
+    def test_eval_str_incomplete(self):
+        self.assertRaises(SyntaxError, eval, r""" '\x' """)
+        self.assertRaises(SyntaxError, eval, r""" '\x0' """)
+        self.assertRaises(SyntaxError, eval, r""" '\u' """)
+        self.assertRaises(SyntaxError, eval, r""" '\u0' """)
+        self.assertRaises(SyntaxError, eval, r""" '\u00' """)
+        self.assertRaises(SyntaxError, eval, r""" '\u000' """)
+        self.assertRaises(SyntaxError, eval, r""" '\U' """)
+        self.assertRaises(SyntaxError, eval, r""" '\U0' """)
+        self.assertRaises(SyntaxError, eval, r""" '\U00' """)
+        self.assertRaises(SyntaxError, eval, r""" '\U000' """)
+        self.assertRaises(SyntaxError, eval, r""" '\U0000' """)
+        self.assertRaises(SyntaxError, eval, r""" '\U00000' """)
+        self.assertRaises(SyntaxError, eval, r""" '\U000000' """)
+        self.assertRaises(SyntaxError, eval, r""" '\U0000000' """)
 
     def test_eval_str_raw(self):
         self.assertEqual(eval(""" r'x' """), 'x')
@@ -91,6 +113,8 @@
         self.assertEqual(eval(""" r'\x81' """), chr(0x81))
         self.assertEqual(eval(r""" r'\u1881' """), '\\' + 'u1881')
         self.assertEqual(eval(""" r'\u1881' """), chr(0x1881))
+        self.assertEqual(eval(r""" r'\U0001d120' """), '\\' + 'U0001d120')
+        self.assertEqual(eval(""" r'\U0001d120' """), chr(0x1d120))
 
     def test_eval_bytes_normal(self):
         self.assertEqual(eval(""" b'x' """), b'x')
@@ -100,6 +124,12 @@
         self.assertRaises(SyntaxError, eval, """ b'\x81' """)
         self.assertEqual(eval(r""" b'\u1881' """), b'\\' + b'u1881')
         self.assertRaises(SyntaxError, eval, """ b'\u1881' """)
+        self.assertEqual(eval(r""" b'\U0001d120' """), b'\\' + b'U0001d120')
+        self.assertRaises(SyntaxError, eval, """ b'\U0001d120' """)
+
+    def test_eval_bytes_incomplete(self):
+        self.assertRaises(SyntaxError, eval, r""" b'\x' """)
+        self.assertRaises(SyntaxError, eval, r""" b'\x0' """)
 
     def test_eval_bytes_raw(self):
         self.assertEqual(eval(""" br'x' """), b'x')
@@ -116,6 +146,10 @@
         self.assertEqual(eval(r""" rb'\u1881' """), b"\\" + b"u1881")
         self.assertRaises(SyntaxError, eval, """ br'\u1881' """)
         self.assertRaises(SyntaxError, eval, """ rb'\u1881' """)
+        self.assertEqual(eval(r""" br'\U0001d120' """), b"\\" + b"U0001d120")
+        self.assertEqual(eval(r""" rb'\U0001d120' """), b"\\" + b"U0001d120")
+        self.assertRaises(SyntaxError, eval, """ br'\U0001d120' """)
+        self.assertRaises(SyntaxError, eval, """ rb'\U0001d120' """)
         self.assertRaises(SyntaxError, eval, """ bb'' """)
         self.assertRaises(SyntaxError, eval, """ rr'' """)
         self.assertRaises(SyntaxError, eval, """ brr'' """)
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,9 @@
 Core and Builtins
 -----------------
 
+- Issue #12983: Bytes literals with invalid \x escape now raise a SyntaxError
+  and a full traceback including line number.
+
 - Issue #17173: Remove uses of locale-dependent C functions (isalpha() etc.)
   in the interpreter.
 
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -465,8 +465,9 @@
                 break;
             }
             if (!errors || strcmp(errors, "strict") == 0) {
-                PyErr_SetString(PyExc_ValueError,
-                                "invalid \\x escape");
+                PyErr_Format(PyExc_ValueError,
+                             "invalid \\x escape at position %d",
+                             s - 2 - (end - len));
                 goto failed;
             }
             if (strcmp(errors, "replace") == 0) {
diff --git a/Python/ast.c b/Python/ast.c
--- a/Python/ast.c
+++ b/Python/ast.c
@@ -1829,20 +1829,24 @@
     case STRING: {
         PyObject *str = parsestrplus(c, n, &bytesmode);
         if (!str) {
-            if (PyErr_ExceptionMatches(PyExc_UnicodeError)) {
+            const char *errtype = NULL;
+            if (PyErr_ExceptionMatches(PyExc_UnicodeError))
+                errtype = "unicode error";
+            else if (PyErr_ExceptionMatches(PyExc_ValueError))
+                errtype = "value error";
+            if (errtype) {
+                char buf[128];
                 PyObject *type, *value, *tback, *errstr;
                 PyErr_Fetch(&type, &value, &tback);
                 errstr = PyObject_Str(value);
                 if (errstr) {
-                    char *s = "";
-                    char buf[128];
-                    s = _PyUnicode_AsString(errstr);
-                    PyOS_snprintf(buf, sizeof(buf), "(unicode error) %s", s);
-                    ast_error(c, n, buf);
+                    char *s = _PyUnicode_AsString(errstr);
+                    PyOS_snprintf(buf, sizeof(buf), "(%s) %s", errtype, s);
                     Py_DECREF(errstr);
                 } else {
-                    ast_error(c, n, "(unicode error) unknown error");
+                    PyOS_snprintf(buf, sizeof(buf), "(%s) unknown error", errtype);
                 }
+                ast_error(c, n, buf);
                 Py_DECREF(type);
                 Py_DECREF(value);
                 Py_XDECREF(tback);

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list