[Python-checkins] cpython (3.4): Issue #21669: Special case print & exec syntax errors

nick.coghlan python-checkins at python.org
Mon Jun 16 11:50:08 CEST 2014


http://hg.python.org/cpython/rev/2b8cd2bc2745
changeset:   91208:2b8cd2bc2745
branch:      3.4
parent:      91206:9ba324a20bad
user:        Nick Coghlan <ncoghlan at gmail.com>
date:        Mon Jun 16 19:48:02 2014 +1000
summary:
  Issue #21669: Special case print & exec syntax errors

files:
  Lib/test/test_grammar.py |   25 +++++
  Misc/NEWS                |    5 +
  Objects/exceptions.c     |  135 +++++++++++++++++++++++++++
  3 files changed, 165 insertions(+), 0 deletions(-)


diff --git a/Lib/test/test_grammar.py b/Lib/test/test_grammar.py
--- a/Lib/test/test_grammar.py
+++ b/Lib/test/test_grammar.py
@@ -390,6 +390,31 @@
         check_syntax_error(self, "x + 1 = 1")
         check_syntax_error(self, "a + 1 = b + 2")
 
+    # Check the heuristic for print & exec covers significant cases
+    # As well as placing some limits on false positives
+    def test_former_statements_refer_to_builtins(self):
+        # Source templates; "{}" is filled with each former statement name
+        templates = (
+            "{} foo",
+            "{} {{1:foo}}",
+            "if 1: {} foo",
+            "if 1: {} {{1:foo}}",
+            "if 1:\n    {} foo",
+            "if 1:\n    {} {{1:foo}}",
+        )
+        for name in ("print", "exec"):
+            custom_pattern = "call to '{}'".format(name)
+            for template in templates:
+                legacy = template.format(name)
+                # Same source with "(" added must keep the default message
+                with_paren = legacy.replace("foo", "(foo.)")
+                expectations = ((legacy, custom_pattern),
+                                (with_paren, "invalid syntax"))
+                for source, pattern in expectations:
+                    with self.subTest(source=source):
+                        with self.assertRaisesRegex(SyntaxError, pattern):
+                            exec(source)
+
     def test_del_stmt(self):
         # 'del' exprlist
         abc = [1,2,3]
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,11 @@
 Core and Builtins
 -----------------
 
+- Issue #21669: With the aid of heuristics in SyntaxError.__init__, the
+  parser now attempts to generate more meaningful (or at least more search
+  engine friendly) error messages when "exec" and "print" are used as
+  statements.
+
 - Issue #21642: If the conditional if-else expression, allow an integer written
   with no space between itself and the ``else`` keyword (e.g. ``True if 42else
   False``) to be valid syntax.
diff --git a/Objects/exceptions.c b/Objects/exceptions.c
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -1254,6 +1254,9 @@
  *    SyntaxError extends Exception
  */
 
+/* Helper function to customise error message for some syntax errors */
+static int _report_missing_parentheses(PySyntaxErrorObject *self);
+
 static int
 SyntaxError_init(PySyntaxErrorObject *self, PyObject *args, PyObject *kwds)
 {
@@ -1298,6 +1301,13 @@
         Py_INCREF(self->text);
 
         Py_DECREF(info);
+
+        /* Issue #21669: Custom error for 'print' & 'exec' as statements */
+        if (self->text && PyUnicode_Check(self->text)) {
+            if (_report_missing_parentheses(self) < 0) {
+                return -1;
+            }
+        }
     }
     return 0;
 }
@@ -2783,3 +2793,128 @@
     PyErr_Restore(new_exc, new_val, new_tb);
     return new_val;
 }
+
+
+/* To help with migration from Python 2, SyntaxError.__init__ applies some
+ * heuristics to try to report a more meaningful exception when print and
+ * exec are used like statements.
+ *
+ * The heuristics are currently expected to detect the following cases:
+ *   - top level statement
+ *   - statement in a nested suite
+ *   - trailing section of a one line complex statement
+ *
+ * They're currently known not to trigger:
+ *   - after a semi-colon
+ *
+ * The error message can be a bit odd in cases where the "arguments" are
+ * completely illegal syntactically, but that isn't worth the hassle of
+ * fixing.
+ *
+ * We also can't do anything about cases that are legal Python 3 syntax
+ * but mean something entirely different from what they did in Python 2
+ * (omitting the arguments entirely, printing items preceded by a unary plus
+ * or minus, using the stream redirection syntax).
+ */
+
+static int
+_check_for_legacy_statements(PySyntaxErrorObject *self, Py_ssize_t start)
+{
+    /* Test whether self->text, read from index 'start' and ignoring any
+     * leading whitespace, begins with "print " or "exec ". On a match,
+     * self->msg is replaced with a "Missing parentheses" message.
+     *
+     * Return values:
+     *   -1: an error occurred (one of the C-API calls below failed)
+     *    0: nothing happened
+     *    1: the check triggered & the error message was changed
+     */
+    static const char * const keywords[2] = {"print ", "exec "};
+    static const char * const messages[2] = {
+        "Missing parentheses in call to 'print'",
+        "Missing parentheses in call to 'exec'"
+    };
+    static PyObject *prefixes[2] = {NULL, NULL};  /* interned on first use */
+    Py_ssize_t text_len = PyUnicode_GET_LENGTH(self->text);
+    int kind = PyUnicode_KIND(self->text);
+    void *data = PyUnicode_DATA(self->text);
+    int i;
+
+    /* Ignore leading whitespace */
+    while (start < text_len) {
+        Py_UCS4 ch = PyUnicode_READ(kind, data, start);
+        if (!Py_UNICODE_ISSPACE(ch))
+            break;
+        start++;
+    }
+    /* Checking against an empty or whitespace-only part of the string */
+    if (start == text_len) {
+        return 0;
+    }
+
+    /* Check for legacy print statements, then legacy exec statements */
+    for (i = 0; i < 2; i++) {
+        Py_ssize_t match;
+        if (prefixes[i] == NULL) {
+            prefixes[i] = PyUnicode_InternFromString(keywords[i]);
+            if (prefixes[i] == NULL) {
+                return -1;
+            }
+        }
+        match = PyUnicode_Tailmatch(self->text, prefixes[i],
+                                    start, text_len, -1);
+        if (match < 0) {
+            return -1;  /* the comparison failed; do not treat as a match */
+        }
+        if (match > 0) {
+            Py_CLEAR(self->msg);
+            self->msg = PyUnicode_FromString(messages[i]);
+            return self->msg == NULL ? -1 : 1;  /* -1: msg not created */
+        }
+    }
+    /* Fall back to the default error message */
+    return 0;
+}
+
+static int
+_report_missing_parentheses(PySyntaxErrorObject *self)
+{
+    /* Run the legacy print/exec check on self->text: first from the start
+     * of the line, then from just after the first colon. A line containing
+     * an opening parenthesis is left with the default message.
+     * Returns 0 normally and -1 if a search or check reported an error.
+     */
+    const Py_UCS4 open_paren = '(';
+    const Py_UCS4 colon = ':';
+    Py_ssize_t length = PyUnicode_GET_LENGTH(self->text);
+    Py_ssize_t paren_pos, colon_pos;
+    int status;
+
+    paren_pos = PyUnicode_FindChar(self->text, open_paren, 0, length, 1);
+    if (paren_pos < -1) {
+        return -1;
+    }
+    if (paren_pos != -1) {
+        /* Use default error message for any line with an opening paren */
+        return 0;
+    }
+
+    /* Handle the simple statement case */
+    status = _check_for_legacy_statements(self, 0);
+    if (status < 0) {
+        return -1;
+    }
+    if (status != 0) {
+        return 0;
+    }
+    /* Handle the one-line complex statement case */
+    colon_pos = PyUnicode_FindChar(self->text, colon, 0, length, 1);
+    if (colon_pos < -1) {
+        return -1;
+    }
+    if (colon_pos < 0 || colon_pos >= length) {
+        return 0;
+    }
+    /* Check again, starting from just after the colon */
+    return _check_for_legacy_statements(self, colon_pos + 1) < 0 ? -1 : 0;
+}

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list