[Python-checkins] bpo-40334: Produce better error messages for non-parenthesized genexps (GH-20153)

Lysandros Nikolaou webhook-mailer at python.org
Thu May 21 20:57:00 EDT 2020


https://github.com/python/cpython/commit/ae145833025b0156ee2a28219e3370f3b27b2a36
commit: ae145833025b0156ee2a28219e3370f3b27b2a36
branch: master
author: Lysandros Nikolaou <lisandrosnik at gmail.com>
committer: GitHub <noreply at github.com>
date: 2020-05-22T01:56:52+01:00
summary:

bpo-40334: Produce better error messages for non-parenthesized genexps (GH-20153)

The error message, generated for a non-parenthesized generator expression
in function calls, was still the generic `invalid syntax`, when the generator expression wasn't appearing as the first argument in the call. With this patch, even on input like `f(a, b, c for c in d, e)`, the correct error message gets produced.

files:
M Grammar/python.gram
M Lib/test/test_syntax.py
M Parser/pegen/parse.c
M Parser/pegen/pegen.c
M Parser/pegen/pegen.h

diff --git a/Grammar/python.gram b/Grammar/python.gram
index a771abf46fd25..19d9bb36fed5f 100644
--- a/Grammar/python.gram
+++ b/Grammar/python.gram
@@ -627,6 +627,9 @@ incorrect_arguments:
     | args ',' '*' { RAISE_SYNTAX_ERROR("iterable argument unpacking follows keyword argument unpacking") }
     | a=expression for_if_clauses ',' [args | expression for_if_clauses] {
         RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "Generator expression must be parenthesized") }
+    | a=args for_if_clauses { _PyPegen_nonparen_genexp_in_call(p, a) }
+    | args ',' a=expression for_if_clauses {
+        RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "Generator expression must be parenthesized") }
     | a=args ',' args { _PyPegen_arguments_parsing_error(p, a) }
 invalid_kwarg:
     | a=expression '=' {
diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py
index 6d9c4e4b55e1c..4df5535b0053b 100644
--- a/Lib/test/test_syntax.py
+++ b/Lib/test/test_syntax.py
@@ -216,11 +216,9 @@
 >>> f(x for x in L, **{})
 Traceback (most recent call last):
 SyntaxError: Generator expression must be parenthesized
-
-# >>> f(L, x for x in L)
-# Traceback (most recent call last):
-# SyntaxError: Generator expression must be parenthesized
-
+>>> f(L, x for x in L)
+Traceback (most recent call last):
+SyntaxError: Generator expression must be parenthesized
 >>> f(x for x in L, y for y in L)
 Traceback (most recent call last):
 SyntaxError: Generator expression must be parenthesized
diff --git a/Parser/pegen/parse.c b/Parser/pegen/parse.c
index b4756319783e2..5dff77abc9fd7 100644
--- a/Parser/pegen/parse.c
+++ b/Parser/pegen/parse.c
@@ -11670,6 +11670,8 @@ t_atom_rule(Parser *p)
 // incorrect_arguments:
 //     | args ',' '*'
 //     | expression for_if_clauses ',' [args | expression for_if_clauses]
+//     | args for_if_clauses
+//     | args ',' expression for_if_clauses
 //     | args ',' args
 static void *
 incorrect_arguments_rule(Parser *p)
@@ -11731,6 +11733,54 @@ incorrect_arguments_rule(Parser *p)
         }
         p->mark = _mark;
     }
+    { // args for_if_clauses
+        if (p->error_indicator) {
+            return NULL;
+        }
+        expr_ty a;
+        asdl_seq* for_if_clauses_var;
+        if (
+            (a = args_rule(p))  // args
+            &&
+            (for_if_clauses_var = for_if_clauses_rule(p))  // for_if_clauses
+        )
+        {
+            _res = _PyPegen_nonparen_genexp_in_call ( p , a );
+            if (_res == NULL && PyErr_Occurred()) {
+                p->error_indicator = 1;
+                return NULL;
+            }
+            goto done;
+        }
+        p->mark = _mark;
+    }
+    { // args ',' expression for_if_clauses
+        if (p->error_indicator) {
+            return NULL;
+        }
+        Token * _literal;
+        expr_ty a;
+        expr_ty args_var;
+        asdl_seq* for_if_clauses_var;
+        if (
+            (args_var = args_rule(p))  // args
+            &&
+            (_literal = _PyPegen_expect_token(p, 12))  // token=','
+            &&
+            (a = expression_rule(p))  // expression
+            &&
+            (for_if_clauses_var = for_if_clauses_rule(p))  // for_if_clauses
+        )
+        {
+            _res = RAISE_SYNTAX_ERROR_KNOWN_LOCATION ( a , "Generator expression must be parenthesized" );
+            if (_res == NULL && PyErr_Occurred()) {
+                p->error_indicator = 1;
+                return NULL;
+            }
+            goto done;
+        }
+        p->mark = _mark;
+    }
     { // args ',' args
         if (p->error_indicator) {
             return NULL;
diff --git a/Parser/pegen/pegen.c b/Parser/pegen/pegen.c
index ca4ea824b3f28..f1e3f9efb2f6b 100644
--- a/Parser/pegen/pegen.c
+++ b/Parser/pegen/pegen.c
@@ -2100,3 +2100,24 @@ void *_PyPegen_arguments_parsing_error(Parser *p, expr_ty e) {
 
     return RAISE_SYNTAX_ERROR(msg);
 }
+
+void *
+_PyPegen_nonparen_genexp_in_call(Parser *p, expr_ty args)
+{
+    /* The rule that calls this function is 'args for_if_clauses'.
+       For the input f(L, x for x in y), L and x are in args and
+       the for is parsed as a for_if_clause. We have to check if
+       len <= 1, so that input like dict((a, b) for a, b in x)
+       gets successfully parsed and then we pass the last
+       argument (x in the above example) as the location of the
+       error */
+    Py_ssize_t len = asdl_seq_LEN(args->v.Call.args);
+    if (len <= 1) {
+        return NULL;
+    }
+
+    return RAISE_SYNTAX_ERROR_KNOWN_LOCATION(
+        (expr_ty) asdl_seq_GET(args->v.Call.args, len - 1),
+        "Generator expression must be parenthesized"
+    );
+}
diff --git a/Parser/pegen/pegen.h b/Parser/pegen/pegen.h
index 146804a896fd1..761e90f06db8e 100644
--- a/Parser/pegen/pegen.h
+++ b/Parser/pegen/pegen.h
@@ -152,7 +152,7 @@ RAISE_ERROR_KNOWN_LOCATION(Parser *p, PyObject *errtype, int lineno,
 #define RAISE_SYNTAX_ERROR(msg, ...) _PyPegen_raise_error(p, PyExc_SyntaxError, msg, ##__VA_ARGS__)
 #define RAISE_INDENTATION_ERROR(msg, ...) _PyPegen_raise_error(p, PyExc_IndentationError, msg, ##__VA_ARGS__)
 #define RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, msg, ...) \
-    RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, a->lineno, a->col_offset, msg, ##__VA_ARGS__)
+    RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, (a)->lineno, (a)->col_offset, msg, ##__VA_ARGS__)
 
 Py_LOCAL_INLINE(void *)
 CHECK_CALL(Parser *p, void *result)
@@ -262,6 +262,7 @@ mod_ty _PyPegen_make_module(Parser *, asdl_seq *);
 // Error reporting helpers
 expr_ty _PyPegen_get_invalid_target(expr_ty e);
 void *_PyPegen_arguments_parsing_error(Parser *, expr_ty);
+void *_PyPegen_nonparen_genexp_in_call(Parser *p, expr_ty args);
 
 
 void *_PyPegen_parse(Parser *);



More information about the Python-checkins mailing list