[Python-checkins] bpo-42214: Fix check for NOTEQUAL token in the PEG parser for the barry_as_flufl rule (GH-23048)

pablogsal webhook-mailer at python.org
Fri Oct 30 19:48:49 EDT 2020


https://github.com/python/cpython/commit/06f8c3328dcd81c84d1ee2b3a57b5381dcb38482
commit: 06f8c3328dcd81c84d1ee2b3a57b5381dcb38482
branch: master
author: Pablo Galindo <Pablogsal at gmail.com>
committer: pablogsal <Pablogsal at gmail.com>
date: 2020-10-30T23:48:42Z
summary:

bpo-42214: Fix check for NOTEQUAL token in the PEG parser for the barry_as_flufl rule (GH-23048)

files:
A Misc/NEWS.d/next/Core and Builtins/2020-10-30-22-16-30.bpo-42214.lXskM_.rst
M Grammar/python.gram
M Lib/test/test_syntax.py
M Parser/parser.c
M Parser/pegen.c
M Parser/pegen.h

diff --git a/Grammar/python.gram b/Grammar/python.gram
index b8da554b8ec99..ae5e4b5d4ca64 100644
--- a/Grammar/python.gram
+++ b/Grammar/python.gram
@@ -428,7 +428,7 @@ compare_op_bitwise_or_pair[CmpopExprPair*]:
     | is_bitwise_or
 eq_bitwise_or[CmpopExprPair*]: '==' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, Eq, a) }
 noteq_bitwise_or[CmpopExprPair*]:
-    | (tok='!=' {_PyPegen_check_barry_as_flufl(p) ? NULL : tok}) a=bitwise_or {_PyPegen_cmpop_expr_pair(p, NotEq, a) }
+    | (tok='!=' { _PyPegen_check_barry_as_flufl(p, tok) ? NULL : tok}) a=bitwise_or {_PyPegen_cmpop_expr_pair(p, NotEq, a) }
 lte_bitwise_or[CmpopExprPair*]: '<=' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, LtE, a) }
 lt_bitwise_or[CmpopExprPair*]: '<' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, Lt, a) }
 gte_bitwise_or[CmpopExprPair*]: '>=' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, GtE, a) }
diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py
index c25b85246b919..e89d9401f2c39 100644
--- a/Lib/test/test_syntax.py
+++ b/Lib/test/test_syntax.py
@@ -955,6 +955,23 @@ def test_nested_named_except_blocks(self):
         code += f"{' '*4*12}pass"
         self._check_error(code, "too many statically nested blocks")
 
+    def test_barry_as_flufl_with_syntax_errors(self):
+        # The "barry_as_flufl" rule can produce some "bugs-at-a-distance" if
+        # it reads the wrong token in the presence of syntax errors later
+        # in the file. See bpo-42214 for more information.
+        code = """
+def func1():
+    if a != b:
+        raise ValueError
+
+def func2():
+    try
+        return 1
+    finally:
+        pass
+"""
+        self._check_error(code, "invalid syntax")
+
 def test_main():
     support.run_unittest(SyntaxTestCase)
     from test import test_syntax
diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-10-30-22-16-30.bpo-42214.lXskM_.rst b/Misc/NEWS.d/next/Core and Builtins/2020-10-30-22-16-30.bpo-42214.lXskM_.rst
new file mode 100644
index 0000000000000..3f85bbe83901a
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2020-10-30-22-16-30.bpo-42214.lXskM_.rst	
@@ -0,0 +1,2 @@
+Fixed a possible crash in the PEG parser when checking for the '!=' token in
+the ``barry_as_flufl`` rule. Patch by Pablo Galindo.
diff --git a/Parser/parser.c b/Parser/parser.c
index a22cf2752d18d..a882a81344cc6 100644
--- a/Parser/parser.c
+++ b/Parser/parser.c
@@ -21288,7 +21288,7 @@ _tmp_93_rule(Parser *p)
         )
         {
             D(fprintf(stderr, "%*c+ _tmp_93[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'!='"));
-            _res = _PyPegen_check_barry_as_flufl ( p ) ? NULL : tok;
+            _res = _PyPegen_check_barry_as_flufl ( p , tok ) ? NULL : tok;
             if (_res == NULL && PyErr_Occurred()) {
                 p->error_indicator = 1;
                 D(p->level--);
diff --git a/Parser/pegen.c b/Parser/pegen.c
index 216edd810e246..188fd282b7604 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -62,8 +62,7 @@ init_normalization(Parser *p)
 /* Checks if the NOTEQUAL token is valid given the current parser flags
 0 indicates success and nonzero indicates failure (an exception may be set) */
 int
-_PyPegen_check_barry_as_flufl(Parser *p) {
-    Token *t = p->tokens[p->fill - 1];
+_PyPegen_check_barry_as_flufl(Parser *p, Token* t) {
     assert(t->bytes != NULL);
     assert(t->type == NOTEQUAL);
 
diff --git a/Parser/pegen.h b/Parser/pegen.h
index 841f1e5eb4396..f82a3a00b2ba0 100644
--- a/Parser/pegen.h
+++ b/Parser/pegen.h
@@ -263,7 +263,7 @@ expr_ty _PyPegen_collect_call_seqs(Parser *, asdl_expr_seq *, asdl_seq *,
                      int end_col_offset, PyArena *arena);
 expr_ty _PyPegen_concatenate_strings(Parser *p, asdl_seq *);
 asdl_seq *_PyPegen_join_sequences(Parser *, asdl_seq *, asdl_seq *);
-int _PyPegen_check_barry_as_flufl(Parser *);
+int _PyPegen_check_barry_as_flufl(Parser *, Token *);
 mod_ty _PyPegen_make_module(Parser *, asdl_stmt_seq *);
 
 // Error reporting helpers



More information about the Python-checkins mailing list