[Python-checkins] Fix lookahead of soft keywords in the PEG parser (GH-20436)

Pablo Galindo webhook-mailer at python.org
Tue May 26 19:16:01 EDT 2020


https://github.com/python/cpython/commit/404b23b85b17c84e022779f31fc89cb0ed0d37e8
commit: 404b23b85b17c84e022779f31fc89cb0ed0d37e8
branch: master
author: Pablo Galindo <Pablogsal at gmail.com>
committer: GitHub <noreply at github.com>
date: 2020-05-26T16:15:52-07:00
summary:

Fix lookahead of soft keywords in the PEG parser (GH-20436)



Automerge-Triggered-By: @gvanrossum

files:
M Lib/test/test_peg_generator/test_c_parser.py
M Parser/pegen/pegen.c
M Parser/pegen/pegen.h
M Tools/peg_generator/pegen/c_generator.py

diff --git a/Lib/test/test_peg_generator/test_c_parser.py b/Lib/test/test_peg_generator/test_c_parser.py
index 72383d5b5a631..a5d88501f77ad 100644
--- a/Lib/test/test_peg_generator/test_c_parser.py
+++ b/Lib/test/test_peg_generator/test_c_parser.py
@@ -432,3 +432,15 @@ def test_soft_keywords_parse(self) -> None:
         self.check_input_strings_for_grammar(valid_cases, invalid_cases)
         """
         self.run_test(grammar_source, test_source)
+
+    def test_soft_keywords_lookahead(self) -> None:
+        grammar_source = """
+        start: &"if" "if" expr '+' expr NEWLINE
+        expr: NAME
+        """
+        test_source = """
+        valid_cases = ["if if + if"]
+        invalid_cases = ["if if"]
+        self.check_input_strings_for_grammar(valid_cases, invalid_cases)
+        """
+        self.run_test(grammar_source, test_source)
diff --git a/Parser/pegen/pegen.c b/Parser/pegen/pegen.c
index ee30c2c0688f8..a0285bcb60e95 100644
--- a/Parser/pegen/pegen.c
+++ b/Parser/pegen/pegen.c
@@ -718,6 +718,15 @@ _PyPegen_lookahead_with_name(int positive, expr_ty (func)(Parser *), Parser *p)
     return (res != NULL) == positive;
 }
 
+int
+_PyPegen_lookahead_with_string(int positive, expr_ty (func)(Parser *, const char*), Parser *p, const char* arg)
+{
+    int mark = p->mark;
+    void *res = func(p, arg);
+    p->mark = mark;
+    return (res != NULL) == positive;
+}
+
 int
 _PyPegen_lookahead_with_int(int positive, Token *(func)(Parser *, int), Parser *p, int arg)
 {
diff --git a/Parser/pegen/pegen.h b/Parser/pegen/pegen.h
index 9507d9955ae32..64cf0ec892913 100644
--- a/Parser/pegen/pegen.h
+++ b/Parser/pegen/pegen.h
@@ -119,6 +119,7 @@ int _PyPegen_is_memoized(Parser *p, int type, void *pres);
 
 int _PyPegen_lookahead_with_name(int, expr_ty (func)(Parser *), Parser *);
 int _PyPegen_lookahead_with_int(int, Token *(func)(Parser *, int), Parser *, int);
+int _PyPegen_lookahead_with_string(int , expr_ty (func)(Parser *, const char*), Parser *, const char*);
 int _PyPegen_lookahead(int, void *(func)(Parser *), Parser *);
 
 Token *_PyPegen_expect_token(Parser *p, int type);
diff --git a/Tools/peg_generator/pegen/c_generator.py b/Tools/peg_generator/pegen/c_generator.py
index 885ff05858f67..ce1d6bb7bf355 100644
--- a/Tools/peg_generator/pegen/c_generator.py
+++ b/Tools/peg_generator/pegen/c_generator.py
@@ -58,7 +58,8 @@ class NodeTypes(Enum):
     STRING_TOKEN = 2
     GENERIC_TOKEN = 3
     KEYWORD = 4
-    CUT_OPERATOR = 5
+    SOFT_KEYWORD = 5
+    CUT_OPERATOR = 6
 
 
 BASE_NODETYPES = {
@@ -123,7 +124,7 @@ def soft_keyword_helper(self, value: str) -> FunctionCall:
             function="_PyPegen_expect_soft_keyword",
             arguments=["p", value],
             return_type="expr_ty",
-            nodetype=NodeTypes.NAME_TOKEN,
+            nodetype=NodeTypes.SOFT_KEYWORD,
             comment=f"soft_keyword='{value}'",
         )
 
@@ -217,6 +218,12 @@ def lookahead_call_helper(self, node: Lookahead, positive: int) -> FunctionCall:
                 arguments=[positive, call.function, *call.arguments],
                 return_type="int",
             )
+        elif call.nodetype == NodeTypes.SOFT_KEYWORD:
+            return FunctionCall(
+                function=f"_PyPegen_lookahead_with_string",
+                arguments=[positive, call.function, *call.arguments],
+                return_type="int",
+            )
         elif call.nodetype in {NodeTypes.GENERIC_TOKEN, NodeTypes.KEYWORD}:
             return FunctionCall(
                 function=f"_PyPegen_lookahead_with_int",



More information about the Python-checkins mailing list