[Python-checkins] bpo-40334: refactor and cleanup for the PEG generators (GH-19775)

Pablo Galindo webhook-mailer at python.org
Wed Apr 29 05:42:32 EDT 2020


https://github.com/python/cpython/commit/4db245ee9ddbe6c53d375de59a35ff59dea2a8e0
commit: 4db245ee9ddbe6c53d375de59a35ff59dea2a8e0
branch: master
author: Pablo Galindo <Pablogsal at gmail.com>
committer: GitHub <noreply at github.com>
date: 2020-04-29T10:42:21+01:00
summary:

bpo-40334: refactor and cleanup for the PEG generators (GH-19775)

files:
M Parser/pegen/parse.c
M Parser/pegen/pegen.c
M Parser/pegen/pegen.h
M Tools/peg_generator/Makefile
M Tools/peg_generator/pegen/c_generator.py
M Tools/peg_generator/pegen/parser_generator.py
M Tools/peg_generator/pegen/python_generator.py

diff --git a/Parser/pegen/parse.c b/Parser/pegen/parse.c
index b26f7327bd273..76dd6d31da05a 100644
--- a/Parser/pegen/parse.c
+++ b/Parser/pegen/parse.c
@@ -648,7 +648,7 @@ file_rule(Parser *p)
         if (
             (a = statements_rule(p), 1)
             &&
-            (endmarker_var = _PyPegen_endmarker_token(p))
+            (endmarker_var = _PyPegen_expect_token(p, ENDMARKER))
         )
         {
             res = Module ( a , NULL , p -> arena );
@@ -712,7 +712,7 @@ eval_rule(Parser *p)
             &&
             (_loop0_1_var = _loop0_1_rule(p))
             &&
-            (endmarker_var = _PyPegen_endmarker_token(p))
+            (endmarker_var = _PyPegen_expect_token(p, ENDMARKER))
         )
         {
             res = Expression ( a , p -> arena );
@@ -846,7 +846,7 @@ statement_newline_rule(Parser *p)
         if (
             (a = compound_stmt_rule(p))
             &&
-            (newline_var = _PyPegen_newline_token(p))
+            (newline_var = _PyPegen_expect_token(p, NEWLINE))
         )
         {
             res = _PyPegen_singleton_seq ( p , a );
@@ -872,7 +872,7 @@ statement_newline_rule(Parser *p)
     { // NEWLINE
         void *newline_var;
         if (
-            (newline_var = _PyPegen_newline_token(p))
+            (newline_var = _PyPegen_expect_token(p, NEWLINE))
         )
         {
             Token *token = _PyPegen_get_last_nonnwhitespace_token(p);
@@ -895,7 +895,7 @@ statement_newline_rule(Parser *p)
     { // $
         void *endmarker_var;
         if (
-            (endmarker_var = _PyPegen_endmarker_token(p))
+            (endmarker_var = _PyPegen_expect_token(p, ENDMARKER))
         )
         {
             res = _PyPegen_interactive_exit ( p );
@@ -929,7 +929,7 @@ simple_stmt_rule(Parser *p)
             &&
             _PyPegen_lookahead_with_int(0, _PyPegen_expect_token, p, 13)
             &&
-            (newline_var = _PyPegen_newline_token(p))
+            (newline_var = _PyPegen_expect_token(p, NEWLINE))
         )
         {
             res = _PyPegen_singleton_seq ( p , a );
@@ -951,7 +951,7 @@ simple_stmt_rule(Parser *p)
             &&
             (opt_var = _PyPegen_expect_token(p, 13), 1)
             &&
-            (newline_var = _PyPegen_newline_token(p))
+            (newline_var = _PyPegen_expect_token(p, NEWLINE))
         )
         {
             res = a;
@@ -2684,7 +2684,7 @@ for_stmt_rule(Parser *p)
         void *literal;
         expr_ty t;
         if (
-            (is_async = _PyPegen_async_token(p), 1)
+            (is_async = _PyPegen_expect_token(p, ASYNC), 1)
             &&
             (keyword = _PyPegen_expect_token(p, 517))
             &&
@@ -2751,7 +2751,7 @@ with_stmt_rule(Parser *p)
         void *literal_1;
         void *literal_2;
         if (
-            (is_async = _PyPegen_async_token(p), 1)
+            (is_async = _PyPegen_expect_token(p, ASYNC), 1)
             &&
             (keyword = _PyPegen_expect_token(p, 519))
             &&
@@ -2790,7 +2790,7 @@ with_stmt_rule(Parser *p)
         void *keyword;
         void *literal;
         if (
-            (is_async = _PyPegen_async_token(p), 1)
+            (is_async = _PyPegen_expect_token(p, ASYNC), 1)
             &&
             (keyword = _PyPegen_expect_token(p, 519))
             &&
@@ -3263,7 +3263,7 @@ function_def_raw_rule(Parser *p)
         expr_ty n;
         void *params;
         if (
-            (is_async = _PyPegen_async_token(p), 1)
+            (is_async = _PyPegen_expect_token(p, ASYNC), 1)
             &&
             (keyword = _PyPegen_expect_token(p, 522))
             &&
@@ -4002,13 +4002,13 @@ block_rule(Parser *p)
         void *indent_var;
         void *newline_var;
         if (
-            (newline_var = _PyPegen_newline_token(p))
+            (newline_var = _PyPegen_expect_token(p, NEWLINE))
             &&
-            (indent_var = _PyPegen_indent_token(p))
+            (indent_var = _PyPegen_expect_token(p, INDENT))
             &&
             (a = statements_rule(p))
             &&
-            (dedent_var = _PyPegen_dedent_token(p))
+            (dedent_var = _PyPegen_expect_token(p, DEDENT))
         )
         {
             res = a;
@@ -6754,7 +6754,7 @@ await_primary_rule(Parser *p)
         expr_ty a;
         void *await_var;
         if (
-            (await_var = _PyPegen_await_token(p))
+            (await_var = _PyPegen_expect_token(p, AWAIT))
             &&
             (a = primary_rule(p))
         )
@@ -9919,9 +9919,9 @@ invalid_block_rule(Parser *p)
     { // NEWLINE !INDENT
         void *newline_var;
         if (
-            (newline_var = _PyPegen_newline_token(p))
+            (newline_var = _PyPegen_expect_token(p, NEWLINE))
             &&
-            _PyPegen_lookahead(0, _PyPegen_indent_token, p)
+            _PyPegen_lookahead_with_int(0, _PyPegen_expect_token, p, INDENT)
         )
         {
             res = RAISE_INDENTATION_ERROR ( "expected an indented block" );
@@ -10036,7 +10036,7 @@ _loop0_1_rule(Parser *p)
     { // NEWLINE
         void *newline_var;
         while (
-            (newline_var = _PyPegen_newline_token(p))
+            (newline_var = _PyPegen_expect_token(p, NEWLINE))
         )
         {
             res = newline_var;
@@ -10273,7 +10273,7 @@ _tmp_6_rule(Parser *p)
     { // ASYNC
         void *async_var;
         if (
-            (async_var = _PyPegen_async_token(p))
+            (async_var = _PyPegen_expect_token(p, ASYNC))
         )
         {
             res = async_var;
@@ -10345,7 +10345,7 @@ _tmp_8_rule(Parser *p)
     { // ASYNC
         void *async_var;
         if (
-            (async_var = _PyPegen_async_token(p))
+            (async_var = _PyPegen_expect_token(p, ASYNC))
         )
         {
             res = async_var;
@@ -10381,7 +10381,7 @@ _tmp_9_rule(Parser *p)
     { // ASYNC
         void *async_var;
         if (
-            (async_var = _PyPegen_async_token(p))
+            (async_var = _PyPegen_expect_token(p, ASYNC))
         )
         {
             res = async_var;
@@ -15068,7 +15068,7 @@ _tmp_128_rule(Parser *p)
             &&
             (f = named_expression_rule(p))
             &&
-            (newline_var = _PyPegen_newline_token(p))
+            (newline_var = _PyPegen_expect_token(p, NEWLINE))
         )
         {
             res = f;
@@ -15257,7 +15257,7 @@ _tmp_134_rule(Parser *p)
         void *keyword_1;
         void *y;
         if (
-            (y = _PyPegen_async_token(p), 1)
+            (y = _PyPegen_expect_token(p, ASYNC), 1)
             &&
             (keyword = _PyPegen_expect_token(p, 517))
             &&
diff --git a/Parser/pegen/pegen.c b/Parser/pegen/pegen.c
index 39da2709991b9..942447b0f8fd1 100644
--- a/Parser/pegen/pegen.c
+++ b/Parser/pegen/pegen.c
@@ -692,16 +692,6 @@ _PyPegen_lookahead_with_name(int positive, expr_ty (func)(Parser *), Parser *p)
     return (res != NULL) == positive;
 }
 
-int
-_PyPegen_lookahead_with_string(int positive, void *(func)(Parser *, const char *), Parser *p,
-                      const char *arg)
-{
-    int mark = p->mark;
-    void *res = func(p, arg);
-    p->mark = mark;
-    return (res != NULL) == positive;
-}
-
 int
 _PyPegen_lookahead_with_int(int positive, Token *(func)(Parser *, int), Parser *p, int arg)
 {
@@ -751,24 +741,6 @@ _PyPegen_get_last_nonnwhitespace_token(Parser *p)
     return token;
 }
 
-void *
-_PyPegen_async_token(Parser *p)
-{
-    return _PyPegen_expect_token(p, ASYNC);
-}
-
-void *
-_PyPegen_await_token(Parser *p)
-{
-    return _PyPegen_expect_token(p, AWAIT);
-}
-
-void *
-_PyPegen_endmarker_token(Parser *p)
-{
-    return _PyPegen_expect_token(p, ENDMARKER);
-}
-
 expr_ty
 _PyPegen_name_token(Parser *p)
 {
@@ -794,24 +766,6 @@ _PyPegen_string_token(Parser *p)
     return _PyPegen_expect_token(p, STRING);
 }
 
-void *
-_PyPegen_newline_token(Parser *p)
-{
-    return _PyPegen_expect_token(p, NEWLINE);
-}
-
-void *
-_PyPegen_indent_token(Parser *p)
-{
-    return _PyPegen_expect_token(p, INDENT);
-}
-
-void *
-_PyPegen_dedent_token(Parser *p)
-{
-    return _PyPegen_expect_token(p, DEDENT);
-}
-
 static PyObject *
 parsenumber_raw(const char *s)
 {
diff --git a/Parser/pegen/pegen.h b/Parser/pegen/pegen.h
index 0ac9b317efe59..99ec0f44e6518 100644
--- a/Parser/pegen/pegen.h
+++ b/Parser/pegen/pegen.h
@@ -104,7 +104,6 @@ int _PyPegen_update_memo(Parser *p, int mark, int type, void *node);
 int _PyPegen_is_memoized(Parser *p, int type, void *pres);
 
 int _PyPegen_lookahead_with_name(int, expr_ty (func)(Parser *), Parser *);
-int _PyPegen_lookahead_with_string(int, void *(func)(Parser *, const char *), Parser *, const char *);
 int _PyPegen_lookahead_with_int(int, Token *(func)(Parser *, int), Parser *, int);
 int _PyPegen_lookahead(int, void *(func)(Parser *), Parser *);
 
diff --git a/Tools/peg_generator/Makefile b/Tools/peg_generator/Makefile
index a37cbfcaa8551..c1219b9263851 100644
--- a/Tools/peg_generator/Makefile
+++ b/Tools/peg_generator/Makefile
@@ -33,7 +33,7 @@ dump: peg_extension/parse.c
 	$(PYTHON) -c "from peg_extension import parse; import ast; t = parse.parse_file('$(TESTFILE)', mode=1); print(ast.dump(t))"
 
 regen-metaparser: pegen/metagrammar.gram pegen/*.py
-	$(PYTHON) -m pegen -q -c pegen/metagrammar.gram -o pegen/grammar_parser.py
+	$(PYTHON) -m pegen -q python pegen/metagrammar.gram -o pegen/grammar_parser.py
 
 # Note: These targets really depend on the generated shared object in peg_extension/parse.*.so but
 # this has different names in different systems so we are abusing the implicit dependency on
diff --git a/Tools/peg_generator/pegen/c_generator.py b/Tools/peg_generator/pegen/c_generator.py
index a01c3097c365b..a59da2ffae8e1 100644
--- a/Tools/peg_generator/pegen/c_generator.py
+++ b/Tools/peg_generator/pegen/c_generator.py
@@ -1,33 +1,36 @@
 import ast
+from dataclasses import dataclass, field
 import re
-from typing import Any, cast, Dict, IO, Optional, List, Text, Tuple, Set
+from typing import IO, Any, Dict, List, Optional, Set, Text, Tuple
+from enum import Enum
 
+from pegen import grammar
 from pegen.grammar import (
+    Alt,
     Cut,
+    Gather,
     GrammarVisitor,
-    Rhs,
-    Alt,
+    Group,
+    Lookahead,
     NamedItem,
     NameLeaf,
-    StringLeaf,
-    Lookahead,
-    PositiveLookahead,
     NegativeLookahead,
     Opt,
+    PositiveLookahead,
     Repeat0,
     Repeat1,
-    Gather,
-    Group,
+    Rhs,
     Rule,
+    StringLeaf,
 )
-from pegen import grammar
-from pegen.parser_generator import dedupe, ParserGenerator
+from pegen.parser_generator import ParserGenerator
 
 EXTENSION_PREFIX = """\
 #include "pegen.h"
 
 """
 
+
 EXTENSION_SUFFIX = """
 void *
 _PyPegen_parse(Parser *p)
@@ -41,6 +44,43 @@
 """
 
 
+class NodeTypes(Enum):
+    NAME_TOKEN = 0
+    NUMBER_TOKEN = 1
+    STRING_TOKEN = 2
+    GENERIC_TOKEN = 3
+    KEYWORD = 4
+    CUT_OPERATOR = 5
+
+
+BASE_NODETYPES = {
+    "NAME": NodeTypes.NAME_TOKEN,
+    "NUMBER": NodeTypes.NUMBER_TOKEN,
+    "STRING": NodeTypes.STRING_TOKEN,
+}
+
+
+@dataclass
+class FunctionCall:
+    function: str
+    arguments: Optional[List[Any]] = None
+    assigned_variable: Optional[str] = None
+    nodetype: Optional[NodeTypes] = None
+    force_true: bool = False
+    metadata: Dict[str, Any] = field(default_factory=dict)
+
+    def __str__(self) -> str:
+        parts = []
+        parts.append(self.function)
+        if self.arguments:
+            parts.append(f"({', '.join(map(str, self.arguments))})")
+        if self.force_true:
+            parts.append(", 1")
+        if self.assigned_variable:
+            parts = ["(", self.assigned_variable, " = ", *parts, ")"]
+        return "".join(parts)
+
+
 class CCallMakerVisitor(GrammarVisitor):
     def __init__(
         self,
@@ -54,28 +94,57 @@ def __init__(
         self.cache: Dict[Any, Any] = {}
         self.keyword_cache: Dict[str, int] = {}
 
-    def keyword_helper(self, keyword: str) -> Tuple[str, str]:
+    def keyword_helper(self, keyword: str) -> FunctionCall:
         if keyword not in self.keyword_cache:
             self.keyword_cache[keyword] = self.gen.keyword_type()
-        return "keyword", f"_PyPegen_expect_token(p, {self.keyword_cache[keyword]})"
+        return FunctionCall(
+            assigned_variable="keyword",
+            function="_PyPegen_expect_token",
+            arguments=["p", self.keyword_cache[keyword]],
+            nodetype=NodeTypes.KEYWORD,
+        )
 
-    def visit_NameLeaf(self, node: NameLeaf) -> Tuple[str, str]:
+    def visit_NameLeaf(self, node: NameLeaf) -> FunctionCall:
         name = node.value
         if name in self.non_exact_tokens:
-            name = name.lower()
-            return f"{name}_var", f"_PyPegen_{name}_token(p)"
-        return f"{name}_var", f"{name}_rule(p)"
+            if name in BASE_NODETYPES:
+                return FunctionCall(
+                    assigned_variable=f"{name.lower()}_var",
+                    function=f"_PyPegen_{name.lower()}_token",
+                    arguments=["p"],
+                    nodetype=BASE_NODETYPES[name],
+                    metadata={"rulename": name.lower()},
+                )
+            return FunctionCall(
+                assigned_variable=f"{name.lower()}_var",
+                function=f"_PyPegen_expect_token",
+                arguments=["p", name],
+                nodetype=NodeTypes.GENERIC_TOKEN,
+                metadata={"rulename": name.lower()},
+            )
+
+        return FunctionCall(
+            assigned_variable=f"{name}_var",
+            function=f"{name}_rule",
+            arguments=["p"],
+            metadata={"rulename": name.lower()},
+        )
 
-    def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]:
+    def visit_StringLeaf(self, node: StringLeaf) -> FunctionCall:
         val = ast.literal_eval(node.value)
         if re.match(r"[a-zA-Z_]\w*\Z", val):  # This is a keyword
             return self.keyword_helper(val)
         else:
             assert val in self.exact_tokens, f"{node.value} is not a known literal"
             type = self.exact_tokens[val]
-            return "literal", f"_PyPegen_expect_token(p, {type})"
+            return FunctionCall(
+                assigned_variable="literal",
+                function=f"_PyPegen_expect_token",
+                arguments=["p", type],
+                nodetype=NodeTypes.GENERIC_TOKEN,
+            )
 
-    def visit_Rhs(self, node: Rhs) -> Tuple[Optional[str], str]:
+    def visit_Rhs(self, node: Rhs) -> FunctionCall:
         def can_we_inline(node: Rhs) -> int:
             if len(node.alts) != 1 or len(node.alts[0].items) != 1:
                 return False
@@ -90,65 +159,96 @@ def can_we_inline(node: Rhs) -> int:
             self.cache[node] = self.visit(node.alts[0].items[0])
         else:
             name = self.gen.name_node(node)
-            self.cache[node] = f"{name}_var", f"{name}_rule(p)"
+            self.cache[node] = FunctionCall(
+                assigned_variable=f"{name}_var",
+                function=f"{name}_rule",
+                arguments=["p"],
+                metadata={"rulename": name},
+            )
         return self.cache[node]
 
-    def visit_NamedItem(self, node: NamedItem) -> Tuple[Optional[str], str]:
-        name, call = self.visit(node.item)
+    def visit_NamedItem(self, node: NamedItem) -> FunctionCall:
+        call = self.visit(node.item)
         if node.name:
-            name = node.name
-        return name, call
-
-    def lookahead_call_helper(self, node: Lookahead, positive: int) -> Tuple[None, str]:
-        name, call = self.visit(node.node)
-        func, args = call.split("(", 1)
-        assert args[-1] == ")"
-        args = args[:-1]
-        if "name_token" in call:
-            return None, f"_PyPegen_lookahead_with_name({positive}, {func}, {args})"
-        elif not args.startswith("p,"):
-            return None, f"_PyPegen_lookahead({positive}, {func}, {args})"
-        elif args[2:].strip().isalnum():
-            return None, f"_PyPegen_lookahead_with_int({positive}, {func}, {args})"
+            call.assigned_variable = node.name
+        return call
+
+    def lookahead_call_helper(self, node: Lookahead, positive: int) -> FunctionCall:
+        call = self.visit(node.node)
+        if call.nodetype == NodeTypes.NAME_TOKEN:
+            return FunctionCall(
+                function=f"_PyPegen_lookahead_with_name",
+                arguments=[positive, call.function, *call.arguments],
+            )
+        elif call.nodetype in {NodeTypes.GENERIC_TOKEN, NodeTypes.KEYWORD}:
+            return FunctionCall(
+                function=f"_PyPegen_lookahead_with_int",
+                arguments=[positive, call.function, *call.arguments],
+            )
         else:
-            return None, f"_PyPegen_lookahead_with_string({positive}, {func}, {args})"
+            return FunctionCall(
+                function=f"_PyPegen_lookahead",
+                arguments=[positive, call.function, *call.arguments],
+            )
 
-    def visit_PositiveLookahead(self, node: PositiveLookahead) -> Tuple[None, str]:
+    def visit_PositiveLookahead(self, node: PositiveLookahead) -> FunctionCall:
         return self.lookahead_call_helper(node, 1)
 
-    def visit_NegativeLookahead(self, node: NegativeLookahead) -> Tuple[None, str]:
+    def visit_NegativeLookahead(self, node: NegativeLookahead) -> FunctionCall:
         return self.lookahead_call_helper(node, 0)
 
-    def visit_Opt(self, node: Opt) -> Tuple[str, str]:
-        name, call = self.visit(node.node)
-        return "opt_var", f"{call}, 1"  # Using comma operator!
+    def visit_Opt(self, node: Opt) -> FunctionCall:
+        call = self.visit(node.node)
+        return FunctionCall(
+            assigned_variable="opt_var",
+            function=call.function,
+            arguments=call.arguments,
+            force_true=True,
+        )
 
-    def visit_Repeat0(self, node: Repeat0) -> Tuple[str, str]:
+    def visit_Repeat0(self, node: Repeat0) -> FunctionCall:
         if node in self.cache:
             return self.cache[node]
         name = self.gen.name_loop(node.node, False)
-        self.cache[node] = f"{name}_var", f"{name}_rule(p)"
+        self.cache[node] = FunctionCall(
+            assigned_variable=f"{name}_var",
+            function=f"{name}_rule",
+            arguments=["p"],
+            metadata={"rulename": name},
+        )
         return self.cache[node]
 
-    def visit_Repeat1(self, node: Repeat1) -> Tuple[str, str]:
+    def visit_Repeat1(self, node: Repeat1) -> FunctionCall:
         if node in self.cache:
             return self.cache[node]
         name = self.gen.name_loop(node.node, True)
-        self.cache[node] = f"{name}_var", f"{name}_rule(p)"
+        self.cache[node] = FunctionCall(
+            assigned_variable=f"{name}_var",
+            function=f"{name}_rule",
+            arguments=["p"],
+            metadata={"rulename": name},
+        )
         return self.cache[node]
 
-    def visit_Gather(self, node: Gather) -> Tuple[str, str]:
+    def visit_Gather(self, node: Gather) -> FunctionCall:
         if node in self.cache:
             return self.cache[node]
         name = self.gen.name_gather(node)
-        self.cache[node] = f"{name}_var", f"{name}_rule(p)"
+        self.cache[node] = FunctionCall(
+            assigned_variable=f"{name}_var",
+            function=f"{name}_rule",
+            arguments=["p"],
+            metadata={"rulename": name},
+        )
         return self.cache[node]
 
-    def visit_Group(self, node: Group) -> Tuple[Optional[str], str]:
+    def visit_Group(self, node: Group) -> FunctionCall:
         return self.visit(node.rhs)
 
-    def visit_Cut(self, node: Cut) -> Tuple[str, str]:
-        return "cut_var", "1"
+    def visit_Cut(self, node: Cut) -> FunctionCall:
+        return FunctionCall(
+            assigned_variable="cut_var", function="1", nodetype=NodeTypes.CUT_OPERATOR
+        )
 
 
 class CParserGenerator(ParserGenerator, GrammarVisitor):
@@ -252,7 +352,6 @@ def generate(self, filename: str) -> None:
                 mode += 1
         modulename = self.grammar.metas.get("modulename", "parse")
         trailer = self.grammar.metas.get("trailer", EXTENSION_SUFFIX)
-        keyword_cache = self.callmakervisitor.keyword_cache
         if trailer:
             self.print(trailer.rstrip("\n") % dict(mode=mode, modulename=modulename))
 
@@ -448,13 +547,11 @@ def visit_Rule(self, node: Rule) -> None:
             self._handle_default_rule_body(node, rhs, result_type)
         self.print("}")
 
-    def visit_NamedItem(self, node: NamedItem, names: List[str]) -> None:
-        name, call = self.callmakervisitor.visit(node)
-        if not name:
-            self.print(call)
-        else:
-            name = dedupe(name, names)
-            self.print(f"({name} = {call})")
+    def visit_NamedItem(self, node: NamedItem) -> None:
+        call = self.callmakervisitor.visit(node)
+        if call.assigned_variable:
+            call.assigned_variable = self.dedupe(call.assigned_variable)
+        self.print(call)
 
     def visit_Rhs(
         self, node: Rhs, is_loop: bool, is_gather: bool, rulename: Optional[str]
@@ -464,7 +561,7 @@ def visit_Rhs(
         for alt in node.alts:
             self.visit(alt, is_loop=is_loop, is_gather=is_gather, rulename=rulename)
 
-    def join_conditions(self, keyword: str, node: Any, names: List[str]) -> None:
+    def join_conditions(self, keyword: str, node: Any) -> None:
         self.print(f"{keyword} (")
         with self.indent():
             first = True
@@ -473,7 +570,7 @@ def join_conditions(self, keyword: str, node: Any, names: List[str]) -> None:
                     first = False
                 else:
                     self.print("&&")
-                self.visit(item, names=names)
+                self.visit(item)
         self.print(")")
 
     def emit_action(self, node: Alt, cleanup_code: Optional[str] = None) -> None:
@@ -492,29 +589,34 @@ def emit_action(self, node: Alt, cleanup_code: Optional[str] = None) -> None:
                 f'fprintf(stderr, "Hit with action [%d-%d]: %s\\n", mark, p->mark, "{node}");'
             )
 
-    def emit_default_action(self, is_gather: bool, names: List[str], node: Alt) -> None:
-        if len(names) > 1:
+    def emit_default_action(self, is_gather: bool, node: Alt) -> None:
+        if len(self.local_variable_names) > 1:
             if is_gather:
-                assert len(names) == 2
-                self.print(f"res = _PyPegen_seq_insert_in_front(p, {names[0]}, {names[1]});")
+                assert len(self.local_variable_names) == 2
+                self.print(
+                    f"res = _PyPegen_seq_insert_in_front(p, "
+                    f"{self.local_variable_names[0]}, {self.local_variable_names[1]});"
+                )
             else:
                 if self.debug:
                     self.print(
                         f'fprintf(stderr, "Hit without action [%d:%d]: %s\\n", mark, p->mark, "{node}");'
                     )
-                self.print(f"res = _PyPegen_dummy_name(p, {', '.join(names)});")
+                self.print(
+                    f"res = _PyPegen_dummy_name(p, {', '.join(self.local_variable_names)});"
+                )
         else:
             if self.debug:
                 self.print(
                     f'fprintf(stderr, "Hit with default action [%d:%d]: %s\\n", mark, p->mark, "{node}");'
                 )
-            self.print(f"res = {names[0]};")
+            self.print(f"res = {self.local_variable_names[0]};")
 
     def emit_dummy_action(self) -> None:
         self.print(f"res = _PyPegen_dummy_name(p);")
 
-    def handle_alt_normal(self, node: Alt, is_gather: bool, names: List[str]) -> None:
-        self.join_conditions(keyword="if", node=node, names=names)
+    def handle_alt_normal(self, node: Alt, is_gather: bool) -> None:
+        self.join_conditions(keyword="if", node=node)
         self.print("{")
         # We have parsed successfully all the conditions for the option.
         with self.indent():
@@ -526,17 +628,15 @@ def handle_alt_normal(self, node: Alt, is_gather: bool, names: List[str]) -> Non
             elif node.action:
                 self.emit_action(node)
             else:
-                self.emit_default_action(is_gather, names, node)
+                self.emit_default_action(is_gather, node)
 
             # As the current option has parsed correctly, do not continue with the rest.
             self.print(f"goto done;")
         self.print("}")
 
-    def handle_alt_loop(
-        self, node: Alt, is_gather: bool, rulename: Optional[str], names: List[str]
-    ) -> None:
+    def handle_alt_loop(self, node: Alt, is_gather: bool, rulename: Optional[str]) -> None:
         # Condition of the main body of the alternative
-        self.join_conditions(keyword="while", node=node, names=names)
+        self.join_conditions(keyword="while", node=node)
         self.print("{")
         # We have parsed successfully one item!
         with self.indent():
@@ -548,7 +648,7 @@ def handle_alt_loop(
             elif node.action:
                 self.emit_action(node, cleanup_code="PyMem_Free(children);")
             else:
-                self.emit_default_action(is_gather, names, node)
+                self.emit_default_action(is_gather, node)
 
             # Add the result of rule to the temporary buffer of children. This buffer
             # will populate later an asdl_seq with all elements to return.
@@ -580,47 +680,45 @@ def visit_Alt(
                 if v == "opt_var":
                     self.print("UNUSED(opt_var); // Silence compiler warnings")
 
-            names: List[str] = []
-            if is_loop:
-                self.handle_alt_loop(node, is_gather, rulename, names)
-            else:
-                self.handle_alt_normal(node, is_gather, names)
+            with self.local_variable_context():
+                if is_loop:
+                    self.handle_alt_loop(node, is_gather, rulename)
+                else:
+                    self.handle_alt_normal(node, is_gather)
 
             self.print("p->mark = mark;")
-            if "cut_var" in names:
+            if "cut_var" in vars:
                 self.print("if (cut_var) return NULL;")
         self.print("}")
 
-    def collect_vars(self, node: Alt) -> Dict[str, Optional[str]]:
-        names: List[str] = []
+    def collect_vars(self, node: Alt) -> Dict[Optional[str], Optional[str]]:
         types = {}
-        for item in node.items:
-            name, type = self.add_var(item, names)
-            types[name] = type
+        with self.local_variable_context():
+            for item in node.items:
+                name, type = self.add_var(item)
+                types[name] = type
         return types
 
-    def add_var(self, node: NamedItem, names: List[str]) -> Tuple[str, Optional[str]]:
-        name: str
-        call: str
-        name, call = self.callmakervisitor.visit(node.item)
-        type = None
-        if not name:
-            return name, type
-        if name.startswith("cut"):
-            return name, "int"
-        if name.endswith("_var"):
-            rulename = name[:-4]
-            rule = self.rules.get(rulename)
-            if rule is not None:
-                if rule.is_loop() or rule.is_gather():
-                    type = "asdl_seq *"
-                else:
-                    type = rule.type
-            elif name.startswith("_loop") or name.startswith("_gather"):
+    def add_var(self, node: NamedItem) -> Tuple[Optional[str], Optional[str]]:
+        call = self.callmakervisitor.visit(node.item)
+        if not call.assigned_variable:
+            return None, None
+        if call.nodetype == NodeTypes.CUT_OPERATOR:
+            return call.assigned_variable, "int"
+
+        name = call.assigned_variable
+        rulename = call.metadata.get("rulename")
+
+        type: Optional[str] = None
+
+        assert self.all_rules is not None
+        if rulename and rulename in self.all_rules:
+            rule = self.all_rules.get(rulename)
+            if rule.is_loop() or rule.is_gather():
                 type = "asdl_seq *"
-            elif name in ("name_var", "string_var", "number_var"):
-                type = "expr_ty"
-        if node.name:
-            name = node.name
-        name = dedupe(name, names)
-        return name, type
+            else:
+                type = rule.type
+        elif call.nodetype in BASE_NODETYPES.values():
+            type = "expr_ty"
+
+        return self.dedupe(node.name if node.name else call.assigned_variable), type
diff --git a/Tools/peg_generator/pegen/parser_generator.py b/Tools/peg_generator/pegen/parser_generator.py
index 7851a7c90f4d5..3f6cdbe409d56 100644
--- a/Tools/peg_generator/pegen/parser_generator.py
+++ b/Tools/peg_generator/pegen/parser_generator.py
@@ -13,7 +13,6 @@
     NamedItem,
     Plain,
     NameLeaf,
-    StringLeaf,
     Gather,
 )
 from pegen.grammar import GrammarError, GrammarVisitor
@@ -48,6 +47,18 @@ def __init__(self, grammar: Grammar, file: Optional[IO[Text]]):
         self.todo = self.rules.copy()  # Rules to generate
         self.counter = 0  # For name_rule()/name_loop()
         self.keyword_counter = 499  # For keyword_type()
+        self.all_rules: Optional[Dict[str, Rule]] = None  # Rules + temporal rules
+        self._local_variable_stack: List[List[str]] = []
+
+    @contextlib.contextmanager
+    def local_variable_context(self) -> Iterator[None]:
+        self._local_variable_stack.append([])
+        yield
+        self._local_variable_stack.pop()
+
+    @property
+    def local_variable_names(self) -> List[str]:
+        return self._local_variable_stack[-1]
 
     @abstractmethod
     def generate(self, filename: str) -> None:
@@ -82,6 +93,7 @@ def collect_todo(self) -> None:
             for rulename in todo:
                 self.todo[rulename].collect_todo(self)
             done = set(alltodo)
+        self.all_rules = self.todo.copy()
 
     def keyword_type(self) -> int:
         self.keyword_counter += 1
@@ -109,26 +121,23 @@ def name_gather(self, node: Gather) -> str:
         self.counter += 1
         extra_function_name = f"_loop0_{self.counter}"
         extra_function_alt = Alt(
-            [NamedItem(None, node.separator), NamedItem("elem", node.node),], action="elem",
+            [NamedItem(None, node.separator), NamedItem("elem", node.node)], action="elem",
         )
         self.todo[extra_function_name] = Rule(
             extra_function_name, None, Rhs([extra_function_alt]),
         )
-        alt = Alt(
-            [NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name)),],
-        )
+        alt = Alt([NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name))],)
         self.todo[name] = Rule(name, None, Rhs([alt]),)
         return name
 
-
-def dedupe(name: str, names: List[str]) -> str:
-    origname = name
-    counter = 0
-    while name in names:
-        counter += 1
-        name = f"{origname}_{counter}"
-    names.append(name)
-    return name
+    def dedupe(self, name: str) -> str:
+        origname = name
+        counter = 0
+        while name in self.local_variable_names:
+            counter += 1
+            name = f"{origname}_{counter}"
+        self.local_variable_names.append(name)
+        return name
 
 
 def compute_nullables(rules: Dict[str, Rule]) -> None:
@@ -153,13 +162,13 @@ def compute_left_recursives(
             leaders = set(scc)
             for start in scc:
                 for cycle in sccutils.find_cycles_in_scc(graph, scc, start):
-                    ## print("Cycle:", " -> ".join(cycle))
+                    # print("Cycle:", " -> ".join(cycle))
                     leaders -= scc - set(cycle)
                     if not leaders:
                         raise ValueError(
                             f"SCC {scc} has no leadership candidate (no element is included in all cycles)"
                         )
-            ## print("Leaders:", leaders)
+            # print("Leaders:", leaders)
             leader = min(leaders)  # Pick an arbitrary leader from the candidates.
             rules[leader].leader = True
         else:
diff --git a/Tools/peg_generator/pegen/python_generator.py b/Tools/peg_generator/pegen/python_generator.py
index b2891885f957e..bde27890c15a6 100644
--- a/Tools/peg_generator/pegen/python_generator.py
+++ b/Tools/peg_generator/pegen/python_generator.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, List, Optional, IO, Text, Tuple
+from typing import Any, Dict, Optional, IO, Text, Tuple
 
 from pegen.grammar import (
     Cut,
@@ -19,7 +19,7 @@
     Alt,
 )
 from pegen import grammar
-from pegen.parser_generator import dedupe, ParserGenerator
+from pegen.parser_generator import ParserGenerator
 
 MODULE_PREFIX = """\
 #!/usr/bin/env python3.8
@@ -173,7 +173,7 @@ def visit_Rule(self, node: Rule) -> None:
             else:
                 self.print("return None")
 
-    def visit_NamedItem(self, node: NamedItem, names: List[str]) -> None:
+    def visit_NamedItem(self, node: NamedItem) -> None:
         name, call = self.callmakervisitor.visit(node.item)
         if node.name:
             name = node.name
@@ -181,7 +181,7 @@ def visit_NamedItem(self, node: NamedItem, names: List[str]) -> None:
             self.print(call)
         else:
             if name != "cut":
-                name = dedupe(name, names)
+                name = self.dedupe(name)
             self.print(f"({name} := {call})")
 
     def visit_Rhs(self, node: Rhs, is_loop: bool = False, is_gather: bool = False) -> None:
@@ -191,34 +191,36 @@ def visit_Rhs(self, node: Rhs, is_loop: bool = False, is_gather: bool = False) -
             self.visit(alt, is_loop=is_loop, is_gather=is_gather)
 
     def visit_Alt(self, node: Alt, is_loop: bool, is_gather: bool) -> None:
-        names: List[str] = []
-        self.print("cut = False")  # TODO: Only if needed.
-        if is_loop:
-            self.print("while (")
-        else:
-            self.print("if (")
-        with self.indent():
-            first = True
-            for item in node.items:
-                if first:
-                    first = False
-                else:
-                    self.print("and")
-                self.visit(item, names=names)
-        self.print("):")
-        with self.indent():
-            action = node.action
-            if not action:
-                if is_gather:
-                    assert len(names) == 2
-                    action = f"[{names[0]}] + {names[1]}"
-                else:
-                    action = f"[{', '.join(names)}]"
+        with self.local_variable_context():
+            self.print("cut = False")  # TODO: Only if needed.
             if is_loop:
-                self.print(f"children.append({action})")
-                self.print(f"mark = self.mark()")
+                self.print("while (")
             else:
-                self.print(f"return {action}")
-        self.print("self.reset(mark)")
-        # Skip remaining alternatives if a cut was reached.
-        self.print("if cut: return None")  # TODO: Only if needed.
+                self.print("if (")
+            with self.indent():
+                first = True
+                for item in node.items:
+                    if first:
+                        first = False
+                    else:
+                        self.print("and")
+                    self.visit(item)
+            self.print("):")
+            with self.indent():
+                action = node.action
+                if not action:
+                    if is_gather:
+                        assert len(self.local_variable_names) == 2
+                        action = (
+                            f"[{self.local_variable_names[0]}] + {self.local_variable_names[1]}"
+                        )
+                    else:
+                        action = f"[{', '.join(self.local_variable_names)}]"
+                if is_loop:
+                    self.print(f"children.append({action})")
+                    self.print(f"mark = self.mark()")
+                else:
+                    self.print(f"return {action}")
+            self.print("self.reset(mark)")
+            # Skip remaining alternatives if a cut was reached.
+            self.print("if cut: return None")  # TODO: Only if needed.



More information about the Python-checkins mailing list