[Python-checkins] bpo-40334: use the TOKENS file when checking dangling rules (GH-19849)
Pablo Galindo
webhook-mailer at python.org
Fri May 1 18:14:16 EDT 2020
https://github.com/python/cpython/commit/7ba08ff7b41911f972d0750e068a2270e0dbd68f
commit: 7ba08ff7b41911f972d0750e068a2270e0dbd68f
branch: master
author: Pablo Galindo <Pablogsal at gmail.com>
committer: GitHub <noreply at github.com>
date: 2020-05-01T23:14:12+01:00
summary:
bpo-40334: use the TOKENS file when checking dangling rules (GH-19849)
files:
M Tools/peg_generator/pegen/build.py
M Tools/peg_generator/pegen/c_generator.py
M Tools/peg_generator/pegen/parser_generator.py
M Tools/peg_generator/pegen/python_generator.py
M Tools/peg_generator/pegen/testutil.py
diff --git a/Tools/peg_generator/pegen/build.py b/Tools/peg_generator/pegen/build.py
index d33dd049d63c7..907feeaf122de 100644
--- a/Tools/peg_generator/pegen/build.py
+++ b/Tools/peg_generator/pegen/build.py
@@ -17,6 +17,8 @@
MOD_DIR = pathlib.Path(__file__).resolve().parent
+TokenDefinitions = Tuple[Dict[int, str], Dict[str, int], Set[str]]
+
def get_extra_flags(compiler_flags: str, compiler_py_flags_nodist: str) -> List[str]:
flags = sysconfig.get_config_var(compiler_flags)
@@ -112,7 +114,8 @@ def build_parser(
return grammar, parser, tokenizer
-def generate_token_definitions(tokens: IO[str]) -> Tuple[Dict[str, int], Set[str]]:
+def generate_token_definitions(tokens: IO[str]) -> TokenDefinitions:
+ all_tokens = {}
exact_tokens = {}
non_exact_tokens = set()
numbers = itertools.count(0)
@@ -129,13 +132,15 @@ def generate_token_definitions(tokens: IO[str]) -> Tuple[Dict[str, int], Set[str
if len(pieces) == 1:
(token,) = pieces
non_exact_tokens.add(token)
+ all_tokens[index] = token
elif len(pieces) == 2:
- _, op = pieces
+ token, op = pieces
exact_tokens[op.strip("'")] = index
+ all_tokens[index] = token
else:
raise ValueError(f"Unexpected line found in Tokens file: {line}")
- return exact_tokens, non_exact_tokens
+ return all_tokens, exact_tokens, non_exact_tokens
def build_c_generator(
@@ -149,10 +154,10 @@ def build_c_generator(
skip_actions: bool = False,
) -> ParserGenerator:
with open(tokens_file, "r") as tok_file:
- exact_tok, non_exact_tok = generate_token_definitions(tok_file)
+ all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file)
with open(output_file, "w") as file:
gen: ParserGenerator = CParserGenerator(
- grammar, exact_tok, non_exact_tok, file, skip_actions=skip_actions
+ grammar, all_tokens, exact_tok, non_exact_tok, file, skip_actions=skip_actions
)
gen.generate(grammar_file)
diff --git a/Tools/peg_generator/pegen/c_generator.py b/Tools/peg_generator/pegen/c_generator.py
index 6c77f43991bbe..c9c67067d4677 100644
--- a/Tools/peg_generator/pegen/c_generator.py
+++ b/Tools/peg_generator/pegen/c_generator.py
@@ -265,13 +265,14 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
def __init__(
self,
grammar: grammar.Grammar,
+ tokens: Dict[int, str],
exact_tokens: Dict[str, int],
non_exact_tokens: Set[str],
file: Optional[IO[Text]],
debug: bool = False,
skip_actions: bool = False,
):
- super().__init__(grammar, file)
+ super().__init__(grammar, tokens, file)
self.callmakervisitor: CCallMakerVisitor = CCallMakerVisitor(
self, exact_tokens, non_exact_tokens
)
diff --git a/Tools/peg_generator/pegen/parser_generator.py b/Tools/peg_generator/pegen/parser_generator.py
index b92df2267762d..03452510b9669 100644
--- a/Tools/peg_generator/pegen/parser_generator.py
+++ b/Tools/peg_generator/pegen/parser_generator.py
@@ -1,5 +1,4 @@
import contextlib
-import token
from abc import abstractmethod
from typing import AbstractSet, Dict, IO, Iterator, List, Optional, Set, Text, Tuple
@@ -19,11 +18,12 @@
class RuleCheckingVisitor(GrammarVisitor):
- def __init__(self, rules: Dict[str, Rule]):
+ def __init__(self, rules: Dict[str, Rule], tokens: Dict[int, str]):
self.rules = rules
+ self.tokens = tokens
def visit_NameLeaf(self, node: NameLeaf) -> None:
- if node.value not in self.rules and node.value not in token.tok_name.values():
+ if node.value not in self.rules and node.value not in self.tokens.values():
# TODO: Add line/col info to (leaf) nodes
raise GrammarError(f"Dangling reference to rule {node.value!r}")
@@ -32,12 +32,13 @@ class ParserGenerator:
callmakervisitor: GrammarVisitor
- def __init__(self, grammar: Grammar, file: Optional[IO[Text]]):
+ def __init__(self, grammar: Grammar, tokens: Dict[int, str], file: Optional[IO[Text]]):
self.grammar = grammar
+ self.tokens = tokens
self.rules = grammar.rules
if "trailer" not in grammar.metas and "start" not in self.rules:
raise GrammarError("Grammar without a trailer must have a 'start' rule")
- checker = RuleCheckingVisitor(self.rules)
+ checker = RuleCheckingVisitor(self.rules, self.tokens)
for rule in self.rules.values():
checker.visit(rule)
self.file = file
diff --git a/Tools/peg_generator/pegen/python_generator.py b/Tools/peg_generator/pegen/python_generator.py
index bde27890c15a6..64336552f24f6 100644
--- a/Tools/peg_generator/pegen/python_generator.py
+++ b/Tools/peg_generator/pegen/python_generator.py
@@ -1,3 +1,4 @@
+import token
from typing import Any, Dict, Optional, IO, Text, Tuple
from pegen.grammar import (
@@ -123,8 +124,13 @@ def visit_Cut(self, node: Cut) -> Tuple[str, str]:
class PythonParserGenerator(ParserGenerator, GrammarVisitor):
- def __init__(self, grammar: grammar.Grammar, file: Optional[IO[Text]]):
- super().__init__(grammar, file)
+ def __init__(
+ self,
+ grammar: grammar.Grammar,
+ file: Optional[IO[Text]],
+ tokens: Dict[int, str] = token.tok_name,
+ ):
+ super().__init__(grammar, tokens, file)
self.callmakervisitor = PythonCallMakerVisitor(self)
def generate(self, filename: str) -> None:
diff --git a/Tools/peg_generator/pegen/testutil.py b/Tools/peg_generator/pegen/testutil.py
index 1f79d8f702fb1..264659e71768c 100644
--- a/Tools/peg_generator/pegen/testutil.py
+++ b/Tools/peg_generator/pegen/testutil.py
@@ -17,6 +17,7 @@
from pegen.python_generator import PythonParserGenerator
from pegen.tokenizer import Tokenizer
+ALL_TOKENS = token.tok_name
EXACT_TOKENS = token.EXACT_TOKEN_TYPES # type: ignore
NON_EXACT_TOKENS = {
name for index, name in token.tok_name.items() if index not in EXACT_TOKENS.values()
@@ -76,7 +77,7 @@ def import_file(full_name: str, path: str) -> Any:
def generate_c_parser_source(grammar: Grammar) -> str:
out = io.StringIO()
- genr = CParserGenerator(grammar, EXACT_TOKENS, NON_EXACT_TOKENS, out)
+ genr = CParserGenerator(grammar, ALL_TOKENS, EXACT_TOKENS, NON_EXACT_TOKENS, out)
genr.generate("<string>")
return out.getvalue()
@@ -96,7 +97,9 @@ def generate_parser_c_extension(
assert not os.listdir(path)
source = path / "parse.c"
with open(source, "w") as file:
- genr = CParserGenerator(grammar, EXACT_TOKENS, NON_EXACT_TOKENS, file, debug=debug)
+ genr = CParserGenerator(
+ grammar, ALL_TOKENS, EXACT_TOKENS, NON_EXACT_TOKENS, file, debug=debug
+ )
genr.generate("parse.c")
compile_c_extension(str(source), build_dir=str(path))
More information about the Python-checkins mailing list