[Python-checkins] bpo-40939: Generate keyword.py using the new parser (GH-20800)

Lysandros Nikolaou webhook-mailer at python.org
Thu Jun 11 08:45:23 EDT 2020


https://github.com/python/cpython/commit/9727694f08cad4b019d2939224e3416312b1c0e1
commit: 9727694f08cad4b019d2939224e3416312b1c0e1
branch: master
author: Lysandros Nikolaou <lisandrosnik at gmail.com>
committer: GitHub <noreply at github.com>
date: 2020-06-11T13:45:15+01:00
summary:

bpo-40939: Generate keyword.py using the new parser (GH-20800)

files:
A Misc/NEWS.d/next/Library/2020-06-11-11-07-10.bpo-40939.-D5Asl.rst
A Tools/peg_generator/pegen/keywordgen.py
M Lib/keyword.py
M Lib/pydoc.py
M Makefile.pre.in
M PCbuild/regen.vcxproj

diff --git a/Lib/keyword.py b/Lib/keyword.py
index ddcbb25d3d3f5..afc3db3942ccb 100644
--- a/Lib/keyword.py
+++ b/Lib/keyword.py
@@ -1,13 +1,14 @@
-"""Keywords (from "Grammar/Grammar")
+"""Keywords (from "Grammar/python.gram")
 
 This file is automatically generated; please don't muck it up!
 
 To update the symbols in this file, 'cd' to the top directory of
 the python source tree and run:
 
-    python3 -m Parser.pgen.keywordgen Grammar/Grammar \
-                                      Grammar/Tokens \
-                                      Lib/keyword.py
+    PYTHONPATH=Tools/peg_generator python3 -m pegen.keywordgen \
+        Grammar/python.gram \
+        Grammar/Tokens \
+        Lib/keyword.py
 
 Alternatively, you can run 'make regen-keyword'.
 """
@@ -18,6 +19,7 @@
     'False',
     'None',
     'True',
+    '__new_parser__',
     'and',
     'as',
     'assert',
diff --git a/Lib/pydoc.py b/Lib/pydoc.py
index 628f9fc7d1d1e..a5368bf8bfe55 100755
--- a/Lib/pydoc.py
+++ b/Lib/pydoc.py
@@ -1817,6 +1817,7 @@ class Helper:
         'False': '',
         'None': '',
         'True': '',
+        '__new_parser__': '',
         'and': 'BOOLEAN',
         'as': 'with',
         'assert': ('assert', ''),
diff --git a/Makefile.pre.in b/Makefile.pre.in
index 7c16d2905fbf4..9a82729aa0f21 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -895,9 +895,10 @@ regen-token:
 
 .PHONY: regen-keyword
 regen-keyword:
-	# Regenerate Lib/keyword.py from Grammar/Grammar and Grammar/Tokens
-	# using Parser/pgen
-	PYTHONPATH=$(srcdir) $(PYTHON_FOR_REGEN) -m Parser.pgen.keywordgen $(srcdir)/Grammar/Grammar \
+	# Regenerate Lib/keyword.py from Grammar/python.gram and Grammar/Tokens
+	# using Tools/peg_generator/pegen
+	PYTHONPATH=$(srcdir)/Tools/peg_generator $(PYTHON_FOR_REGEN) -m pegen.keywordgen \
+		$(srcdir)/Grammar/python.gram \
 		$(srcdir)/Grammar/Tokens \
 		$(srcdir)/Lib/keyword.py.new
 	$(UPDATE_FILE) $(srcdir)/Lib/keyword.py $(srcdir)/Lib/keyword.py.new
diff --git a/Misc/NEWS.d/next/Library/2020-06-11-11-07-10.bpo-40939.-D5Asl.rst b/Misc/NEWS.d/next/Library/2020-06-11-11-07-10.bpo-40939.-D5Asl.rst
new file mode 100644
index 0000000000000..0e831129dd87e
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2020-06-11-11-07-10.bpo-40939.-D5Asl.rst
@@ -0,0 +1 @@
+Use the new PEG parser when generating the stdlib :mod:`keyword` module.
\ No newline at end of file
diff --git a/PCbuild/regen.vcxproj b/PCbuild/regen.vcxproj
index d46fb997dbd79..564a4dd71188c 100644
--- a/PCbuild/regen.vcxproj
+++ b/PCbuild/regen.vcxproj
@@ -205,8 +205,9 @@
     <Exec Command=""$(PythonExe)" $(PySourcePath)Tools\scripts\generate_token.py py "$(PySourcePath)Grammar\Tokens" "$(PySourcePath)Lib\token.py"" />
   </Target>
   <Target Name="_RegenKeywords" AfterTargets="_RegenTokens">
-    <!-- Regenerate Lib/keyword.py from Grammar/Grammar and Grammar/Tokens using Parser/pgen-->
-    <Exec Command=""$(PythonExe)" -m Parser.pgen.keywordgen "$(PySourcePath)Grammar\Grammar" "$(PySourcePath)Grammar\Tokens" "$(IntDir)keyword.py"" />
+    <!-- Regenerate Lib/keyword.py from Grammar/python.gram and Grammar/Tokens using Tools/peg_generator/pegen-->
+    <SetEnv Name="PYTHONPATH" Prefix="true" Value="$(PySourcePath)Tools\peg_generator\" />
+    <Exec Command=""$(PythonExe)" -m pegen.keywordgen "$(PySourcePath)Grammar\python.gram" "$(PySourcePath)Grammar\Tokens" "$(IntDir)keyword.py"" />
     <Copy SourceFiles="$(IntDir)keyword.py" DestinationFiles="$(PySourcePath)Lib\keyword.py">
       <Output TaskParameter="CopiedFiles" ItemName="_Updated" />
     </Copy>
diff --git a/Tools/peg_generator/pegen/keywordgen.py b/Tools/peg_generator/pegen/keywordgen.py
new file mode 100644
index 0000000000000..279c34b6dae5b
--- /dev/null
+++ b/Tools/peg_generator/pegen/keywordgen.py
@@ -0,0 +1,73 @@
+"""Generate Lib/keyword.py from the Grammar and Tokens files using pegen"""
+
+import argparse
+
+from .build import build_parser, generate_token_definitions
+from .c_generator import CParserGenerator
+
+TEMPLATE = r'''
+"""Keywords (from "Grammar/python.gram")
+
+This file is automatically generated; please don't muck it up!
+
+To update the symbols in this file, 'cd' to the top directory of
+the python source tree and run:
+
+    PYTHONPATH=Tools/peg_generator python3 -m pegen.keywordgen \
+        Grammar/python.gram \
+        Grammar/Tokens \
+        Lib/keyword.py
+
+Alternatively, you can run 'make regen-keyword'.
+"""
+
+__all__ = ["iskeyword", "kwlist"]
+
+kwlist = [
+    {keywords}
+]
+
+iskeyword = frozenset(kwlist).__contains__
+'''.lstrip()
+
+EXTRA_KEYWORDS = ["async", "await"]  # merged into the grammar's keywords by main()
+
+
+def main():
+    """Command-line entry point: write the keyword module for a PEG grammar."""
+    parser = argparse.ArgumentParser(
+        description="Generate the Lib/keyword.py file from the grammar."
+    )
+    parser.add_argument(
+        "grammar", type=str, help="The file with the grammar definition in PEG format"
+    )
+    parser.add_argument(
+        "tokens_file",
+        type=argparse.FileType("r"),
+        help="The file with the token definitions",
+    )
+    parser.add_argument(
+        "keyword_file",
+        type=argparse.FileType("w"),
+        help="The path to write the keyword definitions",
+    )
+    args = parser.parse_args()
+
+    grammar, _, _ = build_parser(args.grammar)
+    with args.tokens_file as tok_file:
+        all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file)
+    # Only the keyword cache is read from gen; presumably collect_todo() fills it.
+    gen = CParserGenerator(grammar, all_tokens, exact_tok, non_exact_tok, file=None)
+    gen.collect_todo()
+
+    # EXTRA_KEYWORDS are merged in by hand on top of what the generator collected.
+    all_keywords = sorted(
+        list(gen.callmakervisitor.keyword_cache.keys()) + EXTRA_KEYWORDS
+    )
+    with args.keyword_file as thefile:
+        keywords = ",\n    ".join(map(repr, all_keywords))
+        thefile.write(TEMPLATE.format(keywords=keywords))
+
+
+if __name__ == "__main__":
+    main()



More information about the Python-checkins mailing list