[Python-checkins] bpo-40528: Improve AST generation script to do builds simultaneously (GH-19968)

Batuhan Taskaya webhook-mailer at python.org
Mon May 18 13:42:17 EDT 2020

commit: 63b8e0cba3d43e53a8dd8878ee1443c8427f462d
branch: master
author: Batuhan Taskaya <batuhanosmantaskaya at gmail.com>
committer: GitHub <noreply at github.com>
date: 2020-05-18T18:42:10+01:00

bpo-40528: Improve AST generation script to do builds simultaneously (GH-19968)

- Switch from getopt to argparse.
- Remove the limitation that the C source and the header could not be generated in the same run.

This makes the script run faster, since it parses the ASDL definition once and reuses the resulting tree to generate both the header and the C source.

M Makefile.pre.in
M PCbuild/regen.vcxproj
M Parser/asdl_c.py

diff --git a/Makefile.pre.in b/Makefile.pre.in
index dbfd805f1a02f..de50f6b7f7022 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -845,17 +845,15 @@ regen-pegen:
-	# Regenerate Include/Python-ast.h using Parser/asdl_c.py -h
+	# Regenerate Include/Python-ast.h and Python/Python-ast.c using Parser/asdl_c.py
 	$(MKDIR_P) $(srcdir)/Include
-	$(PYTHON_FOR_REGEN) $(srcdir)/Parser/asdl_c.py \
-		-h $(srcdir)/Include/Python-ast.h.new \
-		$(srcdir)/Parser/Python.asdl
-	$(UPDATE_FILE) $(srcdir)/Include/Python-ast.h $(srcdir)/Include/Python-ast.h.new
-	# Regenerate Python/Python-ast.c using Parser/asdl_c.py -c
 	$(MKDIR_P) $(srcdir)/Python
 	$(PYTHON_FOR_REGEN) $(srcdir)/Parser/asdl_c.py \
-		-c $(srcdir)/Python/Python-ast.c.new \
-		$(srcdir)/Parser/Python.asdl
+		$(srcdir)/Parser/Python.asdl \
+		-H $(srcdir)/Include/Python-ast.h.new \
+		-C $(srcdir)/Python/Python-ast.c.new
+	$(UPDATE_FILE) $(srcdir)/Include/Python-ast.h $(srcdir)/Include/Python-ast.h.new
 	$(UPDATE_FILE) $(srcdir)/Python/Python-ast.c $(srcdir)/Python/Python-ast.c.new
 .PHONY: regen-opcode
diff --git a/PCbuild/regen.vcxproj b/PCbuild/regen.vcxproj
index c97536f7dd96d..d46fb997dbd79 100644
--- a/PCbuild/regen.vcxproj
+++ b/PCbuild/regen.vcxproj
@@ -176,20 +176,15 @@
     <Warning Text="Pegen updated. You will need to rebuild pythoncore to see the changes." Condition="'@(_UpdatedParse)' != ''" />
   <Target Name="_RegenAST_H" AfterTargets="_RegenGrammar">
-    <!-- Regenerate Include/Python-ast.h using Parser/asdl_c.py -h -->
-    <Exec Command="&quot;$(PythonExe)&quot; &quot;$(PySourcePath)Parser\asdl_c.py&quot; -h &quot;$(IntDir)Python-ast.h&quot; &quot;$(PySourcePath)Parser\Python.asdl&quot;" />
+    <!-- Regenerate Include/Python-ast.h and Python/Python-ast.c using Parser/asdl_c.py -H and -C -->
+    <Exec Command="&quot;$(PythonExe)&quot; &quot;$(PySourcePath)Parser\asdl_c.py&quot; &quot;$(PySourcePath)Parser\Python.asdl&quot; -H &quot;$(IntDir)Python-ast.h&quot; -C &quot;$(IntDir)Python-ast.c&quot;" />
     <Copy SourceFiles="$(IntDir)Python-ast.h" DestinationFiles="$(PySourcePath)Include\Python-ast.h">
       <Output TaskParameter="CopiedFiles" ItemName="_UpdatedH" />
-    <Warning Text="Python-ast.h updated. You will need to rebuild pythoncore to see the changes." Condition="'@(_UpdatedH)' != '' and '@(_UpdatedC)' != ''" />
-  </Target>
-  <Target Name="_RegenAST_C" AfterTargets="_RegenGrammar">
-    <!-- Regenerate Python/Python-ast.c using Parser/asdl_c.py -c -->
-    <Exec Command="&quot;$(PythonExe)&quot; &quot;$(PySourcePath)Parser\asdl_c.py&quot; -c &quot;$(IntDir)Python-ast.c&quot; &quot;$(PySourcePath)Parser\Python.asdl&quot;" />
     <Copy SourceFiles="$(IntDir)Python-ast.c" DestinationFiles="$(PySourcePath)Python\Python-ast.c">
-      <Output TaskParameter="CopiedFiles" ItemName="_UpdatedH" />
+      <Output TaskParameter="CopiedFiles" ItemName="_UpdatedC" />
-    <Warning Text="Python-ast.c updated. You will need to rebuild pythoncore to see the changes." Condition="'@(_UpdatedH)' != '' and '@(_UpdatedC)' != ''" />
+    <Warning Text="ASDL is updated. You will need to rebuild pythoncore to see the changes." Condition="'@(_UpdatedH)' != '' and '@(_UpdatedC)' != ''" />
   <Target Name="_RegenOpcodes" AfterTargets="_RegenAST_C">
     <!-- Regenerate Include/opcode.h from Lib/opcode.py using Tools/scripts/generate_opcode_h.py-->
diff --git a/Parser/asdl_c.py b/Parser/asdl_c.py
index 59bf03ef8df3d..6d572755e68e8 100755
--- a/Parser/asdl_c.py
+++ b/Parser/asdl_c.py
@@ -1,12 +1,17 @@
 #! /usr/bin/env python
 """Generate C code from an ASDL description."""
-import os, sys
+import os
+import sys
+from argparse import ArgumentParser
+from pathlib import Path
 import asdl
 MAX_COL = 80
+AUTOGEN_MESSAGE = "/* File automatically generated by {}. */\n\n"
 def get_c_type(name):
     """Return a string for the C name of the type.
@@ -1369,94 +1374,77 @@ def generate_module_def(f, mod):
     f.write('    return 1;\n')
-common_msg = "/* File automatically generated by %s. */\n\n"
-def main(srcfile, dump_module=False):
-    argv0 = sys.argv[0]
-    components = argv0.split(os.sep)
-    # Always join with '/' so different OS does not keep changing the file
-    argv0 = '/'.join(components[-2:])
-    auto_gen_msg = common_msg % argv0
-    mod = asdl.parse(srcfile)
+def write_header(f, mod):
+    f.write('#ifndef Py_PYTHON_AST_H\n')
+    f.write('#define Py_PYTHON_AST_H\n')
+    f.write('#ifdef __cplusplus\n')
+    f.write('extern "C" {\n')
+    f.write('#endif\n')
+    f.write('\n')
+    f.write('#ifndef Py_LIMITED_API\n')
+    f.write('#include "asdl.h"\n')
+    f.write('\n')
+    f.write('#undef Yield   /* undefine macro conflicting with <winbase.h> */\n')
+    f.write('\n')
+    c = ChainOfVisitors(TypeDefVisitor(f),
+                        StructVisitor(f))
+    c.visit(mod)
+    f.write("// Note: these macros affect function definitions, not only call sites.\n")
+    PrototypeVisitor(f).visit(mod)
+    f.write("\n")
+    f.write("PyObject* PyAST_mod2obj(mod_ty t);\n")
+    f.write("mod_ty PyAST_obj2mod(PyObject* ast, PyArena* arena, int mode);\n")
+    f.write("int PyAST_Check(PyObject* obj);\n")
+    f.write("#endif /* !Py_LIMITED_API */\n")
+    f.write('\n')
+    f.write('#ifdef __cplusplus\n')
+    f.write('}\n')
+    f.write('#endif\n')
+    f.write('#endif /* !Py_PYTHON_AST_H */\n')
+def write_source(f, mod):
+    f.write('#include <stddef.h>\n')
+    f.write('\n')
+    f.write('#include "Python.h"\n')
+    f.write('#include "%s-ast.h"\n' % mod.name)
+    f.write('#include "structmember.h"         // PyMemberDef\n')
+    f.write('\n')
+    generate_module_def(f, mod)
+    v = ChainOfVisitors(
+        PyTypesDeclareVisitor(f),
+        PyTypesVisitor(f),
+        Obj2ModPrototypeVisitor(f),
+        FunctionVisitor(f),
+        ObjVisitor(f),
+        Obj2ModVisitor(f),
+        ASTModuleVisitor(f),
+        PartingShots(f),
+    )
+    v.visit(mod)
+def main(input_file, c_file, h_file, dump_module=False):
+    auto_gen_msg = AUTOGEN_MESSAGE.format("/".join(Path(__file__).parts[-2:]))
+    mod = asdl.parse(input_file)
     if dump_module:
         print('Parsed Module:')
     if not asdl.check(mod):
-    if H_FILE:
-        with open(H_FILE, "w") as f:
-            f.write(auto_gen_msg)
-            f.write('#ifndef Py_PYTHON_AST_H\n')
-            f.write('#define Py_PYTHON_AST_H\n')
-            f.write('#ifdef __cplusplus\n')
-            f.write('extern "C" {\n')
-            f.write('#endif\n')
-            f.write('\n')
-            f.write('#ifndef Py_LIMITED_API\n')
-            f.write('#include "asdl.h"\n')
-            f.write('\n')
-            f.write('#undef Yield   /* undefine macro conflicting with <winbase.h> */\n')
-            f.write('\n')
-            c = ChainOfVisitors(TypeDefVisitor(f),
-                                StructVisitor(f))
-            c.visit(mod)
-            f.write("// Note: these macros affect function definitions, not only call sites.\n")
-            PrototypeVisitor(f).visit(mod)
-            f.write("\n")
-            f.write("PyObject* PyAST_mod2obj(mod_ty t);\n")
-            f.write("mod_ty PyAST_obj2mod(PyObject* ast, PyArena* arena, int mode);\n")
-            f.write("int PyAST_Check(PyObject* obj);\n")
-            f.write("#endif /* !Py_LIMITED_API */\n")
-            f.write('\n')
-            f.write('#ifdef __cplusplus\n')
-            f.write('}\n')
-            f.write('#endif\n')
-            f.write('#endif /* !Py_PYTHON_AST_H */\n')
-    if C_FILE:
-        with open(C_FILE, "w") as f:
-            f.write(auto_gen_msg)
-            f.write('#include <stddef.h>\n')
-            f.write('\n')
-            f.write('#include "Python.h"\n')
-            f.write('#include "%s-ast.h"\n' % mod.name)
-            f.write('#include "structmember.h"         // PyMemberDef\n')
-            f.write('\n')
-            generate_module_def(f, mod)
-            v = ChainOfVisitors(
-                PyTypesDeclareVisitor(f),
-                PyTypesVisitor(f),
-                Obj2ModPrototypeVisitor(f),
-                FunctionVisitor(f),
-                ObjVisitor(f),
-                Obj2ModVisitor(f),
-                ASTModuleVisitor(f),
-                PartingShots(f),
-                )
-            v.visit(mod)
+    for file, writer in (c_file, write_source), (h_file, write_header):
+        if file is not None:
+            with file.open("w") as f:
+                f.write(auto_gen_msg)
+                writer(f, mod)
+            print(file, "regenerated.")
 if __name__ == "__main__":
-    import getopt
-    H_FILE = ''
-    C_FILE = ''
-    dump_module = False
-    opts, args = getopt.getopt(sys.argv[1:], "dh:c:")
-    for o, v in opts:
-        if o == '-h':
-            H_FILE = v
-        elif o == '-c':
-            C_FILE = v
-        elif o == '-d':
-            dump_module = True
-    if H_FILE and C_FILE:
-        print('Must specify exactly one output file')
-        sys.exit(1)
-    elif len(args) != 1:
-        print('Must specify single input file')
-        sys.exit(1)
-    main(args[0], dump_module)
+    parser = ArgumentParser()
+    parser.add_argument("input_file", type=Path)
+    parser.add_argument("-C", "--c-file", type=Path, default=None)
+    parser.add_argument("-H", "--h-file", type=Path, default=None)
+    parser.add_argument("-d", "--dump-module", action="store_true")
+    options = parser.parse_args()
+    main(**vars(options))

More information about the Python-checkins mailing list