[Python-checkins] bpo-41631: _ast module uses again a global state (#21961)

Victor Stinner webhook-mailer at python.org
Tue Sep 15 12:03:41 EDT 2020


https://github.com/python/cpython/commit/e5fbe0cbd4be99ced5f000ad382208ad2a561c90
commit: e5fbe0cbd4be99ced5f000ad382208ad2a561c90
branch: master
author: Victor Stinner <vstinner at python.org>
committer: GitHub <noreply at github.com>
date: 2020-09-15T18:03:34+02:00
summary:

bpo-41631: _ast module uses again a global state (#21961)

Partially revert commit ac46eb4ad6662cf6d771b20d8963658b2186c48c:
"bpo-38113: Update the Python-ast.c generator to PEP384 (gh-15957)".

Using a module state per module instance is causing subtle practical
problems.

For example, the Mercurial project replaces the __import__() function
to implement lazy import, whereas Python expected that "import _ast"
always return a fully initialized _ast module.

Add _PyAST_Fini() to clear the state at exit.

The _ast module has no state (set _astmodule.m_size to 0). Remove
astmodule_traverse(), astmodule_clear() and astmodule_free()
functions.

files:
A Misc/NEWS.d/next/Core and Builtins/2020-08-26-11-23-31.bpo-41631.3jZcd9.rst
M Include/internal/pycore_pylifecycle.h
M Lib/ast.py
M Lib/test/test_ast.py
M Parser/asdl_c.py
M Python/Python-ast.c
M Python/pylifecycle.c

diff --git a/Include/internal/pycore_pylifecycle.h b/Include/internal/pycore_pylifecycle.h
index 22def3dbc8b66..6d84e37232b30 100644
--- a/Include/internal/pycore_pylifecycle.h
+++ b/Include/internal/pycore_pylifecycle.h
@@ -84,6 +84,7 @@ extern void _PyFaulthandler_Fini(void);
 extern void _PyHash_Fini(void);
 extern void _PyTraceMalloc_Fini(void);
 extern void _PyWarnings_Fini(PyInterpreterState *interp);
+extern void _PyAST_Fini(PyThreadState *tstate);
 
 extern PyStatus _PyGILState_Init(PyThreadState *tstate);
 extern void _PyGILState_Fini(PyThreadState *tstate);
diff --git a/Lib/ast.py b/Lib/ast.py
index 65ebd0100de00..d860917f4d03a 100644
--- a/Lib/ast.py
+++ b/Lib/ast.py
@@ -497,18 +497,20 @@ def generic_visit(self, node):
         return node
 
 
-# The following code is for backward compatibility.
-# It will be removed in future.
+# If the ast module is loaded more than once, only add deprecated methods once
+if not hasattr(Constant, 'n'):
+    # The following code is for backward compatibility.
+    # It will be removed in future.
 
-def _getter(self):
-    """Deprecated. Use value instead."""
-    return self.value
+    def _getter(self):
+        """Deprecated. Use value instead."""
+        return self.value
 
-def _setter(self, value):
-    self.value = value
+    def _setter(self, value):
+        self.value = value
 
-Constant.n = property(_getter, _setter)
-Constant.s = property(_getter, _setter)
+    Constant.n = property(_getter, _setter)
+    Constant.s = property(_getter, _setter)
 
 class _ABC(type):
 
@@ -600,14 +602,19 @@ class ExtSlice(slice):
     def __new__(cls, dims=(), **kwargs):
         return Tuple(list(dims), Load(), **kwargs)
 
-def _dims_getter(self):
-    """Deprecated. Use elts instead."""
-    return self.elts
+# If the ast module is loaded more than once, only add deprecated methods once
+if not hasattr(Tuple, 'dims'):
+    # The following code is for backward compatibility.
+    # It will be removed in future.
 
-def _dims_setter(self, value):
-    self.elts = value
+    def _dims_getter(self):
+        """Deprecated. Use elts instead."""
+        return self.elts
 
-Tuple.dims = property(_dims_getter, _dims_setter)
+    def _dims_setter(self, value):
+        self.elts = value
+
+    Tuple.dims = property(_dims_getter, _dims_setter)
 
 class Suite(mod):
     """Deprecated AST node class.  Unused in Python 3."""
diff --git a/Lib/test/test_ast.py b/Lib/test/test_ast.py
index f5aef61ec6f7c..5f57ce8724482 100644
--- a/Lib/test/test_ast.py
+++ b/Lib/test/test_ast.py
@@ -1,7 +1,9 @@
 import ast
+import builtins
 import dis
 import os
 import sys
+import types
 import unittest
 import warnings
 import weakref
@@ -1945,6 +1947,88 @@ def visit_Ellipsis(self, node):
         ])
 
 
+ at support.cpython_only
+class ModuleStateTests(unittest.TestCase):
+    # bpo-41194, bpo-41261, bpo-41631: The _ast module uses a global state.
+
+    def check_ast_module(self):
+        # Check that the _ast module still works as expected
+        code = 'x + 1'
+        filename = '<string>'
+        mode = 'eval'
+
+        # Create _ast.AST subclasses instances
+        ast_tree = compile(code, filename, mode, flags=ast.PyCF_ONLY_AST)
+
+        # Call PyAST_Check()
+        code = compile(ast_tree, filename, mode)
+        self.assertIsInstance(code, types.CodeType)
+
+    def test_reload_module(self):
+        # bpo-41194: Importing the _ast module twice must not crash.
+        with support.swap_item(sys.modules, '_ast', None):
+            del sys.modules['_ast']
+            import _ast as ast1
+
+            del sys.modules['_ast']
+            import _ast as ast2
+
+            self.check_ast_module()
+
+        # Unloading the two _ast module instances must not crash.
+        del ast1
+        del ast2
+        support.gc_collect()
+
+        self.check_ast_module()
+
+    def test_sys_modules(self):
+        # bpo-41631: Test reproducing a Mercurial crash when PyAST_Check()
+        # imported the _ast module internally.
+        lazy_mod = object()
+
+        def my_import(name, *args, **kw):
+            sys.modules[name] = lazy_mod
+            return lazy_mod
+
+        with support.swap_item(sys.modules, '_ast', None):
+            del sys.modules['_ast']
+
+            with support.swap_attr(builtins, '__import__', my_import):
+                # Test that compile() does not import the _ast module
+                self.check_ast_module()
+                self.assertNotIn('_ast', sys.modules)
+
+                # Sanity check of the test itself
+                import _ast
+                self.assertIs(_ast, lazy_mod)
+
+    def test_subinterpreter(self):
+        # bpo-41631: Importing and using the _ast module in a subinterpreter
+        # must not crash.
+        code = dedent('''
+            import _ast
+            import ast
+            import gc
+            import sys
+            import types
+
+            # Create _ast.AST subclasses instances and call PyAST_Check()
+            ast_tree = compile('x+1', '<string>', 'eval',
+                               flags=ast.PyCF_ONLY_AST)
+            code = compile(ast_tree, 'string', 'eval')
+            if not isinstance(code, types.CodeType):
+                raise AssertionError
+
+            # Unloading the _ast module must not crash.
+            del ast, _ast
+            del sys.modules['ast'], sys.modules['_ast']
+            gc.collect()
+        ''')
+        res = support.run_in_subinterp(code)
+        self.assertEqual(res, 0)
+
+
 def main():
     if __name__ != '__main__':
         return
diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-08-26-11-23-31.bpo-41631.3jZcd9.rst b/Misc/NEWS.d/next/Core and Builtins/2020-08-26-11-23-31.bpo-41631.3jZcd9.rst
new file mode 100644
index 0000000000000..68bb51024d9e7
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2020-08-26-11-23-31.bpo-41631.3jZcd9.rst	
@@ -0,0 +1,5 @@
+The ``_ast`` module uses again a global state. Using a module state per module
+instance is causing subtle practical problems. For example, the Mercurial
+project replaces the ``__import__()`` function to implement lazy import,
+whereas Python expected that ``import _ast`` always return a fully initialized
+``_ast`` module.
diff --git a/Parser/asdl_c.py b/Parser/asdl_c.py
index 6fe44b99f793b..0c053393d688b 100755
--- a/Parser/asdl_c.py
+++ b/Parser/asdl_c.py
@@ -1089,11 +1089,9 @@ def visitModule(self, mod):
 static struct PyModuleDef _astmodule = {
     PyModuleDef_HEAD_INIT,
     .m_name = "_ast",
-    .m_size = sizeof(astmodulestate),
+    // The _ast module uses a global state (global_ast_state).
+    .m_size = 0,
     .m_slots = astmodule_slots,
-    .m_traverse = astmodule_traverse,
-    .m_clear = astmodule_clear,
-    .m_free = astmodule_free,
 };
 
 PyMODINIT_FUNC
@@ -1374,59 +1372,40 @@ def generate_module_def(f, mod):
         f.write('    PyObject *' + s + ';\n')
     f.write('} astmodulestate;\n\n')
     f.write("""
-static astmodulestate*
-get_ast_state(PyObject *module)
-{
-    void *state = PyModule_GetState(module);
-    assert(state != NULL);
-    return (astmodulestate*)state;
-}
+// Forward declaration
+static int init_types(astmodulestate *state);
+
+// bpo-41194, bpo-41261, bpo-41631: The _ast module uses a global state.
+static astmodulestate global_ast_state = {0};
 
 static astmodulestate*
 get_global_ast_state(void)
 {
-    _Py_IDENTIFIER(_ast);
-    PyObject *name = _PyUnicode_FromId(&PyId__ast);  // borrowed reference
-    if (name == NULL) {
+    astmodulestate* state = &global_ast_state;
+    if (!init_types(state)) {
         return NULL;
     }
-    PyObject *module = PyImport_GetModule(name);
-    if (module == NULL) {
-        if (PyErr_Occurred()) {
-            return NULL;
-        }
-        module = PyImport_Import(name);
-        if (module == NULL) {
-            return NULL;
-        }
-    }
-    astmodulestate *state = get_ast_state(module);
-    Py_DECREF(module);
     return state;
 }
 
-static int astmodule_clear(PyObject *module)
+static astmodulestate*
+get_ast_state(PyObject* Py_UNUSED(module))
 {
-    astmodulestate *state = get_ast_state(module);
-""")
-    for s in module_state:
-        f.write("    Py_CLEAR(state->" + s + ');\n')
-    f.write("""
-    return 0;
+    astmodulestate* state = get_global_ast_state();
+    // get_ast_state() must only be called after _ast module is imported,
+    // and astmodule_exec() calls init_types()
+    assert(state != NULL);
+    return state;
 }
 
-static int astmodule_traverse(PyObject *module, visitproc visit, void* arg)
+void _PyAST_Fini(PyThreadState *tstate)
 {
-    astmodulestate *state = get_ast_state(module);
+    astmodulestate* state = &global_ast_state;
 """)
     for s in module_state:
-        f.write("    Py_VISIT(state->" + s + ');\n')
+        f.write("    Py_CLEAR(state->" + s + ');\n')
     f.write("""
-    return 0;
-}
-
-static void astmodule_free(void* module) {
-    astmodule_clear((PyObject*)module);
+    state->initialized = 0;
 }
 
 """)
diff --git a/Python/Python-ast.c b/Python/Python-ast.c
index 396a6832702b3..094010e6c9ddc 100644
--- a/Python/Python-ast.c
+++ b/Python/Python-ast.c
@@ -224,40 +224,35 @@ typedef struct {
 } astmodulestate;
 
 
-static astmodulestate*
-get_ast_state(PyObject *module)
-{
-    void *state = PyModule_GetState(module);
-    assert(state != NULL);
-    return (astmodulestate*)state;
-}
+// Forward declaration
+static int init_types(astmodulestate *state);
+
+// bpo-41194, bpo-41261, bpo-41631: The _ast module uses a global state.
+static astmodulestate global_ast_state = {0};
 
 static astmodulestate*
 get_global_ast_state(void)
 {
-    _Py_IDENTIFIER(_ast);
-    PyObject *name = _PyUnicode_FromId(&PyId__ast);  // borrowed reference
-    if (name == NULL) {
+    astmodulestate* state = &global_ast_state;
+    if (!init_types(state)) {
         return NULL;
     }
-    PyObject *module = PyImport_GetModule(name);
-    if (module == NULL) {
-        if (PyErr_Occurred()) {
-            return NULL;
-        }
-        module = PyImport_Import(name);
-        if (module == NULL) {
-            return NULL;
-        }
-    }
-    astmodulestate *state = get_ast_state(module);
-    Py_DECREF(module);
     return state;
 }
 
-static int astmodule_clear(PyObject *module)
+static astmodulestate*
+get_ast_state(PyObject* Py_UNUSED(module))
 {
-    astmodulestate *state = get_ast_state(module);
+    astmodulestate* state = get_global_ast_state();
+    // get_ast_state() must only be called after _ast module is imported,
+    // and astmodule_exec() calls init_types()
+    assert(state != NULL);
+    return state;
+}
+
+void _PyAST_Fini(PyThreadState *tstate)
+{
+    astmodulestate* state = &global_ast_state;
     Py_CLEAR(state->AST_type);
     Py_CLEAR(state->Add_singleton);
     Py_CLEAR(state->Add_type);
@@ -472,231 +467,7 @@ static int astmodule_clear(PyObject *module)
     Py_CLEAR(state->vararg);
     Py_CLEAR(state->withitem_type);
 
-    return 0;
-}
-
-static int astmodule_traverse(PyObject *module, visitproc visit, void* arg)
-{
-    astmodulestate *state = get_ast_state(module);
-    Py_VISIT(state->AST_type);
-    Py_VISIT(state->Add_singleton);
-    Py_VISIT(state->Add_type);
-    Py_VISIT(state->And_singleton);
-    Py_VISIT(state->And_type);
-    Py_VISIT(state->AnnAssign_type);
-    Py_VISIT(state->Assert_type);
-    Py_VISIT(state->Assign_type);
-    Py_VISIT(state->AsyncFor_type);
-    Py_VISIT(state->AsyncFunctionDef_type);
-    Py_VISIT(state->AsyncWith_type);
-    Py_VISIT(state->Attribute_type);
-    Py_VISIT(state->AugAssign_type);
-    Py_VISIT(state->Await_type);
-    Py_VISIT(state->BinOp_type);
-    Py_VISIT(state->BitAnd_singleton);
-    Py_VISIT(state->BitAnd_type);
-    Py_VISIT(state->BitOr_singleton);
-    Py_VISIT(state->BitOr_type);
-    Py_VISIT(state->BitXor_singleton);
-    Py_VISIT(state->BitXor_type);
-    Py_VISIT(state->BoolOp_type);
-    Py_VISIT(state->Break_type);
-    Py_VISIT(state->Call_type);
-    Py_VISIT(state->ClassDef_type);
-    Py_VISIT(state->Compare_type);
-    Py_VISIT(state->Constant_type);
-    Py_VISIT(state->Continue_type);
-    Py_VISIT(state->Del_singleton);
-    Py_VISIT(state->Del_type);
-    Py_VISIT(state->Delete_type);
-    Py_VISIT(state->DictComp_type);
-    Py_VISIT(state->Dict_type);
-    Py_VISIT(state->Div_singleton);
-    Py_VISIT(state->Div_type);
-    Py_VISIT(state->Eq_singleton);
-    Py_VISIT(state->Eq_type);
-    Py_VISIT(state->ExceptHandler_type);
-    Py_VISIT(state->Expr_type);
-    Py_VISIT(state->Expression_type);
-    Py_VISIT(state->FloorDiv_singleton);
-    Py_VISIT(state->FloorDiv_type);
-    Py_VISIT(state->For_type);
-    Py_VISIT(state->FormattedValue_type);
-    Py_VISIT(state->FunctionDef_type);
-    Py_VISIT(state->FunctionType_type);
-    Py_VISIT(state->GeneratorExp_type);
-    Py_VISIT(state->Global_type);
-    Py_VISIT(state->GtE_singleton);
-    Py_VISIT(state->GtE_type);
-    Py_VISIT(state->Gt_singleton);
-    Py_VISIT(state->Gt_type);
-    Py_VISIT(state->IfExp_type);
-    Py_VISIT(state->If_type);
-    Py_VISIT(state->ImportFrom_type);
-    Py_VISIT(state->Import_type);
-    Py_VISIT(state->In_singleton);
-    Py_VISIT(state->In_type);
-    Py_VISIT(state->Interactive_type);
-    Py_VISIT(state->Invert_singleton);
-    Py_VISIT(state->Invert_type);
-    Py_VISIT(state->IsNot_singleton);
-    Py_VISIT(state->IsNot_type);
-    Py_VISIT(state->Is_singleton);
-    Py_VISIT(state->Is_type);
-    Py_VISIT(state->JoinedStr_type);
-    Py_VISIT(state->LShift_singleton);
-    Py_VISIT(state->LShift_type);
-    Py_VISIT(state->Lambda_type);
-    Py_VISIT(state->ListComp_type);
-    Py_VISIT(state->List_type);
-    Py_VISIT(state->Load_singleton);
-    Py_VISIT(state->Load_type);
-    Py_VISIT(state->LtE_singleton);
-    Py_VISIT(state->LtE_type);
-    Py_VISIT(state->Lt_singleton);
-    Py_VISIT(state->Lt_type);
-    Py_VISIT(state->MatMult_singleton);
-    Py_VISIT(state->MatMult_type);
-    Py_VISIT(state->Mod_singleton);
-    Py_VISIT(state->Mod_type);
-    Py_VISIT(state->Module_type);
-    Py_VISIT(state->Mult_singleton);
-    Py_VISIT(state->Mult_type);
-    Py_VISIT(state->Name_type);
-    Py_VISIT(state->NamedExpr_type);
-    Py_VISIT(state->Nonlocal_type);
-    Py_VISIT(state->NotEq_singleton);
-    Py_VISIT(state->NotEq_type);
-    Py_VISIT(state->NotIn_singleton);
-    Py_VISIT(state->NotIn_type);
-    Py_VISIT(state->Not_singleton);
-    Py_VISIT(state->Not_type);
-    Py_VISIT(state->Or_singleton);
-    Py_VISIT(state->Or_type);
-    Py_VISIT(state->Pass_type);
-    Py_VISIT(state->Pow_singleton);
-    Py_VISIT(state->Pow_type);
-    Py_VISIT(state->RShift_singleton);
-    Py_VISIT(state->RShift_type);
-    Py_VISIT(state->Raise_type);
-    Py_VISIT(state->Return_type);
-    Py_VISIT(state->SetComp_type);
-    Py_VISIT(state->Set_type);
-    Py_VISIT(state->Slice_type);
-    Py_VISIT(state->Starred_type);
-    Py_VISIT(state->Store_singleton);
-    Py_VISIT(state->Store_type);
-    Py_VISIT(state->Sub_singleton);
-    Py_VISIT(state->Sub_type);
-    Py_VISIT(state->Subscript_type);
-    Py_VISIT(state->Try_type);
-    Py_VISIT(state->Tuple_type);
-    Py_VISIT(state->TypeIgnore_type);
-    Py_VISIT(state->UAdd_singleton);
-    Py_VISIT(state->UAdd_type);
-    Py_VISIT(state->USub_singleton);
-    Py_VISIT(state->USub_type);
-    Py_VISIT(state->UnaryOp_type);
-    Py_VISIT(state->While_type);
-    Py_VISIT(state->With_type);
-    Py_VISIT(state->YieldFrom_type);
-    Py_VISIT(state->Yield_type);
-    Py_VISIT(state->__dict__);
-    Py_VISIT(state->__doc__);
-    Py_VISIT(state->__module__);
-    Py_VISIT(state->_attributes);
-    Py_VISIT(state->_fields);
-    Py_VISIT(state->alias_type);
-    Py_VISIT(state->annotation);
-    Py_VISIT(state->arg);
-    Py_VISIT(state->arg_type);
-    Py_VISIT(state->args);
-    Py_VISIT(state->argtypes);
-    Py_VISIT(state->arguments_type);
-    Py_VISIT(state->asname);
-    Py_VISIT(state->ast);
-    Py_VISIT(state->attr);
-    Py_VISIT(state->bases);
-    Py_VISIT(state->body);
-    Py_VISIT(state->boolop_type);
-    Py_VISIT(state->cause);
-    Py_VISIT(state->cmpop_type);
-    Py_VISIT(state->col_offset);
-    Py_VISIT(state->comparators);
-    Py_VISIT(state->comprehension_type);
-    Py_VISIT(state->context_expr);
-    Py_VISIT(state->conversion);
-    Py_VISIT(state->ctx);
-    Py_VISIT(state->decorator_list);
-    Py_VISIT(state->defaults);
-    Py_VISIT(state->elt);
-    Py_VISIT(state->elts);
-    Py_VISIT(state->end_col_offset);
-    Py_VISIT(state->end_lineno);
-    Py_VISIT(state->exc);
-    Py_VISIT(state->excepthandler_type);
-    Py_VISIT(state->expr_context_type);
-    Py_VISIT(state->expr_type);
-    Py_VISIT(state->finalbody);
-    Py_VISIT(state->format_spec);
-    Py_VISIT(state->func);
-    Py_VISIT(state->generators);
-    Py_VISIT(state->handlers);
-    Py_VISIT(state->id);
-    Py_VISIT(state->ifs);
-    Py_VISIT(state->is_async);
-    Py_VISIT(state->items);
-    Py_VISIT(state->iter);
-    Py_VISIT(state->key);
-    Py_VISIT(state->keys);
-    Py_VISIT(state->keyword_type);
-    Py_VISIT(state->keywords);
-    Py_VISIT(state->kind);
-    Py_VISIT(state->kw_defaults);
-    Py_VISIT(state->kwarg);
-    Py_VISIT(state->kwonlyargs);
-    Py_VISIT(state->left);
-    Py_VISIT(state->level);
-    Py_VISIT(state->lineno);
-    Py_VISIT(state->lower);
-    Py_VISIT(state->mod_type);
-    Py_VISIT(state->module);
-    Py_VISIT(state->msg);
-    Py_VISIT(state->name);
-    Py_VISIT(state->names);
-    Py_VISIT(state->op);
-    Py_VISIT(state->operand);
-    Py_VISIT(state->operator_type);
-    Py_VISIT(state->ops);
-    Py_VISIT(state->optional_vars);
-    Py_VISIT(state->orelse);
-    Py_VISIT(state->posonlyargs);
-    Py_VISIT(state->returns);
-    Py_VISIT(state->right);
-    Py_VISIT(state->simple);
-    Py_VISIT(state->slice);
-    Py_VISIT(state->step);
-    Py_VISIT(state->stmt_type);
-    Py_VISIT(state->tag);
-    Py_VISIT(state->target);
-    Py_VISIT(state->targets);
-    Py_VISIT(state->test);
-    Py_VISIT(state->type);
-    Py_VISIT(state->type_comment);
-    Py_VISIT(state->type_ignore_type);
-    Py_VISIT(state->type_ignores);
-    Py_VISIT(state->unaryop_type);
-    Py_VISIT(state->upper);
-    Py_VISIT(state->value);
-    Py_VISIT(state->values);
-    Py_VISIT(state->vararg);
-    Py_VISIT(state->withitem_type);
-
-    return 0;
-}
-
-static void astmodule_free(void* module) {
-    astmodule_clear((PyObject*)module);
+    state->initialized = 0;
 }
 
 static int init_identifiers(astmodulestate *state)
@@ -10316,11 +10087,9 @@ static PyModuleDef_Slot astmodule_slots[] = {
 static struct PyModuleDef _astmodule = {
     PyModuleDef_HEAD_INIT,
     .m_name = "_ast",
-    .m_size = sizeof(astmodulestate),
+    // The _ast module uses a global state (global_ast_state).
+    .m_size = 0,
     .m_slots = astmodule_slots,
-    .m_traverse = astmodule_traverse,
-    .m_clear = astmodule_clear,
-    .m_free = astmodule_free,
 };
 
 PyMODINIT_FUNC
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index ab5a6767864dc..75d57805c07b6 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -1259,6 +1259,12 @@ flush_std_files(void)
 static void
 finalize_interp_types(PyThreadState *tstate)
 {
+    // The _ast module state is shared by all interpreters.
+    // The state must only be cleared by the main interpreter.
+    if (_Py_IsMainInterpreter(tstate)) {
+        _PyAST_Fini(tstate);
+    }
+
     _PyExc_Fini(tstate);
     _PyFrame_Fini(tstate);
     _PyAsyncGen_Fini(tstate);



More information about the Python-checkins mailing list