[Python-checkins] cpython: Issue #18214: Improve finalization of Python modules to avoid setting their

antoine.pitrou python-checkins at python.org
Wed Jul 31 23:15:46 CEST 2013


http://hg.python.org/cpython/rev/79e2f5bbc30c
changeset:   84936:79e2f5bbc30c
user:        Antoine Pitrou <solipsis at pitrou.net>
date:        Wed Jul 31 23:14:08 2013 +0200
summary:
  Issue #18214: Improve finalization of Python modules to avoid setting their globals to None, in most cases.

files:
  Lib/rlcompleter.py      |    6 +
  Lib/site.py             |   39 +++++-
  Lib/test/final_a.py     |   19 +++
  Lib/test/final_b.py     |   19 +++
  Lib/test/test_module.py |   15 ++-
  Lib/test/test_sys.py    |    2 +-
  Misc/NEWS               |    3 +
  Objects/moduleobject.c  |   29 +++-
  Python/import.c         |  155 +++++++++++----------------
  9 files changed, 178 insertions(+), 109 deletions(-)


diff --git a/Lib/rlcompleter.py b/Lib/rlcompleter.py
--- a/Lib/rlcompleter.py
+++ b/Lib/rlcompleter.py
@@ -29,6 +29,7 @@
 
 """
 
+import atexit
 import builtins
 import __main__
 
@@ -158,3 +159,8 @@
     pass
 else:
     readline.set_completer(Completer().complete)
+    # Release references early at shutdown (the readline module's
+    # contents are quasi-immortal, and the completer function holds a
+    # reference to globals).
+    atexit.register(lambda: readline.set_completer(None))
+
diff --git a/Lib/site.py b/Lib/site.py
--- a/Lib/site.py
+++ b/Lib/site.py
@@ -68,6 +68,7 @@
 ImportError exception, it is silently ignored.
 """
 
+import atexit
 import sys
 import os
 import re
@@ -86,6 +87,25 @@
 USER_BASE = None
 
 
+_no_builtin = object()
+
+def _patch_builtins(**items):
+    # When patching builtins, we make some objects almost immortal
+    # (builtins are only reclaimed at the very end of the interpreter
+    #  shutdown sequence).  To avoid keeping to many references alive,
+    # we register callbacks to undo our builtins additions.
+    old_items = {k: getattr(builtins, k, _no_builtin) for k in items}
+    def unpatch(old_items=old_items):
+        for k, v in old_items.items():
+            if v is _no_builtin:
+                delattr(builtins, k)
+            else:
+                setattr(builtins, k, v)
+    for k, v in items.items():
+        setattr(builtins, k, v)
+    atexit.register(unpatch)
+
+
 def makepath(*paths):
     dir = os.path.join(*paths)
     try:
@@ -357,8 +377,7 @@
             except:
                 pass
             raise SystemExit(code)
-    builtins.quit = Quitter('quit')
-    builtins.exit = Quitter('exit')
+    _patch_builtins(quit=Quitter('quit'), exit=Quitter('exit'))
 
 
 class _Printer(object):
@@ -423,20 +442,20 @@
 
 def setcopyright():
     """Set 'copyright' and 'credits' in builtins"""
-    builtins.copyright = _Printer("copyright", sys.copyright)
+    _patch_builtins(copyright=_Printer("copyright", sys.copyright))
     if sys.platform[:4] == 'java':
-        builtins.credits = _Printer(
+        _patch_builtins(credits=_Printer(
             "credits",
-            "Jython is maintained by the Jython developers (www.jython.org).")
+            "Jython is maintained by the Jython developers (www.jython.org)."))
     else:
-        builtins.credits = _Printer("credits", """\
+        _patch_builtins(credits=_Printer("credits", """\
     Thanks to CWI, CNRI, BeOpen.com, Zope Corporation and a cast of thousands
-    for supporting Python development.  See www.python.org for more information.""")
+    for supporting Python development.  See www.python.org for more information."""))
     here = os.path.dirname(os.__file__)
-    builtins.license = _Printer(
+    _patch_builtins(license=_Printer(
         "license", "See http://www.python.org/%.3s/license.html" % sys.version,
         ["LICENSE.txt", "LICENSE"],
-        [os.path.join(here, os.pardir), here, os.curdir])
+        [os.path.join(here, os.pardir), here, os.curdir]))
 
 
 class _Helper(object):
@@ -453,7 +472,7 @@
         return pydoc.help(*args, **kwds)
 
 def sethelper():
-    builtins.help = _Helper()
+    _patch_builtins(help=_Helper())
 
 def enablerlcompleter():
     """Enable default readline configuration on interactive prompts, by
diff --git a/Lib/test/final_a.py b/Lib/test/final_a.py
new file mode 100644
--- /dev/null
+++ b/Lib/test/final_a.py
@@ -0,0 +1,19 @@
+"""
+Fodder for module finalization tests in test_module.
+"""
+
+import shutil
+import test.final_b
+
+x = 'a'
+
+class C:
+    def __del__(self):
+        # Inspect module globals and builtins
+        print("x =", x)
+        print("final_b.x =", test.final_b.x)
+        print("shutil.rmtree =", getattr(shutil.rmtree, '__name__', None))
+        print("len =", getattr(len, '__name__', None))
+
+c = C()
+_underscored = C()
diff --git a/Lib/test/final_b.py b/Lib/test/final_b.py
new file mode 100644
--- /dev/null
+++ b/Lib/test/final_b.py
@@ -0,0 +1,19 @@
+"""
+Fodder for module finalization tests in test_module.
+"""
+
+import shutil
+import test.final_a
+
+x = 'b'
+
+class C:
+    def __del__(self):
+        # Inspect module globals and builtins
+        print("x =", x)
+        print("final_a.x =", test.final_a.x)
+        print("shutil.rmtree =", getattr(shutil.rmtree, '__name__', None))
+        print("len =", getattr(len, '__name__', None))
+
+c = C()
+_underscored = C()
diff --git a/Lib/test/test_module.py b/Lib/test/test_module.py
--- a/Lib/test/test_module.py
+++ b/Lib/test/test_module.py
@@ -1,6 +1,7 @@
 # Test the module type
 import unittest
 from test.support import run_unittest, gc_collect
+from test.script_helper import assert_python_ok
 
 import sys
 ModuleType = type(sys)
@@ -70,7 +71,6 @@
                "__loader__": None, "__package__": None})
         self.assertTrue(foo.__dict__ is d)
 
-    @unittest.expectedFailure
     def test_dont_clear_dict(self):
         # See issue 7140.
         def f():
@@ -181,6 +181,19 @@
         self.assertEqual(r[:25], "<module 'unittest' from '")
         self.assertEqual(r[-13:], "__init__.py'>")
 
+    def test_module_finalization_at_shutdown(self):
+        # Module globals and builtins should still be available during shutdown
+        rc, out, err = assert_python_ok("-c", "from test import final_a")
+        self.assertFalse(err)
+        lines = out.splitlines()
+        self.assertEqual(set(lines), {
+            b"x = a",
+            b"x = b",
+            b"final_a.x = a",
+            b"final_b.x = b",
+            b"len = len",
+            b"shutil.rmtree = rmtree"})
+
     # frozen and namespace module reprs are tested in importlib.
 
 
diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py
--- a/Lib/test/test_sys.py
+++ b/Lib/test/test_sys.py
@@ -810,7 +810,7 @@
         # memoryview
         check(memoryview(b''), size('Pnin 2P2n2i5P 3cPn'))
         # module
-        check(unittest, size('PnP'))
+        check(unittest, size('PnPPP'))
         # None
         check(None, size(''))
         # NotImplementedType
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@
 Core and Builtins
 -----------------
 
+- Issue #18214: Improve finalization of Python modules to avoid setting
+  their globals to None, in most cases.
+
 - Issue #18112: PEP 442 implementation (safe object finalization).
 
 - Issue #18552: Check return value of PyArena_AddPyObject() in
diff --git a/Objects/moduleobject.c b/Objects/moduleobject.c
--- a/Objects/moduleobject.c
+++ b/Objects/moduleobject.c
@@ -11,6 +11,8 @@
     PyObject *md_dict;
     struct PyModuleDef *md_def;
     void *md_state;
+    PyObject *md_weaklist;
+    PyObject *md_name;  /* for logging purposes after md_dict is cleared */
 } PyModuleObject;
 
 static PyMemberDef module_members[] = {
@@ -27,7 +29,8 @@
 
 
 static int
-module_init_dict(PyObject *md_dict, PyObject *name, PyObject *doc)
+module_init_dict(PyModuleObject *mod, PyObject *md_dict,
+                 PyObject *name, PyObject *doc)
 {
     if (md_dict == NULL)
         return -1;
@@ -42,6 +45,11 @@
         return -1;
     if (PyDict_SetItemString(md_dict, "__loader__", Py_None) != 0)
         return -1;
+    if (PyUnicode_CheckExact(name)) {
+        Py_INCREF(name);
+        Py_XDECREF(mod->md_name);
+        mod->md_name = name;
+    }
 
     return 0;
 }
@@ -56,8 +64,10 @@
         return NULL;
     m->md_def = NULL;
     m->md_state = NULL;
+    m->md_weaklist = NULL;
+    m->md_name = NULL;
     m->md_dict = PyDict_New();
-    if (module_init_dict(m->md_dict, name, NULL) != 0)
+    if (module_init_dict(m, m->md_dict, name, NULL) != 0)
         goto fail;
     PyObject_GC_Track(m);
     return (PyObject *)m;
@@ -362,7 +372,7 @@
             return -1;
         m->md_dict = dict;
     }
-    if (module_init_dict(dict, name, doc) < 0)
+    if (module_init_dict(m, dict, name, doc) < 0)
         return -1;
     return 0;
 }
@@ -371,12 +381,15 @@
 module_dealloc(PyModuleObject *m)
 {
     PyObject_GC_UnTrack(m);
+    if (Py_VerboseFlag && m->md_name) {
+        PySys_FormatStderr("# destroy %S\n", m->md_name);
+    }
+    if (m->md_weaklist != NULL)
+        PyObject_ClearWeakRefs((PyObject *) m);
     if (m->md_def && m->md_def->m_free)
         m->md_def->m_free(m);
-    if (m->md_dict != NULL) {
-        _PyModule_Clear((PyObject *)m);
-        Py_DECREF(m->md_dict);
-    }
+    Py_XDECREF(m->md_dict);
+    Py_XDECREF(m->md_name);
     if (m->md_state != NULL)
         PyMem_FREE(m->md_state);
     Py_TYPE(m)->tp_free((PyObject *)m);
@@ -522,7 +535,7 @@
     (traverseproc)module_traverse,              /* tp_traverse */
     (inquiry)module_clear,                      /* tp_clear */
     0,                                          /* tp_richcompare */
-    0,                                          /* tp_weaklistoffset */
+    offsetof(PyModuleObject, md_weaklist),      /* tp_weaklistoffset */
     0,                                          /* tp_iter */
     0,                                          /* tp_iternext */
     module_methods,                             /* tp_methods */
diff --git a/Python/import.c b/Python/import.c
--- a/Python/import.c
+++ b/Python/import.c
@@ -277,6 +277,7 @@
     "path", "argv", "ps1", "ps2",
     "last_type", "last_value", "last_traceback",
     "path_hooks", "path_importer_cache", "meta_path",
+    "__interactivehook__",
     /* misc stuff */
     "flags", "float_info",
     NULL
@@ -289,40 +290,17 @@
     NULL
 };
 
-static int
-is_essential_module(PyObject *name)
-{
-    Py_ssize_t name_len;
-    char *name_str = PyUnicode_AsUTF8AndSize(name, &name_len);
-
-    if (name_str == NULL) {
-        PyErr_Clear();
-        return 0;
-    }
-    if (strcmp(name_str, "builtins") == 0)
-        return 1;
-    if (strcmp(name_str, "sys") == 0)
-        return 1;
-    /* These are all needed for stderr to still function */
-    if (strcmp(name_str, "codecs") == 0)
-        return 1;
-    if (strcmp(name_str, "_codecs") == 0)
-        return 1;
-    if (strncmp(name_str, "encodings.", 10) == 0)
-        return 1;
-    return 0;
-}
-
-
 /* Un-initialize things, as good as we can */
 
 void
 PyImport_Cleanup(void)
 {
-    Py_ssize_t pos, ndone;
+    Py_ssize_t pos;
     PyObject *key, *value, *dict;
     PyInterpreterState *interp = PyThreadState_GET()->interp;
     PyObject *modules = interp->modules;
+    PyObject *builtins = interp->builtins;
+    PyObject *weaklist = NULL;
 
     if (modules == NULL)
         return; /* Already done */
@@ -333,6 +311,8 @@
        deleted *last* of all, they would come too late in the normal
        destruction order.  Sigh. */
 
+    /* XXX Perhaps these precautions are obsolete. Who knows? */
+
     value = PyDict_GetItemString(modules, "builtins");
     if (value != NULL && PyModule_Check(value)) {
         dict = PyModule_GetDict(value);
@@ -360,87 +340,84 @@
         }
     }
 
-    /* First, delete __main__ */
-    value = PyDict_GetItemString(modules, "__main__");
-    if (value != NULL && PyModule_Check(value)) {
-        if (Py_VerboseFlag)
-            PySys_WriteStderr("# cleanup __main__\n");
-        _PyModule_Clear(value);
-        PyDict_SetItemString(modules, "__main__", Py_None);
+    /* We prepare a list which will receive (name, weakref) tuples of
+       modules when they are removed from sys.modules.  The name is used
+       for diagnosis messages (in verbose mode), while the weakref helps
+       detect those modules which have been held alive. */
+    weaklist = PyList_New(0);
+
+#define STORE_MODULE_WEAKREF(mod) \
+    if (weaklist != NULL) { \
+        PyObject *name = PyModule_GetNameObject(mod); \
+        PyObject *wr = PyWeakref_NewRef(mod, NULL); \
+        if (name && wr) { \
+            PyObject *tup = PyTuple_Pack(2, name, wr); \
+            PyList_Append(weaklist, tup); \
+            Py_XDECREF(tup); \
+        } \
+        Py_XDECREF(name); \
+        Py_XDECREF(wr); \
+        if (PyErr_Occurred()) \
+            PyErr_Clear(); \
     }
 
-    /* The special treatment of "builtins" here is because even
-       when it's not referenced as a module, its dictionary is
-       referenced by almost every module's __builtins__.  Since
-       deleting a module clears its dictionary (even if there are
-       references left to it), we need to delete the "builtins"
-       module last.  Likewise, we don't delete sys until the very
-       end because it is implicitly referenced (e.g. by print).
-
-       Also note that we 'delete' modules by replacing their entry
-       in the modules dict with None, rather than really deleting
-       them; this avoids a rehash of the modules dictionary and
-       also marks them as "non existent" so they won't be
-       re-imported. */
-
-    /* Next, repeatedly delete modules with a reference count of
-       one (skipping builtins and sys) and delete them */
-    do {
-        ndone = 0;
-        pos = 0;
-        while (PyDict_Next(modules, &pos, &key, &value)) {
-            if (value->ob_refcnt != 1)
-                continue;
-            if (PyUnicode_Check(key) && PyModule_Check(value)) {
-                if (is_essential_module(key))
-                    continue;
-                if (Py_VerboseFlag)
-                    PySys_FormatStderr(
-                        "# cleanup[1] %U\n", key);
-                _PyModule_Clear(value);
-                PyDict_SetItem(modules, key, Py_None);
-                ndone++;
-            }
-        }
-    } while (ndone > 0);
-
-    /* Next, delete all modules (still skipping builtins and sys) */
+    /* Remove all modules from sys.modules, hoping that garbage collection
+       can reclaim most of them. */
     pos = 0;
     while (PyDict_Next(modules, &pos, &key, &value)) {
-        if (PyUnicode_Check(key) && PyModule_Check(value)) {
-            if (is_essential_module(key))
-                continue;
-            if (Py_VerboseFlag)
-                PySys_FormatStderr("# cleanup[2] %U\n", key);
-            _PyModule_Clear(value);
+        if (PyModule_Check(value)) {
+            if (Py_VerboseFlag && PyUnicode_Check(key))
+                PySys_FormatStderr("# cleanup[2] removing %U\n", key, value);
+            STORE_MODULE_WEAKREF(value);
             PyDict_SetItem(modules, key, Py_None);
         }
     }
 
-    /* Collect garbage remaining after deleting the modules. Mostly
-       reference cycles created by classes. */
-    PyGC_Collect();
-
+    /* Clear the modules dict. */
+    PyDict_Clear(modules);
+    /* Replace the interpreter's reference to builtins with an empty dict
+       (module globals still have a reference to the original builtins). */
+    builtins = interp->builtins;
+    interp->builtins = PyDict_New();
+    Py_DECREF(builtins);
+    /* Collect references */
+    _PyGC_CollectNoFail();
     /* Dump GC stats before it's too late, since it uses the warnings
        machinery. */
     _PyGC_DumpShutdownStats();
 
-    /* Next, delete all remaining modules */
-    pos = 0;
-    while (PyDict_Next(modules, &pos, &key, &value)) {
-        if (PyUnicode_Check(key) && PyModule_Check(value)) {
+    /* Now, if there are any modules left alive, clear their globals to
+       minimize potential leaks.  All C extension modules actually end
+       up here, since they are kept alive in the interpreter state. */
+    if (weaklist != NULL) {
+        Py_ssize_t i, n;
+        n = PyList_GET_SIZE(weaklist);
+        for (i = 0; i < n; i++) {
+            PyObject *tup = PyList_GET_ITEM(weaklist, i);
+            PyObject *mod = PyWeakref_GET_OBJECT(PyTuple_GET_ITEM(tup, 1));
+            if (mod == Py_None)
+                continue;
+            Py_INCREF(mod);
+            assert(PyModule_Check(mod));
             if (Py_VerboseFlag)
-                PySys_FormatStderr("# cleanup[3] %U\n", key);
-            _PyModule_Clear(value);
-            PyDict_SetItem(modules, key, Py_None);
+                PySys_FormatStderr("# cleanup[3] wiping %U\n",
+                                   PyTuple_GET_ITEM(tup, 0), mod);
+            _PyModule_Clear(mod);
+            Py_DECREF(mod);
         }
+        Py_DECREF(weaklist);
     }
 
-    /* Finally, clear and delete the modules directory */
-    PyDict_Clear(modules);
-    _PyGC_CollectNoFail();
+    /* Clear and delete the modules directory.  Actual modules will
+       still be there only if imported during the execution of some
+       destructor. */
     interp->modules = NULL;
     Py_DECREF(modules);
+
+    /* Once more */
+    _PyGC_CollectNoFail();
+
+#undef STORE_MODULE_WEAKREF
 }
 
 

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list