[Python-checkins] bpo-42745: Make the type cache per-interpreter (GH-23947)

vstinner webhook-mailer at python.org
Fri Dec 25 19:45:47 EST 2020


https://github.com/python/cpython/commit/41010184880151d6ae02a226dbacc796e5c90d11
commit: 41010184880151d6ae02a226dbacc796e5c90d11
branch: master
author: Victor Stinner <vstinner at python.org>
committer: vstinner <vstinner at python.org>
date: 2020-12-26T01:45:43+01:00
summary:

bpo-42745: Make the type cache per-interpreter (GH-23947)

Make the type attribute lookup cache per-interpreter.

Add private _PyType_InitCache() function, called by PyInterpreterState_New().

Continue to share next_version_tag between interpreters, since static
types are still shared by interpreters.

Remove MCACHE macro: the cache is no longer disabled if the
EXPERIMENTAL_ISOLATED_SUBINTERPRETERS macro is defined.

files:
A Misc/NEWS.d/next/Core and Builtins/2020-12-25-23-30-58.bpo-42745.XsFoHS.rst
M Include/internal/pycore_interp.h
M Include/internal/pycore_object.h
M Include/internal/pycore_pylifecycle.h
M Objects/typeobject.c
M Python/pylifecycle.c
M Python/pystate.c

diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h
index 8c61802545253..339c2c4c61f78 100644
--- a/Include/internal/pycore_interp.h
+++ b/Include/internal/pycore_interp.h
@@ -180,6 +180,27 @@ struct atexit_state {
 };
 
 
+// Type attribute lookup cache: speed up attribute and method lookups,
+// see _PyType_Lookup().
+struct type_cache_entry {
+    unsigned int version;  // initialized from type->tp_version_tag
+    PyObject *name;        // reference to exactly a str or None
+    PyObject *value;       // borrowed reference or NULL
+};
+
+#define MCACHE_SIZE_EXP 12
+#define MCACHE_STATS 0
+
+struct type_cache {
+    struct type_cache_entry hashtable[1 << MCACHE_SIZE_EXP];
+#if MCACHE_STATS
+    size_t hits;
+    size_t misses;
+    size_t collisions;
+#endif
+};
+
+
 /* interpreter state */
 
 #define _PY_NSMALLPOSINTS           257
@@ -284,6 +305,7 @@ struct _is {
     struct _Py_exc_state exc_state;
 
     struct ast_state ast;
+    struct type_cache type_cache;
 };
 
 extern void _PyInterpreterState_ClearModules(PyInterpreterState *interp);
diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h
index edd0031c3eff8..3975765a46cc4 100644
--- a/Include/internal/pycore_object.h
+++ b/Include/internal/pycore_object.h
@@ -27,6 +27,9 @@ _PyType_HasFeature(PyTypeObject *type, unsigned long feature) {
     return ((type->tp_flags & feature) != 0);
 }
 
+extern void _PyType_InitCache(PyInterpreterState *interp);
+
+
 /* Inline functions trading binary compatibility for speed:
    _PyObject_Init() is the fast version of PyObject_Init(), and
    _PyObject_InitVar() is the fast version of PyObject_InitVar().
diff --git a/Include/internal/pycore_pylifecycle.h b/Include/internal/pycore_pylifecycle.h
index d1c23c8179150..c9e6947ae6cfc 100644
--- a/Include/internal/pycore_pylifecycle.h
+++ b/Include/internal/pycore_pylifecycle.h
@@ -76,7 +76,7 @@ extern void _PyExc_Fini(PyThreadState *tstate);
 extern void _PyImport_Fini(void);
 extern void _PyImport_Fini2(void);
 extern void _PyGC_Fini(PyThreadState *tstate);
-extern void _PyType_Fini(void);
+extern void _PyType_Fini(PyThreadState *tstate);
 extern void _Py_HashRandomization_Fini(void);
 extern void _PyUnicode_Fini(PyThreadState *tstate);
 extern void _PyUnicode_ClearInterned(PyThreadState *tstate);
diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-12-25-23-30-58.bpo-42745.XsFoHS.rst b/Misc/NEWS.d/next/Core and Builtins/2020-12-25-23-30-58.bpo-42745.XsFoHS.rst
new file mode 100644
index 0000000000000..fb7de9c215024
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2020-12-25-23-30-58.bpo-42745.XsFoHS.rst	
@@ -0,0 +1 @@
+Make the type attribute lookup cache per-interpreter. Patch by Victor Stinner.
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index 83bc877eb7d05..661ccb71ab0c7 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -20,20 +20,13 @@ class object "PyObject *" "&PyBaseObject_Type"
 
 #include "clinic/typeobject.c.h"
 
-/* bpo-40521: Type method cache is shared by all subinterpreters */
-#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
-#  define MCACHE
-#endif
-
-#ifdef MCACHE
-/* Support type attribute cache */
+/* Support type attribute lookup cache */
 
 /* The cache can keep references to the names alive for longer than
    they normally would.  This is why the maximum size is limited to
    MCACHE_MAX_ATTR_SIZE, since it might be a problem if very large
    strings are used as attribute names. */
 #define MCACHE_MAX_ATTR_SIZE    100
-#define MCACHE_SIZE_EXP         12
 #define MCACHE_HASH(version, name_hash)                                 \
         (((unsigned int)(version) ^ (unsigned int)(name_hash))          \
          & ((1 << MCACHE_SIZE_EXP) - 1))
@@ -44,30 +37,16 @@ class object "PyObject *" "&PyBaseObject_Type"
 #define MCACHE_CACHEABLE_NAME(name)                             \
         PyUnicode_CheckExact(name) &&                           \
         PyUnicode_IS_READY(name) &&                             \
-        PyUnicode_GET_LENGTH(name) <= MCACHE_MAX_ATTR_SIZE
-
-struct method_cache_entry {
-    unsigned int version;
-    PyObject *name;             /* reference to exactly a str or None */
-    PyObject *value;            /* borrowed */
-};
+        (PyUnicode_GET_LENGTH(name) <= MCACHE_MAX_ATTR_SIZE)
 
-static struct method_cache_entry method_cache[1 << MCACHE_SIZE_EXP];
+// Used to set PyTypeObject.tp_version_tag
 static unsigned int next_version_tag = 0;
-#endif
 
 typedef struct PySlot_Offset {
     short subslot_offset;
     short slot_offset;
 } PySlot_Offset;
 
-#define MCACHE_STATS 0
-
-#if MCACHE_STATS
-static size_t method_cache_hits = 0;
-static size_t method_cache_misses = 0;
-static size_t method_cache_collisions = 0;
-#endif
 
 /* bpo-40521: Interned strings are shared by all subinterpreters */
 #ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
@@ -229,46 +208,93 @@ _PyType_GetTextSignatureFromInternalDoc(const char *name, const char *internal_d
     return PyUnicode_FromStringAndSize(start, end - start);
 }
 
-unsigned int
-PyType_ClearCache(void)
+
+static struct type_cache*
+get_type_cache(void)
 {
-#ifdef MCACHE
-    Py_ssize_t i;
-    unsigned int cur_version_tag = next_version_tag - 1;
+    PyInterpreterState *interp = _PyInterpreterState_GET();
+    return &interp->type_cache;
+}
+
 
+static void
+type_cache_clear(struct type_cache *cache, int use_none)
+{
+    for (Py_ssize_t i = 0; i < (1 << MCACHE_SIZE_EXP); i++) {
+        struct type_cache_entry *entry = &cache->hashtable[i];
+        entry->version = 0;
+        if (use_none) {
+            // Set to None so _PyType_Lookup() can use Py_SETREF(),
+            // rather than using slower Py_XSETREF().
+            Py_XSETREF(entry->name, Py_NewRef(Py_None));
+        }
+        else {
+            Py_CLEAR(entry->name);
+        }
+        entry->value = NULL;
+    }
+
+    // Mark all version tags as invalid
+    PyType_Modified(&PyBaseObject_Type);
+}
+
+
+void
+_PyType_InitCache(PyInterpreterState *interp)
+{
+    struct type_cache *cache = &interp->type_cache;
+    for (Py_ssize_t i = 0; i < (1 << MCACHE_SIZE_EXP); i++) {
+        struct type_cache_entry *entry = &cache->hashtable[i];
+        assert(entry->name == NULL);
+
+        entry->version = 0;
+        // Set to None so _PyType_Lookup() can use Py_SETREF(),
+        // rather than using slower Py_XSETREF().
+        entry->name = Py_NewRef(Py_None);
+        entry->value = NULL;
+    }
+}
+
+
+static unsigned int
+_PyType_ClearCache(struct type_cache *cache)
+{
 #if MCACHE_STATS
-    size_t total = method_cache_hits + method_cache_collisions + method_cache_misses;
+    size_t total = cache->hits + cache->collisions + cache->misses;
     fprintf(stderr, "-- Method cache hits        = %zd (%d%%)\n",
-            method_cache_hits, (int) (100.0 * method_cache_hits / total));
+            cache->hits, (int) (100.0 * cache->hits / total));
     fprintf(stderr, "-- Method cache true misses = %zd (%d%%)\n",
-            method_cache_misses, (int) (100.0 * method_cache_misses / total));
+            cache->misses, (int) (100.0 * cache->misses / total));
     fprintf(stderr, "-- Method cache collisions  = %zd (%d%%)\n",
-            method_cache_collisions, (int) (100.0 * method_cache_collisions / total));
+            cache->collisions, (int) (100.0 * cache->collisions / total));
     fprintf(stderr, "-- Method cache size        = %zd KiB\n",
-            sizeof(method_cache) / 1024);
+            sizeof(cache->hashtable) / 1024);
 #endif
 
-    for (i = 0; i < (1 << MCACHE_SIZE_EXP); i++) {
-        method_cache[i].version = 0;
-        Py_CLEAR(method_cache[i].name);
-        method_cache[i].value = NULL;
-    }
+    unsigned int cur_version_tag = next_version_tag - 1;
     next_version_tag = 0;
-    /* mark all version tags as invalid */
-    PyType_Modified(&PyBaseObject_Type);
+    type_cache_clear(cache, 0);
+
     return cur_version_tag;
-#else
-    return 0;
-#endif
 }
 
+
+unsigned int
+PyType_ClearCache(void)
+{
+    struct type_cache *cache = get_type_cache();
+    return _PyType_ClearCache(cache);
+}
+
+
 void
-_PyType_Fini(void)
+_PyType_Fini(PyThreadState *tstate)
 {
-    PyType_ClearCache();
+    _PyType_ClearCache(&tstate->interp->type_cache);
     clear_slotdefs();
 }
 
+
 void
 PyType_Modified(PyTypeObject *type)
 {
@@ -370,9 +396,8 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) {
                         Py_TPFLAGS_VALID_VERSION_TAG);
 }
 
-#ifdef MCACHE
 static int
-assign_version_tag(PyTypeObject *type)
+assign_version_tag(struct type_cache *cache, PyTypeObject *type)
 {
     /* Ensure that the tp_version_tag is valid and set
        Py_TPFLAGS_VALID_VERSION_TAG.  To respect the invariant, this
@@ -393,31 +418,22 @@ assign_version_tag(PyTypeObject *type)
     /* for stress-testing: next_version_tag &= 0xFF; */
 
     if (type->tp_version_tag == 0) {
-        /* wrap-around or just starting Python - clear the whole
-           cache by filling names with references to Py_None.
-           Values are also set to NULL for added protection, as they
-           are borrowed reference */
-        for (i = 0; i < (1 << MCACHE_SIZE_EXP); i++) {
-            method_cache[i].value = NULL;
-            Py_INCREF(Py_None);
-            Py_XSETREF(method_cache[i].name, Py_None);
-        }
-        /* mark all version tags as invalid */
-        PyType_Modified(&PyBaseObject_Type);
+        // Wrap-around or just starting Python - clear the whole cache
+        type_cache_clear(cache, 1);
         return 1;
     }
+
     bases = type->tp_bases;
     n = PyTuple_GET_SIZE(bases);
     for (i = 0; i < n; i++) {
         PyObject *b = PyTuple_GET_ITEM(bases, i);
         assert(PyType_Check(b));
-        if (!assign_version_tag((PyTypeObject *)b))
+        if (!assign_version_tag(cache, (PyTypeObject *)b))
             return 0;
     }
     type->tp_flags |= Py_TPFLAGS_VALID_VERSION_TAG;
     return 1;
 }
-#endif
 
 
 static PyMemberDef type_members[] = {
@@ -3316,20 +3332,19 @@ _PyType_Lookup(PyTypeObject *type, PyObject *name)
     PyObject *res;
     int error;
 
-#ifdef MCACHE
     if (MCACHE_CACHEABLE_NAME(name) &&
         _PyType_HasFeature(type, Py_TPFLAGS_VALID_VERSION_TAG)) {
         /* fast path */
         unsigned int h = MCACHE_HASH_METHOD(type, name);
-        if (method_cache[h].version == type->tp_version_tag &&
-            method_cache[h].name == name) {
+        struct type_cache *cache = get_type_cache();
+        struct type_cache_entry *entry = &cache->hashtable[h];
+        if (entry->version == type->tp_version_tag && entry->name == name) {
 #if MCACHE_STATS
-            method_cache_hits++;
+            cache->hits++;
 #endif
-            return method_cache[h].value;
+            return entry->value;
         }
     }
-#endif
 
     /* We may end up clearing live exceptions below, so make sure it's ours. */
     assert(!PyErr_Occurred());
@@ -3351,22 +3366,25 @@ _PyType_Lookup(PyTypeObject *type, PyObject *name)
         return NULL;
     }
 
-#ifdef MCACHE
-    if (MCACHE_CACHEABLE_NAME(name) && assign_version_tag(type)) {
-        unsigned int h = MCACHE_HASH_METHOD(type, name);
-        method_cache[h].version = type->tp_version_tag;
-        method_cache[h].value = res;  /* borrowed */
-        Py_INCREF(name);
-        assert(((PyASCIIObject *)(name))->hash != -1);
+    if (MCACHE_CACHEABLE_NAME(name)) {
+        struct type_cache *cache = get_type_cache();
+        if (assign_version_tag(cache, type)) {
+            unsigned int h = MCACHE_HASH_METHOD(type, name);
+            struct type_cache_entry *entry = &cache->hashtable[h];
+            entry->version = type->tp_version_tag;
+            entry->value = res;  /* borrowed */
+            assert(((PyASCIIObject *)(name))->hash != -1);
 #if MCACHE_STATS
-        if (method_cache[h].name != Py_None && method_cache[h].name != name)
-            method_cache_collisions++;
-        else
-            method_cache_misses++;
+            if (entry->name != Py_None && entry->name != name) {
+                cache->collisions++;
+            }
+            else {
+                cache->misses++;
+            }
 #endif
-        Py_SETREF(method_cache[h].name, name);
+            Py_SETREF(entry->name, Py_NewRef(name));
+        }
     }
-#endif
     return res;
 }
 
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index 8d744c7bfd4a9..c3c1aa22e94a3 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -1750,7 +1750,7 @@ Py_FinalizeEx(void)
     _PyImport_Fini();
 
     /* Cleanup typeobject.c's internal caches. */
-    _PyType_Fini();
+    _PyType_Fini(tstate);
 
     /* unload faulthandler module */
     _PyFaulthandler_Fini();
diff --git a/Python/pystate.c b/Python/pystate.c
index 231144b082861..c791b23999383 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -4,6 +4,7 @@
 #include "Python.h"
 #include "pycore_ceval.h"
 #include "pycore_initconfig.h"
+#include "pycore_object.h"        // _PyType_InitCache()
 #include "pycore_pyerrors.h"
 #include "pycore_pylifecycle.h"
 #include "pycore_pymem.h"         // _PyMem_SetDefaultAllocator()
@@ -223,6 +224,7 @@ PyInterpreterState_New(void)
 
     _PyGC_InitState(&interp->gc);
     PyConfig_InitPythonConfig(&interp->config);
+    _PyType_InitCache(interp);
 
     interp->eval_frame = _PyEval_EvalFrameDefault;
 #ifdef HAVE_DLOPEN



More information about the Python-checkins mailing list