[Python-checkins] bpo-42093: Add opcode cache for LOAD_ATTR (GH-22803)

Pablo Galindo webhook-mailer at python.org
Tue Oct 20 01:22:53 EDT 2020


https://github.com/python/cpython/commit/109826c8508dd02e06ae0f1784f1d202495a8680
commit: 109826c8508dd02e06ae0f1784f1d202495a8680
branch: master
author: Pablo Galindo <Pablogsal at gmail.com>
committer: GitHub <noreply at github.com>
date: 2020-10-20T06:22:44+01:00
summary:

bpo-42093: Add opcode cache for LOAD_ATTR (GH-22803)

files:
A Misc/NEWS.d/next/Core and Builtins/2020-10-20-04-24-07.bpo-42093.ooZZNh.rst
M Doc/whatsnew/3.10.rst
M Include/cpython/dictobject.h
M Include/internal/pycore_code.h
M Objects/codeobject.c
M Objects/dictobject.c
M Python/ceval.c

diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst
index f57e1b412378e..e275a7cb4573f 100644
--- a/Doc/whatsnew/3.10.rst
+++ b/Doc/whatsnew/3.10.rst
@@ -252,6 +252,9 @@ Optimizations
   average.
   (Contributed by Victor Stinner in :issue:`41006`.)
 
+* The ``LOAD_ATTR`` instruction now uses new "per opcode cache" mechanism.
+  It is about 36% faster now.  (Contributed by Pablo Galindo and Yury Selivanov
+  in :issue:`42093`.)
 
 Deprecated
 ==========
diff --git a/Include/cpython/dictobject.h b/Include/cpython/dictobject.h
index 5a15630cfbac7..f67c3214ddd9a 100644
--- a/Include/cpython/dictobject.h
+++ b/Include/cpython/dictobject.h
@@ -71,6 +71,7 @@ PyAPI_FUNC(void) _PyDict_DebugMallocStats(FILE *out);
 
 int _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr, PyObject *name, PyObject *value);
 PyObject *_PyDict_LoadGlobal(PyDictObject *, PyDictObject *, PyObject *);
+Py_ssize_t _PyDict_GetItemHint(PyDictObject *, PyObject *, Py_ssize_t, PyObject **);
 
 /* _PyDictView */
 
diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h
index 88956f109b4f7..f1e89d96b9ebb 100644
--- a/Include/internal/pycore_code.h
+++ b/Include/internal/pycore_code.h
@@ -10,9 +10,16 @@ typedef struct {
     uint64_t builtins_ver; /* ma_version of builtin dict */
 } _PyOpcache_LoadGlobal;
 
+typedef struct {
+    PyTypeObject *type;
+    Py_ssize_t hint;
+    unsigned int tp_version_tag;
+} _PyOpCodeOpt_LoadAttr;
+
 struct _PyOpcache {
     union {
         _PyOpcache_LoadGlobal lg;
+        _PyOpCodeOpt_LoadAttr la;
     } u;
     char optimized;
 };
diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-10-20-04-24-07.bpo-42093.ooZZNh.rst b/Misc/NEWS.d/next/Core and Builtins/2020-10-20-04-24-07.bpo-42093.ooZZNh.rst
new file mode 100644
index 0000000000000..36a12c1c1cb58
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2020-10-20-04-24-07.bpo-42093.ooZZNh.rst	
@@ -0,0 +1,2 @@
+The ``LOAD_ATTR`` instruction now uses new "per opcode cache" mechanism and
+it is about 36% faster now. Patch by Pablo Galindo and Yury Selivanov.
diff --git a/Objects/codeobject.c b/Objects/codeobject.c
index 4ca22fc5029b8..c86d0e1f4ab71 100644
--- a/Objects/codeobject.c
+++ b/Objects/codeobject.c
@@ -301,8 +301,8 @@ _PyCode_InitOpcache(PyCodeObject *co)
         unsigned char opcode = _Py_OPCODE(opcodes[i]);
         i++;  // 'i' is now aligned to (next_instr - first_instr)
 
-        // TODO: LOAD_METHOD, LOAD_ATTR
-        if (opcode == LOAD_GLOBAL) {
+        // TODO: LOAD_METHOD
+        if (opcode == LOAD_GLOBAL || opcode == LOAD_ATTR) {
             opts++;
             co->co_opcache_map[i] = (unsigned char)opts;
             if (opts > 254) {
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index 6c3fc62d2ecc7..8e74962345286 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -1437,6 +1437,71 @@ PyDict_GetItem(PyObject *op, PyObject *key)
     return value;
 }
 
+Py_ssize_t
+_PyDict_GetItemHint(PyDictObject *mp, PyObject *key,
+                     Py_ssize_t hint, PyObject **value)
+{
+    Py_hash_t hash;
+    PyThreadState *tstate;
+
+    assert(*value == NULL);
+    assert(PyDict_CheckExact((PyObject*)mp));
+    assert(PyUnicode_CheckExact(key));
+
+    if (hint >= 0 && hint < _PyDict_KeysSize(mp->ma_keys)) {
+        PyObject *res = NULL;
+
+        PyDictKeyEntry *ep = DK_ENTRIES(mp->ma_keys) + (size_t)hint;
+        if (ep->me_key == key) {
+            if (mp->ma_keys->dk_lookup == lookdict_split) {
+                assert(mp->ma_values != NULL);
+                res = mp->ma_values[(size_t)hint];
+            }
+            else {
+                res = ep->me_value;
+            }
+            if (res != NULL) {
+                *value = res;
+                return hint;
+            }
+        }
+    }
+
+    if ((hash = ((PyASCIIObject *) key)->hash) == -1)
+    {
+        hash = PyObject_Hash(key);
+        if (hash == -1) {
+            PyErr_Clear();
+            return -1;
+        }
+    }
+
+    // We can arrive here with a NULL tstate during initialization: try
+    // running "python -Wi" for an example related to string interning
+    tstate = _PyThreadState_UncheckedGet();
+    Py_ssize_t ix = 0;
+    if (tstate != NULL && tstate->curexc_type != NULL) {
+        /* preserve the existing exception */
+        PyObject *err_type, *err_value, *err_tb;
+        PyErr_Fetch(&err_type, &err_value, &err_tb);
+        ix = (mp->ma_keys->dk_lookup)(mp, key, hash, value);
+        /* ignore errors */
+        PyErr_Restore(err_type, err_value, err_tb);
+        if (ix < 0) {
+            return -1;
+        }
+    }
+    else {
+        ix = (mp->ma_keys->dk_lookup)(mp, key, hash, value);
+        if (ix < 0) {
+            PyErr_Clear();
+            return -1;
+        }
+    }
+
+    return ix;
+}
+
 /* Same as PyDict_GetItemWithError() but with hash supplied by caller.
    This returns NULL *with* an exception set if an exception occurred.
    It returns NULL *without* an exception set if the key wasn't present.
diff --git a/Python/ceval.c b/Python/ceval.c
index 762de577e6b55..fafbf7524bb84 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -111,6 +111,7 @@ static long dxp[256];
 #else
 #define OPCACHE_MIN_RUNS 1024  /* create opcache when code executed this time */
 #endif
+#define OPCODE_CACHE_MAX_TRIES 20
 #define OPCACHE_STATS 0  /* Enable stats */
 
 #if OPCACHE_STATS
@@ -120,6 +121,12 @@ static size_t opcache_code_objects_extra_mem = 0;
 static size_t opcache_global_opts = 0;
 static size_t opcache_global_hits = 0;
 static size_t opcache_global_misses = 0;
+
+static size_t opcache_attr_opts = 0;
+static size_t opcache_attr_hits = 0;
+static size_t opcache_attr_misses = 0;
+static size_t opcache_attr_deopts = 0;
+static size_t opcache_attr_total = 0;
 #endif
 
 
@@ -365,6 +372,25 @@ _PyEval_Fini(void)
             opcache_global_opts);
 
     fprintf(stderr, "\n");
+
+    fprintf(stderr, "-- Opcode cache LOAD_ATTR hits     = %zd (%d%%)\n",
+            opcache_attr_hits,
+            (int) (100.0 * opcache_attr_hits /
+                opcache_attr_total));
+
+    fprintf(stderr, "-- Opcode cache LOAD_ATTR misses   = %zd (%d%%)\n",
+            opcache_attr_misses,
+            (int) (100.0 * opcache_attr_misses /
+                opcache_attr_total));
+
+    fprintf(stderr, "-- Opcode cache LOAD_ATTR opts     = %zd\n",
+            opcache_attr_opts);
+
+    fprintf(stderr, "-- Opcode cache LOAD_ATTR deopts   = %zd\n",
+            opcache_attr_deopts);
+
+    fprintf(stderr, "-- Opcode cache LOAD_ATTR total    = %zd\n",
+            opcache_attr_total);
 #endif
 }
 
@@ -1224,16 +1250,43 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
     do { \
         co_opcache = NULL; \
         if (co->co_opcache != NULL) { \
-            unsigned char co_opt_offset = \
+            unsigned char co_opcache_offset = \
                 co->co_opcache_map[next_instr - first_instr]; \
-            if (co_opt_offset > 0) { \
-                assert(co_opt_offset <= co->co_opcache_size); \
-                co_opcache = &co->co_opcache[co_opt_offset - 1]; \
+            if (co_opcache_offset > 0) { \
+                assert(co_opcache_offset <= co->co_opcache_size); \
+                co_opcache = &co->co_opcache[co_opcache_offset - 1]; \
                 assert(co_opcache != NULL); \
             } \
         } \
     } while (0)
 
+#define OPCACHE_DEOPT() \
+    do { \
+        if (co_opcache != NULL) { \
+            co_opcache->optimized = -1; \
+            unsigned char co_opcache_offset = \
+                co->co_opcache_map[next_instr - first_instr]; \
+            assert(co_opcache_offset <= co->co_opcache_size); \
+            co->co_opcache_map[co_opcache_offset] = 0; \
+            co_opcache = NULL; \
+        } \
+    } while (0)
+
+#define OPCACHE_DEOPT_LOAD_ATTR() \
+    do { \
+        if (co_opcache != NULL) { \
+            OPCACHE_STAT_ATTR_DEOPT(); \
+            OPCACHE_DEOPT(); \
+        } \
+    } while (0)
+
+#define OPCACHE_MAYBE_DEOPT_LOAD_ATTR() \
+    do { \
+        if (co_opcache != NULL && --co_opcache->optimized <= 0) { \
+            OPCACHE_DEOPT_LOAD_ATTR(); \
+        } \
+    } while (0)
+
 #if OPCACHE_STATS
 
 #define OPCACHE_STAT_GLOBAL_HIT() \
@@ -1251,12 +1304,43 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
         if (co->co_opcache != NULL) opcache_global_opts++; \
     } while (0)
 
+#define OPCACHE_STAT_ATTR_HIT() \
+    do { \
+        if (co->co_opcache != NULL) opcache_attr_hits++; \
+    } while (0)
+
+#define OPCACHE_STAT_ATTR_MISS() \
+    do { \
+        if (co->co_opcache != NULL) opcache_attr_misses++; \
+    } while (0)
+
+#define OPCACHE_STAT_ATTR_OPT() \
+    do { \
+        if (co->co_opcache!= NULL) opcache_attr_opts++; \
+    } while (0)
+
+#define OPCACHE_STAT_ATTR_DEOPT() \
+    do { \
+        if (co->co_opcache != NULL) opcache_attr_deopts++; \
+    } while (0)
+
+#define OPCACHE_STAT_ATTR_TOTAL() \
+    do { \
+        if (co->co_opcache != NULL) opcache_attr_total++; \
+    } while (0)
+
 #else /* OPCACHE_STATS */
 
 #define OPCACHE_STAT_GLOBAL_HIT()
 #define OPCACHE_STAT_GLOBAL_MISS()
 #define OPCACHE_STAT_GLOBAL_OPT()
 
+#define OPCACHE_STAT_ATTR_HIT()
+#define OPCACHE_STAT_ATTR_MISS()
+#define OPCACHE_STAT_ATTR_OPT()
+#define OPCACHE_STAT_ATTR_DEOPT()
+#define OPCACHE_STAT_ATTR_TOTAL()
+
 #endif
 
 /* Start of code */
@@ -3023,7 +3107,134 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
         case TARGET(LOAD_ATTR): {
             PyObject *name = GETITEM(names, oparg);
             PyObject *owner = TOP();
-            PyObject *res = PyObject_GetAttr(owner, name);
+
+            PyTypeObject *type = Py_TYPE(owner);
+            PyObject *res;
+            PyObject **dictptr;
+            PyObject *dict;
+            _PyOpCodeOpt_LoadAttr *la;
+
+            OPCACHE_STAT_ATTR_TOTAL();
+
+            OPCACHE_CHECK();
+            if (co_opcache != NULL && PyType_HasFeature(type, Py_TPFLAGS_VALID_VERSION_TAG))
+            {
+                if (co_opcache->optimized > 0) {
+                    /* Fast path -- cache hit makes LOAD_ATTR ~30% faster */
+                    la = &co_opcache->u.la;
+                    if (la->type == type && la->tp_version_tag == type->tp_version_tag)
+                    {
+                        assert(type->tp_dict != NULL);
+                        assert(type->tp_dictoffset > 0);
+
+                        dictptr = (PyObject **) ((char *)owner + type->tp_dictoffset);
+                        dict = *dictptr;
+                        if (dict != NULL && PyDict_CheckExact(dict)) {
+                            Py_ssize_t hint = la->hint;
+                            Py_INCREF(dict);
+                            res = NULL;
+                            la->hint = _PyDict_GetItemHint((PyDictObject*)dict, name, hint, &res);
+
+                            if (res != NULL) {
+                                if (la->hint == hint && hint >= 0) {
+                                    /* Our hint has helped -- cache hit. */
+                                    OPCACHE_STAT_ATTR_HIT();
+                                } else {
+                                    /* The hint we provided didn't work.
+                                       Maybe next time? */
+                                    OPCACHE_MAYBE_DEOPT_LOAD_ATTR();
+                                }
+
+                                Py_INCREF(res);
+                                SET_TOP(res);
+                                Py_DECREF(owner);
+                                Py_DECREF(dict);
+                                DISPATCH();
+                            } else {
+                                // This attribute can be missing sometimes -- we
+                                // don't want to optimize this lookup.
+                                OPCACHE_DEOPT_LOAD_ATTR();
+                                Py_DECREF(dict);
+                            }
+                        } else {
+                            // There is no dict, or __dict__ doesn't satisfy PyDict_CheckExact
+                            OPCACHE_DEOPT_LOAD_ATTR();
+                        }
+                    } else {
+                        // The type of the object has either been updated,
+                        // or is different.  Maybe it will stabilize?
+                        OPCACHE_MAYBE_DEOPT_LOAD_ATTR();
+                    }
+
+                    OPCACHE_STAT_ATTR_MISS();
+                }
+
+                if (co_opcache != NULL && /* co_opcache can be NULL after a DEOPT() call. */
+                    type->tp_getattro == PyObject_GenericGetAttr)
+                {
+                    PyObject *descr;
+                    Py_ssize_t ret;
+
+                    if (type->tp_dictoffset > 0) {
+                        if (type->tp_dict == NULL) {
+                            if (PyType_Ready(type) < 0) {
+                                Py_DECREF(owner);
+                                SET_TOP(NULL);
+                                goto error;
+                            }
+                        }
+
+                        descr = _PyType_Lookup(type, name);
+                        if (descr == NULL ||
+                            descr->ob_type->tp_descr_get == NULL ||
+                            !PyDescr_IsData(descr))
+                        {
+                            dictptr = (PyObject **) ((char *)owner + type->tp_dictoffset);
+                            dict = *dictptr;
+
+                            if (dict != NULL && PyDict_CheckExact(dict)) {
+                                Py_INCREF(dict);
+                                res = NULL;
+                                ret = _PyDict_GetItemHint((PyDictObject*)dict, name, -1, &res);
+                                if (res != NULL) {
+                                    Py_INCREF(res);
+                                    Py_DECREF(dict);
+                                    Py_DECREF(owner);
+                                    SET_TOP(res);
+
+                                    if (co_opcache->optimized == 0) {
+                                        // First time we optimize this opcode. */
+                                        OPCACHE_STAT_ATTR_OPT();
+                                        co_opcache->optimized = OPCODE_CACHE_MAX_TRIES;
+                                    }
+
+                                    la = &co_opcache->u.la;
+                                    la->type = type;
+                                    la->tp_version_tag = type->tp_version_tag;
+                                    la->hint = ret;
+
+                                    DISPATCH();
+                                }
+                                Py_DECREF(dict);
+                            } else {
+                                // There is no dict, or __dict__ doesn't satisfy PyDict_CheckExact
+                                OPCACHE_DEOPT_LOAD_ATTR();
+                            }
+                        } else {
+                            // We failed to find an attribute without a data-like descriptor
+                            OPCACHE_DEOPT_LOAD_ATTR();
+                        }
+                    } else {
+                        // The object's class does not have a tp_dictoffset we can use
+                        OPCACHE_DEOPT_LOAD_ATTR();
+                    }
+                } else if (type->tp_getattro != PyObject_GenericGetAttr) {
+                    OPCACHE_DEOPT_LOAD_ATTR();
+                }
+            }
+
+            /* slow path */
+            res = PyObject_GetAttr(owner, name);
             Py_DECREF(owner);
             SET_TOP(res);
             if (res == NULL)



More information about the Python-checkins mailing list