[Python-checkins] bpo-46841: Use inline cache for `BINARY_SUBSCR`. (GH-31618)

markshannon webhook-mailer at python.org
Tue Mar 1 11:00:52 EST 2022


https://github.com/python/cpython/commit/3b0f1c5a710eff289dc44bec972dbaea353cc54f
commit: 3b0f1c5a710eff289dc44bec972dbaea353cc54f
branch: main
author: Mark Shannon <mark at hotpy.org>
committer: markshannon <mark at hotpy.org>
date: 2022-03-01T16:00:34Z
summary:

bpo-46841: Use inline cache for `BINARY_SUBSCR`. (GH-31618)

files:
A Misc/NEWS.d/next/Core and Builtins/2022-02-28-15-46-36.bpo-46841.MDQoty.rst
M Include/cpython/object.h
M Include/internal/pycore_code.h
M Include/opcode.h
M Lib/importlib/_bootstrap_external.py
M Lib/opcode.py
M Lib/test/test_capi.py
M Lib/test/test_sys.py
M Programs/test_frozenmain.h
M Python/ceval.c
M Python/specialize.c

diff --git a/Include/cpython/object.h b/Include/cpython/object.h
index 5f978eec46580..b018dabf9d862 100644
--- a/Include/cpython/object.h
+++ b/Include/cpython/object.h
@@ -229,6 +229,13 @@ struct _typeobject {
     vectorcallfunc tp_vectorcall;
 };
 
+/* This struct is used by the specializer
+ * It should be treated as an opaque blob
+ * by code other than the specializer and interpreter. */
+struct _specialization_cache {
+    PyObject *getitem;
+};
+
 /* The *real* layout of a type object when allocated on the heap */
 typedef struct _heaptypeobject {
     /* Note: there's a dependency on the order of these members
@@ -247,6 +254,7 @@ typedef struct _heaptypeobject {
     struct _dictkeysobject *ht_cached_keys;
     PyObject *ht_module;
     char *_ht_tpname;  // Storage for "tp_name"; see PyType_FromModuleAndSpec
+    struct _specialization_cache _spec_cache; // For use by the specializer.
     /* here are optional user slots, followed by the members. */
 } PyHeapTypeObject;
 
diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h
index 47c1998c88d21..b9671d0ec32bb 100644
--- a/Include/internal/pycore_code.h
+++ b/Include/internal/pycore_code.h
@@ -92,6 +92,15 @@ typedef struct {
 
 #define INLINE_CACHE_ENTRIES_COMPARE_OP CACHE_ENTRIES(_PyCompareOpCache)
 
+typedef struct {
+    _Py_CODEUNIT counter;
+    _Py_CODEUNIT type_version;
+    _Py_CODEUNIT _t1;
+    _Py_CODEUNIT func_version;
+} _PyBinarySubscrCache;
+
+#define INLINE_CACHE_ENTRIES_BINARY_SUBSCR CACHE_ENTRIES(_PyBinarySubscrCache)
+
 /* Maximum size of code to quicken, in code units. */
 #define MAX_SIZE_TO_QUICKEN 5000
 
@@ -323,7 +332,7 @@ extern int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObjec
 extern int _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
 extern int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name);
 extern int _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
-extern int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache);
+extern int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr);
 extern int _Py_Specialize_StoreSubscr(PyObject *container, PyObject *sub, _Py_CODEUNIT *instr);
 extern int _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr, int nargs,
     PyObject *kwnames, SpecializedCacheEntry *cache);
diff --git a/Include/opcode.h b/Include/opcode.h
index ba85b7ff75fcd..f6330d9056aa1 100644
--- a/Include/opcode.h
+++ b/Include/opcode.h
@@ -211,6 +211,7 @@ static const uint32_t _PyOpcode_Jump[8] = {
 };
 
 const uint8_t _PyOpcode_InlineCacheEntries[256] = {
+    [BINARY_SUBSCR] = 4,
     [UNPACK_SEQUENCE] = 1,
     [COMPARE_OP] = 2,
     [LOAD_GLOBAL] = 5,
diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py
index c69c0c73e7a27..dd1f6ffd64cee 100644
--- a/Lib/importlib/_bootstrap_external.py
+++ b/Lib/importlib/_bootstrap_external.py
@@ -389,7 +389,7 @@ def _write_atomic(path, data, mode=0o666):
 #     Python 3.11a5 3480 (New CALL opcodes, second iteration)
 #     Python 3.11a5 3481 (Use inline cache for BINARY_OP)
 #     Python 3.11a5 3482 (Use inline caching for UNPACK_SEQUENCE and LOAD_GLOBAL)
-#     Python 3.11a5 3483 (Use inline caching for COMPARE_OP)
+#     Python 3.11a5 3483 (Use inline caching for COMPARE_OP and BINARY_SUBSCR)
 
 #     Python 3.12 will start with magic number 3500
 
diff --git a/Lib/opcode.py b/Lib/opcode.py
index dc45cff3017a6..9b08562cd04f6 100644
--- a/Lib/opcode.py
+++ b/Lib/opcode.py
@@ -68,7 +68,7 @@ def jabs_op(name, op, entries=0):
 
 def_op('UNARY_INVERT', 15)
 
-def_op('BINARY_SUBSCR', 25)
+def_op('BINARY_SUBSCR', 25, 4)
 
 def_op('GET_LEN', 30)
 def_op('MATCH_MAPPING', 31)
diff --git a/Lib/test/test_capi.py b/Lib/test/test_capi.py
index 8832292a9991a..d9615430327a4 100644
--- a/Lib/test/test_capi.py
+++ b/Lib/test/test_capi.py
@@ -335,7 +335,7 @@ class C(): pass
             *_, count = line.split(b' ')
             count = int(count)
             self.assertLessEqual(count, i*5)
-            self.assertGreaterEqual(count, i*5-1)
+            self.assertGreaterEqual(count, i*5-2)
 
     def test_mapping_keys_values_items(self):
         class Mapping1(dict):
diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py
index f828d1b15d286..70768f56fa9f1 100644
--- a/Lib/test/test_sys.py
+++ b/Lib/test/test_sys.py
@@ -1501,7 +1501,9 @@ def delx(self): del self.__x
                   '3P'                  # PyMappingMethods
                   '10P'                 # PySequenceMethods
                   '2P'                  # PyBufferProcs
-                  '6P')
+                  '6P'
+                  '1P'                  # Specializer cache
+                  )
         class newstyleclass(object): pass
         # Separate block for PyDictKeysObject with 8 keys and 5 entries
         check(newstyleclass, s + calcsize(DICT_KEY_STRUCT_FORMAT) + 64 + 42*calcsize("n2P"))
diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-02-28-15-46-36.bpo-46841.MDQoty.rst b/Misc/NEWS.d/next/Core and Builtins/2022-02-28-15-46-36.bpo-46841.MDQoty.rst
new file mode 100644
index 0000000000000..97b03debcf092
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2022-02-28-15-46-36.bpo-46841.MDQoty.rst	
@@ -0,0 +1 @@
+Use inline cache for :opcode:`BINARY_SUBSCR`.
diff --git a/Programs/test_frozenmain.h b/Programs/test_frozenmain.h
index 11593a9ba3d68..3fef981e42ff9 100644
--- a/Programs/test_frozenmain.h
+++ b/Programs/test_frozenmain.h
@@ -1,13 +1,14 @@
 // Auto-generated by Programs/freeze_test_frozenmain.py
 unsigned char M_test_frozenmain[] = {
     227,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,
-    0,0,0,0,0,115,104,0,0,0,151,0,100,0,100,1,
+    0,0,0,0,0,115,120,0,0,0,151,0,100,0,100,1,
     108,0,90,0,100,0,100,1,108,1,90,1,2,0,101,2,
     100,2,166,1,171,1,1,0,2,0,101,2,100,3,101,0,
     106,3,166,2,171,2,1,0,2,0,101,1,106,4,166,0,
-    171,0,100,4,25,0,90,5,100,5,68,0,93,16,90,6,
-    2,0,101,2,100,6,101,6,155,0,100,7,101,5,101,6,
-    25,0,155,0,157,4,166,1,171,1,1,0,113,33,100,1,
+    171,0,100,4,25,0,3,0,3,0,3,0,3,0,90,5,
+    100,5,68,0,93,20,90,6,2,0,101,2,100,6,101,6,
+    155,0,100,7,101,5,101,6,25,0,3,0,3,0,3,0,
+    3,0,155,0,157,4,166,1,171,1,1,0,113,37,100,1,
     83,0,41,8,233,0,0,0,0,78,122,18,70,114,111,122,
     101,110,32,72,101,108,108,111,32,87,111,114,108,100,122,8,
     115,121,115,46,97,114,103,118,218,6,99,111,110,102,105,103,
@@ -24,14 +25,15 @@ unsigned char M_test_frozenmain[] = {
     0,0,250,18,116,101,115,116,95,102,114,111,122,101,110,109,
     97,105,110,46,112,121,250,8,60,109,111,100,117,108,101,62,
     114,11,0,0,0,1,0,0,0,115,18,0,0,0,2,128,
-    8,3,8,1,12,2,16,1,16,1,8,1,30,7,4,249,
-    115,20,0,0,0,2,128,8,3,8,1,12,2,16,1,16,
-    1,2,7,4,1,2,249,34,7,115,104,0,0,0,0,0,
+    8,3,8,1,12,2,16,1,24,1,8,1,38,7,4,249,
+    115,20,0,0,0,2,128,8,3,8,1,12,2,16,1,24,
+    1,2,7,4,1,2,249,42,7,115,120,0,0,0,0,0,
     1,11,1,11,1,11,1,11,1,25,1,25,1,25,1,25,
     1,6,1,6,7,27,1,28,1,28,1,28,1,6,1,6,
     7,17,19,22,19,27,1,28,1,28,1,28,10,39,10,27,
-    10,39,10,41,10,41,42,50,10,51,1,7,12,2,1,42,
-    1,42,5,8,5,10,5,10,11,41,21,24,11,41,11,41,
-    28,34,35,38,28,39,11,41,11,41,5,42,5,42,5,42,
+    10,39,10,41,10,41,42,50,10,51,10,51,10,51,10,51,
+    10,51,1,7,12,2,1,42,1,42,5,8,5,10,5,10,
+    11,41,21,24,11,41,11,41,28,34,35,38,28,39,28,39,
+    28,39,28,39,28,39,11,41,11,41,5,42,5,42,5,42,
     5,42,1,42,1,42,114,9,0,0,0,
 };
diff --git a/Python/ceval.c b/Python/ceval.c
index 0f57e7dc94a04..b3673d7d04ab2 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -2102,25 +2102,24 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
             SET_TOP(res);
             if (res == NULL)
                 goto error;
+            JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR);
             DISPATCH();
         }
 
         TARGET(BINARY_SUBSCR_ADAPTIVE) {
-            SpecializedCacheEntry *cache = GET_CACHE();
-            if (cache->adaptive.counter == 0) {
+            _PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr;
+            if (cache->counter == 0) {
                 PyObject *sub = TOP();
                 PyObject *container = SECOND();
                 next_instr--;
-                if (_Py_Specialize_BinarySubscr(container, sub, next_instr, cache) < 0) {
+                if (_Py_Specialize_BinarySubscr(container, sub, next_instr) < 0) {
                     goto error;
                 }
                 DISPATCH();
             }
             else {
                 STAT_INC(BINARY_SUBSCR, deferred);
-                cache->adaptive.counter--;
-                assert(cache->adaptive.original_oparg == 0);
-                /* No need to set oparg here; it isn't used by BINARY_SUBSCR */
+                cache->counter--;
                 JUMP_TO_INSTRUCTION(BINARY_SUBSCR);
             }
         }
@@ -2146,6 +2145,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
             Py_DECREF(sub);
             SET_TOP(res);
             Py_DECREF(list);
+            JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR);
             NOTRACE_DISPATCH();
         }
 
@@ -2170,6 +2170,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
             Py_DECREF(sub);
             SET_TOP(res);
             Py_DECREF(tuple);
+            JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR);
             NOTRACE_DISPATCH();
         }
 
@@ -2188,18 +2189,22 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
             Py_DECREF(sub);
             SET_TOP(res);
             Py_DECREF(dict);
+            JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR);
             DISPATCH();
         }
 
         TARGET(BINARY_SUBSCR_GETITEM) {
             PyObject *sub = TOP();
             PyObject *container = SECOND();
-            SpecializedCacheEntry *caches = GET_CACHE();
-            _PyAdaptiveEntry *cache0 = &caches[0].adaptive;
-            _PyObjectCache *cache1 = &caches[-1].obj;
-            PyFunctionObject *getitem = (PyFunctionObject *)cache1->obj;
-            DEOPT_IF(Py_TYPE(container)->tp_version_tag != cache0->version, BINARY_SUBSCR);
-            DEOPT_IF(getitem->func_version != cache0->index, BINARY_SUBSCR);
+            _PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr;
+            uint32_t type_version = read32(&cache->type_version);
+            PyTypeObject *tp = Py_TYPE(container);
+            DEOPT_IF(tp->tp_version_tag != type_version, BINARY_SUBSCR);
+            assert(tp->tp_flags & Py_TPFLAGS_HEAPTYPE);
+            PyObject *cached = ((PyHeapTypeObject *)tp)->_spec_cache.getitem;
+            assert(PyFunction_Check(cached));
+            PyFunctionObject *getitem = (PyFunctionObject *)cached;
+            DEOPT_IF(getitem->func_version != cache->func_version, BINARY_SUBSCR);
             PyCodeObject *code = (PyCodeObject *)getitem->func_code;
             size_t size = code->co_nlocalsplus + code->co_stacksize + FRAME_SPECIALS_SIZE;
             assert(code->co_argcount == 2);
@@ -2218,6 +2223,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
                 new_frame->localsplus[i] = NULL;
             }
             _PyFrame_SetStackPointer(frame, stack_pointer);
+            frame->f_lasti += INLINE_CACHE_ENTRIES_BINARY_SUBSCR;
             new_frame->previous = frame;
             frame = cframe.current_frame = new_frame;
             CALL_STAT_INC(inlined_py_calls);
@@ -5605,7 +5611,7 @@ MISS_WITH_CACHE(PRECALL)
 MISS_WITH_CACHE(CALL)
 MISS_WITH_INLINE_CACHE(BINARY_OP)
 MISS_WITH_INLINE_CACHE(COMPARE_OP)
-MISS_WITH_CACHE(BINARY_SUBSCR)
+MISS_WITH_INLINE_CACHE(BINARY_SUBSCR)
 MISS_WITH_INLINE_CACHE(UNPACK_SEQUENCE)
 MISS_WITH_OPARG_COUNTER(STORE_SUBSCR)
 
diff --git a/Python/specialize.c b/Python/specialize.c
index 925edf3f88d5f..5486b5b1f65dc 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -60,7 +60,6 @@ static uint8_t adaptive_opcodes[256] = {
 static uint8_t cache_requirements[256] = {
     [LOAD_ATTR] = 1,  // _PyAdaptiveEntry
     [LOAD_METHOD] = 3, /* _PyAdaptiveEntry, _PyAttrCache and _PyObjectCache */
-    [BINARY_SUBSCR] = 2, /* _PyAdaptiveEntry, _PyObjectCache */
     [STORE_SUBSCR] = 0,
     [CALL] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */
     [PRECALL] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */
@@ -385,6 +384,8 @@ optimize(SpecializedCacheOrInstruction *quickened, int len)
         if (adaptive_opcode) {
             if (_PyOpcode_InlineCacheEntries[opcode]) {
                 instructions[i] = _Py_MAKECODEUNIT(adaptive_opcode, oparg);
+                previous_opcode = -1;
+                i += _PyOpcode_InlineCacheEntries[opcode];
             }
             else if (previous_opcode != EXTENDED_ARG) {
                 int new_oparg = oparg_from_instruction_and_update_offset(
@@ -553,6 +554,7 @@ initial_counter_value(void) {
 #define SPEC_FAIL_SUBSCR_PY_SIMPLE 20
 #define SPEC_FAIL_SUBSCR_PY_OTHER 21
 #define SPEC_FAIL_SUBSCR_DICT_SUBCLASS_NO_OVERRIDE 22
+#define SPEC_FAIL_SUBSCR_NOT_HEAP_TYPE 23
 
 /* Binary op */
 
@@ -1335,9 +1337,11 @@ function_kind(PyCodeObject *code) {
 
 int
 _Py_Specialize_BinarySubscr(
-     PyObject *container, PyObject *sub, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache)
+     PyObject *container, PyObject *sub, _Py_CODEUNIT *instr)
 {
-    _PyAdaptiveEntry *cache0 = &cache->adaptive;
+    assert(_PyOpcode_InlineCacheEntries[BINARY_SUBSCR] ==
+           INLINE_CACHE_ENTRIES_BINARY_SUBSCR);
+    _PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)(instr + 1);
     PyTypeObject *container_type = Py_TYPE(container);
     if (container_type == &PyList_Type) {
         if (PyLong_CheckExact(sub)) {
@@ -1364,26 +1368,30 @@ _Py_Specialize_BinarySubscr(
     PyTypeObject *cls = Py_TYPE(container);
     PyObject *descriptor = _PyType_Lookup(cls, &_Py_ID(__getitem__));
     if (descriptor && Py_TYPE(descriptor) == &PyFunction_Type) {
+        if (!(container_type->tp_flags & Py_TPFLAGS_HEAPTYPE)) {
+            SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_SUBSCR_NOT_HEAP_TYPE);
+            goto fail;
+        }
         PyFunctionObject *func = (PyFunctionObject *)descriptor;
-        PyCodeObject *code = (PyCodeObject *)func->func_code;
-        int kind = function_kind(code);
+        PyCodeObject *fcode = (PyCodeObject *)func->func_code;
+        int kind = function_kind(fcode);
         if (kind != SIMPLE_FUNCTION) {
             SPECIALIZATION_FAIL(BINARY_SUBSCR, kind);
             goto fail;
         }
-        if (code->co_argcount != 2) {
+        if (fcode->co_argcount != 2) {
             SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS);
             goto fail;
         }
         assert(cls->tp_version_tag != 0);
-        cache0->version = cls->tp_version_tag;
+        write32(&cache->type_version, cls->tp_version_tag);
         int version = _PyFunction_GetVersionForCurrentState(func);
         if (version == 0 || version != (uint16_t)version) {
             SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OUT_OF_VERSIONS);
             goto fail;
         }
-        cache0->index = version;
-        cache[-1].obj.obj = descriptor;
+        cache->func_version = version;
+        ((PyHeapTypeObject *)container_type)->_spec_cache.getitem = descriptor;
         *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_GETITEM, _Py_OPARG(*instr));
         goto success;
     }
@@ -1392,12 +1400,12 @@ _Py_Specialize_BinarySubscr(
 fail:
     STAT_INC(BINARY_SUBSCR, failure);
     assert(!PyErr_Occurred());
-    cache_backoff(cache0);
+    cache->counter = ADAPTIVE_CACHE_BACKOFF;
     return 0;
 success:
     STAT_INC(BINARY_SUBSCR, success);
     assert(!PyErr_Occurred());
-    cache0->counter = initial_counter_value();
+    cache->counter = initial_counter_value();
     return 0;
 }
 



More information about the Python-checkins mailing list