[Python-checkins] bpo-45885: Specialize COMPARE_OP (GH-29734)

markshannon webhook-mailer at python.org
Fri Dec 3 06:29:17 EST 2021


https://github.com/python/cpython/commit/03768c4d139df46212a091ed931aad03bec18b57
commit: 03768c4d139df46212a091ed931aad03bec18b57
branch: main
author: Dennis Sweeney <36520290+sweeneyde at users.noreply.github.com>
committer: markshannon <mark at hotpy.org>
date: 2021-12-03T11:29:12Z
summary:

bpo-45885: Specialize COMPARE_OP (GH-29734)

* Add COMPARE_OP_ADAPTIVE adaptive instruction.

* Add COMPARE_OP_FLOAT_JUMP, COMPARE_OP_INT_JUMP and COMPARE_OP_STR_JUMP specialized instructions.

* Introduce and use _PyUnicode_Equal

files:
A Misc/NEWS.d/next/Core and Builtins/2021-11-23-21-01-56.bpo-45885.3IxeCX.rst
M Include/cpython/unicodeobject.h
M Include/internal/pycore_code.h
M Include/opcode.h
M Lib/opcode.py
M Objects/unicodeobject.c
M Python/ceval.c
M Python/opcode_targets.h
M Python/specialize.c

diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h
index ab4aebf5e70b9..e02137c7cad7d 100644
--- a/Include/cpython/unicodeobject.h
+++ b/Include/cpython/unicodeobject.h
@@ -1016,6 +1016,9 @@ PyAPI_FUNC(PyObject*) _PyUnicode_FromId(_Py_Identifier*);
    and where the hash values are equal (i.e. a very probable match) */
 PyAPI_FUNC(int) _PyUnicode_EQ(PyObject *, PyObject *);
 
+/* Equality check. Returns -1 on failure. */
+PyAPI_FUNC(int) _PyUnicode_Equal(PyObject *, PyObject *);
+
 PyAPI_FUNC(int) _PyUnicode_WideCharString_Converter(PyObject *, void *);
 PyAPI_FUNC(int) _PyUnicode_WideCharString_Opt_Converter(PyObject *, void *);
 
diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h
index d4d1392d05bde..496d52f580f1f 100644
--- a/Include/internal/pycore_code.h
+++ b/Include/internal/pycore_code.h
@@ -42,6 +42,7 @@ typedef struct {
     uint16_t defaults_len;
 } _PyCallCache;
 
+
 /* Add specialized versions of entries to this union.
  *
  * Do not break the invariant: sizeof(SpecializedCacheEntry) == 8
@@ -273,6 +274,7 @@ int _Py_Specialize_StoreSubscr(PyObject *container, PyObject *sub, _Py_CODEUNIT
 int _Py_Specialize_CallFunction(PyObject *callable, _Py_CODEUNIT *instr, int nargs, SpecializedCacheEntry *cache, PyObject *builtins);
 void _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
                              SpecializedCacheEntry *cache);
+void _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache);
 
 #define PRINT_SPECIALIZATION_STATS 0
 #define PRINT_SPECIALIZATION_STATS_DETAILED 0
diff --git a/Include/opcode.h b/Include/opcode.h
index 2c1a212cbd634..f22f7e94f6190 100644
--- a/Include/opcode.h
+++ b/Include/opcode.h
@@ -121,43 +121,47 @@ extern "C" {
 #define BINARY_OP_MULTIPLY_FLOAT         18
 #define BINARY_OP_SUBTRACT_INT           19
 #define BINARY_OP_SUBTRACT_FLOAT         20
-#define BINARY_SUBSCR_ADAPTIVE           21
-#define BINARY_SUBSCR_GETITEM            22
-#define BINARY_SUBSCR_LIST_INT           23
-#define BINARY_SUBSCR_TUPLE_INT          24
-#define BINARY_SUBSCR_DICT               26
-#define STORE_SUBSCR_ADAPTIVE            27
-#define STORE_SUBSCR_LIST_INT            28
-#define STORE_SUBSCR_DICT                29
-#define CALL_FUNCTION_ADAPTIVE           34
-#define CALL_FUNCTION_BUILTIN_O          36
-#define CALL_FUNCTION_BUILTIN_FAST       38
-#define CALL_FUNCTION_LEN                39
-#define CALL_FUNCTION_ISINSTANCE         40
-#define CALL_FUNCTION_PY_SIMPLE          41
-#define JUMP_ABSOLUTE_QUICK              42
-#define LOAD_ATTR_ADAPTIVE               43
-#define LOAD_ATTR_INSTANCE_VALUE         44
-#define LOAD_ATTR_WITH_HINT              45
-#define LOAD_ATTR_SLOT                   46
-#define LOAD_ATTR_MODULE                 47
-#define LOAD_GLOBAL_ADAPTIVE             48
-#define LOAD_GLOBAL_MODULE               55
-#define LOAD_GLOBAL_BUILTIN              56
-#define LOAD_METHOD_ADAPTIVE             57
-#define LOAD_METHOD_CACHED               58
-#define LOAD_METHOD_CLASS                59
-#define LOAD_METHOD_MODULE               62
-#define LOAD_METHOD_NO_DICT              63
-#define STORE_ATTR_ADAPTIVE              64
-#define STORE_ATTR_INSTANCE_VALUE        65
-#define STORE_ATTR_SLOT                  66
-#define STORE_ATTR_WITH_HINT             67
-#define LOAD_FAST__LOAD_FAST             75
-#define STORE_FAST__LOAD_FAST            76
-#define LOAD_FAST__LOAD_CONST            77
-#define LOAD_CONST__LOAD_FAST            78
-#define STORE_FAST__STORE_FAST           79
+#define COMPARE_OP_ADAPTIVE              21
+#define COMPARE_OP_FLOAT_JUMP            22
+#define COMPARE_OP_INT_JUMP              23
+#define COMPARE_OP_STR_JUMP              24
+#define BINARY_SUBSCR_ADAPTIVE           26
+#define BINARY_SUBSCR_GETITEM            27
+#define BINARY_SUBSCR_LIST_INT           28
+#define BINARY_SUBSCR_TUPLE_INT          29
+#define BINARY_SUBSCR_DICT               34
+#define STORE_SUBSCR_ADAPTIVE            36
+#define STORE_SUBSCR_LIST_INT            38
+#define STORE_SUBSCR_DICT                39
+#define CALL_FUNCTION_ADAPTIVE           40
+#define CALL_FUNCTION_BUILTIN_O          41
+#define CALL_FUNCTION_BUILTIN_FAST       42
+#define CALL_FUNCTION_LEN                43
+#define CALL_FUNCTION_ISINSTANCE         44
+#define CALL_FUNCTION_PY_SIMPLE          45
+#define JUMP_ABSOLUTE_QUICK              46
+#define LOAD_ATTR_ADAPTIVE               47
+#define LOAD_ATTR_INSTANCE_VALUE         48
+#define LOAD_ATTR_WITH_HINT              55
+#define LOAD_ATTR_SLOT                   56
+#define LOAD_ATTR_MODULE                 57
+#define LOAD_GLOBAL_ADAPTIVE             58
+#define LOAD_GLOBAL_MODULE               59
+#define LOAD_GLOBAL_BUILTIN              62
+#define LOAD_METHOD_ADAPTIVE             63
+#define LOAD_METHOD_CACHED               64
+#define LOAD_METHOD_CLASS                65
+#define LOAD_METHOD_MODULE               66
+#define LOAD_METHOD_NO_DICT              67
+#define STORE_ATTR_ADAPTIVE              75
+#define STORE_ATTR_INSTANCE_VALUE        76
+#define STORE_ATTR_SLOT                  77
+#define STORE_ATTR_WITH_HINT             78
+#define LOAD_FAST__LOAD_FAST             79
+#define STORE_FAST__LOAD_FAST            80
+#define LOAD_FAST__LOAD_CONST            81
+#define LOAD_CONST__LOAD_FAST            87
+#define STORE_FAST__STORE_FAST           88
 #define DO_TRACING                      255
 #ifdef NEED_OPCODE_JUMP_TABLES
 static uint32_t _PyOpcode_RelativeJump[8] = {
diff --git a/Lib/opcode.py b/Lib/opcode.py
index 60805e92ff3c4..e5889bca4c161 100644
--- a/Lib/opcode.py
+++ b/Lib/opcode.py
@@ -234,6 +234,10 @@ def jabs_op(name, op):
     "BINARY_OP_MULTIPLY_FLOAT",
     "BINARY_OP_SUBTRACT_INT",
     "BINARY_OP_SUBTRACT_FLOAT",
+    "COMPARE_OP_ADAPTIVE",
+    "COMPARE_OP_FLOAT_JUMP",
+    "COMPARE_OP_INT_JUMP",
+    "COMPARE_OP_STR_JUMP",
     "BINARY_SUBSCR_ADAPTIVE",
     "BINARY_SUBSCR_GETITEM",
     "BINARY_SUBSCR_LIST_INT",
diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-11-23-21-01-56.bpo-45885.3IxeCX.rst b/Misc/NEWS.d/next/Core and Builtins/2021-11-23-21-01-56.bpo-45885.3IxeCX.rst
new file mode 100644
index 0000000000000..316daf966f149
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2021-11-23-21-01-56.bpo-45885.3IxeCX.rst	
@@ -0,0 +1 @@
+Specialized the ``COMPARE_OP`` opcode using the PEP 659 machinery.
\ No newline at end of file
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 61fc34d71da3c..532c48ad4d4aa 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -11168,6 +11168,20 @@ unicode_compare_eq(PyObject *str1, PyObject *str2)
     return (cmp == 0);
 }
 
+int
+_PyUnicode_Equal(PyObject *str1, PyObject *str2)
+{   /* Equality of two exact str objects: 1 if equal, 0 if not, -1 on failure. */
+    assert(PyUnicode_CheckExact(str1));
+    assert(PyUnicode_CheckExact(str2));
+    if (str1 == str2) {  /* same object: equal without inspecting contents */
+        return 1;
+    }
+    if (PyUnicode_READY(str1) || PyUnicode_READY(str2)) {
+        return -1;  /* a nonzero PyUnicode_READY() result is reported as failure */
+    }
+    return unicode_compare_eq(str1, str2);  /* content comparison of the two strings */
+}
+
 
 int
 PyUnicode_Compare(PyObject *left, PyObject *right)
diff --git a/Python/ceval.c b/Python/ceval.c
index 97c684479abdc..05897c561a16e 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -3778,6 +3778,8 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
         }
 
         TARGET(COMPARE_OP) {
+            PREDICTED(COMPARE_OP);
+            STAT_INC(COMPARE_OP, unquickened);
             assert(oparg <= Py_GE);
             PyObject *right = POP();
             PyObject *left = TOP();
@@ -3792,6 +3794,125 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
             DISPATCH();
         }
 
+        TARGET(COMPARE_OP_ADAPTIVE) {  // specialize once the counter is 0, else run COMPARE_OP
+            assert(cframe.use_tracing == 0);
+            SpecializedCacheEntry *cache = GET_CACHE();
+            if (cache->adaptive.counter == 0) {
+                PyObject *right = TOP();
+                PyObject *left = SECOND();
+                next_instr--;  // point back at this instruction, which the specializer may rewrite
+                _Py_Specialize_CompareOp(left, right, next_instr, cache);
+                DISPATCH();
+            }
+            else {
+                STAT_INC(COMPARE_OP, deferred);
+                cache->adaptive.counter--;
+                oparg = cache->adaptive.original_oparg;  // generic path takes the original oparg
+                STAT_DEC(COMPARE_OP, unquickened);  // offsets the STAT_INC at the COMPARE_OP target
+                JUMP_TO_INSTRUCTION(COMPARE_OP);
+            }
+        }
+
+        TARGET(COMPARE_OP_FLOAT_JUMP) {
+            assert(cframe.use_tracing == 0);
+            // Combined: COMPARE_OP (float ? float) + POP_JUMP_IF_(true/false)
+            SpecializedCacheEntry *caches = GET_CACHE();
+            int when_to_jump_mask = caches[0].adaptive.index;  // bit 0: <, bit 1: ==, bit 2: >
+            PyObject *right = TOP();
+            PyObject *left = SECOND();
+            DEOPT_IF(!PyFloat_CheckExact(left), COMPARE_OP);
+            DEOPT_IF(!PyFloat_CheckExact(right), COMPARE_OP);
+            double dleft = PyFloat_AS_DOUBLE(left);
+            double dright = PyFloat_AS_DOUBLE(right);
+            int sign = (dleft > dright) - (dleft < dright);  // -1, 0 or 1
+            DEOPT_IF(isnan(dleft), COMPARE_OP);  // with a NaN operand sign would be 0, as if equal
+            DEOPT_IF(isnan(dright), COMPARE_OP);
+            STAT_INC(COMPARE_OP, hit);
+            NEXTOPARG();  // presumably loads opcode/oparg of the following jump (asserted below)
+            STACK_SHRINK(2);
+            Py_DECREF(left);
+            Py_DECREF(right);
+            assert(opcode == POP_JUMP_IF_TRUE || opcode == POP_JUMP_IF_FALSE);
+            int jump = (1 << (sign + 1)) & when_to_jump_mask;  // test the bit for this outcome
+            if (!jump) {
+                next_instr++;  // not jumping: step over the fused POP_JUMP_IF_*
+                NOTRACE_DISPATCH();
+            }
+            else {
+                JUMPTO(oparg);
+                CHECK_EVAL_BREAKER();
+                NOTRACE_DISPATCH();
+            }
+        }
+
+        TARGET(COMPARE_OP_INT_JUMP) {
+            assert(cframe.use_tracing == 0);
+            // Combined: COMPARE_OP (int ? int) + POP_JUMP_IF_(true/false)
+            SpecializedCacheEntry *caches = GET_CACHE();
+            int when_to_jump_mask = caches[0].adaptive.index;  // bit 0: <, bit 1: ==, bit 2: >
+            PyObject *right = TOP();
+            PyObject *left = SECOND();
+            DEOPT_IF(!PyLong_CheckExact(left), COMPARE_OP);
+            DEOPT_IF(!PyLong_CheckExact(right), COMPARE_OP);
+            DEOPT_IF((size_t)(Py_SIZE(left) + 1) > 2, COMPARE_OP);  // only sizes -1, 0, 1 pass
+            DEOPT_IF((size_t)(Py_SIZE(right) + 1) > 2, COMPARE_OP);
+            STAT_INC(COMPARE_OP, hit);
+            assert(Py_ABS(Py_SIZE(left)) <= 1 && Py_ABS(Py_SIZE(right)) <= 1);
+            Py_ssize_t ileft = Py_SIZE(left) * ((PyLongObject *)left)->ob_digit[0];  // size (-1, 0, 1) times the first digit
+            Py_ssize_t iright = Py_SIZE(right) * ((PyLongObject *)right)->ob_digit[0];
+            int sign = (ileft > iright) - (ileft < iright);  // -1, 0 or 1
+            NEXTOPARG();  // presumably loads opcode/oparg of the following jump (asserted below)
+            STACK_SHRINK(2);
+            Py_DECREF(left);
+            Py_DECREF(right);
+            assert(opcode == POP_JUMP_IF_TRUE || opcode == POP_JUMP_IF_FALSE);
+            int jump = (1 << (sign + 1)) & when_to_jump_mask;  // test the bit for this outcome
+            if (!jump) {
+                next_instr++;  // not jumping: step over the fused POP_JUMP_IF_*
+                NOTRACE_DISPATCH();
+            }
+            else {
+                JUMPTO(oparg);
+                CHECK_EVAL_BREAKER();
+                NOTRACE_DISPATCH();
+            }
+        }
+
+        TARGET(COMPARE_OP_STR_JUMP) {
+            assert(cframe.use_tracing == 0);
+            // Combined: COMPARE_OP (str == str or str != str) + POP_JUMP_IF_(true/false)
+            SpecializedCacheEntry *caches = GET_CACHE();
+            int invert = caches[0].adaptive.index;  // 1 when the jump is taken on unequal strings
+            PyObject *right = TOP();
+            PyObject *left = SECOND();
+            DEOPT_IF(!PyUnicode_CheckExact(left), COMPARE_OP);
+            DEOPT_IF(!PyUnicode_CheckExact(right), COMPARE_OP);
+            STAT_INC(COMPARE_OP, hit);
+            int res = _PyUnicode_Equal(left, right);
+            if (res < 0) {  // negative result signals failure; operands are still on the stack
+                goto error;
+            }
+            assert(caches[0].adaptive.original_oparg == Py_EQ ||
+                   caches[0].adaptive.original_oparg == Py_NE);
+            NEXTOPARG();  // presumably loads opcode/oparg of the following jump (asserted below)
+            assert(opcode == POP_JUMP_IF_TRUE || opcode == POP_JUMP_IF_FALSE);
+            STACK_SHRINK(2);
+            Py_DECREF(left);
+            Py_DECREF(right);
+            assert(res == 0 || res == 1);
+            assert(invert == 0 || invert == 1);
+            int jump = res ^ invert;  // flip the equality result when invert is set
+            if (!jump) {
+                next_instr++;  // not jumping: step over the fused POP_JUMP_IF_*
+                NOTRACE_DISPATCH();
+            }
+            else {
+                JUMPTO(oparg);
+                CHECK_EVAL_BREAKER();
+                NOTRACE_DISPATCH();
+            }
+        }
+
         TARGET(IS_OP) {
             PyObject *right = POP();
             PyObject *left = TOP();
@@ -5083,6 +5204,7 @@ MISS_WITH_CACHE(LOAD_GLOBAL)
 MISS_WITH_CACHE(LOAD_METHOD)
 MISS_WITH_CACHE(CALL_FUNCTION)
 MISS_WITH_CACHE(BINARY_OP)
+MISS_WITH_CACHE(COMPARE_OP)
 MISS_WITH_CACHE(BINARY_SUBSCR)
 MISS_WITH_OPARG_COUNTER(STORE_SUBSCR)
 
diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h
index c9d430d26814c..872a688311992 100644
--- a/Python/opcode_targets.h
+++ b/Python/opcode_targets.h
@@ -20,23 +20,27 @@ static void *opcode_targets[256] = {
     &&TARGET_BINARY_OP_MULTIPLY_FLOAT,
     &&TARGET_BINARY_OP_SUBTRACT_INT,
     &&TARGET_BINARY_OP_SUBTRACT_FLOAT,
+    &&TARGET_COMPARE_OP_ADAPTIVE,
+    &&TARGET_COMPARE_OP_FLOAT_JUMP,
+    &&TARGET_COMPARE_OP_INT_JUMP,
+    &&TARGET_COMPARE_OP_STR_JUMP,
+    &&TARGET_BINARY_SUBSCR,
     &&TARGET_BINARY_SUBSCR_ADAPTIVE,
     &&TARGET_BINARY_SUBSCR_GETITEM,
     &&TARGET_BINARY_SUBSCR_LIST_INT,
     &&TARGET_BINARY_SUBSCR_TUPLE_INT,
-    &&TARGET_BINARY_SUBSCR,
-    &&TARGET_BINARY_SUBSCR_DICT,
-    &&TARGET_STORE_SUBSCR_ADAPTIVE,
-    &&TARGET_STORE_SUBSCR_LIST_INT,
-    &&TARGET_STORE_SUBSCR_DICT,
     &&TARGET_GET_LEN,
     &&TARGET_MATCH_MAPPING,
     &&TARGET_MATCH_SEQUENCE,
     &&TARGET_MATCH_KEYS,
-    &&TARGET_CALL_FUNCTION_ADAPTIVE,
+    &&TARGET_BINARY_SUBSCR_DICT,
     &&TARGET_PUSH_EXC_INFO,
-    &&TARGET_CALL_FUNCTION_BUILTIN_O,
+    &&TARGET_STORE_SUBSCR_ADAPTIVE,
     &&TARGET_POP_EXCEPT_AND_RERAISE,
+    &&TARGET_STORE_SUBSCR_LIST_INT,
+    &&TARGET_STORE_SUBSCR_DICT,
+    &&TARGET_CALL_FUNCTION_ADAPTIVE,
+    &&TARGET_CALL_FUNCTION_BUILTIN_O,
     &&TARGET_CALL_FUNCTION_BUILTIN_FAST,
     &&TARGET_CALL_FUNCTION_LEN,
     &&TARGET_CALL_FUNCTION_ISINSTANCE,
@@ -44,29 +48,25 @@ static void *opcode_targets[256] = {
     &&TARGET_JUMP_ABSOLUTE_QUICK,
     &&TARGET_LOAD_ATTR_ADAPTIVE,
     &&TARGET_LOAD_ATTR_INSTANCE_VALUE,
-    &&TARGET_LOAD_ATTR_WITH_HINT,
-    &&TARGET_LOAD_ATTR_SLOT,
-    &&TARGET_LOAD_ATTR_MODULE,
-    &&TARGET_LOAD_GLOBAL_ADAPTIVE,
     &&TARGET_WITH_EXCEPT_START,
     &&TARGET_GET_AITER,
     &&TARGET_GET_ANEXT,
     &&TARGET_BEFORE_ASYNC_WITH,
     &&TARGET_BEFORE_WITH,
     &&TARGET_END_ASYNC_FOR,
+    &&TARGET_LOAD_ATTR_WITH_HINT,
+    &&TARGET_LOAD_ATTR_SLOT,
+    &&TARGET_LOAD_ATTR_MODULE,
+    &&TARGET_LOAD_GLOBAL_ADAPTIVE,
     &&TARGET_LOAD_GLOBAL_MODULE,
+    &&TARGET_STORE_SUBSCR,
+    &&TARGET_DELETE_SUBSCR,
     &&TARGET_LOAD_GLOBAL_BUILTIN,
     &&TARGET_LOAD_METHOD_ADAPTIVE,
     &&TARGET_LOAD_METHOD_CACHED,
     &&TARGET_LOAD_METHOD_CLASS,
-    &&TARGET_STORE_SUBSCR,
-    &&TARGET_DELETE_SUBSCR,
     &&TARGET_LOAD_METHOD_MODULE,
     &&TARGET_LOAD_METHOD_NO_DICT,
-    &&TARGET_STORE_ATTR_ADAPTIVE,
-    &&TARGET_STORE_ATTR_INSTANCE_VALUE,
-    &&TARGET_STORE_ATTR_SLOT,
-    &&TARGET_STORE_ATTR_WITH_HINT,
     &&TARGET_GET_ITER,
     &&TARGET_GET_YIELD_FROM_ITER,
     &&TARGET_PRINT_EXPR,
@@ -74,20 +74,20 @@ static void *opcode_targets[256] = {
     &&TARGET_YIELD_FROM,
     &&TARGET_GET_AWAITABLE,
     &&TARGET_LOAD_ASSERTION_ERROR,
+    &&TARGET_STORE_ATTR_ADAPTIVE,
+    &&TARGET_STORE_ATTR_INSTANCE_VALUE,
+    &&TARGET_STORE_ATTR_SLOT,
+    &&TARGET_STORE_ATTR_WITH_HINT,
     &&TARGET_LOAD_FAST__LOAD_FAST,
     &&TARGET_STORE_FAST__LOAD_FAST,
     &&TARGET_LOAD_FAST__LOAD_CONST,
-    &&TARGET_LOAD_CONST__LOAD_FAST,
-    &&TARGET_STORE_FAST__STORE_FAST,
-    &&_unknown_opcode,
-    &&_unknown_opcode,
     &&TARGET_LIST_TO_TUPLE,
     &&TARGET_RETURN_VALUE,
     &&TARGET_IMPORT_STAR,
     &&TARGET_SETUP_ANNOTATIONS,
     &&TARGET_YIELD_VALUE,
-    &&_unknown_opcode,
-    &&_unknown_opcode,
+    &&TARGET_LOAD_CONST__LOAD_FAST,
+    &&TARGET_STORE_FAST__STORE_FAST,
     &&TARGET_POP_EXCEPT,
     &&TARGET_STORE_NAME,
     &&TARGET_DELETE_NAME,
diff --git a/Python/specialize.c b/Python/specialize.c
index f5f12139df79b..b384675560be7 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -129,6 +129,7 @@ _Py_GetSpecializationStats(void) {
     err += add_stat_dict(stats, STORE_ATTR, "store_attr");
     err += add_stat_dict(stats, CALL_FUNCTION, "call_function");
     err += add_stat_dict(stats, BINARY_OP, "binary_op");
+    err += add_stat_dict(stats, COMPARE_OP, "compare_op");
     if (err < 0) {
         Py_DECREF(stats);
         return NULL;
@@ -187,6 +188,7 @@ _Py_PrintSpecializationStats(void)
     print_stats(out, &_specialization_stats[STORE_ATTR], "store_attr");
     print_stats(out, &_specialization_stats[CALL_FUNCTION], "call_function");
     print_stats(out, &_specialization_stats[BINARY_OP], "binary_op");
+    print_stats(out, &_specialization_stats[COMPARE_OP], "compare_op");
     if (out != stderr) {
         fclose(out);
     }
@@ -239,6 +241,7 @@ static uint8_t adaptive_opcodes[256] = {
     [CALL_FUNCTION] = CALL_FUNCTION_ADAPTIVE,
     [STORE_ATTR] = STORE_ATTR_ADAPTIVE,
     [BINARY_OP] = BINARY_OP_ADAPTIVE,
+    [COMPARE_OP] = COMPARE_OP_ADAPTIVE,
 };
 
 /* The number of cache entries required for a "family" of instructions. */
@@ -251,6 +254,7 @@ static uint8_t cache_requirements[256] = {
     [CALL_FUNCTION] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */
     [STORE_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */
     [BINARY_OP] = 1,  // _PyAdaptiveEntry
+    [COMPARE_OP] = 1, /* _PyAdaptiveEntry */
 };
 
 /* Return the oparg for the cache_offset and instruction index.
@@ -487,6 +491,10 @@ initial_counter_value(void) {
 #define SPEC_FAIL_BAD_CALL_FLAGS 17
 #define SPEC_FAIL_CLASS 18
 
+/* COMPARE_OP */
+#define SPEC_FAIL_STRING_COMPARE 13
+#define SPEC_FAIL_NOT_FOLLOWED_BY_COND_JUMP 14
+#define SPEC_FAIL_BIG_INT 15
 
 static int
 specialize_module_load_attr(
@@ -1536,3 +1544,74 @@ _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
     STAT_INC(BINARY_OP, specialization_success);
     adaptive->counter = initial_counter_value();
 }
+
+static int compare_masks[] = {  // indexed by comparison op (Py_LT ... Py_GE)
+    // 1-bit: jump if less than
+    // 2-bit: jump if equal
+    // 4-bit: jump if greater
+    [Py_LT] = 1 | 0 | 0,
+    [Py_LE] = 1 | 2 | 0,
+    [Py_EQ] = 0 | 2 | 0,
+    [Py_NE] = 1 | 0 | 4,
+    [Py_GT] = 0 | 0 | 4,
+    [Py_GE] = 0 | 2 | 4,
+};
+
+void
+_Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs,
+                         _Py_CODEUNIT *instr, SpecializedCacheEntry *cache)
+{   // Rewrites *instr to a COMPARE_OP_*_JUMP form when the operand types allow it.
+    _PyAdaptiveEntry *adaptive = &cache->adaptive;
+    int op = adaptive->original_oparg;
+    int next_opcode = _Py_OPCODE(instr[1]);  // opcode of the instruction right after this one
+    if (next_opcode != POP_JUMP_IF_FALSE && next_opcode != POP_JUMP_IF_TRUE) {
+        // Can't ever combine, so don't bother being adaptive.
+        SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_NOT_FOLLOWED_BY_COND_JUMP);
+        *instr = _Py_MAKECODEUNIT(COMPARE_OP, adaptive->original_oparg);  // back to plain COMPARE_OP
+        goto failure;
+    }
+    assert(op <= Py_GE);
+    int when_to_jump_mask = compare_masks[op];
+    if (next_opcode == POP_JUMP_IF_FALSE) {  // jump on a false result: complement the mask
+        when_to_jump_mask = (1 | 2 | 4) & ~when_to_jump_mask;
+    }
+    if (Py_TYPE(lhs) != Py_TYPE(rhs)) {
+        SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_DIFFERENT_TYPES);
+        goto failure;
+    }
+    if (PyFloat_CheckExact(lhs)) {
+        *instr = _Py_MAKECODEUNIT(COMPARE_OP_FLOAT_JUMP, _Py_OPARG(*instr));
+        adaptive->index = when_to_jump_mask;
+        goto success;
+    }
+    if (PyLong_CheckExact(lhs)) {
+        if (Py_ABS(Py_SIZE(lhs)) <= 1 && Py_ABS(Py_SIZE(rhs)) <= 1) {  // sizes -1, 0 or 1 only
+            *instr = _Py_MAKECODEUNIT(COMPARE_OP_INT_JUMP, _Py_OPARG(*instr));
+            adaptive->index = when_to_jump_mask;
+            goto success;
+        }
+        else {
+            SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_BIG_INT);
+            goto failure;
+        }
+    }
+    if (PyUnicode_CheckExact(lhs)) {
+        if (op != Py_EQ && op != Py_NE) {  // only == and != are specialized for str
+            SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_STRING_COMPARE);
+            goto failure;
+        }
+        else {
+            *instr = _Py_MAKECODEUNIT(COMPARE_OP_STR_JUMP, _Py_OPARG(*instr));
+            adaptive->index = (when_to_jump_mask & 2) == 0;  // 1 if the jump is taken when unequal
+            goto success;
+        }
+    }
+    SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_OTHER);  // same-type operands, not float/int/str
+failure:
+    STAT_INC(COMPARE_OP, specialization_failure);
+    cache_backoff(adaptive);
+    return;
+success:
+    STAT_INC(COMPARE_OP, specialization_success);
+    adaptive->counter = initial_counter_value();
+}



More information about the Python-checkins mailing list