[Python-checkins] bpo-46072: Add top level stats struct (GH-30169)

Fri Dec 17 09:48:12 EST 2021

https://github.com/python/cpython/commit/efd6236d36b292c2c43540132c87cf8425e8d627
commit: efd6236d36b292c2c43540132c87cf8425e8d627
branch: main
author: Mark Shannon <mark at hotpy.org>
committer: markshannon <mark at hotpy.org>
date: 2021-12-17T14:48:01Z
summary:

bpo-46072:  Add top level stats struct (GH-30169)

files:
M Include/internal/pycore_code.h
M Lib/opcode.py
M Lib/test/test__opcode.py
M Python/ceval.c
M Python/specialize.c
M Tools/scripts/summarize_stats.py

diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h
index e9b1ad406496a..dfc75300315e2 100644
--- a/Include/internal/pycore_code.h
+++ b/Include/internal/pycore_code.h
@@ -281,20 +281,32 @@ void _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
 
 #define SPECIALIZATION_FAILURE_KINDS 30
 
-typedef struct _stats {
-    uint64_t specialization_success;
-    uint64_t specialization_failure;
+typedef struct _specialization_stats {
+    uint64_t success;
+    uint64_t failure;
     uint64_t hit;
     uint64_t deferred;
     uint64_t miss;
     uint64_t deopt;
-    uint64_t unquickened;
-    uint64_t specialization_failure_kinds[SPECIALIZATION_FAILURE_KINDS];
+    uint64_t failure_kinds[SPECIALIZATION_FAILURE_KINDS];
 } SpecializationStats;
 
-extern SpecializationStats _specialization_stats[256];
-#define STAT_INC(opname, name) _specialization_stats[opname].name++
-#define STAT_DEC(opname, name) _specialization_stats[opname].name--
+typedef struct _opcode_stats {
+    SpecializationStats specialization;
+    uint64_t execution_count;
+    uint64_t pair_count[256];
+} OpcodeStats;
+
+typedef struct _stats {
+    OpcodeStats opcode_stats[256];
+} PyStats;
+
+extern PyStats _py_stats;
+
+#define STAT_INC(opname, name) _py_stats.opcode_stats[opname].specialization.name++
+#define STAT_DEC(opname, name) _py_stats.opcode_stats[opname].specialization.name--
+#define OPCODE_EXE_INC(opname) _py_stats.opcode_stats[opname].execution_count++
+
 void _Py_PrintSpecializationStats(int to_file);
 
 PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);
@@ -302,6 +314,7 @@ PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);
 #else
 #define STAT_INC(opname, name) ((void)0)
 #define STAT_DEC(opname, name) ((void)0)
+#define OPCODE_EXE_INC(opname) ((void)0)
 #endif
 
 
diff --git a/Lib/opcode.py b/Lib/opcode.py
index 7b69988f91315..e654a1088b7ea 100644
--- a/Lib/opcode.py
+++ b/Lib/opcode.py
@@ -288,11 +288,10 @@ def jabs_op(name, op):
     "STORE_FAST__STORE_FAST",
 ]
 _specialization_stats = [
-    "specialization_success",
-    "specialization_failure",
+    "success",
+    "failure",
     "hit",
     "deferred",
     "miss",
     "deopt",
-    "unquickened",
 ]
diff --git a/Lib/test/test__opcode.py b/Lib/test/test__opcode.py
index 6bbab539903ce..f6b6b3d3532bd 100644
--- a/Lib/test/test__opcode.py
+++ b/Lib/test/test__opcode.py
@@ -82,13 +82,13 @@ def test_specialization_stats(self):
             self.assertCountEqual(stats.keys(), specialized_opcodes)
             self.assertCountEqual(
                 stats['load_attr'].keys(),
-                stat_names + ['specialization_failure_kinds'])
+                stat_names + ['failure_kinds'])
             for sn in stat_names:
                 self.assertIsInstance(stats['load_attr'][sn], int)
             self.assertIsInstance(
-                stats['load_attr']['specialization_failure_kinds'],
+                stats['load_attr']['failure_kinds'],
                 tuple)
-            for v in stats['load_attr']['specialization_failure_kinds']:
+            for v in stats['load_attr']['failure_kinds']:
                 self.assertIsInstance(v, int)
 
 
diff --git a/Python/ceval.c b/Python/ceval.c
index bac57ccb7cc75..9976bdeffbe96 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -1290,13 +1290,17 @@ eval_frame_handle_pending(PyThreadState *tstate)
     #define USE_COMPUTED_GOTOS 0
 #endif
 
-#define INSTRUCTION_START() frame->f_lasti = INSTR_OFFSET(); next_instr++
+#ifdef Py_STATS
+#define INSTRUCTION_START(op) frame->f_lasti = INSTR_OFFSET(); next_instr++; OPCODE_EXE_INC(op);
+#else
+#define INSTRUCTION_START(op) frame->f_lasti = INSTR_OFFSET(); next_instr++
+#endif
 
 #if USE_COMPUTED_GOTOS
-#define TARGET(op) TARGET_##op: INSTRUCTION_START();
+#define TARGET(op) TARGET_##op: INSTRUCTION_START(op);
 #define DISPATCH_GOTO() goto *opcode_targets[opcode]
 #else
-#define TARGET(op) case op: INSTRUCTION_START();
+#define TARGET(op) case op: INSTRUCTION_START(op);
 #define DISPATCH_GOTO() goto dispatch_opcode
 #endif
 
@@ -1416,7 +1420,7 @@ eval_frame_handle_pending(PyThreadState *tstate)
         opcode = _Py_OPCODE(word) | cframe.use_tracing OR_DTRACE_LINE; \
         if (opcode == op) { \
             oparg = _Py_OPARG(word); \
-            INSTRUCTION_START(); \
+            INSTRUCTION_START(op); \
             goto PREDICT_ID(op); \
         } \
     } while(0)
@@ -2186,7 +2190,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
 
         TARGET(BINARY_SUBSCR) {
             PREDICTED(BINARY_SUBSCR);
-            STAT_INC(BINARY_SUBSCR, unquickened);
             PyObject *sub = POP();
             PyObject *container = TOP();
             PyObject *res = PyObject_GetItem(container, sub);
@@ -2214,7 +2217,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
                 cache->adaptive.counter--;
                 assert(cache->adaptive.original_oparg == 0);
                 /* No need to set oparg here; it isn't used by BINARY_SUBSCR */
-                STAT_DEC(BINARY_SUBSCR, unquickened);
                 JUMP_TO_INSTRUCTION(BINARY_SUBSCR);
             }
         }
@@ -2339,7 +2341,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
 
         TARGET(STORE_SUBSCR) {
             PREDICTED(STORE_SUBSCR);
-            STAT_INC(STORE_SUBSCR, unquickened);
             PyObject *sub = TOP();
             PyObject *container = SECOND();
             PyObject *v = THIRD();
@@ -2369,7 +2370,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
                 STAT_INC(STORE_SUBSCR, deferred);
                 // oparg is the adaptive cache counter
                 UPDATE_PREV_INSTR_OPARG(next_instr, oparg - 1);
-                STAT_DEC(STORE_SUBSCR, unquickened);
                 JUMP_TO_INSTRUCTION(STORE_SUBSCR);
             }
         }
@@ -2933,7 +2933,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
 
         TARGET(STORE_ATTR) {
             PREDICTED(STORE_ATTR);
-            STAT_INC(STORE_ATTR, unquickened);
             PyObject *name = GETITEM(names, oparg);
             PyObject *owner = TOP();
             PyObject *v = SECOND();
@@ -3049,7 +3048,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
 
         TARGET(LOAD_GLOBAL) {
             PREDICTED(LOAD_GLOBAL);
-            STAT_INC(LOAD_GLOBAL, unquickened);
             PyObject *name = GETITEM(names, oparg);
             PyObject *v;
             if (PyDict_CheckExact(GLOBALS())
@@ -3112,7 +3110,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
                 STAT_INC(LOAD_GLOBAL, deferred);
                 cache->adaptive.counter--;
                 oparg = cache->adaptive.original_oparg;
-                STAT_DEC(LOAD_GLOBAL, unquickened);
                 JUMP_TO_INSTRUCTION(LOAD_GLOBAL);
             }
         }
@@ -3532,7 +3529,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
 
         TARGET(LOAD_ATTR) {
             PREDICTED(LOAD_ATTR);
-            STAT_INC(LOAD_ATTR, unquickened);
             PyObject *name = GETITEM(names, oparg);
             PyObject *owner = TOP();
             PyObject *res = PyObject_GetAttr(owner, name);
@@ -3560,7 +3556,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
                 STAT_INC(LOAD_ATTR, deferred);
                 cache->adaptive.counter--;
                 oparg = cache->adaptive.original_oparg;
-                STAT_DEC(LOAD_ATTR, unquickened);
                 JUMP_TO_INSTRUCTION(LOAD_ATTR);
             }
         }
@@ -3663,7 +3658,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
                 STAT_INC(STORE_ATTR, deferred);
                 cache->adaptive.counter--;
                 oparg = cache->adaptive.original_oparg;
-                STAT_DEC(STORE_ATTR, unquickened);
                 JUMP_TO_INSTRUCTION(STORE_ATTR);
             }
         }
@@ -3754,7 +3748,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
 
         TARGET(COMPARE_OP) {
             PREDICTED(COMPARE_OP);
-            STAT_INC(COMPARE_OP, unquickened);
             assert(oparg <= Py_GE);
             PyObject *right = POP();
             PyObject *left = TOP();
@@ -3783,7 +3776,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
                 STAT_INC(COMPARE_OP, deferred);
                 cache->adaptive.counter--;
                 oparg = cache->adaptive.original_oparg;
-                STAT_DEC(COMPARE_OP, unquickened);
                 JUMP_TO_INSTRUCTION(COMPARE_OP);
             }
         }
@@ -4438,7 +4430,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
 
         TARGET(LOAD_METHOD) {
             PREDICTED(LOAD_METHOD);
-            STAT_INC(LOAD_METHOD, unquickened);
             /* Designed to work in tandem with CALL_METHOD. */
             PyObject *name = GETITEM(names, oparg);
             PyObject *obj = TOP();
@@ -4491,7 +4482,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
                 STAT_INC(LOAD_METHOD, deferred);
                 cache->adaptive.counter--;
                 oparg = cache->adaptive.original_oparg;
-                STAT_DEC(LOAD_METHOD, unquickened);
                 JUMP_TO_INSTRUCTION(LOAD_METHOD);
             }
         }
@@ -4617,7 +4607,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
         TARGET(CALL_NO_KW) {
             PyObject *function;
             PREDICTED(CALL_NO_KW);
-            STAT_INC(CALL_NO_KW, unquickened);
             kwnames = NULL;
             oparg += extra_args;
             nargs = oparg;
@@ -5186,7 +5175,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
 
         TARGET(BINARY_OP) {
             PREDICTED(BINARY_OP);
-            STAT_INC(BINARY_OP, unquickened);
             PyObject *rhs = POP();
             PyObject *lhs = TOP();
             assert(0 <= oparg);
@@ -5216,7 +5204,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
                 STAT_INC(BINARY_OP, deferred);
                 cache->adaptive.counter--;
                 oparg = cache->adaptive.original_oparg;
-                STAT_DEC(BINARY_OP, unquickened);
                 JUMP_TO_INSTRUCTION(BINARY_OP);
             }
         }
@@ -5301,7 +5288,6 @@ opname ## _miss: \
             cache_backoff(cache); \
         } \
         oparg = cache->original_oparg; \
-        STAT_DEC(opname, unquickened); \
         JUMP_TO_INSTRUCTION(opname); \
     }
 
@@ -5317,7 +5303,6 @@ opname ## _miss: \
             next_instr[-1] = _Py_MAKECODEUNIT(opname ## _ADAPTIVE, oparg); \
             STAT_INC(opname, deopt); \
         } \
-        STAT_DEC(opname, unquickened); \
         JUMP_TO_INSTRUCTION(opname); \
     }
 
diff --git a/Python/specialize.c b/Python/specialize.c
index 1f168e31e6d3a..8991fa94f8e36 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -40,7 +40,7 @@
 
 Py_ssize_t _Py_QuickenedCount = 0;
 #ifdef Py_STATS
-SpecializationStats _specialization_stats[256] = { 0 };
+PyStats _py_stats = { 0 };
 
 #define ADD_STAT_TO_DICT(res, field) \
     do { \
@@ -64,20 +64,19 @@ stats_to_dict(SpecializationStats *stats)
     if (res == NULL) {
         return NULL;
     }
-    ADD_STAT_TO_DICT(res, specialization_success);
-    ADD_STAT_TO_DICT(res, specialization_failure);
+    ADD_STAT_TO_DICT(res, success);
+    ADD_STAT_TO_DICT(res, failure);
     ADD_STAT_TO_DICT(res, hit);
     ADD_STAT_TO_DICT(res, deferred);
     ADD_STAT_TO_DICT(res, miss);
     ADD_STAT_TO_DICT(res, deopt);
-    ADD_STAT_TO_DICT(res, unquickened);
     PyObject *failure_kinds = PyTuple_New(SPECIALIZATION_FAILURE_KINDS);
     if (failure_kinds == NULL) {
         Py_DECREF(res);
         return NULL;
     }
     for (int i = 0; i < SPECIALIZATION_FAILURE_KINDS; i++) {
-        PyObject *stat = PyLong_FromUnsignedLongLong(stats->specialization_failure_kinds[i]);
+        PyObject *stat = PyLong_FromUnsignedLongLong(stats->failure_kinds[i]);
         if (stat == NULL) {
             Py_DECREF(res);
             Py_DECREF(failure_kinds);
@@ -85,7 +84,7 @@ stats_to_dict(SpecializationStats *stats)
         }
         PyTuple_SET_ITEM(failure_kinds, i, stat);
     }
-    if (PyDict_SetItemString(res, "specialization_failure_kinds", failure_kinds)) {
+    if (PyDict_SetItemString(res, "failure_kinds", failure_kinds)) {
         Py_DECREF(res);
         Py_DECREF(failure_kinds);
         return NULL;
@@ -101,7 +100,7 @@ add_stat_dict(
     int opcode,
     const char *name) {
 
-    SpecializationStats *stats = &_specialization_stats[opcode];
+    SpecializationStats *stats = &_py_stats.opcode_stats[opcode].specialization;
     PyObject *d = stats_to_dict(stats);
     if (d == NULL) {
         return -1;
@@ -137,25 +136,38 @@ _Py_GetSpecializationStats(void) {
 #endif
 
 
-#define PRINT_STAT(name, field) fprintf(out, "    %s." #field " : %" PRIu64 "\n", name, stats->field);
+#define PRINT_STAT(i, field) \
+    if (stats[i].field) { \
+        fprintf(out, "    opcode[%d]." #field " : %" PRIu64 "\n", i, stats[i].field); \
+    }
 
 static void
-print_stats(FILE *out, SpecializationStats *stats, const char *name)
+print_spec_stats(FILE *out, OpcodeStats *stats)
 {
-    PRINT_STAT(name, specialization_success);
-    PRINT_STAT(name, specialization_failure);
-    PRINT_STAT(name, hit);
-    PRINT_STAT(name, deferred);
-    PRINT_STAT(name, miss);
-    PRINT_STAT(name, deopt);
-    PRINT_STAT(name, unquickened);
-    for (int i = 0; i < SPECIALIZATION_FAILURE_KINDS; i++) {
-        fprintf(out, "    %s.specialization_failure_kinds[%d] : %" PRIu64 "\n",
-            name, i, stats->specialization_failure_kinds[i]);
+    for (int i = 0; i < 256; i++) {
+        PRINT_STAT(i, specialization.success);
+        PRINT_STAT(i, specialization.failure);
+        PRINT_STAT(i, specialization.hit);
+        PRINT_STAT(i, specialization.deferred);
+        PRINT_STAT(i, specialization.miss);
+        PRINT_STAT(i, specialization.deopt);
+        PRINT_STAT(i, execution_count);
+        for (int j = 0; j < SPECIALIZATION_FAILURE_KINDS; j++) {
+            uint64_t val = stats[i].specialization.failure_kinds[j];
+            if (val) {
+                fprintf(out, "    opcode[%d].specialization.failure_kinds[%d] : %"
+                    PRIu64 "\n", i, j, val);
+            }
+        }
     }
 }
 #undef PRINT_STAT
 
+static void
+print_stats(FILE *out, PyStats *stats) {
+    print_spec_stats(out, stats->opcode_stats);
+}
+
 void
 _Py_PrintSpecializationStats(int to_file)
 {
@@ -189,15 +201,7 @@ _Py_PrintSpecializationStats(int to_file)
     else {
         fprintf(out, "Specialization stats:\n");
     }
-    print_stats(out, &_specialization_stats[LOAD_ATTR], "load_attr");
-    print_stats(out, &_specialization_stats[LOAD_GLOBAL], "load_global");
-    print_stats(out, &_specialization_stats[LOAD_METHOD], "load_method");
-    print_stats(out, &_specialization_stats[BINARY_SUBSCR], "binary_subscr");
-    print_stats(out, &_specialization_stats[STORE_SUBSCR], "store_subscr");
-    print_stats(out, &_specialization_stats[STORE_ATTR], "store_attr");
-    print_stats(out, &_specialization_stats[CALL_NO_KW], "call_no_kw");
-    print_stats(out, &_specialization_stats[BINARY_OP], "binary_op");
-    print_stats(out, &_specialization_stats[COMPARE_OP], "compare_op");
+    print_stats(out, &_py_stats);
     if (out != stderr) {
         fclose(out);
     }
@@ -205,7 +209,7 @@ _Py_PrintSpecializationStats(int to_file)
 
 #ifdef Py_STATS
 
-#define SPECIALIZATION_FAIL(opcode, kind) _specialization_stats[opcode].specialization_failure_kinds[kind]++
+#define SPECIALIZATION_FAIL(opcode, kind) _py_stats.opcode_stats[opcode].specialization.failure_kinds[kind]++
 
 
 #endif
@@ -775,12 +779,12 @@ _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, Sp
         goto success;
     }
 fail:
-    STAT_INC(LOAD_ATTR, specialization_failure);
+    STAT_INC(LOAD_ATTR, failure);
     assert(!PyErr_Occurred());
     cache_backoff(cache0);
     return 0;
 success:
-    STAT_INC(LOAD_ATTR, specialization_success);
+    STAT_INC(LOAD_ATTR, success);
     assert(!PyErr_Occurred());
     cache0->counter = initial_counter_value();
     return 0;
@@ -857,12 +861,12 @@ _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, S
         goto success;
     }
 fail:
-    STAT_INC(STORE_ATTR, specialization_failure);
+    STAT_INC(STORE_ATTR, failure);
     assert(!PyErr_Occurred());
     cache_backoff(cache0);
     return 0;
 success:
-    STAT_INC(STORE_ATTR, specialization_success);
+    STAT_INC(STORE_ATTR, success);
     assert(!PyErr_Occurred());
     cache0->counter = initial_counter_value();
     return 0;
@@ -1013,12 +1017,12 @@ _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name,
     cache2->obj = descr;
     // Fall through.
 success:
-    STAT_INC(LOAD_METHOD, specialization_success);
+    STAT_INC(LOAD_METHOD, success);
     assert(!PyErr_Occurred());
     cache0->counter = initial_counter_value();
     return 0;
 fail:
-    STAT_INC(LOAD_METHOD, specialization_failure);
+    STAT_INC(LOAD_METHOD, failure);
     assert(!PyErr_Occurred());
     cache_backoff(cache0);
     return 0;
@@ -1084,12 +1088,12 @@ _Py_Specialize_LoadGlobal(
     *instr = _Py_MAKECODEUNIT(LOAD_GLOBAL_BUILTIN, _Py_OPARG(*instr));
     goto success;
 fail:
-    STAT_INC(LOAD_GLOBAL, specialization_failure);
+    STAT_INC(LOAD_GLOBAL, failure);
     assert(!PyErr_Occurred());
     cache_backoff(cache0);
     return 0;
 success:
-    STAT_INC(LOAD_GLOBAL, specialization_success);
+    STAT_INC(LOAD_GLOBAL, success);
     assert(!PyErr_Occurred());
     cache0->counter = initial_counter_value();
     return 0;
@@ -1211,12 +1215,12 @@ _Py_Specialize_BinarySubscr(
     SPECIALIZATION_FAIL(BINARY_SUBSCR,
                         binary_subscr_fail_kind(container_type, sub));
 fail:
-    STAT_INC(BINARY_SUBSCR, specialization_failure);
+    STAT_INC(BINARY_SUBSCR, failure);
     assert(!PyErr_Occurred());
     cache_backoff(cache0);
     return 0;
 success:
-    STAT_INC(BINARY_SUBSCR, specialization_success);
+    STAT_INC(BINARY_SUBSCR, success);
     assert(!PyErr_Occurred());
     cache0->counter = initial_counter_value();
     return 0;
@@ -1259,12 +1263,12 @@ _Py_Specialize_StoreSubscr(PyObject *container, PyObject *sub, _Py_CODEUNIT *ins
         goto fail;
     }
 fail:
-    STAT_INC(STORE_SUBSCR, specialization_failure);
+    STAT_INC(STORE_SUBSCR, failure);
     assert(!PyErr_Occurred());
     *instr = _Py_MAKECODEUNIT(_Py_OPCODE(*instr), ADAPTIVE_CACHE_BACKOFF);
     return 0;
 success:
-    STAT_INC(STORE_SUBSCR, specialization_success);
+    STAT_INC(STORE_SUBSCR, success);
     assert(!PyErr_Occurred());
     return 0;
 }
@@ -1518,12 +1522,12 @@ _Py_Specialize_CallNoKw(
     }
     _PyAdaptiveEntry *cache0 = &cache->adaptive;
     if (fail) {
-        STAT_INC(CALL_NO_KW, specialization_failure);
+        STAT_INC(CALL_NO_KW, failure);
         assert(!PyErr_Occurred());
         cache_backoff(cache0);
     }
     else {
-        STAT_INC(CALL_NO_KW, specialization_success);
+        STAT_INC(CALL_NO_KW, success);
         assert(!PyErr_Occurred());
         cache0->counter = initial_counter_value();
     }
@@ -1604,11 +1608,11 @@ _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
     }
     SPECIALIZATION_FAIL(BINARY_OP, SPEC_FAIL_OTHER);
 failure:
-    STAT_INC(BINARY_OP, specialization_failure);
+    STAT_INC(BINARY_OP, failure);
     cache_backoff(adaptive);
     return;
 success:
-    STAT_INC(BINARY_OP, specialization_success);
+    STAT_INC(BINARY_OP, success);
     adaptive->counter = initial_counter_value();
 }
 
@@ -1675,10 +1679,10 @@ _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs,
     }
     SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_OTHER);
 failure:
-    STAT_INC(COMPARE_OP, specialization_failure);
+    STAT_INC(COMPARE_OP, failure);
     cache_backoff(adaptive);
     return;
 success:
-    STAT_INC(COMPARE_OP, specialization_success);
+    STAT_INC(COMPARE_OP, success);
     adaptive->counter = initial_counter_value();
 }
diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py
index 15b1887415e44..a5a8e93c17392 100644
--- a/Tools/scripts/summarize_stats.py
+++ b/Tools/scripts/summarize_stats.py
@@ -4,29 +4,50 @@
 
 import collections
 import os.path
+import opcode
 
 if os.name == "nt":
     DEFAULT_DIR = "c:\\temp\\py_stats\\"
 else:
     DEFAULT_DIR = "/tmp/py_stats/"
 
+#Create list of all instruction names
+specialized = iter(opcode._specialized_instructions)
+opname = ["<0>"]
+for name in opcode.opname[1:]:
+    if name.startswith("<"):
+        try:
+            name = next(specialized)
+        except StopIteration:
+            pass
+    opname.append(name)
 
-TOTAL = "deferred", "hit", "miss", "unquickened"
 
-def print_stats(name, family_stats):
+TOTAL = "specialization.deferred", "specialization.hit", "specialization.miss", "execution_count"
+
+def print_specialization_stats(name, family_stats):
+    if "specialization.deferred" not in family_stats:
+        return
     total = sum(family_stats[kind] for kind in TOTAL)
     if total == 0:
         return
     print(name+":")
     for key in sorted(family_stats):
-        if not key.startswith("specialization"):
-            print(f"{key:>12}:{family_stats[key]:>12} {100*family_stats[key]/total:0.1f}%")
-    for key in ("specialization_success",  "specialization_failure"):
-        print(f"  {key}:{family_stats[key]:>12}")
-    total_failures = family_stats["specialization_failure"]
+        if key.startswith("specialization.failure_kinds"):
+            continue
+        if key.startswith("specialization."):
+            label = key[len("specialization."):]
+        elif key == "execution_count":
+            label = "unquickened"
+        if key not in ("specialization.success",  "specialization.failure"):
+            print(f"{label:>12}:{family_stats[key]:>12} {100*family_stats[key]/total:0.1f}%")
+    for key in ("specialization.success",  "specialization.failure"):
+        label = key[len("specialization."):]
+        print(f"  {label}:{family_stats.get(key, 0):>12}")
+    total_failures = family_stats["specialization.failure"]
     failure_kinds = [ 0 ] * 30
     for key in family_stats:
-        if not key.startswith("specialization_failure_kind"):
+        if not key.startswith("specialization.failure_kind"):
             continue
         _, index = key[:-1].split("[")
         index =  int(index)
@@ -36,18 +57,47 @@ def print_stats(name, family_stats):
             continue
         print(f"    kind {index:>2}: {value:>8} {100*value/total_failures:0.1f}%")
 
-def main():
-    stats = collections.defaultdict(collections.Counter)
+def gather_stats():
+    stats = collections.Counter()
     for filename in os.listdir(DEFAULT_DIR):
-        for line in open(os.path.join(DEFAULT_DIR, filename)):
-            key, value = line.split(":")
-            key = key.strip()
-            family, stat = key.split(".")
-            value = int(value.strip())
-            stats[family][stat] += value
-
-    for name in sorted(stats):
-        print_stats(name, stats[name])
+        with open(os.path.join(DEFAULT_DIR, filename)) as fd:
+            for line in fd:
+                key, value = line.split(":")
+                key = key.strip()
+                value = int(value.strip())
+                stats[key] += value
+    return stats
+
+def extract_opcode_stats(stats):
+    opcode_stats = [ {} for _ in range(256) ]
+    for key, value in stats.items():
+        if not key.startswith("opcode"):
+            continue
+        n, _, rest = key[7:].partition("]")
+        opcode_stats[int(n)][rest.strip(".")] = value
+    return opcode_stats
+
+
+def main():
+    stats = gather_stats()
+    opcode_stats = extract_opcode_stats(stats)
+    print("Execution counts:")
+    counts = []
+    total = 0
+    for i, opcode_stat in enumerate(opcode_stats):
+        if "execution_count" in opcode_stat:
+            count = opcode_stat['execution_count']
+            counts.append((count, opname[i]))
+            total += count
+    counts.sort(reverse=True)
+    cummulative = 0
+    for (count, name) in counts:
+        cummulative += count
+        print(f"{name}: {count} {100*count/total:0.1f}% {100*cummulative/total:0.1f}%")
+    print("Specialization stats:")
+    for i, opcode_stat in enumerate(opcode_stats):
+        name = opname[i]
+        print_specialization_stats(name, opcode_stat)
 
 if __name__ == "__main__":
     main()