[Python-checkins] Classify specialization failures. Provides more useful stats, with lower overhead. (GH-27701)

markshannon webhook-mailer at python.org
Tue Aug 10 09:53:10 EDT 2021


https://github.com/python/cpython/commit/9816777861a3f19ea9faa5b98c9b7024f13c6b73
commit: 9816777861a3f19ea9faa5b98c9b7024f13c6b73
branch: main
author: Mark Shannon <mark at hotpy.org>
committer: markshannon <mark at hotpy.org>
date: 2021-08-10T14:53:05+01:00
summary:

Classify specialization failures. Provides more useful stats, with lower overhead. (GH-27701)

files:
M Include/internal/pycore_code.h
M Lib/test/test__opcode.py
M Python/specialize.c
M Tools/scripts/summarize_specialization_stats.py

diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h
index 3d8f9c2d228e7f..7ae7aee961c3a0 100644
--- a/Include/internal/pycore_code.h
+++ b/Include/internal/pycore_code.h
@@ -305,6 +305,8 @@ int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT
 #define SPECIALIZATION_STATS_DETAILED 0
 #define SPECIALIZATION_STATS_TO_FILE 0
 
+#define SPECIALIZATION_FAILURE_KINDS 20
+
 #if SPECIALIZATION_STATS
 
 typedef struct _stats {
@@ -316,7 +318,7 @@ typedef struct _stats {
     uint64_t deopt;
     uint64_t unquickened;
 #if SPECIALIZATION_STATS_DETAILED
-    PyObject *miss_types;
+    uint64_t specialization_failure_kinds[SPECIALIZATION_FAILURE_KINDS];
 #endif
 } SpecializationStats;
 
diff --git a/Lib/test/test__opcode.py b/Lib/test/test__opcode.py
index 928198ae123b57..f6d8c730645c19 100644
--- a/Lib/test/test__opcode.py
+++ b/Lib/test/test__opcode.py
@@ -85,9 +85,8 @@ def test_specialization_stats(self):
                 stat_names + ['fails'])
             for sn in stat_names:
                 self.assertIsInstance(stats['load_attr'][sn], int)
-            self.assertIsInstance(stats['load_attr']['fails'], dict)
-            for k,v in stats['load_attr']['fails'].items():
-                self.assertIsInstance(k, tuple)
+            self.assertIsInstance(stats['load_attr']['specialization_failure_kinds'], tuple)
+            for v in stats['load_attr']['specialization_failure_kinds']:
                 self.assertIsInstance(v, int)
 
 
diff --git a/Python/specialize.c b/Python/specialize.c
index effbca269bff1b..d0e45d70c08801 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -70,12 +70,26 @@ stats_to_dict(SpecializationStats *stats)
     ADD_STAT_TO_DICT(res, deopt);
     ADD_STAT_TO_DICT(res, unquickened);
 #if SPECIALIZATION_STATS_DETAILED
-    if (stats->miss_types != NULL) {
-        if (PyDict_SetItemString(res, "fails", stats->miss_types) == -1) {
+    PyObject *failure_kinds = PyTuple_New(SPECIALIZATION_FAILURE_KINDS);
+    if (failure_kinds == NULL) {
+        Py_DECREF(res);
+        return NULL;
+    }
+    for (int i = 0; i < SPECIALIZATION_FAILURE_KINDS; i++) {
+        PyObject *stat = PyLong_FromUnsignedLongLong(stats->specialization_failure_kinds[i]);
+        if (stat == NULL) {
             Py_DECREF(res);
+            Py_DECREF(failure_kinds);
             return NULL;
         }
+        PyTuple_SET_ITEM(failure_kinds, i, stat);
+    }
+    if (PyDict_SetItemString(res, "specialization_failure_kinds", failure_kinds)) {
+        Py_DECREF(res);
+        Py_DECREF(failure_kinds);
+        return NULL;
     }
+    Py_DECREF(failure_kinds);
 #endif
     return res;
 }
@@ -130,21 +144,9 @@ print_stats(FILE *out, SpecializationStats *stats, const char *name)
     PRINT_STAT(name, deopt);
     PRINT_STAT(name, unquickened);
 #if SPECIALIZATION_STATS_DETAILED
-    if (stats->miss_types == NULL) {
-        return;
-    }
-    fprintf(out, "    %s.fails:\n", name);
-    PyObject *key, *count;
-    Py_ssize_t pos = 0;
-    while (PyDict_Next(stats->miss_types, &pos, &key, &count)) {
-        PyObject *type = PyTuple_GetItem(key, 0);
-        PyObject *name = PyTuple_GetItem(key, 1);
-        PyObject *kind = PyTuple_GetItem(key, 2);
-        fprintf(out, "        %s.", ((PyTypeObject *)type)->tp_name);
-        PyObject_Print(name, out, Py_PRINT_RAW);
-        fprintf(out, " (");
-        PyObject_Print(kind, out, Py_PRINT_RAW);
-        fprintf(out, "): %ld\n", PyLong_AsLong(count));
+    for (int i = 0; i < SPECIALIZATION_FAILURE_KINDS; i++) {
+        fprintf(out, "    %s.specialization_failure_kinds[%d] : %" PRIu64 "\n",
+            name, i, stats->specialization_failure_kinds[i]);
     }
 #endif
 }
@@ -180,57 +182,15 @@ _Py_PrintSpecializationStats(void)
 }
 
 #if SPECIALIZATION_STATS_DETAILED
-void
-_Py_IncrementTypeCounter(int opcode, PyObject *type, PyObject *name, const char *kind)
-{
-    PyObject *counter = _specialization_stats[opcode].miss_types;
-    if (counter == NULL) {
-        _specialization_stats[opcode].miss_types = PyDict_New();
-        counter = _specialization_stats[opcode].miss_types;
-        if (counter == NULL) {
-            return;
-        }
-    }
-    PyObject *key = NULL;
-    PyObject *kind_object = _PyUnicode_FromASCII(kind, strlen(kind));
-    if (kind_object == NULL) {
-        PyErr_Clear();
-        goto done;
-    }
-    key = PyTuple_Pack(3, type, name, kind_object);
-    if (key == NULL) {
-        PyErr_Clear();
-        goto done;
-    }
-    PyObject *count = PyDict_GetItem(counter, key);
-    if (count == NULL) {
-        count = _PyLong_GetZero();
-        if (PyDict_SetItem(counter, key, count) < 0) {
-            PyErr_Clear();
-            goto done;
-        }
-    }
-    count = PyNumber_Add(count, _PyLong_GetOne());
-    if (count == NULL) {
-        PyErr_Clear();
-        goto done;
-    }
-    if (PyDict_SetItem(counter, key, count)) {
-        PyErr_Clear();
-    }
-done:
-    Py_XDECREF(kind_object);
-    Py_XDECREF(key);
-}
 
-#define SPECIALIZATION_FAIL(opcode, type, attribute, kind) _Py_IncrementTypeCounter(opcode, (PyObject *)(type), (PyObject *)(attribute), kind)
+#define SPECIALIZATION_FAIL(opcode, kind) _specialization_stats[opcode].specialization_failure_kinds[kind]++
 
 
 #endif
 #endif
 
 #ifndef SPECIALIZATION_FAIL
-#define SPECIALIZATION_FAIL(opcode, type, attribute, kind) ((void)0)
+#define SPECIALIZATION_FAIL(opcode, kind) ((void)0)
 #endif
 
 static SpecializedCacheOrInstruction *
@@ -415,6 +375,34 @@ _Py_Quicken(PyCodeObject *code) {
 }
 
 
+/* Common */
+
+#define SPEC_FAIL_NO_DICT 1
+#define SPEC_FAIL_OVERRIDDEN 2
+#define SPEC_FAIL_OUT_OF_VERSIONS 3
+#define SPEC_FAIL_OUT_OF_RANGE 4
+#define SPEC_FAIL_EXPECTED_ERROR 5
+
+/* Attributes */
+
+#define SPEC_FAIL_NON_STRING_OR_SPLIT 6
+#define SPEC_FAIL_MODULE_ATTR_NOT_FOUND 7
+#define SPEC_FAIL_OVERRIDING_DESCRIPTOR 8
+#define SPEC_FAIL_NON_OVERRIDING_DESCRIPTOR 9
+#define SPEC_FAIL_NOT_DESCRIPTOR 10
+#define SPEC_FAIL_METHOD 11
+#define SPEC_FAIL_MUTABLE_CLASS 12
+#define SPEC_FAIL_PROPERTY 13
+#define SPEC_FAIL_NON_OBJECT_SLOT 14
+#define SPEC_FAIL_READ_ONLY 15
+#define SPEC_FAIL_AUDITED_SLOT 16
+
+/* Binary subscr */
+
+#define SPEC_FAIL_LIST_NON_INT_SUBSCRIPT 8
+#define SPEC_FAIL_TUPLE_NON_INT_SUBSCRIPT 9
+#define SPEC_FAIL_NOT_TUPLE_LIST_OR_DICT 10
+
 
 static int
 specialize_module_load_attr(
@@ -427,34 +415,34 @@ specialize_module_load_attr(
     _Py_IDENTIFIER(__getattr__);
     PyDictObject *dict = (PyDictObject *)m->md_dict;
     if (dict == NULL) {
-        SPECIALIZATION_FAIL(LOAD_ATTR, Py_TYPE(owner), name, "no __dict__");
+        SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_NO_DICT);
         return -1;
     }
     if (dict->ma_keys->dk_kind != DICT_KEYS_UNICODE) {
-        SPECIALIZATION_FAIL(LOAD_ATTR, Py_TYPE(owner), name, "non-string keys (or split)");
+        SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_NON_STRING_OR_SPLIT);
         return -1;
     }
     getattr = _PyUnicode_FromId(&PyId___getattr__); /* borrowed */
     if (getattr == NULL) {
-        SPECIALIZATION_FAIL(LOAD_ATTR, Py_TYPE(owner), name, "module.__getattr__ overridden");
+        SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_OVERRIDDEN);
         PyErr_Clear();
         return -1;
     }
     Py_ssize_t index = _PyDict_GetItemHint(dict, getattr, -1,  &value);
     assert(index != DKIX_ERROR);
     if (index != DKIX_EMPTY) {
-        SPECIALIZATION_FAIL(LOAD_ATTR, Py_TYPE(owner), name, "module attribute not found");
+        SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_MODULE_ATTR_NOT_FOUND);
         return -1;
     }
     index = _PyDict_GetItemHint(dict, name, -1, &value);
     assert (index != DKIX_ERROR);
     if (index != (uint16_t)index) {
-        SPECIALIZATION_FAIL(LOAD_ATTR, Py_TYPE(owner), name, "index out of range");
+        SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_OUT_OF_RANGE);
         return -1;
     }
     uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState(dict);
     if (keys_version == 0) {
-        SPECIALIZATION_FAIL(LOAD_ATTR, Py_TYPE(owner), name, "no more key versions");
+        SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_OUT_OF_VERSIONS);
         return -1;
     }
     cache1->dk_version_or_hint = keys_version;
@@ -544,13 +532,13 @@ specialize_dict_access(
     assert(kind == NON_OVERRIDING || kind == NON_DESCRIPTOR || kind == ABSENT);
     // No desciptor, or non overriding.
     if (type->tp_dictoffset < 0) {
-        SPECIALIZATION_FAIL(base_op, type, name, "negative offset");
+        SPECIALIZATION_FAIL(base_op, SPEC_FAIL_OUT_OF_RANGE);
         return 0;
     }
     if (type->tp_dictoffset > 0) {
         PyObject **dictptr = (PyObject **) ((char *)owner + type->tp_dictoffset);
         if (*dictptr == NULL || !PyDict_CheckExact(*dictptr)) {
-            SPECIALIZATION_FAIL(base_op, type, name, "no dict or not a dict");
+            SPECIALIZATION_FAIL(base_op, SPEC_FAIL_NO_DICT);
             return 0;
         }
         // We found an instance with a __dict__.
@@ -568,13 +556,12 @@ specialize_dict_access(
             Py_ssize_t index = _Py_dict_lookup(dict, name, hash, &value);
             assert (index != DKIX_ERROR);
             if (index != (uint16_t)index) {
-                SPECIALIZATION_FAIL(base_op, type, name,
-                                    index < 0 ? "attribute not in dict" : "index out of range");
+                SPECIALIZATION_FAIL(base_op, SPEC_FAIL_OUT_OF_RANGE);
                 return 0;
             }
             uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState(dict);
             if (keys_version == 0) {
-                SPECIALIZATION_FAIL(base_op, type, name, "no more key versions");
+                SPECIALIZATION_FAIL(base_op, SPEC_FAIL_OUT_OF_VERSIONS);
                 return 0;
             }
             cache1->dk_version_or_hint = keys_version;
@@ -588,7 +575,7 @@ specialize_dict_access(
             Py_ssize_t hint =
                 _PyDict_GetItemHint(dict, name, -1, &value);
             if (hint != (uint32_t)hint) {
-                SPECIALIZATION_FAIL(base_op, type, name, "hint out of range");
+                SPECIALIZATION_FAIL(base_op, SPEC_FAIL_OUT_OF_RANGE);
                 return 0;
             }
             cache1->dk_version_or_hint = (uint32_t)hint;
@@ -601,14 +588,14 @@ specialize_dict_access(
     /* No attribute in instance dictionary */
     switch(kind) {
         case NON_OVERRIDING:
-            SPECIALIZATION_FAIL(base_op, type, name, "non-overriding descriptor");
+            SPECIALIZATION_FAIL(base_op, SPEC_FAIL_NON_OVERRIDING_DESCRIPTOR);
             return 0;
         case NON_DESCRIPTOR:
             /* To do -- Optimize this case */
-            SPECIALIZATION_FAIL(base_op, type, name, "non descriptor");
+            SPECIALIZATION_FAIL(base_op, SPEC_FAIL_NOT_DESCRIPTOR);
             return 0;
         case ABSENT:
-            SPECIALIZATION_FAIL(base_op, type, name, "no attribute");
+            SPECIALIZATION_FAIL(base_op, SPEC_FAIL_EXPECTED_ERROR);
             return 0;
         default:
             Py_UNREACHABLE();
@@ -637,13 +624,13 @@ _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, Sp
     DesciptorClassification kind = analyze_descriptor(type, name, &descr, 0);
     switch(kind) {
         case OVERRIDING:
-            SPECIALIZATION_FAIL(LOAD_ATTR, type, name, "overriding descriptor");
+            SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_OVERRIDING_DESCRIPTOR);
             goto fail;
         case METHOD:
-            SPECIALIZATION_FAIL(LOAD_ATTR, type, name, "method");
+            SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_METHOD);
             goto fail;
         case PROPERTY:
-            SPECIALIZATION_FAIL(LOAD_ATTR, type, name, "property");
+            SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_PROPERTY);
             goto fail;
         case OBJECT_SLOT:
         {
@@ -651,11 +638,11 @@ _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, Sp
             struct PyMemberDef *dmem = member->d_member;
             Py_ssize_t offset = dmem->offset;
             if (dmem->flags & PY_AUDIT_READ) {
-                SPECIALIZATION_FAIL(LOAD_ATTR, type, name, "audit read");
+                SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_AUDITED_SLOT);
                 goto fail;
             }
             if (offset != (uint16_t)offset) {
-                SPECIALIZATION_FAIL(LOAD_ATTR, type, name, "offset out of range");
+                SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_OUT_OF_RANGE);
                 goto fail;
             }
             assert(dmem->type == T_OBJECT_EX);
@@ -675,13 +662,13 @@ _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, Sp
             goto success;
         }
         case OTHER_SLOT:
-            SPECIALIZATION_FAIL(LOAD_ATTR, type, name, "non-object slot");
+            SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_NON_OBJECT_SLOT);
             goto fail;
         case MUTABLE:
-            SPECIALIZATION_FAIL(LOAD_ATTR, type, name, "mutable class attribute");
+            SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_MUTABLE_CLASS);
             goto fail;
         case GETSET_OVERRIDDEN:
-            SPECIALIZATION_FAIL(LOAD_ATTR, type, name, "__getattribute__ overridden");
+            SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_OVERRIDDEN);
             goto fail;
         case NON_OVERRIDING:
         case NON_DESCRIPTOR:
@@ -717,20 +704,20 @@ _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, S
     _PyAttrCache *cache1 = &cache[-1].attr;
     PyTypeObject *type = Py_TYPE(owner);
     if (PyModule_CheckExact(owner)) {
-        SPECIALIZATION_FAIL(STORE_ATTR, type, name, "module attribute");
+        SPECIALIZATION_FAIL(STORE_ATTR, SPEC_FAIL_OVERRIDDEN);
         goto fail;
     }
     PyObject *descr;
     DesciptorClassification kind = analyze_descriptor(type, name, &descr, 1);
     switch(kind) {
         case OVERRIDING:
-            SPECIALIZATION_FAIL(STORE_ATTR, type, name, "overriding descriptor");
+            SPECIALIZATION_FAIL(STORE_ATTR, SPEC_FAIL_OVERRIDING_DESCRIPTOR);
             goto fail;
         case METHOD:
-            SPECIALIZATION_FAIL(STORE_ATTR, type, name, "method");
+            SPECIALIZATION_FAIL(STORE_ATTR, SPEC_FAIL_METHOD);
             goto fail;
         case PROPERTY:
-            SPECIALIZATION_FAIL(STORE_ATTR, type, name, "property");
+            SPECIALIZATION_FAIL(STORE_ATTR, SPEC_FAIL_PROPERTY);
             goto fail;
         case OBJECT_SLOT:
         {
@@ -738,11 +725,11 @@ _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, S
             struct PyMemberDef *dmem = member->d_member;
             Py_ssize_t offset = dmem->offset;
             if (dmem->flags & READONLY) {
-                SPECIALIZATION_FAIL(STORE_ATTR, type, name, "read only");
+                SPECIALIZATION_FAIL(STORE_ATTR, SPEC_FAIL_READ_ONLY);
                 goto fail;
             }
             if (offset != (uint16_t)offset) {
-                SPECIALIZATION_FAIL(STORE_ATTR, type, name, "offset out of range");
+                SPECIALIZATION_FAIL(STORE_ATTR, SPEC_FAIL_OUT_OF_RANGE);
                 goto fail;
             }
             assert(dmem->type == T_OBJECT_EX);
@@ -754,13 +741,13 @@ _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, S
         }
         case DUNDER_CLASS:
         case OTHER_SLOT:
-            SPECIALIZATION_FAIL(STORE_ATTR, type, name, "other slot");
+            SPECIALIZATION_FAIL(STORE_ATTR, SPEC_FAIL_NON_OBJECT_SLOT);
             goto fail;
         case MUTABLE:
-            SPECIALIZATION_FAIL(STORE_ATTR, type, name, "mutable class attribute");
+            SPECIALIZATION_FAIL(STORE_ATTR, SPEC_FAIL_MUTABLE_CLASS);
             goto fail;
         case GETSET_OVERRIDDEN:
-            SPECIALIZATION_FAIL(STORE_ATTR, type, name, "__setattr__ overridden");
+            SPECIALIZATION_FAIL(STORE_ATTR, SPEC_FAIL_OVERRIDDEN);
             goto fail;
         case NON_OVERRIDING:
         case NON_DESCRIPTOR:
@@ -869,7 +856,7 @@ _Py_Specialize_BinarySubscr(
             *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_LIST_INT, saturating_start());
             goto success;
         } else {
-            SPECIALIZATION_FAIL(BINARY_SUBSCR, Py_TYPE(container), Py_TYPE(sub), "list; non-integer subscr");
+            SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_LIST_NON_INT_SUBSCRIPT);
             goto fail;
         }
     }
@@ -878,7 +865,7 @@ _Py_Specialize_BinarySubscr(
             *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_TUPLE_INT, saturating_start());
             goto success;
         } else {
-            SPECIALIZATION_FAIL(BINARY_SUBSCR, Py_TYPE(container), Py_TYPE(sub), "tuple; non-integer subscr");
+            SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_TUPLE_NON_INT_SUBSCRIPT);
             goto fail;
         }
     }
@@ -886,7 +873,7 @@ _Py_Specialize_BinarySubscr(
         *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_DICT, saturating_start());
         goto success;
     }
-    SPECIALIZATION_FAIL(BINARY_SUBSCR, Py_TYPE(container), Py_TYPE(sub), "not list|tuple|dict");
+    SPECIALIZATION_FAIL(BINARY_SUBSCR,SPEC_FAIL_NOT_TUPLE_LIST_OR_DICT);
     goto fail;
 fail:
     STAT_INC(BINARY_SUBSCR, specialization_failure);
diff --git a/Tools/scripts/summarize_specialization_stats.py b/Tools/scripts/summarize_specialization_stats.py
index 1a13ae8cb6ce5a..cc3ef85933c026 100644
--- a/Tools/scripts/summarize_specialization_stats.py
+++ b/Tools/scripts/summarize_specialization_stats.py
@@ -23,6 +23,18 @@ def print_stats(name, family_stats):
             print(f"{key:>12}:{family_stats[key]:>12} {100*family_stats[key]/total:0.1f}%")
     for key in ("specialization_success",  "specialization_failure"):
         print(f"  {key}:{family_stats[key]:>12}")
+    total_failures = family_stats["specialization_failure"]
+    failure_kinds = [ 0 ] * 20
+    for key in family_stats:
+        if not key.startswith("specialization_failure_kind"):
+            continue
+        _, index = key[:-1].split("[")
+        index =  int(index)
+        failure_kinds[index] = family_stats[key]
+    for index, value in enumerate(failure_kinds):
+        if not value:
+            continue
+        print(f"    kind {index:>2}: {value:>8} {100*value/total_failures:0.1f}%")
 
 def main():
     stats = collections.defaultdict(collections.Counter)



More information about the Python-checkins mailing list