[Python-checkins] gh-89546: Clean up PyType_FromMetaclass (GH-93686)

miss-islington webhook-mailer at python.org
Tue Jun 14 05:13:43 EDT 2022


https://github.com/python/cpython/commit/3597c129413a86f805beca78b7c72a20b5bf801c
commit: 3597c129413a86f805beca78b7c72a20b5bf801c
branch: main
author: Petr Viktorin <encukou at gmail.com>
committer: miss-islington <31488909+miss-islington at users.noreply.github.com>
date: 2022-06-14T02:13:29-07:00
summary:

gh-89546: Clean up PyType_FromMetaclass (GH-93686)



When changing PyType_FromMetaclass recently (GH-93012, GH-93466, GH-28748)
I found a bunch of opportunities to improve the code. Here they are.

Fixes: #89546

Automerge-Triggered-By: GH:encukou

files:
A Misc/NEWS.d/next/C API/2022-06-10-16-50-27.gh-issue-89546.mX1f10.rst
M Modules/_testcapimodule.c
M Objects/typeobject.c

diff --git a/Misc/NEWS.d/next/C API/2022-06-10-16-50-27.gh-issue-89546.mX1f10.rst b/Misc/NEWS.d/next/C API/2022-06-10-16-50-27.gh-issue-89546.mX1f10.rst
new file mode 100644
index 0000000000000..8e6b6d95c7b4a
--- /dev/null
+++ b/Misc/NEWS.d/next/C API/2022-06-10-16-50-27.gh-issue-89546.mX1f10.rst	
@@ -0,0 +1,4 @@
+:c:func:`PyType_FromMetaclass` (and other ``PyType_From*`` functions) now
+check that offsets and the base class's
+:c:member:`~PyTypeObject.tp_basicsize` fit in the new class's
+``tp_basicsize``.
diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c
index 33dc3dbe493c5..54957db89663e 100644
--- a/Modules/_testcapimodule.c
+++ b/Modules/_testcapimodule.c
@@ -1221,7 +1221,7 @@ static PyType_Spec MinimalMetaclass_spec = {
 
 static PyType_Spec MinimalType_spec = {
     .name = "_testcapi.MinimalSpecType",
-    .basicsize = sizeof(PyObject),
+    .basicsize = 0,  // Updated later
     .flags = Py_TPFLAGS_DEFAULT,
     .slots = empty_type_slots,
 };
@@ -1245,6 +1245,7 @@ test_from_spec_metatype_inheritance(PyObject *self, PyObject *Py_UNUSED(ignored)
         goto finally;
     }
 
+    MinimalType_spec.basicsize = (int)(((PyTypeObject*)class)->tp_basicsize);
     new = PyType_FromSpecWithBases(&MinimalType_spec, class);
     if (new == NULL) {
         goto finally;
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index c6df50da9736b..db4682c69ed0e 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -3398,30 +3398,87 @@ get_bases_tuple(PyObject *bases_in, PyType_Spec *spec)
     return PyTuple_Pack(1, bases_in);
 }
 
+static inline int
+check_basicsize_includes_size_and_offsets(PyTypeObject* type)
+{
+    if (type->tp_alloc != PyType_GenericAlloc) {
+        // Custom allocators can ignore tp_basicsize
+        return 1;
+    }
+    Py_ssize_t max = (Py_ssize_t)type->tp_basicsize;
+
+    if (type->tp_base && type->tp_base->tp_basicsize > type->tp_basicsize) {
+        PyErr_Format(PyExc_TypeError,
+                     "tp_basicsize for type '%s' (%d) is too small for base '%s' (%d)",
+                     type->tp_name, type->tp_basicsize,
+                     type->tp_base->tp_name, type->tp_base->tp_basicsize);
+        return 0;
+    }
+    if (type->tp_weaklistoffset + (Py_ssize_t)sizeof(PyObject*) > max) {
+        PyErr_Format(PyExc_TypeError,
+                     "weaklist offset %d is out of bounds for type '%s' (tp_basicsize = %d)",
+                     type->tp_weaklistoffset,
+                     type->tp_name, type->tp_basicsize);
+        return 0;
+    }
+    if (type->tp_dictoffset + (Py_ssize_t)sizeof(PyObject*) > max) {
+        PyErr_Format(PyExc_TypeError,
+                     "dict offset %d is out of bounds for type '%s' (tp_basicsize = %d)",
+                     type->tp_dictoffset,
+                     type->tp_name, type->tp_basicsize);
+        return 0;
+    }
+    if (type->tp_vectorcall_offset + (Py_ssize_t)sizeof(vectorcallfunc*) > max) {
+        PyErr_Format(PyExc_TypeError,
+                     "vectorcall offset %d is out of bounds for type '%s' (tp_basicsize = %d)",
+                     type->tp_vectorcall_offset,
+                     type->tp_name, type->tp_basicsize);
+        return 0;
+    }
+    return 1;
+}
+
 PyObject *
 PyType_FromMetaclass(PyTypeObject *metaclass, PyObject *module,
                      PyType_Spec *spec, PyObject *bases_in)
 {
+    /* Invariant: A non-NULL value in one of these means this function holds
+     * a strong reference or owns allocated memory.
+     * These get decrefed/freed/returned at the end, on both success and error.
+     */
     PyHeapTypeObject *res = NULL;
-    PyObject *modname = NULL;
     PyTypeObject *type;
     PyObject *bases = NULL;
+    char *tp_doc = NULL;
+    PyObject *ht_name = NULL;
+    char *_ht_tpname = NULL;
+
     int r;
 
+    /* Prepare slots that need special handling.
+     * Keep in mind that a slot can be given multiple times:
+     * if that would cause trouble (leaks, UB, ...), raise an exception.
+     */
+
     const PyType_Slot *slot;
     Py_ssize_t nmembers = 0;
     Py_ssize_t weaklistoffset, dictoffset, vectorcalloffset;
     char *res_start;
-    short slot_offset, subslot_offset;
 
     nmembers = weaklistoffset = dictoffset = vectorcalloffset = 0;
     for (slot = spec->slots; slot->slot; slot++) {
-        if (slot->slot == Py_tp_members) {
+        if (slot->slot < 0
+            || (size_t)slot->slot >= Py_ARRAY_LENGTH(pyslot_offsets)) {
+            PyErr_SetString(PyExc_RuntimeError, "invalid slot offset");
+            goto finally;
+        }
+        switch (slot->slot) {
+        case Py_tp_members:
             if (nmembers != 0) {
                 PyErr_SetString(
                     PyExc_SystemError,
                     "Multiple Py_tp_members slots are not supported.");
-                return NULL;
+                goto finally;
             }
             for (const PyMemberDef *memb = slot->pfunc; memb->name != NULL; memb++) {
                 nmembers++;
@@ -3444,15 +3501,70 @@ PyType_FromMetaclass(PyTypeObject *metaclass, PyObject *module,
                     vectorcalloffset = memb->offset;
                 }
             }
+            break;
+        case Py_tp_doc:
+            /* For the docstring slot, which usually points to a static string
+               literal, we need to make a copy */
+            if (tp_doc != NULL) {
+                PyErr_SetString(
+                    PyExc_SystemError,
+                    "Multiple Py_tp_doc slots are not supported.");
+                goto finally;
+            }
+            if (slot->pfunc == NULL) {
+                PyObject_Free(tp_doc);
+                tp_doc = NULL;
+            }
+            else {
+                size_t len = strlen(slot->pfunc)+1;
+                tp_doc = PyObject_Malloc(len);
+                if (tp_doc == NULL) {
+                    PyErr_NoMemory();
+                    goto finally;
+                }
+                memcpy(tp_doc, slot->pfunc, len);
+            }
+            break;
         }
     }
 
+    /* Prepare the type name and qualname */
+
     if (spec->name == NULL) {
         PyErr_SetString(PyExc_SystemError,
                         "Type spec does not define the name field.");
         goto finally;
     }
 
+    const char *s = strrchr(spec->name, '.');
+    if (s == NULL) {
+        s = spec->name;
+    }
+    else {
+        s++;
+    }
+
+    ht_name = PyUnicode_FromString(s);
+    if (!ht_name) {
+        goto finally;
+    }
+
+    /* Copy spec->name to a buffer we own.
+    *
+    * Unfortunately, we can't use tp_name directly (with some
+    * flag saying that it should be deallocated with the type),
+    * because tp_name is public API and may be set independently
+    * of any such flag.
+    * So, we use a separate buffer, _ht_tpname, that's always
+    * deallocated with the type (if it's non-NULL).
+    */
+    Py_ssize_t name_buf_len = strlen(spec->name) + 1;
+    _ht_tpname = PyMem_Malloc(name_buf_len);
+    if (_ht_tpname == NULL) {
+        goto finally;
+    }
+    memcpy(_ht_tpname, spec->name, name_buf_len);
+
     /* Get a tuple of bases.
      * bases is a strong reference (unlike bases_in).
      */
@@ -3491,7 +3603,13 @@ PyType_FromMetaclass(PyTypeObject *metaclass, PyObject *module,
     // here we just check its work
     assert(_PyType_HasFeature(base, Py_TPFLAGS_BASETYPE));
 
-    /* Allocate the new type */
+    /* Allocate the new type
+     *
+     * Between here and PyType_Ready, we should limit:
+     * - calls to Python code
+     * - raising exceptions
+     * - memory allocations
+     */
 
     res = (PyHeapTypeObject*)metaclass->tp_alloc(metaclass, nmembers);
     if (res == NULL) {
@@ -3503,105 +3621,70 @@ PyType_FromMetaclass(PyTypeObject *metaclass, PyObject *module,
     /* The flags must be initialized early, before the GC traverses us */
     type->tp_flags = spec->flags | Py_TPFLAGS_HEAPTYPE;
 
-    /* Set the type name and qualname */
-    const char *s = strrchr(spec->name, '.');
-    if (s == NULL) {
-        s = spec->name;
-    }
-    else {
-        s++;
-    }
-
-    res->ht_name = PyUnicode_FromString(s);
-    if (!res->ht_name) {
-        goto finally;
-    }
-    res->ht_qualname = Py_NewRef(res->ht_name);
-
-    /* Copy spec->name to a buffer we own.
-    *
-    * Unfortunately, we can't use tp_name directly (with some
-    * flag saying that it should be deallocated with the type),
-    * because tp_name is public API and may be set independently
-    * of any such flag.
-    * So, we use a separate buffer, _ht_tpname, that's always
-    * deallocated with the type (if it's non-NULL).
-    */
-    Py_ssize_t name_buf_len = strlen(spec->name) + 1;
-    res->_ht_tpname = PyMem_Malloc(name_buf_len);
-    if (res->_ht_tpname == NULL) {
-        goto finally;
-    }
-    type->tp_name = memcpy(res->_ht_tpname, spec->name, name_buf_len);
-
     res->ht_module = Py_XNewRef(module);
 
     /* Initialize essential fields */
+
     type->tp_as_async = &res->as_async;
     type->tp_as_number = &res->as_number;
     type->tp_as_sequence = &res->as_sequence;
     type->tp_as_mapping = &res->as_mapping;
     type->tp_as_buffer = &res->as_buffer;
 
-    /* Set tp_base and tp_bases */
+    /* Set slots we have prepared */
+
     type->tp_base = (PyTypeObject *)Py_NewRef(base);
     type->tp_bases = bases;
     bases = NULL;  // We give our reference to bases to the type
 
+    type->tp_doc = tp_doc;
+    tp_doc = NULL;  // Give ownership of the allocated memory to the type
+
+    res->ht_qualname = Py_NewRef(ht_name);
+    res->ht_name = ht_name;
+    ht_name = NULL;  // Give our reference to to the type
+
+    type->tp_name = _ht_tpname;
+    res->_ht_tpname = _ht_tpname;
+    _ht_tpname = NULL;  // Give ownership to to the type
+
     /* Copy the sizes */
+
     type->tp_basicsize = spec->basicsize;
     type->tp_itemsize = spec->itemsize;
 
+    /* Copy all the ordinary slots */
+
     for (slot = spec->slots; slot->slot; slot++) {
-        if (slot->slot < 0
-            || (size_t)slot->slot >= Py_ARRAY_LENGTH(pyslot_offsets)) {
-            PyErr_SetString(PyExc_RuntimeError, "invalid slot offset");
-            goto finally;
-        }
-        else if (slot->slot == Py_tp_base || slot->slot == Py_tp_bases) {
+        switch (slot->slot) {
+        case Py_tp_base:
+        case Py_tp_bases:
+        case Py_tp_doc:
             /* Processed above */
-            continue;
-        }
-        else if (slot->slot == Py_tp_doc) {
-            /* For the docstring slot, which usually points to a static string
-               literal, we need to make a copy */
-            if (type->tp_doc != NULL) {
-                PyErr_SetString(
-                    PyExc_SystemError,
-                    "Multiple Py_tp_doc slots are not supported.");
-                goto finally;
-            }
-            if (slot->pfunc == NULL) {
-                type->tp_doc = NULL;
-                continue;
-            }
-            size_t len = strlen(slot->pfunc)+1;
-            char *tp_doc = PyObject_Malloc(len);
-            if (tp_doc == NULL) {
-                type->tp_doc = NULL;
-                PyErr_NoMemory();
-                goto finally;
+            break;
+        case Py_tp_members:
+            {
+                /* Move the slots to the heap type itself */
+                size_t len = Py_TYPE(type)->tp_itemsize * nmembers;
+                memcpy(_PyHeapType_GET_MEMBERS(res), slot->pfunc, len);
+                type->tp_members = _PyHeapType_GET_MEMBERS(res);
             }
-            memcpy(tp_doc, slot->pfunc, len);
-            type->tp_doc = tp_doc;
-        }
-        else if (slot->slot == Py_tp_members) {
-            /* Move the slots to the heap type itself */
-            size_t len = Py_TYPE(type)->tp_itemsize * nmembers;
-            memcpy(_PyHeapType_GET_MEMBERS(res), slot->pfunc, len);
-            type->tp_members = _PyHeapType_GET_MEMBERS(res);
-        }
-        else {
-            /* Copy other slots directly */
-            PySlot_Offset slotoffsets = pyslot_offsets[slot->slot];
-            slot_offset = slotoffsets.slot_offset;
-            if (slotoffsets.subslot_offset == -1) {
-                *(void**)((char*)res_start + slot_offset) = slot->pfunc;
-            } else {
-                void *parent_slot = *(void**)((char*)res_start + slot_offset);
-                subslot_offset = slotoffsets.subslot_offset;
-                *(void**)((char*)parent_slot + subslot_offset) = slot->pfunc;
+            break;
+        default:
+            {
+                /* Copy other slots directly */
+                PySlot_Offset slotoffsets = pyslot_offsets[slot->slot];
+                short slot_offset = slotoffsets.slot_offset;
+                if (slotoffsets.subslot_offset == -1) {
+                    *(void**)((char*)res_start + slot_offset) = slot->pfunc;
+                }
+                else {
+                    void *procs = *(void**)((char*)res_start + slot_offset);
+                    short subslot_offset = slotoffsets.subslot_offset;
+                    *(void**)((char*)procs + subslot_offset) = slot->pfunc;
+                }
             }
+            break;
         }
     }
     if (type->tp_dealloc == NULL) {
@@ -3611,14 +3694,26 @@ PyType_FromMetaclass(PyTypeObject *metaclass, PyObject *module,
         type->tp_dealloc = subtype_dealloc;
     }
 
-    if (vectorcalloffset) {
-        type->tp_vectorcall_offset = vectorcalloffset;
-    }
+    /* Set up offsets */
+
+    type->tp_vectorcall_offset = vectorcalloffset;
+    type->tp_weaklistoffset = weaklistoffset;
+    type->tp_dictoffset = dictoffset;
+
+    /* Ready the type (which includes inheritance).
+     *
+     * After this call we should generally only touch up what's
+     * accessible to Python code, like __dict__.
+     */
 
     if (PyType_Ready(type) < 0) {
         goto finally;
     }
 
+    if (!check_basicsize_includes_size_and_offsets(type)) {
+        goto finally;
+    }
+
     if (type->tp_flags & Py_TPFLAGS_MANAGED_DICT) {
         res->ht_cached_keys = _PyDict_NewKeysForClass();
     }
@@ -3636,13 +3731,11 @@ PyType_FromMetaclass(PyTypeObject *metaclass, PyObject *module,
     }
 
     if (weaklistoffset) {
-        type->tp_weaklistoffset = weaklistoffset;
         if (PyDict_DelItem((PyObject *)type->tp_dict, &_Py_ID(__weaklistoffset__)) < 0) {
             goto finally;
         }
     }
     if (dictoffset) {
-        type->tp_dictoffset = dictoffset;
         if (PyDict_DelItem((PyObject *)type->tp_dict, &_Py_ID(__dictoffset__)) < 0) {
             goto finally;
         }
@@ -3656,12 +3749,13 @@ PyType_FromMetaclass(PyTypeObject *metaclass, PyObject *module,
     if (r == 0) {
         s = strrchr(spec->name, '.');
         if (s != NULL) {
-            modname = PyUnicode_FromStringAndSize(
+            PyObject *modname = PyUnicode_FromStringAndSize(
                     spec->name, (Py_ssize_t)(s - spec->name));
             if (modname == NULL) {
                 goto finally;
             }
             r = PyDict_SetItem(type->tp_dict, &_Py_ID(__module__), modname);
+            Py_DECREF(modname);
             if (r != 0) {
                 goto finally;
             }
@@ -3681,7 +3775,9 @@ PyType_FromMetaclass(PyTypeObject *metaclass, PyObject *module,
         Py_CLEAR(res);
     }
     Py_XDECREF(bases);
-    Py_XDECREF(modname);
+    PyObject_Free(tp_doc);
+    Py_XDECREF(ht_name);
+    PyMem_Free(_ht_tpname);
     return (PyObject*)res;
 }
 



More information about the Python-checkins mailing list