[Python-checkins] gh-89546: Clean up PyType_FromMetaclass (GH-93686)
miss-islington
webhook-mailer at python.org
Tue Jun 14 05:13:43 EDT 2022
https://github.com/python/cpython/commit/3597c129413a86f805beca78b7c72a20b5bf801c
commit: 3597c129413a86f805beca78b7c72a20b5bf801c
branch: main
author: Petr Viktorin <encukou at gmail.com>
committer: miss-islington <31488909+miss-islington at users.noreply.github.com>
date: 2022-06-14T02:13:29-07:00
summary:
gh-89546: Clean up PyType_FromMetaclass (GH-93686)
When changing PyType_FromMetaclass recently (GH-93012, GH-93466, GH-28748)
I found a bunch of opportunities to improve the code. Here they are.
Fixes: #89546
Automerge-Triggered-By: GH:encukou
files:
A Misc/NEWS.d/next/C API/2022-06-10-16-50-27.gh-issue-89546.mX1f10.rst
M Modules/_testcapimodule.c
M Objects/typeobject.c
diff --git a/Misc/NEWS.d/next/C API/2022-06-10-16-50-27.gh-issue-89546.mX1f10.rst b/Misc/NEWS.d/next/C API/2022-06-10-16-50-27.gh-issue-89546.mX1f10.rst
new file mode 100644
index 0000000000000..8e6b6d95c7b4a
--- /dev/null
+++ b/Misc/NEWS.d/next/C API/2022-06-10-16-50-27.gh-issue-89546.mX1f10.rst
@@ -0,0 +1,4 @@
+:c:func:`PyType_FromMetaclass` (and other ``PyType_From*`` functions) now
+check that offsets and the base class's
+:c:member:`~PyTypeObject.tp_basicsize` fit in the new class's
+``tp_basicsize``.
diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c
index 33dc3dbe493c5..54957db89663e 100644
--- a/Modules/_testcapimodule.c
+++ b/Modules/_testcapimodule.c
@@ -1221,7 +1221,7 @@ static PyType_Spec MinimalMetaclass_spec = {
static PyType_Spec MinimalType_spec = {
.name = "_testcapi.MinimalSpecType",
- .basicsize = sizeof(PyObject),
+ .basicsize = 0, // Updated later
.flags = Py_TPFLAGS_DEFAULT,
.slots = empty_type_slots,
};
@@ -1245,6 +1245,7 @@ test_from_spec_metatype_inheritance(PyObject *self, PyObject *Py_UNUSED(ignored)
goto finally;
}
+ MinimalType_spec.basicsize = (int)(((PyTypeObject*)class)->tp_basicsize);
new = PyType_FromSpecWithBases(&MinimalType_spec, class);
if (new == NULL) {
goto finally;
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index c6df50da9736b..db4682c69ed0e 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -3398,30 +3398,87 @@ get_bases_tuple(PyObject *bases_in, PyType_Spec *spec)
return PyTuple_Pack(1, bases_in);
}
+static inline int
+check_basicsize_includes_size_and_offsets(PyTypeObject* type)
+{
+ if (type->tp_alloc != PyType_GenericAlloc) {
+ // Custom allocators can ignore tp_basicsize
+ return 1;
+ }
+ Py_ssize_t max = (Py_ssize_t)type->tp_basicsize;
+
+ if (type->tp_base && type->tp_base->tp_basicsize > type->tp_basicsize) {
+ PyErr_Format(PyExc_TypeError,
+ "tp_basicsize for type '%s' (%d) is too small for base '%s' (%d)",
+ type->tp_name, type->tp_basicsize,
+ type->tp_base->tp_name, type->tp_base->tp_basicsize);
+ return 0;
+ }
+ if (type->tp_weaklistoffset + (Py_ssize_t)sizeof(PyObject*) > max) {
+ PyErr_Format(PyExc_TypeError,
+ "weaklist offset %d is out of bounds for type '%s' (tp_basicsize = %d)",
+ type->tp_weaklistoffset,
+ type->tp_name, type->tp_basicsize);
+ return 0;
+ }
+ if (type->tp_dictoffset + (Py_ssize_t)sizeof(PyObject*) > max) {
+ PyErr_Format(PyExc_TypeError,
+ "dict offset %d is out of bounds for type '%s' (tp_basicsize = %d)",
+ type->tp_dictoffset,
+ type->tp_name, type->tp_basicsize);
+ return 0;
+ }
+ if (type->tp_vectorcall_offset + (Py_ssize_t)sizeof(vectorcallfunc*) > max) {
+ PyErr_Format(PyExc_TypeError,
+ "vectorcall offset %d is out of bounds for type '%s' (tp_basicsize = %d)",
+ type->tp_vectorcall_offset,
+ type->tp_name, type->tp_basicsize);
+ return 0;
+ }
+ return 1;
+}
+
PyObject *
PyType_FromMetaclass(PyTypeObject *metaclass, PyObject *module,
PyType_Spec *spec, PyObject *bases_in)
{
+ /* Invariant: A non-NULL value in one of these means this function holds
+ * a strong reference or owns allocated memory.
+ * These get decrefed/freed/returned at the end, on both success and error.
+ */
PyHeapTypeObject *res = NULL;
- PyObject *modname = NULL;
PyTypeObject *type;
PyObject *bases = NULL;
+ char *tp_doc = NULL;
+ PyObject *ht_name = NULL;
+ char *_ht_tpname = NULL;
+
int r;
+ /* Prepare slots that need special handling.
+ * Keep in mind that a slot can be given multiple times:
+ * if that would cause trouble (leaks, UB, ...), raise an exception.
+ */
+
const PyType_Slot *slot;
Py_ssize_t nmembers = 0;
Py_ssize_t weaklistoffset, dictoffset, vectorcalloffset;
char *res_start;
- short slot_offset, subslot_offset;
nmembers = weaklistoffset = dictoffset = vectorcalloffset = 0;
for (slot = spec->slots; slot->slot; slot++) {
- if (slot->slot == Py_tp_members) {
+ if (slot->slot < 0
+ || (size_t)slot->slot >= Py_ARRAY_LENGTH(pyslot_offsets)) {
+ PyErr_SetString(PyExc_RuntimeError, "invalid slot offset");
+ goto finally;
+ }
+ switch (slot->slot) {
+ case Py_tp_members:
if (nmembers != 0) {
PyErr_SetString(
PyExc_SystemError,
"Multiple Py_tp_members slots are not supported.");
- return NULL;
+ goto finally;
}
for (const PyMemberDef *memb = slot->pfunc; memb->name != NULL; memb++) {
nmembers++;
@@ -3444,15 +3501,70 @@ PyType_FromMetaclass(PyTypeObject *metaclass, PyObject *module,
vectorcalloffset = memb->offset;
}
}
+ break;
+ case Py_tp_doc:
+ /* For the docstring slot, which usually points to a static string
+ literal, we need to make a copy */
+ if (tp_doc != NULL) {
+ PyErr_SetString(
+ PyExc_SystemError,
+ "Multiple Py_tp_doc slots are not supported.");
+ goto finally;
+ }
+ if (slot->pfunc == NULL) {
+ PyObject_Free(tp_doc);
+ tp_doc = NULL;
+ }
+ else {
+ size_t len = strlen(slot->pfunc)+1;
+ tp_doc = PyObject_Malloc(len);
+ if (tp_doc == NULL) {
+ PyErr_NoMemory();
+ goto finally;
+ }
+ memcpy(tp_doc, slot->pfunc, len);
+ }
+ break;
}
}
+ /* Prepare the type name and qualname */
+
if (spec->name == NULL) {
PyErr_SetString(PyExc_SystemError,
"Type spec does not define the name field.");
goto finally;
}
+ const char *s = strrchr(spec->name, '.');
+ if (s == NULL) {
+ s = spec->name;
+ }
+ else {
+ s++;
+ }
+
+ ht_name = PyUnicode_FromString(s);
+ if (!ht_name) {
+ goto finally;
+ }
+
+ /* Copy spec->name to a buffer we own.
+ *
+ * Unfortunately, we can't use tp_name directly (with some
+ * flag saying that it should be deallocated with the type),
+ * because tp_name is public API and may be set independently
+ * of any such flag.
+ * So, we use a separate buffer, _ht_tpname, that's always
+ * deallocated with the type (if it's non-NULL).
+ */
+ Py_ssize_t name_buf_len = strlen(spec->name) + 1;
+ _ht_tpname = PyMem_Malloc(name_buf_len);
+ if (_ht_tpname == NULL) {
+ goto finally;
+ }
+ memcpy(_ht_tpname, spec->name, name_buf_len);
+
/* Get a tuple of bases.
* bases is a strong reference (unlike bases_in).
*/
@@ -3491,7 +3603,13 @@ PyType_FromMetaclass(PyTypeObject *metaclass, PyObject *module,
// here we just check its work
assert(_PyType_HasFeature(base, Py_TPFLAGS_BASETYPE));
- /* Allocate the new type */
+ /* Allocate the new type
+ *
+ * Between here and PyType_Ready, we should limit:
+ * - calls to Python code
+ * - raising exceptions
+ * - memory allocations
+ */
res = (PyHeapTypeObject*)metaclass->tp_alloc(metaclass, nmembers);
if (res == NULL) {
@@ -3503,105 +3621,70 @@ PyType_FromMetaclass(PyTypeObject *metaclass, PyObject *module,
/* The flags must be initialized early, before the GC traverses us */
type->tp_flags = spec->flags | Py_TPFLAGS_HEAPTYPE;
- /* Set the type name and qualname */
- const char *s = strrchr(spec->name, '.');
- if (s == NULL) {
- s = spec->name;
- }
- else {
- s++;
- }
-
- res->ht_name = PyUnicode_FromString(s);
- if (!res->ht_name) {
- goto finally;
- }
- res->ht_qualname = Py_NewRef(res->ht_name);
-
- /* Copy spec->name to a buffer we own.
- *
- * Unfortunately, we can't use tp_name directly (with some
- * flag saying that it should be deallocated with the type),
- * because tp_name is public API and may be set independently
- * of any such flag.
- * So, we use a separate buffer, _ht_tpname, that's always
- * deallocated with the type (if it's non-NULL).
- */
- Py_ssize_t name_buf_len = strlen(spec->name) + 1;
- res->_ht_tpname = PyMem_Malloc(name_buf_len);
- if (res->_ht_tpname == NULL) {
- goto finally;
- }
- type->tp_name = memcpy(res->_ht_tpname, spec->name, name_buf_len);
-
res->ht_module = Py_XNewRef(module);
/* Initialize essential fields */
+
type->tp_as_async = &res->as_async;
type->tp_as_number = &res->as_number;
type->tp_as_sequence = &res->as_sequence;
type->tp_as_mapping = &res->as_mapping;
type->tp_as_buffer = &res->as_buffer;
- /* Set tp_base and tp_bases */
+ /* Set slots we have prepared */
+
type->tp_base = (PyTypeObject *)Py_NewRef(base);
type->tp_bases = bases;
bases = NULL; // We give our reference to bases to the type
+ type->tp_doc = tp_doc;
+ tp_doc = NULL; // Give ownership of the allocated memory to the type
+
+ res->ht_qualname = Py_NewRef(ht_name);
+ res->ht_name = ht_name;
+ ht_name = NULL; // Give our reference to to the type
+
+ type->tp_name = _ht_tpname;
+ res->_ht_tpname = _ht_tpname;
+ _ht_tpname = NULL; // Give ownership to to the type
+
/* Copy the sizes */
+
type->tp_basicsize = spec->basicsize;
type->tp_itemsize = spec->itemsize;
+ /* Copy all the ordinary slots */
+
for (slot = spec->slots; slot->slot; slot++) {
- if (slot->slot < 0
- || (size_t)slot->slot >= Py_ARRAY_LENGTH(pyslot_offsets)) {
- PyErr_SetString(PyExc_RuntimeError, "invalid slot offset");
- goto finally;
- }
- else if (slot->slot == Py_tp_base || slot->slot == Py_tp_bases) {
+ switch (slot->slot) {
+ case Py_tp_base:
+ case Py_tp_bases:
+ case Py_tp_doc:
/* Processed above */
- continue;
- }
- else if (slot->slot == Py_tp_doc) {
- /* For the docstring slot, which usually points to a static string
- literal, we need to make a copy */
- if (type->tp_doc != NULL) {
- PyErr_SetString(
- PyExc_SystemError,
- "Multiple Py_tp_doc slots are not supported.");
- goto finally;
- }
- if (slot->pfunc == NULL) {
- type->tp_doc = NULL;
- continue;
- }
- size_t len = strlen(slot->pfunc)+1;
- char *tp_doc = PyObject_Malloc(len);
- if (tp_doc == NULL) {
- type->tp_doc = NULL;
- PyErr_NoMemory();
- goto finally;
+ break;
+ case Py_tp_members:
+ {
+ /* Move the slots to the heap type itself */
+ size_t len = Py_TYPE(type)->tp_itemsize * nmembers;
+ memcpy(_PyHeapType_GET_MEMBERS(res), slot->pfunc, len);
+ type->tp_members = _PyHeapType_GET_MEMBERS(res);
}
- memcpy(tp_doc, slot->pfunc, len);
- type->tp_doc = tp_doc;
- }
- else if (slot->slot == Py_tp_members) {
- /* Move the slots to the heap type itself */
- size_t len = Py_TYPE(type)->tp_itemsize * nmembers;
- memcpy(_PyHeapType_GET_MEMBERS(res), slot->pfunc, len);
- type->tp_members = _PyHeapType_GET_MEMBERS(res);
- }
- else {
- /* Copy other slots directly */
- PySlot_Offset slotoffsets = pyslot_offsets[slot->slot];
- slot_offset = slotoffsets.slot_offset;
- if (slotoffsets.subslot_offset == -1) {
- *(void**)((char*)res_start + slot_offset) = slot->pfunc;
- } else {
- void *parent_slot = *(void**)((char*)res_start + slot_offset);
- subslot_offset = slotoffsets.subslot_offset;
- *(void**)((char*)parent_slot + subslot_offset) = slot->pfunc;
+ break;
+ default:
+ {
+ /* Copy other slots directly */
+ PySlot_Offset slotoffsets = pyslot_offsets[slot->slot];
+ short slot_offset = slotoffsets.slot_offset;
+ if (slotoffsets.subslot_offset == -1) {
+ *(void**)((char*)res_start + slot_offset) = slot->pfunc;
+ }
+ else {
+ void *procs = *(void**)((char*)res_start + slot_offset);
+ short subslot_offset = slotoffsets.subslot_offset;
+ *(void**)((char*)procs + subslot_offset) = slot->pfunc;
+ }
}
+ break;
}
}
if (type->tp_dealloc == NULL) {
@@ -3611,14 +3694,26 @@ PyType_FromMetaclass(PyTypeObject *metaclass, PyObject *module,
type->tp_dealloc = subtype_dealloc;
}
- if (vectorcalloffset) {
- type->tp_vectorcall_offset = vectorcalloffset;
- }
+ /* Set up offsets */
+
+ type->tp_vectorcall_offset = vectorcalloffset;
+ type->tp_weaklistoffset = weaklistoffset;
+ type->tp_dictoffset = dictoffset;
+
+ /* Ready the type (which includes inheritance).
+ *
+ * After this call we should generally only touch up what's
+ * accessible to Python code, like __dict__.
+ */
if (PyType_Ready(type) < 0) {
goto finally;
}
+ if (!check_basicsize_includes_size_and_offsets(type)) {
+ goto finally;
+ }
+
if (type->tp_flags & Py_TPFLAGS_MANAGED_DICT) {
res->ht_cached_keys = _PyDict_NewKeysForClass();
}
@@ -3636,13 +3731,11 @@ PyType_FromMetaclass(PyTypeObject *metaclass, PyObject *module,
}
if (weaklistoffset) {
- type->tp_weaklistoffset = weaklistoffset;
if (PyDict_DelItem((PyObject *)type->tp_dict, &_Py_ID(__weaklistoffset__)) < 0) {
goto finally;
}
}
if (dictoffset) {
- type->tp_dictoffset = dictoffset;
if (PyDict_DelItem((PyObject *)type->tp_dict, &_Py_ID(__dictoffset__)) < 0) {
goto finally;
}
@@ -3656,12 +3749,13 @@ PyType_FromMetaclass(PyTypeObject *metaclass, PyObject *module,
if (r == 0) {
s = strrchr(spec->name, '.');
if (s != NULL) {
- modname = PyUnicode_FromStringAndSize(
+ PyObject *modname = PyUnicode_FromStringAndSize(
spec->name, (Py_ssize_t)(s - spec->name));
if (modname == NULL) {
goto finally;
}
r = PyDict_SetItem(type->tp_dict, &_Py_ID(__module__), modname);
+ Py_DECREF(modname);
if (r != 0) {
goto finally;
}
@@ -3681,7 +3775,9 @@ PyType_FromMetaclass(PyTypeObject *metaclass, PyObject *module,
Py_CLEAR(res);
}
Py_XDECREF(bases);
- Py_XDECREF(modname);
+ PyObject_Free(tp_doc);
+ Py_XDECREF(ht_name);
+ PyMem_Free(_ht_tpname);
return (PyObject*)res;
}
More information about the Python-checkins
mailing list