[Python-checkins] bpo-35053: Enhance tracemalloc to trace free lists (GH-10063)

Victor Stinner webhook-mailer at python.org
Thu Oct 25 07:31:19 EDT 2018


https://github.com/python/cpython/commit/9e00e80e213ebc37eff89ce72102c1f928ebc133
commit: 9e00e80e213ebc37eff89ce72102c1f928ebc133
branch: master
author: Victor Stinner <vstinner at redhat.com>
committer: GitHub <noreply at github.com>
date: 2018-10-25T13:31:16+02:00
summary:

bpo-35053: Enhance tracemalloc to trace free lists (GH-10063)

tracemalloc now tries to update the traceback when an object is
reused from a "free list" (optimization for faster object creation,
used by the builtin list type for example).

Changes:

* Add _PyTraceMalloc_NewReference() function which tries to update
  the Python traceback of a Python object.
* _Py_NewReference() now calls _PyTraceMalloc_NewReference().
* Add a unit test.

files:
A Misc/NEWS.d/next/Library/2018-10-23-18-58-12.bpo-35053.G82qwh.rst
M Include/object.h
M Include/pymem.h
M Lib/test/test_tracemalloc.py
M Modules/_tracemalloc.c
M Objects/object.c
M Objects/obmalloc.c

diff --git a/Include/object.h b/Include/object.h
index bcf78afe6bbb..8cd57d21a40f 100644
--- a/Include/object.h
+++ b/Include/object.h
@@ -776,6 +776,9 @@ PyAPI_FUNC(void) _Py_AddToAllObjects(PyObject *, int force);
  * inline.
  */
 #define _Py_NewReference(op) (                          \
+    (_Py_tracemalloc_config.tracing        \
+        ? _PyTraceMalloc_NewReference(op)               \
+        : 0),                                           \
     _Py_INC_TPALLOCS(op) _Py_COUNT_ALLOCS_COMMA         \
     _Py_INC_REFTOTAL  _Py_REF_DEBUG_COMMA               \
     Py_REFCNT(op) = 1)
diff --git a/Include/pymem.h b/Include/pymem.h
index ef6e0bb5e25f..6299ab405a05 100644
--- a/Include/pymem.h
+++ b/Include/pymem.h
@@ -36,6 +36,10 @@ PyAPI_FUNC(int) PyTraceMalloc_Track(
     uintptr_t ptr,
     size_t size);
 
+/* Update the Python traceback of an object.
+   This function can be used when a memory block is reused from a free list. */
+PyAPI_FUNC(int) _PyTraceMalloc_NewReference(PyObject *op);
+
 /* Untrack an allocated memory block in the tracemalloc module.
    Do nothing if the block was not tracked.
 
@@ -239,6 +243,40 @@ PyAPI_FUNC(int) _PyMem_SetDefaultAllocator(
     PyMemAllocatorEx *old_alloc);
 #endif
 
+
+/* bpo-35053: expose _Py_tracemalloc_config for performance:
+   _Py_NewReference() needs an efficient check to test if tracemalloc is
+   tracing. */
+struct _PyTraceMalloc_Config {
+    /* Module initialized?
+       Variable protected by the GIL */
+    enum {
+        TRACEMALLOC_NOT_INITIALIZED,
+        TRACEMALLOC_INITIALIZED,
+        TRACEMALLOC_FINALIZED
+    } initialized;
+
+    /* Is tracemalloc tracing memory allocations?
+       Variable protected by the GIL */
+    int tracing;
+
+    /* limit of the number of frames in a traceback, 1 by default.
+       Variable protected by the GIL. */
+    int max_nframe;
+
+    /* use domain in trace key?
+       Variable protected by the GIL. */
+    int use_domain;
+};
+
+PyAPI_DATA(struct _PyTraceMalloc_Config) _Py_tracemalloc_config;
+
+#define _PyTraceMalloc_Config_INIT \
+    {.initialized = TRACEMALLOC_NOT_INITIALIZED, \
+     .tracing = 0, \
+     .max_nframe = 1, \
+     .use_domain = 0}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/Lib/test/test_tracemalloc.py b/Lib/test/test_tracemalloc.py
index 76a6159c7aa4..c3866483b8aa 100644
--- a/Lib/test/test_tracemalloc.py
+++ b/Lib/test/test_tracemalloc.py
@@ -111,6 +111,26 @@ def test_get_object_traceback(self):
         traceback = tracemalloc.get_object_traceback(obj)
         self.assertEqual(traceback, obj_traceback)
 
+    def test_new_reference(self):
+        tracemalloc.clear_traces()
+        # gc.collect() indirectly calls PyList_ClearFreeList()
+        support.gc_collect()
+
+        # Create a list and "destroy it": put it in the PyListObject free list
+        obj = []
+        obj = None
+
+        # Create a list which should reuse the previously created empty list
+        obj = []
+
+        nframe = tracemalloc.get_traceback_limit()
+        frames = get_frames(nframe, -3)
+        obj_traceback = tracemalloc.Traceback(frames)
+
+        traceback = tracemalloc.get_object_traceback(obj)
+        self.assertIsNotNone(traceback)
+        self.assertEqual(traceback, obj_traceback)
+
     def test_set_traceback_limit(self):
         obj_size = 10
 
diff --git a/Misc/NEWS.d/next/Library/2018-10-23-18-58-12.bpo-35053.G82qwh.rst b/Misc/NEWS.d/next/Library/2018-10-23-18-58-12.bpo-35053.G82qwh.rst
new file mode 100644
index 000000000000..d96ac119aa82
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2018-10-23-18-58-12.bpo-35053.G82qwh.rst
@@ -0,0 +1,3 @@
+tracemalloc now tries to update the traceback when an object is reused from a
+"free list" (optimization for faster object creation, used by the builtin list
+type for example).
diff --git a/Modules/_tracemalloc.c b/Modules/_tracemalloc.c
index e07022cce2bc..d736b240fc2d 100644
--- a/Modules/_tracemalloc.c
+++ b/Modules/_tracemalloc.c
@@ -29,27 +29,6 @@ static struct {
     PyMemAllocatorEx obj;
 } allocators;
 
-static struct {
-    /* Module initialized?
-       Variable protected by the GIL */
-    enum {
-        TRACEMALLOC_NOT_INITIALIZED,
-        TRACEMALLOC_INITIALIZED,
-        TRACEMALLOC_FINALIZED
-    } initialized;
-
-    /* Is tracemalloc tracing memory allocations?
-       Variable protected by the GIL */
-    int tracing;
-
-    /* limit of the number of frames in a traceback, 1 by default.
-       Variable protected by the GIL. */
-    int max_nframe;
-
-    /* use domain in trace key?
-       Variable protected by the GIL. */
-    int use_domain;
-} tracemalloc_config = {TRACEMALLOC_NOT_INITIALIZED, 0, 1, 0};
 
 #if defined(TRACE_RAW_MALLOC)
 /* This lock is needed because tracemalloc_free() is called without
@@ -459,7 +438,7 @@ traceback_get_frames(traceback_t *traceback)
         tracemalloc_get_frame(pyframe, &traceback->frames[traceback->nframe]);
         assert(traceback->frames[traceback->nframe].filename != NULL);
         traceback->nframe++;
-        if (traceback->nframe == tracemalloc_config.max_nframe)
+        if (traceback->nframe == _Py_tracemalloc_config.max_nframe)
             break;
     }
 }
@@ -540,7 +519,7 @@ tracemalloc_use_domain(void)
 {
     _Py_hashtable_t *new_traces = NULL;
 
-    assert(!tracemalloc_config.use_domain);
+    assert(!_Py_tracemalloc_config.use_domain);
 
     new_traces = hashtable_new(sizeof(pointer_t),
                                sizeof(trace_t),
@@ -560,7 +539,7 @@ tracemalloc_use_domain(void)
     _Py_hashtable_destroy(tracemalloc_traces);
     tracemalloc_traces = new_traces;
 
-    tracemalloc_config.use_domain = 1;
+    _Py_tracemalloc_config.use_domain = 1;
 
     return 0;
 }
@@ -572,9 +551,9 @@ tracemalloc_remove_trace(unsigned int domain, uintptr_t ptr)
     trace_t trace;
     int removed;
 
-    assert(tracemalloc_config.tracing);
+    assert(_Py_tracemalloc_config.tracing);
 
-    if (tracemalloc_config.use_domain) {
+    if (_Py_tracemalloc_config.use_domain) {
         pointer_t key = {ptr, domain};
         removed = _Py_HASHTABLE_POP(tracemalloc_traces, key, trace);
     }
@@ -603,14 +582,14 @@ tracemalloc_add_trace(unsigned int domain, uintptr_t ptr,
     _Py_hashtable_entry_t* entry;
     int res;
 
-    assert(tracemalloc_config.tracing);
+    assert(_Py_tracemalloc_config.tracing);
 
     traceback = traceback_new();
     if (traceback == NULL) {
         return -1;
     }
 
-    if (!tracemalloc_config.use_domain && domain != DEFAULT_DOMAIN) {
+    if (!_Py_tracemalloc_config.use_domain && domain != DEFAULT_DOMAIN) {
         /* first trace using a non-zero domain whereas traces use compact
            (uintptr_t) keys: switch to pointer_t keys. */
         if (tracemalloc_use_domain() < 0) {
@@ -618,7 +597,7 @@ tracemalloc_add_trace(unsigned int domain, uintptr_t ptr,
         }
     }
 
-    if (tracemalloc_config.use_domain) {
+    if (_Py_tracemalloc_config.use_domain) {
         entry = _Py_HASHTABLE_GET_ENTRY(tracemalloc_traces, key);
     }
     else {
@@ -639,7 +618,7 @@ tracemalloc_add_trace(unsigned int domain, uintptr_t ptr,
         trace.size = size;
         trace.traceback = traceback;
 
-        if (tracemalloc_config.use_domain) {
+        if (_Py_tracemalloc_config.use_domain) {
             res = _Py_HASHTABLE_SET(tracemalloc_traces, key, trace);
         }
         else {
@@ -956,13 +935,13 @@ tracemalloc_clear_traces(void)
 static int
 tracemalloc_init(void)
 {
-    if (tracemalloc_config.initialized == TRACEMALLOC_FINALIZED) {
+    if (_Py_tracemalloc_config.initialized == TRACEMALLOC_FINALIZED) {
         PyErr_SetString(PyExc_RuntimeError,
                         "the tracemalloc module has been unloaded");
         return -1;
     }
 
-    if (tracemalloc_config.initialized == TRACEMALLOC_INITIALIZED)
+    if (_Py_tracemalloc_config.initialized == TRACEMALLOC_INITIALIZED)
         return 0;
 
     PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &allocators.raw);
@@ -996,7 +975,7 @@ tracemalloc_init(void)
                                            hashtable_hash_traceback,
                                            hashtable_compare_traceback);
 
-    if (tracemalloc_config.use_domain) {
+    if (_Py_tracemalloc_config.use_domain) {
         tracemalloc_traces = hashtable_new(sizeof(pointer_t),
                                            sizeof(trace_t),
                                            hashtable_hash_pointer_t,
@@ -1026,7 +1005,7 @@ tracemalloc_init(void)
     tracemalloc_empty_traceback.frames[0].lineno = 0;
     tracemalloc_empty_traceback.hash = traceback_hash(&tracemalloc_empty_traceback);
 
-    tracemalloc_config.initialized = TRACEMALLOC_INITIALIZED;
+    _Py_tracemalloc_config.initialized = TRACEMALLOC_INITIALIZED;
     return 0;
 }
 
@@ -1034,9 +1013,9 @@ tracemalloc_init(void)
 static void
 tracemalloc_deinit(void)
 {
-    if (tracemalloc_config.initialized != TRACEMALLOC_INITIALIZED)
+    if (_Py_tracemalloc_config.initialized != TRACEMALLOC_INITIALIZED)
         return;
-    tracemalloc_config.initialized = TRACEMALLOC_FINALIZED;
+    _Py_tracemalloc_config.initialized = TRACEMALLOC_FINALIZED;
 
     tracemalloc_stop();
 
@@ -1077,13 +1056,13 @@ tracemalloc_start(int max_nframe)
         return -1;
     }
 
-    if (tracemalloc_config.tracing) {
+    if (_Py_tracemalloc_config.tracing) {
         /* hook already installed: do nothing */
         return 0;
     }
 
     assert(1 <= max_nframe && max_nframe <= MAX_NFRAME);
-    tracemalloc_config.max_nframe = max_nframe;
+    _Py_tracemalloc_config.max_nframe = max_nframe;
 
     /* allocate a buffer to store a new traceback */
     size = TRACEBACK_SIZE(max_nframe);
@@ -1119,7 +1098,7 @@ tracemalloc_start(int max_nframe)
     PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &alloc);
 
     /* everything is ready: start tracing Python memory allocations */
-    tracemalloc_config.tracing = 1;
+    _Py_tracemalloc_config.tracing = 1;
 
     return 0;
 }
@@ -1128,11 +1107,11 @@ tracemalloc_start(int max_nframe)
 static void
 tracemalloc_stop(void)
 {
-    if (!tracemalloc_config.tracing)
+    if (!_Py_tracemalloc_config.tracing)
         return;
 
     /* stop tracing Python memory allocations */
-    tracemalloc_config.tracing = 0;
+    _Py_tracemalloc_config.tracing = 0;
 
     /* unregister the hook on memory allocators */
 #ifdef TRACE_RAW_MALLOC
@@ -1160,7 +1139,7 @@ static PyObject *
 _tracemalloc_is_tracing_impl(PyObject *module)
 /*[clinic end generated code: output=2d763b42601cd3ef input=af104b0a00192f63]*/
 {
-    return PyBool_FromLong(tracemalloc_config.tracing);
+    return PyBool_FromLong(_Py_tracemalloc_config.tracing);
 }
 
 
@@ -1174,7 +1153,7 @@ static PyObject *
 _tracemalloc_clear_traces_impl(PyObject *module)
 /*[clinic end generated code: output=a86080ee41b84197 input=0dab5b6c785183a5]*/
 {
-    if (!tracemalloc_config.tracing)
+    if (!_Py_tracemalloc_config.tracing)
         Py_RETURN_NONE;
 
     set_reentrant(1);
@@ -1299,7 +1278,7 @@ tracemalloc_get_traces_fill(_Py_hashtable_t *traces, _Py_hashtable_entry_t *entr
     PyObject *tracemalloc_obj;
     int res;
 
-    if (tracemalloc_config.use_domain) {
+    if (_Py_tracemalloc_config.use_domain) {
         pointer_t key;
         _Py_HASHTABLE_ENTRY_READ_KEY(traces, entry, key);
         domain = key.domain;
@@ -1359,7 +1338,7 @@ _tracemalloc__get_traces_impl(PyObject *module)
     if (get_traces.list == NULL)
         goto error;
 
-    if (!tracemalloc_config.tracing)
+    if (!_Py_tracemalloc_config.tracing)
         return get_traces.list;
 
     /* the traceback hash table is used temporarily to intern traceback tuple
@@ -1414,11 +1393,11 @@ tracemalloc_get_traceback(unsigned int domain, uintptr_t ptr)
     trace_t trace;
     int found;
 
-    if (!tracemalloc_config.tracing)
+    if (!_Py_tracemalloc_config.tracing)
         return NULL;
 
     TABLES_LOCK();
-    if (tracemalloc_config.use_domain) {
+    if (_Py_tracemalloc_config.use_domain) {
         pointer_t key = {ptr, domain};
         found = _Py_HASHTABLE_GET(tracemalloc_traces, key, trace);
     }
@@ -1558,7 +1537,7 @@ static PyObject *
 _tracemalloc_get_traceback_limit_impl(PyObject *module)
 /*[clinic end generated code: output=d556d9306ba95567 input=da3cd977fc68ae3b]*/
 {
-    return PyLong_FromLong(tracemalloc_config.max_nframe);
+    return PyLong_FromLong(_Py_tracemalloc_config.max_nframe);
 }
 
 
@@ -1603,7 +1582,7 @@ _tracemalloc_get_traced_memory_impl(PyObject *module)
 {
     Py_ssize_t size, peak_size;
 
-    if (!tracemalloc_config.tracing)
+    if (!_Py_tracemalloc_config.tracing)
         return Py_BuildValue("ii", 0, 0);
 
     TABLES_LOCK();
@@ -1681,7 +1660,7 @@ PyTraceMalloc_Track(unsigned int domain, uintptr_t ptr,
     int res;
     PyGILState_STATE gil_state;
 
-    if (!tracemalloc_config.tracing) {
+    if (!_Py_tracemalloc_config.tracing) {
         /* tracemalloc is not tracing: do nothing */
         return -2;
     }
@@ -1700,7 +1679,7 @@ PyTraceMalloc_Track(unsigned int domain, uintptr_t ptr,
 int
 PyTraceMalloc_Untrack(unsigned int domain, uintptr_t ptr)
 {
-    if (!tracemalloc_config.tracing) {
+    if (!_Py_tracemalloc_config.tracing) {
         /* tracemalloc is not tracing: do nothing */
         return -2;
     }
@@ -1713,6 +1692,60 @@ PyTraceMalloc_Untrack(unsigned int domain, uintptr_t ptr)
 }
 
 
+/* If the object memory block is already traced, update its trace
+   with the current Python traceback.
+
+   Do nothing if tracemalloc is not tracing memory allocations
+   or if the object memory block is not already traced. */
+int
+_PyTraceMalloc_NewReference(PyObject *op)
+{
+    assert(PyGILState_Check());
+
+    if (!_Py_tracemalloc_config.tracing) {
+        /* tracemalloc is not tracing: do nothing */
+        return -1;
+    }
+
+    uintptr_t ptr;
+    PyTypeObject *type = Py_TYPE(op);
+    if (PyType_IS_GC(type)) {
+        ptr = (uintptr_t)((char *)op - sizeof(PyGC_Head));
+    }
+    else {
+        ptr = (uintptr_t)op;
+    }
+
+    _Py_hashtable_entry_t* entry;
+    int res = -1;
+
+    TABLES_LOCK();
+    if (_Py_tracemalloc_config.use_domain) {
+        pointer_t key = {ptr, DEFAULT_DOMAIN};
+        entry = _Py_HASHTABLE_GET_ENTRY(tracemalloc_traces, key);
+    }
+    else {
+        entry = _Py_HASHTABLE_GET_ENTRY(tracemalloc_traces, ptr);
+    }
+
+    if (entry != NULL) {
+        /* update the traceback of the memory block */
+        traceback_t *traceback = traceback_new();
+        if (traceback != NULL) {
+            trace_t trace;
+            _Py_HASHTABLE_ENTRY_READ_DATA(tracemalloc_traces, entry, trace);
+            trace.traceback = traceback;
+            _Py_HASHTABLE_ENTRY_WRITE_DATA(tracemalloc_traces, entry, trace);
+            res = 0;
+        }
+    }
+    /* else: cannot track the object, its memory block size is unknown */
+    TABLES_UNLOCK();
+
+    return res;
+}
+
+
 PyObject*
 _PyTraceMalloc_GetTraceback(unsigned int domain, uintptr_t ptr)
 {
diff --git a/Objects/object.c b/Objects/object.c
index 00c0bad86152..4597b1266ae8 100644
--- a/Objects/object.c
+++ b/Objects/object.c
@@ -1919,6 +1919,9 @@ _Py_ReadyTypes(void)
 void
 _Py_NewReference(PyObject *op)
 {
+    if (_Py_tracemalloc_config.tracing) {
+        _PyTraceMalloc_NewReference(op);
+    }
     _Py_INC_REFTOTAL;
     op->ob_refcnt = 1;
     _Py_AddToAllObjects(op, 1);
diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c
index d58da35952de..fbc947806908 100644
--- a/Objects/obmalloc.c
+++ b/Objects/obmalloc.c
@@ -63,6 +63,12 @@ static void* _PyObject_Realloc(void *ctx, void *ptr, size_t size);
 #endif
 
 
+/* bpo-35053: Declare tracemalloc configuration here rather than
+   Modules/_tracemalloc.c because _tracemalloc can be compiled as dynamic
+   library, whereas _Py_NewReference() requires it. */
+struct _PyTraceMalloc_Config _Py_tracemalloc_config = _PyTraceMalloc_Config_INIT;
+
+
 static void *
 _PyMem_RawMalloc(void *ctx, size_t size)
 {



More information about the Python-checkins mailing list