[Python-checkins] bpo-43760: Speed up check for tracing in interpreter dispatch (#25276)

markshannon webhook-mailer at python.org
Tue Apr 13 06:08:32 EDT 2021


https://github.com/python/cpython/commit/9e7b2076fb4380987ad0262c4c0ca900b06475ad
commit: 9e7b2076fb4380987ad0262c4c0ca900b06475ad
branch: master
author: Mark Shannon <mark at hotpy.org>
committer: markshannon <mark at hotpy.org>
date: 2021-04-13T11:08:14+01:00
summary:

bpo-43760: Speed up check for tracing in interpreter dispatch (#25276)

* Remove redundant tracing_possible field from interpreter state.

* Move 'use_tracing' from tstate onto C stack, for fastest possible checking in dispatch logic.

* Add comments stressing the importance stack discipline when dealing with CFrames.

* Add NEWS

files:
A Misc/NEWS.d/next/Core and Builtins/2021-04-08-12-20-29.bpo-43760.tBIsD8.rst
M Include/cpython/pystate.h
M Include/internal/pycore_interp.h
M Python/ceval.c
M Python/pystate.c
M Python/sysmodule.c

diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h
index cfaee890f9715..e3ccc54356084 100644
--- a/Include/cpython/pystate.h
+++ b/Include/cpython/pystate.h
@@ -29,6 +29,21 @@ typedef int (*Py_tracefunc)(PyObject *, PyFrameObject *, int, PyObject *);
 #define PyTrace_OPCODE 7
 
 
+typedef struct _cframe {
+    /* This struct will be threaded through the C stack
+     * allowing fast access to per-thread state that needs
+     * to be accessed quickly by the interpreter, but can
+     * be modified outside of the interpreter.
+     *
+     * WARNING: This makes data on the C stack accessible from
+     * heap objects. Care must be taken to maintain stack
+     * discipline and make sure that instances of this struct cannot
+     * accessed outside of their lifetime.
+     */
+    int use_tracing;
+    struct _cframe *previous;
+} CFrame;
+
 typedef struct _err_stackitem {
     /* This struct represents an entry on the exception stack, which is a
      * per-coroutine state. (Coroutine in the computer science sense,
@@ -61,7 +76,10 @@ struct _ts {
        This is to prevent the actual trace/profile code from being recorded in
        the trace/profile. */
     int tracing;
-    int use_tracing;
+
+    /* Pointer to current CFrame in the C stack frame of the currently,
+     * or most recently, executing _PyEval_EvalFrameDefault. */
+    CFrame *cframe;
 
     Py_tracefunc c_profilefunc;
     Py_tracefunc c_tracefunc;
@@ -129,6 +147,8 @@ struct _ts {
     /* Unique thread state id. */
     uint64_t id;
 
+    CFrame root_cframe;
+
     /* XXX signal handlers should also be here */
 
 };
diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h
index fa0e26feb0f84..dbdd109bd7883 100644
--- a/Include/internal/pycore_interp.h
+++ b/Include/internal/pycore_interp.h
@@ -33,12 +33,6 @@ struct _pending_calls {
 
 struct _ceval_state {
     int recursion_limit;
-    /* Records whether tracing is on for any thread.  Counts the number
-       of threads for which tstate->c_tracefunc is non-NULL, so if the
-       value is 0, we know we don't have to check this thread's
-       c_tracefunc.  This speeds up the if statement in
-       _PyEval_EvalFrameDefault() after fast_next_opcode. */
-    int tracing_possible;
     /* This single variable consolidates all requests to break out of
        the fast path in the eval loop. */
     _Py_atomic_int eval_breaker;
diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-04-08-12-20-29.bpo-43760.tBIsD8.rst b/Misc/NEWS.d/next/Core and Builtins/2021-04-08-12-20-29.bpo-43760.tBIsD8.rst
new file mode 100644
index 0000000000000..e6612103d807e
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2021-04-08-12-20-29.bpo-43760.tBIsD8.rst	
@@ -0,0 +1,2 @@
+Move the flag for checking whether tracing is enabled to the C stack, from the heap.
+Should speed up dispatch in the interpreter.
diff --git a/Python/ceval.c b/Python/ceval.c
index c12116044b54e..53b596b304c21 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -37,6 +37,7 @@ typedef struct {
     PyCodeObject *code; // The code object for the bounds. May be NULL.
     int instr_prev;  // Only valid if code != NULL.
     PyCodeAddressRange bounds; // Only valid if code != NULL.
+    CFrame cframe;
 } PyTraceInfo;
 
 
@@ -1109,8 +1110,6 @@ match_class(PyThreadState *tstate, PyObject *subject, PyObject *type,
 static int do_raise(PyThreadState *tstate, PyObject *exc, PyObject *cause);
 static int unpack_iterable(PyThreadState *, PyObject *, int, int, PyObject **);
 
-#define _Py_TracingPossible(ceval) ((ceval)->tracing_possible)
-
 
 PyObject *
 PyEval_EvalCode(PyObject *co, PyObject *globals, PyObject *locals)
@@ -1307,7 +1306,7 @@ eval_frame_handle_pending(PyThreadState *tstate)
 
 #define DISPATCH() \
     { \
-        if (_Py_TracingPossible(ceval2) OR_DTRACE_LINE OR_LLTRACE) { \
+        if (trace_info.cframe.use_tracing OR_DTRACE_LINE OR_LLTRACE) { \
             goto tracing_dispatch; \
         } \
         f->f_lasti = INSTR_OFFSET(); \
@@ -1595,8 +1594,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
     int oparg;         /* Current opcode argument, if any */
     PyObject **fastlocals, **freevars;
     PyObject *retval = NULL;            /* Return value */
-    struct _ceval_state * const ceval2 = &tstate->interp->ceval;
-    _Py_atomic_int * const eval_breaker = &ceval2->eval_breaker;
+    _Py_atomic_int * const eval_breaker = &tstate->interp->ceval.eval_breaker;
     PyCodeObject *co;
 
     const _Py_CODEUNIT *first_instr;
@@ -1616,11 +1614,20 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
     /* Mark trace_info as uninitialized */
     trace_info.code = NULL;
 
+    /* WARNING: Because the CFrame lives on the C stack,
+     * but can be accessed from a heap allocated object (tstate)
+     * strict stack discipline must be maintained.
+     */
+    CFrame *prev_cframe = tstate->cframe;
+    trace_info.cframe.use_tracing = prev_cframe->use_tracing;
+    trace_info.cframe.previous = prev_cframe;
+    tstate->cframe = &trace_info.cframe;
+
     /* push frame */
     tstate->frame = f;
     co = f->f_code;
 
-    if (tstate->use_tracing) {
+    if (trace_info.cframe.use_tracing) {
         if (tstate->c_tracefunc != NULL) {
             /* tstate->c_tracefunc, if defined, is a
                function that will be called on *every* entry
@@ -1782,7 +1789,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
 
         /* line-by-line tracing support */
 
-        if (_Py_TracingPossible(ceval2) &&
+        if (trace_info.cframe.use_tracing &&
             tstate->c_tracefunc != NULL && !tstate->tracing) {
             int err;
             /* see maybe_call_line_trace()
@@ -4543,7 +4550,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
                 PUSH(val);
                 PUSH(exc);
                 JUMPTO(handler);
-                if (_Py_TracingPossible(ceval2)) {
+                if (trace_info.cframe.use_tracing) {
                     trace_info.instr_prev = INT_MAX;
                 }
                 /* Resume normal execution */
@@ -4567,7 +4574,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
     f->f_stackdepth = 0;
     f->f_state = FRAME_RAISED;
 exiting:
-    if (tstate->use_tracing) {
+    if (trace_info.cframe.use_tracing) {
         if (tstate->c_tracefunc) {
             if (call_trace_protected(tstate->c_tracefunc, tstate->c_traceobj,
                                      tstate, f, &trace_info, PyTrace_RETURN, retval)) {
@@ -4584,6 +4591,10 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
 
     /* pop frame */
 exit_eval_frame:
+    /* Restore previous cframe */
+    tstate->cframe = trace_info.cframe.previous;
+    tstate->cframe->use_tracing = trace_info.cframe.use_tracing;
+
     if (PyDTrace_FUNCTION_RETURN_ENABLED())
         dtrace_function_return(f);
     _Py_LeaveRecursiveCall(tstate);
@@ -5507,7 +5518,7 @@ call_trace(Py_tracefunc func, PyObject *obj,
     if (tstate->tracing)
         return 0;
     tstate->tracing++;
-    tstate->use_tracing = 0;
+    tstate->cframe->use_tracing = 0;
     if (frame->f_lasti < 0) {
         frame->f_lineno = frame->f_code->co_firstlineno;
     }
@@ -5517,7 +5528,7 @@ call_trace(Py_tracefunc func, PyObject *obj,
     }
     result = func(obj, frame, what, arg);
     frame->f_lineno = 0;
-    tstate->use_tracing = ((tstate->c_tracefunc != NULL)
+    tstate->cframe->use_tracing = ((tstate->c_tracefunc != NULL)
                            || (tstate->c_profilefunc != NULL));
     tstate->tracing--;
     return result;
@@ -5528,15 +5539,15 @@ _PyEval_CallTracing(PyObject *func, PyObject *args)
 {
     PyThreadState *tstate = _PyThreadState_GET();
     int save_tracing = tstate->tracing;
-    int save_use_tracing = tstate->use_tracing;
+    int save_use_tracing = tstate->cframe->use_tracing;
     PyObject *result;
 
     tstate->tracing = 0;
-    tstate->use_tracing = ((tstate->c_tracefunc != NULL)
+    tstate->cframe->use_tracing = ((tstate->c_tracefunc != NULL)
                            || (tstate->c_profilefunc != NULL));
     result = PyObject_Call(func, args, NULL);
     tstate->tracing = save_tracing;
-    tstate->use_tracing = save_use_tracing;
+    tstate->cframe->use_tracing = save_use_tracing;
     return result;
 }
 
@@ -5590,7 +5601,7 @@ _PyEval_SetProfile(PyThreadState *tstate, Py_tracefunc func, PyObject *arg)
     tstate->c_profilefunc = NULL;
     tstate->c_profileobj = NULL;
     /* Must make sure that tracing is not ignored if 'profileobj' is freed */
-    tstate->use_tracing = tstate->c_tracefunc != NULL;
+    tstate->cframe->use_tracing = tstate->c_tracefunc != NULL;
     Py_XDECREF(profileobj);
 
     Py_XINCREF(arg);
@@ -5598,7 +5609,7 @@ _PyEval_SetProfile(PyThreadState *tstate, Py_tracefunc func, PyObject *arg)
     tstate->c_profilefunc = func;
 
     /* Flag that tracing or profiling is turned on */
-    tstate->use_tracing = (func != NULL) || (tstate->c_tracefunc != NULL);
+    tstate->cframe->use_tracing = (func != NULL) || (tstate->c_tracefunc != NULL);
     return 0;
 }
 
@@ -5626,14 +5637,12 @@ _PyEval_SetTrace(PyThreadState *tstate, Py_tracefunc func, PyObject *arg)
         return -1;
     }
 
-    struct _ceval_state *ceval2 = &tstate->interp->ceval;
     PyObject *traceobj = tstate->c_traceobj;
-    ceval2->tracing_possible += (func != NULL) - (tstate->c_tracefunc != NULL);
 
     tstate->c_tracefunc = NULL;
     tstate->c_traceobj = NULL;
     /* Must make sure that profiling is not ignored if 'traceobj' is freed */
-    tstate->use_tracing = (tstate->c_profilefunc != NULL);
+    tstate->cframe->use_tracing = (tstate->c_profilefunc != NULL);
     Py_XDECREF(traceobj);
 
     Py_XINCREF(arg);
@@ -5641,7 +5650,7 @@ _PyEval_SetTrace(PyThreadState *tstate, Py_tracefunc func, PyObject *arg)
     tstate->c_tracefunc = func;
 
     /* Flag that tracing or profiling is turned on */
-    tstate->use_tracing = ((func != NULL)
+    tstate->cframe->use_tracing = ((func != NULL)
                            || (tstate->c_profilefunc != NULL));
 
     return 0;
@@ -5836,7 +5845,7 @@ PyEval_GetFuncDesc(PyObject *func)
 }
 
 #define C_TRACE(x, call) \
-if (tstate->use_tracing && tstate->c_profilefunc) { \
+if (trace_info->cframe.use_tracing && tstate->c_profilefunc) { \
     if (call_trace(tstate->c_profilefunc, tstate->c_profileobj, \
         tstate, tstate->frame, trace_info, \
         PyTrace_C_CALL, func)) { \
@@ -5917,7 +5926,7 @@ call_function(PyThreadState *tstate,
     Py_ssize_t nargs = oparg - nkwargs;
     PyObject **stack = (*pp_stack) - nargs - nkwargs;
 
-    if (tstate->use_tracing) {
+    if (trace_info->cframe.use_tracing) {
         x = trace_call_function(tstate, trace_info, func, stack, nargs, kwnames);
     }
     else {
@@ -5950,7 +5959,7 @@ do_call_core(PyThreadState *tstate,
     }
     else if (Py_IS_TYPE(func, &PyMethodDescr_Type)) {
         Py_ssize_t nargs = PyTuple_GET_SIZE(callargs);
-        if (nargs > 0 && tstate->use_tracing) {
+        if (nargs > 0 && trace_info->cframe.use_tracing) {
             /* We need to create a temporary bound method as argument
                for profiling.
 
diff --git a/Python/pystate.c b/Python/pystate.c
index c8b2530f68dfe..436f874842cc8 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -624,7 +624,8 @@ new_threadstate(PyInterpreterState *interp, int init)
     tstate->recursion_headroom = 0;
     tstate->stackcheck_counter = 0;
     tstate->tracing = 0;
-    tstate->use_tracing = 0;
+    tstate->root_cframe.use_tracing = 0;
+    tstate->cframe = &tstate->root_cframe;
     tstate->gilstate_counter = 0;
     tstate->async_exc = NULL;
     tstate->thread_id = PyThread_get_thread_ident();
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
index 54d70ef056975..a36d90f9de168 100644
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -252,7 +252,7 @@ sys_audit_tstate(PyThreadState *ts, const char *event,
 
         /* Disallow tracing in hooks unless explicitly enabled */
         ts->tracing++;
-        ts->use_tracing = 0;
+        ts->cframe->use_tracing = 0;
         while ((hook = PyIter_Next(hooks)) != NULL) {
             _Py_IDENTIFIER(__cantrace__);
             PyObject *o;
@@ -265,14 +265,14 @@ sys_audit_tstate(PyThreadState *ts, const char *event,
                 break;
             }
             if (canTrace) {
-                ts->use_tracing = (ts->c_tracefunc || ts->c_profilefunc);
+                ts->cframe->use_tracing = (ts->c_tracefunc || ts->c_profilefunc);
                 ts->tracing--;
             }
             PyObject* args[2] = {eventName, eventArgs};
             o = _PyObject_FastCallTstate(ts, hook, args, 2);
             if (canTrace) {
                 ts->tracing++;
-                ts->use_tracing = 0;
+                ts->cframe->use_tracing = 0;
             }
             if (!o) {
                 break;
@@ -280,7 +280,7 @@ sys_audit_tstate(PyThreadState *ts, const char *event,
             Py_DECREF(o);
             Py_CLEAR(hook);
         }
-        ts->use_tracing = (ts->c_tracefunc || ts->c_profilefunc);
+        ts->cframe->use_tracing = (ts->c_tracefunc || ts->c_profilefunc);
         ts->tracing--;
         if (_PyErr_Occurred(ts)) {
             goto exit;



More information about the Python-checkins mailing list