[Python-checkins] bpo-45256: Avoid C calls for most Python to Python calls. (GH-28937)

markshannon webhook-mailer at python.org
Mon Oct 18 04:57:29 EDT 2021


https://github.com/python/cpython/commit/70945d57e775b335eb58b734d82e68484063e835
commit: 70945d57e775b335eb58b734d82e68484063e835
branch: main
author: Mark Shannon <mark at hotpy.org>
committer: markshannon <mark at hotpy.org>
date: 2021-10-18T09:57:24+01:00
summary:

bpo-45256: Avoid C calls for most Python to Python calls. (GH-28937)

* Avoid making C calls for most calls to Python functions.

* Change initialize_locals(steal=true) and _PyTuple_FromArraySteal to consume the argument references regardless of whether they succeed or fail.

files:
M Lib/test/test_gdb.py
M Objects/tupleobject.c
M Python/ceval.c

diff --git a/Lib/test/test_gdb.py b/Lib/test/test_gdb.py
index fb0f1295574b9..2805eaf9f9567 100644
--- a/Lib/test/test_gdb.py
+++ b/Lib/test/test_gdb.py
@@ -724,24 +724,36 @@ def test_two_abs_args(self):
                            '   3    def foo(a, b, c):\n',
                            bt)
 
+SAMPLE_WITH_C_CALL = """
+
+from _testcapi import pyobject_fastcall
+
+def foo(a, b, c):
+    bar(a, b, c)
+
+def bar(a, b, c):
+    pyobject_fastcall(baz, (a, b, c))
+
+def baz(*args):
+    id(42)
+
+foo(1, 2, 3)
+
+"""
+
+
 class StackNavigationTests(DebuggerTests):
     @unittest.skipUnless(HAS_PYUP_PYDOWN, "test requires py-up/py-down commands")
     @unittest.skipIf(python_is_optimized(),
                      "Python was compiled with optimizations")
     def test_pyup_command(self):
         'Verify that the "py-up" command works'
-        bt = self.get_stack_trace(script=self.get_sample_script(),
+        bt = self.get_stack_trace(source=SAMPLE_WITH_C_CALL,
                                   cmds_after_breakpoint=['py-up', 'py-up'])
         self.assertMultilineMatches(bt,
                                     r'''^.*
-#[0-9]+ Frame 0x-?[0-9a-f]+, for file .*gdb_sample.py, line 10, in baz \(args=\(1, 2, 3\)\)
-    id\(42\)
-#[0-9]+ Frame 0x-?[0-9a-f]+, for file .*gdb_sample.py, line 7, in bar \(a=1, b=2, c=3\)
-    baz\(a, b, c\)
-#[0-9]+ Frame 0x-?[0-9a-f]+, for file .*gdb_sample.py, line 4, in foo \(a=1, b=2, c=3\)
-    bar\(a=a, b=b, c=c\)
-#[0-9]+ Frame 0x-?[0-9a-f]+, for file .*gdb_sample.py, line 12, in <module> \(\)
-    foo\(1, 2, 3\)
+#[0-9]+ Frame 0x-?[0-9a-f]+, for file <string>, line 12, in baz \(args=\(1, 2, 3\)\)
+#[0-9]+ <built-in method pyobject_fastcall of module object at remote 0x[0-9a-f]+>
 $''')
 
     @unittest.skipUnless(HAS_PYUP_PYDOWN, "test requires py-up/py-down commands")
@@ -765,22 +777,13 @@ def test_up_at_top(self):
                      "Python was compiled with optimizations")
     def test_up_then_down(self):
         'Verify "py-up" followed by "py-down"'
-        bt = self.get_stack_trace(script=self.get_sample_script(),
+        bt = self.get_stack_trace(source=SAMPLE_WITH_C_CALL,
                                   cmds_after_breakpoint=['py-up', 'py-up', 'py-down'])
         self.assertMultilineMatches(bt,
                                     r'''^.*
-#[0-9]+ Frame 0x-?[0-9a-f]+, for file .*gdb_sample.py, line 10, in baz \(args=\(1, 2, 3\)\)
-    id\(42\)
-#[0-9]+ Frame 0x-?[0-9a-f]+, for file .*gdb_sample.py, line 7, in bar \(a=1, b=2, c=3\)
-    baz\(a, b, c\)
-#[0-9]+ Frame 0x-?[0-9a-f]+, for file .*gdb_sample.py, line 4, in foo \(a=1, b=2, c=3\)
-    bar\(a=a, b=b, c=c\)
-#[0-9]+ Frame 0x-?[0-9a-f]+, for file .*gdb_sample.py, line 12, in <module> \(\)
-    foo\(1, 2, 3\)
-#[0-9]+ Frame 0x-?[0-9a-f]+, for file .*gdb_sample.py, line 10, in baz \(args=\(1, 2, 3\)\)
-    id\(42\)
-#[0-9]+ Frame 0x-?[0-9a-f]+, for file .*gdb_sample.py, line 7, in bar \(a=1, b=2, c=3\)
-    baz\(a, b, c\)
+#[0-9]+ Frame 0x-?[0-9a-f]+, for file <string>, line 12, in baz \(args=\(1, 2, 3\)\)
+#[0-9]+ <built-in method pyobject_fastcall of module object at remote 0x[0-9a-f]+>
+#[0-9]+ Frame 0x-?[0-9a-f]+, for file <string>, line 12, in baz \(args=\(1, 2, 3\)\)
 $''')
 
 class PyBtTests(DebuggerTests):
@@ -970,13 +973,12 @@ def __init__(self):
         self.assertRegex(gdb_output,
                          r"<method-wrapper u?'__init__' of MyList object at ")
 
-
 class PyPrintTests(DebuggerTests):
     @unittest.skipIf(python_is_optimized(),
                      "Python was compiled with optimizations")
     def test_basic_command(self):
         'Verify that the "py-print" command works'
-        bt = self.get_stack_trace(script=self.get_sample_script(),
+        bt = self.get_stack_trace(source=SAMPLE_WITH_C_CALL,
                                   cmds_after_breakpoint=['py-up', 'py-print args'])
         self.assertMultilineMatches(bt,
                                     r".*\nlocal 'args' = \(1, 2, 3\)\n.*")
@@ -985,7 +987,7 @@ def test_basic_command(self):
                      "Python was compiled with optimizations")
     @unittest.skipUnless(HAS_PYUP_PYDOWN, "test requires py-up/py-down commands")
     def test_print_after_up(self):
-        bt = self.get_stack_trace(script=self.get_sample_script(),
+        bt = self.get_stack_trace(source=SAMPLE_WITH_C_CALL,
                                   cmds_after_breakpoint=['py-up', 'py-up', 'py-print c', 'py-print b', 'py-print a'])
         self.assertMultilineMatches(bt,
                                     r".*\nlocal 'c' = 3\nlocal 'b' = 2\nlocal 'a' = 1\n.*")
diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c
index 018e738af06e3..051683086ea2c 100644
--- a/Objects/tupleobject.c
+++ b/Objects/tupleobject.c
@@ -490,9 +490,11 @@ _PyTuple_FromArraySteal(PyObject *const *src, Py_ssize_t n)
     if (n == 0) {
         return tuple_get_empty();
     }
-
     PyTupleObject *tuple = tuple_alloc(n);
     if (tuple == NULL) {
+        for (Py_ssize_t i = 0; i < n; i++) {
+            Py_DECREF(src[i]);
+        }
         return NULL;
     }
     PyObject **dst = tuple->ob_item;
diff --git a/Python/ceval.c b/Python/ceval.c
index 898687224e696..60fdf01627eba 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -50,9 +50,9 @@
 _Py_IDENTIFIER(__name__);
 
 /* Forward declarations */
-Py_LOCAL_INLINE(PyObject *) call_function(
-    PyThreadState *tstate, PyObject ***pp_stack,
-    Py_ssize_t oparg, PyObject *kwnames, int use_tracing);
+static PyObject *trace_call_function(
+    PyThreadState *tstate, PyObject *callable, PyObject **stack,
+    Py_ssize_t oparg, PyObject *kwnames);
 static PyObject * do_call_core(
     PyThreadState *tstate, PyObject *func,
     PyObject *callargs, PyObject *kwdict, int use_tracing);
@@ -1702,6 +1702,11 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
         switch (opcode) {
 #endif
 
+        /* Variables used for making calls */
+        PyObject *kwnames;
+        int nargs;
+        int postcall_shrink;
+
         /* BEWARE!
            It is essential that any operation that fails must goto error
            and that all operation that succeed call DISPATCH() ! */
@@ -4599,10 +4604,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
 
         TARGET(CALL_METHOD) {
             /* Designed to work in tamdem with LOAD_METHOD. */
-            PyObject **sp, *res;
-            int meth_found;
-
-            sp = stack_pointer;
             /* `meth` is NULL when LOAD_METHOD thinks that it's not
                 a method call.
 
@@ -4628,114 +4629,84 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
                We'll be passing `oparg + 1` to call_function, to
                make it accept the `self` as a first argument.
             */
-            meth_found = (PEEK(oparg + 2) != NULL);
-            res = call_function(tstate, &sp, oparg + meth_found, NULL, cframe.use_tracing);
-            stack_pointer = sp;
-
-            STACK_SHRINK(1 - meth_found);
-            PUSH(res);
-            if (res == NULL) {
-                goto error;
-            }
-            CHECK_EVAL_BREAKER();
-            DISPATCH();
+            int is_method = (PEEK(oparg + 2) != NULL);
+            oparg += is_method;
+            nargs = oparg;
+            kwnames = NULL;
+            postcall_shrink = 2-is_method;
+            goto call_function;
         }
 
         TARGET(CALL_METHOD_KW) {
             /* Designed to work in tandem with LOAD_METHOD. Same as CALL_METHOD
             but pops TOS to get a tuple of keyword names. */
-            PyObject **sp, *res;
-            PyObject *names = NULL;
-            int meth_found;
-
-            names = POP();
-
-            sp = stack_pointer;
-            meth_found = (PEEK(oparg + 2) != NULL);
-            res = call_function(tstate, &sp, oparg + meth_found, names, cframe.use_tracing);
-            stack_pointer = sp;
+            kwnames = POP();
+            int is_method = (PEEK(oparg + 2) != NULL);
+            oparg += is_method;
+            nargs = oparg - (int)PyTuple_GET_SIZE(kwnames);
+            postcall_shrink = 2-is_method;
+            goto call_function;
+        }
 
-            STACK_SHRINK(1 - meth_found);
-            PUSH(res);
-            Py_DECREF(names);
-            if (res == NULL) {
-                goto error;
-            }
-            CHECK_EVAL_BREAKER();
-            DISPATCH();
+        TARGET(CALL_FUNCTION_KW) {
+            kwnames = POP();
+            nargs = oparg - (int)PyTuple_GET_SIZE(kwnames);
+            postcall_shrink = 1;
+            goto call_function;
         }
 
         TARGET(CALL_FUNCTION) {
             PREDICTED(CALL_FUNCTION);
-            PyObject *res;
-
+            PyObject *function;
+            nargs = oparg;
+            kwnames = NULL;
+            postcall_shrink = 1;
+        call_function:
             // Check if the call can be inlined or not
-            PyObject *function = PEEK(oparg + 1);
+            function = PEEK(oparg + 1);
             if (Py_TYPE(function) == &PyFunction_Type && tstate->interp->eval_frame == NULL) {
                 int code_flags = ((PyCodeObject*)PyFunction_GET_CODE(function))->co_flags;
-                PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : PyFunction_GET_GLOBALS(function);
                 int is_generator = code_flags & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR);
                 if (!is_generator) {
+                    PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : PyFunction_GET_GLOBALS(function);
+                    STACK_SHRINK(oparg);
                     InterpreterFrame *new_frame = _PyEvalFramePushAndInit(
                         tstate, PyFunction_AS_FRAME_CONSTRUCTOR(function), locals,
-                                                                stack_pointer-oparg,
-                                                                oparg, NULL, 1);
-                    if (new_frame == NULL) {
-                        // When we exit here, we own all variables in the stack
-                        // (the frame creation has not stolen any variable) so
-                        // we need to clean the whole stack (done in the
-                        // "error" label).
-                        goto error;
-                    }
-
-                    STACK_SHRINK(oparg + 1);
+                                                                stack_pointer,
+                                                                nargs, kwnames, 1);
+                    STACK_SHRINK(postcall_shrink);
                     // The frame has stolen all the arguments from the stack,
                     // so there is no need to clean them up.
+                    Py_XDECREF(kwnames);
                     Py_DECREF(function);
+                    if (new_frame == NULL) {
+                        goto error;
+                    }
                     _PyFrame_SetStackPointer(frame, stack_pointer);
                     new_frame->depth = frame->depth + 1;
                     tstate->frame = frame = new_frame;
                     goto start_frame;
                 }
-                else {
-                    /* Callable is a generator or coroutine function: create
-                     * coroutine or generator. */
-                    res = make_coro(tstate, PyFunction_AS_FRAME_CONSTRUCTOR(function),
-                                    locals, stack_pointer-oparg, oparg, NULL);
-                    STACK_SHRINK(oparg + 1);
-                    for (int i = 0; i < oparg + 1; i++) {
-                        Py_DECREF(stack_pointer[i]);
-                    }
-                }
+            }
+            /* Callable is not a normal Python function */
+            PyObject *res;
+            if (cframe.use_tracing) {
+                res = trace_call_function(tstate, function, stack_pointer-oparg, nargs, kwnames);
             }
             else {
-                /* Callable is not a Python function */
-                PyObject **sp = stack_pointer;
-                res = call_function(tstate, &sp, oparg, NULL, cframe.use_tracing);
-                stack_pointer = sp;
+                res = PyObject_Vectorcall(function, stack_pointer-oparg,
+                                          nargs | PY_VECTORCALL_ARGUMENTS_OFFSET, kwnames);
             }
-
-            PUSH(res);
-            if (res == NULL) {
-                goto error;
+            assert((res != NULL) ^ (_PyErr_Occurred(tstate) != NULL));
+            Py_DECREF(function);
+            Py_XDECREF(kwnames);
+            /* Clear the stack */
+            STACK_SHRINK(oparg);
+            for (int i = 0; i < oparg; i++) {
+                Py_DECREF(stack_pointer[i]);
             }
-            CHECK_EVAL_BREAKER();
-            DISPATCH();
-        }
-
-        TARGET(CALL_FUNCTION_KW) {
-            PyObject **sp, *res, *names;
-
-            names = POP();
-            assert(PyTuple_Check(names));
-            assert(PyTuple_GET_SIZE(names) <= oparg);
-            /* We assume without checking that names contains only strings */
-            sp = stack_pointer;
-            res = call_function(tstate, &sp, oparg, names, cframe.use_tracing);
-            stack_pointer = sp;
+            STACK_SHRINK(postcall_shrink);
             PUSH(res);
-            Py_DECREF(names);
-
             if (res == NULL) {
                 goto error;
             }
@@ -5482,7 +5453,7 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con,
     if (co->co_flags & CO_VARKEYWORDS) {
         kwdict = PyDict_New();
         if (kwdict == NULL) {
-            goto fail;
+            goto fail_pre_positional;
         }
         i = total_args;
         if (co->co_flags & CO_VARARGS) {
@@ -5521,11 +5492,19 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con,
             u = _PyTuple_FromArray(args + n, argcount - n);
         }
         if (u == NULL) {
-            goto fail;
+            goto fail_post_positional;
         }
         assert(localsplus[total_args] == NULL);
         localsplus[total_args] = u;
     }
+    else if (argcount > n) {
+        /* Too many postional args. Error is reported later */
+        if (steal_args) {
+            for (j = n; j < argcount; j++) {
+                Py_DECREF(args[j]);
+            }
+        }
+    }
 
     /* Handle keyword arguments */
     if (kwnames != NULL) {
@@ -5540,7 +5519,7 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con,
                 _PyErr_Format(tstate, PyExc_TypeError,
                             "%U() keywords must be strings",
                           con->fc_qualname);
-                goto fail;
+                goto kw_fail;
             }
 
             /* Speed hack: do raw pointer compares. As names are
@@ -5561,7 +5540,7 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con,
                     goto kw_found;
                 }
                 else if (cmp < 0) {
-                    goto fail;
+                    goto kw_fail;
                 }
             }
 
@@ -5573,29 +5552,38 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con,
                                                         kwcount, kwnames,
                                                      con->fc_qualname))
                 {
-                    goto fail;
+                    goto kw_fail;
                 }
 
                 _PyErr_Format(tstate, PyExc_TypeError,
                             "%U() got an unexpected keyword argument '%S'",
                           con->fc_qualname, keyword);
-                goto fail;
+                goto kw_fail;
             }
 
             if (PyDict_SetItem(kwdict, keyword, value) == -1) {
-                goto fail;
+                goto kw_fail;
             }
             if (steal_args) {
                 Py_DECREF(value);
             }
             continue;
 
+        kw_fail:
+            if (steal_args) {
+                for (;i < kwcount; i++) {
+                    PyObject *value = args[i+argcount];
+                    Py_DECREF(value);
+                }
+            }
+            goto fail_noclean;
+
         kw_found:
             if (localsplus[j] != NULL) {
                 _PyErr_Format(tstate, PyExc_TypeError,
                             "%U() got multiple values for argument '%S'",
                           con->fc_qualname, keyword);
-                goto fail;
+                goto kw_fail;
             }
             if (!steal_args) {
                 Py_INCREF(value);
@@ -5608,7 +5596,7 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con,
     if ((argcount > co->co_argcount) && !(co->co_flags & CO_VARARGS)) {
         too_many_positional(tstate, co, argcount, con->fc_defaults, localsplus,
                             con->fc_qualname);
-        goto fail;
+        goto fail_noclean;
     }
 
     /* Add missing positional arguments (copy default values from defs) */
@@ -5624,7 +5612,7 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con,
         if (missing) {
             missing_arguments(tstate, co, missing, defcount, localsplus,
                               con->fc_qualname);
-            goto fail;
+            goto fail_noclean;
         }
         if (n > m)
             i = n - m;
@@ -5657,7 +5645,7 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con,
                     continue;
                 }
                 else if (_PyErr_Occurred(tstate)) {
-                    goto fail;
+                    goto fail_noclean;
                 }
             }
             missing++;
@@ -5665,7 +5653,7 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con,
         if (missing) {
             missing_arguments(tstate, co, missing, -1, localsplus,
                               con->fc_qualname);
-            goto fail;
+            goto fail_noclean;
         }
     }
 
@@ -5678,33 +5666,23 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con,
 
     return 0;
 
-fail: /* Jump here from prelude on failure */
+fail_pre_positional:
     if (steal_args) {
-        // If we failed to initialize locals, make sure the caller still own all the
-        // arguments that were on the stack. We need to increment the reference count
-        // of everything we copied (everything in localsplus) that came from the stack
-        // (everything that is present in the "args" array).
-        Py_ssize_t kwcount = kwnames != NULL ? PyTuple_GET_SIZE(kwnames) : 0;
-        for (Py_ssize_t k=0; k < total_args; k++) {
-            PyObject* arg = localsplus[k];
-            for (Py_ssize_t j=0; j < argcount + kwcount; j++) {
-                if (args[j] == arg) {
-                    Py_XINCREF(arg);
-                    break;
-                }
-            }
+        for (j = 0; j < argcount; j++) {
+            Py_DECREF(args[j]);
         }
-        // Restore all the **kwargs we placed into the kwargs dictionary
-        if (kwdict) {
-            PyObject *key, *value;
-            Py_ssize_t pos = 0;
-            while (PyDict_Next(kwdict, &pos, &key, &value)) {
-                Py_INCREF(value);
-            }
+    }
+    /* fall through */
+fail_post_positional:
+    if (steal_args) {
+        Py_ssize_t kwcount = kwnames != NULL ? PyTuple_GET_SIZE(kwnames) : 0;
+        for (j = argcount; j < argcount+kwcount; j++) {
+            Py_DECREF(args[j]);
         }
     }
+    /* fall through */
+fail_noclean:
     return -1;
-
 }
 
 static InterpreterFrame *
@@ -6593,40 +6571,6 @@ trace_call_function(PyThreadState *tstate,
     return PyObject_Vectorcall(func, args, nargs | PY_VECTORCALL_ARGUMENTS_OFFSET, kwnames);
 }
 
-/* Issue #29227: Inline call_function() into _PyEval_EvalFrameDefault()
-   to reduce the stack consumption. */
-Py_LOCAL_INLINE(PyObject *) _Py_HOT_FUNCTION
-call_function(PyThreadState *tstate,
-              PyObject ***pp_stack,
-              Py_ssize_t oparg,
-              PyObject *kwnames,
-              int use_tracing)
-{
-    PyObject **pfunc = (*pp_stack) - oparg - 1;
-    PyObject *func = *pfunc;
-    PyObject *x, *w;
-    Py_ssize_t nkwargs = (kwnames == NULL) ? 0 : PyTuple_GET_SIZE(kwnames);
-    Py_ssize_t nargs = oparg - nkwargs;
-    PyObject **stack = (*pp_stack) - nargs - nkwargs;
-
-    if (use_tracing) {
-        x = trace_call_function(tstate, func, stack, nargs, kwnames);
-    }
-    else {
-        x = PyObject_Vectorcall(func, stack, nargs | PY_VECTORCALL_ARGUMENTS_OFFSET, kwnames);
-    }
-
-    assert((x != NULL) ^ (_PyErr_Occurred(tstate) != NULL));
-
-    /* Clear the stack of the function object. */
-    while ((*pp_stack) > pfunc) {
-        w = EXT_POP(*pp_stack);
-        Py_DECREF(w);
-    }
-
-    return x;
-}
-
 static PyObject *
 do_call_core(PyThreadState *tstate,
              PyObject *func,



More information about the Python-checkins mailing list