[Python-checkins] bpo-36904: Optimize _PyStack_UnpackDict (GH-14517)

Inada Naoki webhook-mailer at python.org
Tue Jul 2 05:49:57 EDT 2019


https://github.com/python/cpython/commit/d4efd917ac24940063a1ce80073fe3570c5f07f8
commit: d4efd917ac24940063a1ce80073fe3570c5f07f8
branch: master
author: Jeroen Demeyer <J.Demeyer at UGent.be>
committer: Inada Naoki <songofacandy at gmail.com>
date: 2019-07-02T18:49:40+09:00
summary:

bpo-36904: Optimize _PyStack_UnpackDict (GH-14517)

files:
M Include/cpython/abstract.h
M Objects/call.c

diff --git a/Include/cpython/abstract.h b/Include/cpython/abstract.h
index b4288e714c70..69ed9435c654 100644
--- a/Include/cpython/abstract.h
+++ b/Include/cpython/abstract.h
@@ -26,24 +26,6 @@ PyAPI_FUNC(PyObject *) _PyStack_AsDict(
     PyObject *const *values,
     PyObject *kwnames);
 
-/* Convert (args, nargs, kwargs: dict) into a (stack, nargs, kwnames: tuple).
-
-   Return 0 on success, raise an exception and return -1 on error.
-
-   Write the new stack into *p_stack. If *p_stack is differen than args, it
-   must be released by PyMem_Free().
-
-   The stack uses borrowed references.
-
-   The type of keyword keys is not checked, these checks should be done
-   later (ex: _PyArg_ParseStackAndKeywords). */
-PyAPI_FUNC(int) _PyStack_UnpackDict(
-    PyObject *const *args,
-    Py_ssize_t nargs,
-    PyObject *kwargs,
-    PyObject *const **p_stack,
-    PyObject **p_kwnames);
-
 /* Suggested size (number of positional arguments) for arrays of PyObject*
    allocated on a C stack to avoid allocating memory on the heap memory. Such
    array is used to pass positional arguments to call functions of the
diff --git a/Objects/call.c b/Objects/call.c
index 2b52bdf6abd4..8e0d271ab78a 100644
--- a/Objects/call.c
+++ b/Objects/call.c
@@ -8,6 +8,14 @@
 static PyObject *
 cfunction_call_varargs(PyObject *func, PyObject *args, PyObject *kwargs);
 
+static PyObject *const *
+_PyStack_UnpackDict(PyObject *const *args, Py_ssize_t nargs, PyObject *kwargs,
+                    PyObject **p_kwnames);
+
+static void
+_PyStack_UnpackDict_Free(PyObject *const *stack, Py_ssize_t nargs,
+                         PyObject *kwnames);
+
 
 static PyObject *
 null_error(void)
@@ -100,24 +108,19 @@ _PyObject_FastCallDict(PyObject *callable, PyObject *const *args,
     }
 
     PyObject *res;
-    if (kwargs == NULL) {
+    if (kwargs == NULL || PyDict_GET_SIZE(kwargs) == 0) {
         res = func(callable, args, nargsf, NULL);
     }
     else {
         PyObject *kwnames;
         PyObject *const *newargs;
-        if (_PyStack_UnpackDict(args, nargs, kwargs, &newargs, &kwnames) < 0) {
+        newargs = _PyStack_UnpackDict(args, nargs, kwargs, &kwnames);
+        if (newargs == NULL) {
             return NULL;
         }
-        res = func(callable, newargs, nargs, kwnames);
-        if (kwnames != NULL) {
-            Py_ssize_t i, n = PyTuple_GET_SIZE(kwnames) + nargs;
-            for (i = 0; i < n; i++) {
-                Py_DECREF(newargs[i]);
-            }
-            PyMem_Free((PyObject **)newargs);
-            Py_DECREF(kwnames);
-        }
+        res = func(callable, newargs,
+                   nargs | PY_VECTORCALL_ARGUMENTS_OFFSET, kwnames);
+        _PyStack_UnpackDict_Free(newargs, nargs, kwnames);
     }
     return _Py_CheckFunctionResult(callable, res, NULL);
 }
@@ -196,24 +199,23 @@ PyVectorcall_Call(PyObject *callable, PyObject *tuple, PyObject *kwargs)
         return NULL;
     }
 
+    Py_ssize_t nargs = PyTuple_GET_SIZE(tuple);
+
+    /* Fast path for no keywords */
+    if (kwargs == NULL || PyDict_GET_SIZE(kwargs) == 0) {
+        return func(callable, _PyTuple_ITEMS(tuple), nargs, NULL);
+    }
+
     /* Convert arguments & call */
     PyObject *const *args;
-    Py_ssize_t nargs = PyTuple_GET_SIZE(tuple);
     PyObject *kwnames;
-    if (_PyStack_UnpackDict(_PyTuple_ITEMS(tuple), nargs,
-        kwargs, &args, &kwnames) < 0) {
+    args = _PyStack_UnpackDict(_PyTuple_ITEMS(tuple), nargs, kwargs, &kwnames);
+    if (args == NULL) {
         return NULL;
     }
-    PyObject *result = func(callable, args, nargs, kwnames);
-    if (kwnames != NULL) {
-        Py_ssize_t i, n = PyTuple_GET_SIZE(kwnames) + nargs;
-        for (i = 0; i < n; i++) {
-            Py_DECREF(args[i]);
-        }
-        PyMem_Free((PyObject **)args);
-        Py_DECREF(kwnames);
-    }
-
+    PyObject *result = func(callable, args,
+                            nargs | PY_VECTORCALL_ARGUMENTS_OFFSET, kwnames);
+    _PyStack_UnpackDict_Free(args, nargs, kwnames);
     return result;
 }
 
@@ -455,23 +457,22 @@ _PyMethodDef_RawFastCallDict(PyMethodDef *method, PyObject *self,
 
     case METH_FASTCALL | METH_KEYWORDS:
     {
-        PyObject *const *stack;
-        PyObject *kwnames;
         _PyCFunctionFastWithKeywords fastmeth = (_PyCFunctionFastWithKeywords)(void(*)(void))meth;
 
-        if (_PyStack_UnpackDict(args, nargs, kwargs, &stack, &kwnames) < 0) {
-            goto exit;
+        /* Fast path for no keywords */
+        if (kwargs == NULL || PyDict_GET_SIZE(kwargs) == 0) {
+            result = (*fastmeth) (self, args, nargs, NULL);
+            break;
         }
 
-        result = (*fastmeth) (self, stack, nargs, kwnames);
-        if (kwnames != NULL) {
-            Py_ssize_t i, n = nargs + PyTuple_GET_SIZE(kwnames);
-            for (i = 0; i < n; i++) {
-                Py_DECREF(stack[i]);
-            }
-            PyMem_Free((PyObject **)stack);
-            Py_DECREF(kwnames);
+        PyObject *const *stack;
+        PyObject *kwnames;
+        stack = _PyStack_UnpackDict(args, nargs, kwargs, &kwnames);
+        if (stack == NULL) {
+            goto exit;
         }
+        result = (*fastmeth) (self, stack, nargs, kwnames);
+        _PyStack_UnpackDict_Free(stack, nargs, kwnames);
         break;
     }
 
@@ -1206,53 +1207,63 @@ _PyStack_AsDict(PyObject *const *values, PyObject *kwnames)
 }
 
 
-int
-_PyStack_UnpackDict(PyObject *const *args, Py_ssize_t nargs, PyObject *kwargs,
-                    PyObject *const **p_stack, PyObject **p_kwnames)
-{
-    PyObject **stack, **kwstack;
-    Py_ssize_t nkwargs;
-    Py_ssize_t pos, i;
-    PyObject *key, *value;
-    PyObject *kwnames;
+/* Convert (args, nargs, kwargs: dict) into a (stack, nargs, kwnames: tuple).
 
-    assert(nargs >= 0);
-    assert(kwargs == NULL || PyDict_CheckExact(kwargs));
+   Allocate a new argument vector and keyword names tuple. Return the argument
+   vector; return NULL with exception set on error. Return the keyword names
+   tuple in *p_kwnames.
 
-    if (kwargs == NULL || (nkwargs = PyDict_GET_SIZE(kwargs)) == 0) {
-        *p_stack = args;
-        *p_kwnames = NULL;
-        return 0;
-    }
+   The newly allocated argument vector supports PY_VECTORCALL_ARGUMENTS_OFFSET.
+
+   When done, you must call _PyStack_UnpackDict_Free(stack, nargs, kwnames)
 
-    if ((size_t)nargs > PY_SSIZE_T_MAX / sizeof(stack[0]) - (size_t)nkwargs) {
+   The type of keyword keys is not checked, these checks should be done
+   later (ex: _PyArg_ParseStackAndKeywords). */
+static PyObject *const *
+_PyStack_UnpackDict(PyObject *const *args, Py_ssize_t nargs, PyObject *kwargs,
+                    PyObject **p_kwnames)
+{
+    assert(nargs >= 0);
+    assert(kwargs != NULL);
+    assert(PyDict_Check(kwargs));
+
+    Py_ssize_t nkwargs = PyDict_GET_SIZE(kwargs);
+    /* Check for overflow in the PyMem_Malloc() call below. The subtraction
+     * in this check cannot overflow: both maxnargs and nkwargs are
+     * non-negative signed integers, so their difference fits in the type. */
+    Py_ssize_t maxnargs = PY_SSIZE_T_MAX / sizeof(args[0]) - 1;
+    if (nargs > maxnargs - nkwargs) {
         PyErr_NoMemory();
-        return -1;
+        return NULL;
     }
 
-    stack = PyMem_Malloc((nargs + nkwargs) * sizeof(stack[0]));
+    /* Add 1 to support PY_VECTORCALL_ARGUMENTS_OFFSET */
+    PyObject **stack = PyMem_Malloc((1 + nargs + nkwargs) * sizeof(args[0]));
     if (stack == NULL) {
         PyErr_NoMemory();
-        return -1;
+        return NULL;
     }
 
-    kwnames = PyTuple_New(nkwargs);
+    PyObject *kwnames = PyTuple_New(nkwargs);
     if (kwnames == NULL) {
         PyMem_Free(stack);
-        return -1;
+        return NULL;
     }
 
+    stack++;  /* For PY_VECTORCALL_ARGUMENTS_OFFSET */
+
     /* Copy positional arguments */
-    for (i = 0; i < nargs; i++) {
+    for (Py_ssize_t i = 0; i < nargs; i++) {
         Py_INCREF(args[i]);
         stack[i] = args[i];
     }
 
-    kwstack = stack + nargs;
-    pos = i = 0;
+    PyObject **kwstack = stack + nargs;
     /* This loop doesn't support lookup function mutating the dictionary
        to change its size. It's a deliberate choice for speed, this function is
        called in the performance critical hot code. */
+    Py_ssize_t pos = 0, i = 0;
+    PyObject *key, *value;
     while (PyDict_Next(kwargs, &pos, &key, &value)) {
         Py_INCREF(key);
         Py_INCREF(value);
@@ -1261,7 +1272,18 @@ _PyStack_UnpackDict(PyObject *const *args, Py_ssize_t nargs, PyObject *kwargs,
         i++;
     }
 
-    *p_stack = stack;
     *p_kwnames = kwnames;
-    return 0;
+    return stack;
+}
+
+static void
+_PyStack_UnpackDict_Free(PyObject *const *stack, Py_ssize_t nargs,
+                         PyObject *kwnames)
+{
+    Py_ssize_t n = PyTuple_GET_SIZE(kwnames) + nargs;
+    for (Py_ssize_t i = 0; i < n; i++) {
+        Py_DECREF(stack[i]);
+    }
+    PyMem_Free((PyObject **)stack - 1);
+    Py_DECREF(kwnames);
 }



More information about the Python-checkins mailing list