[Python-checkins] cpython: Issue #20416: marshal.dumps() with protocols 3 and 4 is now 40-50% faster on

serhiy.storchaka python-checkins at python.org
Wed Feb 11 14:56:46 CET 2015


https://hg.python.org/cpython/rev/012364df2712
changeset:   94586:012364df2712
user:        Serhiy Storchaka <storchaka at gmail.com>
date:        Wed Feb 11 15:53:31 2015 +0200
summary:
  Issue #20416: marshal.dumps() with protocols 3 and 4 is now 40-50% faster on
average.

files:
  Doc/whatsnew/3.5.rst |   3 +
  Misc/NEWS            |   3 +
  Python/marshal.c     |  95 ++++++++++++++++++-------------
  3 files changed, 61 insertions(+), 40 deletions(-)


diff --git a/Doc/whatsnew/3.5.rst b/Doc/whatsnew/3.5.rst
--- a/Doc/whatsnew/3.5.rst
+++ b/Doc/whatsnew/3.5.rst
@@ -381,6 +381,9 @@
 * Many operations on :class:`io.BytesIO` are now 50% to 100% faster.
   (Contributed by Serhiy Storchaka in :issue:`15381`.)
 
+* :func:`marshal.dumps` with versions 3 and 4 is now 40-50% faster on average.
+  (Contributed by Serhiy Storchaka in :issue:`20416`.)
+
 
 Build and C API Changes
 =======================
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -13,6 +13,9 @@
 Library
 -------
 
+- Issue #20416: marshal.dumps() with protocols 3 and 4 is now 40-50% faster on
+  average.
+
 - Issue #23421: Fixed compression in tarfile CLI.  Patch by wdv4758h.
 
 - Issue #23361: Fix possible overflow in Windows subprocess creation code.
diff --git a/Python/marshal.c b/Python/marshal.c
--- a/Python/marshal.c
+++ b/Python/marshal.c
@@ -12,6 +12,7 @@
 #include "longintrepr.h"
 #include "code.h"
 #include "marshal.h"
+#include "../Modules/hashtable.h"
 
 /* High water mark to determine when the marshalled object is dangerously deep
  * and risks coring the interpreter.  When the object stack gets this deep,
@@ -73,6 +74,7 @@
     char *buf;
     Py_ssize_t buf_size;
     PyObject *refs; /* dict on marshal, list on unmarshal */
+    _Py_hashtable_t *hashtable;
     int version;
 } WFILE;
 
@@ -223,46 +225,38 @@
 static int
 w_ref(PyObject *v, char *flag, WFILE *p)
 {
-    PyObject *id;
-    PyObject *idx;
+    _Py_hashtable_entry_t *entry;
+    int w;
 
-    if (p->version < 3 || p->refs == NULL)
+    if (p->version < 3 || p->hashtable == NULL)
         return 0; /* not writing object references */
 
     /* if it has only one reference, it definitely isn't shared */
     if (Py_REFCNT(v) == 1)
         return 0;
 
-    id = PyLong_FromVoidPtr((void*)v);
-    if (id == NULL)
-        goto err;
-    idx = PyDict_GetItem(p->refs, id);
-    if (idx != NULL) {
+    entry = _Py_hashtable_get_entry(p->hashtable, v);
+    if (entry != NULL) {
         /* write the reference index to the stream */
-        long w = PyLong_AsLong(idx);
-        Py_DECREF(id);
-        if (w == -1 && PyErr_Occurred()) {
-            goto err;
-        }
+        _Py_HASHTABLE_ENTRY_READ_DATA(p->hashtable, &w, sizeof(w), entry);
         /* we don't store "long" indices in the dict */
         assert(0 <= w && w <= 0x7fffffff);
         w_byte(TYPE_REF, p);
         w_long(w, p);
         return 1;
     } else {
-        int ok;
-        Py_ssize_t s = PyDict_Size(p->refs);
+        size_t s = p->hashtable->entries;
         /* we don't support long indices */
         if (s >= 0x7fffffff) {
             PyErr_SetString(PyExc_ValueError, "too many objects");
             goto err;
         }
-        idx = PyLong_FromSsize_t(s);
-        ok = idx && PyDict_SetItem(p->refs, id, idx) == 0;
-        Py_DECREF(id);
-        Py_XDECREF(idx);
-        if (!ok)
+        w = s;
+        Py_INCREF(v);
+        if (_Py_HASHTABLE_SET(p->hashtable, v, w) < 0) {
+            Py_DECREF(v);
             goto err;
+        }
         *flag |= FLAG_REF;
         return 0;
     }
@@ -545,15 +539,44 @@
     }
 }
 
+static int
+w_init_refs(WFILE *wf, int version)
+{
+    if (version >= 3) {
+        wf->hashtable = _Py_hashtable_new(sizeof(int), _Py_hashtable_hash_ptr,
+                                          _Py_hashtable_compare_direct);
+        if (wf->hashtable == NULL) {
+            PyErr_NoMemory();
+            return -1;
+        }
+    }
+    return 0;
+}
+
+static int
+w_decref_entry(_Py_hashtable_entry_t *entry, void *Py_UNUSED(data))
+{
+    Py_XDECREF(entry->key);
+    return 0;
+}
+
+static void
+w_clear_refs(WFILE *wf)
+{
+    if (wf->hashtable != NULL) {
+        _Py_hashtable_foreach(wf->hashtable, w_decref_entry, NULL);
+        _Py_hashtable_destroy(wf->hashtable);
+    }
+}
+
 /* version currently has no effect for writing ints. */
 void
 PyMarshal_WriteLongToFile(long x, FILE *fp, int version)
 {
     WFILE wf;
+    memset(&wf, 0, sizeof(wf));
     wf.fp = fp;
     wf.error = WFERR_OK;
-    wf.depth = 0;
-    wf.refs = NULL;
     wf.version = version;
     w_long(x, &wf);
 }
@@ -562,17 +585,14 @@
 PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
 {
     WFILE wf;
+    memset(&wf, 0, sizeof(wf));
     wf.fp = fp;
     wf.error = WFERR_OK;
-    wf.depth = 0;
-    if (version >= 3) {
-        if ((wf.refs = PyDict_New()) == NULL)
-            return; /* caller mush check PyErr_Occurred() */
-    } else
-        wf.refs = NULL;
     wf.version = version;
+    if (w_init_refs(&wf, version))
+        return; /* caller mush check PyErr_Occurred() */
     w_object(x, &wf);
-    Py_XDECREF(wf.refs);
+    w_clear_refs(&wf);
 }
 
 typedef WFILE RFILE; /* Same struct with different invariants */
@@ -1509,25 +1529,20 @@
 {
     WFILE wf;
 
-    wf.fp = NULL;
-    wf.readable = NULL;
+    memset(&wf, 0, sizeof(wf));
     wf.str = PyBytes_FromStringAndSize((char *)NULL, 50);
     if (wf.str == NULL)
         return NULL;
     wf.ptr = PyBytes_AS_STRING((PyBytesObject *)wf.str);
     wf.end = wf.ptr + PyBytes_Size(wf.str);
     wf.error = WFERR_OK;
-    wf.depth = 0;
     wf.version = version;
-    if (version >= 3) {
-        if ((wf.refs = PyDict_New()) == NULL) {
-            Py_DECREF(wf.str);
-            return NULL;
-        }
-    } else
-        wf.refs = NULL;
+    if (w_init_refs(&wf, version)) {
+        Py_DECREF(wf.str);
+        return NULL;
+    }
     w_object(x, &wf);
-    Py_XDECREF(wf.refs);
+    w_clear_refs(&wf);
     if (wf.str != NULL) {
         char *base = PyBytes_AS_STRING((PyBytesObject *)wf.str);
         if (wf.ptr - base > PY_SSIZE_T_MAX) {

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list