[Python-checkins] r72910 - in python/branches/py3k: Modules/_pickle.c

collin.winter python-checkins at python.org
Mon May 25 07:43:30 CEST 2009


Author: collin.winter
Date: Mon May 25 07:43:30 2009
New Revision: 72910

Log:
Merged revisions 72909 via svnmerge from 
http://svn.python.org/projects/python/trunk

Note that the performance improvement for the py3k branch is not as high as for trunk.

........
  r72909 | collin.winter | 2009-05-24 21:34:39 -0700 (Sun, 24 May 2009) | 2 lines
  
  Issue 5670: special-case pickling of dicts. This nearly doubles the performance of dict pickling in cPickle.
........


Modified:
   python/branches/py3k/   (props changed)
   python/branches/py3k/Modules/_pickle.c

Modified: python/branches/py3k/Modules/_pickle.c
==============================================================================
--- python/branches/py3k/Modules/_pickle.c	(original)
+++ python/branches/py3k/Modules/_pickle.c	Mon May 25 07:43:30 2009
@@ -1699,6 +1699,69 @@
     return -1;
 }
 
+/* This is a variant of batch_dict() above that specializes for dicts, with no
+ * support for dict subclasses. Like batch_dict(), we batch up chunks of
+ *     MARK key value ... key value SETITEMS
+ * opcode sequences.  Calling code should have arranged to first create an
+ * empty dict, or dict-like object, for the SETITEMS to operate on.
+ * Returns 0 on success, -1 on error.
+ *
+ * Note that this currently doesn't work for protocol 0.
+ *
+ * PyDict_Next() yields borrowed references, and save() may execute arbitrary
+ * Python code that mutates obj.  Each key/value pair is therefore held with
+ * a strong reference for as long as it is being saved, so that neither
+ * object can be deallocated while it is still in use here.
+ */
+static int
+batch_dict_exact(PicklerObject *self, PyObject *obj)
+{
+    PyObject *key = NULL, *value = NULL;
+    int i;
+    Py_ssize_t dict_size, ppos = 0;
+
+    static const char mark_op = MARK;
+    static const char setitem = SETITEM;
+    static const char setitems = SETITEMS;
+
+    assert(obj != NULL);
+    assert(self->proto > 0);
+
+    dict_size = PyDict_Size(obj);
+
+    /* Special-case len(d) == 1 to save space: a lone SETITEM needs no
+       MARK ... SETITEMS framing. */
+    if (dict_size == 1) {
+        PyDict_Next(obj, &ppos, &key, &value);
+        /* Pin the pair before handing control to save(). */
+        Py_INCREF(key);
+        Py_INCREF(value);
+        if (save(self, key, 0) < 0)
+            goto error;
+        if (save(self, value, 0) < 0)
+            goto error;
+        Py_CLEAR(key);
+        Py_CLEAR(value);
+        if (pickler_write(self, &setitem, 1) < 0)
+            return -1;
+        return 0;
+    }
+
+    /* Write in batches of BATCHSIZE. */
+    do {
+        i = 0;
+        if (pickler_write(self, &mark_op, 1) < 0)
+            return -1;
+        while (PyDict_Next(obj, &ppos, &key, &value)) {
+            /* Pin the pair before handing control to save(). */
+            Py_INCREF(key);
+            Py_INCREF(value);
+            if (save(self, key, 0) < 0)
+                goto error;
+            if (save(self, value, 0) < 0)
+                goto error;
+            /* Py_CLEAR leaves both pointers NULL, so the plain
+               "return -1" paths below never leak a reference. */
+            Py_CLEAR(key);
+            Py_CLEAR(value);
+            if (++i == BATCHSIZE)
+                break;
+        }
+        if (pickler_write(self, &setitems, 1) < 0)
+            return -1;
+        /* Detect a dict that was resized while we were iterating over it. */
+        if (PyDict_Size(obj) != dict_size) {
+            PyErr_Format(
+                PyExc_RuntimeError,
+                "dictionary changed size during iteration");
+            return -1;
+        }
+
+    } while (i == BATCHSIZE);
+    return 0;
+
+error:
+    /* Reached only while a pair is pinned; drop the strong references. */
+    Py_XDECREF(key);
+    Py_XDECREF(value);
+    return -1;
+}
+
 static int
 save_dict(PicklerObject *self, PyObject *obj)
 {
@@ -1733,15 +1796,24 @@
 
     if (len != 0) {
         /* Save the dict items. */
-        items = PyObject_CallMethod(obj, "items", "()");
-        if (items == NULL)
-            goto error;
-        iter = PyObject_GetIter(items);
-        Py_DECREF(items);
-        if (iter == NULL)
-            goto error;
-        status = batch_dict(self, iter);
-        Py_DECREF(iter);
+        if (PyDict_CheckExact(obj) && self->proto > 0) {
+            /* We can take certain shortcuts if we know this is a dict and
+               not a dict subclass. */
+            if (Py_EnterRecursiveCall(" while pickling an object") == 0) {
+                status = batch_dict_exact(self, obj);
+                Py_LeaveRecursiveCall();
+            }
+        } else {
+            items = PyObject_CallMethod(obj, "items", "()");
+            if (items == NULL)
+                goto error;
+            iter = PyObject_GetIter(items);
+            Py_DECREF(items);
+            if (iter == NULL)
+                goto error;
+            status = batch_dict(self, iter);
+            Py_DECREF(iter);
+        }
     }
 
     if (0) {


More information about the Python-checkins mailing list