[Python-checkins] r66390 - in python/trunk: Misc/NEWS Misc/find_recursionlimit.py Modules/cPickle.c

amaury.forgeotdarc python-checkins at python.org
Thu Sep 11 22:56:13 CEST 2008


Author: amaury.forgeotdarc
Date: Thu Sep 11 22:56:13 2008
New Revision: 66390

Log:
#3640: Correct a crash in cPickle on 64bit platforms, in the case of deeply nested lists or dicts.

Reviewed by Martin von Loewis.


Modified:
   python/trunk/Misc/NEWS
   python/trunk/Misc/find_recursionlimit.py
   python/trunk/Modules/cPickle.c

Modified: python/trunk/Misc/NEWS
==============================================================================
--- python/trunk/Misc/NEWS	(original)
+++ python/trunk/Misc/NEWS	Thu Sep 11 22:56:13 2008
@@ -75,6 +75,9 @@
 Library
 -------
 
+- Issue #3640: Pickling a list or a dict uses less local variables, to reduce
+  stack usage in the case of deeply nested objects.
+
 - Issue #3629: Fix sre "bytecode" validator for an end case.
 
 - Issue #3811: The Unicode database was updated to 5.1.

Modified: python/trunk/Misc/find_recursionlimit.py
==============================================================================
--- python/trunk/Misc/find_recursionlimit.py	(original)
+++ python/trunk/Misc/find_recursionlimit.py	Thu Sep 11 22:56:13 2008
@@ -22,6 +22,7 @@
 """
 
 import sys
+import itertools
 
 class RecursiveBlowup1:
     def __init__(self):
@@ -61,6 +62,23 @@
 def test_recurse():
     return test_recurse()
 
+def test_cpickle(_cache={}):
+    try:
+        import cPickle
+    except ImportError:
+        print "cannot import cPickle, skipped!"
+        return
+    l = None
+    for n in itertools.count():
+        try:
+            l = _cache[n]
+            continue  # Already tried and it works, let's save some time
+        except KeyError:
+            for i in range(100):
+                l = [l]
+        cPickle.dumps(l, protocol=-1)
+        _cache[n] = l
+
 def check_limit(n, test_func_name):
     sys.setrecursionlimit(n)
     if test_func_name.startswith("test_"):
@@ -83,5 +101,6 @@
     check_limit(limit, "test_init")
     check_limit(limit, "test_getattr")
     check_limit(limit, "test_getitem")
+    check_limit(limit, "test_cpickle")
     print "Limit of %d is fine" % limit
     limit = limit + 100

Modified: python/trunk/Modules/cPickle.c
==============================================================================
--- python/trunk/Modules/cPickle.c	(original)
+++ python/trunk/Modules/cPickle.c	Thu Sep 11 22:56:13 2008
@@ -1515,8 +1515,8 @@
 static int
 batch_list(Picklerobject *self, PyObject *iter)
 {
-	PyObject *obj;
-	PyObject *slice[BATCHSIZE];
+	PyObject *obj = NULL;
+	PyObject *firstitem = NULL;
 	int i, n;
 
 	static char append = APPEND;
@@ -1545,45 +1545,69 @@
 
 	/* proto > 0:  write in batches of BATCHSIZE. */
 	do {
-		/* Get next group of (no more than) BATCHSIZE elements. */
-		for (n = 0; n < BATCHSIZE; ++n) {
-			obj = PyIter_Next(iter);
-			if (obj == NULL) {
-				if (PyErr_Occurred())
-					goto BatchFailed;
-				break;
-			}
-			slice[n] = obj;
+		/* Get first item */
+		firstitem = PyIter_Next(iter);
+		if (firstitem == NULL) {
+			if (PyErr_Occurred())
+				goto BatchFailed;
+
+			/* nothing more to add */
+			break;
 		}
 
-		if (n > 1) {
-			/* Pump out MARK, slice[0:n], APPENDS. */
-			if (self->write_func(self, &MARKv, 1) < 0)
+		/* Try to get a second item */
+		obj = PyIter_Next(iter);
+		if (obj == NULL) {
+			if (PyErr_Occurred())
 				goto BatchFailed;
-			for (i = 0; i < n; ++i) {
-				if (save(self, slice[i], 0) < 0)
-					goto BatchFailed;
-			}
-			if (self->write_func(self, &appends, 1) < 0)
-				goto BatchFailed;
-		}
-		else if (n == 1) {
-			if (save(self, slice[0], 0) < 0)
+
+			/* Only one item to write */
+			if (save(self, firstitem, 0) < 0)
 				goto BatchFailed;
 			if (self->write_func(self, &append, 1) < 0)
 				goto BatchFailed;
+			Py_CLEAR(firstitem);
+			break;
 		}
 
-		for (i = 0; i < n; ++i) {
-			Py_DECREF(slice[i]);
+		/* More than one item to write */
+
+		/* Pump out MARK, items, APPENDS. */
+		if (self->write_func(self, &MARKv, 1) < 0)
+			goto BatchFailed;
+		
+		if (save(self, firstitem, 0) < 0)
+			goto BatchFailed;
+		Py_CLEAR(firstitem);
+		n = 1;
+		
+		/* Fetch and save up to BATCHSIZE items */
+		while (obj) {
+			if (save(self, obj, 0) < 0)
+				goto BatchFailed;
+			Py_CLEAR(obj);
+			n += 1;
+			
+			if (n == BATCHSIZE)
+				break;
+
+			obj = PyIter_Next(iter);
+			if (obj == NULL) {
+				if (PyErr_Occurred())
+					goto BatchFailed;
+				break;
+			}
 		}
+
+		if (self->write_func(self, &appends, 1) < 0)
+			goto BatchFailed;
+
 	} while (n == BATCHSIZE);
 	return 0;
 
 BatchFailed:
-	while (--n >= 0) {
-		Py_DECREF(slice[n]);
-	}
+	Py_XDECREF(firstitem);
+	Py_XDECREF(obj);
 	return -1;
 }
 
@@ -1659,8 +1683,8 @@
 static int
 batch_dict(Picklerobject *self, PyObject *iter)
 {
-	PyObject *p;
-	PyObject *slice[BATCHSIZE];
+	PyObject *p = NULL;
+	PyObject *firstitem = NULL;
 	int i, n;
 
 	static char setitem = SETITEM;
@@ -1696,56 +1720,85 @@
 
 	/* proto > 0:  write in batches of BATCHSIZE. */
 	do {
-		/* Get next group of (no more than) BATCHSIZE elements. */
-		for (n = 0; n < BATCHSIZE; ++n) {
-			p = PyIter_Next(iter);
-			if (p == NULL) {
-				if (PyErr_Occurred())
-					goto BatchFailed;
-				break;
-			}
-			if (!PyTuple_Check(p) || PyTuple_Size(p) != 2) {
-				PyErr_SetString(PyExc_TypeError, "dict items "
-					"iterator must return 2-tuples");
+		/* Get first item */
+		firstitem = PyIter_Next(iter);
+		if (firstitem == NULL) {
+			if (PyErr_Occurred())
 				goto BatchFailed;
-			}
-			slice[n] = p;
+
+			/* nothing more to add */
+			break;
+		}
+		if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
+			PyErr_SetString(PyExc_TypeError, "dict items "
+					"iterator must return 2-tuples");
+			goto BatchFailed;
 		}
 
-		if (n > 1) {
-			/* Pump out MARK, slice[0:n], SETITEMS. */
-			if (self->write_func(self, &MARKv, 1) < 0)
+		/* Try to get a second item */
+		p = PyIter_Next(iter);
+		if (p == NULL) {
+			if (PyErr_Occurred())
 				goto BatchFailed;
-			for (i = 0; i < n; ++i) {
-				p = slice[i];
-				if (save(self, PyTuple_GET_ITEM(p, 0), 0) < 0)
-					goto BatchFailed;
-				if (save(self, PyTuple_GET_ITEM(p, 1), 0) < 0)
-					goto BatchFailed;
-			}
-			if (self->write_func(self, &setitems, 1) < 0)
+
+			/* Only one item to write */
+			if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
+				goto BatchFailed;
+			if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
 				goto BatchFailed;
+			if (self->write_func(self, &setitem, 1) < 0)
+				goto BatchFailed;
+			Py_CLEAR(firstitem);
+			break;
 		}
-		else if (n == 1) {
-			p = slice[0];
+
+		/* More than one item to write */
+
+		/* Pump out MARK, items, SETITEMS. */
+		if (self->write_func(self, &MARKv, 1) < 0)
+			goto BatchFailed;
+
+		if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
+			goto BatchFailed;
+		if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
+			goto BatchFailed;
+		Py_CLEAR(firstitem);
+		n = 1;
+
+		/* Fetch and save up to BATCHSIZE items */
+		while (p) {
+			if (!PyTuple_Check(p) || PyTuple_Size(p) != 2) {
+				PyErr_SetString(PyExc_TypeError, "dict items "
+					"iterator must return 2-tuples");
+				goto BatchFailed;
+			}
 			if (save(self, PyTuple_GET_ITEM(p, 0), 0) < 0)
 				goto BatchFailed;
 			if (save(self, PyTuple_GET_ITEM(p, 1), 0) < 0)
 				goto BatchFailed;
-			if (self->write_func(self, &setitem, 1) < 0)
-				goto BatchFailed;
-		}
+			Py_CLEAR(p);
+			n += 1;
+
+			if (n == BATCHSIZE)
+				break;
 
-		for (i = 0; i < n; ++i) {
-			Py_DECREF(slice[i]);
+			p = PyIter_Next(iter);
+			if (p == NULL) {
+				if (PyErr_Occurred())
+					goto BatchFailed;
+				break;
+			}
 		}
+
+		if (self->write_func(self, &setitems, 1) < 0)
+			goto BatchFailed;
+
 	} while (n == BATCHSIZE);
 	return 0;
 
 BatchFailed:
-	while (--n >= 0) {
-		Py_DECREF(slice[n]);
-	}
+	Py_XDECREF(firstitem);
+	Py_XDECREF(p);
 	return -1;
 }
 


More information about the Python-checkins mailing list