[Python-checkins] cpython (3.2): Issue #16350: Fix zlib decompressor handling of unused_data with multiple calls to decompress() after EOF

nadeem.vawda python-checkins at python.org
Mon Nov 5 00:44:28 CET 2012


http://hg.python.org/cpython/rev/be882735e0b6
changeset:   80255:be882735e0b6
branch:      3.2
parent:      80249:56bc323288d1
user:        Nadeem Vawda <nadeem.vawda at gmail.com>
date:        Mon Nov 05 00:37:42 2012 +0100
summary:
  Issue #16350: Fix zlib decompressor handling of unused_data with multiple calls to decompress() after EOF.

Patch by Serhiy Storchaka.

files:
  Lib/test/test_zlib.py |  13 +++++++++++++
  Misc/NEWS             |   4 ++++
  Modules/zlibmodule.c  |  29 +++++++++++++++++++++++------
  3 files changed, 40 insertions(+), 6 deletions(-)


diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py
--- a/Lib/test/test_zlib.py
+++ b/Lib/test/test_zlib.py
@@ -434,6 +434,19 @@
         y += dco.flush()
         self.assertEqual(y, b'foo')
 
+    def test_decompress_unused_data(self):
+        # Repeated calls to decompress() after EOF should accumulate data in
+        # dco.unused_data, instead of just storing the arg to the last call.
+        x = zlib.compress(HAMLET_SCENE) + HAMLET_SCENE
+        for step in 1, 2, 100:
+            dco = zlib.decompressobj()
+            data = b''.join(dco.decompress(x[i : i + step])
+                            for i in range(0, len(x), step))
+            data += dco.flush()
+
+            self.assertEqual(data, HAMLET_SCENE)
+            self.assertEqual(dco.unused_data, HAMLET_SCENE)
+
     if hasattr(zlib.compressobj(), "copy"):
         def test_compresscopy(self):
             # Test copying a compression object
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -156,6 +156,10 @@
 Library
 -------
 
+- Issue #16350: zlib.Decompress.decompress() now accumulates data from
+  successive calls after EOF in unused_data, instead of only saving the argument
+  to the last call. Patch by Serhiy Storchaka.
+
 - Issue #12759: sre_parse now raises a proper error when the name of the group
   is missing.  Initial patch by Serhiy Storchaka.
 
diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c
--- a/Modules/zlibmodule.c
+++ b/Modules/zlibmodule.c
@@ -610,12 +610,29 @@
        preserved.
     */
     if (err == Z_STREAM_END) {
-        Py_XDECREF(self->unused_data);  /* Free original empty string */
-        self->unused_data = PyBytes_FromStringAndSize(
-            (char *)self->zst.next_in, self->zst.avail_in);
-        if (self->unused_data == NULL) {
-            Py_DECREF(RetVal);
-            goto error;
+        if (self->zst.avail_in > 0) {
+            /* Append the leftover data to the existing value of unused_data. */
+            Py_ssize_t old_size = PyBytes_GET_SIZE(self->unused_data);
+            Py_ssize_t new_size = old_size + self->zst.avail_in;
+            PyObject *new_data;
+            if (new_size <= old_size) {  /* Check for overflow. */
+                PyErr_NoMemory();
+                Py_DECREF(RetVal);
+                RetVal = NULL;
+                goto error;
+            }
+            new_data = PyBytes_FromStringAndSize(NULL, new_size);
+            if (new_data == NULL) {
+                Py_DECREF(RetVal);
+                RetVal = NULL;
+                goto error;
+            }
+            Py_MEMCPY(PyBytes_AS_STRING(new_data),
+                      PyBytes_AS_STRING(self->unused_data), old_size);
+            Py_MEMCPY(PyBytes_AS_STRING(new_data) + old_size,
+                      self->zst.next_in, self->zst.avail_in);
+            Py_DECREF(self->unused_data);
+            self->unused_data = new_data;
         }
         /* We will only get Z_BUF_ERROR if the output buffer was full
            but there wasn't more output when we tried again, so it is

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list