[Python-checkins] cpython (2.7): Issue #27130: Fix handling of buffers exceeding (U)INT_MAX in “zlib” module

martin.panter python-checkins at python.org
Sat Jul 23 00:30:09 EDT 2016


https://hg.python.org/cpython/rev/2192edcfea02
changeset:   102425:2192edcfea02
branch:      2.7
parent:      102409:434fc614c506
user:        Martin Panter <vadmium+py at gmail.com>
date:        Sat Jul 23 04:22:09 2016 +0000
summary:
  Issue #27130: Fix handling of buffers exceeding (U)INT_MAX in “zlib” module

Ported from patches by Xiang Zhang, Nadeem Vawda, and myself.

files:
  Lib/test/test_zlib.py |  120 +++++
  Misc/NEWS             |    5 +
  Modules/zlibmodule.c  |  642 ++++++++++++++++-------------
  3 files changed, 484 insertions(+), 283 deletions(-)


diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py
--- a/Lib/test/test_zlib.py
+++ b/Lib/test/test_zlib.py
@@ -1,4 +1,5 @@
 import unittest
+from test import test_support as support
 from test.test_support import TESTFN, run_unittest, import_module, unlink, requires
 import binascii
 import pickle
@@ -80,6 +81,16 @@
                          zlib.crc32('spam',  (2**31)))
 
 
+# Issue #10276 - check that inputs >=4GB are handled correctly.
+class ChecksumBigBufferTestCase(unittest.TestCase):
+
+    @precisionbigmemtest(size=_4G + 4, memuse=1, dry_run=False)
+    def test_big_buffer(self, size):
+        data = b"nyan" * (_1G + 1)
+        self.assertEqual(zlib.crc32(data) & 0xFFFFFFFF, 1044521549)
+        self.assertEqual(zlib.adler32(data) & 0xFFFFFFFF, 2256789997)
+
+
 class ExceptionTestCase(unittest.TestCase):
     # make sure we generate some expected errors
     def test_badlevel(self):
@@ -104,6 +115,15 @@
         self.assertRaises(ValueError, zlib.decompressobj().flush, 0)
         self.assertRaises(ValueError, zlib.decompressobj().flush, -1)
 
+    @support.cpython_only
+    def test_overflow(self):
+        with self.assertRaisesRegexp(OverflowError, 'int too large'):
+            zlib.decompress(b'', 15, sys.maxsize + 1)
+        with self.assertRaisesRegexp(OverflowError, 'int too large'):
+            zlib.decompressobj().decompress(b'', sys.maxsize + 1)
+        with self.assertRaisesRegexp(OverflowError, 'int too large'):
+            zlib.decompressobj().flush(sys.maxsize + 1)
+
 
 class BaseCompressTestCase(object):
     def check_big_compress_buffer(self, size, compress_func):
@@ -167,6 +187,28 @@
     def test_big_decompress_buffer(self, size):
         self.check_big_decompress_buffer(size, zlib.decompress)
 
+    @precisionbigmemtest(size=_4G, memuse=1)
+    def test_large_bufsize(self, size):
+        # Test decompress(bufsize) parameter greater than the internal limit
+        data = HAMLET_SCENE * 10
+        compressed = zlib.compress(data, 1)
+        self.assertEqual(zlib.decompress(compressed, 15, size), data)
+
+    def test_custom_bufsize(self):
+        data = HAMLET_SCENE * 10
+        compressed = zlib.compress(data, 1)
+        self.assertEqual(zlib.decompress(compressed, 15, CustomInt()), data)
+
+    @unittest.skipUnless(sys.maxsize > 2**32, 'requires 64bit platform')
+    @precisionbigmemtest(size=_4G + 100, memuse=4)
+    def test_64bit_compress(self, size):
+        data = b'x' * size
+        try:
+            comp = zlib.compress(data, 0)
+            self.assertEqual(zlib.decompress(comp), data)
+        finally:
+            comp = data = None
+
 
 class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase):
     # Test compression object
@@ -318,6 +360,22 @@
         self.assertRaises(ValueError, dco.decompress, "", -1)
         self.assertEqual('', dco.unconsumed_tail)
 
+    def test_maxlen_large(self):
+        # Sizes up to sys.maxsize should be accepted, although zlib is
+        # internally limited to expressing sizes with unsigned int
+        data = HAMLET_SCENE * 10
+        DEFAULTALLOC = 16 * 1024
+        self.assertGreater(len(data), DEFAULTALLOC)
+        compressed = zlib.compress(data, 1)
+        dco = zlib.decompressobj()
+        self.assertEqual(dco.decompress(compressed, sys.maxsize), data)
+
+    def test_maxlen_custom(self):
+        data = HAMLET_SCENE * 10
+        compressed = zlib.compress(data, 1)
+        dco = zlib.decompressobj()
+        self.assertEqual(dco.decompress(compressed, CustomInt()), data[:100])
+
     def test_clear_unconsumed_tail(self):
         # Issue #12050: calling decompress() without providing max_length
         # should clear the unconsumed_tail attribute.
@@ -416,6 +474,22 @@
         data = zlib.compress(input2)
         self.assertEqual(dco.flush(), input1[1:])
 
+    @precisionbigmemtest(size=_4G, memuse=1)
+    def test_flush_large_length(self, size):
+        # Test flush(length) parameter greater than internal limit UINT_MAX
+        input = HAMLET_SCENE * 10
+        data = zlib.compress(input, 1)
+        dco = zlib.decompressobj()
+        dco.decompress(data, 1)
+        self.assertEqual(dco.flush(size), input[1:])
+
+    def test_flush_custom_length(self):
+        input = HAMLET_SCENE * 10
+        data = zlib.compress(input, 1)
+        dco = zlib.decompressobj()
+        dco.decompress(data, 1)
+        self.assertEqual(dco.flush(CustomInt()), input[1:])
+
     @requires_Compress_copy
     def test_compresscopy(self):
         # Test copying a compression object
@@ -527,6 +601,46 @@
         decompress = lambda s: d.decompress(s) + d.flush()
         self.check_big_decompress_buffer(size, decompress)
 
+    @unittest.skipUnless(sys.maxsize > 2**32, 'requires 64bit platform')
+    @precisionbigmemtest(size=_4G + 100, memuse=4)
+    def test_64bit_compress(self, size):
+        data = b'x' * size
+        co = zlib.compressobj(0)
+        do = zlib.decompressobj()
+        try:
+            comp = co.compress(data) + co.flush()
+            uncomp = do.decompress(comp) + do.flush()
+            self.assertEqual(uncomp, data)
+        finally:
+            comp = uncomp = data = None
+
+    @unittest.skipUnless(sys.maxsize > 2**32, 'requires 64bit platform')
+    @precisionbigmemtest(size=_4G + 100, memuse=3)
+    def test_large_unused_data(self, size):
+        data = b'abcdefghijklmnop'
+        unused = b'x' * size
+        comp = zlib.compress(data) + unused
+        do = zlib.decompressobj()
+        try:
+            uncomp = do.decompress(comp) + do.flush()
+            self.assertEqual(unused, do.unused_data)
+            self.assertEqual(uncomp, data)
+        finally:
+            unused = comp = do = None
+
+    @unittest.skipUnless(sys.maxsize > 2**32, 'requires 64bit platform')
+    @precisionbigmemtest(size=_4G + 100, memuse=5)
+    def test_large_unconsumed_tail(self, size):
+        data = b'x' * size
+        do = zlib.decompressobj()
+        try:
+            comp = zlib.compress(data, 0)
+            uncomp = do.decompress(comp, 1) + do.flush()
+            self.assertEqual(uncomp, data)
+            self.assertEqual(do.unconsumed_tail, b'')
+        finally:
+            comp = uncomp = data = None
+
     def test_wbits(self):
         co = zlib.compressobj(1, zlib.DEFLATED, 15)
         zlib15 = co.compress(HAMLET_SCENE) + co.flush()
@@ -658,9 +772,15 @@
 """
 
 
+class CustomInt:
+    def __int__(self):
+        return 100
+
+
 def test_main():
     run_unittest(
         ChecksumTestCase,
+        ChecksumBigBufferTestCase,
         ExceptionTestCase,
         CompressTestCase,
         CompressObjectTestCase
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -26,6 +26,11 @@
 Library
 -------
 
+- Issue #27130: In the "zlib" module, fix handling of large buffers
+  (typically 2 or 4 GiB).  Previously, inputs were limited to 2 GiB, and
+  compression and decompression operations did not properly handle results of
+  2 or 4 GiB.
+
 - Issue #23804: Fix SSL zero-length recv() calls to not block and not raise
   an error about unclean EOF.
 
diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c
--- a/Modules/zlibmodule.c
+++ b/Modules/zlibmodule.c
@@ -1,8 +1,9 @@
 /* zlibmodule.c -- gzip-compatible data compression */
-/* See http://www.gzip.org/zlib/ */
+/* See http://zlib.net/ */
 
 /* Windows users:  read Python's PCbuild\readme.txt */
 
+#define PY_SSIZE_T_CLEAN
 
 #include "Python.h"
 #include "zlib.h"
@@ -129,6 +130,71 @@
     return self;
 }
 
+static void
+arrange_input_buffer(z_stream *zst, Py_ssize_t *remains)
+{
+    if ((size_t)*remains > UINT_MAX) {
+        zst->avail_in = UINT_MAX;
+    } else {
+        zst->avail_in = *remains;
+    }
+    *remains -= zst->avail_in;
+}
+
+static Py_ssize_t
+arrange_output_buffer_with_maximum(z_stream *zst, PyObject **buffer,
+                                   Py_ssize_t length,
+                                   Py_ssize_t max_length)
+{
+    Py_ssize_t occupied;
+
+    if (*buffer == NULL) {
+        if (!(*buffer = PyBytes_FromStringAndSize(NULL, length)))
+            return -1;
+        occupied = 0;
+    }
+    else {
+        occupied = zst->next_out - (Byte *)PyBytes_AS_STRING(*buffer);
+
+        if (length == occupied) {
+            Py_ssize_t new_length;
+            assert(length <= max_length);
+            /* cannot scale the buffer over max_length */
+            if (length == max_length)
+                return -2;
+            if (length <= (max_length >> 1))
+                new_length = length << 1;
+            else
+                new_length = max_length;
+            if (_PyBytes_Resize(buffer, new_length) < 0)
+                return -1;
+            length = new_length;
+        }
+    }
+
+    if ((size_t)(length - occupied) > UINT_MAX) {
+        zst->avail_out = UINT_MAX;
+    } else {
+        zst->avail_out = length - occupied;
+    }
+    zst->next_out = (Byte *)PyBytes_AS_STRING(*buffer) + occupied;
+
+    return length;
+}
+
+static Py_ssize_t
+arrange_output_buffer(z_stream *zst, PyObject **buffer, Py_ssize_t length)
+{
+    Py_ssize_t ret;
+
+    ret = arrange_output_buffer_with_maximum(zst, buffer, length,
+                                             PY_SSIZE_T_MAX);
+    if (ret == -2)
+        PyErr_NoMemory();
+
+    return ret;
+}
+
 PyDoc_STRVAR(compress__doc__,
 "compress(string[, level]) -- Returned compressed string.\n"
 "\n"
@@ -137,44 +203,31 @@
 static PyObject *
 PyZlib_compress(PyObject *self, PyObject *args)
 {
-    PyObject *ReturnVal = NULL;
-    Byte *input, *output;
-    int length, level=Z_DEFAULT_COMPRESSION, err;
+    PyObject *RetVal = NULL;
+    Byte *ibuf;
+    Py_ssize_t ibuflen, obuflen = DEFAULTALLOC;
+    int level=Z_DEFAULT_COMPRESSION;
+    int err, flush;
     z_stream zst;
 
     /* require Python string object, optional 'level' arg */
-    if (!PyArg_ParseTuple(args, "s#|i:compress", &input, &length, &level))
+    if (!PyArg_ParseTuple(args, "s#|i:compress", &ibuf, &ibuflen, &level))
         return NULL;
 
-    zst.avail_out = length + length/1000 + 12 + 1;
-
-    output = (Byte*)malloc(zst.avail_out);
-    if (output == NULL) {
-        PyErr_SetString(PyExc_MemoryError,
-                        "Can't allocate memory to compress data");
-        return NULL;
-    }
-
-    /* Past the point of no return.  From here on out, we need to make sure
-       we clean up mallocs & INCREFs. */
-
     zst.zalloc = (alloc_func)NULL;
     zst.zfree = (free_func)Z_NULL;
-    zst.next_out = (Byte *)output;
-    zst.next_in = (Byte *)input;
-    zst.avail_in = length;
+    zst.next_in = ibuf;
     err = deflateInit(&zst, level);
 
-    switch(err) {
-    case(Z_OK):
+    switch (err) {
+    case Z_OK:
         break;
-    case(Z_MEM_ERROR):
+    case Z_MEM_ERROR:
         PyErr_SetString(PyExc_MemoryError,
                         "Out of memory while compressing data");
         goto error;
-    case(Z_STREAM_ERROR):
-        PyErr_SetString(ZlibError,
-                        "Bad compression level");
+    case Z_STREAM_ERROR:
+        PyErr_SetString(ZlibError, "Bad compression level");
         goto error;
     default:
         deflateEnd(&zst);
@@ -182,27 +235,45 @@
         goto error;
     }
 
-    Py_BEGIN_ALLOW_THREADS;
-    err = deflate(&zst, Z_FINISH);
-    Py_END_ALLOW_THREADS;
+    do {
+        arrange_input_buffer(&zst, &ibuflen);
+        flush = ibuflen == 0 ? Z_FINISH : Z_NO_FLUSH;
 
-    if (err != Z_STREAM_END) {
-        zlib_error(zst, err, "while compressing data");
-        deflateEnd(&zst);
-        goto error;
+        do {
+            obuflen = arrange_output_buffer(&zst, &RetVal, obuflen);
+            if (obuflen < 0) {
+                deflateEnd(&zst);
+                goto error;
+            }
+
+            Py_BEGIN_ALLOW_THREADS
+            err = deflate(&zst, flush);
+            Py_END_ALLOW_THREADS
+
+            if (err == Z_STREAM_ERROR) {
+                deflateEnd(&zst);
+                zlib_error(zst, err, "while compressing data");
+                goto error;
+            }
+
+        } while (zst.avail_out == 0);
+        assert(zst.avail_in == 0);
+
+    } while (flush != Z_FINISH);
+    assert(err == Z_STREAM_END);
+
+    err = deflateEnd(&zst);
+    if (err == Z_OK) {
+        if (_PyBytes_Resize(&RetVal, zst.next_out -
+                            (Byte *)PyBytes_AS_STRING(RetVal)) < 0)
+            goto error;
+        return RetVal;
     }
-
-    err=deflateEnd(&zst);
-    if (err == Z_OK)
-        ReturnVal = PyString_FromStringAndSize((char *)output,
-                                               zst.total_out);
     else
         zlib_error(zst, err, "while finishing compression");
-
  error:
-    free(output);
-
-    return ReturnVal;
+    Py_XDECREF(RetVal);
+    return NULL;
 }
 
 PyDoc_STRVAR(decompress__doc__,
@@ -214,36 +285,32 @@
 static PyObject *
 PyZlib_decompress(PyObject *self, PyObject *args)
 {
-    PyObject *result_str;
-    Byte *input;
-    int length, err;
+    PyObject *RetVal = NULL;
+    Byte *ibuf;
+    Py_ssize_t ibuflen;
+    int err, flush;
     int wsize=DEF_WBITS;
     Py_ssize_t r_strlen=DEFAULTALLOC;
     z_stream zst;
 
     if (!PyArg_ParseTuple(args, "s#|in:decompress",
-                          &input, &length, &wsize, &r_strlen))
+                          &ibuf, &ibuflen, &wsize, &r_strlen))
         return NULL;
 
-    if (r_strlen <= 0)
+    if (r_strlen <= 0) {
         r_strlen = 1;
-
-    zst.avail_in = length;
-    zst.avail_out = r_strlen;
-
-    if (!(result_str = PyString_FromStringAndSize(NULL, r_strlen)))
-        return NULL;
+    }
 
     zst.zalloc = (alloc_func)NULL;
     zst.zfree = (free_func)Z_NULL;
-    zst.next_out = (Byte *)PyString_AS_STRING(result_str);
-    zst.next_in = (Byte *)input;
+    zst.avail_in = 0;
+    zst.next_in = ibuf;
     err = inflateInit2(&zst, wsize);
 
-    switch(err) {
-    case(Z_OK):
+    switch (err) {
+    case Z_OK:
         break;
-    case(Z_MEM_ERROR):
+    case Z_MEM_ERROR:
         PyErr_SetString(PyExc_MemoryError,
                         "Out of memory while decompressing data");
         goto error;
@@ -254,42 +321,46 @@
     }
 
     do {
-        Py_BEGIN_ALLOW_THREADS
-        err=inflate(&zst, Z_FINISH);
-        Py_END_ALLOW_THREADS
+        arrange_input_buffer(&zst, &ibuflen);
+        flush = ibuflen == 0 ? Z_FINISH : Z_NO_FLUSH;
 
-        switch(err) {
-        case(Z_STREAM_END):
-            break;
-        case(Z_BUF_ERROR):
-            /*
-             * If there is at least 1 byte of room according to zst.avail_out
-             * and we get this error, assume that it means zlib cannot
-             * process the inflate call() due to an error in the data.
-             */
-            if (zst.avail_out > 0) {
-                zlib_error(zst, err, "while decompressing data");
+        do {
+            r_strlen = arrange_output_buffer(&zst, &RetVal, r_strlen);
+            if (r_strlen < 0) {
                 inflateEnd(&zst);
                 goto error;
             }
-            /* fall through */
-        case(Z_OK):
-            /* need more memory */
-            if (_PyString_Resize(&result_str, r_strlen << 1) < 0) {
+
+            Py_BEGIN_ALLOW_THREADS
+            err = inflate(&zst, flush);
+            Py_END_ALLOW_THREADS
+
+            switch (err) {
+            case Z_OK:            /* fall through */
+            case Z_BUF_ERROR:     /* fall through */
+            case Z_STREAM_END:
+                break;
+            case Z_MEM_ERROR:
                 inflateEnd(&zst);
+                PyErr_SetString(PyExc_MemoryError,
+                                "Out of memory while decompressing data");
+                goto error;
+            default:
+                inflateEnd(&zst);
+                zlib_error(zst, err, "while decompressing data");
                 goto error;
             }
-            zst.next_out = (unsigned char *)PyString_AS_STRING(result_str) \
-                + r_strlen;
-            zst.avail_out = r_strlen;
-            r_strlen = r_strlen << 1;
-            break;
-        default:
-            inflateEnd(&zst);
-            zlib_error(zst, err, "while decompressing data");
-            goto error;
-        }
-    } while (err != Z_STREAM_END);
+
+        } while (zst.avail_out == 0);
+
+    } while (err != Z_STREAM_END && ibuflen != 0);
+
+
+    if (err != Z_STREAM_END) {
+        inflateEnd(&zst);
+        zlib_error(zst, err, "while decompressing data");
+        goto error;
+    }
 
     err = inflateEnd(&zst);
     if (err != Z_OK) {
@@ -297,11 +368,12 @@
         goto error;
     }
 
-    _PyString_Resize(&result_str, zst.total_out);
-    return result_str;
+    _PyString_Resize(&RetVal, zst.next_out -
+                        (Byte *)PyBytes_AS_STRING(RetVal));
+    return RetVal;
 
  error:
-    Py_XDECREF(result_str);
+    Py_XDECREF(RetVal);
     return NULL;
 }
 
@@ -317,23 +389,23 @@
         return NULL;
 
     self = newcompobject(&Comptype);
-    if (self==NULL)
+    if (self == NULL)
         return(NULL);
     self->zst.zalloc = (alloc_func)NULL;
     self->zst.zfree = (free_func)Z_NULL;
     self->zst.next_in = NULL;
     self->zst.avail_in = 0;
     err = deflateInit2(&self->zst, level, method, wbits, memLevel, strategy);
-    switch(err) {
-    case (Z_OK):
+    switch (err) {
+    case Z_OK:
         self->is_initialised = 1;
-        return (PyObject*)self;
-    case (Z_MEM_ERROR):
+        return (PyObject *)self;
+    case Z_MEM_ERROR:
         Py_DECREF(self);
         PyErr_SetString(PyExc_MemoryError,
                         "Can't allocate memory for compression object");
         return NULL;
-    case(Z_STREAM_ERROR):
+    case Z_STREAM_ERROR:
         Py_DECREF(self);
         PyErr_SetString(PyExc_ValueError, "Invalid initialization option");
         return NULL;
@@ -354,21 +426,21 @@
 
     self = newcompobject(&Decomptype);
     if (self == NULL)
-        return(NULL);
+        return NULL;
     self->zst.zalloc = (alloc_func)NULL;
     self->zst.zfree = (free_func)Z_NULL;
     self->zst.next_in = NULL;
     self->zst.avail_in = 0;
     err = inflateInit2(&self->zst, wbits);
-    switch(err) {
-    case (Z_OK):
+    switch (err) {
+    case Z_OK:
         self->is_initialised = 1;
-        return (PyObject*)self;
-    case(Z_STREAM_ERROR):
+        return (PyObject *)self;
+    case Z_STREAM_ERROR:
         Py_DECREF(self);
         PyErr_SetString(PyExc_ValueError, "Invalid initialization option");
         return NULL;
-    case (Z_MEM_ERROR):
+    case Z_MEM_ERROR:
         Py_DECREF(self);
         PyErr_SetString(PyExc_MemoryError,
                         "Can't allocate memory for decompression object");
@@ -411,58 +483,44 @@
 static PyObject *
 PyZlib_objcompress(compobject *self, PyObject *args)
 {
-    int err, inplen;
-    Py_ssize_t length = DEFAULTALLOC;
-    PyObject *RetVal;
-    Byte *input;
-    unsigned long start_total_out;
+    PyObject *RetVal = NULL;
+    Py_ssize_t ibuflen, obuflen = DEFAULTALLOC;
+    int err;
 
-    if (!PyArg_ParseTuple(args, "s#:compress", &input, &inplen))
-        return NULL;
-
-    if (!(RetVal = PyString_FromStringAndSize(NULL, length)))
+    if (!PyArg_ParseTuple(args, "s#:compress", &self->zst.next_in, &ibuflen))
         return NULL;
 
     ENTER_ZLIB
 
-    start_total_out = self->zst.total_out;
-    self->zst.avail_in = inplen;
-    self->zst.next_in = input;
-    self->zst.avail_out = length;
-    self->zst.next_out = (unsigned char *)PyString_AS_STRING(RetVal);
+    do {
+        arrange_input_buffer(&self->zst, &ibuflen);
 
-    Py_BEGIN_ALLOW_THREADS
-    err = deflate(&(self->zst), Z_NO_FLUSH);
-    Py_END_ALLOW_THREADS
+        do {
+            obuflen = arrange_output_buffer(&self->zst, &RetVal, obuflen);
+            if (obuflen < 0)
+                goto error;
 
-    /* while Z_OK and the output buffer is full, there might be more output,
-       so extend the output buffer and try again */
-    while (err == Z_OK && self->zst.avail_out == 0) {
-        if (_PyString_Resize(&RetVal, length << 1) < 0)
-            goto error;
-        self->zst.next_out = (unsigned char *)PyString_AS_STRING(RetVal) \
-            + length;
-        self->zst.avail_out = length;
-        length = length << 1;
+            Py_BEGIN_ALLOW_THREADS
+            err = deflate(&self->zst, Z_NO_FLUSH);
+            Py_END_ALLOW_THREADS
 
-        Py_BEGIN_ALLOW_THREADS
-        err = deflate(&(self->zst), Z_NO_FLUSH);
-        Py_END_ALLOW_THREADS
-    }
-    /* We will only get Z_BUF_ERROR if the output buffer was full but
-       there wasn't more output when we tried again, so it is not an error
-       condition.
-    */
+            if (err == Z_STREAM_ERROR) {
+                zlib_error(self->zst, err, "while compressing data");
+                goto error;
+            }
 
-    if (err != Z_OK && err != Z_BUF_ERROR) {
-        zlib_error(self->zst, err, "while compressing");
-        Py_DECREF(RetVal);
-        RetVal = NULL;
-        goto error;
-    }
-    _PyString_Resize(&RetVal, self->zst.total_out - start_total_out);
+        } while (self->zst.avail_out == 0);
+        assert(self->zst.avail_in == 0);
+
+    } while (ibuflen != 0);
+
+    _PyString_Resize(&RetVal, self->zst.next_out -
+                        (Byte *)PyBytes_AS_STRING(RetVal));
+    goto success;
 
  error:
+    Py_CLEAR(RetVal);
+ success:
     LEAVE_ZLIB
     return RetVal;
 }
@@ -470,41 +528,46 @@
 /* Helper for objdecompress() and unflush(). Saves any unconsumed input data in
    self->unused_data or self->unconsumed_tail, as appropriate. */
 static int
-save_unconsumed_input(compobject *self, int err)
+save_unconsumed_input(compobject *self, Byte *input, Py_ssize_t inplen,
+    int err)
 {
     if (err == Z_STREAM_END) {
         /* The end of the compressed data has been reached. Store the leftover
            input data in self->unused_data. */
         if (self->zst.avail_in > 0) {
             Py_ssize_t old_size = PyString_GET_SIZE(self->unused_data);
-            Py_ssize_t new_size;
+            Py_ssize_t new_size, left_size;
             PyObject *new_data;
-            if (self->zst.avail_in > PY_SSIZE_T_MAX - old_size) {
+            left_size = input + inplen - self->zst.next_in;
+            if (left_size > (PY_SSIZE_T_MAX - old_size)) {
                 PyErr_NoMemory();
                 return -1;
             }
-            new_size = old_size + self->zst.avail_in;
+            new_size = old_size + left_size;
             new_data = PyString_FromStringAndSize(NULL, new_size);
             if (new_data == NULL)
                 return -1;
             Py_MEMCPY(PyString_AS_STRING(new_data),
                       PyString_AS_STRING(self->unused_data), old_size);
             Py_MEMCPY(PyString_AS_STRING(new_data) + old_size,
-                      self->zst.next_in, self->zst.avail_in);
+                      self->zst.next_in, left_size);
             Py_SETREF(self->unused_data, new_data);
             self->zst.avail_in = 0;
         }
     }
+
     if (self->zst.avail_in > 0 || PyString_GET_SIZE(self->unconsumed_tail)) {
         /* This code handles two distinct cases:
            1. Output limit was reached. Save leftover input in unconsumed_tail.
            2. All input data was consumed. Clear unconsumed_tail. */
+        Py_ssize_t left_size = input + inplen - self->zst.next_in;
         PyObject *new_data = PyString_FromStringAndSize(
-                (char *)self->zst.next_in, self->zst.avail_in);
+                (char *)self->zst.next_in, left_size);
         if (new_data == NULL)
             return -1;
         Py_SETREF(self->unconsumed_tail, new_data);
     }
+
     return 0;
 }
 
@@ -522,71 +585,69 @@
 static PyObject *
 PyZlib_objdecompress(compobject *self, PyObject *args)
 {
-    int err, inplen, max_length = 0;
-    Py_ssize_t old_length, length = DEFAULTALLOC;
-    PyObject *RetVal;
+    int err = Z_OK;
+    Py_ssize_t inplen, max_length = 0;
+    Py_ssize_t ibuflen, obuflen = DEFAULTALLOC, hard_limit;
+    PyObject *RetVal = NULL;
     Byte *input;
-    unsigned long start_total_out;
 
-    if (!PyArg_ParseTuple(args, "s#|i:decompress", &input,
+    if (!PyArg_ParseTuple(args, "s#|n:decompress", &input,
                           &inplen, &max_length))
         return NULL;
     if (max_length < 0) {
         PyErr_SetString(PyExc_ValueError,
                         "max_length must be greater than zero");
         return NULL;
-    }
+    } else if (max_length == 0)
+        hard_limit = PY_SSIZE_T_MAX;
+    else
+        hard_limit = max_length;
+
+    self->zst.next_in = input;
+    ibuflen = inplen;
 
     /* limit amount of data allocated to max_length */
-    if (max_length && length > max_length)
-        length = max_length;
-    if (!(RetVal = PyString_FromStringAndSize(NULL, length)))
-        return NULL;
+    if (max_length && obuflen > max_length)
+        obuflen = max_length;
 
     ENTER_ZLIB
 
-    start_total_out = self->zst.total_out;
-    self->zst.avail_in = inplen;
-    self->zst.next_in = input;
-    self->zst.avail_out = length;
-    self->zst.next_out = (unsigned char *)PyString_AS_STRING(RetVal);
+    do {
+        arrange_input_buffer(&self->zst, &ibuflen);
 
-    Py_BEGIN_ALLOW_THREADS
-    err = inflate(&(self->zst), Z_SYNC_FLUSH);
-    Py_END_ALLOW_THREADS
+        do {
+            obuflen = arrange_output_buffer_with_maximum(&self->zst, &RetVal,
+                                                         obuflen, hard_limit);
+            if (obuflen == -2) {
+                if (max_length > 0) {
+                    goto save;
+                }
+                PyErr_NoMemory();
+            }
+            if (obuflen < 0) {
+                goto abort;
+            }
 
-    /* While Z_OK and the output buffer is full, there might be more output.
-       So extend the output buffer and try again.
-    */
-    while (err == Z_OK && self->zst.avail_out == 0) {
-        /* If max_length set, don't continue decompressing if we've already
-           reached the limit.
-        */
-        if (max_length && length >= max_length)
-            break;
+            Py_BEGIN_ALLOW_THREADS
+            err = inflate(&self->zst, Z_SYNC_FLUSH);
+            Py_END_ALLOW_THREADS
 
-        /* otherwise, ... */
-        old_length = length;
-        length = length << 1;
-        if (max_length && length > max_length)
-            length = max_length;
+            switch (err) {
+            case Z_OK:            /* fall through */
+            case Z_BUF_ERROR:     /* fall through */
+            case Z_STREAM_END:
+                break;
+            default:
+                goto save;
+            }
 
-        if (_PyString_Resize(&RetVal, length) < 0)
-            goto error;
-        self->zst.next_out = (unsigned char *)PyString_AS_STRING(RetVal) \
-            + old_length;
-        self->zst.avail_out = length - old_length;
+        } while (self->zst.avail_out == 0);
 
-        Py_BEGIN_ALLOW_THREADS
-        err = inflate(&(self->zst), Z_SYNC_FLUSH);
-        Py_END_ALLOW_THREADS
-    }
+    } while (err != Z_STREAM_END && ibuflen != 0);
 
-    if (save_unconsumed_input(self, err) < 0) {
-        Py_DECREF(RetVal);
-        RetVal = NULL;
-        goto error;
-    }
+ save:
+    if (save_unconsumed_input(self, input, inplen, err) < 0)
+        goto abort;
 
     /* This is the logical place to call inflateEnd, but the old behaviour of
        only calling it on flush() is preserved. */
@@ -597,14 +658,16 @@
            not an error condition.
         */
         zlib_error(self->zst, err, "while decompressing");
-        Py_DECREF(RetVal);
-        RetVal = NULL;
-        goto error;
+        goto abort;
     }
 
-    _PyString_Resize(&RetVal, self->zst.total_out - start_total_out);
+    _PyString_Resize(&RetVal, self->zst.next_out -
+                        (Byte *)PyBytes_AS_STRING(RetVal));
+    goto success;
 
- error:
+ abort:
+    Py_CLEAR(RetVal);
+ success:
     LEAVE_ZLIB
 
     return RetVal;
@@ -621,10 +684,10 @@
 static PyObject *
 PyZlib_flush(compobject *self, PyObject *args)
 {
-    int err, length = DEFAULTALLOC;
-    PyObject *RetVal;
+    int err;
+    Py_ssize_t length = DEFAULTALLOC;
+    PyObject *RetVal = NULL;
     int flushmode = Z_FINISH;
-    unsigned long start_total_out;
 
     if (!PyArg_ParseTuple(args, "|i:flush", &flushmode))
         return NULL;
@@ -635,44 +698,37 @@
         return PyString_FromStringAndSize(NULL, 0);
     }
 
-    if (!(RetVal = PyString_FromStringAndSize(NULL, length)))
-        return NULL;
-
     ENTER_ZLIB
 
-    start_total_out = self->zst.total_out;
     self->zst.avail_in = 0;
-    self->zst.avail_out = length;
-    self->zst.next_out = (unsigned char *)PyString_AS_STRING(RetVal);
 
-    Py_BEGIN_ALLOW_THREADS
-    err = deflate(&(self->zst), flushmode);
-    Py_END_ALLOW_THREADS
-
-    /* while Z_OK and the output buffer is full, there might be more output,
-       so extend the output buffer and try again */
-    while (err == Z_OK && self->zst.avail_out == 0) {
-        if (_PyString_Resize(&RetVal, length << 1) < 0)
+    do {
+        length = arrange_output_buffer(&self->zst, &RetVal, length);
+        if (length < 0) {
+            Py_CLEAR(RetVal);
             goto error;
-        self->zst.next_out = (unsigned char *)PyString_AS_STRING(RetVal) \
-            + length;
-        self->zst.avail_out = length;
-        length = length << 1;
+        }
 
         Py_BEGIN_ALLOW_THREADS
-        err = deflate(&(self->zst), flushmode);
+        err = deflate(&self->zst, flushmode);
         Py_END_ALLOW_THREADS
-    }
+
+        if (err == Z_STREAM_ERROR) {
+            zlib_error(self->zst, err, "while flushing");
+            Py_CLEAR(RetVal);
+            goto error;
+        }
+    } while (self->zst.avail_out == 0);
+    assert(self->zst.avail_in == 0);
 
     /* If flushmode is Z_FINISH, we also have to call deflateEnd() to free
        various data structures. Note we should only get Z_STREAM_END when
        flushmode is Z_FINISH, but checking both for safety*/
     if (err == Z_STREAM_END && flushmode == Z_FINISH) {
-        err = deflateEnd(&(self->zst));
+        err = deflateEnd(&self->zst);
         if (err != Z_OK) {
             zlib_error(self->zst, err, "from deflateEnd()");
-            Py_DECREF(RetVal);
-            RetVal = NULL;
+            Py_CLEAR(RetVal);
             goto error;
         }
         else
@@ -682,18 +738,17 @@
            but there wasn't more output when we tried again, so it is
            not an error condition.
         */
-    } else if (err!=Z_OK && err!=Z_BUF_ERROR) {
+    } else if (err != Z_OK && err != Z_BUF_ERROR) {
         zlib_error(self->zst, err, "while flushing");
-        Py_DECREF(RetVal);
-        RetVal = NULL;
+        Py_CLEAR(RetVal);
         goto error;
     }
 
-    _PyString_Resize(&RetVal, self->zst.total_out - start_total_out);
+    _PyString_Resize(&RetVal, self->zst.next_out -
+                        (Byte *)PyBytes_AS_STRING(RetVal));
 
  error:
     LEAVE_ZLIB
-
     return RetVal;
 }
 
@@ -715,13 +770,13 @@
      */
     ENTER_ZLIB
     err = deflateCopy(&retval->zst, &self->zst);
-    switch(err) {
-    case(Z_OK):
+    switch (err) {
+    case Z_OK:
         break;
-    case(Z_STREAM_ERROR):
+    case Z_STREAM_ERROR:
         PyErr_SetString(PyExc_ValueError, "Inconsistent stream state");
         goto error;
-    case(Z_MEM_ERROR):
+    case Z_MEM_ERROR:
         PyErr_SetString(PyExc_MemoryError,
                         "Can't allocate memory for compression object");
         goto error;
@@ -764,13 +819,13 @@
      */
     ENTER_ZLIB
     err = inflateCopy(&retval->zst, &self->zst);
-    switch(err) {
-    case(Z_OK):
+    switch (err) {
+    case Z_OK:
         break;
-    case(Z_STREAM_ERROR):
+    case Z_STREAM_ERROR:
         PyErr_SetString(PyExc_ValueError, "Inconsistent stream state");
         goto error;
-    case(Z_MEM_ERROR):
+    case Z_MEM_ERROR:
         PyErr_SetString(PyExc_MemoryError,
                         "Can't allocate memory for decompression object");
         goto error;
@@ -807,73 +862,76 @@
 static PyObject *
 PyZlib_unflush(compobject *self, PyObject *args)
 {
-    int err, length = DEFAULTALLOC;
-    PyObject * retval = NULL;
-    unsigned long start_total_out;
+    Py_ssize_t length = DEFAULTALLOC;
+    int err, flush;
+    PyObject *RetVal = NULL;
+    Py_ssize_t ibuflen;
 
-    if (!PyArg_ParseTuple(args, "|i:flush", &length))
+    if (!PyArg_ParseTuple(args, "|n:flush", &length))
         return NULL;
     if (length <= 0) {
         PyErr_SetString(PyExc_ValueError, "length must be greater than zero");
         return NULL;
     }
-    if (!(retval = PyString_FromStringAndSize(NULL, length)))
-        return NULL;
-
 
     ENTER_ZLIB
 
-    start_total_out = self->zst.total_out;
-    self->zst.avail_in = PyString_GET_SIZE(self->unconsumed_tail);
     self->zst.next_in = (Byte *)PyString_AS_STRING(self->unconsumed_tail);
-    self->zst.avail_out = length;
-    self->zst.next_out = (Byte *)PyString_AS_STRING(retval);
+    ibuflen = PyString_GET_SIZE(self->unconsumed_tail);
 
-    Py_BEGIN_ALLOW_THREADS
-    err = inflate(&(self->zst), Z_FINISH);
-    Py_END_ALLOW_THREADS
+    do {
+        arrange_input_buffer(&self->zst, &ibuflen);
+        flush = ibuflen == 0 ? Z_FINISH : Z_NO_FLUSH;
 
-    /* while Z_OK and the output buffer is full, there might be more output,
-       so extend the output buffer and try again */
-    while ((err == Z_OK || err == Z_BUF_ERROR) && self->zst.avail_out == 0) {
-        if (_PyString_Resize(&retval, length << 1) < 0)
-            goto error;
-        self->zst.next_out = (Byte *)PyString_AS_STRING(retval) + length;
-        self->zst.avail_out = length;
-        length = length << 1;
+        do {
+            length = arrange_output_buffer(&self->zst, &RetVal, length);
+            if (length < 0)
+                goto abort;
 
-        Py_BEGIN_ALLOW_THREADS
-        err = inflate(&(self->zst), Z_FINISH);
-        Py_END_ALLOW_THREADS
-    }
+            Py_BEGIN_ALLOW_THREADS
+            err = inflate(&self->zst, flush);
+            Py_END_ALLOW_THREADS
 
-    if (save_unconsumed_input(self, err) < 0) {
-        Py_DECREF(retval);
-        retval = NULL;
-        goto error;
-    }
+            switch (err) {
+            case Z_OK:            /* fall through */
+            case Z_BUF_ERROR:     /* fall through */
+            case Z_STREAM_END:
+                break;
+            default:
+                goto save;
+            }
+
+        } while (self->zst.avail_out == 0);
+
+    } while (err != Z_STREAM_END && ibuflen != 0);
+
+ save:
+    if (save_unconsumed_input(self,
+            (Byte *)PyString_AS_STRING(self->unconsumed_tail),
+            PyString_GET_SIZE(self->unconsumed_tail), err) < 0)
+        goto abort;
 
     /* If flushmode is Z_FINISH, we also have to call deflateEnd() to free
        various data structures. Note we should only get Z_STREAM_END when
        flushmode is Z_FINISH */
     if (err == Z_STREAM_END) {
-        err = inflateEnd(&(self->zst));
+        err = inflateEnd(&self->zst);
         self->is_initialised = 0;
         if (err != Z_OK) {
             zlib_error(self->zst, err, "from inflateEnd()");
-            Py_DECREF(retval);
-            retval = NULL;
-            goto error;
+            goto abort;
         }
     }
 
-    _PyString_Resize(&retval, self->zst.total_out - start_total_out);
+    _PyString_Resize(&RetVal, self->zst.next_out -
+                        (Byte *)PyBytes_AS_STRING(RetVal));
+    goto success;
 
-error:
-
+ abort:
+    Py_CLEAR(RetVal);
+ success:
     LEAVE_ZLIB
-
-    return retval;
+    return RetVal;
 }
 
 static PyMethodDef comp_methods[] =
@@ -943,15 +1001,24 @@
 {
     unsigned int adler32val = 1;  /* adler32(0L, Z_NULL, 0) */
     Byte *buf;
-    int len, signed_val;
+    Py_ssize_t len;
+    int signed_val;
 
     if (!PyArg_ParseTuple(args, "s#|I:adler32", &buf, &len, &adler32val))
         return NULL;
+
+    /* Avoid truncation of length for very large buffers. adler32() takes
+       length as an unsigned int, which may be narrower than Py_ssize_t. */
+    while ((size_t)len > UINT_MAX) {
+        adler32val = adler32(adler32val, buf, UINT_MAX);
+        buf += (size_t) UINT_MAX;
+        len -= (size_t) UINT_MAX;
+    }
     /* In Python 2.x we return a signed integer regardless of native platform
      * long size (the 32bit unsigned long is treated as 32-bit signed and sign
      * extended into a 64-bit long inside the integer object).  3.0 does the
      * right thing and returns unsigned. http://bugs.python.org/issue1202 */
-    signed_val = adler32(adler32val, buf, len);
+    signed_val = adler32(adler32val, buf, (unsigned int)len);
     return PyInt_FromLong(signed_val);
 }
 
@@ -966,15 +1033,24 @@
 {
     unsigned int crc32val = 0;  /* crc32(0L, Z_NULL, 0) */
     Byte *buf;
-    int len, signed_val;
+    Py_ssize_t len;
+    int signed_val;
 
     if (!PyArg_ParseTuple(args, "s#|I:crc32", &buf, &len, &crc32val))
         return NULL;
+
+    /* Avoid truncation of length for very large buffers. crc32() takes
+       length as an unsigned int, which may be narrower than Py_ssize_t. */
+    while ((size_t)len > UINT_MAX) {
+        crc32val = crc32(crc32val, buf, UINT_MAX);
+        buf += (size_t) UINT_MAX;
+        len -= (size_t) UINT_MAX;
+    }
     /* In Python 2.x we return a signed integer regardless of native platform
      * long size (the 32bit unsigned long is treated as 32-bit signed and sign
      * extended into a 64-bit long inside the integer object).  3.0 does the
      * right thing and returns unsigned. http://bugs.python.org/issue1202 */
-    signed_val = crc32(crc32val, buf, len);
+    signed_val = crc32(crc32val, buf, (unsigned int)len);
     return PyInt_FromLong(signed_val);
 }
 

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list