[Python-checkins] cpython (2.7): Issue #27130: Fix handling of buffers exceeding (U)INT_MAX in “zlib” module
martin.panter
python-checkins at python.org
Sat Jul 23 00:30:09 EDT 2016
https://hg.python.org/cpython/rev/2192edcfea02
changeset: 102425:2192edcfea02
branch: 2.7
parent: 102409:434fc614c506
user: Martin Panter <vadmium+py at gmail.com>
date: Sat Jul 23 04:22:09 2016 +0000
summary:
Issue #27130: Fix handling of buffers exceeding (U)INT_MAX in “zlib” module
Ported from patches by Xiang Zhang, Nadeem Vawda, and myself.
files:
Lib/test/test_zlib.py | 120 +++++
Misc/NEWS | 5 +
Modules/zlibmodule.c | 642 ++++++++++++++++-------------
3 files changed, 484 insertions(+), 283 deletions(-)
diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py
--- a/Lib/test/test_zlib.py
+++ b/Lib/test/test_zlib.py
@@ -1,4 +1,5 @@
import unittest
+from test import test_support as support
from test.test_support import TESTFN, run_unittest, import_module, unlink, requires
import binascii
import pickle
@@ -80,6 +81,16 @@
zlib.crc32('spam', (2**31)))
+# Issue #10276 - check that inputs >=4GB are handled correctly.
+class ChecksumBigBufferTestCase(unittest.TestCase):
+
+ @precisionbigmemtest(size=_4G + 4, memuse=1, dry_run=False)
+ def test_big_buffer(self, size):
+ data = b"nyan" * (_1G + 1)
+ self.assertEqual(zlib.crc32(data) & 0xFFFFFFFF, 1044521549)
+ self.assertEqual(zlib.adler32(data) & 0xFFFFFFFF, 2256789997)
+
+
class ExceptionTestCase(unittest.TestCase):
# make sure we generate some expected errors
def test_badlevel(self):
@@ -104,6 +115,15 @@
self.assertRaises(ValueError, zlib.decompressobj().flush, 0)
self.assertRaises(ValueError, zlib.decompressobj().flush, -1)
+ @support.cpython_only
+ def test_overflow(self):
+ with self.assertRaisesRegexp(OverflowError, 'int too large'):
+ zlib.decompress(b'', 15, sys.maxsize + 1)
+ with self.assertRaisesRegexp(OverflowError, 'int too large'):
+ zlib.decompressobj().decompress(b'', sys.maxsize + 1)
+ with self.assertRaisesRegexp(OverflowError, 'int too large'):
+ zlib.decompressobj().flush(sys.maxsize + 1)
+
class BaseCompressTestCase(object):
def check_big_compress_buffer(self, size, compress_func):
@@ -167,6 +187,28 @@
def test_big_decompress_buffer(self, size):
self.check_big_decompress_buffer(size, zlib.decompress)
+ @precisionbigmemtest(size=_4G, memuse=1)
+ def test_large_bufsize(self, size):
+ # Test decompress(bufsize) parameter greater than the internal limit
+ data = HAMLET_SCENE * 10
+ compressed = zlib.compress(data, 1)
+ self.assertEqual(zlib.decompress(compressed, 15, size), data)
+
+ def test_custom_bufsize(self):
+ data = HAMLET_SCENE * 10
+ compressed = zlib.compress(data, 1)
+ self.assertEqual(zlib.decompress(compressed, 15, CustomInt()), data)
+
+ @unittest.skipUnless(sys.maxsize > 2**32, 'requires 64bit platform')
+ @precisionbigmemtest(size=_4G + 100, memuse=4)
+ def test_64bit_compress(self, size):
+ data = b'x' * size
+ try:
+ comp = zlib.compress(data, 0)
+ self.assertEqual(zlib.decompress(comp), data)
+ finally:
+ comp = data = None
+
class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase):
# Test compression object
@@ -318,6 +360,22 @@
self.assertRaises(ValueError, dco.decompress, "", -1)
self.assertEqual('', dco.unconsumed_tail)
+ def test_maxlen_large(self):
+ # Sizes up to sys.maxsize should be accepted, although zlib is
+ # internally limited to expressing sizes with unsigned int
+ data = HAMLET_SCENE * 10
+ DEFAULTALLOC = 16 * 1024
+ self.assertGreater(len(data), DEFAULTALLOC)
+ compressed = zlib.compress(data, 1)
+ dco = zlib.decompressobj()
+ self.assertEqual(dco.decompress(compressed, sys.maxsize), data)
+
+ def test_maxlen_custom(self):
+ data = HAMLET_SCENE * 10
+ compressed = zlib.compress(data, 1)
+ dco = zlib.decompressobj()
+ self.assertEqual(dco.decompress(compressed, CustomInt()), data[:100])
+
def test_clear_unconsumed_tail(self):
# Issue #12050: calling decompress() without providing max_length
# should clear the unconsumed_tail attribute.
@@ -416,6 +474,22 @@
data = zlib.compress(input2)
self.assertEqual(dco.flush(), input1[1:])
+ @precisionbigmemtest(size=_4G, memuse=1)
+ def test_flush_large_length(self, size):
+ # Test flush(length) parameter greater than internal limit UINT_MAX
+ input = HAMLET_SCENE * 10
+ data = zlib.compress(input, 1)
+ dco = zlib.decompressobj()
+ dco.decompress(data, 1)
+ self.assertEqual(dco.flush(size), input[1:])
+
+ def test_flush_custom_length(self):
+ input = HAMLET_SCENE * 10
+ data = zlib.compress(input, 1)
+ dco = zlib.decompressobj()
+ dco.decompress(data, 1)
+ self.assertEqual(dco.flush(CustomInt()), input[1:])
+
@requires_Compress_copy
def test_compresscopy(self):
# Test copying a compression object
@@ -527,6 +601,46 @@
decompress = lambda s: d.decompress(s) + d.flush()
self.check_big_decompress_buffer(size, decompress)
+ @unittest.skipUnless(sys.maxsize > 2**32, 'requires 64bit platform')
+ @precisionbigmemtest(size=_4G + 100, memuse=4)
+ def test_64bit_compress(self, size):
+ data = b'x' * size
+ co = zlib.compressobj(0)
+ do = zlib.decompressobj()
+ try:
+ comp = co.compress(data) + co.flush()
+ uncomp = do.decompress(comp) + do.flush()
+ self.assertEqual(uncomp, data)
+ finally:
+ comp = uncomp = data = None
+
+ @unittest.skipUnless(sys.maxsize > 2**32, 'requires 64bit platform')
+ @precisionbigmemtest(size=_4G + 100, memuse=3)
+ def test_large_unused_data(self, size):
+ data = b'abcdefghijklmnop'
+ unused = b'x' * size
+ comp = zlib.compress(data) + unused
+ do = zlib.decompressobj()
+ try:
+ uncomp = do.decompress(comp) + do.flush()
+ self.assertEqual(unused, do.unused_data)
+ self.assertEqual(uncomp, data)
+ finally:
+ unused = comp = do = None
+
+ @unittest.skipUnless(sys.maxsize > 2**32, 'requires 64bit platform')
+ @precisionbigmemtest(size=_4G + 100, memuse=5)
+ def test_large_unconsumed_tail(self, size):
+ data = b'x' * size
+ do = zlib.decompressobj()
+ try:
+ comp = zlib.compress(data, 0)
+ uncomp = do.decompress(comp, 1) + do.flush()
+ self.assertEqual(uncomp, data)
+ self.assertEqual(do.unconsumed_tail, b'')
+ finally:
+ comp = uncomp = data = None
+
def test_wbits(self):
co = zlib.compressobj(1, zlib.DEFLATED, 15)
zlib15 = co.compress(HAMLET_SCENE) + co.flush()
@@ -658,9 +772,15 @@
"""
+class CustomInt:
+ def __int__(self):
+ return 100
+
+
def test_main():
run_unittest(
ChecksumTestCase,
+ ChecksumBigBufferTestCase,
ExceptionTestCase,
CompressTestCase,
CompressObjectTestCase
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -26,6 +26,11 @@
Library
-------
+- Issue #27130: In the "zlib" module, fix handling of large buffers
+ (typically 2 or 4 GiB). Previously, inputs were limited to 2 GiB, and
+ compression and decompression operations did not properly handle results of
+ 2 or 4 GiB.
+
- Issue #23804: Fix SSL zero-length recv() calls to not block and not raise
an error about unclean EOF.
diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c
--- a/Modules/zlibmodule.c
+++ b/Modules/zlibmodule.c
@@ -1,8 +1,9 @@
/* zlibmodule.c -- gzip-compatible data compression */
-/* See http://www.gzip.org/zlib/ */
+/* See http://zlib.net/ */
/* Windows users: read Python's PCbuild\readme.txt */
+#define PY_SSIZE_T_CLEAN
#include "Python.h"
#include "zlib.h"
@@ -129,6 +130,71 @@
return self;
}
+static void
+arrange_input_buffer(z_stream *zst, Py_ssize_t *remains)
+{
+ if ((size_t)*remains > UINT_MAX) {
+ zst->avail_in = UINT_MAX;
+ } else {
+ zst->avail_in = *remains;
+ }
+ *remains -= zst->avail_in;
+}
+
+static Py_ssize_t
+arrange_output_buffer_with_maximum(z_stream *zst, PyObject **buffer,
+ Py_ssize_t length,
+ Py_ssize_t max_length)
+{
+ Py_ssize_t occupied;
+
+ if (*buffer == NULL) {
+ if (!(*buffer = PyBytes_FromStringAndSize(NULL, length)))
+ return -1;
+ occupied = 0;
+ }
+ else {
+ occupied = zst->next_out - (Byte *)PyBytes_AS_STRING(*buffer);
+
+ if (length == occupied) {
+ Py_ssize_t new_length;
+ assert(length <= max_length);
+ /* can not scale the buffer over max_length */
+ if (length == max_length)
+ return -2;
+ if (length <= (max_length >> 1))
+ new_length = length << 1;
+ else
+ new_length = max_length;
+ if (_PyBytes_Resize(buffer, new_length) < 0)
+ return -1;
+ length = new_length;
+ }
+ }
+
+ if ((size_t)(length - occupied) > UINT_MAX) {
+ zst->avail_out = UINT_MAX;
+ } else {
+ zst->avail_out = length - occupied;
+ }
+ zst->next_out = (Byte *)PyBytes_AS_STRING(*buffer) + occupied;
+
+ return length;
+}
+
+static Py_ssize_t
+arrange_output_buffer(z_stream *zst, PyObject **buffer, Py_ssize_t length)
+{
+ Py_ssize_t ret;
+
+ ret = arrange_output_buffer_with_maximum(zst, buffer, length,
+ PY_SSIZE_T_MAX);
+ if (ret == -2)
+ PyErr_NoMemory();
+
+ return ret;
+}
+
PyDoc_STRVAR(compress__doc__,
"compress(string[, level]) -- Returned compressed string.\n"
"\n"
@@ -137,44 +203,31 @@
static PyObject *
PyZlib_compress(PyObject *self, PyObject *args)
{
- PyObject *ReturnVal = NULL;
- Byte *input, *output;
- int length, level=Z_DEFAULT_COMPRESSION, err;
+ PyObject *RetVal = NULL;
+ Byte *ibuf;
+ Py_ssize_t ibuflen, obuflen = DEFAULTALLOC;
+ int level=Z_DEFAULT_COMPRESSION;
+ int err, flush;
z_stream zst;
/* require Python string object, optional 'level' arg */
- if (!PyArg_ParseTuple(args, "s#|i:compress", &input, &length, &level))
+ if (!PyArg_ParseTuple(args, "s#|i:compress", &ibuf, &ibuflen, &level))
return NULL;
- zst.avail_out = length + length/1000 + 12 + 1;
-
- output = (Byte*)malloc(zst.avail_out);
- if (output == NULL) {
- PyErr_SetString(PyExc_MemoryError,
- "Can't allocate memory to compress data");
- return NULL;
- }
-
- /* Past the point of no return. From here on out, we need to make sure
- we clean up mallocs & INCREFs. */
-
zst.zalloc = (alloc_func)NULL;
zst.zfree = (free_func)Z_NULL;
- zst.next_out = (Byte *)output;
- zst.next_in = (Byte *)input;
- zst.avail_in = length;
+ zst.next_in = ibuf;
err = deflateInit(&zst, level);
- switch(err) {
- case(Z_OK):
+ switch (err) {
+ case Z_OK:
break;
- case(Z_MEM_ERROR):
+ case Z_MEM_ERROR:
PyErr_SetString(PyExc_MemoryError,
"Out of memory while compressing data");
goto error;
- case(Z_STREAM_ERROR):
- PyErr_SetString(ZlibError,
- "Bad compression level");
+ case Z_STREAM_ERROR:
+ PyErr_SetString(ZlibError, "Bad compression level");
goto error;
default:
deflateEnd(&zst);
@@ -182,27 +235,45 @@
goto error;
}
- Py_BEGIN_ALLOW_THREADS;
- err = deflate(&zst, Z_FINISH);
- Py_END_ALLOW_THREADS;
+ do {
+ arrange_input_buffer(&zst, &ibuflen);
+ flush = ibuflen == 0 ? Z_FINISH : Z_NO_FLUSH;
- if (err != Z_STREAM_END) {
- zlib_error(zst, err, "while compressing data");
- deflateEnd(&zst);
- goto error;
+ do {
+ obuflen = arrange_output_buffer(&zst, &RetVal, obuflen);
+ if (obuflen < 0) {
+ deflateEnd(&zst);
+ goto error;
+ }
+
+ Py_BEGIN_ALLOW_THREADS
+ err = deflate(&zst, flush);
+ Py_END_ALLOW_THREADS
+
+ if (err == Z_STREAM_ERROR) {
+ deflateEnd(&zst);
+ zlib_error(zst, err, "while compressing data");
+ goto error;
+ }
+
+ } while (zst.avail_out == 0);
+ assert(zst.avail_in == 0);
+
+ } while (flush != Z_FINISH);
+ assert(err == Z_STREAM_END);
+
+ err = deflateEnd(&zst);
+ if (err == Z_OK) {
+ if (_PyBytes_Resize(&RetVal, zst.next_out -
+ (Byte *)PyBytes_AS_STRING(RetVal)) < 0)
+ goto error;
+ return RetVal;
}
-
- err=deflateEnd(&zst);
- if (err == Z_OK)
- ReturnVal = PyString_FromStringAndSize((char *)output,
- zst.total_out);
else
zlib_error(zst, err, "while finishing compression");
-
error:
- free(output);
-
- return ReturnVal;
+ Py_XDECREF(RetVal);
+ return NULL;
}
PyDoc_STRVAR(decompress__doc__,
@@ -214,36 +285,32 @@
static PyObject *
PyZlib_decompress(PyObject *self, PyObject *args)
{
- PyObject *result_str;
- Byte *input;
- int length, err;
+ PyObject *RetVal = NULL;
+ Byte *ibuf;
+ Py_ssize_t ibuflen;
+ int err, flush;
int wsize=DEF_WBITS;
Py_ssize_t r_strlen=DEFAULTALLOC;
z_stream zst;
if (!PyArg_ParseTuple(args, "s#|in:decompress",
- &input, &length, &wsize, &r_strlen))
+ &ibuf, &ibuflen, &wsize, &r_strlen))
return NULL;
- if (r_strlen <= 0)
+ if (r_strlen <= 0) {
r_strlen = 1;
-
- zst.avail_in = length;
- zst.avail_out = r_strlen;
-
- if (!(result_str = PyString_FromStringAndSize(NULL, r_strlen)))
- return NULL;
+ }
zst.zalloc = (alloc_func)NULL;
zst.zfree = (free_func)Z_NULL;
- zst.next_out = (Byte *)PyString_AS_STRING(result_str);
- zst.next_in = (Byte *)input;
+ zst.avail_in = 0;
+ zst.next_in = ibuf;
err = inflateInit2(&zst, wsize);
- switch(err) {
- case(Z_OK):
+ switch (err) {
+ case Z_OK:
break;
- case(Z_MEM_ERROR):
+ case Z_MEM_ERROR:
PyErr_SetString(PyExc_MemoryError,
"Out of memory while decompressing data");
goto error;
@@ -254,42 +321,46 @@
}
do {
- Py_BEGIN_ALLOW_THREADS
- err=inflate(&zst, Z_FINISH);
- Py_END_ALLOW_THREADS
+ arrange_input_buffer(&zst, &ibuflen);
+ flush = ibuflen == 0 ? Z_FINISH : Z_NO_FLUSH;
- switch(err) {
- case(Z_STREAM_END):
- break;
- case(Z_BUF_ERROR):
- /*
- * If there is at least 1 byte of room according to zst.avail_out
- * and we get this error, assume that it means zlib cannot
- * process the inflate call() due to an error in the data.
- */
- if (zst.avail_out > 0) {
- zlib_error(zst, err, "while decompressing data");
+ do {
+ r_strlen = arrange_output_buffer(&zst, &RetVal, r_strlen);
+ if (r_strlen < 0) {
inflateEnd(&zst);
goto error;
}
- /* fall through */
- case(Z_OK):
- /* need more memory */
- if (_PyString_Resize(&result_str, r_strlen << 1) < 0) {
+
+ Py_BEGIN_ALLOW_THREADS
+ err = inflate(&zst, flush);
+ Py_END_ALLOW_THREADS
+
+ switch (err) {
+ case Z_OK: /* fall through */
+ case Z_BUF_ERROR: /* fall through */
+ case Z_STREAM_END:
+ break;
+ case Z_MEM_ERROR:
inflateEnd(&zst);
+ PyErr_SetString(PyExc_MemoryError,
+ "Out of memory while decompressing data");
+ goto error;
+ default:
+ inflateEnd(&zst);
+ zlib_error(zst, err, "while decompressing data");
goto error;
}
- zst.next_out = (unsigned char *)PyString_AS_STRING(result_str) \
- + r_strlen;
- zst.avail_out = r_strlen;
- r_strlen = r_strlen << 1;
- break;
- default:
- inflateEnd(&zst);
- zlib_error(zst, err, "while decompressing data");
- goto error;
- }
- } while (err != Z_STREAM_END);
+
+ } while (zst.avail_out == 0);
+
+ } while (err != Z_STREAM_END && ibuflen != 0);
+
+
+ if (err != Z_STREAM_END) {
+ inflateEnd(&zst);
+ zlib_error(zst, err, "while decompressing data");
+ goto error;
+ }
err = inflateEnd(&zst);
if (err != Z_OK) {
@@ -297,11 +368,12 @@
goto error;
}
- _PyString_Resize(&result_str, zst.total_out);
- return result_str;
+ _PyString_Resize(&RetVal, zst.next_out -
+ (Byte *)PyBytes_AS_STRING(RetVal));
+ return RetVal;
error:
- Py_XDECREF(result_str);
+ Py_XDECREF(RetVal);
return NULL;
}
@@ -317,23 +389,23 @@
return NULL;
self = newcompobject(&Comptype);
- if (self==NULL)
+ if (self == NULL)
return(NULL);
self->zst.zalloc = (alloc_func)NULL;
self->zst.zfree = (free_func)Z_NULL;
self->zst.next_in = NULL;
self->zst.avail_in = 0;
err = deflateInit2(&self->zst, level, method, wbits, memLevel, strategy);
- switch(err) {
- case (Z_OK):
+ switch (err) {
+ case Z_OK:
self->is_initialised = 1;
- return (PyObject*)self;
- case (Z_MEM_ERROR):
+ return (PyObject *)self;
+ case Z_MEM_ERROR:
Py_DECREF(self);
PyErr_SetString(PyExc_MemoryError,
"Can't allocate memory for compression object");
return NULL;
- case(Z_STREAM_ERROR):
+ case Z_STREAM_ERROR:
Py_DECREF(self);
PyErr_SetString(PyExc_ValueError, "Invalid initialization option");
return NULL;
@@ -354,21 +426,21 @@
self = newcompobject(&Decomptype);
if (self == NULL)
- return(NULL);
+ return NULL;
self->zst.zalloc = (alloc_func)NULL;
self->zst.zfree = (free_func)Z_NULL;
self->zst.next_in = NULL;
self->zst.avail_in = 0;
err = inflateInit2(&self->zst, wbits);
- switch(err) {
- case (Z_OK):
+ switch (err) {
+ case Z_OK:
self->is_initialised = 1;
- return (PyObject*)self;
- case(Z_STREAM_ERROR):
+ return (PyObject *)self;
+ case Z_STREAM_ERROR:
Py_DECREF(self);
PyErr_SetString(PyExc_ValueError, "Invalid initialization option");
return NULL;
- case (Z_MEM_ERROR):
+ case Z_MEM_ERROR:
Py_DECREF(self);
PyErr_SetString(PyExc_MemoryError,
"Can't allocate memory for decompression object");
@@ -411,58 +483,44 @@
static PyObject *
PyZlib_objcompress(compobject *self, PyObject *args)
{
- int err, inplen;
- Py_ssize_t length = DEFAULTALLOC;
- PyObject *RetVal;
- Byte *input;
- unsigned long start_total_out;
+ PyObject *RetVal = NULL;
+ Py_ssize_t ibuflen, obuflen = DEFAULTALLOC;
+ int err;
- if (!PyArg_ParseTuple(args, "s#:compress", &input, &inplen))
- return NULL;
-
- if (!(RetVal = PyString_FromStringAndSize(NULL, length)))
+ if (!PyArg_ParseTuple(args, "s#:compress", &self->zst.next_in, &ibuflen))
return NULL;
ENTER_ZLIB
- start_total_out = self->zst.total_out;
- self->zst.avail_in = inplen;
- self->zst.next_in = input;
- self->zst.avail_out = length;
- self->zst.next_out = (unsigned char *)PyString_AS_STRING(RetVal);
+ do {
+ arrange_input_buffer(&self->zst, &ibuflen);
- Py_BEGIN_ALLOW_THREADS
- err = deflate(&(self->zst), Z_NO_FLUSH);
- Py_END_ALLOW_THREADS
+ do {
+ obuflen = arrange_output_buffer(&self->zst, &RetVal, obuflen);
+ if (obuflen < 0)
+ goto error;
- /* while Z_OK and the output buffer is full, there might be more output,
- so extend the output buffer and try again */
- while (err == Z_OK && self->zst.avail_out == 0) {
- if (_PyString_Resize(&RetVal, length << 1) < 0)
- goto error;
- self->zst.next_out = (unsigned char *)PyString_AS_STRING(RetVal) \
- + length;
- self->zst.avail_out = length;
- length = length << 1;
+ Py_BEGIN_ALLOW_THREADS
+ err = deflate(&self->zst, Z_NO_FLUSH);
+ Py_END_ALLOW_THREADS
- Py_BEGIN_ALLOW_THREADS
- err = deflate(&(self->zst), Z_NO_FLUSH);
- Py_END_ALLOW_THREADS
- }
- /* We will only get Z_BUF_ERROR if the output buffer was full but
- there wasn't more output when we tried again, so it is not an error
- condition.
- */
+ if (err == Z_STREAM_ERROR) {
+ zlib_error(self->zst, err, "while compressing data");
+ goto error;
+ }
- if (err != Z_OK && err != Z_BUF_ERROR) {
- zlib_error(self->zst, err, "while compressing");
- Py_DECREF(RetVal);
- RetVal = NULL;
- goto error;
- }
- _PyString_Resize(&RetVal, self->zst.total_out - start_total_out);
+ } while (self->zst.avail_out == 0);
+ assert(self->zst.avail_in == 0);
+
+ } while (ibuflen != 0);
+
+ _PyString_Resize(&RetVal, self->zst.next_out -
+ (Byte *)PyBytes_AS_STRING(RetVal));
+ goto success;
error:
+ Py_CLEAR(RetVal);
+ success:
LEAVE_ZLIB
return RetVal;
}
@@ -470,41 +528,46 @@
/* Helper for objdecompress() and unflush(). Saves any unconsumed input data in
self->unused_data or self->unconsumed_tail, as appropriate. */
static int
-save_unconsumed_input(compobject *self, int err)
+save_unconsumed_input(compobject *self, Byte *input, Py_ssize_t inplen,
+ int err)
{
if (err == Z_STREAM_END) {
/* The end of the compressed data has been reached. Store the leftover
input data in self->unused_data. */
if (self->zst.avail_in > 0) {
Py_ssize_t old_size = PyString_GET_SIZE(self->unused_data);
- Py_ssize_t new_size;
+ Py_ssize_t new_size, left_size;
PyObject *new_data;
- if (self->zst.avail_in > PY_SSIZE_T_MAX - old_size) {
+ left_size = input + inplen - self->zst.next_in;
+ if (left_size > (PY_SSIZE_T_MAX - old_size)) {
PyErr_NoMemory();
return -1;
}
- new_size = old_size + self->zst.avail_in;
+ new_size = old_size + left_size;
new_data = PyString_FromStringAndSize(NULL, new_size);
if (new_data == NULL)
return -1;
Py_MEMCPY(PyString_AS_STRING(new_data),
PyString_AS_STRING(self->unused_data), old_size);
Py_MEMCPY(PyString_AS_STRING(new_data) + old_size,
- self->zst.next_in, self->zst.avail_in);
+ self->zst.next_in, left_size);
Py_SETREF(self->unused_data, new_data);
self->zst.avail_in = 0;
}
}
+
if (self->zst.avail_in > 0 || PyString_GET_SIZE(self->unconsumed_tail)) {
/* This code handles two distinct cases:
1. Output limit was reached. Save leftover input in unconsumed_tail.
2. All input data was consumed. Clear unconsumed_tail. */
+ Py_ssize_t left_size = input + inplen - self->zst.next_in;
PyObject *new_data = PyString_FromStringAndSize(
- (char *)self->zst.next_in, self->zst.avail_in);
+ (char *)self->zst.next_in, left_size);
if (new_data == NULL)
return -1;
Py_SETREF(self->unconsumed_tail, new_data);
}
+
return 0;
}
@@ -522,71 +585,69 @@
static PyObject *
PyZlib_objdecompress(compobject *self, PyObject *args)
{
- int err, inplen, max_length = 0;
- Py_ssize_t old_length, length = DEFAULTALLOC;
- PyObject *RetVal;
+ int err = Z_OK;
+ Py_ssize_t inplen, max_length = 0;
+ Py_ssize_t ibuflen, obuflen = DEFAULTALLOC, hard_limit;
+ PyObject *RetVal = NULL;
Byte *input;
- unsigned long start_total_out;
- if (!PyArg_ParseTuple(args, "s#|i:decompress", &input,
+ if (!PyArg_ParseTuple(args, "s#|n:decompress", &input,
&inplen, &max_length))
return NULL;
if (max_length < 0) {
PyErr_SetString(PyExc_ValueError,
"max_length must be greater than zero");
return NULL;
- }
+ } else if (max_length == 0)
+ hard_limit = PY_SSIZE_T_MAX;
+ else
+ hard_limit = max_length;
+
+ self->zst.next_in = input;
+ ibuflen = inplen;
/* limit amount of data allocated to max_length */
- if (max_length && length > max_length)
- length = max_length;
- if (!(RetVal = PyString_FromStringAndSize(NULL, length)))
- return NULL;
+ if (max_length && obuflen > max_length)
+ obuflen = max_length;
ENTER_ZLIB
- start_total_out = self->zst.total_out;
- self->zst.avail_in = inplen;
- self->zst.next_in = input;
- self->zst.avail_out = length;
- self->zst.next_out = (unsigned char *)PyString_AS_STRING(RetVal);
+ do {
+ arrange_input_buffer(&self->zst, &ibuflen);
- Py_BEGIN_ALLOW_THREADS
- err = inflate(&(self->zst), Z_SYNC_FLUSH);
- Py_END_ALLOW_THREADS
+ do {
+ obuflen = arrange_output_buffer_with_maximum(&self->zst, &RetVal,
+ obuflen, hard_limit);
+ if (obuflen == -2) {
+ if (max_length > 0) {
+ goto save;
+ }
+ PyErr_NoMemory();
+ }
+ if (obuflen < 0) {
+ goto abort;
+ }
- /* While Z_OK and the output buffer is full, there might be more output.
- So extend the output buffer and try again.
- */
- while (err == Z_OK && self->zst.avail_out == 0) {
- /* If max_length set, don't continue decompressing if we've already
- reached the limit.
- */
- if (max_length && length >= max_length)
- break;
+ Py_BEGIN_ALLOW_THREADS
+ err = inflate(&self->zst, Z_SYNC_FLUSH);
+ Py_END_ALLOW_THREADS
- /* otherwise, ... */
- old_length = length;
- length = length << 1;
- if (max_length && length > max_length)
- length = max_length;
+ switch (err) {
+ case Z_OK: /* fall through */
+ case Z_BUF_ERROR: /* fall through */
+ case Z_STREAM_END:
+ break;
+ default:
+ goto save;
+ }
- if (_PyString_Resize(&RetVal, length) < 0)
- goto error;
- self->zst.next_out = (unsigned char *)PyString_AS_STRING(RetVal) \
- + old_length;
- self->zst.avail_out = length - old_length;
+ } while (self->zst.avail_out == 0);
- Py_BEGIN_ALLOW_THREADS
- err = inflate(&(self->zst), Z_SYNC_FLUSH);
- Py_END_ALLOW_THREADS
- }
+ } while (err != Z_STREAM_END && ibuflen != 0);
- if (save_unconsumed_input(self, err) < 0) {
- Py_DECREF(RetVal);
- RetVal = NULL;
- goto error;
- }
+ save:
+ if (save_unconsumed_input(self, input, inplen, err) < 0)
+ goto abort;
/* This is the logical place to call inflateEnd, but the old behaviour of
only calling it on flush() is preserved. */
@@ -597,14 +658,16 @@
not an error condition.
*/
zlib_error(self->zst, err, "while decompressing");
- Py_DECREF(RetVal);
- RetVal = NULL;
- goto error;
+ goto abort;
}
- _PyString_Resize(&RetVal, self->zst.total_out - start_total_out);
+ _PyString_Resize(&RetVal, self->zst.next_out -
+ (Byte *)PyBytes_AS_STRING(RetVal));
+ goto success;
- error:
+ abort:
+ Py_CLEAR(RetVal);
+ success:
LEAVE_ZLIB
return RetVal;
@@ -621,10 +684,10 @@
static PyObject *
PyZlib_flush(compobject *self, PyObject *args)
{
- int err, length = DEFAULTALLOC;
- PyObject *RetVal;
+ int err;
+ Py_ssize_t length = DEFAULTALLOC;
+ PyObject *RetVal = NULL;
int flushmode = Z_FINISH;
- unsigned long start_total_out;
if (!PyArg_ParseTuple(args, "|i:flush", &flushmode))
return NULL;
@@ -635,44 +698,37 @@
return PyString_FromStringAndSize(NULL, 0);
}
- if (!(RetVal = PyString_FromStringAndSize(NULL, length)))
- return NULL;
-
ENTER_ZLIB
- start_total_out = self->zst.total_out;
self->zst.avail_in = 0;
- self->zst.avail_out = length;
- self->zst.next_out = (unsigned char *)PyString_AS_STRING(RetVal);
- Py_BEGIN_ALLOW_THREADS
- err = deflate(&(self->zst), flushmode);
- Py_END_ALLOW_THREADS
-
- /* while Z_OK and the output buffer is full, there might be more output,
- so extend the output buffer and try again */
- while (err == Z_OK && self->zst.avail_out == 0) {
- if (_PyString_Resize(&RetVal, length << 1) < 0)
+ do {
+ length = arrange_output_buffer(&self->zst, &RetVal, length);
+ if (length < 0) {
+ Py_CLEAR(RetVal);
goto error;
- self->zst.next_out = (unsigned char *)PyString_AS_STRING(RetVal) \
- + length;
- self->zst.avail_out = length;
- length = length << 1;
+ }
Py_BEGIN_ALLOW_THREADS
- err = deflate(&(self->zst), flushmode);
+ err = deflate(&self->zst, flushmode);
Py_END_ALLOW_THREADS
- }
+
+ if (err == Z_STREAM_ERROR) {
+ zlib_error(self->zst, err, "while flushing");
+ Py_CLEAR(RetVal);
+ goto error;
+ }
+ } while (self->zst.avail_out == 0);
+ assert(self->zst.avail_in == 0);
/* If flushmode is Z_FINISH, we also have to call deflateEnd() to free
various data structures. Note we should only get Z_STREAM_END when
flushmode is Z_FINISH, but checking both for safety*/
if (err == Z_STREAM_END && flushmode == Z_FINISH) {
- err = deflateEnd(&(self->zst));
+ err = deflateEnd(&self->zst);
if (err != Z_OK) {
zlib_error(self->zst, err, "from deflateEnd()");
- Py_DECREF(RetVal);
- RetVal = NULL;
+ Py_CLEAR(RetVal);
goto error;
}
else
@@ -682,18 +738,17 @@
but there wasn't more output when we tried again, so it is
not an error condition.
*/
- } else if (err!=Z_OK && err!=Z_BUF_ERROR) {
+ } else if (err != Z_OK && err != Z_BUF_ERROR) {
zlib_error(self->zst, err, "while flushing");
- Py_DECREF(RetVal);
- RetVal = NULL;
+ Py_CLEAR(RetVal);
goto error;
}
- _PyString_Resize(&RetVal, self->zst.total_out - start_total_out);
+ _PyString_Resize(&RetVal, self->zst.next_out -
+ (Byte *)PyBytes_AS_STRING(RetVal));
error:
LEAVE_ZLIB
-
return RetVal;
}
@@ -715,13 +770,13 @@
*/
ENTER_ZLIB
err = deflateCopy(&retval->zst, &self->zst);
- switch(err) {
- case(Z_OK):
+ switch (err) {
+ case Z_OK:
break;
- case(Z_STREAM_ERROR):
+ case Z_STREAM_ERROR:
PyErr_SetString(PyExc_ValueError, "Inconsistent stream state");
goto error;
- case(Z_MEM_ERROR):
+ case Z_MEM_ERROR:
PyErr_SetString(PyExc_MemoryError,
"Can't allocate memory for compression object");
goto error;
@@ -764,13 +819,13 @@
*/
ENTER_ZLIB
err = inflateCopy(&retval->zst, &self->zst);
- switch(err) {
- case(Z_OK):
+ switch (err) {
+ case Z_OK:
break;
- case(Z_STREAM_ERROR):
+ case Z_STREAM_ERROR:
PyErr_SetString(PyExc_ValueError, "Inconsistent stream state");
goto error;
- case(Z_MEM_ERROR):
+ case Z_MEM_ERROR:
PyErr_SetString(PyExc_MemoryError,
"Can't allocate memory for decompression object");
goto error;
@@ -807,73 +862,76 @@
static PyObject *
PyZlib_unflush(compobject *self, PyObject *args)
{
- int err, length = DEFAULTALLOC;
- PyObject * retval = NULL;
- unsigned long start_total_out;
+ Py_ssize_t length = DEFAULTALLOC;
+ int err, flush;
+ PyObject *RetVal = NULL;
+ Py_ssize_t ibuflen;
- if (!PyArg_ParseTuple(args, "|i:flush", &length))
+ if (!PyArg_ParseTuple(args, "|n:flush", &length))
return NULL;
if (length <= 0) {
PyErr_SetString(PyExc_ValueError, "length must be greater than zero");
return NULL;
}
- if (!(retval = PyString_FromStringAndSize(NULL, length)))
- return NULL;
-
ENTER_ZLIB
- start_total_out = self->zst.total_out;
- self->zst.avail_in = PyString_GET_SIZE(self->unconsumed_tail);
self->zst.next_in = (Byte *)PyString_AS_STRING(self->unconsumed_tail);
- self->zst.avail_out = length;
- self->zst.next_out = (Byte *)PyString_AS_STRING(retval);
+ ibuflen = PyString_GET_SIZE(self->unconsumed_tail);
- Py_BEGIN_ALLOW_THREADS
- err = inflate(&(self->zst), Z_FINISH);
- Py_END_ALLOW_THREADS
+ do {
+ arrange_input_buffer(&self->zst, &ibuflen);
+ flush = ibuflen == 0 ? Z_FINISH : Z_NO_FLUSH;
- /* while Z_OK and the output buffer is full, there might be more output,
- so extend the output buffer and try again */
- while ((err == Z_OK || err == Z_BUF_ERROR) && self->zst.avail_out == 0) {
- if (_PyString_Resize(&retval, length << 1) < 0)
- goto error;
- self->zst.next_out = (Byte *)PyString_AS_STRING(retval) + length;
- self->zst.avail_out = length;
- length = length << 1;
+ do {
+ length = arrange_output_buffer(&self->zst, &RetVal, length);
+ if (length < 0)
+ goto abort;
- Py_BEGIN_ALLOW_THREADS
- err = inflate(&(self->zst), Z_FINISH);
- Py_END_ALLOW_THREADS
- }
+ Py_BEGIN_ALLOW_THREADS
+ err = inflate(&self->zst, flush);
+ Py_END_ALLOW_THREADS
- if (save_unconsumed_input(self, err) < 0) {
- Py_DECREF(retval);
- retval = NULL;
- goto error;
- }
+ switch (err) {
+ case Z_OK: /* fall through */
+ case Z_BUF_ERROR: /* fall through */
+ case Z_STREAM_END:
+ break;
+ default:
+ goto save;
+ }
+
+ } while (self->zst.avail_out == 0);
+
+ } while (err != Z_STREAM_END && ibuflen != 0);
+
+ save:
+ if (save_unconsumed_input(self,
+ (Byte *)PyString_AS_STRING(self->unconsumed_tail),
+ PyString_GET_SIZE(self->unconsumed_tail), err) < 0)
+ goto abort;
/* If the end of the compressed stream was reached, we also have to call
inflateEnd() to free various data structures. inflate() reports this with
Z_STREAM_END, regardless of the flush mode passed to it. */
if (err == Z_STREAM_END) {
- err = inflateEnd(&(self->zst));
+ err = inflateEnd(&self->zst);
self->is_initialised = 0;
if (err != Z_OK) {
zlib_error(self->zst, err, "from inflateEnd()");
- Py_DECREF(retval);
- retval = NULL;
- goto error;
+ goto abort;
}
}
- _PyString_Resize(&retval, self->zst.total_out - start_total_out);
+ _PyString_Resize(&RetVal, self->zst.next_out -
+ (Byte *)PyBytes_AS_STRING(RetVal));
+ goto success;
-error:
-
+ abort:
+ Py_CLEAR(RetVal);
+ success:
LEAVE_ZLIB
-
- return retval;
+ return RetVal;
}
static PyMethodDef comp_methods[] =
@@ -943,15 +1001,24 @@
{
unsigned int adler32val = 1; /* adler32(0L, Z_NULL, 0) */
Byte *buf;
- int len, signed_val;
+ Py_ssize_t len;
+ int signed_val;
if (!PyArg_ParseTuple(args, "s#|I:adler32", &buf, &len, &adler32val))
return NULL;
+
+ /* Avoid truncation of length for very large buffers. adler32() takes
+ length as an unsigned int, which may be narrower than Py_ssize_t. */
+ while ((size_t)len > UINT_MAX) {
+ adler32val = adler32(adler32val, buf, UINT_MAX);
+ buf += (size_t) UINT_MAX;
+ len -= (size_t) UINT_MAX;
+ }
/* In Python 2.x we return a signed integer regardless of native platform
* long size (the 32bit unsigned long is treated as 32-bit signed and sign
* extended into a 64-bit long inside the integer object). 3.0 does the
* right thing and returns unsigned. http://bugs.python.org/issue1202 */
- signed_val = adler32(adler32val, buf, len);
+ signed_val = adler32(adler32val, buf, (unsigned int)len);
return PyInt_FromLong(signed_val);
}
@@ -966,15 +1033,24 @@
{
unsigned int crc32val = 0; /* crc32(0L, Z_NULL, 0) */
Byte *buf;
- int len, signed_val;
+ Py_ssize_t len;
+ int signed_val;
if (!PyArg_ParseTuple(args, "s#|I:crc32", &buf, &len, &crc32val))
return NULL;
+
+ /* Avoid truncation of length for very large buffers. crc32() takes
+ length as an unsigned int, which may be narrower than Py_ssize_t. */
+ while ((size_t)len > UINT_MAX) {
+ crc32val = crc32(crc32val, buf, UINT_MAX);
+ buf += (size_t) UINT_MAX;
+ len -= (size_t) UINT_MAX;
+ }
/* In Python 2.x we return a signed integer regardless of native platform
* long size (the 32bit unsigned long is treated as 32-bit signed and sign
* extended into a 64-bit long inside the integer object). 3.0 does the
* right thing and returns unsigned. http://bugs.python.org/issue1202 */
- signed_val = crc32(crc32val, buf, len);
+ signed_val = crc32(crc32val, buf, (unsigned int)len);
return PyInt_FromLong(signed_val);
}
--
Repository URL: https://hg.python.org/cpython
More information about the Python-checkins
mailing list