[Python-checkins] CVS: python/dist/src/Modules zlibmodule.c,2.43,2.44

Jeremy Hylton jhylton@users.sourceforge.net
Tue, 16 Oct 2001 13:39:51 -0700


Update of /cvsroot/python/python/dist/src/Modules
In directory usw-pr-cvs1:/tmp/cvs-serv21328/Modules

Modified Files:
	zlibmodule.c 
Log Message:
[ #403753 ] zlib decompress; uncontrollable memory usage

Mostly by Toby Dickenson and Titus Brown.

Add an optional max_length argument to a decompression object's
decompress() method.  The argument specifies the maximum length of the
return value.  If decompressing all of the input would produce more than
max_length bytes, decompression stops at that limit and the input data
that has not yet been consumed is stored in the unconsumed_tail
attribute.  (Not to be confused with unused_data, which is a separate
issue.)

Difference from SF patch: Default value for unconsumed_tail is ""
rather than None.  It's simpler if the attribute is always a string.



Index: zlibmodule.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/zlibmodule.c,v
retrieving revision 2.43
retrieving revision 2.44
diff -C2 -d -r2.43 -r2.44
*** zlibmodule.c	2001/10/09 10:54:31	2.43
--- zlibmodule.c	2001/10/16 20:39:49	2.44
***************
*** 79,82 ****
--- 79,83 ----
    z_stream zst;
    PyObject *unused_data;
+   PyObject *unconsumed_tail;
    int is_initialised;
  } compobject;
***************
*** 101,104 ****
--- 102,114 ----
  	self->is_initialised = 0;
  	self->unused_data = PyString_FromString("");
+ 	if (self->unused_data == NULL) {
+ 	    Py_DECREF(self);
+ 	    return NULL;
+ 	}
+ 	self->unconsumed_tail = PyString_FromString("");
+ 	if (self->unconsumed_tail == NULL) {
+ 	    Py_DECREF(self);
+ 	    return NULL;
+ 	}
          return self;
  }
***************
*** 486,489 ****
--- 496,500 ----
        deflateEnd(&self->zst);
      Py_XDECREF(self->unused_data);
+     Py_XDECREF(self->unconsumed_tail);
      PyObject_Del(self);
  
***************
*** 499,502 ****
--- 510,514 ----
        inflateEnd(&self->zst);
      Py_XDECREF(self->unused_data);
+     Py_XDECREF(self->unconsumed_tail);
      PyObject_Del(self);
  
***************
*** 596,603 ****
  
  static char decomp_decompress__doc__[] =
! "decompress(data) -- Return a string containing the decompressed version of the data.\n\n"
  "After calling this function, some of the input data may still\n"
  "be stored in internal buffers for later processing.\n"
! "Call the flush() method to clear these buffers."
  ;
  
--- 608,619 ----
  
  static char decomp_decompress__doc__[] =
! "decompress(data, max_length) -- Return a string containing\n"
! "the decompressed version of the data.\n\n"
  "After calling this function, some of the input data may still\n"
  "be stored in internal buffers for later processing.\n"
! "Call the flush() method to clear these buffers.\n"
! "If the max_length parameter is specified then the return value will be\n"
! "no longer than max_length.  Unconsumed input data will be stored in\n"
! "the unconsumed_tail attribute."
  ;
  
***************
*** 605,609 ****
  PyZlib_objdecompress(compobject *self, PyObject *args)
  {
!   int err, inplen, length = DEFAULTALLOC;
    PyObject *RetVal;
    Byte *input;
--- 621,626 ----
  PyZlib_objdecompress(compobject *self, PyObject *args)
  {
!   int err, inplen, old_length, length = DEFAULTALLOC;
!   int max_length = 0;
    PyObject *RetVal;
    Byte *input;
***************
*** 612,620 ****
    PyObject * inputString;
  
!   if (!PyArg_ParseTuple(args, "S:decompress", &inputString))
      return NULL;
    if (PyString_AsStringAndSize(inputString, (char**)&input, &inplen) == -1)
      return NULL;
  
    if (!(RetVal = PyString_FromStringAndSize(NULL, length))) {
      PyErr_SetString(PyExc_MemoryError,
--- 629,646 ----
    PyObject * inputString;
  
!   if (!PyArg_ParseTuple(args, "S|i:decompress", &inputString, &max_length))
!     return NULL;
!   if (max_length < 0) {
!     PyErr_SetString(PyExc_ValueError,
! 		    "max_length must be greater than zero");
      return NULL;
+   }
+ 
    if (PyString_AsStringAndSize(inputString, (char**)&input, &inplen) == -1)
      return NULL;
  
+   /* limit amount of data allocated to max_length */
+   if (max_length && length > max_length) 
+     length = max_length;
    if (!(RetVal = PyString_FromStringAndSize(NULL, length))) {
      PyErr_SetString(PyExc_MemoryError,
***************
*** 638,645 ****
    Py_END_ALLOW_THREADS
  
!   /* while Z_OK and the output buffer is full, there might be more output,
!     so extend the output buffer and try again */
    while (err == Z_OK && self->zst.avail_out == 0) { 
!     if (_PyString_Resize(&RetVal, length << 1) == -1) {
        PyErr_SetString(PyExc_MemoryError,
                        "Can't allocate memory to compress data");
--- 664,684 ----
    Py_END_ALLOW_THREADS
  
!   /* While Z_OK and the output buffer is full, there might be more output.
!      So extend the output buffer and try again.
!   */
    while (err == Z_OK && self->zst.avail_out == 0) { 
!     /* If max_length set, don't continue decompressing if we've already
!         reached the limit.
!     */
!     if (max_length && length >= max_length)
!       break;
! 
!     /* otherwise, ... */
!     old_length = length;
!     length = length << 1;
!     if (max_length && length > max_length) 
!       length = max_length;
! 
!     if (_PyString_Resize(&RetVal, length) == -1) {
        PyErr_SetString(PyExc_MemoryError,
                        "Can't allocate memory to compress data");
***************
*** 647,653 ****
        break;
      }
!     self->zst.next_out = (unsigned char *)PyString_AsString(RetVal) + length;
!     self->zst.avail_out = length;
!     length = length << 1;
      Py_BEGIN_ALLOW_THREADS
      err = inflate(&(self->zst), Z_SYNC_FLUSH);
--- 686,692 ----
        break;
      }
!     self->zst.next_out = (unsigned char *)PyString_AsString(RetVal)+old_length;
!     self->zst.avail_out = length - old_length;
! 
      Py_BEGIN_ALLOW_THREADS
      err = inflate(&(self->zst), Z_SYNC_FLUSH);
***************
*** 655,658 ****
--- 694,707 ----
    }
  
+   /* Not all of the compressed data could be accomodated in the output buffer
+     of specified size. Return the unconsumed tail in an attribute.*/
+   if(max_length) {
+     Py_DECREF(self->unconsumed_tail);
+     self->unconsumed_tail = PyString_FromStringAndSize(self->zst.next_in, 
+ 						       self->zst.avail_in);
+     if(!self->unconsumed_tail)
+       return_error = 1;
+   }
+ 
    /* The end of the compressed data has been reached, so set the unused_data 
      attribute to a string containing the remainder of the data in the string. 
***************
*** 885,888 ****
--- 934,942 ----
  	    Py_INCREF(self->unused_data);
              retval = self->unused_data;
+ 	  }
+ 	else if (strcmp(name, "unconsumed_tail") == 0) 
+ 	  {  
+ 	    Py_INCREF(self->unconsumed_tail);
+ 	    retval = self->unconsumed_tail;
  	  }
  	else