[pypy-svn] r47084 - in pypy/dist/pypy: module/zlib module/zlib/test rlib rlib/test

arigo at codespeak.net arigo at codespeak.net
Tue Oct 2 10:31:00 CEST 2007


Author: arigo
Date: Tue Oct  2 10:30:59 2007
New Revision: 47084

Modified:
   pypy/dist/pypy/module/zlib/interp_zlib.py
   pypy/dist/pypy/module/zlib/test/test_zlib.py
   pypy/dist/pypy/rlib/rzlib.py
   pypy/dist/pypy/rlib/test/test_rzlib.py
Log:
This mostly finishes the zlib module.


Modified: pypy/dist/pypy/module/zlib/interp_zlib.py
==============================================================================
--- pypy/dist/pypy/module/zlib/interp_zlib.py	(original)
+++ pypy/dist/pypy/module/zlib/interp_zlib.py	Tue Oct  2 10:30:59 2007
@@ -1,6 +1,7 @@
+import sys
 from pypy.interpreter.gateway import ObjSpace, W_Root, interp2app
 from pypy.interpreter.baseobjspace import Wrappable
-from pypy.interpreter.typedef import TypeDef
+from pypy.interpreter.typedef import TypeDef, interp_attrproperty
 from pypy.interpreter.error import OperationError
 from pypy.rlib.rarithmetic import intmask
 
@@ -61,8 +62,7 @@
         try:
             stream = rzlib.deflateInit(level)
         except ValueError:
-            raise OperationError(space.w_ValueError,
-                                 space.wrap("Invalid initialization option"))
+            raise zlib_error(space, "Bad compression level")
         try:
             result = rzlib.compress(stream, string, rzlib.Z_FINISH)
         finally:
@@ -78,16 +78,15 @@
     decompress(string[, wbits[, bufsize]]) -- Return decompressed string.
 
     Optional arg wbits is the window buffer size.  Optional arg bufsize is
-    the initial output buffer size.
+    only for compatibility with CPython and is ignored.
     """
     try:
         try:
             stream = rzlib.inflateInit(wbits)
         except ValueError:
-            raise OperationError(space.w_ValueError,
-                                 space.wrap("Invalid initialization option"))
+            raise zlib_error(space, "Bad window buffer size")
         try:
-            result = rzlib.decompress(stream, string, rzlib.Z_FINISH)
+            result, _, _ = rzlib.decompress(stream, string, rzlib.Z_FINISH)
         finally:
             rzlib.inflateEnd(stream)
     except rzlib.RZlibError, e:
@@ -114,6 +113,7 @@
         except ValueError:
             raise OperationError(space.w_ValueError,
                                  space.wrap("Invalid initialization option"))
+        self.lock = space.allocate_lock()
 
     def __del__(self):
         """Automatically free the resources used by the stream."""
@@ -132,7 +132,12 @@
         Call the flush() method to clear these buffers.
         """
         try:
-            result = rzlib.compress(self.stream, data)
+            lock = self.lock
+            lock.acquire(True)
+            try:
+                result = rzlib.compress(self.stream, data)
+            finally:
+                lock.release()
         except rzlib.RZlibError, e:
             raise zlib_error(self.space, e.msg)
         return self.space.wrap(result)
@@ -152,7 +157,12 @@
         compressed.
         """
         try:
-            result = rzlib.compress(self.stream, '', mode)
+            lock = self.lock
+            lock.acquire(True)
+            try:
+                result = rzlib.compress(self.stream, '', mode)
+            finally:
+                lock.release()
         except rzlib.RZlibError, e:
             raise zlib_error(self.space, e.msg)
         return self.space.wrap(result)
@@ -198,6 +208,8 @@
         inflateInit2.
         """
         self.space = space
+        self.unused_data = ''
+        self.unconsumed_tail = ''
         try:
             self.stream = rzlib.inflateInit(wbits)
         except rzlib.RZlibError, e:
@@ -205,6 +217,7 @@
         except ValueError:
             raise OperationError(space.w_ValueError,
                                  space.wrap("Invalid initialization option"))
+        self.lock = space.allocate_lock()
 
     def __del__(self):
         """Automatically free the resources used by the stream."""
@@ -222,14 +235,33 @@
         no longer than max_length.  Unconsumed input data will be stored in the
         unconsumed_tail attribute.
         """
-        if max_length != 0:      # XXX
-            raise OperationError(self.space.w_NotImplementedError,
-                                 self.space.wrap("max_length != 0"))
+        if max_length == 0:
+            max_length = sys.maxint
+        elif max_length < 0:
+            raise OperationError(self.space.w_ValueError,
+                                 self.space.wrap("max_length must be "
+                                                 "greater than zero"))
         try:
-            result = rzlib.decompress(self.stream, data)
+            lock = self.lock
+            lock.acquire(True)
+            try:
+                result = rzlib.decompress(self.stream, data,
+                                          max_length = max_length)
+            finally:
+                lock.release()
         except rzlib.RZlibError, e:
             raise zlib_error(self.space, e.msg)
-        return self.space.wrap(result)
+
+        string, finished, unused_len = result
+        unused_start = len(data) - unused_len
+        assert unused_start >= 0
+        tail = data[unused_start:]
+        if finished:
+            self.unconsumed_tail = ''
+            self.unused_data = tail
+        else:
+            self.unconsumed_tail = tail
+        return self.space.wrap(string)
     decompress.unwrap_spec = ['self', str, int]
 
 
@@ -264,6 +296,8 @@
     __new__ = interp2app(Decompress___new__),
     decompress = interp2app(Decompress.decompress),
     flush = interp2app(Decompress.flush),
+    unused_data = interp_attrproperty('unused_data', Decompress),
+    unconsumed_tail = interp_attrproperty('unconsumed_tail', Decompress),
     __doc__ = """decompressobj([wbits]) -- Return a decompressor object.
 
 Optional arg wbits is the window buffer size.

Modified: pypy/dist/pypy/module/zlib/test/test_zlib.py
==============================================================================
--- pypy/dist/pypy/module/zlib/test/test_zlib.py	(original)
+++ pypy/dist/pypy/module/zlib/test/test_zlib.py	Tue Oct  2 10:30:59 2007
@@ -133,5 +133,45 @@
 
 
     def test_decompress_invalid_input(self):
+        """
+        Try to feed garbage to zlib.decompress().
+        """
         raises(self.zlib.error, self.zlib.decompress, self.compressed[:-2])
         raises(self.zlib.error, self.zlib.decompress, 'foobar')
+
+
+    def test_unused_data(self):
+        """
+        Try to feed too much data to a decompressobj's decompress().
+        It should show up in the unused_data attribute.
+        """
+        d = self.zlib.decompressobj()
+        s = d.decompress(self.compressed + 'extrastuff')
+        assert s == self.expanded
+        assert d.unused_data == 'extrastuff'
+        # try again with several decompression steps
+        d = self.zlib.decompressobj()
+        s1 = d.decompress(self.compressed[:10])
+        assert d.unused_data == ''
+        s2 = d.decompress(self.compressed[10:-3])
+        assert d.unused_data == ''
+        s3 = d.decompress(self.compressed[-3:] + 'spam' * 100)
+        assert d.unused_data == 'spam' * 100
+        assert s1 + s2 + s3 == self.expanded
+        s4 = d.decompress('egg' * 50)
+        assert d.unused_data == 'egg' * 50
+        assert s4 == ''
+
+
+    def test_max_length(self):
+        """
+        Test the max_length argument of the decompress() method
+        and the corresponding unconsumed_tail attribute.
+        """
+        d = self.zlib.decompressobj()
+        data = self.compressed
+        for i in range(0, 100, 10):
+            s1 = d.decompress(data, 10)
+            assert s1 == self.expanded[i:i+10]
+            data = d.unconsumed_tail
+        assert not data

Modified: pypy/dist/pypy/rlib/rzlib.py
==============================================================================
--- pypy/dist/pypy/rlib/rzlib.py	(original)
+++ pypy/dist/pypy/rlib/rzlib.py	Tue Oct  2 10:30:59 2007
@@ -1,3 +1,4 @@
+import sys
 from pypy.rpython.lltypesystem import rffi, lltype
 from pypy.rpython.tool import rffi_platform
 
@@ -263,15 +264,24 @@
     """
     # Warning, reentrant calls to the zlib with a given stream can cause it
     # to crash.  The caller of pypy.rlib.rzlib should use locks if needed.
-    return _operate(stream, data, flush, False, _deflate, "while compressing")
-
-
-def decompress(stream, data, flush=Z_SYNC_FLUSH):
-    """
-    Feed more data into an inflate stream.  Returns a string containing
-    (a part of) the decompressed data.  If flush != Z_NO_FLUSH, this also
-    flushes the output data; see zlib.h or the documentation of the
-    zlib module for the possible values of 'flush'.
+    data, _, avail_in = _operate(stream, data, flush, sys.maxint, _deflate,
+                                 "while compressing")
+    assert not avail_in, "not all input consumed by deflate"
+    return data
+
+
+def decompress(stream, data, flush=Z_SYNC_FLUSH, max_length=sys.maxint):
+    """
+    Feed more data into an inflate stream.  Returns a tuple (string,
+    finished, unused_data_length).  The string contains (a part of) the
+    decompressed data.  If flush != Z_NO_FLUSH, this also flushes the
+    output data; see zlib.h or the documentation of the zlib module for
+    the possible values of 'flush'.
+
+    The 'string' is never longer than 'max_length'.  The
+    'unused_data_length' is the number of unprocessed input characters,
+    either because they are after the end of the compressed stream or
+    because processing them would cause 'max_length' to be exceeded.
     """
     # Warning, reentrant calls to the zlib with a given stream can cause it
     # to crash.  The caller of pypy.rlib.rzlib should use locks if needed.
@@ -284,11 +294,18 @@
         should_finish = True
     else:
         should_finish = False
-    return _operate(stream, data, flush, should_finish, _inflate,
-                    "while decompressing")
+    result = _operate(stream, data, flush, max_length, _inflate,
+                      "while decompressing")
+    if should_finish:
+        # detect incomplete input in the Z_FINISH case
+        finished = result[1]
+        if not finished:
+            raise RZlibError("the input compressed stream of data is "
+                             "incomplete")
+    return result
 
 
-def _operate(stream, data, flush, should_finish, cfunc, while_doing):
+def _operate(stream, data, flush, max_length, cfunc, while_doing):
     """Common code for compress() and decompress().
     """
     # Prepare the input buffer for the stream
@@ -312,12 +329,19 @@
 
             while True:
                 stream.c_next_out = rffi.cast(Bytefp, outbuf)
-                rffi.setintfield(stream, 'c_avail_out', OUTPUT_BUFFER_SIZE)
+                bufsize = OUTPUT_BUFFER_SIZE
+                if max_length < bufsize:
+                    if max_length <= 0:
+                        err = Z_OK
+                        break
+                    bufsize = max_length
+                max_length -= bufsize
+                rffi.setintfield(stream, 'c_avail_out', bufsize)
                 err = cfunc(stream, flush)
                 if err == Z_OK or err == Z_STREAM_END:
                     # accumulate data into 'result'
                     avail_out = rffi.cast(lltype.Signed, stream.c_avail_out)
-                    for i in xrange(OUTPUT_BUFFER_SIZE - avail_out):
+                    for i in xrange(bufsize - avail_out):
                         result.append(outbuf[i])
                     # if the output buffer is full, there might be more data
                     # so we need to try again.  Otherwise, we're done.
@@ -335,7 +359,7 @@
                     # the output buffer was full but there wasn't more
                     # output when we tried again, so it is not an error
                     # condition.
-                    if avail_out == OUTPUT_BUFFER_SIZE:
+                    if avail_out == bufsize:
                         break
 
                 # fallback case: report this error
@@ -347,10 +371,8 @@
         lltype.free(inbuf, flavor='raw')
 
     # When decompressing, if the compressed stream of data was truncated,
-    # then the zlib simply returns Z_OK and waits for more.  Let's detect
-    # this situation and complain.
-    if should_finish and err != Z_STREAM_END:
-        raise RZlibError("the input compressed stream of data is not complete")
-
-    assert not stream.c_avail_in, "not all input consumed by deflate/inflate"
-    return ''.join(result)
+    # then the zlib simply returns Z_OK and waits for more.  If it is
+    # complete it returns Z_STREAM_END.
+    return (''.join(result),
+            err == Z_STREAM_END,
+            rffi.cast(lltype.Signed, stream.c_avail_in))

Modified: pypy/dist/pypy/rlib/test/test_rzlib.py
==============================================================================
--- pypy/dist/pypy/rlib/test/test_rzlib.py	(original)
+++ pypy/dist/pypy/rlib/test/test_rzlib.py	Tue Oct  2 10:30:59 2007
@@ -121,10 +121,14 @@
     should allow us to decompress bytes.
     """
     stream = rzlib.inflateInit()
-    bytes = rzlib.decompress(stream, compressed)
-    bytes += rzlib.decompress(stream, "", rzlib.Z_FINISH)
+    bytes1, finished1, unused1 = rzlib.decompress(stream, compressed)
+    bytes2, finished2, unused2 = rzlib.decompress(stream, "", rzlib.Z_FINISH)
     rzlib.inflateEnd(stream)
-    assert bytes == expanded
+    assert bytes1 + bytes2 == expanded
+    assert finished1 is True
+    assert finished2 is True
+    assert unused1 == 0
+    assert unused2 == 0
 
 
 def test_decompression_lots_of_data():
@@ -135,9 +139,12 @@
     compressed = zlib.compress(expanded)
     print len(compressed), '=>', len(expanded)
     stream = rzlib.inflateInit()
-    bytes = rzlib.decompress(stream, compressed, rzlib.Z_FINISH)
+    bytes, finished, unused = rzlib.decompress(stream, compressed,
+                                               rzlib.Z_FINISH)
     rzlib.inflateEnd(stream)
     assert bytes == expanded
+    assert finished is True
+    assert unused == 0
 
 
 def test_decompression_truncated_input():
@@ -149,11 +156,75 @@
     compressed = zlib.compress(expanded)
     print len(compressed), '=>', len(expanded)
     stream = rzlib.inflateInit()
-    data = rzlib.decompress(stream, compressed[:1000])
+    data, finished1, unused1 = rzlib.decompress(stream, compressed[:1000])
     assert expanded.startswith(data)
-    data += rzlib.decompress(stream, compressed[1000:2000])
+    assert finished1 is False
+    assert unused1 == 0
+    data2, finished2, unused2 = rzlib.decompress(stream, compressed[1000:2000])
+    data += data2
+    assert finished2 is False
+    assert unused2 == 0
     assert expanded.startswith(data)
     py.test.raises(rzlib.RZlibError,
                    rzlib.decompress, stream, compressed[2000:-500],
                    rzlib.Z_FINISH)
     rzlib.inflateEnd(stream)
+
+
+def test_decompression_too_much_input():
+    """
+    Check the case where we feed extra data to decompress().
+    """
+    stream = rzlib.inflateInit()
+    data1, finished1, unused1 = rzlib.decompress(stream, compressed[:-5])
+    assert finished1 is False
+    assert unused1 == 0
+    data2, finished2, unused2 = rzlib.decompress(stream,
+                                                 compressed[-5:] + 'garbage')
+    assert finished2 is True
+    assert unused2 == len('garbage')
+    assert data1 + data2 == expanded
+    data3, finished3, unused3 = rzlib.decompress(stream, 'more_garbage')
+    assert finished3 is True
+    assert unused3 == len('more_garbage')
+    assert data3 == ''
+
+
+def test_decompress_max_length():
+    """
+    Test the max_length argument of decompress().
+    """
+    stream = rzlib.inflateInit()
+    data1, finished1, unused1 = rzlib.decompress(stream, compressed,
+                                                 max_length = 17)
+    assert data1 == expanded[:17]
+    assert finished1 is False
+    assert unused1 > 0
+    data2, finished2, unused2 = rzlib.decompress(stream, compressed[-unused1:])
+    assert data2 == expanded[17:]
+    assert finished2 is True
+    assert unused2 == 0
+
+
+def test_cornercases():
+    """
+    Test degenerate arguments.
+    """
+    stream = rzlib.deflateInit()
+    bytes = rzlib.compress(stream, "")
+    bytes += rzlib.compress(stream, "")
+    bytes += rzlib.compress(stream, "", rzlib.Z_FINISH)
+    assert zlib.decompress(bytes) == ""
+
+    stream = rzlib.inflateInit()
+    data, finished, unused = rzlib.decompress(stream, "")
+    assert data == ""
+    assert finished is False
+    assert unused == 0
+    buf = compressed
+    for i in range(10):
+        data, finished, unused = rzlib.decompress(stream, buf, max_length=0)
+        assert data == ""
+        assert finished is False
+        assert unused > 0
+        buf = buf[-unused:]



More information about the Pypy-commit mailing list