[pypy-svn] r31096 - in pypy/dist/pypy/module/bz2: . test

rhymes at codespeak.net rhymes at codespeak.net
Mon Aug 7 11:51:22 CEST 2006


Author: rhymes
Date: Mon Aug  7 11:51:16 2006
New Revision: 31096

Modified:
   pypy/dist/pypy/module/bz2/interp_bz2.py
   pypy/dist/pypy/module/bz2/test/test_bz2.py
Log:
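Fix compression in BZ2Compressor to work with *huge* data too: when the output buffer fills up, save the chunk and allocate a larger buffer (in both the BZ_RUN and BZ_FINISH loops), check BZ_RUN against BZ_RUN_OK instead of BZ_OK, and add test_compress_huge_data.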

Modified: pypy/dist/pypy/module/bz2/interp_bz2.py
==============================================================================
--- pypy/dist/pypy/module/bz2/interp_bz2.py	(original)
+++ pypy/dist/pypy/module/bz2/interp_bz2.py	Mon Aug  7 11:51:16 2006
@@ -814,13 +814,29 @@
         self.bzs.next_out = out_buf
         self.bzs.avail_out = out_bufsize
         
+        temp = []
         while True:
             bzerror = libbz2.BZ2_bzCompress(byref(self.bzs), BZ_RUN)
-            if bzerror != BZ_OK:
+            if bzerror != BZ_RUN_OK:
                 _catch_bz2_error(self.space, bzerror)
 
             if self.bzs.avail_in == 0:
                 break
+            elif self.bzs.avail_out == 0:
+                total_out = _bzs_total_out(self.bzs)
+                data = "".join([out_buf[i] for i in range(total_out)])
+                temp.append(data)
+                
+                out_bufsize = _new_buffer_size(out_bufsize)
+                out_buf = create_string_buffer(out_bufsize)
+                self.bzs.next_out = out_buf
+                self.bzs.avail_out = out_bufsize
+
+        if temp:
+            total_out = _bzs_total_out(self.bzs)
+            data = "".join([out_buf[i] for i in range(total_out - len(temp[0]))])
+            temp.append(data)
+            return self.space.wrap("".join(temp))
 
         total_out = _bzs_total_out(self.bzs)
         res = "".join([out_buf[i] for i in range(total_out)])
@@ -835,17 +851,37 @@
         
         out_bufsize = SMALLCHUNK
         out_buf = create_string_buffer(out_bufsize)
-
+    
         self.bzs.next_out = out_buf
         self.bzs.avail_out = out_bufsize
         
+        total_out = _bzs_total_out(self.bzs)
+        
+        temp = []
         while True:
             bzerror = libbz2.BZ2_bzCompress(byref(self.bzs), BZ_FINISH)
             if bzerror == BZ_STREAM_END:
                 break
             elif bzerror != BZ_FINISH_OK:
                 _catch_bz2_error(self.space, bzerror)
-
+                
+            if self.bzs.avail_out == 0:
+                data = "".join([out_buf[i] for i in range(_bzs_total_out(self.bzs))])
+                temp.append(data)
+                
+                out_bufsize = _new_buffer_size(out_bufsize)
+                out_buf = create_string_buffer(out_bufsize)
+                self.bzs.next_out = out_buf
+                self.bzs.avail_out = out_bufsize
+        
+        if temp:
+            return self.space.wrap("".join(temp))
+            
+        if self.bzs.avail_out:
+            size = _bzs_total_out(self.bzs) - total_out
+            res = "".join([out_buf[i] for i in range(size)])
+            return self.space.wrap(res)
+    
         total_out = _bzs_total_out(self.bzs)
         res = "".join([out_buf[i] for i in range(total_out)])
         return self.space.wrap(res)

Modified: pypy/dist/pypy/module/bz2/test/test_bz2.py
==============================================================================
--- pypy/dist/pypy/module/bz2/test/test_bz2.py	(original)
+++ pypy/dist/pypy/module/bz2/test/test_bz2.py	Mon Aug  7 11:51:16 2006
@@ -605,6 +605,29 @@
         data = "%s%s" % (data, bz2c.flush())
         assert decompress(data) == TEXT
         
+    def test_compress_huge_data(self):
+        def decompress(data):
+            import popen2
+            import bz2
+            pop = popen2.Popen3("bunzip2", capturestderr=1)
+            pop.tochild.write(data)
+            pop.tochild.close()
+            res = pop.fromchild.read()
+            pop.fromchild.close()
+            if pop.wait() != 0:
+                res = bz2.decompress(data)
+            return res
+
+        from bz2 import BZ2Compressor            
+        TEXT = 'root:x:0:0:root:/root:/bin/bash\nbin:x:1:1:bin:/bin:\ndaemon:x:2:2:daemon:/sbin:\nadm:x:3:4:adm:/var/adm:\nlp:x:4:7:lp:/var/spool/lpd:\nsync:x:5:0:sync:/sbin:/bin/sync\nshutdown:x:6:0:shutdown:/sbin:/sbin/shutdown\nhalt:x:7:0:halt:/sbin:/sbin/halt\nmail:x:8:12:mail:/var/spool/mail:\nnews:x:9:13:news:/var/spool/news:\nuucp:x:10:14:uucp:/var/spool/uucp:\noperator:x:11:0:operator:/root:\ngames:x:12:100:games:/usr/games:\ngopher:x:13:30:gopher:/usr/lib/gopher-data:\nftp:x:14:50:FTP User:/var/ftp:/bin/bash\nnobody:x:65534:65534:Nobody:/home:\npostfix:x:100:101:postfix:/var/spool/postfix:\nniemeyer:x:500:500::/home/niemeyer:/bin/bash\npostgres:x:101:102:PostgreSQL Server:/var/lib/pgsql:/bin/bash\nmysql:x:102:103:MySQL server:/var/lib/mysql:/bin/bash\nwww:x:103:104::/var/www:/bin/false\n'
+        
+        HUGE_DATA = TEXT * 10000
+        bz2c = BZ2Compressor()
+        raises(TypeError, bz2c.compress)
+        data = bz2c.compress(HUGE_DATA)
+        data = "%s%s" % (data, bz2c.flush())
+        assert decompress(data) == HUGE_DATA
+        
 # has_cmdline_bunzip2 = sys.platform not in ("win32", "os2emx", "riscos")
 # 
 # if has_cmdline_bunzip2:



More information about the Pypy-commit mailing list