[pypy-svn] r31096 - in pypy/dist/pypy/module/bz2: . test
rhymes at codespeak.net
rhymes at codespeak.net
Mon Aug 7 11:51:22 CEST 2006
Author: rhymes
Date: Mon Aug 7 11:51:16 2006
New Revision: 31096
Modified:
pypy/dist/pypy/module/bz2/interp_bz2.py
pypy/dist/pypy/module/bz2/test/test_bz2.py
Log:
fix compression in BZ2Compressor to work with *huge* data too
Modified: pypy/dist/pypy/module/bz2/interp_bz2.py
==============================================================================
--- pypy/dist/pypy/module/bz2/interp_bz2.py (original)
+++ pypy/dist/pypy/module/bz2/interp_bz2.py Mon Aug 7 11:51:16 2006
@@ -814,13 +814,29 @@
self.bzs.next_out = out_buf
self.bzs.avail_out = out_bufsize
+ temp = []
while True:
bzerror = libbz2.BZ2_bzCompress(byref(self.bzs), BZ_RUN)
- if bzerror != BZ_OK:
+ if bzerror != BZ_RUN_OK:
_catch_bz2_error(self.space, bzerror)
if self.bzs.avail_in == 0:
break
+ elif self.bzs.avail_out == 0:
+ total_out = _bzs_total_out(self.bzs)
+ data = "".join([out_buf[i] for i in range(total_out)])
+ temp.append(data)
+
+ out_bufsize = _new_buffer_size(out_bufsize)
+ out_buf = create_string_buffer(out_bufsize)
+ self.bzs.next_out = out_buf
+ self.bzs.avail_out = out_bufsize
+
+ if temp:
+ total_out = _bzs_total_out(self.bzs)
+ data = "".join([out_buf[i] for i in range(total_out - len(temp[0]))])
+ temp.append(data)
+ return self.space.wrap("".join(temp))
total_out = _bzs_total_out(self.bzs)
res = "".join([out_buf[i] for i in range(total_out)])
@@ -835,17 +851,37 @@
out_bufsize = SMALLCHUNK
out_buf = create_string_buffer(out_bufsize)
-
+
self.bzs.next_out = out_buf
self.bzs.avail_out = out_bufsize
+ total_out = _bzs_total_out(self.bzs)
+
+ temp = []
while True:
bzerror = libbz2.BZ2_bzCompress(byref(self.bzs), BZ_FINISH)
if bzerror == BZ_STREAM_END:
break
elif bzerror != BZ_FINISH_OK:
_catch_bz2_error(self.space, bzerror)
-
+
+ if self.bzs.avail_out == 0:
+ data = "".join([out_buf[i] for i in range(_bzs_total_out(self.bzs))])
+ temp.append(data)
+
+ out_bufsize = _new_buffer_size(out_bufsize)
+ out_buf = create_string_buffer(out_bufsize)
+ self.bzs.next_out = out_buf
+ self.bzs.avail_out = out_bufsize
+
+ if temp:
+ return self.space.wrap("".join(temp))
+
+ if self.bzs.avail_out:
+ size = _bzs_total_out(self.bzs) - total_out
+ res = "".join([out_buf[i] for i in range(size)])
+ return self.space.wrap(res)
+
total_out = _bzs_total_out(self.bzs)
res = "".join([out_buf[i] for i in range(total_out)])
return self.space.wrap(res)
Modified: pypy/dist/pypy/module/bz2/test/test_bz2.py
==============================================================================
--- pypy/dist/pypy/module/bz2/test/test_bz2.py (original)
+++ pypy/dist/pypy/module/bz2/test/test_bz2.py Mon Aug 7 11:51:16 2006
@@ -605,6 +605,29 @@
data = "%s%s" % (data, bz2c.flush())
assert decompress(data) == TEXT
+ def test_compress_huge_data(self):
+ def decompress(data):
+ import popen2
+ import bz2
+ pop = popen2.Popen3("bunzip2", capturestderr=1)
+ pop.tochild.write(data)
+ pop.tochild.close()
+ res = pop.fromchild.read()
+ pop.fromchild.close()
+ if pop.wait() != 0:
+ res = bz2.decompress(data)
+ return res
+
+ from bz2 import BZ2Compressor
+ TEXT = 'root:x:0:0:root:/root:/bin/bash\nbin:x:1:1:bin:/bin:\ndaemon:x:2:2:daemon:/sbin:\nadm:x:3:4:adm:/var/adm:\nlp:x:4:7:lp:/var/spool/lpd:\nsync:x:5:0:sync:/sbin:/bin/sync\nshutdown:x:6:0:shutdown:/sbin:/sbin/shutdown\nhalt:x:7:0:halt:/sbin:/sbin/halt\nmail:x:8:12:mail:/var/spool/mail:\nnews:x:9:13:news:/var/spool/news:\nuucp:x:10:14:uucp:/var/spool/uucp:\noperator:x:11:0:operator:/root:\ngames:x:12:100:games:/usr/games:\ngopher:x:13:30:gopher:/usr/lib/gopher-data:\nftp:x:14:50:FTP User:/var/ftp:/bin/bash\nnobody:x:65534:65534:Nobody:/home:\npostfix:x:100:101:postfix:/var/spool/postfix:\nniemeyer:x:500:500::/home/niemeyer:/bin/bash\npostgres:x:101:102:PostgreSQL Server:/var/lib/pgsql:/bin/bash\nmysql:x:102:103:MySQL server:/var/lib/mysql:/bin/bash\nwww:x:103:104::/var/www:/bin/false\n'
+
+ HUGE_DATA = TEXT * 10000
+ bz2c = BZ2Compressor()
+ raises(TypeError, bz2c.compress)
+ data = bz2c.compress(HUGE_DATA)
+ data = "%s%s" % (data, bz2c.flush())
+ assert decompress(data) == HUGE_DATA
+
# has_cmdline_bunzip2 = sys.platform not in ("win32", "os2emx", "riscos")
#
# if has_cmdline_bunzip2:
More information about the Pypy-commit mailing list