[pypy-svn] r31022 - in pypy/dist/pypy/module/bz2: . test
rhymes at codespeak.net
rhymes at codespeak.net
Sat Aug 5 17:59:32 CEST 2006
Author: rhymes
Date: Sat Aug 5 17:59:27 2006
New Revision: 31022
Modified:
pypy/dist/pypy/module/bz2/interp_bz2.py
pypy/dist/pypy/module/bz2/test/test_bz2.py
Log:
readline() works
Modified: pypy/dist/pypy/module/bz2/interp_bz2.py
==============================================================================
--- pypy/dist/pypy/module/bz2/interp_bz2.py (original)
+++ pypy/dist/pypy/module/bz2/interp_bz2.py Sat Aug 5 17:59:27 2006
@@ -1,457 +1,570 @@
-from pypy.rpython.rctypes.tool import ctypes_platform
-from pypy.rpython.rctypes.tool.libc import libc
-import pypy.rpython.rctypes.implementation # this defines rctypes magic
-from pypy.rpython.rctypes.aerrno import geterrno
-from pypy.interpreter.error import OperationError
-from pypy.interpreter.baseobjspace import W_Root, ObjSpace, Wrappable
-from pypy.interpreter.typedef import TypeDef
-from pypy.interpreter.gateway import interp2app
-from ctypes import *
-import ctypes.util
-
-from bzlib import bz_stream, BZFILE, FILE
-from fileobject import PyFileObject
-
-libbz2 = cdll.LoadLibrary(ctypes.util.find_library("bz2"))
-
-c_void = None
-
-class CConfig:
- _header_ = """
- #include <stdio.h>
- #include <sys/types.h>
- #include <bzlib.h>
- """
- off_t = ctypes_platform.SimpleType("off_t", c_longlong)
- size_t = ctypes_platform.SimpleType("size_t", c_ulong)
- BUFSIZ = ctypes_platform.ConstantInteger("BUFSIZ")
- SEEK_SET = ctypes_platform.ConstantInteger("SEEK_SET")
-
-constants = {}
-constant_names = ['BZ_RUN', 'BZ_FLUSH', 'BZ_FINISH', 'BZ_OK',
- 'BZ_RUN_OK', 'BZ_FLUSH_OK', 'BZ_FINISH_OK', 'BZ_STREAM_END',
- 'BZ_SEQUENCE_ERROR', 'BZ_PARAM_ERROR', 'BZ_MEM_ERROR', 'BZ_DATA_ERROR',
- 'BZ_DATA_ERROR_MAGIC', 'BZ_IO_ERROR', 'BZ_UNEXPECTED_EOF',
- 'BZ_OUTBUFF_FULL', 'BZ_CONFIG_ERROR']
-for name in constant_names:
- setattr(CConfig, name, ctypes_platform.DefinedConstantInteger(name))
-
-class cConfig:
- pass
-cConfig.__dict__.update(ctypes_platform.configure(CConfig))
-
-for name in constant_names:
- value = getattr(cConfig, name)
- if value is not None:
- constants[name] = value
-locals().update(constants)
-
-off_t = cConfig.off_t
-BUFSIZ = cConfig.BUFSIZ
-SEEK_SET = cConfig.SEEK_SET
-BZ_OK = cConfig.BZ_OK
-BZ_STREAM_END = cConfig.BZ_STREAM_END
-BZ_CONFIG_ERROR = cConfig.BZ_CONFIG_ERROR
-BZ_PARAM_ERROR = cConfig.BZ_PARAM_ERROR
-BZ_DATA_ERROR = cConfig.BZ_DATA_ERROR
-BZ_DATA_ERROR_MAGIC = cConfig.BZ_DATA_ERROR_MAGIC
-BZ_IO_ERROR = cConfig.BZ_IO_ERROR
-BZ_MEM_ERROR = cConfig.BZ_MEM_ERROR
-BZ_UNEXPECTED_EOF = cConfig.BZ_UNEXPECTED_EOF
-BZ_SEQUENCE_ERROR = cConfig.BZ_SEQUENCE_ERROR
-
-# modes
-MODE_CLOSED = 0
-MODE_READ = 1
-MODE_READ_EOF = 2
-MODE_WRITE = 3
-
-# bits in f_newlinetypes
-NEWLINE_UNKNOWN = 0 # No newline seen, yet
-NEWLINE_CR = 1 # \r newline seen
-NEWLINE_LF = 2 # \n newline seen
-NEWLINE_CRLF = 4 # \r\n newline seen
-
-if BUFSIZ < 8192:
- SMALLCHUNK = 8192
-else:
- SMALLCHUNK = BUFSIZ
-
-
-pythonapi.PyFile_FromString.argtypes = [c_char_p, c_char_p]
-pythonapi.PyFile_FromString.restype = POINTER(PyFileObject)
-pythonapi.PyFile_SetBufSize.argtypes = [POINTER(PyFileObject), c_int]
-pythonapi.PyFile_SetBufSize.restype = c_void
-pythonapi.PyFile_AsFile.argtypes = [POINTER(PyFileObject)]
-pythonapi.PyFile_AsFile.restype = POINTER(FILE)
-pythonapi.PyMem_Free.argtypes = [c_char_p]
-pythonapi.PyMem_Free.restype = c_void
-
-libbz2.BZ2_bzReadOpen.argtypes = [POINTER(c_int), POINTER(FILE), c_int,
- c_int, c_void_p, c_int]
-libbz2.BZ2_bzReadOpen.restype = POINTER(BZFILE)
-libbz2.BZ2_bzWriteOpen.argtypes = [POINTER(c_int), POINTER(FILE), c_int,
- c_int, c_int]
-libbz2.BZ2_bzWriteOpen.restype = POINTER(BZFILE)
-libbz2.BZ2_bzReadClose.argtypes = [POINTER(c_int), POINTER(BZFILE)]
-libbz2.BZ2_bzReadClose.restype = c_void
-libbz2.BZ2_bzWriteClose.argtypes = [POINTER(c_int), POINTER(BZFILE),
- c_int, POINTER(c_uint), POINTER(c_uint)]
-libbz2.BZ2_bzWriteClose.restype = c_void
-libbz2.BZ2_bzRead.argtypes = [POINTER(c_int), POINTER(BZFILE), c_char_p, c_int]
-libbz2.BZ2_bzRead.restype = c_int
-
-libc.strerror.restype = c_char_p
-libc.strerror.argtypes = [c_int]
-libc.fclose.argtypes = [POINTER(FILE)]
-libc.fclose.restype = c_int
-libc.fseek.argtypes = [POINTER(FILE), c_int, c_int]
-libc.fseek.restype = c_int
-
-def _get_error_msg():
- errno = geterrno()
- return libc.strerror(errno)
-
-def _catch_bz2_error(space, bzerror):
- if BZ_CONFIG_ERROR and bzerror == BZ_CONFIG_ERROR:
- raise OperationError(space.w_SystemError,
- space.wrap("the bz2 library was not compiled correctly"))
- if bzerror == BZ_PARAM_ERROR:
- raise OperationError(space.w_SystemError,
- space.wrap("the bz2 library has received wrong parameters"))
- elif bzerror == BZ_MEM_ERROR:
- raise OperationError(space.w_MemoryError, space.wrap(""))
- elif bzerror in (BZ_DATA_ERROR, BZ_DATA_ERROR_MAGIC):
- raise OperationError(space.w_IOError, space.wrap("invalid data stream"))
- elif bzerror == BZ_IO_ERROR:
- raise OperationError(space.w_IOError, space.wrap("unknown IO error"))
- elif bzerror == BZ_UNEXPECTED_EOF:
- raise OperationError(space.w_EOFError,
- space.wrap(
- "compressed file ended before the logical end-of-stream was detected"))
- elif bzerror == BZ_SEQUENCE_ERROR:
- raise OperationError(space.w_RuntimeError,
- space.wrap("wrong sequence of bz2 library commands used"))
-
-def _drop_readahead(obj):
- if obj.f_buf:
- pythonapi.PyMem_Free(obj.f_buf)
- obj.f_buf = c_char_p()
-
-def _univ_newline_read(bzerror, stream, buf, n, obj):
- dst = buf
-
- if not obj.f_univ_newline:
- return libbz2.BZ2_bzRead(byref(bzerror), stream, buf, n)
-
- newlinetypes = obj.f_newlinetypes
- skipnextlf = obj.f_skipnextlf
-
- while n:
- src = dst
-
- nread = libbz2.BZ2_bzRead(byref(bzerror), stream, buf, n)
- n -= nread # assuming 1 byte out for each in; will adjust
- shortread = n != 0 # True iff EOF or error
-
- # needed to operate with "pointers"
- src_lst = list(src.value)
- src_pos = 0
- dst_lst = list(dst.value)
- dst_pos = 0
- while nread:
- nread -= 1
-
- c = src_lst[src_pos]
- src_pos += 1
-
- if c == '\r':
- # save as LF and set flag to skip next LF.
- dst_lst[dst_pos] = '\n'
- dst_pos += 1
- skipnextlf = True
- elif skipnextlf and c == '\n':
- # skip LF, and remember we saw CR LF.
- skipnextlf = False
- newlinetypes |= NEWLINE_CRLF
- n += 1
- else:
- # normal char to be stored in buffer. Also
- # update the newlinetypes flag if either this
- # is an LF or the previous char was a CR.
- if c == '\n':
- newlinetypes |= NEWLINE_LF
- elif skipnextlf:
- newlinetypes |= NEWLINE_CR
-
- dst_lst[dst_pos] = c
- dst_pos += 1
-
- skipnextlf = False
-
- if shortread:
- # if this is EOF, update type flags.
- if skipnextlf and (bzerror == BZ_STREAM_END):
- newlinetypes |= NEWLINE_CR
- break
-
- obj.f_newlinetypes = newlinetypes
- obj.f_skipnextlf = skipnextlf
-
- buf = c_char_p("".join(dst_lst))
-
- return dst_pos
-
-
-class _BZ2File(Wrappable):
- def __init__(self, space, filename, mode='r', buffering=-1, compresslevel=9):
- self.space = space
-
- self.f_buf = c_char_p() # allocated readahead buffer
- self.f_bufend = c_char_p() # points after last occupied position
- self.f_bufptr = c_char_p() # current buffer position
-
- self.f_softspace = 0 # flag used by print command
-
- self.f_univ_newline = False # handle any newline convention
- self.f_newlinetypes = 0 # types of newlines seen
- self.f_skipnextlf = 0 # skip next \n
-
- self.mode = 0
- self.pos = 0
- self.size = 0
-
- self._init_bz2file(filename, mode, buffering, compresslevel)
-
- def _init_bz2file(self, filename, mode_, buffering, compresslevel):
- self.size = -1
-
- name = filename
- mode_char = ""
- mode_list = mode_
-
- if compresslevel < 1 or compresslevel > 9:
- raise OperationError(self.space.w_ValueError,
- self.space.wrap("compresslevel must be between 1 and 9"))
-
- for mode in mode_list:
- error = False
-
- if mode in ['r', 'w']:
- if mode_char:
- error = True
- mode_char = mode
- elif mode == 'b':
- pass
- elif mode == 'U':
- self.f_univ_newline = True
- else:
- error = True
-
- if error:
- raise OperationError(self.space.w_ValueError,
- self.space.wrap("invalid mode char %s" % mode))
-
- if mode_char == 0:
- mode_char = 'r'
- mode = ('wb', 'rb')[mode_char == 'r']
-
- # open the file and set the buffer
- f = pythonapi.PyFile_FromString(name, mode)
- if not f:
- raise OperationError(self.space.w_IOError,
- self.space.wrap("cannot open file %s" % name))
- pythonapi.PyFile_SetBufSize(f, buffering)
-
- # store the FILE object
- self._file = pythonapi.PyFile_AsFile(f)
-
- bzerror = c_int()
- if mode_char == 'r':
- self.fp = libbz2.BZ2_bzReadOpen(byref(bzerror), self._file,
- 0, 0, None, 0)
- else:
- self.fp = libbz2.BZ2_bzWriteOpen(byref(bzerror), self._file,
- compresslevel, 0, 0)
-
- if bzerror != BZ_OK:
- _catch_bz2_error(self.space, bzerror)
-
- self.mode = (MODE_WRITE, MODE_READ)[mode_char == 'r']
-
- def __del__(self):
- bzerror = c_int()
-
- if self.mode in (MODE_READ, MODE_READ_EOF):
- libbz2.BZ2_bzReadClose(byref(bzerror), self.fp)
- elif self.mode == MODE_WRITE:
- libbz2.BZ2_bzWriteClose(byref(bzerror), self.fp, 0, None, None)
-
- _drop_readahead(self)
-
- def _check_if_close(self):
- if self.mode == MODE_CLOSED:
- raise OperationError(self.space.w_ValueError,
- self.space.wrap("I/O operation on closed file"))
-
- def close(self):
- """close() -> None or (perhaps) an integer
-
- Close the file. Sets data attribute .closed to true. A closed file
- cannot be used for further I/O operations."""
-
- # this feature is not supported due to fclose():
- # close() may be called more than once without error.
-
- bzerror = c_int(BZ_OK)
-
- if self.mode in (MODE_READ, MODE_READ_EOF):
- libbz2.BZ2_bzReadClose(byref(bzerror), self.fp)
- elif self.mode == MODE_WRITE:
- libbz2.BZ2_bzWriteClose(byref(bzerror), self.fp, 0, None, None)
-
- self.mode = MODE_CLOSED
-
- # close the underline file
- ret = libc.fclose(self._file)
- if ret != 0:
- raise OperationError(self.space.w_IOError,
- self.space.wrap(_get_error_msg()))
-
- if bzerror != BZ_OK:
- return _catch_bz2_error(self.space, bzerror)
-
- return ret
- close.unwrap_spec = ['self']
-
- def tell(self):
- """tell() -> int
-
- Return the current file position, an integer (may be a long integer)."""
-
- self._check_if_close()
-
- return self.space.wrap(self.pos)
- tell.unwrap_spec = ['self']
-
- def seek(self, offset, whence=0):
- """"seek(offset [, whence]) -> None
-
- Move to new file position. Argument offset is a byte count. Optional
- argument whence defaults to 0 (offset from start of file, offset
- should be >= 0); other values are 1 (move relative to current position,
- positive or negative), and 2 (move relative to end of file, usually
- negative, although many platforms allow seeking beyond the end of a file).
-
- Note that seeking of bz2 files is emulated, and depending on the parameters
- the operation may be extremely slow."""
-
- _drop_readahead(self)
- self._check_if_close()
-
- buf = c_char_p()
- bufsize = SMALLCHUNK
- bytesread = 0
- bzerror = c_int()
-
- if self.mode not in (MODE_READ, MODE_READ_EOF):
- raise OperationError(self.space.w_IOError,
- self.space.wrap("seek works only while reading"))
-
- if whence == 2:
- if self.size == -1:
- while True:
- chunksize = _univ_newline_read(bzerror, self.fp, buf,
- bufsize, self)
- self.pos += chunksize
- bytesread += chunksize
-
- if bzerror == BZ_STREAM_END:
- break
- elif bzerror != BZ_OK:
- _catch_bz2_error(bzerror)
-
- self.mode = MODE_READ_EOF
- self.size = self.pos
- bytesread = 0
- offset += self.size
- elif whence == 1:
- offset += self.pos
-
- # Before getting here, offset must be the absolute position the file
- # pointer should be set to.
- if offset >= self.pos:
- # we can move forward
- offset -= self.pos
- else:
- # we cannot move back, so rewind the stream
- libbz2.BZ2_bzReadClose(byref(bzerror), self.fp)
- if bzerror != BZ_OK:
- _catch_bz2_error(bzerror)
-
- ret = libc.fseek(self._file, 0, SEEK_SET)
- if ret != 0:
- raise OperationError(self.space.w_IOError,
- self.space.wrap(_get_error_msg()))
-
- self.pos = 0
- self.fp = libbz2.BZ2_bzReadOpen(byref(bzerror), self._file,
- 0, 0, None, 0)
- if bzerror != BZ_OK:
- _catch_bz2_error(bzerror)
-
- self.mode = MODE_READ
-
- if offset <= 0 or self.mode == MODE_READ_EOF:
- return
-
- # Before getting here, offset must be set to the number of bytes
- # to walk forward.
- while True:
- if (offset - bytesread) > bufsize:
- readsize = bufsize
- else:
- # offset might be wider that readsize, but the result
- # of the subtraction is bound by buffersize (see the
- # condition above). bufsize is 8192.
- readsize = offset - bytesread
-
- chunksize = _univ_newline_read(bzerror, self.fp, buf, readsize, self)
- self.pos += chunksize
- bytesread += chunksize
-
- if bzerror == BZ_STREAM_END:
- self.size = self.pos
- self.mode = MODE_READ_EOF
- elif bzerror != BZ_OK:
- _catch_bz2_error(bzerror)
-
- if bytesread == offset:
- break
- seek.unwrap_spec = ['self', int, int]
-
-
-_BZ2File.typedef = TypeDef("_BZ2File",
- close = interp2app(_BZ2File.close, unwrap_spec=_BZ2File.close.unwrap_spec),
- tell = interp2app(_BZ2File.tell, unwrap_spec=_BZ2File.tell.unwrap_spec),
- seek = interp2app(_BZ2File.seek, unwrap_spec=_BZ2File.seek.unwrap_spec),
-)
-
-def BZ2File(space, filename, mode='r', buffering=-1, compresslevel=9):
- """BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object
-
- Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or
- writing. When opened for writing, the file will be created if it doesn't
- exist, and truncated otherwise. If the buffering argument is given, 0 means
- unbuffered, and larger numbers specify the buffer size. If compresslevel
- is given, must be a number between 1 and 9.
-
- Add a 'U' to mode to open the file for input with universal newline
- support. Any line ending in the input file will be seen as a '\\n' in
- Python. Also, a file so opened gains the attribute 'newlines'; the value
- for this attribute is one of None (no newline read yet), '\\r', '\\n',
- '\\r\\n' or a tuple containing all the newline types seen. Universal
- newlines are available only when reading."""
- return _BZ2File(space, filename, mode, buffering, compresslevel)
-BZ2File.unwrap_spec = [ObjSpace, str, str, int, int]
-
+from pypy.rpython.rctypes.tool import ctypes_platform
+from pypy.rpython.rctypes.tool.libc import libc
+import pypy.rpython.rctypes.implementation # this defines rctypes magic
+from pypy.rpython.rctypes.aerrno import geterrno
+from pypy.interpreter.error import OperationError
+from pypy.interpreter.baseobjspace import W_Root, ObjSpace, Wrappable
+from pypy.interpreter.typedef import TypeDef
+from pypy.interpreter.gateway import interp2app
+from ctypes import *
+import ctypes.util
+import sys
+
+from bzlib import bz_stream, BZFILE, FILE
+from fileobject import PyFileObject
+
+libbz2 = cdll.LoadLibrary(ctypes.util.find_library("bz2"))
+
+c_void = None
+
+class CConfig:
+ _header_ = """
+ #include <stdio.h>
+ #include <sys/types.h>
+ #include <bzlib.h>
+ """
+ off_t = ctypes_platform.SimpleType("off_t", c_longlong)
+ size_t = ctypes_platform.SimpleType("size_t", c_ulong)
+ BUFSIZ = ctypes_platform.ConstantInteger("BUFSIZ")
+ SEEK_SET = ctypes_platform.ConstantInteger("SEEK_SET")
+
+constants = {}
+constant_names = ['BZ_RUN', 'BZ_FLUSH', 'BZ_FINISH', 'BZ_OK',
+ 'BZ_RUN_OK', 'BZ_FLUSH_OK', 'BZ_FINISH_OK', 'BZ_STREAM_END',
+ 'BZ_SEQUENCE_ERROR', 'BZ_PARAM_ERROR', 'BZ_MEM_ERROR', 'BZ_DATA_ERROR',
+ 'BZ_DATA_ERROR_MAGIC', 'BZ_IO_ERROR', 'BZ_UNEXPECTED_EOF',
+ 'BZ_OUTBUFF_FULL', 'BZ_CONFIG_ERROR']
+for name in constant_names:
+ setattr(CConfig, name, ctypes_platform.DefinedConstantInteger(name))
+
+class cConfig:
+ pass
+cConfig.__dict__.update(ctypes_platform.configure(CConfig))
+
+for name in constant_names:
+ value = getattr(cConfig, name)
+ if value is not None:
+ constants[name] = value
+locals().update(constants)
+
+off_t = cConfig.off_t
+BUFSIZ = cConfig.BUFSIZ
+SEEK_SET = cConfig.SEEK_SET
+BZ_OK = cConfig.BZ_OK
+BZ_STREAM_END = cConfig.BZ_STREAM_END
+BZ_CONFIG_ERROR = cConfig.BZ_CONFIG_ERROR
+BZ_PARAM_ERROR = cConfig.BZ_PARAM_ERROR
+BZ_DATA_ERROR = cConfig.BZ_DATA_ERROR
+BZ_DATA_ERROR_MAGIC = cConfig.BZ_DATA_ERROR_MAGIC
+BZ_IO_ERROR = cConfig.BZ_IO_ERROR
+BZ_MEM_ERROR = cConfig.BZ_MEM_ERROR
+BZ_UNEXPECTED_EOF = cConfig.BZ_UNEXPECTED_EOF
+BZ_SEQUENCE_ERROR = cConfig.BZ_SEQUENCE_ERROR
+
+# modes
+MODE_CLOSED = 0
+MODE_READ = 1
+MODE_READ_EOF = 2
+MODE_WRITE = 3
+
+# bits in f_newlinetypes
+NEWLINE_UNKNOWN = 0 # No newline seen, yet
+NEWLINE_CR = 1 # \r newline seen
+NEWLINE_LF = 2 # \n newline seen
+NEWLINE_CRLF = 4 # \r\n newline seen
+
+if BUFSIZ < 8192:
+ SMALLCHUNK = 8192
+else:
+ SMALLCHUNK = BUFSIZ
+
+MAXINT = sys.maxint
+
+pythonapi.PyFile_FromString.argtypes = [c_char_p, c_char_p]
+pythonapi.PyFile_FromString.restype = POINTER(PyFileObject)
+pythonapi.PyFile_SetBufSize.argtypes = [POINTER(PyFileObject), c_int]
+pythonapi.PyFile_SetBufSize.restype = c_void
+pythonapi.PyFile_AsFile.argtypes = [POINTER(PyFileObject)]
+pythonapi.PyFile_AsFile.restype = POINTER(FILE)
+pythonapi.PyMem_Free.argtypes = [c_char_p]
+pythonapi.PyMem_Free.restype = c_void
+
+libbz2.BZ2_bzReadOpen.argtypes = [POINTER(c_int), POINTER(FILE), c_int,
+ c_int, c_void_p, c_int]
+libbz2.BZ2_bzReadOpen.restype = POINTER(BZFILE)
+libbz2.BZ2_bzWriteOpen.argtypes = [POINTER(c_int), POINTER(FILE), c_int,
+ c_int, c_int]
+libbz2.BZ2_bzWriteOpen.restype = POINTER(BZFILE)
+libbz2.BZ2_bzReadClose.argtypes = [POINTER(c_int), POINTER(BZFILE)]
+libbz2.BZ2_bzReadClose.restype = c_void
+libbz2.BZ2_bzWriteClose.argtypes = [POINTER(c_int), POINTER(BZFILE),
+ c_int, POINTER(c_uint), POINTER(c_uint)]
+libbz2.BZ2_bzWriteClose.restype = c_void
+libbz2.BZ2_bzRead.argtypes = [POINTER(c_int), POINTER(BZFILE), c_char_p, c_int]
+libbz2.BZ2_bzRead.restype = c_int
+
+libc.strerror.restype = c_char_p
+libc.strerror.argtypes = [c_int]
+libc.fclose.argtypes = [POINTER(FILE)]
+libc.fclose.restype = c_int
+libc.fseek.argtypes = [POINTER(FILE), c_int, c_int]
+libc.fseek.restype = c_int
+
+def _get_error_msg():
+ errno = geterrno()
+ return libc.strerror(errno)
+
+def _catch_bz2_error(space, bzerror):
+ if BZ_CONFIG_ERROR and bzerror == BZ_CONFIG_ERROR:
+ raise OperationError(space.w_SystemError,
+ space.wrap("the bz2 library was not compiled correctly"))
+ if bzerror == BZ_PARAM_ERROR:
+ raise OperationError(space.w_SystemError,
+ space.wrap("the bz2 library has received wrong parameters"))
+ elif bzerror == BZ_MEM_ERROR:
+ raise OperationError(space.w_MemoryError, space.wrap(""))
+ elif bzerror in (BZ_DATA_ERROR, BZ_DATA_ERROR_MAGIC):
+ raise OperationError(space.w_IOError, space.wrap("invalid data stream"))
+ elif bzerror == BZ_IO_ERROR:
+ raise OperationError(space.w_IOError, space.wrap("unknown IO error"))
+ elif bzerror == BZ_UNEXPECTED_EOF:
+ raise OperationError(space.w_EOFError,
+ space.wrap(
+ "compressed file ended before the logical end-of-stream was detected"))
+ elif bzerror == BZ_SEQUENCE_ERROR:
+ raise OperationError(space.w_RuntimeError,
+ space.wrap("wrong sequence of bz2 library commands used"))
+
+def _drop_readahead(obj):
+ if obj.f_buf:
+ pythonapi.PyMem_Free(obj.f_buf)
+ obj.f_buf = c_char_p()
+
+def _univ_newline_read(bzerror, stream, buf, n, obj):
+ dst = buf
+
+ if not obj.f_univ_newline:
+ return libbz2.BZ2_bzRead(byref(bzerror), stream, buf, n)
+
+ newlinetypes = obj.f_newlinetypes
+ skipnextlf = obj.f_skipnextlf
+
+ while n:
+ src = dst
+
+ nread = libbz2.BZ2_bzRead(byref(bzerror), stream, buf, n)
+ n -= nread # assuming 1 byte out for each in; will adjust
+ shortread = n != 0 # True iff EOF or error
+
+ # needed to operate with "pointers"
+ src_lst = list(src.value)
+ src_pos = 0
+ dst_lst = list(dst.value)
+ dst_pos = 0
+ while nread:
+ nread -= 1
+
+ c = src_lst[src_pos]
+ src_pos += 1
+
+ if c == '\r':
+ # save as LF and set flag to skip next LF.
+ dst_lst[dst_pos] = '\n'
+ dst_pos += 1
+ skipnextlf = True
+ elif skipnextlf and c == '\n':
+ # skip LF, and remember we saw CR LF.
+ skipnextlf = False
+ newlinetypes |= NEWLINE_CRLF
+ n += 1
+ else:
+ # normal char to be stored in buffer. Also
+ # update the newlinetypes flag if either this
+ # is an LF or the previous char was a CR.
+ if c == '\n':
+ newlinetypes |= NEWLINE_LF
+ elif skipnextlf:
+ newlinetypes |= NEWLINE_CR
+
+ dst_lst[dst_pos] = c
+ dst_pos += 1
+
+ skipnextlf = False
+
+ if shortread:
+ # if this is EOF, update type flags.
+ if skipnextlf and (bzerror == BZ_STREAM_END):
+ newlinetypes |= NEWLINE_CR
+ break
+
+ obj.f_newlinetypes = newlinetypes
+ obj.f_skipnextlf = skipnextlf
+
+ buf = c_char_p("".join(dst_lst))
+
+ return dst_pos
+
+def _getline(space, obj, size):
+ used_v_size = 0 # no. used slots in buffer
+ increment = 0 # amount to increment the buffer
+ bzerror = c_int()
+
+ newlinetypes = obj.f_newlinetypes
+ skipnextlf = obj.f_skipnextlf
+ univ_newline = obj.f_univ_newline
+
+ total_v_size = (100, size)[size > 0] # total no. of slots in buffer
+ buf_lst = []
+ buf_pos = 0
+
+ end_pos = buf_pos + total_v_size
+
+ ch = c_char()
+ while True:
+ if univ_newline:
+ while True:
+ libbz2.BZ2_bzRead(byref(bzerror), obj.fp, byref(ch), 1)
+ obj.pos += 1
+ if bzerror != BZ_OK or buf_pos == end_pos:
+ break
+
+ if skipnextlf:
+ skipnextlf = False
+ if ch.value == '\n':
+ # Seeing a \n here with
+ # skipnextlf true means we saw a \r before.
+ newlinetypes |= NEWLINE_CRLF
+ libbz2.BZ2_bzRead(byref(bzerror), obj.fp, byref(ch), 1)
+ if bzerror != BZ_OK: break
+ else:
+ newlinetypes |= NEWLINE_CR
+
+ if ch.value == '\r':
+ skipnextlf = True
+ ch.value = '\n'
+ elif ch.value == '\n':
+ newlinetypes |= NEWLINE_LF
+ buf_lst.append(ch.value)
+ buf_pos += 1
+
+ if ch.value == '\n': break
+ if bzerror == BZ_STREAM_END and skipnextlf:
+ newlinetypes |= NEWLINE_CR
+ else: # if not universal newlines use the normal loop
+ while True:
+ libbz2.BZ2_bzRead(byref(bzerror), obj.fp, byref(ch), 1)
+ obj.pos += 1
+ buf_lst.append(ch.value)
+ buf_pos += 1
+
+ if not (bzerror == BZ_OK and ch.value != '\n' and buf_pos != end_pos):
+ break
+
+ obj.f_newlinetypes = newlinetypes
+ obj.f_skipnextlf = skipnextlf
+
+ if bzerror.value == BZ_STREAM_END:
+ obj.size = obj.pos
+ obj.mode = MODE_READ_EOF
+ break
+ elif bzerror.value != BZ_OK:
+ _catch_bz2_error(space, bzerror)
+
+ if ch.value == '\n': break
+ # must be because buf_pos == end_pos
+ if size > 0:
+ break
+
+ used_v_size = total_v_size
+ increment = total_v_size >> 2 # mild exponential growth
+ total_v_size += increment
+
+ if total_v_size > MAXINT:
+ raise OperationError(space.w_OverflowError,
+ space.wrap("line is longer than a Python string can hold"))
+
+ buf_pos += used_v_size
+ end_pos += total_v_size
+
+ used_v_size = buf_pos
+ if used_v_size != total_v_size:
+ return "".join(buf_lst[:used_v_size])
+ return "".join(buf_lst)
+
+class _BZ2File(Wrappable):
+ def __init__(self, space, filename, mode='r', buffering=-1, compresslevel=9):
+ self.space = space
+
+ self.f_buf = c_char_p() # allocated readahead buffer
+ self.f_bufend = c_char_p() # points after last occupied position
+ self.f_bufptr = c_char_p() # current buffer position
+
+ self.f_softspace = 0 # flag used by print command
+
+ self.f_univ_newline = False # handle any newline convention
+ self.f_newlinetypes = 0 # types of newlines seen
+ self.f_skipnextlf = 0 # skip next \n
+
+ self.mode = 0
+ self.pos = 0
+ self.size = 0
+
+ self._init_bz2file(filename, mode, buffering, compresslevel)
+
+ def _init_bz2file(self, filename, mode_, buffering, compresslevel):
+ self.size = -1
+
+ name = filename
+ mode_char = ""
+ mode_list = mode_
+
+ if compresslevel < 1 or compresslevel > 9:
+ raise OperationError(self.space.w_ValueError,
+ self.space.wrap("compresslevel must be between 1 and 9"))
+
+ for mode in mode_list:
+ error = False
+
+ if mode in ['r', 'w']:
+ if mode_char:
+ error = True
+ mode_char = mode
+ elif mode == 'b':
+ pass
+ elif mode == 'U':
+ self.f_univ_newline = True
+ else:
+ error = True
+
+ if error:
+ raise OperationError(self.space.w_ValueError,
+ self.space.wrap("invalid mode char %s" % mode))
+
+ if mode_char == 0:
+ mode_char = 'r'
+ mode = ('wb', 'rb')[mode_char == 'r']
+
+ # open the file and set the buffer
+ f = pythonapi.PyFile_FromString(name, mode)
+ if not f:
+ raise OperationError(self.space.w_IOError,
+ self.space.wrap("cannot open file %s" % name))
+ pythonapi.PyFile_SetBufSize(f, buffering)
+
+ # store the FILE object
+ self._file = pythonapi.PyFile_AsFile(f)
+
+ bzerror = c_int()
+ if mode_char == 'r':
+ self.fp = libbz2.BZ2_bzReadOpen(byref(bzerror), self._file,
+ 0, 0, None, 0)
+ else:
+ self.fp = libbz2.BZ2_bzWriteOpen(byref(bzerror), self._file,
+ compresslevel, 0, 0)
+
+ if bzerror != BZ_OK:
+ _catch_bz2_error(self.space, bzerror)
+
+ self.mode = (MODE_WRITE, MODE_READ)[mode_char == 'r']
+
+ def __del__(self):
+ bzerror = c_int()
+
+ if self.mode in (MODE_READ, MODE_READ_EOF):
+ libbz2.BZ2_bzReadClose(byref(bzerror), self.fp)
+ elif self.mode == MODE_WRITE:
+ libbz2.BZ2_bzWriteClose(byref(bzerror), self.fp, 0, None, None)
+
+ _drop_readahead(self)
+
+ def _check_if_close(self):
+ if self.mode == MODE_CLOSED:
+ raise OperationError(self.space.w_ValueError,
+ self.space.wrap("I/O operation on closed file"))
+
+ def close(self):
+ """close() -> None or (perhaps) an integer
+
+ Close the file. Sets data attribute .closed to true. A closed file
+ cannot be used for further I/O operations."""
+
+ # NOTE: calling close() more than once without error is not
+ # supported here, because fclose() below runs on every call.
+
+ bzerror = c_int(BZ_OK)
+
+ if self.mode in (MODE_READ, MODE_READ_EOF):
+ libbz2.BZ2_bzReadClose(byref(bzerror), self.fp)
+ elif self.mode == MODE_WRITE:
+ libbz2.BZ2_bzWriteClose(byref(bzerror), self.fp, 0, None, None)
+
+ self.mode = MODE_CLOSED
+
+ # close the underline file
+ ret = libc.fclose(self._file)
+ if ret != 0:
+ raise OperationError(self.space.w_IOError,
+ self.space.wrap(_get_error_msg()))
+
+ if bzerror != BZ_OK:
+ return _catch_bz2_error(self.space, bzerror)
+
+ return ret
+ close.unwrap_spec = ['self']
+
+ def tell(self):
+ """tell() -> int
+
+ Return the current file position, an integer (may be a long integer)."""
+
+ self._check_if_close()
+
+ return self.space.wrap(self.pos)
+ tell.unwrap_spec = ['self']
+
+ def seek(self, offset, whence=0):
+ """seek(offset [, whence]) -> None
+
+ Move to new file position. Argument offset is a byte count. Optional
+ argument whence defaults to 0 (offset from start of file, offset
+ should be >= 0); other values are 1 (move relative to current position,
+ positive or negative), and 2 (move relative to end of file, usually
+ negative, although many platforms allow seeking beyond the end of a file).
+
+ Note that seeking of bz2 files is emulated, and depending on the parameters
+ the operation may be extremely slow."""
+
+ _drop_readahead(self)
+ self._check_if_close()
+
+ buf = c_char_p()
+ bufsize = SMALLCHUNK
+ bytesread = 0
+ bzerror = c_int()
+
+ if self.mode not in (MODE_READ, MODE_READ_EOF):
+ raise OperationError(self.space.w_IOError,
+ self.space.wrap("seek works only while reading"))
+
+ if whence == 2:
+ if self.size == -1:
+ while True:
+ chunksize = _univ_newline_read(bzerror, self.fp, buf,
+ bufsize, self)
+ self.pos += chunksize
+ bytesread += chunksize
+
+ if bzerror == BZ_STREAM_END:
+ break
+ elif bzerror != BZ_OK:
+ _catch_bz2_error(self.space, bzerror)
+
+ self.mode = MODE_READ_EOF
+ self.size = self.pos
+ bytesread = 0
+ offset += self.size
+ elif whence == 1:
+ offset += self.pos
+
+ # Before getting here, offset must be the absolute position the file
+ # pointer should be set to.
+ if offset >= self.pos:
+ # we can move forward
+ offset -= self.pos
+ else:
+ # we cannot move back, so rewind the stream
+ libbz2.BZ2_bzReadClose(byref(bzerror), self.fp)
+ if bzerror != BZ_OK:
+ _catch_bz2_error(self.space, bzerror)
+
+ ret = libc.fseek(self._file, 0, SEEK_SET)
+ if ret != 0:
+ raise OperationError(self.space.w_IOError,
+ self.space.wrap(_get_error_msg()))
+
+ self.pos = 0
+ self.fp = libbz2.BZ2_bzReadOpen(byref(bzerror), self._file,
+ 0, 0, None, 0)
+ if bzerror != BZ_OK:
+ _catch_bz2_error(self.space, bzerror)
+
+ self.mode = MODE_READ
+
+ if offset <= 0 or self.mode == MODE_READ_EOF:
+ return
+
+ # Before getting here, offset must be set to the number of bytes
+ # to walk forward.
+ while True:
+ if (offset - bytesread) > bufsize:
+ readsize = bufsize
+ else:
+ # offset might be wider than readsize, but the result
+ # of the subtraction is bounded by bufsize (see the
+ # condition above). bufsize is SMALLCHUNK (at least 8192).
+ readsize = offset - bytesread
+
+ chunksize = _univ_newline_read(bzerror, self.fp, buf, readsize, self)
+ self.pos += chunksize
+ bytesread += chunksize
+
+ if bzerror == BZ_STREAM_END:
+ self.size = self.pos
+ self.mode = MODE_READ_EOF
+ elif bzerror != BZ_OK:
+ _catch_bz2_error(self.space, bzerror)
+
+ if bytesread == offset:
+ break
+ seek.unwrap_spec = ['self', int, int]
+
+ def readline(self, size=-1):
+ """readline([size]) -> string
+
+ Return the next line from the file, as a string, retaining newline.
+ A non-negative size argument will limit the maximum number of bytes to
+ return (an incomplete line may be returned then). Return an empty
+ string at EOF."""
+
+ self._check_if_close()
+
+ if self.mode == MODE_READ_EOF:
+ return self.space.wrap("")
+ elif not self.mode == MODE_READ:
+ raise OperationError(self.space.w_IOError,
+ self.space.wrap("file is not ready for reading"))
+
+ if size == 0:
+ return self.space.wrap("")
+ else:
+ size = (size, 0)[size < 0]
+ return self.space.wrap(_getline(self.space, self, size))
+ readline.unwrap_spec = ['self', int]
+
+
+_BZ2File.typedef = TypeDef("_BZ2File",
+ close = interp2app(_BZ2File.close, unwrap_spec=_BZ2File.close.unwrap_spec),
+ tell = interp2app(_BZ2File.tell, unwrap_spec=_BZ2File.tell.unwrap_spec),
+ seek = interp2app(_BZ2File.seek, unwrap_spec=_BZ2File.seek.unwrap_spec),
+ readline = interp2app(_BZ2File.readline,
+ unwrap_spec=_BZ2File.readline.unwrap_spec)
+)
+
+def BZ2File(space, filename, mode='r', buffering=-1, compresslevel=9):
+ """BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object
+
+ Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or
+ writing. When opened for writing, the file will be created if it doesn't
+ exist, and truncated otherwise. If the buffering argument is given, 0 means
+ unbuffered, and larger numbers specify the buffer size. If compresslevel
+ is given, must be a number between 1 and 9.
+
+ Add a 'U' to mode to open the file for input with universal newline
+ support. Any line ending in the input file will be seen as a '\\n' in
+ Python. Also, a file so opened gains the attribute 'newlines'; the value
+ for this attribute is one of None (no newline read yet), '\\r', '\\n',
+ '\\r\\n' or a tuple containing all the newline types seen. Universal
+ newlines are available only when reading."""
+ return _BZ2File(space, filename, mode, buffering, compresslevel)
+BZ2File.unwrap_spec = [ObjSpace, str, str, int, int]
+
Modified: pypy/dist/pypy/module/bz2/test/test_bz2.py
==============================================================================
--- pypy/dist/pypy/module/bz2/test/test_bz2.py (original)
+++ pypy/dist/pypy/module/bz2/test/test_bz2.py Sat Aug 5 17:59:27 2006
@@ -9,7 +9,7 @@
if os.path.exists("foo"):
os.unlink("foo")
-class AppTestBz2:
+class AppTestBz2File:
def setup_class(cls):
space = gettestobjspace(usemodules=('bz2',))
cls.space = space
@@ -78,7 +78,31 @@
# bz2f.seek(0)
# bz2f.seek(-1, 2)
# assert bz2f.tell() == 5
+
+ def test_readline(self):
+ def create_temp_file(crlf=False):
+ DATA = 'BZh91AY&SY.\xc8N\x18\x00\x01>_\x80\x00\x10@\x02\xff\xf0\x01\x07n\x00?\xe7\xff\xe00\x01\x99\xaa\x00\xc0\x03F\x86\x8c#&\x83F\x9a\x03\x06\xa6\xd0\xa6\x93M\x0fQ\xa7\xa8\x06\x804hh\x12$\x11\xa4i4\xf14S\xd2<Q\xb5\x0fH\xd3\xd4\xdd\xd5\x87\xbb\xf8\x94\r\x8f\xafI\x12\xe1\xc9\xf8/E\x00pu\x89\x12]\xc9\xbbDL\nQ\x0e\t1\x12\xdf\xa0\xc0\x97\xac2O9\x89\x13\x94\x0e\x1c7\x0ed\x95I\x0c\xaaJ\xa4\x18L\x10\x05#\x9c\xaf\xba\xbc/\x97\x8a#C\xc8\xe1\x8cW\xf9\xe2\xd0\xd6M\xa7\x8bXa<e\x84t\xcbL\xb3\xa7\xd9\xcd\xd1\xcb\x84.\xaf\xb3\xab\xab\xad`n}\xa0lh\tE,\x8eZ\x15\x17VH>\x88\xe5\xcd9gd6\x0b\n\xe9\x9b\xd5\x8a\x99\xf7\x08.K\x8ev\xfb\xf7xw\xbb\xdf\xa1\x92\xf1\xdd|/";\xa2\xba\x9f\xd5\xb1#A\xb6\xf6\xb3o\xc9\xc5y\\\xebO\xe7\x85\x9a\xbc\xb6f8\x952\xd5\xd7"%\x89>V,\xf7\xa6z\xe2\x9f\xa3\xdf\x11\x11"\xd6E)I\xa9\x13^\xca\xf3r\xd0\x03U\x922\xf26\xec\xb6\xed\x8b\xc3U\x13\x9d\xc5\x170\xa4\xfa^\x92\xacDF\x8a\x97\xd6\x19\xfe\xdd\xb8\xbd\x1a\x9a\x19\xa3\x80ankR\x8b\xe5\xd83]\xa9\xc6\x08\x82f\xf6\xb9"6l$\xb8j@\xc0\x8a\xb0l1..\xbak\x83ls\x15\xbc\xf4\xc1\x13\xbe\xf8E\xb8\x9d\r\xa8\x9dk\x84\xd3n\xfa\xacQ\x07\xb1%y\xaav\xb4\x08\xe0z\x1b\x16\xf5\x04\xe9\xcc\xb9\x08z\x1en7.G\xfc]\xc9\x14\xe1B@\xbb!8`'
+ DATA_CRLF = 'BZh91AY&SY\xaez\xbbN\x00\x01H\xdf\x80\x00\x12@\x02\xff\xf0\x01\x07n\x00?\xe7\xff\xe0@\x01\xbc\xc6`\x86*\x8d=M\xa9\x9a\x86\xd0L@\x0fI\xa6!\xa1\x13\xc8\x88jdi\x8d@\x03@\x1a\x1a\x0c\x0c\x83 \x00\xc4h2\x19\x01\x82D\x84e\t\xe8\x99\x89\x19\x1ah\x00\r\x1a\x11\xaf\x9b\x0fG\xf5(\x1b\x1f?\t\x12\xcf\xb5\xfc\x95E\x00ps\x89\x12^\xa4\xdd\xa2&\x05(\x87\x04\x98\x89u\xe40%\xb6\x19\'\x8c\xc4\x89\xca\x07\x0e\x1b!\x91UIFU%C\x994!DI\xd2\xfa\xf0\xf1N8W\xde\x13A\xf5\x9cr%?\x9f3;I45A\xd1\x8bT\xb1<l\xba\xcb_\xc00xY\x17r\x17\x88\x08\x08@\xa0\ry@\x10\x04$)`\xf2\xce\x89z\xb0s\xec\x9b.iW\x9d\x81\xb5-+t\x9f\x1a\'\x97dB\xf5x\xb5\xbe.[.\xd7\x0e\x81\xe7\x08\x1cN`\x88\x10\xca\x87\xc3!"\x80\x92R\xa1/\xd1\xc0\xe6mf\xac\xbd\x99\xcca\xb3\x8780>\xa4\xc7\x8d\x1a\\"\xad\xa1\xabyBg\x15\xb9l\x88\x88\x91k"\x94\xa4\xd4\x89\xae*\xa6\x0b\x10\x0c\xd6\xd4m\xe86\xec\xb5j\x8a\x86j\';\xca.\x01I\xf2\xaaJ\xe8\x88\x8cU+t3\xfb\x0c\n\xa33\x13r2\r\x16\xe0\xb3(\xbf\x1d\x83r\xe7M\xf0D\x1365\xd8\x88\xd3\xa4\x92\xcb2\x06\x04\\\xc1\xb0\xea//\xbek&\xd8\xe6+t\xe5\xa1\x13\xada\x16\xder5"w]\xa2i\xb7[\x97R \xe2IT\xcd;Z\x04dk4\xad\x8a\t\xd3\x81z\x10\xf1:^`\xab\x1f\xc5\xdc\x91N\x14$+\x9e\xae\xd3\x80'
+
+ f = open("foo", "wb")
+
+ data = (DATA, DATA_CRLF)[crlf]
+ f.write(data)
+ f.close()
+ from bz2 import BZ2File
+ from StringIO import StringIO
+ create_temp_file()
+
+ TEXT = 'root:x:0:0:root:/root:/bin/bash\nbin:x:1:1:bin:/bin:\ndaemon:x:2:2:daemon:/sbin:\nadm:x:3:4:adm:/var/adm:\nlp:x:4:7:lp:/var/spool/lpd:\nsync:x:5:0:sync:/sbin:/bin/sync\nshutdown:x:6:0:shutdown:/sbin:/sbin/shutdown\nhalt:x:7:0:halt:/sbin:/sbin/halt\nmail:x:8:12:mail:/var/spool/mail:\nnews:x:9:13:news:/var/spool/news:\nuucp:x:10:14:uucp:/var/spool/uucp:\noperator:x:11:0:operator:/root:\ngames:x:12:100:games:/usr/games:\ngopher:x:13:30:gopher:/usr/lib/gopher-data:\nftp:x:14:50:FTP User:/var/ftp:/bin/bash\nnobody:x:65534:65534:Nobody:/home:\npostfix:x:100:101:postfix:/var/spool/postfix:\nniemeyer:x:500:500::/home/niemeyer:/bin/bash\npostgres:x:101:102:PostgreSQL Server:/var/lib/pgsql:/bin/bash\nmysql:x:102:103:MySQL server:/var/lib/mysql:/bin/bash\nwww:x:103:104::/var/www:/bin/false\n'
+
+ bz2f = BZ2File("foo")
+ raises(TypeError, bz2f.readline, None)
+ sio = StringIO(TEXT)
+ for line in sio.readlines():
+ line_read = bz2f.readline()
+ assert line_read == line
+ bz2f.close()
# #!/usr/bin/python
# from test import test_support
@@ -164,17 +188,7 @@
# bz2f = BZ2File(self.filename)
# self.assertEqual(bz2f.read(100), self.TEXT[:100])
# bz2f.close()
-#
-# def testReadLine(self):
-# # "Test BZ2File.readline()"
-# self.createTempFile()
-# bz2f = BZ2File(self.filename)
-# self.assertRaises(TypeError, bz2f.readline, None)
-# sio = StringIO(self.TEXT)
-# for line in sio.readlines():
-# self.assertEqual(bz2f.readline(), line)
-# bz2f.close()
-#
+
# def testReadLines(self):
# # "Test BZ2File.readlines()"
# self.createTempFile()
More information about the Pypy-commit
mailing list