[pypy-svn] r49538 - pypy/branch/pypy-interp-file/module/bz2

Fri Dec 7 21:16:59 CET 2007

Author: arigo
Date: Fri Dec  7 21:16:58 2007
New Revision: 49538

Removed:
   pypy/branch/pypy-interp-file/module/bz2/app_bz2.py
Modified:
   pypy/branch/pypy-interp-file/module/bz2/__init__.py
   pypy/branch/pypy-interp-file/module/bz2/interp_bz2.py
Log:
Trying to fix the bz2 module:

* port BZ2File to interp-level.  This is not as nice as it could be.

* fix various bugs in ReadBZ2Filter that for some reason were not
  apparent before (probably because of differences in which stream
  methods get called in response to which file-level operation).


Modified: pypy/branch/pypy-interp-file/module/bz2/__init__.py
==============================================================================

--- pypy/branch/pypy-interp-file/module/bz2/__init__.py	(original)
+++ pypy/branch/pypy-interp-file/module/bz2/__init__.py	Fri Dec  7 21:16:58 2007
@@ -2,15 +2,18 @@
 from pypy.interpreter.mixedmodule import MixedModule
 
 class Module(MixedModule):
+    """The python bz2 module provides a comprehensive interface for
+the bz2 compression library. It implements a complete file
+interface, one shot (de)compression functions, and types for
+sequential (de)compression."""
+
     interpleveldefs = {
         'BZ2Compressor': 'interp_bz2.W_BZ2Compressor',
         'BZ2Decompressor': 'interp_bz2.W_BZ2Decompressor',
         'compress': 'interp_bz2.compress',
         'decompress': 'interp_bz2.decompress',
-        '_open_file_as_stream': 'interp_bz2.open_file_as_stream'
+        'BZ2File': 'interp_bz2.W_BZ2File',
     }
 
     appleveldefs = {
-        '__doc__': 'app_bz2.__doc__',
-        'BZ2File': 'app_bz2.BZ2File',
     }

Modified: pypy/branch/pypy-interp-file/module/bz2/interp_bz2.py
==============================================================================
--- pypy/branch/pypy-interp-file/module/bz2/interp_bz2.py	(original)
+++ pypy/branch/pypy-interp-file/module/bz2/interp_bz2.py	Fri Dec  7 21:16:58 2007
@@ -5,7 +5,7 @@
 from pypy.interpreter.baseobjspace import Wrappable
 from pypy.interpreter.typedef import TypeDef, GetSetProperty
 from pypy.interpreter.typedef import interp_attrproperty
-from pypy.interpreter.gateway import ObjSpace, W_Root, NoneNotWrapped, interp2app
+from pypy.interpreter.gateway import ObjSpace, W_Root, NoneNotWrapped, interp2app, Arguments
 from pypy.rlib.streamio import Stream
 from pypy.translator.tool.cbuild import ExternalCompilationInfo
 import sys
@@ -161,12 +161,83 @@
             return current_size + BIGCHUNK
     return current_size + SMALLCHUNK
 
-def open_file_as_stream(space, path, mode="r", buffering=-1, compresslevel=9):
+# ____________________________________________________________
+#
+# Make the BZ2File type by internally inheriting from W_File.
+# XXX this depends on internal details of W_File to work properly.
+
+from pypy.module._file.interp_file import W_File
+
+class W_BZ2File(W_File):
+
+    def direct___init__(self, name, mode='r', buffering=0, compresslevel=9):
+        self.direct_close()
+        # the stream should always be opened in binary mode
+        if "b" not in mode:
+            mode = mode + "b"
+        self.check_mode_ok(mode)
+        stream = open_bz2file_as_stream(self.space, name, mode,
+                                        buffering, compresslevel)
+        fd = stream.try_to_find_file_descriptor()
+        self.fdopenstream(stream, fd, mode, name)
+
+    _exposed_method_names = []
+    W_File._decl.im_func(locals(), "__init__", ['self', str, str, int, int],
+          """Opens a BZ2-compressed file.""")
+
+    def bz2file__repr__(self):
+        if self.stream is None:
+            head = "closed"
+        else:
+            head = "open"
+        info = "%s bz2.BZ2File '%s', mode '%s'" % (head, self.name, self.mode)
+        return self.getrepr(self.space, info)
+    bz2file__repr__.unwrap_spec = ['self']
+
+def descr_bz2file__new__(space, w_subtype, args):
+    bz2file = space.allocate_instance(W_BZ2File, w_subtype)
+    W_BZ2File.__init__(bz2file, space)
+    return space.wrap(bz2file)
+descr_bz2file__new__.unwrap_spec = [ObjSpace, W_Root, Arguments]
+
+same_attributes_as_in_file = list(W_File._exposed_method_names)
+same_attributes_as_in_file.remove('__init__')
+same_attributes_as_in_file.extend([
+    'name', 'mode', 'encoding', 'closed', 'newlines', 'softspace',
+    '__weakref__'])
+
+extra_attrs = dict([(name, interp2app(getattr(W_BZ2File, 'file_' + name)))
+                    for name in W_BZ2File._exposed_method_names])
+extra_attrs.update(dict([(name, W_File.typedef.rawdict[name])
+                         for name in same_attributes_as_in_file]))
+
+W_BZ2File.typedef = TypeDef(
+    "BZ2File",
+    __doc__ = """\
+BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object
+
+Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or
+writing. When opened for writing, the file will be created if it doesn't
+exist, and truncated otherwise. If the buffering argument is given, 0 means
+unbuffered, and larger numbers specify the buffer size. If compresslevel
+is given, must be a number between 1 and 9.
+
+Add a 'U' to mode to open the file for input with universal newline
+support. Any line ending in the input file will be seen as a '\\n' in
+Python. Also, a file so opened gains the attribute 'newlines'; the value
+for this attribute is one of None (no newline read yet), '\\r', '\\n',
+'\\r\\n' or a tuple containing all the newline types seen. Universal
+newlines are available only when reading.""",
+    __new__  = interp2app(descr_bz2file__new__),
+    __repr__ = interp2app(W_BZ2File.bz2file__repr__),
+    **extra_attrs)
+
+# ____________________________________________________________
+
+def open_bz2file_as_stream(space, path, mode="r", buffering=-1,
+                           compresslevel=9):
     from pypy.rlib.streamio import decode_mode, open_path_helper
     from pypy.rlib.streamio import construct_stream_tower
-    from pypy.module._file.interp_file import wrap_oserror_as_ioerror, W_Stream
-    from pypy.module._file.interp_file import is_mode_ok
-    is_mode_ok(space, mode)
     os_flags, universal, reading, writing, basemode = decode_mode(mode)
     if reading and writing:
         raise OperationError(space.w_ValueError,
@@ -174,10 +245,7 @@
     if basemode == "a":
         raise OperationError(space.w_ValueError,
                              space.wrap("cannot append to bz2 file"))
-    try:
-        stream = open_path_helper(path, os_flags, False)
-    except OSError, exc:
-        raise wrap_oserror_as_ioerror(space, exc)
+    stream = open_path_helper(path, os_flags, False)
     if reading:
         bz2stream = ReadBZ2Filter(space, stream, compresslevel)
     else:
@@ -185,8 +253,7 @@
         bz2stream = WriteBZ2Filter(space, stream, compresslevel)
     stream = construct_stream_tower(bz2stream, buffering, universal, reading,
                                     writing)
-    return space.wrap(W_Stream(space, stream))
-open_file_as_stream.unwrap_spec = [ObjSpace, str, str, int, int]
+    return stream
 
 
 class ReadBZ2Filter(Stream):
@@ -229,12 +296,18 @@
                 if not length:
                     break
         else:
-            raise NotImplementedError
+            # first measure the length by reading everything left
+            while len(self.read(65536)) > 0:
+                pass
+            pos = self.readlength + offset
+            self.seek(pos, 0)
 
     def readall(self):
         w_result = self.decompressor.decompress(self.stream.readall())
         result = self.space.str_w(w_result)
         self.readlength += len(result)
+        result = self.buffer + result
+        self.buffer = ''
         return result
 
     def read(self, n):
@@ -244,8 +317,12 @@
         while not self.buffer:
             if self.finished:
                 return ""
+            moredata = self.stream.read(n)
+            if not moredata:
+                self.finished = True
+                return ""
             try:
-                w_read = self.decompressor.decompress(self.stream.read(n))
+                w_read = self.decompressor.decompress(moredata)
             except OperationError, e:
                 if e.match(self.space, self.space.w_EOFError):
                     self.finished = True
@@ -507,7 +584,9 @@
         after the end of stream is found, EOFError will be raised. If any data
         was found after the end of stream, it'll be ignored and saved in
         unused_data attribute."""
-        
+
+        if data == '':
+            return self.space.wrap('')
         if not self.running:
             raise OperationError(self.space.w_EOFError,
                 self.space.wrap("end of stream was already found"))