[pypy-svn] r49286 - in pypy/dist/pypy/module: _file marshal marshal/test

arigo at codespeak.net arigo at codespeak.net
Sun Dec 2 16:53:06 CET 2007


Author: arigo
Date: Sun Dec  2 16:53:05 2007
New Revision: 49286

Modified:
   pypy/dist/pypy/module/_file/interp_file.py
   pypy/dist/pypy/module/marshal/interp_marshal.py
   pypy/dist/pypy/module/marshal/test/test_marshal.py
Log:
A performance hack for interp-level code that manipulates app-level file
objects.  It gives direct access to the underlying stream, shortcutting
calls that need to go through app-level (which performs various checks
and locking).  For marshal, for example, a single lock/unlock pair around
the whole operation is enough.


Modified: pypy/dist/pypy/module/_file/interp_file.py
==============================================================================
--- pypy/dist/pypy/module/_file/interp_file.py	(original)
+++ pypy/dist/pypy/module/_file/interp_file.py	Sun Dec  2 16:53:05 2007
@@ -87,6 +87,36 @@
         assert self.slockowner is None
         return False
 
+    def do_read(self, n):
+        """
+        Direct interp-level read from the underlying stream (used e.g. by
+        interp_marshal.py).  Like os.read(), may return less than n bytes.
+        NOTE: this assumes that the stream lock is already acquired.
+        StreamError -> ValueError; OSError -> wrap_oserror_as_ioerror().
+        """
+        try:
+            return self.stream.read(n)
+        except streamio.StreamError, e:
+            raise OperationError(self.space.w_ValueError,
+                                 self.space.wrap(e.message))
+        except OSError, e:
+            raise wrap_oserror_as_ioerror(self.space, e)
+
+    def do_write(self, data):
+        """
+        Direct interp-level write to the underlying stream (used e.g. by
+        interp_marshal.py).  StreamError -> ValueError, OSError -> IOError.
+        NOTE: this assumes that the stream lock is already acquired.
+        """
+        try:
+            self.stream.write(data)
+        except streamio.StreamError, e:
+            raise OperationError(self.space.w_ValueError,
+                                 self.space.wrap(e.message))
+        except OSError, e:
+            raise wrap_oserror_as_ioerror(self.space, e)
+
+
 for name, argtypes in streamio.STREAM_METHODS.iteritems():
     numargs = len(argtypes)
     args = ", ".join(["v%s" % i for i in range(numargs)])
@@ -136,3 +166,20 @@
             space, streamio.fdopen_as_stream(fd, mode, buffering)))
 fdopen_as_stream.unwrap_spec = [ObjSpace, int, str, int]
 
+
+def file2stream(space, w_f):
+    """A hack for direct interp-level access to W_Stream objects,
+    for better performance e.g. when marshalling directly from/to a
+    real file object.  This peels off the app-level layers of the file class
+    defined in app_file.py.  It complains if the file is already closed.
+    """
+    w_stream = space.findattr(w_f, space.wrap('stream'))
+    if w_stream is None:
+        return None
+    w_stream = space.interpclass_w(w_stream)
+    if not isinstance(w_stream, W_Stream):
+        return None
+    if space.is_true(space.getattr(w_f, space.wrap('_closed'))):
+        raise OperationError(space.w_ValueError,
+                             space.wrap('I/O operation on closed file'))
+    return w_stream

Modified: pypy/dist/pypy/module/marshal/interp_marshal.py
==============================================================================
--- pypy/dist/pypy/module/marshal/interp_marshal.py	(original)
+++ pypy/dist/pypy/module/marshal/interp_marshal.py	Sun Dec  2 16:53:05 2007
@@ -1,6 +1,7 @@
 from pypy.interpreter.baseobjspace import ObjSpace
 from pypy.interpreter.error import OperationError
 from pypy.rlib.rarithmetic import intmask
+from pypy.module._file.interp_file import file2stream
 import sys
 
 # Py_MARSHAL_VERSION = 2
@@ -14,12 +15,19 @@
 
 def dump(space, w_data, w_f, w_version=Py_MARSHAL_VERSION):
     """Write the 'data' object into the open file 'f'."""
-    writer = FileWriter(space, w_f)
-    # note: bound methods are currently not supported,
-    # so we have to pass the instance in, instead.
-    ##m = Marshaller(space, writer.write, space.int_w(w_version))
-    m = Marshaller(space, writer, space.int_w(w_version))
-    m.put_w_obj(w_data)
+    w_stream = file2stream(space, w_f)
+    if w_stream is not None:
+        writer = StreamWriter(space, w_stream)
+    else:
+        writer = FileWriter(space, w_f)
+    try:
+        # note: bound methods are currently not supported,
+        # so we have to pass the instance in, instead.
+        ##m = Marshaller(space, writer.write, space.int_w(w_version))
+        m = Marshaller(space, writer, space.int_w(w_version))
+        m.put_w_obj(w_data)
+    finally:
+        writer.finished()
 
 def dumps(space, w_data, w_version=Py_MARSHAL_VERSION):
     """Return the string that would have been written to a file
@@ -30,9 +38,17 @@
 
 def load(space, w_f):
     """Read one value from the file 'f' and return it."""
-    reader = FileReader(space, w_f)
-    u = Unmarshaller(space, reader)
-    return u.get_w_obj(False)
+    # special case real files for performance
+    w_stream = file2stream(space, w_f)
+    if w_stream is not None:
+        reader = StreamReader(space, w_stream)
+    else:
+        reader = FileReader(space, w_f)
+    try:
+        u = Unmarshaller(space, reader)
+        return u.get_w_obj(False)
+    finally:
+        reader.finished()
 
 def loads(space, w_str):
     """Convert a string back to a value.  Extra characters in the string are
@@ -41,9 +57,22 @@
     return u.get_w_obj(False)
 
 
-class FileWriter(object):
-    def __init__(self, space, w_f):
+class AbstractReaderWriter(object):
+    def __init__(self, space):
         self.space = space
+
+    def raise_eof(self):
+        space = self.space
+        raise OperationError(space.w_EOFError, space.wrap(
+            'EOF read where object expected'))
+
+    def finished(self):
+        pass
+
+
+class FileWriter(AbstractReaderWriter):
+    def __init__(self, space, w_f):
+        AbstractReaderWriter.__init__(self, space)
         try:
             self.func = space.getattr(w_f, space.wrap('write'))
             # XXX how to check if it is callable?
@@ -53,19 +82,14 @@
             raise OperationError(space.w_TypeError, space.wrap(
             'marshal.dump() 2nd arg must be file-like object'))
 
-    def raise_eof(self):
-        space = self.space
-        raise OperationError(space.w_EOFError, space.wrap(
-            'EOF read where object expected'))
-
     def write(self, data):
         space = self.space
         space.call_function(self.func, space.wrap(data))
 
 
-class FileReader(object):
+class FileReader(AbstractReaderWriter):
     def __init__(self, space, w_f):
-        self.space = space
+        AbstractReaderWriter.__init__(self, space)
         try:
             self.func = space.getattr(w_f, space.wrap('read'))
             # XXX how to check if it is callable?
@@ -83,10 +107,29 @@
             self.raise_eof()
         return ret
 
-    def raise_eof(self):
-        space = self.space
-        raise OperationError(space.w_EOFError, space.wrap(
-            'EOF read where object expected'))
+
+class StreamReaderWriter(AbstractReaderWriter):
+    def __init__(self, space, w_stream):
+        AbstractReaderWriter.__init__(self, space)
+        self.w_stream = w_stream
+        w_stream.descr_lock()
+
+    def finished(self):
+        self.w_stream.descr_unlock()
+
+class StreamWriter(StreamReaderWriter):
+    def write(self, data):
+        self.w_stream.do_write(data)
+
+class StreamReader(StreamReaderWriter):
+    def read(self, n):
+        result = data = self.w_stream.do_read(n)
+        while len(result) < n:
+            if len(data) == 0:
+                self.raise_eof()
+            data = self.w_stream.do_read(n - len(result))  # never over-read
+            result += data
+        return result
 
 
 MAX_MARSHAL_DEPTH = 5000

Modified: pypy/dist/pypy/module/marshal/test/test_marshal.py
==============================================================================
--- pypy/dist/pypy/module/marshal/test/test_marshal.py	(original)
+++ pypy/dist/pypy/module/marshal/test/test_marshal.py	Sun Dec  2 16:53:05 2007
@@ -1,5 +1,11 @@
+from pypy.tool.udir import udir
+
 class AppTestMarshal:
 
+    def setup_class(cls):
+        tmpfile = udir.join('AppTestMarshal.tmp')
+        cls.w_tmpfile = cls.space.wrap(str(tmpfile))
+
     def test_None(self):
         import sys
         hello = "he"
@@ -589,6 +595,26 @@
         x = marshal.load(f)
         assert x == case and type(x) is type(case)
 
+    def test_stream_reader_writer(self):
+        # for performance, we have a special case when reading/writing real
+        # file objects
+        import marshal
+        obj1 = [4, ("hello", 7.5)]
+        obj2 = "foobar"
+        f = open(self.tmpfile, 'wb')
+        marshal.dump(obj1, f)
+        marshal.dump(obj2, f)
+        f.write('END')
+        f.close()
+        f = open(self.tmpfile, 'rb')
+        obj1b = marshal.load(f)
+        obj2b = marshal.load(f)
+        tail = f.read()
+        f.close()
+        assert obj1b == obj1
+        assert obj2b == obj2
+        assert tail == 'END'
+
 
 class AppTestMultiDict(object):
     def setup_class(cls):
@@ -602,3 +628,4 @@
     def setup_class(cls):
         from pypy.conftest import gettestobjspace
         cls.space = gettestobjspace(**{"objspace.std.withrope": True})
+        AppTestMarshal.setup_class.im_func(cls)



More information about the Pypy-commit mailing list