[Python-3000-checkins] r54190 - in python/branches/p3yk/Lib: io.py test/test_io.py

guido.van.rossum python-3000-checkins at python.org
Wed Mar 7 02:00:18 CET 2007


Author: guido.van.rossum
Date: Wed Mar  7 02:00:12 2007
New Revision: 54190

Modified:
   python/branches/p3yk/Lib/io.py
   python/branches/p3yk/Lib/test/test_io.py
Log:
New version from Mike Verdone (sat in my inbox since 2/27).

I cleaned up whitespace but otherwise didn't change it.

This will need work to reflect the tentative decision to drop nonblocking I/O
support from the buffering layers.


Modified: python/branches/p3yk/Lib/io.py
==============================================================================
--- python/branches/p3yk/Lib/io.py	(original)
+++ python/branches/p3yk/Lib/io.py	Wed Mar  7 02:00:12 2007
@@ -10,12 +10,21 @@
               "Mike Verdone <mike.verdone at gmail.com>")
 
 __all__ = ["open", "RawIOBase", "FileIO", "SocketIO", "BytesIO",
-           "BufferedReader", "BufferedWriter", "BufferedRWPair", "EOF"]
+           "BufferedReader", "BufferedWriter", "BufferedRWPair",
+           "BufferedRandom", "EOF"]
 
 import os
 
 DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes
-EOF = b""
+DEFAULT_MAX_BUFFER_SIZE = 16 * 1024 # bytes
+EOF = b''
+
+
+class BlockingIO(IOError):
+    def __init__(self, errno, strerror, characters_written):
+        IOError.__init__(self, errno, strerror)
+        self.characters_written = characters_written
+
 
 def open(filename, mode="r", buffering=None, *, encoding=None):
     """Replacement for the built-in open function.
@@ -117,6 +126,11 @@
     # XXX Add individual method docstrings
 
     def read(self, n):
+        """Read and return up to n bytes.
+
+        Returns an empty bytes array on EOF, or None if the object is
+        set not to block and has no data to read.
+        """
         b = bytes(n.__index__())
         self.readinto(b)
         return b
@@ -125,6 +139,10 @@
         raise IOError(".readinto() not supported")
 
     def write(self, b):
+        """Write the given buffer to the IO stream.
+
+        Returns the number of bytes written.
+        """
         raise IOError(".write() not supported")
 
     def seek(self, pos, whence=0):
@@ -324,111 +342,210 @@
         return True
 
 
+class BufferedIOBase(RawIOBase):
+
+    """Base class for buffered IO objects."""
+
+    def flush(self):
+        """Flush the buffer to the underlying raw IO object."""
+        raise IOError(".flush() unsupported")
+
+
 class BufferedReader(BufferedIOBase):
 
-    """Buffered reader.
+    """Buffer for a readable sequential RawIO object.
 
-    Buffer for a readable sequential RawIO object. Does not allow
-    random access (seek, tell).
+    Does not allow random access (seek, tell).
     """
 
     def __init__(self, raw):
-        """
-        Create a new buffered reader using the given readable raw IO object.
+        """Create a new buffered reader using the given readable raw IO object.
         """
         assert raw.readable()
         self.raw = raw
-        self._read_buf = b''
+        self._read_buf = b""
         if hasattr(raw, 'fileno'):
             self.fileno = raw.fileno
 
     def read(self, n=None):
+        """Read n bytes.
+
+        Returns exactly n bytes of data unless the underlying raw IO
+        stream reaches EOF or if the call would block in non-blocking
+        mode. If n is None, read until EOF or until read() would
+        block.
         """
-        Read n bytes. Returns exactly n bytes of data unless the underlying
-        raw IO stream reaches EOF of if the call would block in non-blocking
-        mode. If n is None, read until EOF or until read() would block.
-        """
+        assert n is None or n > 0
         nodata_val = EOF
         while (len(self._read_buf) < n) if (n is not None) else True:
             current = self.raw.read(n)
             if current in (EOF, None):
                 nodata_val = current
                 break
-            self._read_buf += current # XXX using += is bad
-        read = self._read_buf[:n]
-        if (not self._read_buf):
-            return nodata_val
-        self._read_buf = self._read_buf[n if n else 0:]
-        return read
-
-    def write(self, b):
-        raise IOError(".write() unsupported")
+            self._read_buf += current
+        if self._read_buf:
+            if n is None:
+                n = len(self._read_buf)
+            out = self._read_buf[:n]
+            self._read_buf = self._read_buf[n:]
+        else:
+            out = nodata_val
+        return out
 
     def readable(self):
         return True
 
+    def fileno(self):
+        return self.raw.fileno()
+
     def flush(self):
         # Flush is a no-op
         pass
 
+    def close(self):
+        self.raw.close()
 
-class BufferedWriter(BufferedIOBase):
-
-    """Buffered writer.
 
-    XXX More docs.
-    """
+class BufferedWriter(BufferedIOBase):
 
-    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
-        assert raw.writeable()
+    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE,
+                 max_buffer_size=DEFAULT_MAX_BUFFER_SIZE):
+        assert raw.writable()
         self.raw = raw
         self.buffer_size = buffer_size
-        self._write_buf_stack = []
-        self._write_buf_size = 0
-        if hasattr(raw, 'fileno'):
-            self.fileno = raw.fileno
-
-    def read(self, n=None):
-        raise IOError(".read() not supported")
+        self.max_buffer_size = max_buffer_size
+        self._write_buf = b''
 
     def write(self, b):
+        # XXX we can implement some more tricks to try and avoid partial writes
         assert issubclass(type(b), bytes)
-        self._write_buf_stack.append(b)
-        self._write_buf_size += len(b)
-        if (self._write_buf_size > self.buffer_size):
-            self.flush()
+        if len(self._write_buf) > self.buffer_size:
+            # We're full, so let's pre-flush the buffer
+            try:
+                self.flush()
+            except BlockingIO as e:
+                # We can't accept anything else.
+                raise BlockingIO(e.errno, e.strerror, 0)
+        self._write_buf += b
+        if (len(self._write_buf) > self.buffer_size):
+            try:
+                self.flush()
+            except BlockingIO as e:
+                if (len(self._write_buf) > self.max_buffer_size):
+                    # We've hit max_buffer_size. We have to accept a partial
+                    # write and cut back our buffer.
+                    overage = len(self._write_buf) - self.max_buffer_size
+                    self._write_buf = self._write_buf[:self.max_buffer_size]
+                    raise BlockingIO(e.errno, e.strerror, overage)
 
-    def writeable(self):
+    def writable(self):
         return True
 
     def flush(self):
-        buf = b''.join(self._write_buf_stack)
-        while len(buf):
-            buf = buf[self.raw.write(buf):]
-        self._write_buf_stack = []
-        self._write_buf_size = 0
+        try:
+            while len(self._write_buf):
+                self._write_buf = self._write_buf[
+                                    self.raw.write(self._write_buf):]
+        except BlockingIO as e:
+            self._write_buf[e.characters_written:]
+            raise
+
+    def fileno(self):
+        return self.raw.fileno()
+
+    def close(self):
+        self.raw.close()
 
-    # XXX support flushing buffer on close, del
+    def __del__(self):
+        # XXX flush buffers before dying. Is there a nicer way to do this?
+        if self._write_buf:
+            self.flush()
 
 
 class BufferedRWPair(BufferedReader, BufferedWriter):
 
-    """Buffered Read/Write Pair.
+    """A buffered reader and writer object together.
 
     A buffered reader object and buffered writer object put together to
     form a sequential IO object that can read and write.
     """
 
-    def __init__(self, bufferedReader, bufferedWriter):
-        assert bufferedReader.readable()
-        assert bufferedWriter.writeable()
-        self.bufferedReader = bufferedReader
-        self.bufferedWriter = bufferedWriter
-        self.read = bufferedReader.read
-        self.write = bufferedWriter.write
-        self.flush = bufferedWriter.flush
-        self.readable = bufferedReader.readable
-        self.writeable = bufferedWriter.writeable
+    def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE,
+                 max_buffer_size=DEFAULT_MAX_BUFFER_SIZE):
+        assert reader.readable()
+        assert writer.writable()
+        BufferedReader.__init__(self, reader)
+        BufferedWriter.__init__(self, writer, buffer_size, max_buffer_size)
+        self.reader = reader
+        self.writer = writer
+
+    def read(self, n=None):
+        return self.reader.read(n)
+
+    def write(self, b):
+        return self.writer.write(b)
+
+    def readable(self):
+        return self.reader.readable()
+
+    def writable(self):
+        return self.writer.writable()
+
+    def flush(self):
+        return self.writer.flush()
 
     def seekable(self):
         return False
+
+    def fileno(self):
+        # XXX whose fileno do we return? Reader's? Writer's? Unsupported?
+        raise IOError(".fileno() unsupported")
+
+    def close(self):
+        self.reader.close()
+        self.writer.close()
+
+
+class BufferedRandom(BufferedReader, BufferedWriter):
+
+    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE,
+                 max_buffer_size=DEFAULT_MAX_BUFFER_SIZE):
+        assert raw.seekable()
+        BufferedReader.__init__(self, raw)
+        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)
+
+    def seekable(self):
+        return self.raw.seekable()
+
+    def readable(self):
+        return self.raw.readable()
+
+    def writable(self):
+        return self.raw.writable()
+
+    def seek(self, pos, whence=0):
+        self.flush()
+        self._read_buf = b""
+        self.raw.seek(pos, whence)
+        # XXX I suppose we could implement some magic here to move through the
+        # existing read buffer in the case of seek(<some small +ve number>, 1)
+
+    def tell(self):
+        if (self._write_buf):
+            return self.raw.tell() + len(self._write_buf)
+        else:
+            return self.raw.tell() - len(self._read_buf)
+
+    def read(self, n=None):
+        self.flush()
+        return BufferedReader.read(self, n)
+
+    def write(self, b):
+        self._read_buf = b""
+        return BufferedWriter.write(self, b)
+
+    def flush(self):
+        BufferedWriter.flush(self)
+
+    def close(self):
+        self.raw.close()

Modified: python/branches/p3yk/Lib/test/test_io.py
==============================================================================
--- python/branches/p3yk/Lib/test/test_io.py	(original)
+++ python/branches/p3yk/Lib/test/test_io.py	Wed Mar  7 02:00:12 2007
@@ -5,10 +5,10 @@
 
 import io
 
-
-class MockReadIO(io.RawIOBase):
-    def __init__(self, readStack):
+class MockIO(io.RawIOBase):
+    def __init__(self, readStack=()):
         self._readStack = list(readStack)
+        self._writeStack = []
 
     def read(self, n=None):
         try:
@@ -16,27 +16,41 @@
         except:
             return io.EOF
 
+    def write(self, b):
+        self._writeStack.append(b)
+        return len(b)
+
+    def writable(self):
+        return True
+
     def fileno(self):
         return 42
 
     def readable(self):
         return True
 
-
-class MockWriteIO(io.RawIOBase):
-    def __init__(self):
-        self._writeStack = []
-
-    def write(self, b):
-        self._writeStack.append(b)
-        return len(b)
-
-    def writeable(self):
+    def seekable(self):
         return True
 
-    def fileno(self):
+    def seek(self, pos, whence):
+        pass
+
+    def tell(self):
         return 42
 
+class MockNonBlockWriterIO(io.RawIOBase):
+    def __init__(self, blockingScript):
+        self.bs = list(blockingScript)
+        self._write_stack = []
+    def write(self, b):
+        self._write_stack.append(b)
+        n = self.bs.pop(0)
+        if (n < 0):
+            raise io.BlockingIO(0, "test blocking", -n)
+        else:
+            return n
+    def writable(self):
+        return True
 
 class IOTest(unittest.TestCase):
 
@@ -90,9 +104,7 @@
         f = io.BytesIO(data)
         self.read_ops(f)
 
-
 class BytesIOTest(unittest.TestCase):
-
     def testInit(self):
         buf = b"1234567890"
         bytesIo = io.BytesIO(buf)
@@ -134,44 +146,51 @@
         bytesIo.seek(10000)
         self.assertEquals(10000, bytesIo.tell())
 
-
 class BufferedReaderTest(unittest.TestCase):
-
     def testRead(self):
-        rawIo = MockReadIO((b"abc", b"d", b"efg"))
+        rawIo = MockIO((b"abc", b"d", b"efg"))
         bufIo = io.BufferedReader(rawIo)
 
         self.assertEquals(b"abcdef", bufIo.read(6))
 
+    def testReadNonBlocking(self):
+        # Inject some None values in there to simulate EWOULDBLOCK
+        rawIo = MockIO((b"abc", b"d", None, b"efg", None, None))
+        bufIo = io.BufferedReader(rawIo)
+
+        self.assertEquals(b"abcd", bufIo.read(6))
+        self.assertEquals(b"e", bufIo.read(1))
+        self.assertEquals(b"fg", bufIo.read())
+        self.assert_(None is bufIo.read())
+        self.assertEquals(io.EOF, bufIo.read())
+
     def testReadToEof(self):
-        rawIo = MockReadIO((b"abc", b"d", b"efg"))
+        rawIo = MockIO((b"abc", b"d", b"efg"))
         bufIo = io.BufferedReader(rawIo)
 
         self.assertEquals(b"abcdefg", bufIo.read(9000))
 
     def testReadNoArgs(self):
-        rawIo = MockReadIO((b"abc", b"d", b"efg"))
+        rawIo = MockIO((b"abc", b"d", b"efg"))
         bufIo = io.BufferedReader(rawIo)
 
         self.assertEquals(b"abcdefg", bufIo.read())
 
     def testFileno(self):
-        rawIo = MockReadIO((b"abc", b"d", b"efg"))
+        rawIo = MockIO((b"abc", b"d", b"efg"))
         bufIo = io.BufferedReader(rawIo)
 
         self.assertEquals(42, bufIo.fileno())
 
     def testFilenoNoFileno(self):
-        # TODO will we always have fileno() function? If so, kill
+        # XXX will we always have fileno() function? If so, kill
         # this test. Else, write it.
         pass
 
-
 class BufferedWriterTest(unittest.TestCase):
-
     def testWrite(self):
         # Write to the buffered IO but don't overflow the buffer.
-        writer = MockWriteIO()
+        writer = MockIO()
         bufIo = io.BufferedWriter(writer, 8)
 
         bufIo.write(b"abc")
@@ -179,7 +198,7 @@
         self.assertFalse(writer._writeStack)
 
     def testWriteOverflow(self):
-        writer = MockWriteIO()
+        writer = MockIO()
         bufIo = io.BufferedWriter(writer, 8)
 
         bufIo.write(b"abc")
@@ -187,8 +206,33 @@
 
         self.assertEquals(b"abcdefghijkl", writer._writeStack[0])
 
+    def testWriteNonBlocking(self):
+        raw = MockNonBlockWriterIO((9, 2, 22, -6, 10, 12, 12))
+        bufIo = io.BufferedWriter(raw, 8, 16)
+
+        bufIo.write(b"asdf")
+        bufIo.write(b"asdfa")
+        self.assertEquals(b"asdfasdfa", raw._write_stack[0])
+
+        bufIo.write(b"asdfasdfasdf")
+        self.assertEquals(b"asdfasdfasdf", raw._write_stack[1])
+        bufIo.write(b"asdfasdfasdf")
+        self.assertEquals(b"dfasdfasdf", raw._write_stack[2])
+        self.assertEquals(b"asdfasdfasdf", raw._write_stack[3])
+
+        bufIo.write(b"asdfasdfasdf")
+
+        # XXX I don't like this test. It relies too heavily on how the algorithm
+        # actually works, which we might change. Refactor later.
+
+    def testFileno(self):
+        rawIo = MockIO((b"abc", b"d", b"efg"))
+        bufIo = io.BufferedWriter(rawIo)
+
+        self.assertEquals(42, bufIo.fileno())
+
     def testFlush(self):
-        writer = MockWriteIO()
+        writer = MockIO()
         bufIo = io.BufferedWriter(writer, 8)
 
         bufIo.write(b"abc")
@@ -196,11 +240,51 @@
 
         self.assertEquals(b"abc", writer._writeStack[0])
 
-# TODO. Tests for open()
+class BufferedRWPairTest(unittest.TestCase):
+    def testRWPair(self):
+        r = MockIO(())
+        w = MockIO()
+        pair = io.BufferedRWPair(r, w)
+
+        # XXX need implementation
+
+class BufferedRandom(unittest.TestCase):
+    def testReadAndWrite(self):
+        raw = MockIO((b"asdf", b"ghjk"))
+        rw = io.BufferedRandom(raw, 8, 12)
+
+        self.assertEqual(b"as", rw.read(2))
+        rw.write(b"ddd")
+        rw.write(b"eee")
+        self.assertFalse(raw._writeStack) # Buffer writes
+        self.assertEqual(b"ghjk", rw.read()) # This read forces write flush
+        self.assertEquals(b"dddeee", raw._writeStack[0])
+
+    def testSeekAndTell(self):
+        raw = io.BytesIO(b"asdfghjkl")
+        rw = io.BufferedRandom(raw)
+
+        self.assertEquals(b"as", rw.read(2))
+        self.assertEquals(2, rw.tell())
+        rw.seek(0, 0)
+        self.assertEquals(b"asdf", rw.read(4))
+
+        rw.write(b"asdf")
+        rw.seek(0, 0)
+        self.assertEquals(b"asdfasdfl", rw.read())
+        self.assertEquals(9, rw.tell())
+        rw.seek(-4, 2)
+        self.assertEquals(5, rw.tell())
+        rw.seek(2, 1)
+        self.assertEquals(7, rw.tell())
+        self.assertEquals(b"fl", rw.read(11))
+
+# XXX Tests for open()
 
 def test_main():
     test_support.run_unittest(IOTest, BytesIOTest, BufferedReaderTest,
-                              BufferedWriterTest)
+                              BufferedWriterTest, BufferedRWPairTest,
+                              BufferedRandom)
 
 if __name__ == "__main__":
     test_main()


More information about the Python-3000-checkins mailing list