[Python-checkins] cpython: add BufferedIOBase.readinto1 (closes #20578)

benjamin.peterson python-checkins at python.org
Sun Jun 8 05:07:37 CEST 2014


http://hg.python.org/cpython/rev/0fb7789b5eeb
changeset:   91079:0fb7789b5eeb
parent:      91077:3f2f1ffc3ce2
user:        Benjamin Peterson <benjamin at python.org>
date:        Sat Jun 07 20:06:48 2014 -0700
summary:
  add BufferedIOBase.readinto1 (closes #20578)

Patch by Nikolaus Rath.

files:
  Doc/library/io.rst       |  31 ++++++++--
  Lib/_pyio.py             |  83 +++++++++++++++++++++++++++-
  Lib/test/test_io.py      |  25 ++++++++
  Misc/NEWS                |   2 +
  Modules/_io/bufferedio.c |  67 ++++++++++++++++++++--
  5 files changed, 194 insertions(+), 14 deletions(-)


diff --git a/Doc/library/io.rst b/Doc/library/io.rst
--- a/Doc/library/io.rst
+++ b/Doc/library/io.rst
@@ -385,8 +385,8 @@
    .. method:: readinto(b)
 
       Read up to ``len(b)`` bytes into :class:`bytearray` *b* and return the
-      number of bytes read.  If the object is in non-blocking mode and no
-      bytes are available, ``None`` is returned.
+      number of bytes read.  If the object is in non-blocking mode and no bytes
+      are available, ``None`` is returned.
 
    .. method:: write(b)
 
@@ -459,10 +459,11 @@
 
    .. method:: read1(size=-1)
 
-      Read and return up to *size* bytes, with at most one call to the underlying
-      raw stream's :meth:`~RawIOBase.read` method.  This can be useful if you
-      are implementing your own buffering on top of a :class:`BufferedIOBase`
-      object.
+      Read and return up to *size* bytes, with at most one call to the
+      underlying raw stream's :meth:`~RawIOBase.read` (or
+      :meth:`~RawIOBase.readinto`) method.  This can be useful if you
+      are implementing your own buffering on top of a
+      :class:`BufferedIOBase` object.
 
    .. method:: readinto(b)
 
@@ -472,8 +473,19 @@
       Like :meth:`read`, multiple reads may be issued to the underlying raw
       stream, unless the latter is interactive.
 
+      A :exc:`BlockingIOError` is raised if the underlying raw stream is in
+      non-blocking mode and has no data available at the moment.
+
+   .. method:: readinto1(b)
+
+      Read up to ``len(b)`` bytes into bytearray *b*, using at most one call to
+      the underlying raw stream's :meth:`~RawIOBase.read` (or
+      :meth:`~RawIOBase.readinto`) method. Return the number of bytes read.
+
       A :exc:`BlockingIOError` is raised if the underlying raw stream is in
-      non blocking-mode, and has no data available at the moment.
+      non-blocking mode and has no data available at the moment.
+
+      .. versionadded:: 3.5
 
    .. method:: write(b)
 
@@ -590,6 +602,11 @@
 
       In :class:`BytesIO`, this is the same as :meth:`read`.
 
+   .. method:: readinto1(b)
+
+      In :class:`BytesIO`, this is the same as :meth:`readinto`.
+
+      .. versionadded:: 3.5
 
 .. class:: BufferedReader(raw, buffer_size=DEFAULT_BUFFER_SIZE)
 
diff --git a/Lib/_pyio.py b/Lib/_pyio.py
--- a/Lib/_pyio.py
+++ b/Lib/_pyio.py
@@ -655,8 +655,26 @@
         Raises BlockingIOError if the underlying raw stream has no
         data at the moment.
         """
+
+        return self._readinto(b, read1=False)
+
+    def readinto1(self, b):
+        """Read up to len(b) bytes into *b*, using at most one system call
+
+        Returns an int representing the number of bytes read (0 for EOF).
+
+        Raises BlockingIOError if the underlying raw stream has no
+        data at the moment.
+        """
+
+        return self._readinto(b, read1=True)
+
+    def _readinto(self, b, read1):
         # XXX This ought to work with anything that supports the buffer API
-        data = self.read(len(b))
+        if read1:
+            data = self.read1(len(b))
+        else:
+            data = self.read(len(b))
         n = len(data)
         try:
             b[:n] = data
@@ -1058,6 +1076,62 @@
             return self._read_unlocked(
                 min(size, len(self._read_buf) - self._read_pos))
 
+    # Implementing readinto() and readinto1() is not strictly necessary (we
+    # could rely on the base class that provides an implementation in terms of
+    # read() and read1()). We do it anyway to keep the _pyio implementation
+    # similar to the io implementation (which implements the methods for
+    # performance reasons).
+    def readinto(self, buf):
+        """Read data into *buf*."""
+        return self._readinto(buf, read1=False)
+    def readinto1(self, buf):
+        """Read data into *buf* with at most one system call."""
+        return self._readinto(buf, read1=True)
+
+    def _readinto(self, buf, read1):
+        """Read data into *buf* (at most one system call if *read1*)."""
+
+        if len(buf) == 0:
+            return 0
+
+        written = 0
+        with self._read_lock:
+            while written < len(buf):
+
+                # First try to read from internal buffer
+                avail = min(len(self._read_buf) - self._read_pos, len(buf))
+                if avail:
+                    buf[written:written+avail] = \
+                        self._read_buf[self._read_pos:self._read_pos+avail]
+                    self._read_pos += avail
+                    written += avail
+                    if written == len(buf):
+                        break
+
+                # If remaining space in caller's buffer is larger than
+                # internal buffer, read directly into caller's buffer
+                if len(buf) - written > self.buffer_size:
+                    # If we don't use a memoryview, slicing buf will create
+                    # a new object
+                    if not isinstance(buf, memoryview):
+                        buf = memoryview(buf)
+                    n = self.raw.readinto(buf[written:])
+                    if not n:
+                        break # eof
+                    written += n
+
+                # Otherwise refill internal buffer - unless we're
+                # in read1 mode and already got some data
+                elif not (read1 and written):
+                    if not self._peek_unlocked(1):
+                        break # eof
+
+                # In readinto1 mode, return as soon as we have some data
+                if read1 and written:
+                    break
+
+        return written
+
     def tell(self):
         return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos
 
@@ -1207,6 +1281,9 @@
     def read1(self, size):
         return self.reader.read1(size)
 
+    def readinto1(self, b):
+        return self.reader.readinto1(b)
+
     def readable(self):
         return self.reader.readable()
 
@@ -1289,6 +1366,10 @@
         self.flush()
         return BufferedReader.read1(self, size)
 
+    def readinto1(self, b):
+        self.flush()
+        return BufferedReader.readinto1(self, b)
+
     def write(self, b):
         if self._read_buf:
             # Undo readahead
diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py
--- a/Lib/test/test_io.py
+++ b/Lib/test/test_io.py
@@ -911,6 +911,29 @@
         self.assertEqual(bufio.readinto(b), 1)
         self.assertEqual(b, b"cb")
 
+    def test_readinto1(self):
+        buffer_size = 10
+        rawio = self.MockRawIO((b"abc", b"de", b"fgh", b"jkl"))
+        bufio = self.tp(rawio, buffer_size=buffer_size)
+        b = bytearray(2)
+        self.assertEqual(bufio.peek(3), b'abc')
+        self.assertEqual(rawio._reads, 1)
+        self.assertEqual(bufio.readinto1(b), 2)
+        self.assertEqual(b, b"ab")
+        self.assertEqual(rawio._reads, 1)
+        self.assertEqual(bufio.readinto1(b), 1)
+        self.assertEqual(b[:1], b"c")
+        self.assertEqual(rawio._reads, 1)
+        self.assertEqual(bufio.readinto1(b), 2)
+        self.assertEqual(b, b"de")
+        self.assertEqual(rawio._reads, 2)
+        b = bytearray(2*buffer_size)
+        self.assertEqual(bufio.peek(3), b'fgh')
+        self.assertEqual(rawio._reads, 3)
+        self.assertEqual(bufio.readinto1(b), 6)
+        self.assertEqual(b[:6], b"fghjkl")
+        self.assertEqual(rawio._reads, 4)
+
     def test_readlines(self):
         def bufio():
             rawio = self.MockRawIO((b"abc\n", b"d\n", b"ef"))
@@ -2985,6 +3008,8 @@
                 self.assertRaises(ValueError, f.readall)
             if hasattr(f, "readinto"):
                 self.assertRaises(ValueError, f.readinto, bytearray(1024))
+            if hasattr(f, "readinto1"):
+                self.assertRaises(ValueError, f.readinto1, bytearray(1024))
             self.assertRaises(ValueError, f.readline)
             self.assertRaises(ValueError, f.readlines)
             self.assertRaises(ValueError, f.seek, 0)
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -92,6 +92,8 @@
 Library
 -------
 
+- Issue #20578: Add io.BufferedIOBase.readinto1.
+
 - Issue #21515: tempfile.TemporaryFile now uses os.O_TMPFILE flag is available.
 
 - Issue #13223: Fix pydoc.writedoc so that the HTML documentation for methods
diff --git a/Modules/_io/bufferedio.c b/Modules/_io/bufferedio.c
--- a/Modules/_io/bufferedio.c
+++ b/Modules/_io/bufferedio.c
@@ -24,6 +24,7 @@
 _Py_IDENTIFIER(read1);
 _Py_IDENTIFIER(readable);
 _Py_IDENTIFIER(readinto);
+_Py_IDENTIFIER(readinto1);
 _Py_IDENTIFIER(writable);
 _Py_IDENTIFIER(write);
 
@@ -47,17 +48,21 @@
     );
 
 static PyObject *
-bufferediobase_readinto(PyObject *self, PyObject *args)
+_bufferediobase_readinto_generic(PyObject *self, PyObject *args, char readinto1)
 {
     Py_buffer buf;
     Py_ssize_t len;
     PyObject *data;
 
-    if (!PyArg_ParseTuple(args, "w*:readinto", &buf)) {
+    if (!PyArg_ParseTuple(args,
+                          readinto1 ? "w*:readinto1" : "w*:readinto",
+                          &buf)) {
         return NULL;
     }
 
-    data = _PyObject_CallMethodId(self, &PyId_read, "n", buf.len);
+    data = _PyObject_CallMethodId(self,
+                                  readinto1 ? &PyId_read1 : &PyId_read,
+                                  "n", buf.len);
     if (data == NULL)
         goto error;
 
@@ -89,6 +94,18 @@
 }
 
 static PyObject *
+bufferediobase_readinto(PyObject *self, PyObject *args)
+{
+    return _bufferediobase_readinto_generic(self, args, 0);
+}
+
+static PyObject *
+bufferediobase_readinto1(PyObject *self, PyObject *args)
+{
+    return _bufferediobase_readinto_generic(self, args, 1);
+}
+
+static PyObject *
 bufferediobase_unsupported(const char *message)
 {
     _PyIO_State *state = IO_STATE();
@@ -167,6 +184,7 @@
     {"read", bufferediobase_read, METH_VARARGS, bufferediobase_read_doc},
     {"read1", bufferediobase_read1, METH_VARARGS, bufferediobase_read1_doc},
     {"readinto", bufferediobase_readinto, METH_VARARGS, NULL},
+    {"readinto1", bufferediobase_readinto1, METH_VARARGS, NULL},
     {"write", bufferediobase_write, METH_VARARGS, bufferediobase_write_doc},
     {NULL, NULL}
 };
@@ -988,7 +1006,7 @@
 }
 
 static PyObject *
-buffered_readinto(buffered *self, PyObject *args)
+_buffered_readinto_generic(buffered *self, PyObject *args, char readinto1)
 {
     Py_buffer buf;
     Py_ssize_t n, written = 0, remaining;
@@ -996,7 +1014,9 @@
 
     CHECK_INITIALIZED(self)
 
-    if (!PyArg_ParseTuple(args, "w*:readinto", &buf))
+    if (!PyArg_ParseTuple(args,
+                          readinto1 ? "w*:readinto1" : "w*:readinto",
+                          &buf))
         return NULL;
 
     n = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t);
@@ -1034,7 +1054,10 @@
             n = _bufferedreader_raw_read(self, (char *) buf.buf + written,
                                          remaining);
         }
-        else {
+
+        /* In readinto1 mode, we do not want to fill the internal
+           buffer if we already have some data to return */
+        else if (!(readinto1 && written)) {
             n = _bufferedreader_fill_buffer(self);
             if (n > 0) {
                 if (n > remaining)
@@ -1045,6 +1068,10 @@
                 continue; /* short circuit */
             }
         }
+        else {
+            n = 0;
+        }
+        
         if (n == 0 || (n == -2 && written > 0))
             break;
         if (n < 0) {
@@ -1054,6 +1081,12 @@
             }
             goto end;
         }
+        
+        /* At most one read in readinto1 mode */
+        if (readinto1) {
+            written += n;
+            break;
+        }
     }
     res = PyLong_FromSsize_t(written);
 
@@ -1065,6 +1098,19 @@
 }
 
 static PyObject *
+buffered_readinto(buffered *self, PyObject *args)
+{
+    return _buffered_readinto_generic(self, args, 0);
+}
+
+static PyObject *
+buffered_readinto1(buffered *self, PyObject *args)
+{
+    return _buffered_readinto_generic(self, args, 1);
+}
+
+
+static PyObject *
 _buffered_readline(buffered *self, Py_ssize_t limit)
 {
     PyObject *res = NULL;
@@ -1749,6 +1795,7 @@
     {"peek", (PyCFunction)buffered_peek, METH_VARARGS},
     {"read1", (PyCFunction)buffered_read1, METH_VARARGS},
     {"readinto", (PyCFunction)buffered_readinto, METH_VARARGS},
+    {"readinto1", (PyCFunction)buffered_readinto1, METH_VARARGS},
     {"readline", (PyCFunction)buffered_readline, METH_VARARGS},
     {"seek", (PyCFunction)buffered_seek, METH_VARARGS},
     {"tell", (PyCFunction)buffered_tell, METH_NOARGS},
@@ -2348,6 +2395,12 @@
 }
 
 static PyObject *
+bufferedrwpair_readinto1(rwpair *self, PyObject *args)
+{
+    return _forward_call(self->reader, &PyId_readinto1, args);
+}
+
+static PyObject *
 bufferedrwpair_write(rwpair *self, PyObject *args)
 {
     return _forward_call(self->writer, &PyId_write, args);
@@ -2412,6 +2465,7 @@
     {"peek", (PyCFunction)bufferedrwpair_peek, METH_VARARGS},
     {"read1", (PyCFunction)bufferedrwpair_read1, METH_VARARGS},
     {"readinto", (PyCFunction)bufferedrwpair_readinto, METH_VARARGS},
+    {"readinto1", (PyCFunction)bufferedrwpair_readinto1, METH_VARARGS},
 
     {"write", (PyCFunction)bufferedrwpair_write, METH_VARARGS},
     {"flush", (PyCFunction)bufferedrwpair_flush, METH_NOARGS},
@@ -2560,6 +2614,7 @@
     {"read", (PyCFunction)buffered_read, METH_VARARGS},
     {"read1", (PyCFunction)buffered_read1, METH_VARARGS},
     {"readinto", (PyCFunction)buffered_readinto, METH_VARARGS},
+    {"readinto1", (PyCFunction)buffered_readinto1, METH_VARARGS},
     {"readline", (PyCFunction)buffered_readline, METH_VARARGS},
     {"peek", (PyCFunction)buffered_peek, METH_VARARGS},
     {"write", (PyCFunction)bufferedwriter_write, METH_VARARGS},

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list