[Python-checkins] bpo-34488: optimize BytesIO.writelines() (GH-8904)

Inada Naoki webhook-mailer at python.org
Tue Aug 6 20:38:35 EDT 2019


https://github.com/python/cpython/commit/3e41f3cabb661824a1a197116f7f5ead64eb6ced
commit: 3e41f3cabb661824a1a197116f7f5ead64eb6ced
branch: master
author: Sergey Fedoseev <fedoseev.sergey at gmail.com>
committer: Inada Naoki <songofacandy at gmail.com>
date: 2019-08-07T09:38:31+09:00
summary:

bpo-34488: optimize BytesIO.writelines() (GH-8904)

Avoid the creation of unused int object for each line.

files:
A Misc/NEWS.d/next/Library/2019-08-06-21-30-58.bpo-34488.OqxVo8.rst
M Modules/_io/bytesio.c

diff --git a/Misc/NEWS.d/next/Library/2019-08-06-21-30-58.bpo-34488.OqxVo8.rst b/Misc/NEWS.d/next/Library/2019-08-06-21-30-58.bpo-34488.OqxVo8.rst
new file mode 100644
index 000000000000..1e7e5a6ba0bc
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2019-08-06-21-30-58.bpo-34488.OqxVo8.rst
@@ -0,0 +1,2 @@
+:meth:`writelines` method of :class:`io.BytesIO` is now slightly faster
+when many small lines are passed. Patch by Sergey Fedoseev.
diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c
index 19e1ed8441e3..793ce920004e 100644
--- a/Modules/_io/bytesio.c
+++ b/Modules/_io/bytesio.c
@@ -31,17 +31,34 @@ typedef struct {
   * exports > 0.  Py_REFCNT(buf) == 1, any modifications are forbidden.
 */
 
+static int
+check_closed(bytesio *self)
+{
+    if (self->buf == NULL) {
+        PyErr_SetString(PyExc_ValueError, "I/O operation on closed file.");
+        return 1;
+    }
+    return 0;
+}
+
+static int
+check_exports(bytesio *self)
+{
+    if (self->exports > 0) {
+        PyErr_SetString(PyExc_BufferError,
+                        "Existing exports of data: object cannot be re-sized");
+        return 1;
+    }
+    return 0;
+}
+
 #define CHECK_CLOSED(self)                                  \
-    if ((self)->buf == NULL) {                              \
-        PyErr_SetString(PyExc_ValueError,                   \
-                        "I/O operation on closed file.");   \
+    if (check_closed(self)) {                               \
         return NULL;                                        \
     }
 
 #define CHECK_EXPORTS(self) \
-    if ((self)->exports > 0) { \
-        PyErr_SetString(PyExc_BufferError, \
-                        "Existing exports of data: object cannot be re-sized"); \
+    if (check_exports(self)) { \
         return NULL; \
     }
 
@@ -156,23 +173,41 @@ resize_buffer(bytesio *self, size_t size)
 }
 
 /* Internal routine for writing a string of bytes to the buffer of a BytesIO
-   object. Returns the number of bytes written, or -1 on error. */
-static Py_ssize_t
-write_bytes(bytesio *self, const char *bytes, Py_ssize_t len)
+   object. Returns the number of bytes written, or -1 on error.
+   Inlining is disabled because it's significantly decreases performance
+   of writelines() in PGO build. */
+_Py_NO_INLINE static Py_ssize_t
+write_bytes(bytesio *self, PyObject *b)
 {
-    size_t endpos;
-    assert(self->buf != NULL);
-    assert(self->pos >= 0);
-    assert(len >= 0);
+    if (check_closed(self)) {
+        return -1;
+    }
+    if (check_exports(self)) {
+        return -1;
+    }
 
-    endpos = (size_t)self->pos + len;
+    Py_buffer buf;
+    if (PyObject_GetBuffer(b, &buf, PyBUF_CONTIG_RO) < 0) {
+        return -1;
+    }
+    Py_ssize_t len = buf.len;
+    if (len == 0) {
+        goto done;
+    }
+
+    assert(self->pos >= 0);
+    size_t endpos = (size_t)self->pos + len;
     if (endpos > (size_t)PyBytes_GET_SIZE(self->buf)) {
-        if (resize_buffer(self, endpos) < 0)
-            return -1;
+        if (resize_buffer(self, endpos) < 0) {
+            len = -1;
+            goto done;
+        }
     }
     else if (SHARED_BUF(self)) {
-        if (unshare_buffer(self, Py_MAX(endpos, (size_t)self->string_size)) < 0)
-            return -1;
+        if (unshare_buffer(self, Py_MAX(endpos, (size_t)self->string_size)) < 0) {
+            len = -1;
+            goto done;
+        }
     }
 
     if (self->pos > self->string_size) {
@@ -190,7 +225,7 @@ write_bytes(bytesio *self, const char *bytes, Py_ssize_t len)
 
     /* Copy the data to the internal buffer, overwriting some of the existing
        data if self->pos < self->string_size. */
-    memcpy(PyBytes_AS_STRING(self->buf) + self->pos, bytes, len);
+    memcpy(PyBytes_AS_STRING(self->buf) + self->pos, buf.buf, len);
     self->pos = endpos;
 
     /* Set the new length of the internal string if it has changed. */
@@ -198,6 +233,8 @@ write_bytes(bytesio *self, const char *bytes, Py_ssize_t len)
         self->string_size = endpos;
     }
 
+  done:
+    PyBuffer_Release(&buf);
     return len;
 }
 
@@ -669,19 +706,7 @@ static PyObject *
 _io_BytesIO_write(bytesio *self, PyObject *b)
 /*[clinic end generated code: output=53316d99800a0b95 input=f5ec7c8c64ed720a]*/
 {
-    Py_ssize_t n = 0;
-    Py_buffer buf;
-
-    CHECK_CLOSED(self);
-    CHECK_EXPORTS(self);
-
-    if (PyObject_GetBuffer(b, &buf, PyBUF_CONTIG_RO) < 0)
-        return NULL;
-
-    if (buf.len != 0)
-        n = write_bytes(self, buf.buf, buf.len);
-
-    PyBuffer_Release(&buf);
+    Py_ssize_t n = write_bytes(self, b);
     return n >= 0 ? PyLong_FromSsize_t(n) : NULL;
 }
 
@@ -702,7 +727,6 @@ _io_BytesIO_writelines(bytesio *self, PyObject *lines)
 /*[clinic end generated code: output=7f33aa3271c91752 input=e972539176fc8fc1]*/
 {
     PyObject *it, *item;
-    PyObject *ret;
 
     CHECK_CLOSED(self);
 
@@ -711,13 +735,12 @@ _io_BytesIO_writelines(bytesio *self, PyObject *lines)
         return NULL;
 
     while ((item = PyIter_Next(it)) != NULL) {
-        ret = _io_BytesIO_write(self, item);
+        Py_ssize_t ret = write_bytes(self, item);
         Py_DECREF(item);
-        if (ret == NULL) {
+        if (ret < 0) {
             Py_DECREF(it);
             return NULL;
         }
-        Py_DECREF(ret);
     }
     Py_DECREF(it);
 



More information about the Python-checkins mailing list