[Python-checkins] bpo-43260: io: Prevent large data remains in textio buffer. (GH-24592)

methane webhook-mailer at python.org
Sun Feb 21 18:29:38 EST 2021


https://github.com/python/cpython/commit/01806d5beba3d208bb56adba6829097d803bf54f
commit: 01806d5beba3d208bb56adba6829097d803bf54f
branch: master
author: Inada Naoki <songofacandy at gmail.com>
committer: methane <songofacandy at gmail.com>
date: 2021-02-22T08:29:30+09:00
summary:

bpo-43260: io: Prevent large data remains in textio buffer. (GH-24592)

When very large data remains in TextIOWrapper, flush() may fail forever.

So prevent that data larger than chunk_size is remained in TextIOWrapper internal
buffer.

Co-Authored-By: Eryk Sun

files:
A Misc/NEWS.d/next/Library/2021-02-20-12-15-29.bpo-43260.6znAas.rst
M Lib/test/test_io.py
M Modules/_io/textio.c

diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py
index cc54d0ea0062f..3768b625516f4 100644
--- a/Lib/test/test_io.py
+++ b/Lib/test/test_io.py
@@ -3767,6 +3767,33 @@ def test_del__CHUNK_SIZE_SystemError(self):
         with self.assertRaises(AttributeError):
             del t._CHUNK_SIZE
 
+    def test_internal_buffer_size(self):
+        # bpo-43260: TextIOWrapper's internal buffer should not store
+        # data larger than chunk size.
+        chunk_size = 8192  # default chunk size, updated later
+
+        class MockIO(self.MockRawIO):
+            def write(self, data):
+                if len(data) > chunk_size:
+                    raise RuntimeError
+                return super().write(data)
+
+        buf = MockIO()
+        t = self.TextIOWrapper(buf, encoding="ascii")
+        chunk_size = t._CHUNK_SIZE
+        t.write("abc")
+        t.write("def")
+        # default chunk size is 8192 bytes so t don't write data to buf.
+        self.assertEqual([], buf._write_stack)
+
+        with self.assertRaises(RuntimeError):
+            t.write("x"*(chunk_size+1))
+
+        self.assertEqual([b"abcdef"], buf._write_stack)
+        t.write("ghi")
+        t.write("x"*chunk_size)
+        self.assertEqual([b"abcdef", b"ghi", b"x"*chunk_size], buf._write_stack)
+
 
 class PyTextIOWrapperTest(TextIOWrapperTest):
     io = pyio
diff --git a/Misc/NEWS.d/next/Library/2021-02-20-12-15-29.bpo-43260.6znAas.rst b/Misc/NEWS.d/next/Library/2021-02-20-12-15-29.bpo-43260.6znAas.rst
new file mode 100644
index 0000000000000..f3c21d1c63f72
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2021-02-20-12-15-29.bpo-43260.6znAas.rst
@@ -0,0 +1,2 @@
+Fix TextIOWrapper can not flush internal buffer forever after very large
+text is written.
diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c
index f08d14e18b402..03001ecb0a5b3 100644
--- a/Modules/_io/textio.c
+++ b/Modules/_io/textio.c
@@ -1585,6 +1585,8 @@ _textiowrapper_writeflush(textio *self)
         ret = PyObject_CallMethodOneArg(self->buffer, _PyIO_str_write, b);
     } while (ret == NULL && _PyIO_trap_eintr());
     Py_DECREF(b);
+    // NOTE: We cleared buffer but we don't know how many bytes are actually written
+    // when an error occurred.
     if (ret == NULL)
         return -1;
     Py_DECREF(ret);
@@ -1642,7 +1644,10 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text)
 
     /* XXX What if we were just reading? */
     if (self->encodefunc != NULL) {
-        if (PyUnicode_IS_ASCII(text) && is_asciicompat_encoding(self->encodefunc)) {
+        if (PyUnicode_IS_ASCII(text) &&
+                // See bpo-43260
+                PyUnicode_GET_LENGTH(text) <= self->chunk_size &&
+                is_asciicompat_encoding(self->encodefunc)) {
             b = text;
             Py_INCREF(b);
         }
@@ -1651,8 +1656,9 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text)
         }
         self->encoding_start_of_stream = 0;
     }
-    else
+    else {
         b = PyObject_CallMethodOneArg(self->encoder, _PyIO_str_encode, text);
+    }
 
     Py_DECREF(text);
     if (b == NULL)
@@ -1677,6 +1683,14 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text)
         self->pending_bytes_count = 0;
         self->pending_bytes = b;
     }
+    else if (self->pending_bytes_count + bytes_len > self->chunk_size) {
+        // Prevent to concatenate more than chunk_size data.
+        if (_textiowrapper_writeflush(self) < 0) {
+            Py_DECREF(b);
+            return NULL;
+        }
+        self->pending_bytes = b;
+    }
     else if (!PyList_CheckExact(self->pending_bytes)) {
         PyObject *list = PyList_New(2);
         if (list == NULL) {
@@ -1696,7 +1710,7 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text)
     }
 
     self->pending_bytes_count += bytes_len;
-    if (self->pending_bytes_count > self->chunk_size || needflush ||
+    if (self->pending_bytes_count >= self->chunk_size || needflush ||
         text_needflush) {
         if (_textiowrapper_writeflush(self) < 0)
             return NULL;



More information about the Python-checkins mailing list