[Python-checkins] cpython (merge 3.3 -> default): Issue #17106: Fix a segmentation fault in io.TextIOWrapper when an underlying

serhiy.storchaka python-checkins at python.org
Sun Feb 3 16:14:11 CET 2013


http://hg.python.org/cpython/rev/398bcdf55f92
changeset:   81969:398bcdf55f92
parent:      81966:6c4857832510
parent:      81968:0c4cc967a733
user:        Serhiy Storchaka <storchaka at gmail.com>
date:        Sun Feb 03 17:09:17 2013 +0200
summary:
  Issue #17106: Fix a segmentation fault in io.TextIOWrapper when an underlying
stream or a decoder produces data of an unexpected type (i.e. when
io.TextIOWrapper initialized with text stream or use bytes-to-bytes codec).

files:
  Lib/test/test_io.py  |  24 ++++++++++
  Misc/NEWS            |   4 +
  Modules/_io/textio.c |  76 ++++++++++++++++++++-----------
  3 files changed, 76 insertions(+), 28 deletions(-)


diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py
--- a/Lib/test/test_io.py
+++ b/Lib/test/test_io.py
@@ -2550,6 +2550,30 @@
         txt.write('5')
         self.assertEqual(b''.join(raw._write_stack), b'123\n45')
 
+    def test_read_nonbytes(self):
+        # Issue #17106
+        # Crash when underlying read() returns non-bytes
+        t = self.TextIOWrapper(self.StringIO('a'))
+        self.assertRaises(TypeError, t.read, 1)
+        t = self.TextIOWrapper(self.StringIO('a'))
+        self.assertRaises(TypeError, t.readline)
+        t = self.TextIOWrapper(self.StringIO('a'))
+        self.assertRaises(TypeError, t.read)
+
+    def test_illegal_decoder(self):
+        # Issue #17106
+        # Crash when decoder returns non-string
+        t = self.TextIOWrapper(self.BytesIO(b'aaaaaa'), newline='\n',
+                               encoding='quopri_codec')
+        self.assertRaises(TypeError, t.read, 1)
+        t = self.TextIOWrapper(self.BytesIO(b'aaaaaa'), newline='\n',
+                               encoding='quopri_codec')
+        self.assertRaises(TypeError, t.readline)
+        t = self.TextIOWrapper(self.BytesIO(b'aaaaaa'), newline='\n',
+                               encoding='quopri_codec')
+        self.assertRaises(TypeError, t.read)
+
+
 class CTextIOWrapperTest(TextIOWrapperTest):
 
     def test_initialization(self):
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -235,6 +235,10 @@
 Library
 -------
 
+- Issue #17106: Fix a segmentation fault in io.TextIOWrapper when an underlying
+  stream or a decoder produces data of an unexpected type (i.e. when
+  io.TextIOWrapper initialized with text stream or use bytes-to-bytes codec).
+
 - Issue #17015: When it has a spec, a Mock object now inspects its signature
   when matching calls, so that arguments can be matched positionally or
   by name.
diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c
--- a/Modules/_io/textio.c
+++ b/Modules/_io/textio.c
@@ -257,6 +257,25 @@
     Py_TYPE(self)->tp_free((PyObject *)self);
 }
 
+static int
+check_decoded(PyObject *decoded)
+{
+    if (decoded == NULL)
+        return -1;
+    if (!PyUnicode_Check(decoded)) {
+        PyErr_Format(PyExc_TypeError,
+                     "decoder should return a string result, not '%.200s'",
+                     Py_TYPE(decoded)->tp_name);
+        Py_DECREF(decoded);
+        return -1;
+    }
+    if (PyUnicode_READY(decoded) < 0) {
+        Py_DECREF(decoded);
+        return -1;
+    }
+    return 0;
+}
+
 #define SEEN_CR   1
 #define SEEN_LF   2
 #define SEEN_CRLF 4
@@ -286,18 +305,9 @@
         Py_INCREF(output);
     }
 
-    if (output == NULL)
+    if (check_decoded(output) < 0)
         return NULL;
 
-    if (!PyUnicode_Check(output)) {
-        PyErr_SetString(PyExc_TypeError,
-                        "decoder should return a string result");
-        goto error;
-    }
-
-    if (PyUnicode_READY(output) == -1)
-        goto error;
-
     output_len = PyUnicode_GET_LENGTH(output);
     if (self->pendingcr && (final || output_len > 0)) {
         /* Prefix output with CR */
@@ -1458,7 +1468,13 @@
     Py_DECREF(chunk_size);
     if (input_chunk == NULL)
         goto fail;
-    assert(PyBytes_Check(input_chunk));
+    if (!PyBytes_Check(input_chunk)) {
+        PyErr_Format(PyExc_TypeError,
+                     "underlying %s() should have returned a bytes object, "
+                     "not '%.200s'", (self->has_read1 ? "read1": "read"),
+                     Py_TYPE(input_chunk)->tp_name);
+        goto fail;
+    }
 
     nbytes = PyBytes_Size(input_chunk);
     eof = (nbytes == 0);
@@ -1472,10 +1488,7 @@
             _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
     }
 
-    /* TODO sanity check: isinstance(decoded_chars, unicode) */
-    if (decoded_chars == NULL)
-        goto fail;
-    if (PyUnicode_READY(decoded_chars) == -1)
+    if (check_decoded(decoded_chars) < 0)
         goto fail;
     textiowrapper_set_decoded_chars(self, decoded_chars);
     nchars = PyUnicode_GET_LENGTH(decoded_chars);
@@ -1493,7 +1506,14 @@
         PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
         if (next_input == NULL)
             goto fail;
-        assert (PyBytes_Check(next_input));
+        if (!PyBytes_Check(next_input)) {
+            PyErr_Format(PyExc_TypeError,
+                         "decoder getstate() should have returned a bytes "
+                         "object, not '%.200s'",
+                         Py_TYPE(next_input)->tp_name);
+            Py_DECREF(next_input);
+            goto fail;
+        }
         Py_DECREF(dec_buffer);
         Py_CLEAR(self->snapshot);
         self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
@@ -1542,7 +1562,7 @@
             decoded = PyObject_CallMethodObjArgs(
                 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
         Py_DECREF(bytes);
-        if (decoded == NULL)
+        if (check_decoded(decoded) < 0)
             goto fail;
 
         result = textiowrapper_get_decoded_chars(self, -1);
@@ -2145,7 +2165,14 @@
         if (input_chunk == NULL)
             goto fail;
 
-        assert (PyBytes_Check(input_chunk));
+        if (!PyBytes_Check(input_chunk)) {
+            PyErr_Format(PyExc_TypeError,
+                         "underlying read() should have returned a bytes "
+                         "object, not '%.200s'",
+                         Py_TYPE(input_chunk)->tp_name);
+            Py_DECREF(input_chunk);
+            goto fail;
+        }
 
         self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
         if (self->snapshot == NULL) {
@@ -2156,12 +2183,8 @@
         decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
             "Oi", input_chunk, (int)cookie.need_eof);
 
-        if (decoded == NULL)
+        if (check_decoded(decoded) < 0)
             goto fail;
-        if (PyUnicode_READY(decoded) == -1) {
-            Py_DECREF(decoded);
-            goto fail;
-        }
 
         textiowrapper_set_decoded_chars(self, decoded);
 
@@ -2277,13 +2300,11 @@
         Py_DECREF(_state); \
     } while (0)
 
-    /* TODO: replace assert with exception */
 #define DECODER_DECODE(start, len, res) do { \
         PyObject *_decoded = _PyObject_CallMethodId( \
             self->decoder, &PyId_decode, "y#", start, len); \
-        if (_decoded == NULL) \
+        if (check_decoded(_decoded) < 0) \
             goto fail; \
-        assert (PyUnicode_Check(_decoded)); \
         res = PyUnicode_GET_LENGTH(_decoded); \
         Py_DECREF(_decoded); \
     } while (0)
@@ -2364,9 +2385,8 @@
         /* We didn't get enough decoded data; signal EOF to get more. */
         PyObject *decoded = _PyObject_CallMethodId(
             self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
-        if (decoded == NULL)
+        if (check_decoded(decoded) < 0)
             goto fail;
-        assert (PyUnicode_Check(decoded));
         chars_decoded += PyUnicode_GET_LENGTH(decoded);
         Py_DECREF(decoded);
         cookie.need_eof = 1;

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list