[issue20538] Segfault in UTF-7 incremental decoder

Fri Feb 7 18:49:30 CET 2014

Serhiy Storchaka added the comment:

Here are patches for 3.3 and 3.4 (this is a 3.3+ only bug).

----------
keywords: +patch
Added file: http://bugs.python.org/file33962/issue20538-3.3.patch
Added file: http://bugs.python.org/file33963/issue20538-3.4.patch

_______________________________________
Python tracker <report at bugs.python.org>
<http://bugs.python.org/issue20538>
_______________________________________
-------------- next part --------------
diff -r e5a78f7c2dcb Lib/test/test_codecs.py

--- a/Lib/test/test_codecs.py	Fri Feb 07 10:06:39 2014 +0200
+++ b/Lib/test/test_codecs.py	Fri Feb 07 19:46:39 2014 +0200
@@ -852,13 +852,40 @@
 
     def test_partial(self):
         self.check_partial(
-            "a+-b",
+            'a+-b\x00c\x80d\u0100e\U00010000f',
             [
-                "a",
-                "a",
-                "a+",
-                "a+-",
-                "a+-b",
+                'a',
+                'a',
+                'a+',
+                'a+-',
+                'a+-b',
+                'a+-b',
+                'a+-b',
+                'a+-b',
+                'a+-b',
+                'a+-b\x00',
+                'a+-b\x00c',
+                'a+-b\x00c',
+                'a+-b\x00c',
+                'a+-b\x00c',
+                'a+-b\x00c',
+                'a+-b\x00c\x80',
+                'a+-b\x00c\x80d',
+                'a+-b\x00c\x80d',
+                'a+-b\x00c\x80d',
+                'a+-b\x00c\x80d',
+                'a+-b\x00c\x80d',
+                'a+-b\x00c\x80d\u0100',
+                'a+-b\x00c\x80d\u0100e',
+                'a+-b\x00c\x80d\u0100e',
+                'a+-b\x00c\x80d\u0100e',
+                'a+-b\x00c\x80d\u0100e',
+                'a+-b\x00c\x80d\u0100e',
+                'a+-b\x00c\x80d\u0100e',
+                'a+-b\x00c\x80d\u0100e',
+                'a+-b\x00c\x80d\u0100e',
+                'a+-b\x00c\x80d\u0100e\U00010000',
+                'a+-b\x00c\x80d\u0100e\U00010000f',
             ]
         )
 
diff -r e5a78f7c2dcb Objects/unicodeobject.c
--- a/Objects/unicodeobject.c	Fri Feb 07 10:06:39 2014 +0200
+++ b/Objects/unicodeobject.c	Fri Feb 07 19:46:39 2014 +0200
@@ -4474,8 +4474,16 @@
     /* return state */
     if (consumed) {
         if (inShift) {
+            *consumed = startinpos;
+            if (outpos != shiftOutStart &&
+                PyUnicode_MAX_CHAR_VALUE(unicode) > 127) {
+                PyObject *result = PyUnicode_FromKindAndData(
+                        PyUnicode_KIND(unicode), PyUnicode_DATA(unicode),
+                        shiftOutStart);
+                Py_DECREF(unicode);
+                unicode = result;
+            }
             outpos = shiftOutStart; /* back off output */
-            *consumed = startinpos;
         }
         else {
             *consumed = s-starts;
-------------- next part --------------
diff -r 3b94a4ef244e Lib/test/test_codecs.py
--- a/Lib/test/test_codecs.py	Fri Feb 07 17:53:13 2014 +0100
+++ b/Lib/test/test_codecs.py	Fri Feb 07 19:46:46 2014 +0200
@@ -124,8 +124,6 @@
             "".join(codecs.iterdecode([bytes([c]) for c in encoded], self.encoding))
         )
 
-    # Temporary skip, see http://bugs.python.org/issue20542
-    @unittest.skip
     def test_readline(self):
         def getreader(input):
             stream = io.BytesIO(input.encode(self.encoding))
@@ -899,13 +897,40 @@
 
     def test_partial(self):
         self.check_partial(
-            "a+-b",
+            'a+-b\x00c\x80d\u0100e\U00010000f',
             [
-                "a",
-                "a",
-                "a+",
-                "a+-",
-                "a+-b",
+                'a',
+                'a',
+                'a+',
+                'a+-',
+                'a+-b',
+                'a+-b',
+                'a+-b',
+                'a+-b',
+                'a+-b',
+                'a+-b\x00',
+                'a+-b\x00c',
+                'a+-b\x00c',
+                'a+-b\x00c',
+                'a+-b\x00c',
+                'a+-b\x00c',
+                'a+-b\x00c\x80',
+                'a+-b\x00c\x80d',
+                'a+-b\x00c\x80d',
+                'a+-b\x00c\x80d',
+                'a+-b\x00c\x80d',
+                'a+-b\x00c\x80d',
+                'a+-b\x00c\x80d\u0100',
+                'a+-b\x00c\x80d\u0100e',
+                'a+-b\x00c\x80d\u0100e',
+                'a+-b\x00c\x80d\u0100e',
+                'a+-b\x00c\x80d\u0100e',
+                'a+-b\x00c\x80d\u0100e',
+                'a+-b\x00c\x80d\u0100e',
+                'a+-b\x00c\x80d\u0100e',
+                'a+-b\x00c\x80d\u0100e',
+                'a+-b\x00c\x80d\u0100e\U00010000',
+                'a+-b\x00c\x80d\u0100e\U00010000f',
             ]
         )
 
diff -r 3b94a4ef244e Objects/unicodeobject.c
--- a/Objects/unicodeobject.c	Fri Feb 07 17:53:13 2014 +0100
+++ b/Objects/unicodeobject.c	Fri Feb 07 19:46:46 2014 +0200
@@ -4459,8 +4459,16 @@
     /* return state */
     if (consumed) {
         if (inShift) {
+            *consumed = startinpos;
+            if (writer.pos != shiftOutStart && writer.maxchar > 127) {
+                PyObject *result = PyUnicode_FromKindAndData(
+                        writer.kind, writer.data, shiftOutStart);
+                Py_XDECREF(errorHandler);
+                Py_XDECREF(exc);
+                _PyUnicodeWriter_Dealloc(&writer);
+                return result;
+            }
             writer.pos = shiftOutStart; /* back off output */
-            *consumed = startinpos;
         }
         else {
             *consumed = s-starts;