[Python-checkins] cpython (merge 3.3 -> default): Issue #5308: Raise ValueError when marshalling too large object (a sequence with size >= 2**31), instead of producing illegal marshal data.

serhiy.storchaka python-checkins at python.org
Wed Feb 13 11:15:23 CET 2013


http://hg.python.org/cpython/rev/ea36478a36ee
changeset:   82192:ea36478a36ee
parent:      82188:fec33725f319
parent:      82191:b48e1cd2d3be
user:        Serhiy Storchaka <storchaka at gmail.com>
date:        Wed Feb 13 12:12:11 2013 +0200
summary:
  Issue #5308: Raise ValueError when marshalling too large object (a sequence
with size >= 2**31), instead of producing illegal marshal data.

files:
  Lib/test/test_marshal.py |   61 ++++++++++++-
  Misc/NEWS                |    3 +
  Python/marshal.c         |  118 +++++++++++++-------------
  3 files changed, 116 insertions(+), 66 deletions(-)


diff --git a/Lib/test/test_marshal.py b/Lib/test/test_marshal.py
--- a/Lib/test/test_marshal.py
+++ b/Lib/test/test_marshal.py
@@ -279,16 +279,63 @@
         unicode_string = 'T'
         self.assertRaises(TypeError, marshal.loads, unicode_string)
 
+LARGE_SIZE = 2**31
+character_size = 4 if sys.maxunicode > 0xFFFF else 2
+pointer_size = 8 if sys.maxsize > 0xFFFFFFFF else 4
+
+class NullWriter:
+    def write(self, s):
+        pass
+
+@unittest.skipIf(LARGE_SIZE > sys.maxsize, "test cannot run on 32-bit systems")
+class LargeValuesTestCase(unittest.TestCase):
+    def check_unmarshallable(self, data):
+        self.assertRaises(ValueError, marshal.dump, data, NullWriter())
+
+    @support.bigmemtest(size=LARGE_SIZE, memuse=1, dry_run=False)
+    def test_bytes(self, size):
+        self.check_unmarshallable(b'x' * size)
+
+    @support.bigmemtest(size=LARGE_SIZE, memuse=character_size, dry_run=False)
+    def test_str(self, size):
+        self.check_unmarshallable('x' * size)
+
+    @support.bigmemtest(size=LARGE_SIZE, memuse=pointer_size, dry_run=False)
+    def test_tuple(self, size):
+        self.check_unmarshallable((None,) * size)
+
+    @support.bigmemtest(size=LARGE_SIZE, memuse=pointer_size, dry_run=False)
+    def test_list(self, size):
+        self.check_unmarshallable([None] * size)
+
+    @support.bigmemtest(size=LARGE_SIZE,
+            memuse=pointer_size*12 + sys.getsizeof(LARGE_SIZE-1),
+            dry_run=False)
+    def test_set(self, size):
+        self.check_unmarshallable(set(range(size)))
+
+    @support.bigmemtest(size=LARGE_SIZE,
+            memuse=pointer_size*12 + sys.getsizeof(LARGE_SIZE-1),
+            dry_run=False)
+    def test_frozenset(self, size):
+        self.check_unmarshallable(frozenset(range(size)))
+
+    @support.bigmemtest(size=LARGE_SIZE, memuse=1, dry_run=False)
+    def test_bytearray(self, size):
+        self.check_unmarshallable(bytearray(size))
+
 
 def test_main():
     support.run_unittest(IntTestCase,
-                              FloatTestCase,
-                              StringTestCase,
-                              CodeTestCase,
-                              ContainerTestCase,
-                              ExceptionTestCase,
-                              BufferTestCase,
-                              BugsTestCase)
+                         FloatTestCase,
+                         StringTestCase,
+                         CodeTestCase,
+                         ContainerTestCase,
+                         ExceptionTestCase,
+                         BufferTestCase,
+                         BugsTestCase,
+                         LargeValuesTestCase,
+                        )
 
 if __name__ == "__main__":
     test_main()
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@
 Core and Builtins
 -----------------
 
+- Issue #5308: Raise ValueError when marshalling too large object (a sequence
+  with size >= 2**31), instead of producing illegal marshal data.
+
 - Issue #12983: Bytes literals with invalid \x escape now raise a SyntaxError
   and a full traceback including line number.
 
diff --git a/Python/marshal.c b/Python/marshal.c
--- a/Python/marshal.c
+++ b/Python/marshal.c
@@ -95,7 +95,7 @@
 }
 
 static void
-w_string(char *s, int n, WFILE *p)
+w_string(char *s, Py_ssize_t n, WFILE *p)
 {
     if (p->fp != NULL) {
         fwrite(s, 1, n, p->fp);
@@ -124,6 +124,21 @@
     w_byte((char)((x>>24) & 0xff), p);
 }
 
+#define SIZE32_MAX  0x7FFFFFFF
+
+#if SIZEOF_SIZE_T > 4
+# define W_SIZE(n, p)  do {                     \
+        if ((n) > SIZE32_MAX) {                 \
+            (p)->depth--;                       \
+            (p)->error = WFERR_UNMARSHALLABLE;  \
+            return;                             \
+        }                                       \
+        w_long((long)(n), p);                   \
+    } while(0)
+#else
+# define W_SIZE  w_long
+#endif
+
 /* We assume that Python longs are stored internally in base some power of
    2**15; for the sake of portability we'll always read and write them in base
    exactly 2**15. */
@@ -157,6 +172,11 @@
         d >>= PyLong_MARSHAL_SHIFT;
         l++;
     } while (d != 0);
+    if (l > SIZE32_MAX) {
+        p->depth--;
+        p->error = WFERR_UNMARSHALLABLE;
+        return;
+    }
     w_long((long)(Py_SIZE(ob) > 0 ? l : -l), p);
 
     for (i=0; i < n-1; i++) {
@@ -245,7 +265,7 @@
             n = strlen(buf);
             w_byte(TYPE_FLOAT, p);
             w_byte((int)n, p);
-            w_string(buf, (int)n, p);
+            w_string(buf, n, p);
             PyMem_Free(buf);
         }
     }
@@ -277,7 +297,7 @@
             }
             n = strlen(buf);
             w_byte((int)n, p);
-            w_string(buf, (int)n, p);
+            w_string(buf, n, p);
             PyMem_Free(buf);
             buf = PyOS_double_to_string(PyComplex_ImagAsDouble(v),
                                         'g', 17, 0, NULL);
@@ -287,21 +307,15 @@
             }
             n = strlen(buf);
             w_byte((int)n, p);
-            w_string(buf, (int)n, p);
+            w_string(buf, n, p);
             PyMem_Free(buf);
         }
     }
     else if (PyBytes_CheckExact(v)) {
         w_byte(TYPE_STRING, p);
         n = PyBytes_GET_SIZE(v);
-        if (n > INT_MAX) {
-            /* huge strings are not supported */
-            p->depth--;
-            p->error = WFERR_UNMARSHALLABLE;
-            return;
-        }
-        w_long((long)n, p);
-        w_string(PyBytes_AS_STRING(v), (int)n, p);
+        W_SIZE(n, p);
+        w_string(PyBytes_AS_STRING(v), n, p);
     }
     else if (PyUnicode_CheckExact(v)) {
         PyObject *utf8;
@@ -313,19 +327,14 @@
         }
         w_byte(TYPE_UNICODE, p);
         n = PyBytes_GET_SIZE(utf8);
-        if (n > INT_MAX) {
-            p->depth--;
-            p->error = WFERR_UNMARSHALLABLE;
-            return;
-        }
-        w_long((long)n, p);
-        w_string(PyBytes_AS_STRING(utf8), (int)n, p);
+        W_SIZE(n, p);
+        w_string(PyBytes_AS_STRING(utf8), n, p);
         Py_DECREF(utf8);
     }
     else if (PyTuple_CheckExact(v)) {
         w_byte(TYPE_TUPLE, p);
         n = PyTuple_Size(v);
-        w_long((long)n, p);
+        W_SIZE(n, p);
         for (i = 0; i < n; i++) {
             w_object(PyTuple_GET_ITEM(v, i), p);
         }
@@ -333,7 +342,7 @@
     else if (PyList_CheckExact(v)) {
         w_byte(TYPE_LIST, p);
         n = PyList_GET_SIZE(v);
-        w_long((long)n, p);
+        W_SIZE(n, p);
         for (i = 0; i < n; i++) {
             w_object(PyList_GET_ITEM(v, i), p);
         }
@@ -363,7 +372,7 @@
             p->error = WFERR_UNMARSHALLABLE;
             return;
         }
-        w_long((long)n, p);
+        W_SIZE(n, p);
         it = PyObject_GetIter(v);
         if (it == NULL) {
             p->depth--;
@@ -413,13 +422,8 @@
         w_byte(TYPE_STRING, p);
         n = view.len;
         s = view.buf;
-        if (n > INT_MAX) {
-            p->depth--;
-            p->error = WFERR_UNMARSHALLABLE;
-            return;
-        }
-        w_long((long)n, p);
-        w_string(s, (int)n, p);
+        W_SIZE(n, p);
+        w_string(s, n, p);
         PyBuffer_Release(&view);
     }
     else {
@@ -456,18 +460,18 @@
 
 #define rs_byte(p) (((p)->ptr < (p)->end) ? (unsigned char)*(p)->ptr++ : EOF)
 
-static int
-r_string(char *s, int n, RFILE *p)
+static Py_ssize_t
+r_string(char *s, Py_ssize_t n, RFILE *p)
 {
     char *ptr;
-    int read, left;
+    Py_ssize_t read, left;
 
     if (!p->readable) {
         if (p->fp != NULL)
             /* The result fits into int because it must be <=n. */
-            read = (int) fread(s, 1, n, p->fp);
+            read = fread(s, 1, n, p->fp);
         else {
-            left = (int)(p->end - p->ptr);
+            left = p->end - p->ptr;
             read = (left < n) ? left : n;
             memcpy(s, p->ptr, read);
             p->ptr += read;
@@ -476,7 +480,7 @@
     else {
         _Py_IDENTIFIER(read);
 
-        PyObject *data = _PyObject_CallMethodId(p->readable, &PyId_read, "i", n);
+        PyObject *data = _PyObject_CallMethodId(p->readable, &PyId_read, "n", n);
         read = 0;
         if (data != NULL) {
             if (!PyBytes_Check(data)) {
@@ -506,7 +510,7 @@
 {
     int c = EOF;
     unsigned char ch;
-    int n;
+    Py_ssize_t n;
 
     if (!p->readable)
         c = p->fp ? getc(p->fp) : rs_byte(p);
@@ -590,8 +594,8 @@
 r_PyLong(RFILE *p)
 {
     PyLongObject *ob;
-    int size, i, j, md, shorts_in_top_digit;
-    long n;
+    long n, size, i;
+    int j, md, shorts_in_top_digit;
     digit d;
 
     n = r_long(p);
@@ -599,7 +603,7 @@
         return NULL;
     if (n == 0)
         return (PyObject *)_PyLong_New(0);
-    if (n < -INT_MAX || n > INT_MAX) {
+    if (n < -SIZE32_MAX || n > SIZE32_MAX) {
         PyErr_SetString(PyExc_ValueError,
                        "bad marshal data (long size out of range)");
         return NULL;
@@ -730,7 +734,7 @@
             double dx;
             retval = NULL;
             n = r_byte(p);
-            if (n == EOF || r_string(buf, (int)n, p) != n) {
+            if (n == EOF || r_string(buf, n, p) != n) {
                 PyErr_SetString(PyExc_EOFError,
                     "EOF read where object expected");
                 break;
@@ -768,7 +772,7 @@
             Py_complex c;
             retval = NULL;
             n = r_byte(p);
-            if (n == EOF || r_string(buf, (int)n, p) != n) {
+            if (n == EOF || r_string(buf, n, p) != n) {
                 PyErr_SetString(PyExc_EOFError,
                     "EOF read where object expected");
                 break;
@@ -778,7 +782,7 @@
             if (c.real == -1.0 && PyErr_Occurred())
                 break;
             n = r_byte(p);
-            if (n == EOF || r_string(buf, (int)n, p) != n) {
+            if (n == EOF || r_string(buf, n, p) != n) {
                 PyErr_SetString(PyExc_EOFError,
                     "EOF read where object expected");
                 break;
@@ -827,7 +831,7 @@
             retval = NULL;
             break;
         }
-        if (n < 0 || n > INT_MAX) {
+        if (n < 0 || n > SIZE32_MAX) {
             PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
             retval = NULL;
             break;
@@ -837,7 +841,7 @@
             retval = NULL;
             break;
         }
-        if (r_string(PyBytes_AS_STRING(v), (int)n, p) != n) {
+        if (r_string(PyBytes_AS_STRING(v), n, p) != n) {
             Py_DECREF(v);
             PyErr_SetString(PyExc_EOFError,
                             "EOF read where object expected");
@@ -856,7 +860,7 @@
             retval = NULL;
             break;
         }
-        if (n < 0 || n > INT_MAX) {
+        if (n < 0 || n > SIZE32_MAX) {
             PyErr_SetString(PyExc_ValueError, "bad marshal data (unicode size out of range)");
             retval = NULL;
             break;
@@ -866,7 +870,7 @@
             retval = PyErr_NoMemory();
             break;
         }
-        if (r_string(buffer, (int)n, p) != n) {
+        if (r_string(buffer, n, p) != n) {
             PyMem_DEL(buffer);
             PyErr_SetString(PyExc_EOFError,
                 "EOF read where object expected");
@@ -885,12 +889,12 @@
             retval = NULL;
             break;
         }
-        if (n < 0 || n > INT_MAX) {
+        if (n < 0 || n > SIZE32_MAX) {
             PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)");
             retval = NULL;
             break;
         }
-        v = PyTuple_New((int)n);
+        v = PyTuple_New(n);
         if (v == NULL) {
             retval = NULL;
             break;
@@ -905,7 +909,7 @@
                 v = NULL;
                 break;
             }
-            PyTuple_SET_ITEM(v, (int)i, v2);
+            PyTuple_SET_ITEM(v, i, v2);
         }
         retval = v;
         break;
@@ -916,12 +920,12 @@
             retval = NULL;
             break;
         }
-        if (n < 0 || n > INT_MAX) {
+        if (n < 0 || n > SIZE32_MAX) {
             PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)");
             retval = NULL;
             break;
         }
-        v = PyList_New((int)n);
+        v = PyList_New(n);
         if (v == NULL) {
             retval = NULL;
             break;
@@ -936,7 +940,7 @@
                 v = NULL;
                 break;
             }
-            PyList_SET_ITEM(v, (int)i, v2);
+            PyList_SET_ITEM(v, i, v2);
         }
         retval = v;
         break;
@@ -972,7 +976,7 @@
             retval = NULL;
             break;
         }
-        if (n < 0 || n > INT_MAX) {
+        if (n < 0 || n > SIZE32_MAX) {
             PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)");
             retval = NULL;
             break;
@@ -1180,12 +1184,8 @@
     if (filesize > 0 && filesize <= REASONABLE_FILE_LIMIT) {
         char* pBuf = (char *)PyMem_MALLOC(filesize);
         if (pBuf != NULL) {
-            PyObject* v;
-            size_t n;
-            /* filesize must fit into an int, because it
-               is smaller than REASONABLE_FILE_LIMIT */
-            n = fread(pBuf, 1, (int)filesize, fp);
-            v = PyMarshal_ReadObjectFromString(pBuf, n);
+            size_t n = fread(pBuf, 1, (size_t)filesize, fp);
+            PyObject* v = PyMarshal_ReadObjectFromString(pBuf, n);
             PyMem_FREE(pBuf);
             return v;
         }

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list