[Python-checkins] cpython (3.5): mmap: do all internal arithmetic with Py_ssize_t while being very careful about

benjamin.peterson python-checkins at python.org
Thu Oct 6 01:54:37 EDT 2016


https://hg.python.org/cpython/rev/ee73afc39f3e
changeset:   104321:ee73afc39f3e
branch:      3.5
parent:      104314:3771a6326725
user:        Benjamin Peterson <benjamin at python.org>
date:        Wed Oct 05 21:45:48 2016 -0700
summary:
  mmap: do all internal arithmetic with Py_ssize_t while being very careful about overflow

files:
  Lib/test/test_mmap.py |   11 +
  Misc/NEWS             |    3 +
  Modules/mmapmodule.c  |  189 ++++++++++++-----------------
  3 files changed, 93 insertions(+), 110 deletions(-)


diff --git a/Lib/test/test_mmap.py b/Lib/test/test_mmap.py
--- a/Lib/test/test_mmap.py
+++ b/Lib/test/test_mmap.py
@@ -713,6 +713,17 @@
         gc_collect()
         self.assertIs(wr(), None)
 
+    def test_resize_past_pos(self):
+        m = mmap.mmap(-1, 8192)
+        self.addCleanup(m.close)
+        m.read(5000)
+        m.resize(4096)
+        self.assertEqual(m.read(14), b'')
+        self.assertRaises(ValueError, m.read_byte,)
+        self.assertRaises(ValueError, m.write_byte, 42)
+        self.assertRaises(ValueError, m.write, b'abc')
+
+
 class LargeMmapTests(unittest.TestCase):
 
     def setUp(self):
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -94,6 +94,9 @@
 - Issue #28322: Fixed possible crashes when unpickle itertools objects from
   incorrect pickle data.  Based on patch by John Leitch.
 
+- Fix possible integer overflows and crashes in the mmap module with unusual
+  usage patterns.
+
 - Issue #1703178: Fix the ability to pass the --link-objects option to the
   distutils build_ext command.
 
diff --git a/Modules/mmapmodule.c b/Modules/mmapmodule.c
--- a/Modules/mmapmodule.c
+++ b/Modules/mmapmodule.c
@@ -90,8 +90,8 @@
 typedef struct {
     PyObject_HEAD
     char *      data;
-    size_t      size;
-    size_t      pos;    /* relative to offset */
+    Py_ssize_t  size;
+    Py_ssize_t  pos;    /* relative to offset */
 #ifdef MS_WINDOWS
     PY_LONG_LONG offset;
 #else
@@ -210,33 +210,32 @@
                       PyObject *unused)
 {
     CHECK_VALID(NULL);
-    if (self->pos < self->size) {
-        char value = self->data[self->pos];
-        self->pos += 1;
-        return Py_BuildValue("B", (unsigned char)value);
-    } else {
+    if (self->pos >= self->size) {
         PyErr_SetString(PyExc_ValueError, "read byte out of range");
         return NULL;
     }
+    return PyLong_FromLong((unsigned char)self->data[self->pos++]);
 }
 
 static PyObject *
 mmap_read_line_method(mmap_object *self,
                       PyObject *unused)
 {
-    char *start = self->data+self->pos;
-    char *eof = self->data+self->size;
-    char *eol;
+    Py_ssize_t remaining;
+    char *start, *eol;
     PyObject *result;
 
     CHECK_VALID(NULL);
 
-    eol = memchr(start, '\n', self->size - self->pos);
+    remaining = (self->pos < self->size) ? self->size - self->pos : 0;
+    if (!remaining)
+        return PyBytes_FromString("");
+    start = self->data + self->pos;
+    eol = memchr(start, '\n', remaining);
     if (!eol)
-        eol = eof;
+        eol = self->data + self->size;
     else
-        ++eol;                  /* we're interested in the position after the
-                           newline. */
+        ++eol; /* advance past newline */
     result = PyBytes_FromStringAndSize(start, (eol - start));
     self->pos += (eol - start);
     return result;
@@ -268,7 +267,7 @@
 mmap_read_method(mmap_object *self,
                  PyObject *args)
 {
-    Py_ssize_t num_bytes = -1, n;
+    Py_ssize_t num_bytes, remaining;
     PyObject *result;
 
     CHECK_VALID(NULL);
@@ -276,20 +275,10 @@
         return(NULL);
 
     /* silently 'adjust' out-of-range requests */
-    assert(self->size >= self->pos);
-    n = self->size - self->pos;
-    /* The difference can overflow, only if self->size is greater than
-     * PY_SSIZE_T_MAX.  But then the operation cannot possibly succeed,
-     * because the mapped area and the returned string each need more
-     * than half of the addressable memory.  So we clip the size, and let
-     * the code below raise MemoryError.
-     */
-    if (n < 0)
-        n = PY_SSIZE_T_MAX;
-    if (num_bytes < 0 || num_bytes > n) {
-        num_bytes = n;
-    }
-    result = PyBytes_FromStringAndSize(self->data+self->pos, num_bytes);
+    remaining = (self->pos < self->size) ? self->size - self->pos : 0;
+    if (num_bytes < 0 || num_bytes > remaining)
+        num_bytes = remaining;
+    result = PyBytes_FromStringAndSize(&self->data[self->pos], num_bytes);
     self->pos += num_bytes;
     return result;
 }
@@ -317,14 +306,14 @@
             start += self->size;
         if (start < 0)
             start = 0;
-        else if ((size_t)start > self->size)
+        else if (start > self->size)
             start = self->size;
 
         if (end < 0)
             end += self->size;
         if (end < 0)
             end = 0;
-        else if ((size_t)end > self->size)
+        else if (end > self->size)
             end = self->size;
 
         start_p = self->data + start;
@@ -394,18 +383,17 @@
     if (!PyArg_ParseTuple(args, "y*:write", &data))
         return(NULL);
 
-    if (!is_writable(self)) {
+    if (!is_writable(self))
+        return NULL;
+
+    if (self->pos > self->size || self->size - self->pos < data.len) {
         PyBuffer_Release(&data);
+        PyErr_SetString(PyExc_ValueError, "data out of range");
         return NULL;
     }
 
-    if ((self->pos + data.len) > self->size) {
-        PyErr_SetString(PyExc_ValueError, "data out of range");
-        PyBuffer_Release(&data);
-        return NULL;
-    }
-    memcpy(self->data + self->pos, data.buf, data.len);
-    self->pos = self->pos + data.len;
+    memcpy(&self->data[self->pos], data.buf, data.len);
+    self->pos += data.len;
     PyBuffer_Release(&data);
     Py_INCREF(Py_None);
     return Py_None;
@@ -425,8 +413,7 @@
         return NULL;
 
     if (self->pos < self->size) {
-        *(self->data+self->pos) = value;
-        self->pos += 1;
+        self->data[self->pos++] = value;
         Py_INCREF(Py_None);
         return Py_None;
     }
@@ -495,8 +482,14 @@
     if (!PyArg_ParseTuple(args, "n:resize", &new_size) ||
         !is_resizeable(self)) {
         return NULL;
+    }
+    if (new_size < 0 || PY_SSIZE_T_MAX - new_size < self->offset) {
+        PyErr_SetString(PyExc_ValueError, "new size out of range");
+        return NULL;
+    }
+
+    {
 #ifdef MS_WINDOWS
-    } else {
         DWORD dwErrCode = 0;
         DWORD off_hi, off_lo, newSizeLow, newSizeHigh;
         /* First, unmap the file view */
@@ -546,15 +539,13 @@
 
 #ifdef UNIX
 #ifndef HAVE_MREMAP
-    } else {
         PyErr_SetString(PyExc_SystemError,
                         "mmap: resizing not available--no mremap()");
         return NULL;
 #else
-    } else {
         void *newmap;
 
-        if (ftruncate(self->fd, self->offset + new_size) == -1) {
+        if (self->fd != -1 && ftruncate(self->fd, self->offset + new_size) == -1) {
             PyErr_SetFromErrno(PyExc_OSError);
             return NULL;
         }
@@ -562,11 +553,11 @@
 #ifdef MREMAP_MAYMOVE
         newmap = mremap(self->data, self->size, new_size, MREMAP_MAYMOVE);
 #else
-        #if defined(__NetBSD__)
-            newmap = mremap(self->data, self->size, self->data, new_size, 0);
-        #else
-            newmap = mremap(self->data, self->size, new_size, 0);
-        #endif /* __NetBSD__ */
+#if defined(__NetBSD__)
+        newmap = mremap(self->data, self->size, self->data, new_size, 0);
+#else
+        newmap = mremap(self->data, self->size, new_size, 0);
+#endif /* __NetBSD__ */
 #endif
         if (newmap == (void *)-1)
         {
@@ -597,7 +588,7 @@
     CHECK_VALID(NULL);
     if (!PyArg_ParseTuple(args, "|nn:flush", &offset, &size))
         return NULL;
-    if ((size_t)(offset + size) > self->size) {
+    if (size < 0 || offset < 0 || self->size - offset < size) {
         PyErr_SetString(PyExc_ValueError, "flush values out of range");
         return NULL;
     }
@@ -630,20 +621,18 @@
     if (!PyArg_ParseTuple(args, "n|i:seek", &dist, &how))
         return NULL;
     else {
-        size_t where;
+        Py_ssize_t where;
         switch (how) {
         case 0: /* relative to start */
-            if (dist < 0)
-                goto onoutofrange;
             where = dist;
             break;
         case 1: /* relative to current position */
-            if ((Py_ssize_t)self->pos + dist < 0)
+            if (PY_SSIZE_T_MAX - self->pos < dist)
                 goto onoutofrange;
             where = self->pos + dist;
             break;
         case 2: /* relative to end */
-            if ((Py_ssize_t)self->size + dist < 0)
+            if (PY_SSIZE_T_MAX - self->size < dist)
                 goto onoutofrange;
             where = self->size + dist;
             break;
@@ -651,7 +640,7 @@
             PyErr_SetString(PyExc_ValueError, "unknown seek type");
             return NULL;
         }
-        if (where > self->size)
+        if (where > self->size || where < 0)
             goto onoutofrange;
         self->pos = where;
         Py_INCREF(Py_None);
@@ -666,23 +655,27 @@
 static PyObject *
 mmap_move_method(mmap_object *self, PyObject *args)
 {
-    unsigned long dest, src, cnt;
+    Py_ssize_t dest, src, cnt;
     CHECK_VALID(NULL);
-    if (!PyArg_ParseTuple(args, "kkk:move", &dest, &src, &cnt) ||
+    if (!PyArg_ParseTuple(args, "nnn:move", &dest, &src, &cnt) ||
         !is_writable(self)) {
         return NULL;
     } else {
         /* bounds check the values */
-        if ((cnt + dest) < cnt || (cnt + src) < cnt ||
-           src > self->size || (src + cnt) > self->size ||
-           dest > self->size || (dest + cnt) > self->size) {
-            PyErr_SetString(PyExc_ValueError,
-                "source, destination, or count out of range");
-            return NULL;
-        }
-        memmove(self->data+dest, self->data+src, cnt);
+        if (dest < 0 || src < 0 || cnt < 0)
+            goto bounds;
+        if (self->size - dest < cnt || self->size - src < cnt)
+            goto bounds;
+
+        memmove(&self->data[dest], &self->data[src], cnt);
+
         Py_INCREF(Py_None);
         return Py_None;
+
+      bounds:
+        PyErr_SetString(PyExc_ValueError,
+                        "source, destination, or count out of range");
+        return NULL;
     }
 }
 
@@ -785,7 +778,7 @@
 mmap_item(mmap_object *self, Py_ssize_t i)
 {
     CHECK_VALID(NULL);
-    if (i < 0 || (size_t)i >= self->size) {
+    if (i < 0 || i >= self->size) {
         PyErr_SetString(PyExc_IndexError, "mmap index out of range");
         return NULL;
     }
@@ -802,7 +795,7 @@
             return NULL;
         if (i < 0)
             i += self->size;
-        if (i < 0 || (size_t)i >= self->size) {
+        if (i < 0 || i >= self->size) {
             PyErr_SetString(PyExc_IndexError,
                 "mmap index out of range");
             return NULL;
@@ -870,7 +863,7 @@
     const char *buf;
 
     CHECK_VALID(-1);
-    if (i < 0 || (size_t)i >= self->size) {
+    if (i < 0 || i >= self->size) {
         PyErr_SetString(PyExc_IndexError, "mmap index out of range");
         return -1;
     }
@@ -907,7 +900,7 @@
             return -1;
         if (i < 0)
             i += self->size;
-        if (i < 0 || (size_t)i >= self->size) {
+        if (i < 0 || i >= self->size) {
             PyErr_SetString(PyExc_IndexError,
                             "mmap index out of range");
             return -1;
@@ -1074,32 +1067,6 @@
 };
 
 
-/* extract the map size from the given PyObject
-
-   Returns -1 on error, with an appropriate Python exception raised. On
-   success, the map size is returned. */
-static Py_ssize_t
-_GetMapSize(PyObject *o, const char* param)
-{
-    if (o == NULL)
-        return 0;
-    if (PyIndex_Check(o)) {
-        Py_ssize_t i = PyNumber_AsSsize_t(o, PyExc_OverflowError);
-        if (i==-1 && PyErr_Occurred())
-            return -1;
-        if (i < 0) {
-            PyErr_Format(PyExc_OverflowError,
-                            "memory mapped %s must be positive",
-                            param);
-            return -1;
-        }
-        return i;
-    }
-
-    PyErr_SetString(PyExc_TypeError, "map size must be an integral value");
-    return -1;
-}
-
 #ifdef UNIX
 #ifdef HAVE_LARGEFILE_SUPPORT
 #define _Py_PARSE_OFF_T "L"
@@ -1112,7 +1079,6 @@
 {
     struct _Py_stat_struct status;
     mmap_object *m_obj;
-    PyObject *map_size_obj = NULL;
     Py_ssize_t map_size;
     off_t offset = 0;
     int fd, flags = MAP_SHARED, prot = PROT_WRITE | PROT_READ;
@@ -1122,13 +1088,15 @@
                                "flags", "prot",
                                "access", "offset", NULL};
 
-    if (!PyArg_ParseTupleAndKeywords(args, kwdict, "iO|iii" _Py_PARSE_OFF_T, keywords,
-                                     &fd, &map_size_obj, &flags, &prot,
+    if (!PyArg_ParseTupleAndKeywords(args, kwdict, "in|iii" _Py_PARSE_OFF_T, keywords,
+                                     &fd, &map_size, &flags, &prot,
                                      &access, &offset))
         return NULL;
-    map_size = _GetMapSize(map_size_obj, "size");
-    if (map_size < 0)
+    if (map_size < 0) {
+        PyErr_SetString(PyExc_OverflowError,
+                        "memory mapped length must be postiive");
         return NULL;
+    }
     if (offset < 0) {
         PyErr_SetString(PyExc_OverflowError,
             "memory mapped offset must be positive");
@@ -1194,7 +1162,7 @@
                 return NULL;
             }
             map_size = (Py_ssize_t) (status.st_size - offset);
-        } else if (offset + map_size > status.st_size) {
+        } else if (offset > status.st_size || status.st_size - offset < map_size) {
             PyErr_SetString(PyExc_ValueError,
                             "mmap length is greater than file size");
             return NULL;
@@ -1203,8 +1171,8 @@
     m_obj = (mmap_object *)type->tp_alloc(type, 0);
     if (m_obj == NULL) {return NULL;}
     m_obj->data = NULL;
-    m_obj->size = (size_t) map_size;
-    m_obj->pos = (size_t) 0;
+    m_obj->size = map_size;
+    m_obj->pos = 0;
     m_obj->weakreflist = NULL;
     m_obj->exports = 0;
     m_obj->offset = offset;
@@ -1264,7 +1232,6 @@
 new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict)
 {
     mmap_object *m_obj;
-    PyObject *map_size_obj = NULL;
     Py_ssize_t map_size;
     PY_LONG_LONG offset = 0, size;
     DWORD off_hi;       /* upper 32 bits of offset */
@@ -1281,8 +1248,8 @@
                                 "tagname",
                                 "access", "offset", NULL };
 
-    if (!PyArg_ParseTupleAndKeywords(args, kwdict, "iO|ziL", keywords,
-                                     &fileno, &map_size_obj,
+    if (!PyArg_ParseTupleAndKeywords(args, kwdict, "in|ziL", keywords,
+                                     &fileno, &map_size,
                                      &tagname, &access, &offset)) {
         return NULL;
     }
@@ -1305,9 +1272,11 @@
                             "mmap invalid access parameter.");
     }
 
-    map_size = _GetMapSize(map_size_obj, "size");
-    if (map_size < 0)
+    if (map_size < 0) {
+        PyErr_SetString(PyExc_OverflowError,
+                        "memory mapped length must be postiive");
         return NULL;
+    }
     if (offset < 0) {
         PyErr_SetString(PyExc_OverflowError,
             "memory mapped offset must be positive");

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list