[Python-checkins] cpython (merge 3.5 -> default): Issue #24802: Merge null termination fixes from 3.5

martin.panter python-checkins at python.org
Fri Nov 6 22:33:10 EST 2015


https://hg.python.org/cpython/rev/4df1eaecb506
changeset:   98994:4df1eaecb506
parent:      98991:c495c9dd7726
parent:      98993:95b9c07b27f7
user:        Martin Panter <vadmium+py at gmail.com>
date:        Sat Nov 07 03:15:32 2015 +0000
summary:
  Issue #24802: Merge null termination fixes from 3.5

files:
  Lib/test/test_compile.py |  21 ++++++++++++
  Lib/test/test_float.py   |  38 +++++++++++++++++++++-
  Lib/test/test_int.py     |  42 +++++++++++++++++++-----
  Misc/NEWS                |   8 ++++
  Objects/abstract.c       |  22 +++++++++++-
  Objects/complexobject.c  |   7 ----
  Objects/floatobject.c    |  15 ++++++++
  Python/bltinmodule.c     |  48 ++++++++++++++++++---------
  8 files changed, 165 insertions(+), 36 deletions(-)


diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py
--- a/Lib/test/test_compile.py
+++ b/Lib/test/test_compile.py
@@ -542,6 +542,27 @@
         check_limit("a", "[0]")
         check_limit("a", "*a")
 
+    def test_null_terminated(self):
+        # The source code is null-terminated internally, but bytes-like
+        # objects are accepted, which might not be null-terminated.
+        # Exception changed from TypeError to ValueError in 3.5
+        with self.assertRaisesRegex(Exception, "cannot contain null"):
+            compile("123\x00", "<dummy>", "eval")
+        with self.assertRaisesRegex(Exception, "cannot contain null"):
+            compile(memoryview(b"123\x00"), "<dummy>", "eval")
+        code = compile(memoryview(b"123\x00")[1:-1], "<dummy>", "eval")
+        self.assertEqual(eval(code), 23)
+        code = compile(memoryview(b"1234")[1:-1], "<dummy>", "eval")
+        self.assertEqual(eval(code), 23)
+        code = compile(memoryview(b"$23$")[1:-1], "<dummy>", "eval")
+        self.assertEqual(eval(code), 23)
+
+        # Also test when eval() and exec() do the compilation step
+        self.assertEqual(eval(memoryview(b"1234")[1:-1]), 23)
+        namespace = dict()
+        exec(memoryview(b"ax = 123")[1:-1], namespace)
+        self.assertEqual(namespace['x'], 12)
+
 
 class TestStackSize(unittest.TestCase):
     # These tests check that the computed stack size for a code object
diff --git a/Lib/test/test_float.py b/Lib/test/test_float.py
--- a/Lib/test/test_float.py
+++ b/Lib/test/test_float.py
@@ -31,7 +31,6 @@
         self.assertEqual(float(3.14), 3.14)
         self.assertEqual(float(314), 314.0)
         self.assertEqual(float("  3.14  "), 3.14)
-        self.assertEqual(float(b" 3.14  "), 3.14)
         self.assertRaises(ValueError, float, "  0x3.1  ")
         self.assertRaises(ValueError, float, "  -0x3.p-1  ")
         self.assertRaises(ValueError, float, "  +0x3.p-1  ")
@@ -43,7 +42,6 @@
         self.assertRaises(ValueError, float, "+.inf")
         self.assertRaises(ValueError, float, ".")
         self.assertRaises(ValueError, float, "-.")
-        self.assertRaises(ValueError, float, b"-")
         self.assertRaises(TypeError, float, {})
         self.assertRaisesRegex(TypeError, "not 'dict'", float, {})
         # Lone surrogate
@@ -57,6 +55,42 @@
         float(b'.' + b'1'*1000)
         float('.' + '1'*1000)
 
+    def test_non_numeric_input_types(self):
+        # Test possible non-numeric types for the argument x, including
+        # subclasses of the explicitly documented accepted types.
+        class CustomStr(str): pass
+        class CustomBytes(bytes): pass
+        class CustomByteArray(bytearray): pass
+
+        factories = [
+            bytes,
+            bytearray,
+            lambda b: CustomStr(b.decode()),
+            CustomBytes,
+            CustomByteArray,
+            memoryview,
+        ]
+        try:
+            from array import array
+        except ImportError:
+            pass
+        else:
+            factories.append(lambda b: array('B', b))
+
+        for f in factories:
+            x = f(b" 3.14  ")
+            with self.subTest(type(x)):
+                self.assertEqual(float(x), 3.14)
+                with self.assertRaisesRegex(ValueError, "could not convert"):
+                    float(f(b'A' * 0x10))
+
+    def test_float_memoryview(self):
+        self.assertEqual(float(memoryview(b'12.3')[1:4]), 2.3)
+        self.assertEqual(float(memoryview(b'12.3\x00')[1:4]), 2.3)
+        self.assertEqual(float(memoryview(b'12.3 ')[1:4]), 2.3)
+        self.assertEqual(float(memoryview(b'12.3A')[1:4]), 2.3)
+        self.assertEqual(float(memoryview(b'12.34')[1:4]), 2.3)
+
     def test_error_message(self):
         testlist = ('\xbd', '123\xbd', '  123 456  ')
         for s in testlist:
diff --git a/Lib/test/test_int.py b/Lib/test/test_int.py
--- a/Lib/test/test_int.py
+++ b/Lib/test/test_int.py
@@ -276,16 +276,40 @@
         class CustomBytes(bytes): pass
         class CustomByteArray(bytearray): pass
 
-        values = [b'100',
-                  bytearray(b'100'),
-                  CustomStr('100'),
-                  CustomBytes(b'100'),
-                  CustomByteArray(b'100')]
+        factories = [
+            bytes,
+            bytearray,
+            lambda b: CustomStr(b.decode()),
+            CustomBytes,
+            CustomByteArray,
+            memoryview,
+        ]
+        try:
+            from array import array
+        except ImportError:
+            pass
+        else:
+            factories.append(lambda b: array('B', b))
 
-        for x in values:
-            msg = 'x has type %s' % type(x).__name__
-            self.assertEqual(int(x), 100, msg=msg)
-            self.assertEqual(int(x, 2), 4, msg=msg)
+        for f in factories:
+            x = f(b'100')
+            with self.subTest(type(x)):
+                self.assertEqual(int(x), 100)
+                if isinstance(x, (str, bytes, bytearray)):
+                    self.assertEqual(int(x, 2), 4)
+                else:
+                    msg = "can't convert non-string"
+                    with self.assertRaisesRegex(TypeError, msg):
+                        int(x, 2)
+                with self.assertRaisesRegex(ValueError, 'invalid literal'):
+                    int(f(b'A' * 0x10))
+
+    def test_int_memoryview(self):
+        self.assertEqual(int(memoryview(b'123')[1:3]), 23)
+        self.assertEqual(int(memoryview(b'123\x00')[1:3]), 23)
+        self.assertEqual(int(memoryview(b'123 ')[1:3]), 23)
+        self.assertEqual(int(memoryview(b'123A')[1:3]), 23)
+        self.assertEqual(int(memoryview(b'1234')[1:3]), 23)
 
     def test_string_float(self):
         self.assertRaises(ValueError, int, '1.2')
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,10 @@
 Core and Builtins
 -----------------
 
+- Issue #24802: Avoid buffer overreads when int(), float(), compile(), exec()
+  and eval() are passed bytes-like objects.  These objects are not
+  necessarily terminated by a null byte, but the functions assumed they were.
+
 - Issue #25555: Fix parser and AST: fill lineno and col_offset of "arg" node
   when compiling AST from Python objects.
 
@@ -357,6 +361,10 @@
 Core and Builtins
 -----------------
 
+- Issue #24802: Avoid buffer overreads when int(), float(), compile(), exec()
+  and eval() are passed bytes-like objects.  These objects are not
+  necessarily terminated by a null byte, but the functions assumed they were.
+
 - Issue #24402: Fix input() to prompt to the redirected stdout when
   sys.stdout.fileno() fails.
 
diff --git a/Objects/abstract.c b/Objects/abstract.c
--- a/Objects/abstract.c
+++ b/Objects/abstract.c
@@ -1312,12 +1312,30 @@
         /* The below check is done in PyLong_FromUnicode(). */
         return PyLong_FromUnicodeObject(o, 10);
 
-    if (PyObject_GetBuffer(o, &view, PyBUF_SIMPLE) == 0) {
+    if (PyBytes_Check(o))
         /* need to do extra error checking that PyLong_FromString()
          * doesn't do.  In particular int('9\x005') must raise an
          * exception, not truncate at the null.
          */
-        PyObject *result = _PyLong_FromBytes(view.buf, view.len, 10);
+        return _PyLong_FromBytes(PyBytes_AS_STRING(o),
+                                 PyBytes_GET_SIZE(o), 10);
+
+    if (PyByteArray_Check(o))
+        return _PyLong_FromBytes(PyByteArray_AS_STRING(o),
+                                 PyByteArray_GET_SIZE(o), 10);
+
+    if (PyObject_GetBuffer(o, &view, PyBUF_SIMPLE) == 0) {
+        PyObject *result, *bytes;
+
+        /* Copy to NUL-terminated buffer. */
+        bytes = PyBytes_FromStringAndSize((const char *)view.buf, view.len);
+        if (bytes == NULL) {
+            PyBuffer_Release(&view);
+            return NULL;
+        }
+        result = _PyLong_FromBytes(PyBytes_AS_STRING(bytes),
+                                   PyBytes_GET_SIZE(bytes), 10);
+        Py_DECREF(bytes);
         PyBuffer_Release(&view);
         return result;
     }
diff --git a/Objects/complexobject.c b/Objects/complexobject.c
--- a/Objects/complexobject.c
+++ b/Objects/complexobject.c
@@ -767,7 +767,6 @@
     int got_bracket=0;
     PyObject *s_buffer = NULL;
     Py_ssize_t len;
-    Py_buffer view = {NULL, NULL};
 
     if (PyUnicode_Check(v)) {
         s_buffer = _PyUnicode_TransformDecimalAndSpaceToASCII(v);
@@ -777,10 +776,6 @@
         if (s == NULL)
             goto error;
     }
-    else if (PyObject_GetBuffer(v, &view, PyBUF_SIMPLE) == 0) {
-        s = (const char *)view.buf;
-        len = view.len;
-    }
     else {
         PyErr_Format(PyExc_TypeError,
             "complex() argument must be a string or a number, not '%.200s'",
@@ -895,7 +890,6 @@
     if (s-start != len)
         goto parse_error;
 
-    PyBuffer_Release(&view);
     Py_XDECREF(s_buffer);
     return complex_subtype_from_doubles(type, x, y);
 
@@ -903,7 +897,6 @@
     PyErr_SetString(PyExc_ValueError,
                     "complex() arg is a malformed string");
   error:
-    PyBuffer_Release(&view);
     Py_XDECREF(s_buffer);
     return NULL;
 }
diff --git a/Objects/floatobject.c b/Objects/floatobject.c
--- a/Objects/floatobject.c
+++ b/Objects/floatobject.c
@@ -144,9 +144,24 @@
             return NULL;
         }
     }
+    else if (PyBytes_Check(v)) {
+        s = PyBytes_AS_STRING(v);
+        len = PyBytes_GET_SIZE(v);
+    }
+    else if (PyByteArray_Check(v)) {
+        s = PyByteArray_AS_STRING(v);
+        len = PyByteArray_GET_SIZE(v);
+    }
     else if (PyObject_GetBuffer(v, &view, PyBUF_SIMPLE) == 0) {
         s = (const char *)view.buf;
         len = view.len;
+        /* Copy to NUL-terminated buffer. */
+        s_buffer = PyBytes_FromStringAndSize(s, len);
+        if (s_buffer == NULL) {
+            PyBuffer_Release(&view);
+            return NULL;
+        }
+        s = PyBytes_AS_STRING(s_buffer);
     }
     else {
         PyErr_Format(PyExc_TypeError,
diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c
--- a/Python/bltinmodule.c
+++ b/Python/bltinmodule.c
@@ -599,20 +599,37 @@
 
 
 static const char *
-source_as_string(PyObject *cmd, const char *funcname, const char *what, PyCompilerFlags *cf, Py_buffer *view)
+source_as_string(PyObject *cmd, const char *funcname, const char *what, PyCompilerFlags *cf, PyObject **cmd_copy)
 {
     const char *str;
     Py_ssize_t size;
+    Py_buffer view;
 
+    *cmd_copy = NULL;
     if (PyUnicode_Check(cmd)) {
         cf->cf_flags |= PyCF_IGNORE_COOKIE;
         str = PyUnicode_AsUTF8AndSize(cmd, &size);
         if (str == NULL)
             return NULL;
     }
-    else if (PyObject_GetBuffer(cmd, view, PyBUF_SIMPLE) == 0) {
-        str = (const char *)view->buf;
-        size = view->len;
+    else if (PyBytes_Check(cmd)) {
+        str = PyBytes_AS_STRING(cmd);
+        size = PyBytes_GET_SIZE(cmd);
+    }
+    else if (PyByteArray_Check(cmd)) {
+        str = PyByteArray_AS_STRING(cmd);
+        size = PyByteArray_GET_SIZE(cmd);
+    }
+    else if (PyObject_GetBuffer(cmd, &view, PyBUF_SIMPLE) == 0) {
+        /* Copy to NUL-terminated buffer. */
+        *cmd_copy = PyBytes_FromStringAndSize(
+            (const char *)view.buf, view.len);
+        PyBuffer_Release(&view);
+        if (*cmd_copy == NULL) {
+            return NULL;
+        }
+        str = PyBytes_AS_STRING(*cmd_copy);
+        size = PyBytes_GET_SIZE(*cmd_copy);
     }
     else {
         PyErr_Format(PyExc_TypeError,
@@ -624,7 +641,7 @@
     if (strlen(str) != (size_t)size) {
         PyErr_SetString(PyExc_ValueError,
                         "source code string cannot contain null bytes");
-        PyBuffer_Release(view);
+        Py_CLEAR(*cmd_copy);
         return NULL;
     }
     return str;
@@ -660,7 +677,7 @@
                      int dont_inherit, int optimize)
 /*[clinic end generated code: output=31881762c1bb90c4 input=9d53e8cfb3c86414]*/
 {
-    Py_buffer view = {NULL, NULL};
+    PyObject *source_copy;
     const char *str;
     int compile_mode = -1;
     int is_ast;
@@ -732,12 +749,12 @@
         goto finally;
     }
 
-    str = source_as_string(source, "compile", "string, bytes or AST", &cf, &view);
+    str = source_as_string(source, "compile", "string, bytes or AST", &cf, &source_copy);
     if (str == NULL)
         goto error;
 
     result = Py_CompileStringObject(str, filename, start[compile_mode], &cf, optimize);
-    PyBuffer_Release(&view);
+    Py_XDECREF(source_copy);
     goto finally;
 
 error:
@@ -812,8 +829,7 @@
                   PyObject *locals)
 /*[clinic end generated code: output=7284501fb7b4d666 input=11ee718a8640e527]*/
 {
-    PyObject *result, *tmp = NULL;
-    Py_buffer view = {NULL, NULL};
+    PyObject *result, *source_copy;
     const char *str;
     PyCompilerFlags cf;
 
@@ -861,7 +877,7 @@
     }
 
     cf.cf_flags = PyCF_SOURCE_IS_UTF8;
-    str = source_as_string(source, "eval", "string, bytes or code", &cf, &view);
+    str = source_as_string(source, "eval", "string, bytes or code", &cf, &source_copy);
     if (str == NULL)
         return NULL;
 
@@ -870,8 +886,7 @@
 
     (void)PyEval_MergeCompilerFlags(&cf);
     result = PyRun_StringFlags(str, Py_eval_input, globals, locals, &cf);
-    PyBuffer_Release(&view);
-    Py_XDECREF(tmp);
+    Py_XDECREF(source_copy);
     return result;
 }
 
@@ -942,12 +957,13 @@
         v = PyEval_EvalCode(source, globals, locals);
     }
     else {
-        Py_buffer view = {NULL, NULL};
+        PyObject *source_copy;
         const char *str;
         PyCompilerFlags cf;
         cf.cf_flags = PyCF_SOURCE_IS_UTF8;
         str = source_as_string(source, "exec",
-                                       "string, bytes or code", &cf, &view);
+                                       "string, bytes or code", &cf,
+                                       &source_copy);
         if (str == NULL)
             return NULL;
         if (PyEval_MergeCompilerFlags(&cf))
@@ -955,7 +971,7 @@
                                   locals, &cf);
         else
             v = PyRun_String(str, Py_file_input, globals, locals);
-        PyBuffer_Release(&view);
+        Py_XDECREF(source_copy);
     }
     if (v == NULL)
         return NULL;

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list