[Python-checkins] cpython (2.7): Issue #23908: os functions, open() and the io.FileIO constructor now reject

serhiy.storchaka python-checkins at python.org
Fri Jul 1 16:35:15 EDT 2016


https://hg.python.org/cpython/rev/30099abdb3a4
changeset:   102244:30099abdb3a4
branch:      2.7
parent:      102232:8f1d3ebdbc56
user:        Serhiy Storchaka <storchaka at gmail.com>
date:        Fri Jul 01 23:34:44 2016 +0300
summary:
  Issue #23908: os functions, open() and the io.FileIO constructor now reject
unicode paths with embedded null character on Windows instead of silently
truncate them.

files:
  Lib/test/test_fileio.py   |   5 +
  Lib/test/test_getargs2.py |   2 +-
  Lib/test/test_io.py       |   9 ++
  Lib/test/test_posix.py    |  38 +++++++++++
  Misc/NEWS                 |   4 +
  Modules/_io/fileio.c      |  15 ++++-
  Modules/posixmodule.c     |  87 ++++++++++----------------
  Objects/fileobject.c      |   3 +-
  Python/getargs.c          |  19 +++++-
  9 files changed, 125 insertions(+), 57 deletions(-)


diff --git a/Lib/test/test_fileio.py b/Lib/test/test_fileio.py
--- a/Lib/test/test_fileio.py
+++ b/Lib/test/test_fileio.py
@@ -361,6 +361,11 @@
         finally:
             os.unlink(TESTFN)
 
+    def testConstructorHandlesNULChars(self):
+        fn_with_NUL = 'foo\0bar'
+        self.assertRaises(TypeError, _FileIO, fn_with_NUL, 'w')
+        self.assertRaises(TypeError, _FileIO, fn_with_NUL.encode('ascii'), 'w')
+
     def testInvalidFd(self):
         self.assertRaises(ValueError, _FileIO, -10)
         self.assertRaises(OSError, _FileIO, make_bad_fd())
diff --git a/Lib/test/test_getargs2.py b/Lib/test/test_getargs2.py
--- a/Lib/test/test_getargs2.py
+++ b/Lib/test/test_getargs2.py
@@ -779,7 +779,7 @@
     def test_u(self):
         from _testcapi import getargs_u
         self.assertEqual(getargs_u(u'abc\xe9'), u'abc\xe9')
-        self.assertEqual(getargs_u(u'nul:\0'), u'nul:')
+        self.assertRaises(TypeError, getargs_u, u'nul:\0')
         self.assertRaises(TypeError, getargs_u, 'bytes')
         self.assertRaises(TypeError, getargs_u, bytearray('bytearray'))
         self.assertRaises(TypeError, getargs_u, memoryview('memoryview'))
diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py
--- a/Lib/test/test_io.py
+++ b/Lib/test/test_io.py
@@ -349,6 +349,15 @@
             self.assertRaises(IOError, fp.write, "blah")
             self.assertRaises(IOError, fp.writelines, ["blah\n"])
 
+    def test_open_handles_NUL_chars(self):
+        fn_with_NUL = 'foo\0bar'
+        self.assertRaises(TypeError, self.open, fn_with_NUL, 'w')
+
+        bytes_fn = fn_with_NUL.encode('ascii')
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore", DeprecationWarning)
+            self.assertRaises(TypeError, self.open, bytes_fn, 'w')
+
     def test_raw_file_io(self):
         with self.open(support.TESTFN, "wb", buffering=0) as f:
             self.assertEqual(f.readable(), False)
diff --git a/Lib/test/test_posix.py b/Lib/test/test_posix.py
--- a/Lib/test/test_posix.py
+++ b/Lib/test/test_posix.py
@@ -577,6 +577,44 @@
                 set([int(x) for x in groups.split()]),
                 set(posix.getgroups() + [posix.getegid()]))
 
+    @test_support.requires_unicode
+    def test_path_with_null_unicode(self):
+        fn = test_support.TESTFN_UNICODE
+        fn_with_NUL = fn + u'\0'
+        self.addCleanup(test_support.unlink, fn)
+        test_support.unlink(fn)
+        fd = None
+        try:
+            with self.assertRaises(TypeError):
+                fd = os.open(fn_with_NUL, os.O_WRONLY | os.O_CREAT) # raises
+        finally:
+            if fd is not None:
+                os.close(fd)
+        self.assertFalse(os.path.exists(fn))
+        self.assertRaises(TypeError, os.mkdir, fn_with_NUL)
+        self.assertFalse(os.path.exists(fn))
+        open(fn, 'wb').close()
+        self.assertRaises(TypeError, os.stat, fn_with_NUL)
+
+    def test_path_with_null_byte(self):
+        fn = test_support.TESTFN
+        fn_with_NUL = fn + '\0'
+        self.addCleanup(test_support.unlink, fn)
+        test_support.unlink(fn)
+        fd = None
+        try:
+            with self.assertRaises(TypeError):
+                fd = os.open(fn_with_NUL, os.O_WRONLY | os.O_CREAT) # raises
+        finally:
+            if fd is not None:
+                os.close(fd)
+        self.assertFalse(os.path.exists(fn))
+        self.assertRaises(TypeError, os.mkdir, fn_with_NUL)
+        self.assertFalse(os.path.exists(fn))
+        open(fn, 'wb').close()
+        self.assertRaises(TypeError, os.stat, fn_with_NUL)
+
+
 class PosixGroupsTester(unittest.TestCase):
 
     def setUp(self):
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,10 @@
 Core and Builtins
 -----------------
 
+- Issue #23908: os functions, open() and the io.FileIO constructor now reject
+  unicode paths with embedded null character on Windows instead of silently
+  truncate them.
+
 Library
 -------
 
diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c
--- a/Modules/_io/fileio.c
+++ b/Modules/_io/fileio.c
@@ -224,8 +224,13 @@
     }
 
 #ifdef MS_WINDOWS
-    if (PyUnicode_Check(nameobj))
+    if (PyUnicode_Check(nameobj)) {
         widename = PyUnicode_AS_UNICODE(nameobj);
+        if (wcslen(widename) != (size_t)PyUnicode_GET_SIZE(nameobj)) {
+            PyErr_SetString(PyExc_TypeError, "embedded NUL character");
+            return -1;
+        }
+    }
     if (widename == NULL)
 #endif
     if (fd < 0)
@@ -234,6 +239,10 @@
             Py_ssize_t namelen;
             if (PyObject_AsCharBuffer(nameobj, &name, &namelen) < 0)
                 return -1;
+            if (strlen(name) != (size_t)namelen) {
+                PyErr_SetString(PyExc_TypeError, "embedded NUL character");
+                return -1;
+            }
         }
         else {
             PyObject *u = PyUnicode_FromObject(nameobj);
@@ -252,6 +261,10 @@
                 goto error;
             }
             name = PyBytes_AS_STRING(stringobj);
+            if (strlen(name) != (size_t)PyBytes_GET_SIZE(stringobj)) {
+                PyErr_SetString(PyExc_TypeError, "embedded NUL character");
+                goto error;
+            }
         }
     }
 
diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c
--- a/Modules/posixmodule.c
+++ b/Modules/posixmodule.c
@@ -1646,13 +1646,9 @@
     PyObject *result;
 
 #ifdef MS_WINDOWS
-    PyUnicodeObject *po;
-    if (PyArg_ParseTuple(args, wformat, &po)) {
-        Py_UNICODE *wpath = PyUnicode_AS_UNICODE(po);
-
+    Py_UNICODE *wpath;
+    if (PyArg_ParseTuple(args, wformat, &wpath)) {
         Py_BEGIN_ALLOW_THREADS
-            /* PyUnicode_AS_UNICODE result OK without
-               thread lock as it is a simple dereference. */
         res = wstatfunc(wpath, &st);
         Py_END_ALLOW_THREADS
 
@@ -1706,12 +1702,10 @@
 
 #ifdef MS_WINDOWS
     DWORD attr;
-    PyUnicodeObject *po;
-    if (PyArg_ParseTuple(args, "Ui:access", &po, &mode)) {
+    Py_UNICODE *wpath;
+    if (PyArg_ParseTuple(args, "ui:access", &wpath, &mode)) {
         Py_BEGIN_ALLOW_THREADS
-        /* PyUnicode_AS_UNICODE OK without thread lock as
-           it is a simple dereference. */
-        attr = GetFileAttributesW(PyUnicode_AS_UNICODE(po));
+        attr = GetFileAttributesW(wpath);
         Py_END_ALLOW_THREADS
         goto finish;
     }
@@ -1858,23 +1852,22 @@
     int res;
 #ifdef MS_WINDOWS
     DWORD attr;
-    PyUnicodeObject *po;
-    if (PyArg_ParseTuple(args, "Ui|:chmod", &po, &i)) {
+    Py_UNICODE *wpath;
+    if (PyArg_ParseTuple(args, "ui|:chmod", &wpath, &i)) {
         Py_BEGIN_ALLOW_THREADS
-        attr = GetFileAttributesW(PyUnicode_AS_UNICODE(po));
+        attr = GetFileAttributesW(wpath);
         if (attr != 0xFFFFFFFF) {
             if (i & _S_IWRITE)
                 attr &= ~FILE_ATTRIBUTE_READONLY;
             else
                 attr |= FILE_ATTRIBUTE_READONLY;
-            res = SetFileAttributesW(PyUnicode_AS_UNICODE(po), attr);
+            res = SetFileAttributesW(wpath, attr);
         }
         else
             res = 0;
         Py_END_ALLOW_THREADS
         if (!res)
-            return win32_error_unicode("chmod",
-                                       PyUnicode_AS_UNICODE(po));
+            return win32_error_unicode("chmod", wpath);
         Py_INCREF(Py_None);
         return Py_None;
     }
@@ -2300,18 +2293,18 @@
     char *bufptr = namebuf;
     Py_ssize_t len = sizeof(namebuf)-5; /* only claim to have space for MAX_PATH */
 
-    PyObject *po;
-    if (PyArg_ParseTuple(args, "U:listdir", &po)) {
+    Py_UNICODE *wpath;
+    if (PyArg_ParseTuple(args, "u:listdir", &wpath)) {
         WIN32_FIND_DATAW wFileData;
         Py_UNICODE *wnamebuf;
         /* Overallocate for \\*.*\0 */
-        len = PyUnicode_GET_SIZE(po);
+        len = wcslen(wpath);
         wnamebuf = malloc((len + 5) * sizeof(wchar_t));
         if (!wnamebuf) {
             PyErr_NoMemory();
             return NULL;
         }
-        wcscpy(wnamebuf, PyUnicode_AS_UNICODE(po));
+        wcscpy(wnamebuf, wpath);
         if (len > 0) {
             Py_UNICODE wch = wnamebuf[len-1];
             if (wch != L'/' && wch != L'\\' && wch != L':')
@@ -2615,9 +2608,8 @@
     char outbuf[MAX_PATH*2];
     char *temp;
 
-    PyUnicodeObject *po;
-    if (PyArg_ParseTuple(args, "U|:_getfullpathname", &po)) {
-        Py_UNICODE *wpath = PyUnicode_AS_UNICODE(po);
+    Py_UNICODE *wpath;
+    if (PyArg_ParseTuple(args, "u|:_getfullpathname", &wpath)) {
         Py_UNICODE woutbuf[MAX_PATH*2], *woutbufp = woutbuf;
         Py_UNICODE *wtemp;
         DWORD result;
@@ -2670,15 +2662,13 @@
     int mode = 0777;
 
 #ifdef MS_WINDOWS
-    PyUnicodeObject *po;
-    if (PyArg_ParseTuple(args, "U|i:mkdir", &po, &mode)) {
+    Py_UNICODE *wpath;
+    if (PyArg_ParseTuple(args, "u|i:mkdir", &wpath, &mode)) {
         Py_BEGIN_ALLOW_THREADS
-        /* PyUnicode_AS_UNICODE OK without thread lock as
-           it is a simple dereference. */
-        res = CreateDirectoryW(PyUnicode_AS_UNICODE(po), NULL);
+        res = CreateDirectoryW(wpath, NULL);
         Py_END_ALLOW_THREADS
         if (!res)
-            return win32_error_unicode("mkdir", PyUnicode_AS_UNICODE(po));
+            return win32_error_unicode("mkdir", wpath);
         Py_INCREF(Py_None);
         return Py_None;
     }
@@ -2689,8 +2679,6 @@
                           Py_FileSystemDefaultEncoding, &path, &mode))
         return NULL;
     Py_BEGIN_ALLOW_THREADS
-    /* PyUnicode_AS_UNICODE OK without thread lock as
-       it is a simple dereference. */
     res = CreateDirectoryA(path, NULL);
     Py_END_ALLOW_THREADS
     if (!res) {
@@ -2833,7 +2821,7 @@
 posix_stat(PyObject *self, PyObject *args)
 {
 #ifdef MS_WINDOWS
-    return posix_do_stat(self, args, "et:stat", STAT, "U:stat", win32_wstat);
+    return posix_do_stat(self, args, "et:stat", STAT, "u:stat", win32_wstat);
 #else
     return posix_do_stat(self, args, "et:stat", STAT, NULL, NULL);
 #endif
@@ -2969,7 +2957,6 @@
 {
 #ifdef MS_WINDOWS
     PyObject *arg;
-    PyUnicodeObject *obwpath;
     wchar_t *wpath = NULL;
     char *apath = NULL;
     HANDLE hFile;
@@ -2978,8 +2965,7 @@
     FILETIME atime, mtime;
     PyObject *result = NULL;
 
-    if (PyArg_ParseTuple(args, "UO|:utime", &obwpath, &arg)) {
-        wpath = PyUnicode_AS_UNICODE(obwpath);
+    if (PyArg_ParseTuple(args, "uO|:utime", &wpath, &arg)) {
         Py_BEGIN_ALLOW_THREADS
         hFile = CreateFileW(wpath, FILE_WRITE_ATTRIBUTES, 0,
                             NULL, OPEN_EXISTING,
@@ -4440,14 +4426,11 @@
 static PyObject *
 posix__isdir(PyObject *self, PyObject *args)
 {
-    PyObject *opath;
     char *path;
-    PyUnicodeObject *po;
+    Py_UNICODE *wpath;
     DWORD attributes;
 
-    if (PyArg_ParseTuple(args, "U|:_isdir", &po)) {
-        Py_UNICODE *wpath = PyUnicode_AS_UNICODE(po);
-
+    if (PyArg_ParseTuple(args, "u|:_isdir", &wpath)) {
         attributes = GetFileAttributesW(wpath);
         if (attributes == INVALID_FILE_ATTRIBUTES)
             Py_RETURN_FALSE;
@@ -6326,7 +6309,7 @@
     return posix_do_stat(self, args, "et:lstat", lstat, NULL, NULL);
 #else /* !HAVE_LSTAT */
 #ifdef MS_WINDOWS
-    return posix_do_stat(self, args, "et:lstat", STAT, "U:lstat", win32_wstat);
+    return posix_do_stat(self, args, "et:lstat", STAT, "u:lstat", win32_wstat);
 #else
     return posix_do_stat(self, args, "et:lstat", STAT, NULL, NULL);
 #endif
@@ -6600,12 +6583,10 @@
     int fd;
 
 #ifdef MS_WINDOWS
-    PyUnicodeObject *po;
-    if (PyArg_ParseTuple(args, "Ui|i:mkdir", &po, &flag, &mode)) {
+    Py_UNICODE *wpath;
+    if (PyArg_ParseTuple(args, "ui|i:mkdir", &wpath, &flag, &mode)) {
         Py_BEGIN_ALLOW_THREADS
-        /* PyUnicode_AS_UNICODE OK without thread
-           lock as it is a simple dereference. */
-        fd = _wopen(PyUnicode_AS_UNICODE(po), flag, mode);
+        fd = _wopen(wpath, flag, mode);
         Py_END_ALLOW_THREADS
         if (fd < 0)
             return posix_error();
@@ -8662,12 +8643,13 @@
 win32_startfile(PyObject *self, PyObject *args)
 {
     char *filepath;
+    Py_UNICODE *wpath;
     char *operation = NULL;
     HINSTANCE rc;
 
-    PyObject *unipath, *woperation = NULL;
-    if (!PyArg_ParseTuple(args, "U|s:startfile",
-                          &unipath, &operation)) {
+    PyObject *woperation = NULL;
+    if (!PyArg_ParseTuple(args, "u|s:startfile",
+                          &wpath, &operation)) {
         PyErr_Clear();
         goto normal;
     }
@@ -8684,14 +8666,13 @@
 
     Py_BEGIN_ALLOW_THREADS
     rc = ShellExecuteW((HWND)0, woperation ? PyUnicode_AS_UNICODE(woperation) : 0,
-        PyUnicode_AS_UNICODE(unipath),
+        wpath,
         NULL, NULL, SW_SHOWNORMAL);
     Py_END_ALLOW_THREADS
 
     Py_XDECREF(woperation);
     if (rc <= (HINSTANCE)32) {
-        PyObject *errval = win32_error_unicode("startfile",
-                                               PyUnicode_AS_UNICODE(unipath));
+        PyObject *errval = win32_error_unicode("startfile", wpath);
         return errval;
     }
     Py_INCREF(Py_None);
diff --git a/Objects/fileobject.c b/Objects/fileobject.c
--- a/Objects/fileobject.c
+++ b/Objects/fileobject.c
@@ -2394,7 +2394,8 @@
 
 #ifdef MS_WINDOWS
     if (PyArg_ParseTupleAndKeywords(args, kwds, "U|si:file",
-                                    kwlist, &po, &mode, &bufsize)) {
+                                    kwlist, &po, &mode, &bufsize) &&
+        wcslen(PyUnicode_AS_UNICODE(po)) == (size_t)PyUnicode_GET_SIZE(po)) {
         wideargument = 1;
         if (fill_file_fields(foself, NULL, po, mode,
                              fclose) == NULL)
diff --git a/Python/getargs.c b/Python/getargs.c
--- a/Python/getargs.c
+++ b/Python/getargs.c
@@ -576,6 +576,17 @@
         return 0;
 }
 
+#ifdef Py_USING_UNICODE
+static size_t
+_ustrlen(Py_UNICODE *u)
+{
+    size_t i = 0;
+    Py_UNICODE *v = u;
+    while (*v != 0) { i++; v++; }
+    return i;
+}
+#endif
+
 /* Convert a non-tuple argument.  Return NULL if conversion went OK,
    or a string with a message describing the failure.  The message is
    formatted as "must be <desired type>, not <actual type>".
@@ -1202,8 +1213,14 @@
             format++;
         } else {
             Py_UNICODE **p = va_arg(*p_va, Py_UNICODE **);
-            if (PyUnicode_Check(arg))
+            if (PyUnicode_Check(arg)) {
                 *p = PyUnicode_AS_UNICODE(arg);
+                if (_ustrlen(*p) != (size_t)PyUnicode_GET_SIZE(arg)) {
+                    return converterr(
+                        "unicode without null characters",
+                        arg, msgbuf, bufsize);
+                }
+            }
             else
                 return converterr("unicode", arg, msgbuf, bufsize);
         }

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list