[Python-checkins] cpython (merge 3.3 -> default): Issue #20368: The null character is now correctly passed from Tcl to Python.

serhiy.storchaka python-checkins at python.org
Mon Feb 3 20:39:41 CET 2014


http://hg.python.org/cpython/rev/321b714653e3
changeset:   88935:321b714653e3
parent:      88932:28ec384e7dcc
parent:      88934:08e3343f01a5
user:        Serhiy Storchaka <storchaka at gmail.com>
date:        Mon Feb 03 21:25:56 2014 +0200
summary:
  Issue #20368: The null character is now correctly passed from Tcl to Python.
Improved error handling in variable-related commands.

files:
  Lib/test/test_tcl.py                            |   50 ++-
  Lib/tkinter/test/test_tkinter/test_variables.py |   18 +
  Misc/NEWS                                       |    3 +
  Modules/_tkinter.c                              |  184 ++++++---
  4 files changed, 176 insertions(+), 79 deletions(-)


diff --git a/Lib/test/test_tcl.py b/Lib/test/test_tcl.py
--- a/Lib/test/test_tcl.py
+++ b/Lib/test/test_tcl.py
@@ -55,6 +55,10 @@
         tcl.eval('set a 1')
         self.assertEqual(tcl.eval('set a'),'1')
 
+    def test_eval_null_in_result(self):
+        tcl = self.interp
+        self.assertEqual(tcl.eval('set a "a\\0b"'), 'a\x00b')
+
     def testEvalException(self):
         tcl = self.interp
         self.assertRaises(TclError,tcl.eval,'set a')
@@ -127,20 +131,29 @@
 
     def testEvalFile(self):
         tcl = self.interp
-        filename = "testEvalFile.tcl"
-        fd = open(filename,'w')
-        script = """set a 1
-        set b 2
-        set c [ expr $a + $b ]
-        """
-        fd.write(script)
-        fd.close()
-        tcl.evalfile(filename)
-        os.remove(filename)
+        with open(support.TESTFN, 'w') as f:
+            self.addCleanup(support.unlink, support.TESTFN)
+            f.write("""set a 1
+            set b 2
+            set c [ expr $a + $b ]
+            """)
+        tcl.evalfile(support.TESTFN)
         self.assertEqual(tcl.eval('set a'),'1')
         self.assertEqual(tcl.eval('set b'),'2')
         self.assertEqual(tcl.eval('set c'),'3')
 
+    def test_evalfile_null_in_result(self):
+        tcl = self.interp
+        with open(support.TESTFN, 'w') as f:
+            self.addCleanup(support.unlink, support.TESTFN)
+            f.write("""
+            set a "a\0b"
+            set b "a\\0b"
+            """)
+        tcl.evalfile(support.TESTFN)
+        self.assertEqual(tcl.eval('set a'), 'a\x00b')
+        self.assertEqual(tcl.eval('set b'), 'a\x00b')
+
     def testEvalFileException(self):
         tcl = self.interp
         filename = "doesnotexists"
@@ -209,6 +222,7 @@
         check('"abc"', 'abc')
         check('"a\xbd\u20ac"', 'a\xbd\u20ac')
         check(r'"a\xbd\u20ac"', 'a\xbd\u20ac')
+        check(r'"a\0b"', 'a\x00b')
 
     def test_exprdouble(self):
         tcl = self.interp
@@ -320,6 +334,11 @@
         self.assertEqual(passValue(False), False if self.wantobjects else '0')
         self.assertEqual(passValue('string'), 'string')
         self.assertEqual(passValue('string\u20ac'), 'string\u20ac')
+        self.assertEqual(passValue('str\x00ing'), 'str\x00ing')
+        self.assertEqual(passValue('str\x00ing\xbd'), 'str\x00ing\xbd')
+        self.assertEqual(passValue('str\x00ing\u20ac'), 'str\x00ing\u20ac')
+        self.assertEqual(passValue(b'str\x00ing'), 'str\x00ing')
+        self.assertEqual(passValue(b'str\xc0\x80ing'), 'str\x00ing')
         for i in (0, 1, -1, 2**31-1, -2**31):
             self.assertEqual(passValue(i), i if self.wantobjects else str(i))
         for f in (0.0, 1.0, -1.0, 1/3,
@@ -368,6 +387,13 @@
         check('string', 'string')
         check('string\xbd', 'string\xbd')
         check('string\u20ac', 'string\u20ac')
+        check(b'string', 'string')
+        check(b'string\xe2\x82\xac', 'string\u20ac')
+        check('str\x00ing', 'str\x00ing')
+        check('str\x00ing\xbd', 'str\x00ing\xbd')
+        check('str\x00ing\u20ac', 'str\x00ing\u20ac')
+        check(b'str\xc0\x80ing', 'str\x00ing')
+        check(b'str\xc0\x80ing\xe2\x82\xac', 'str\x00ing\u20ac')
         for i in (0, 1, -1, 2**31-1, -2**31):
             check(i, str(i))
         for f in (0.0, 1.0, -1.0):
@@ -396,6 +422,7 @@
             (b'a\n b\t\r c\n ', ('a', 'b', 'c')),
             ('a \u20ac', ('a', '\u20ac')),
             (b'a \xe2\x82\xac', ('a', '\u20ac')),
+            (b'a\xc0\x80b c\xc0\x80d', ('a\x00b', 'c\x00d')),
             ('a {b c}', ('a', 'b c')),
             (r'a b\ c', ('a', 'b c')),
             (('a', 'b c'), ('a', 'b c')),
@@ -438,6 +465,9 @@
             (b'a\n b\t\r c\n ', ('a', 'b', 'c')),
             ('a \u20ac', ('a', '\u20ac')),
             (b'a \xe2\x82\xac', ('a', '\u20ac')),
+            (b'a\xc0\x80b', 'a\x00b'),
+            (b'a\xc0\x80b c\xc0\x80d', ('a\x00b', 'c\x00d')),
+            (b'{a\xc0\x80b c\xc0\x80d', '{a\x00b c\x00d'),
             ('a {b c}', ('a', ('b', 'c'))),
             (r'a b\ c', ('a', ('b', 'c'))),
             (('a', b'b c'), ('a', ('b', 'c'))),
diff --git a/Lib/tkinter/test/test_tkinter/test_variables.py b/Lib/tkinter/test/test_tkinter/test_variables.py
--- a/Lib/tkinter/test/test_tkinter/test_variables.py
+++ b/Lib/tkinter/test/test_tkinter/test_variables.py
@@ -68,6 +68,18 @@
         with self.assertRaises(TypeError):
             Variable(self.root, name=123)
 
+    def test_null_in_name(self):
+        with self.assertRaises(ValueError):
+            Variable(self.root, name='var\x00name')
+        with self.assertRaises(ValueError):
+            self.root.globalsetvar('var\x00name', "value")
+        with self.assertRaises(ValueError):
+            self.root.globalsetvar(b'var\x00name', "value")
+        with self.assertRaises(ValueError):
+            self.root.setvar('var\x00name', "value")
+        with self.assertRaises(ValueError):
+            self.root.setvar(b'var\x00name', "value")
+
     def test_initialize(self):
         v = Var()
         self.assertFalse(v.side_effect)
@@ -87,6 +99,12 @@
         self.root.globalsetvar("name", "value")
         self.assertEqual("value", v.get())
 
+    def test_get_null(self):
+        v = StringVar(self.root, "abc\x00def", "name")
+        self.assertEqual("abc\x00def", v.get())
+        self.root.globalsetvar("name", "val\x00ue")
+        self.assertEqual("val\x00ue", v.get())
+
 
 class TestIntVar(TestBase):
 
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -16,6 +16,9 @@
 Library
 -------
 
+- Issue #20368: The null character now correctly passed from Tcl to Python.
+  Improved error handling in variables-related commands.
+
 - Issue #20435: Fix _pyio.StringIO.getvalue() to take into account newline
   translation settings.
 
diff --git a/Modules/_tkinter.c b/Modules/_tkinter.c
--- a/Modules/_tkinter.c
+++ b/Modules/_tkinter.c
@@ -329,6 +329,51 @@
 
 
 static PyObject *
+unicodeFromTclStringAndSize(const char *s, Py_ssize_t size)
+{
+    PyObject *r = PyUnicode_DecodeUTF8(s, size, NULL);
+    if (!r && PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
+        /* Tcl encodes null character as \xc0\x80 */
+        if (memchr(s, '\xc0', size)) {
+            char *buf, *q;
+            const char *e = s + size;
+            PyErr_Clear();
+            q = buf = (char *)PyMem_Malloc(size);
+            if (buf == NULL)
+                return NULL;
+            while (s != e) {
+                if (s + 1 != e && s[0] == '\xc0' && s[1] == '\x80') {
+                    *q++ = '\0';
+                    s += 2;
+                }
+                else
+                    *q++ = *s++;
+            }
+            s = buf;
+            size = q - s;
+            r = PyUnicode_DecodeUTF8(s, size, NULL);
+            PyMem_Free(buf);
+        }
+    }
+    return r;
+}
+
+static PyObject *
+unicodeFromTclString(const char *s)
+{
+    return unicodeFromTclStringAndSize(s, strlen(s));
+}
+
+static PyObject *
+unicodeFromTclObj(Tcl_Obj *value)
+{
+    int len;
+    char *s = Tcl_GetStringFromObj(value, &len);
+    return unicodeFromTclStringAndSize(s, len);
+}
+
+
+static PyObject *
 Split(char *list)
 {
     int argc;
@@ -344,13 +389,13 @@
          * Could be a quoted string containing funnies, e.g. {"}.
          * Return the string itself.
          */
-        return PyUnicode_FromString(list);
+        return unicodeFromTclString(list);
     }
 
     if (argc == 0)
         v = PyUnicode_FromString("");
     else if (argc == 1)
-        v = PyUnicode_FromString(argv[0]);
+        v = unicodeFromTclString(argv[0]);
     else if ((v = PyTuple_New(argc)) != NULL) {
         int i;
         PyObject *w;
@@ -694,11 +739,8 @@
 static PyObject *
 PyTclObject_string(PyTclObject *self, void *ignored)
 {
-    char *s;
-    int len;
     if (!self->string) {
-        s = Tcl_GetStringFromObj(self->value, &len);
-        self->string = PyUnicode_FromStringAndSize(s, len);
+        self->string = unicodeFromTclObj(self->value);
         if (!self->string)
             return NULL;
     }
@@ -709,15 +751,12 @@
 static PyObject *
 PyTclObject_str(PyTclObject *self, void *ignored)
 {
-    char *s;
-    int len;
-    if (self->string && PyUnicode_Check(self->string)) {
+    if (self->string) {
         Py_INCREF(self->string);
         return self->string;
     }
     /* XXX Could chache result if it is non-ASCII. */
-    s = Tcl_GetStringFromObj(self->value, &len);
-    return PyUnicode_DecodeUTF8(s, len, "strict");
+    return unicodeFromTclObj(self->value);
 }
 
 static PyObject *
@@ -792,7 +831,7 @@
 static PyObject*
 get_typename(PyTclObject* obj, void* ignored)
 {
-    return PyUnicode_FromString(obj->value->typePtr->name);
+    return unicodeFromTclString(obj->value->typePtr->name);
 }
 
 
@@ -879,6 +918,8 @@
             return NULL;
         }
         kind = PyUnicode_KIND(value);
+        if (kind == sizeof(Tcl_UniChar))
+            return Tcl_NewUnicodeObj(inbuf, size);
         allocsize = ((size_t)size) * sizeof(Tcl_UniChar);
         outbuf = (Tcl_UniChar*)ckalloc(allocsize);
         /* Else overflow occurred, and we take the next exit */
@@ -929,8 +970,7 @@
     TkappObject *app = (TkappObject*)tkapp;
 
     if (value->typePtr == NULL) {
-        return PyUnicode_FromStringAndSize(value->bytes,
-                                           value->length);
+        return unicodeFromTclStringAndSize(value->bytes, value->length);
     }
 
     if (value->typePtr == app->BooleanType) {
@@ -987,15 +1027,9 @@
     }
 
     if (value->typePtr == app->StringType) {
-#if TCL_UTF_MAX==3
         return PyUnicode_FromKindAndData(
-            PyUnicode_2BYTE_KIND, Tcl_GetUnicode(value),
+            sizeof(Tcl_UniChar), Tcl_GetUnicode(value),
             Tcl_GetCharLength(value));
-#else
-        return PyUnicode_FromKindAndData(
-            PyUnicode_4BYTE_KIND, Tcl_GetUnicode(value),
-            Tcl_GetCharLength(value));
-#endif
     }
 
     return newPyTclObject(value);
@@ -1089,8 +1123,8 @@
 Tkapp_CallResult(TkappObject *self)
 {
     PyObject *res = NULL;
+    Tcl_Obj *value = Tcl_GetObjResult(self->interp);
     if(self->wantobjects) {
-        Tcl_Obj *value = Tcl_GetObjResult(self->interp);
         /* Not sure whether the IncrRef is necessary, but something
            may overwrite the interpreter result while we are
            converting it. */
@@ -1098,7 +1132,7 @@
         res = FromObj((PyObject*)self, value);
         Tcl_DecrRefCount(value);
     } else {
-        res = PyUnicode_FromString(Tcl_GetStringResult(self->interp));
+        res = unicodeFromTclObj(value);
     }
     return res;
 }
@@ -1253,7 +1287,7 @@
     if (err == TCL_ERROR)
         res = Tkinter_Error(self);
     else
-        res = PyUnicode_FromString(Tkapp_Result(self));
+        res = unicodeFromTclString(Tkapp_Result(self));
     LEAVE_OVERLAP_TCL
     return res;
 }
@@ -1275,9 +1309,8 @@
     ENTER_OVERLAP
     if (err == TCL_ERROR)
         res = Tkinter_Error(self);
-
     else
-        res = PyUnicode_FromString(Tkapp_Result(self));
+        res = unicodeFromTclString(Tkapp_Result(self));
     LEAVE_OVERLAP_TCL
     return res;
 }
@@ -1300,7 +1333,7 @@
     if (err == TCL_ERROR)
         res = Tkinter_Error(self);
     else
-        res = PyUnicode_FromString(Tkapp_Result(self));
+        res = unicodeFromTclString(Tkapp_Result(self));
     LEAVE_OVERLAP_TCL
     return res;
 }
@@ -1346,20 +1379,42 @@
 static int
 varname_converter(PyObject *in, void *_out)
 {
+    char *s;
     char **out = (char**)_out;
     if (PyBytes_Check(in)) {
-        *out = PyBytes_AsString(in);
+        if (PyBytes_Size(in) > INT_MAX) {
+            PyErr_SetString(PyExc_OverflowError, "bytes object is too long");
+            return 0;
+        }
+        s = PyBytes_AsString(in);
+        if (strlen(s) != PyBytes_Size(in)) {
+            PyErr_SetString(PyExc_ValueError, "null byte in bytes object");
+            return 0;
+        }
+        *out = s;
         return 1;
     }
     if (PyUnicode_Check(in)) {
-        *out = _PyUnicode_AsString(in);
+        Py_ssize_t size;
+        s = PyUnicode_AsUTF8AndSize(in, &size);
+        if (size > INT_MAX) {
+            PyErr_SetString(PyExc_OverflowError, "string is too long");
+            return 0;
+        }
+        if (strlen(s) != size) {
+            PyErr_SetString(PyExc_ValueError, "null character in string");
+            return 0;
+        }
+        *out = s;
         return 1;
     }
     if (PyTclObject_Check(in)) {
         *out = PyTclObject_TclString(in);
         return 1;
     }
-    /* XXX: Should give diagnostics. */
+    PyErr_Format(PyExc_TypeError,
+                 "must be str, bytes or Tcl_Obj, not %.50s",
+                 in->ob_type->tp_name);
     return 0;
 }
 
@@ -1445,8 +1500,11 @@
     PyObject *res = NULL;
     Tcl_Obj *newval, *ok;
 
-    if (PyArg_ParseTuple(args, "O&O:setvar",
-                         varname_converter, &name1, &newValue)) {
+    switch (PyTuple_GET_SIZE(args)) {
+    case 2:
+        if (!PyArg_ParseTuple(args, "O&O:setvar",
+                              varname_converter, &name1, &newValue))
+            return NULL;
         /* XXX Acquire tcl lock??? */
         newval = AsObj(newValue);
         if (newval == NULL)
@@ -1462,27 +1520,27 @@
             Py_INCREF(res);
         }
         LEAVE_OVERLAP_TCL
-    }
-    else {
-        PyErr_Clear();
-        if (PyArg_ParseTuple(args, "ssO:setvar",
-                             &name1, &name2, &newValue)) {
-            /* XXX must hold tcl lock already??? */
-            newval = AsObj(newValue);
-            ENTER_TCL
-            ok = Tcl_SetVar2Ex(Tkapp_Interp(self), name1, name2, newval, flags);
-            ENTER_OVERLAP
-            if (!ok)
-                Tkinter_Error(self);
-            else {
-                res = Py_None;
-                Py_INCREF(res);
-            }
-            LEAVE_OVERLAP_TCL
+        break;
+    case 3:
+        if (!PyArg_ParseTuple(args, "ssO:setvar",
+                              &name1, &name2, &newValue))
+            return NULL;
+        /* XXX must hold tcl lock already??? */
+        newval = AsObj(newValue);
+        ENTER_TCL
+        ok = Tcl_SetVar2Ex(Tkapp_Interp(self), name1, name2, newval, flags);
+        ENTER_OVERLAP
+        if (!ok)
+            Tkinter_Error(self);
+        else {
+            res = Py_None;
+            Py_INCREF(res);
         }
-        else {
-            return NULL;
-        }
+        LEAVE_OVERLAP_TCL
+        break;
+    default:
+        PyErr_SetString(PyExc_TypeError, "setvar requires 2 to 3 arguments");
+        return NULL;
     }
     return res;
 }
@@ -1523,7 +1581,7 @@
             res = FromObj(self, tres);
         }
         else {
-            res = PyUnicode_FromString(Tcl_GetString(tres));
+            res = unicodeFromTclObj(tres);
         }
     }
     LEAVE_OVERLAP_TCL
@@ -1662,7 +1720,7 @@
     if (retval == TCL_ERROR)
         res = Tkinter_Error(self);
     else
-        res = Py_BuildValue("s", Tkapp_Result(self));
+        res = unicodeFromTclString(Tkapp_Result(self));
     LEAVE_OVERLAP_TCL
     return res;
 }
@@ -1787,7 +1845,7 @@
         goto finally;
 
     for (i = 0; i < argc; i++) {
-        PyObject *s = PyUnicode_FromString(argv[i]);
+        PyObject *s = unicodeFromTclString(argv[i]);
         if (!s || PyTuple_SetItem(v, i, s)) {
             Py_DECREF(v);
             v = NULL;
@@ -1885,20 +1943,8 @@
         return PythonCmd_Error(interp);
 
     for (i = 0; i < (argc - 1); i++) {
-        PyObject *s = PyUnicode_FromString(argv[i + 1]);
-        if (!s) {
-            /* Is Tk leaking 0xC080 in %A - a "modified" utf-8 null? */
-            if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError) &&
-                !strcmp(argv[i + 1], "\xC0\x80")) {
-                PyErr_Clear();
-                /* Convert to "strict" utf-8 null */
-                s = PyUnicode_FromString("\0");
-            } else {
-                Py_DECREF(arg);
-                return PythonCmd_Error(interp);
-            }
-        }
-        if (PyTuple_SetItem(arg, i, s)) {
+        PyObject *s = unicodeFromTclString(argv[i + 1]);
+        if (!s || PyTuple_SetItem(arg, i, s)) {
             Py_DECREF(arg);
             return PythonCmd_Error(interp);
         }

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list