[Python-checkins] cpython (3.2): Issue #13093: Fix error handling on PyUnicode_EncodeDecimal()

victor.stinner python-checkins at python.org
Tue Nov 22 01:48:02 CET 2011


http://hg.python.org/cpython/rev/f29d7d597fae
changeset:   73680:f29d7d597fae
branch:      3.2
parent:      73665:ac2c4c62b486
user:        Victor Stinner <victor.stinner at haypocalc.com>
date:        Tue Nov 22 01:45:37 2011 +0100
summary:
  Issue #13093: Fix error handling on PyUnicode_EncodeDecimal()

 * Add tests for PyUnicode_EncodeDecimal() and PyUnicode_TransformDecimalToASCII()
 * Remove the unused "e" variable in replace()

files:
  Lib/test/test_unicode.py  |  36 +++++++++++++++++++
  Modules/_testcapimodule.c |  51 +++++++++++++++++++++++++-
  Objects/unicodeobject.c   |  10 ++---
  3 files changed, 89 insertions(+), 8 deletions(-)


diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -1767,6 +1767,42 @@
             ]])
         self.assertRaises(TypeError, _string.formatter_field_name_split, 1)
 
+    def test_encode_decimal(self):  # exercises _testcapi.unicode_encodedecimal, with and without an error handler name
+        from _testcapi import unicode_encodedecimal
+        self.assertEqual(unicode_encodedecimal('123'),
+                         b'123')
+        self.assertEqual(unicode_encodedecimal('\u0663.\u0661\u0664'),  # non-ASCII decimal digits expected as ASCII '3', '1', '4'
+                         b'3.14')
+        self.assertEqual(unicode_encodedecimal("\N{EM SPACE}3.14\N{EN SPACE}"),  # Unicode spaces expected as ASCII ' '
+                         b' 3.14 ')
+        self.assertRaises(UnicodeEncodeError,
+                          unicode_encodedecimal, "123\u20ac", "strict")
+        self.assertEqual(unicode_encodedecimal("123\u20ac", "replace"),
+                         b'123?')
+        self.assertEqual(unicode_encodedecimal("123\u20ac", "ignore"),
+                         b'123')
+        self.assertEqual(unicode_encodedecimal("123\u20ac", "xmlcharrefreplace"),
+                         b'123&#8364;')
+        self.assertEqual(unicode_encodedecimal("123\u20ac", "backslashreplace"),
+                         b'123\\u20ac')
+        self.assertEqual(unicode_encodedecimal("123\u20ac\N{EM SPACE}", "replace"),  # unencodable character followed by a space
+                         b'123? ')
+        self.assertEqual(unicode_encodedecimal("123\u20ac\u20ac", "replace"),  # two unencodable characters in a row
+                         b'123??')
+        self.assertEqual(unicode_encodedecimal("123\u20ac\u0660", "replace"),  # unencodable character followed by a decimal digit
+                         b'123?0')
+
+    def test_transform_decimal(self):  # exercises _testcapi.unicode_transformdecimaltoascii (str in, str out)
+        from _testcapi import unicode_transformdecimaltoascii as transform_decimal
+        self.assertEqual(transform_decimal('123'),
+                         '123')
+        self.assertEqual(transform_decimal('\u0663.\u0661\u0664'),  # non-ASCII decimal digits expected as ASCII digits
+                         '3.14')
+        self.assertEqual(transform_decimal("\N{EM SPACE}3.14\N{EN SPACE}"),  # spaces are expected back unchanged
+                         "\N{EM SPACE}3.14\N{EN SPACE}")
+        self.assertEqual(transform_decimal('123\u20ac'),  # a non-decimal character is expected back unchanged
+                         '123\u20ac')
+
 
 def test_main():
     support.run_unittest(__name__)
diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c
--- a/Modules/_testcapimodule.c
+++ b/Modules/_testcapimodule.c
@@ -1438,6 +1438,51 @@
 }
 
 static PyObject *
+unicode_encodedecimal(PyObject *self, PyObject *args) /* test helper: (str[, errors]) -> bytes produced by PyUnicode_EncodeDecimal() */
+{
+    Py_UNICODE *unicode;
+    Py_ssize_t length;
+    char *errors = NULL; /* stays NULL when the optional second argument is omitted */
+    PyObject *decimal;
+    Py_ssize_t decimal_length, new_length;
+    int res;
+
+    if (!PyArg_ParseTuple(args, "u#|s", &unicode, &length, &errors))
+        return NULL;
+
+    decimal_length = length * 7; /* len('&#8364;'); NOTE(review): the buffer size is not passed to the encoder, so this presumably has to bound its output -- confirm */
+    decimal = PyBytes_FromStringAndSize(NULL, decimal_length);
+    if (decimal == NULL)
+        return NULL;
+
+    res = PyUnicode_EncodeDecimal(unicode, length,
+                                  PyBytes_AS_STRING(decimal),
+                                  errors);
+    if (res < 0) {
+        Py_DECREF(decimal);
+        return NULL;
+    }
+
+    new_length = strlen(PyBytes_AS_STRING(decimal)); /* relies on the encoder's output being NUL-terminated */
+    assert(new_length <= decimal_length);
+    res = _PyBytes_Resize(&decimal, new_length);
+    if (res < 0)
+        return NULL; /* NOTE(review): no Py_DECREF here; presumably _PyBytes_Resize releases the object on failure -- confirm */
+
+    return decimal;
+}
+
+static PyObject *
+unicode_transformdecimaltoascii(PyObject *self, PyObject *args) /* test helper: (str) -> result of PyUnicode_TransformDecimalToASCII() */
+{
+    Py_UNICODE *unicode;
+    Py_ssize_t length;
+    if (!PyArg_ParseTuple(args, "u#", &unicode, &length)) /* exactly one str argument: only two output pointers are supplied, so no optional unit may follow */
+        return NULL;
+    return PyUnicode_TransformDecimalToASCII(unicode, length);
+}
+
+static PyObject *
 getargs_w_star(PyObject *self, PyObject *args)
 {
     Py_buffer buffer;
@@ -2320,8 +2365,10 @@
     {"test_u_code",             (PyCFunction)test_u_code,        METH_NOARGS},
     {"test_Z_code",             (PyCFunction)test_Z_code,        METH_NOARGS},
     {"test_widechar",           (PyCFunction)test_widechar,      METH_NOARGS},
-    {"unicode_aswidechar",      unicode_aswidechar,                 METH_VARARGS},
-    {"unicode_aswidecharstring",unicode_aswidecharstring,           METH_VARARGS},
+    {"unicode_aswidechar",      unicode_aswidechar,              METH_VARARGS},
+    {"unicode_aswidecharstring",unicode_aswidecharstring,        METH_VARARGS},
+    {"unicode_encodedecimal",   unicode_encodedecimal,           METH_VARARGS},
+    {"unicode_transformdecimaltoascii", unicode_transformdecimaltoascii, METH_VARARGS},
 #ifdef WITH_THREAD
     {"_test_thread_state",      test_thread_state,               METH_VARARGS},
     {"_pending_threadfunc",     pending_threadfunc,              METH_VARARGS},
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -6323,11 +6323,10 @@
         }
         /* All other characters are considered unencodable */
         collstart = p;
-        collend = p+1;
-        while (collend < end) {
+        for (collend = p+1; collend < end; collend++) {
             if ((0 < *collend && *collend < 256) ||
-                !Py_UNICODE_ISSPACE(*collend) ||
-                Py_UNICODE_TODECIMAL(*collend))
+                Py_UNICODE_ISSPACE(*collend) ||
+                0 <= Py_UNICODE_TODECIMAL(*collend))
                 break;
         }
         /* cache callback name lookup
@@ -7004,7 +7003,7 @@
         }
     } else {
 
-        Py_ssize_t n, i, j, e;
+        Py_ssize_t n, i, j;
         Py_ssize_t product, new_size, delta;
         Py_UNICODE *p;
 
@@ -7036,7 +7035,6 @@
             return NULL;
         i = 0;
         p = u->str;
-        e = self->length - str1->length;
         if (str1->length > 0) {
             while (n-- > 0) {
                 /* look for next match */

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list