[Python-checkins] bpo-37476: Adding tests for asutf8 and asutf8andsize (GH-14531)

Xiang Zhang webhook-mailer at python.org
Sat Jul 20 03:56:38 EDT 2019


https://github.com/python/cpython/commit/5623ac87bbe5de481957eca5eeae06347612fbeb
commit: 5623ac87bbe5de481957eca5eeae06347612fbeb
branch: master
author: Hai Shi <shihai1992 at gmail.com>
committer: Xiang Zhang <angwerzx at 126.com>
date: 2019-07-20T15:56:23+08:00
summary:

bpo-37476: Adding tests for asutf8 and asutf8andsize (GH-14531)

files:
M Lib/test/test_unicode.py
M Modules/_testcapimodule.c

diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 177d80d27e1a..8be16c8da926 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -2819,6 +2819,34 @@ def test_asucs4(self):
             self.assertEqual(unicode_asucs4(s, len(s), 1), s+'\0')
             self.assertEqual(unicode_asucs4(s, len(s), 0), s+'\uffff')
 
+    # Test PyUnicode_AsUTF8()
+    @support.cpython_only
+    def test_asutf8(self):
+        from _testcapi import unicode_asutf8
+
+        bmp = '\u0100'
+        bmp2 = '\uffff'
+        nonbmp = chr(0x10ffff)
+
+        self.assertEqual(unicode_asutf8(bmp), b'\xc4\x80')
+        self.assertEqual(unicode_asutf8(bmp2), b'\xef\xbf\xbf')
+        self.assertEqual(unicode_asutf8(nonbmp), b'\xf4\x8f\xbf\xbf')
+        self.assertRaises(UnicodeEncodeError, unicode_asutf8, 'a\ud800b\udfffc')
+
+    # Test PyUnicode_AsUTF8AndSize()
+    @support.cpython_only
+    def test_asutf8andsize(self):
+        from _testcapi import unicode_asutf8andsize
+
+        bmp = '\u0100'
+        bmp2 = '\uffff'
+        nonbmp = chr(0x10ffff)
+
+        self.assertEqual(unicode_asutf8andsize(bmp), (b'\xc4\x80', 2))
+        self.assertEqual(unicode_asutf8andsize(bmp2), (b'\xef\xbf\xbf', 3))
+        self.assertEqual(unicode_asutf8andsize(nonbmp), (b'\xf4\x8f\xbf\xbf', 4))
+        self.assertRaises(UnicodeEncodeError, unicode_asutf8andsize, 'a\ud800b\udfffc')
+
     # Test PyUnicode_FindChar()
     @support.cpython_only
     def test_findchar(self):
diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c
index 8f34e935353b..8a6e741d2810 100644
--- a/Modules/_testcapimodule.c
+++ b/Modules/_testcapimodule.c
@@ -1921,6 +1921,48 @@ unicode_asucs4(PyObject *self, PyObject *args)
     return result;
 }
 
+static PyObject *
+unicode_asutf8(PyObject *self, PyObject *args)
+{
+    PyObject *unicode;
+    const char *buffer;
+
+    if (!PyArg_ParseTuple(args, "U", &unicode)) {
+        return NULL;
+    }
+
+    buffer = PyUnicode_AsUTF8(unicode);
+    if (buffer == NULL) {
+        return NULL;
+    }
+
+    return PyBytes_FromString(buffer);
+}
+
+static PyObject *
+unicode_asutf8andsize(PyObject *self, PyObject *args)
+{
+    PyObject *unicode, *result;
+    const char *buffer;
+    Py_ssize_t utf8_len;
+
+    if(!PyArg_ParseTuple(args, "U", &unicode)) {
+        return NULL;
+    }
+
+    buffer = PyUnicode_AsUTF8AndSize(unicode, &utf8_len); 
+    if (buffer == NULL) {
+        return NULL;
+    }
+
+    result = PyBytes_FromString(buffer);
+    if (result == NULL) {
+        return NULL;
+    }
+
+    return Py_BuildValue("(Nn)", result, utf8_len);
+}
+
 static PyObject *
 unicode_findchar(PyObject *self, PyObject *args)
 {
@@ -5174,6 +5216,8 @@ static PyMethodDef TestMethods[] = {
     {"unicode_aswidechar",      unicode_aswidechar,              METH_VARARGS},
     {"unicode_aswidecharstring",unicode_aswidecharstring,        METH_VARARGS},
     {"unicode_asucs4",          unicode_asucs4,                  METH_VARARGS},
+    {"unicode_asutf8",          unicode_asutf8,                  METH_VARARGS},
+    {"unicode_asutf8andsize",   unicode_asutf8andsize,           METH_VARARGS},
     {"unicode_findchar",        unicode_findchar,                METH_VARARGS},
     {"unicode_copycharacters",  unicode_copycharacters,          METH_VARARGS},
     {"unicode_encodedecimal",   unicode_encodedecimal,           METH_VARARGS},



More information about the Python-checkins mailing list