[pypy-commit] cffi default: Merge the 'wchar_t' branch, adding support for wchar_t.

arigo noreply at buildbot.pypy.org
Mon Jul 9 17:30:36 CEST 2012


Author: Armin Rigo <arigo at tunes.org>
Branch: 
Changeset: r613:b0c29bd26001
Date: 2012-07-09 17:13 +0200
http://bitbucket.org/cffi/cffi/changeset/b0c29bd26001/

Log:	Merge the 'wchar_t' branch, adding support for wchar_t.

diff --git a/c/_cffi_backend.c b/c/_cffi_backend.c
--- a/c/_cffi_backend.c
+++ b/c/_cffi_backend.c
@@ -27,7 +27,7 @@
 /* base type flag: exactly one of the following: */
 #define CT_PRIMITIVE_SIGNED   1    /* signed integer */
 #define CT_PRIMITIVE_UNSIGNED 2    /* unsigned integer */
-#define CT_PRIMITIVE_CHAR     4    /* char (and, later, wchar_t) */
+#define CT_PRIMITIVE_CHAR     4    /* char, wchar_t */
 #define CT_PRIMITIVE_FLOAT    8    /* float, double */
 #define CT_POINTER           16    /* pointer, excluding ptr-to-func */
 #define CT_ARRAY             32    /* array */
@@ -157,6 +157,10 @@
 # endif
 #endif
 
+#ifdef HAVE_WCHAR_H
+# include "wchar_helper.h"
+#endif
+
 /************************************************************/
 
 static CTypeDescrObject *
@@ -602,7 +606,12 @@
         return PyFloat_FromDouble(value);
     }
     else if (ct->ct_flags & CT_PRIMITIVE_CHAR) {
-        return PyString_FromStringAndSize(data, 1);
+        if (ct->ct_size == sizeof(char))
+            return PyString_FromStringAndSize(data, 1);
+#ifdef HAVE_WCHAR_H
+        else
+            return _my_PyUnicode_FromWideChar((wchar_t *)data, 1);
+#endif
     }
 
     PyErr_Format(PyExc_SystemError,
@@ -664,8 +673,9 @@
         return (unsigned char)(PyString_AS_STRING(init)[0]);
     }
     if (CData_Check(init) &&
-           (((CDataObject *)init)->c_type->ct_flags & CT_PRIMITIVE_CHAR)) {
-        return (unsigned char)(((CDataObject *)init)->c_data[0]);
+           (((CDataObject *)init)->c_type->ct_flags & CT_PRIMITIVE_CHAR) &&
+           (((CDataObject *)init)->c_type->ct_size == sizeof(char))) {
+        return *(unsigned char *)((CDataObject *)init)->c_data;
     }
     PyErr_Format(PyExc_TypeError,
                  "initializer for ctype 'char' must be a string of length 1, "
@@ -673,6 +683,26 @@
     return -1;
 }
 
+#ifdef HAVE_WCHAR_H
+static wchar_t _convert_to_wchar_t(PyObject *init)
+{
+    /* unicode accepted by _my_PyUnicode_AsSingleWideChar, or a cdata of a
+       wchar_t-sized char type; else sets TypeError and returns (wchar_t)-1 */
+    wchar_t single;
+    CDataObject *cd = (CDataObject *)init;
+    if (PyUnicode_Check(init) &&
+            _my_PyUnicode_AsSingleWideChar(init, &single) == 0)
+        return single;
+    if (CData_Check(init) && (cd->c_type->ct_flags & CT_PRIMITIVE_CHAR) &&
+            cd->c_type->ct_size == sizeof(wchar_t))
+        return *(wchar_t *)cd->c_data;
+    PyErr_Format(PyExc_TypeError,
+                 "initializer for ctype 'wchar_t' must be a unicode string "
+                 "of length 1, not %.200s", Py_TYPE(init)->tp_name);
+    return (wchar_t)-1;
+}
+#endif
+
 static int _convert_error(PyObject *init, const char *ct_name,
                           const char *expected)
 {
@@ -732,24 +762,46 @@
             return 0;
         }
         else if (ctitem->ct_flags & CT_PRIMITIVE_CHAR) {
-            char *srcdata;
-            Py_ssize_t n;
-            if (!PyString_Check(init)) {
-                expected = "str or list or tuple";
-                goto cannot_convert;
+            if (ctitem->ct_size == sizeof(char)) {
+                char *srcdata;
+                Py_ssize_t n;
+                if (!PyString_Check(init)) {
+                    expected = "str or list or tuple";
+                    goto cannot_convert;
+                }
+                n = PyString_GET_SIZE(init);
+                if (ct->ct_length >= 0 && n > ct->ct_length) {
+                    PyErr_Format(PyExc_IndexError,
+                                 "initializer string is too long for '%s' "
+                                 "(got %zd characters)", ct->ct_name, n);
+                    return -1;
+                }
+                if (n != ct->ct_length)
+                    n++;
+                srcdata = PyString_AS_STRING(init);
+                memcpy(data, srcdata, n);
+                return 0;
             }
-            n = PyString_GET_SIZE(init);
-            if (ct->ct_length >= 0 && n > ct->ct_length) {
-                PyErr_Format(PyExc_IndexError,
-                             "initializer string is too long for '%s' "
-                             "(got %zd characters)", ct->ct_name, n);
-                return -1;
+#ifdef HAVE_WCHAR_H
+            else {
+                Py_ssize_t n;
+                if (!PyUnicode_Check(init)) {
+                    expected = "unicode or list or tuple";
+                    goto cannot_convert;
+                }
+                n = _my_PyUnicode_SizeAsWideChar(init);
+                if (ct->ct_length >= 0 && n > ct->ct_length) {
+                    PyErr_Format(PyExc_IndexError,
+                                 "initializer unicode is too long for '%s' "
+                                 "(got %zd characters)", ct->ct_name, n);
+                    return -1;
+                }
+                if (n != ct->ct_length)
+                    n++;
+                _my_PyUnicode_AsWideChar(init, (wchar_t *)data, n);
+                return 0;
             }
-            if (n != ct->ct_length)
-                n++;
-            srcdata = PyString_AS_STRING(init);
-            memcpy(data, srcdata, n);
-            return 0;
+#endif
         }
         else {
             expected = "list or tuple";
@@ -829,11 +881,22 @@
         return 0;
     }
     if (ct->ct_flags & CT_PRIMITIVE_CHAR) {
-        int res = _convert_to_char(init);
-        if (res < 0)
-            return -1;
-        data[0] = res;
-        return 0;
+        if (ct->ct_size == sizeof(char)) {
+            int res = _convert_to_char(init);
+            if (res < 0)
+                return -1;
+            data[0] = res;
+            return 0;
+        }
+#ifdef HAVE_WCHAR_H
+        else {
+            wchar_t res = _convert_to_wchar_t(init);
+            if (res == (wchar_t)-1 && PyErr_Occurred())
+                return -1;
+            *(wchar_t *)data = res;
+            return 0;
+        }
+#endif
     }
     if (ct->ct_flags & (CT_STRUCT|CT_UNION)) {
 
@@ -1064,11 +1127,13 @@
 
 static PyObject *cdata_str(CDataObject *cd)
 {
-    if (cd->c_type->ct_flags & CT_PRIMITIVE_CHAR) {
+    if (cd->c_type->ct_flags & CT_PRIMITIVE_CHAR &&
+        cd->c_type->ct_size == sizeof(char)) {
         return PyString_FromStringAndSize(cd->c_data, 1);
     }
     else if (cd->c_type->ct_itemdescr != NULL &&
-             cd->c_type->ct_itemdescr->ct_flags & CT_PRIMITIVE_CHAR) {
+             cd->c_type->ct_itemdescr->ct_flags & CT_PRIMITIVE_CHAR &&
+             cd->c_type->ct_itemdescr->ct_size == sizeof(char)) {
         Py_ssize_t length;
 
         if (cd->c_type->ct_flags & CT_ARRAY) {
@@ -1101,6 +1166,48 @@
         return Py_TYPE(cd)->tp_repr((PyObject *)cd);
 }
 
+#ifdef HAVE_WCHAR_H
+static PyObject *cdata_unicode(CDataObject *cd)
+{
+    /* unicode(cdata): a wchar_t-sized char cdata converts that one wchar_t;
+       a pointer to / array of such items converts up to the first null (an
+       array also stops at its length); any other cdata returns its repr. */
+    CTypeDescrObject *ct = cd->c_type;
+    CTypeDescrObject *ctitem = ct->ct_itemdescr;
+    const wchar_t *text = (wchar_t *)cd->c_data;
+    Py_ssize_t count = 0;
+
+    if (ct->ct_flags & CT_PRIMITIVE_CHAR && ct->ct_size == sizeof(wchar_t))
+        return _my_PyUnicode_FromWideChar((wchar_t *)cd->c_data, 1);
+    if (ctitem == NULL || !(ctitem->ct_flags & CT_PRIMITIVE_CHAR) ||
+            ctitem->ct_size != sizeof(wchar_t))
+        return cdata_repr(cd);
+
+    if (ct->ct_flags & CT_ARRAY) {
+        /* stop at the first null item, or at the end of the array */
+        const Py_ssize_t limit = get_array_length(cd);
+        while (count < limit && text[count])
+            count++;
+    }
+    else if (text != NULL) {
+        while (text[count])
+            count++;
+    }
+    else {
+        /* NULL pointer: RuntimeError quoting the repr (if repr succeeded) */
+        PyObject *s = cdata_repr(cd);
+        if (s != NULL) {
+            PyErr_Format(PyExc_RuntimeError,
+                         "cannot use unicode() on %s",
+                         PyString_AS_STRING(s));
+            Py_DECREF(s);
+        }
+        return NULL;
+    }
+    return _my_PyUnicode_FromWideChar((wchar_t *)cd->c_data, count);
+}
+#endif
+
 static PyObject *cdataowning_repr(CDataObject *cd)
 {
     Py_ssize_t size;
@@ -1152,7 +1259,12 @@
         return convert_to_object(cd->c_data, cd->c_type);
     }
     else if (cd->c_type->ct_flags & CT_PRIMITIVE_CHAR) {
-        return PyInt_FromLong((unsigned char)cd->c_data[0]);
+        if (cd->c_type->ct_size == sizeof(char))
+            return PyInt_FromLong((unsigned char)cd->c_data[0]);
+#ifdef HAVE_WCHAR_H
+        else
+            return PyInt_FromLong((long)*(wchar_t *)cd->c_data);
+#endif
     }
     else if (cd->c_type->ct_flags & CT_PRIMITIVE_FLOAT) {
         PyObject *o = convert_to_object(cd->c_data, cd->c_type);
@@ -1552,12 +1664,27 @@
             argtype = (CTypeDescrObject *)PyTuple_GET_ITEM(fvarargs, i);
 
         if ((argtype->ct_flags & CT_POINTER) &&
-            (argtype->ct_itemdescr->ct_flags & CT_PRIMITIVE_CHAR) &&
-            PyString_Check(obj)) {
-            /* special case: Python string -> cdata 'char *' */
-            *(char **)data = PyString_AS_STRING(obj);
+            (argtype->ct_itemdescr->ct_flags & CT_PRIMITIVE_CHAR)) {
+            if (argtype->ct_itemdescr->ct_size == sizeof(char)) {
+                if (PyString_Check(obj)) {
+                    /* special case: Python string -> cdata 'char *' */
+                    *(char **)data = PyString_AS_STRING(obj);
+                    continue;
+                }
+            }
+#ifdef HAVE_WCHAR_H
+            else {
+                if (PyUnicode_Check(obj)) {
+                    /* Python Unicode string -> cdata 'wchar_t *':
+                       not supported yet */
+                    PyErr_SetString(PyExc_NotImplementedError,
+                        "automatic unicode-to-'wchar_t *' conversion");
+                    goto error;
+                }
+            }
+#endif
         }
-        else if (convert_from_object(data, argtype, obj) < 0)
+        if (convert_from_object(data, argtype, obj) < 0)
             goto error;
     }
 
@@ -1645,6 +1772,14 @@
     (objobjargproc)cdata_ass_sub, /*mp_ass_subscript*/
 };
 
+static PyMethodDef CData_methods[] = {
+#ifdef HAVE_WCHAR_H
+    /* cdata_unicode is only defined when wchar_t support is compiled in */
+    {"__unicode__",     (PyCFunction)cdata_unicode,  METH_NOARGS},
+#endif
+    {NULL,              NULL}           /* sentinel */
+};
+
 static PyTypeObject CData_Type = {
     PyVarObject_HEAD_INIT(NULL, 0)
     "_cffi_backend.CData",
@@ -1672,6 +1804,8 @@
     cdata_richcompare,                          /* tp_richcompare */
     0,                                          /* tp_weaklistoffset */
     (getiterfunc)cdata_iter,                    /* tp_iter */
+    0,                                          /* tp_iternext */
+    CData_methods,                              /* tp_methods */
 };
 
 static PyTypeObject CDataOwning_Type = {
@@ -1848,7 +1982,7 @@
             return NULL;
         }
         if (ctitem->ct_flags & CT_PRIMITIVE_CHAR)
-            datasize += sizeof(char);  /* forcefully add a null character */
+            datasize *= 2;   /* forcefully add another character: a null */
     }
     else if (ct->ct_flags & CT_ARRAY) {
         dataoffset = offsetof(CDataObject_own_nolength, alignment);
@@ -1861,6 +1995,12 @@
                 /* from a string, we add the null terminator */
                 explicitlength = PyString_GET_SIZE(init) + 1;
             }
+#ifdef HAVE_WCHAR_H   /* _my_PyUnicode_SizeAsWideChar comes from wchar_helper.h */
+            else if (PyUnicode_Check(init)) {
+                /* from a unicode, we add the null terminator */
+                explicitlength = _my_PyUnicode_SizeAsWideChar(init) + 1;
+            }
+#endif
             else {
                 explicitlength = PyNumber_AsSsize_t(init, PyExc_OverflowError);
                 if (explicitlength < 0) {
@@ -1973,6 +2111,18 @@
             value = (unsigned char)PyString_AS_STRING(ob)[0];
         }
     }
+#ifdef HAVE_WCHAR_H
+    else if (PyUnicode_Check(ob)) {
+        wchar_t ordinal;
+        if (_my_PyUnicode_AsSingleWideChar(ob, &ordinal) < 0) {
+            PyErr_Format(PyExc_TypeError,
+                         "cannot cast unicode of length %zd to ctype '%s'",
+                         PyUnicode_GET_SIZE(ob), ct->ct_name);
+            return NULL;
+        }
+        value = (long)ordinal;
+    }
+#endif
     else {
         value = _my_PyLong_AsUnsignedLongLong(ob, 0);
         if (value == (unsigned PY_LONG_LONG)-1 && PyErr_Occurred())
@@ -2240,7 +2390,6 @@
         { "ptrdiff_t",     sizeof(ptrdiff_t) },
         { "size_t",        sizeof(size_t) | UNSIGNED },
         { "ssize_t",       sizeof(ssize_t) },
-        /*{ "wchar_t",       sizeof(wchar_t) | UNSIGNED },*/
         { NULL }
     };
 #undef UNSIGNED
@@ -2284,10 +2433,17 @@
        EPTYPE(ull, unsigned long long, CT_PRIMITIVE_UNSIGNED )  \
        EPTYPE(f, float, CT_PRIMITIVE_FLOAT )                    \
        EPTYPE(d, double, CT_PRIMITIVE_FLOAT )
+#ifdef HAVE_WCHAR_H
+# define ENUM_PRIMITIVE_TYPES_WCHAR                             \
+       EPTYPE(wc, wchar_t, CT_PRIMITIVE_CHAR )
+#else
+# define ENUM_PRIMITIVE_TYPES_WCHAR   /* nothing */
+#endif
 
 #define EPTYPE(code, typename, flags)                   \
     struct aligncheck_##code { char x; typename y; };
     ENUM_PRIMITIVE_TYPES
+    ENUM_PRIMITIVE_TYPES_WCHAR
 #undef EPTYPE
 
     CTypeDescrObject *td;
@@ -2301,7 +2457,9 @@
           flags                                         \
         },
     ENUM_PRIMITIVE_TYPES
+    ENUM_PRIMITIVE_TYPES_WCHAR
 #undef EPTYPE
+#undef ENUM_PRIMITIVE_TYPES_WCHAR
 #undef ENUM_PRIMITIVE_TYPES
         { NULL }
     };
@@ -2314,6 +2472,11 @@
 
     for (ptypes=types; ; ptypes++) {
         if (ptypes->name == NULL) {
+#ifndef HAVE_WCHAR_H
+            if (strcmp(name, "wchar_t") == 0)   /* strcmp() is 0 on a match: wchar_t is a known type, just unsupported in this build */
+                PyErr_SetString(PyExc_NotImplementedError, name);
+            else
+#endif
             PyErr_SetString(PyExc_KeyError, name);
             return NULL;
         }
@@ -2358,11 +2521,11 @@
     td->ct_length = ptypes->align;
     td->ct_extra = ffitype;
     td->ct_flags = ptypes->flags;
-    if (td->ct_flags & CT_PRIMITIVE_SIGNED) {
+    if (td->ct_flags & (CT_PRIMITIVE_SIGNED | CT_PRIMITIVE_CHAR)) {
         if (td->ct_size <= sizeof(long))
             td->ct_flags |= CT_PRIMITIVE_FITS_LONG;
     }
-    else if (td->ct_flags & (CT_PRIMITIVE_UNSIGNED | CT_PRIMITIVE_CHAR)) {
+    else if (td->ct_flags & CT_PRIMITIVE_UNSIGNED) {
         if (td->ct_size < sizeof(long))
             td->ct_flags |= CT_PRIMITIVE_FITS_LONG;
     }
@@ -2592,6 +2755,10 @@
             if (!(ftype->ct_flags & (CT_PRIMITIVE_SIGNED |
                                      CT_PRIMITIVE_UNSIGNED |
                                      CT_PRIMITIVE_CHAR)) ||
+#ifdef HAVE_WCHAR_H
+                    ((ftype->ct_flags & CT_PRIMITIVE_CHAR)
+                         && ftype->ct_size == sizeof(wchar_t)) ||
+#endif
                     fbitsize == 0 ||
                     fbitsize > 8 * ftype->ct_size) {
                 PyErr_Format(PyExc_TypeError, "invalid bit field '%s'",
@@ -3763,6 +3930,12 @@
     return PyString_FromStringAndSize(&x, 1);
 }
 
+#ifdef HAVE_WCHAR_H
+static PyObject *_cffi_from_c_wchar_t(wchar_t x) {   /* converts the single wchar_t 'x' with _my_PyUnicode_FromWideChar */
+    return _my_PyUnicode_FromWideChar(&x, 1);
+}
+#endif
+
 static void *cffi_exports[] = {
     _cffi_to_c_char_p,
     _cffi_to_c_signed_char,
@@ -3788,6 +3961,13 @@
     convert_to_object,
     convert_from_object,
     convert_struct_to_owning_object,
+#ifdef HAVE_WCHAR_H
+    _convert_to_wchar_t,
+    _cffi_from_c_wchar_t,
+#else
+    0,
+    0,
+#endif
 };
 
 /************************************************************/
diff --git a/c/test_c.py b/c/test_c.py
--- a/c/test_c.py
+++ b/c/test_c.py
@@ -1279,6 +1279,121 @@
     py.test.raises(TypeError, newp, BStructPtr, [cast(BIntP, 0)])
     py.test.raises(TypeError, newp, BStructPtr, [cast(BFunc2, 0)])
 
+def test_wchar():
+    BWChar = new_primitive_type("wchar_t")
+    BInt = new_primitive_type("int")
+    pyuni4 = {1: True, 2: False}[len(u'\U00012345')]
+    wchar4 = {2: False, 4: True}[sizeof(BWChar)]
+    assert str(cast(BWChar, 0x45)) == "<cdata 'wchar_t' u'E'>"
+    assert str(cast(BWChar, 0x1234)) == "<cdata 'wchar_t' u'\u1234'>"
+    if wchar4:
+        x = cast(BWChar, 0x12345)
+        assert str(x) == "<cdata 'wchar_t' u'\U00012345'>"
+        assert unicode(x) == u'\U00012345'
+    else:
+        assert not pyuni4
+    # wchar_t struct fields: assignment needs a unicode object, not a str
+    BWCharP = new_pointer_type(BWChar)
+    BStruct = new_struct_type("foo_s")
+    BStructPtr = new_pointer_type(BStruct)
+    complete_struct_or_union(BStruct, [('a1', BWChar, -1),
+                                       ('a2', BWCharP, -1)])
+    s = newp(BStructPtr)
+    s.a1 = u'\x00'
+    assert s.a1 == u'\x00'
+    py.test.raises(TypeError, "s.a1 = 'a'")
+    py.test.raises(TypeError, "s.a1 = '\xFF'")
+    s.a1 = u'\u1234'
+    assert s.a1 == u'\u1234'
+    if pyuni4:
+        assert wchar4
+        s.a1 = u'\U00012345'
+        assert s.a1 == u'\U00012345'
+    elif wchar4:
+        s.a1 = cast(BWChar, 0x12345)
+        assert s.a1 == u'\ud808\udf45'
+        s.a1 = u'\ud807\udf44'
+        assert s.a1 == u'\U00011f44'
+    else:   # 2-byte wchar_t: the 2-unit unicode is rejected by _convert_to_wchar_t, which raises TypeError
+        py.test.raises(TypeError, "s.a1 = u'\U00012345'")
+    # arrays of wchar_t initialized from a unicode string
+    BWCharArray = new_array_type(BWCharP, None)
+    a = newp(BWCharArray, u'hello \u1234 world')
+    assert len(a) == 14   # including the final null
+    assert unicode(a) == u'hello \u1234 world'
+    a[13] = u'!'
+    assert unicode(a) == u'hello \u1234 world!'
+    assert str(a) == repr(a)
+    assert a[6] == u'\u1234'
+    a[6] = u'-'
+    assert unicode(a) == 'hello - world!'
+    assert str(a) == repr(a)
+    # with a 4-byte wchar_t, each character above U+FFFF takes one array item
+    if wchar4:
+        u = u'\U00012345\U00012346\U00012347'
+        a = newp(BWCharArray, u)
+        assert len(a) == 4
+        assert unicode(a) == u
+        assert len(list(a)) == 4
+        expected = [u'\U00012345', u'\U00012346', u'\U00012347', unichr(0)]
+        assert list(a) == expected
+        got = [a[i] for i in range(4)]
+        assert got == expected
+        py.test.raises(IndexError, 'a[4]')
+    # cast() to wchar_t and to int, from str, int and unicode sources
+    w = cast(BWChar, 'a')
+    assert repr(w) == "<cdata 'wchar_t' u'a'>"
+    assert str(w) == repr(w)
+    assert unicode(w) == u'a'
+    assert int(w) == ord('a')
+    w = cast(BWChar, 0x1234)
+    assert repr(w) == "<cdata 'wchar_t' u'\u1234'>"
+    assert str(w) == repr(w)
+    assert unicode(w) == u'\u1234'
+    assert int(w) == 0x1234
+    w = cast(BWChar, u'\u1234')
+    assert repr(w) == "<cdata 'wchar_t' u'\u1234'>"
+    assert str(w) == repr(w)
+    assert unicode(w) == u'\u1234'
+    assert int(w) == 0x1234
+    w = cast(BInt, u'\u1234')
+    assert repr(w) == "<cdata 'int' 4660>"
+    if wchar4:
+        w = cast(BWChar, u'\U00012345')
+        assert repr(w) == "<cdata 'wchar_t' u'\U00012345'>"
+        assert str(w) == repr(w)
+        assert unicode(w) == u'\U00012345'
+        assert int(w) == 0x12345
+        w = cast(BInt, u'\U00012345')
+        assert repr(w) == "<cdata 'int' 74565>"
+    py.test.raises(TypeError, cast, BInt, u'')
+    py.test.raises(TypeError, cast, BInt, u'XX')
+    assert int(cast(BInt, u'a')) == ord('a')
+    # a 'wchar_t *' obtained by casting an array: reads and writes through it
+    a = newp(BWCharArray, u'hello - world')
+    p = cast(BWCharP, a)
+    assert unicode(p) == u'hello - world'
+    p[6] = u'\u2345'
+    assert unicode(p) == u'hello \u2345 world'
+    # struct initializer list holding a wchar_t and a 'wchar_t *'
+    s = newp(BStructPtr, [u'\u1234', p])
+    assert s.a1 == u'\u1234'
+    assert s.a2 == p
+    assert str(s.a2) == repr(s.a2)
+    assert unicode(s.a2) == u'hello \u2345 world'
+    # unicode() on a NULL 'wchar_t *' raises RuntimeError
+    q = cast(BWCharP, 0)
+    assert str(q) == repr(q)
+    py.test.raises(RuntimeError, unicode, q)
+    # passing a unicode object as a 'wchar_t *' argument is not implemented
+    def cb(p):
+        assert repr(p).startswith("<cdata 'wchar_t *' 0x")
+        return len(unicode(p))
+    BFunc = new_function_type((BWCharP,), BInt, False)
+    f = callback(BFunc, cb, -42)
+    #assert f(u'a\u1234b') == 3    -- not implemented
+    py.test.raises(NotImplementedError, f, u'a\u1234b')
+
 def test_keepalive_struct():
     # exception to the no-keepalive rule: p=newp(BStructPtr) returns a
     # pointer owning the memory, and p[0] returns a pointer to the
diff --git a/c/wchar_helper.h b/c/wchar_helper.h
new file mode 100644
--- /dev/null
+++ b/c/wchar_helper.h
@@ -0,0 +1,121 @@
+/*
+ * wchar_t helpers
+ */
+
+#if (Py_UNICODE_SIZE == 2) && (SIZEOF_WCHAR_T == 4)
+# define CONVERT_WCHAR_TO_SURROGATES
+#endif
+
+
+#if PY_VERSION_HEX < 0x02070000 && defined(CONVERT_WCHAR_TO_SURROGATES)
+
+/* Before Python 2.7, PyUnicode_FromWideChar is not able to convert wchar_t
+   values greater than 65535 into two-unicode-characters surrogates, so this
+   replacement does it.  Returns the new unicode object, or NULL on error. */
+static PyObject *
+_my_PyUnicode_FromWideChar(register const wchar_t *w,
+                           Py_ssize_t size)
+{
+    PyObject *unicode;
+    register Py_ssize_t i;
+    Py_ssize_t alloc;
+    const wchar_t *orig_w;
+
+    if (w == NULL) {
+        PyErr_BadInternalCall();
+        return NULL;
+    }
+    /* First pass: count the Py_UNICODE units needed for the 'size' items */
+    alloc = size;
+    orig_w = w;
+    for (i = size; i > 0; i--) {
+        if (*w > 0xFFFF)
+            alloc++;              /* this item will take two units */
+        w++;
+    }
+    w = orig_w;                   /* rewind for the copying pass */
+    unicode = PyUnicode_FromUnicode(NULL, alloc);
+    if (!unicode)
+        return NULL;
+
+    /* Copy the wchar_t data into the new object */
+    {
+        register Py_UNICODE *u;
+        u = PyUnicode_AS_UNICODE(unicode);
+        for (i = size; i > 0; i--) {
+            if (*w > 0xFFFF) {
+                wchar_t ordinal = *w++;
+                ordinal -= 0x10000;
+                *u++ = 0xD800 | (ordinal >> 10);      /* bits above the low 10 */
+                *u++ = 0xDC00 | (ordinal & 0x3FF);    /* low 10 bits */
+            }
+            else
+                *u++ = *w++;
+        }
+    }
+    return unicode;
+}
+
+#else
+
+# define _my_PyUnicode_FromWideChar PyUnicode_FromWideChar
+
+#endif
+
+/* IS_SURROGATE(u): u[0] is in 0xD800..0xDBFF and u[1] in 0xDC00..0xDFFF; AS_SURROGATE(u): the value >= 0x10000 built from that pair */
+#define IS_SURROGATE(u)   (0xD800 <= (u)[0] && (u)[0] <= 0xDBFF &&   \
+                           0xDC00 <= (u)[1] && (u)[1] <= 0xDFFF)
+#define AS_SURROGATE(u)   (0x10000 + (((u)[0] - 0xD800) << 10) +     \
+                                     ((u)[1] - 0xDC00))
+
+static int _my_PyUnicode_AsSingleWideChar(PyObject *unicode, wchar_t *result)
+{   /* returns 0 with *result set, or -1 (no exception set) otherwise */
+    const Py_ssize_t size = PyUnicode_GET_SIZE(unicode);
+    Py_UNICODE *chars = PyUnicode_AS_UNICODE(unicode);
+#ifdef CONVERT_WCHAR_TO_SURROGATES
+    if (size == 2 && IS_SURROGATE(chars)) {
+        *result = AS_SURROGATE(chars);    /* a pair becomes one wchar_t */
+        return 0;
+    }
+#endif
+    if (size != 1)
+        return -1;
+    *result = (wchar_t)(chars[0]);
+    return 0;
+}
+
+static Py_ssize_t _my_PyUnicode_SizeAsWideChar(PyObject *unicode)
+{
+    /* Length of 'unicode' in Py_UNICODE units; when
+       CONVERT_WCHAR_TO_SURROGATES is set, each surrogate pair counts once. */
+    const Py_ssize_t units = PyUnicode_GET_SIZE(unicode);
+    Py_ssize_t pairs = 0;
+
+#ifdef CONVERT_WCHAR_TO_SURROGATES
+    Py_UNICODE *chars = PyUnicode_AS_UNICODE(unicode);
+    Py_ssize_t pos;
+    for (pos = 0; pos < units - 1; pos++)
+        if (IS_SURROGATE(chars + pos))
+            pairs++;
+#endif
+    return units - pairs;
+}
+
+static void _my_PyUnicode_AsWideChar(PyObject *unicode,
+                                     wchar_t *result,
+                                     Py_ssize_t resultlen)
+{
+    /* Writes 'resultlen' wchar_t items read from the buffer of 'unicode'. */
+    Py_UNICODE *src = PyUnicode_AS_UNICODE(unicode);
+    Py_ssize_t remaining;
+    for (remaining = resultlen; remaining > 0; remaining--) {
+#ifdef CONVERT_WCHAR_TO_SURROGATES
+        if (IS_SURROGATE(src)) {          /* merge a pair into one item */
+            *result++ = AS_SURROGATE(src);
+            src += 2;
+            continue;
+        }
+#endif
+        *result++ = *src++;
+    }
+}
diff --git a/cffi/backend_ctypes.py b/cffi/backend_ctypes.py
--- a/cffi/backend_ctypes.py
+++ b/cffi/backend_ctypes.py
@@ -267,8 +267,6 @@
             if size == ctypes.sizeof(ctypes.c_size_t):
                 result['size_t'] = size | UNSIGNED
                 result['ssize_t'] = size
-            #if size == ctypes.sizeof(ctypes.c_wchar):
-            #    result['wchar_t'] = size | UNSIGNED
         return result
 
     def load_library(self, path):
@@ -292,6 +290,8 @@
         return CTypesVoid
 
     def new_primitive_type(self, name):
+        if name == 'wchar_t':
+            raise NotImplementedError(name)
         ctype = self.PRIMITIVE_TYPES[name]
         if name == 'char':
             kind = 'char'
diff --git a/cffi/cparser.py b/cffi/cparser.py
--- a/cffi/cparser.py
+++ b/cffi/cparser.py
@@ -53,7 +53,7 @@
         # internals of CParser...  the following registers the
         # typedefs, because their presence or absence influences the
         # parsing itself (but what they are typedef'ed to plays no role)
-        csourcelines = []
+        csourcelines = ['typedef int wchar_t;']
         for name in sorted(self._declarations):
             if name.startswith('typedef '):
                 csourcelines.append('typedef int %s;' % (name[8:],))
diff --git a/cffi/model.py b/cffi/model.py
--- a/cffi/model.py
+++ b/cffi/model.py
@@ -53,7 +53,7 @@
         return self.name + replace_with
 
     def is_char_type(self):
-        return self.name == 'char'
+        return self.name in ('char', 'wchar_t')
     def is_signed_type(self):
         return self.is_integer_type() and not self.is_unsigned_type()
     def is_unsigned_type(self):
diff --git a/cffi/verifier.py b/cffi/verifier.py
--- a/cffi/verifier.py
+++ b/cffi/verifier.py
@@ -619,7 +619,11 @@
     ((int(*)(char *, CTypeDescrObject *, PyObject *))_cffi_exports[17])
 #define _cffi_from_c_struct                                              \
     ((PyObject *(*)(char *, CTypeDescrObject *))_cffi_exports[18])
-#define _CFFI_NUM_EXPORTS 19
+#define _cffi_to_c_wchar_t                                               \
+                 ((wchar_t(*)(PyObject *))_cffi_exports[19])
+#define _cffi_from_c_wchar_t                                             \
+    ((PyObject *(*)(wchar_t))_cffi_exports[20])
+#define _CFFI_NUM_EXPORTS 21
 
 #if SIZEOF_LONG < SIZEOF_LONG_LONG
 #  define _cffi_to_c_long_long PyLong_AsLongLong
diff --git a/testing/backend_tests.py b/testing/backend_tests.py
--- a/testing/backend_tests.py
+++ b/testing/backend_tests.py
@@ -6,7 +6,7 @@
 SIZE_OF_LONG  = ctypes.sizeof(ctypes.c_long)
 SIZE_OF_SHORT = ctypes.sizeof(ctypes.c_short)
 SIZE_OF_PTR   = ctypes.sizeof(ctypes.c_void_p)
-#SIZE_OF_WCHAR = ctypes.sizeof(ctypes.c_wchar)
+SIZE_OF_WCHAR = ctypes.sizeof(ctypes.c_wchar)
 
 
 class BackendTests:
@@ -41,7 +41,6 @@
         self._test_int_type(ffi, 'ptrdiff_t', SIZE_OF_PTR, False)
         self._test_int_type(ffi, 'size_t', SIZE_OF_PTR, True)
         self._test_int_type(ffi, 'ssize_t', SIZE_OF_PTR, False)
-        #self._test_int_type(ffi, 'wchar_t', SIZE_OF_WCHAR, True)
 
     def _test_int_type(self, ffi, c_decl, size, unsigned):
         if unsigned:
@@ -274,8 +273,9 @@
         assert ffi.new("char", "\xff")[0] == '\xff'
         assert ffi.new("char")[0] == '\x00'
         assert int(ffi.cast("char", 300)) == 300 - 256
-        assert bool(ffi.new("char"))
+        assert bool(ffi.cast("char", 0))
         py.test.raises(TypeError, ffi.new, "char", 32)
+        py.test.raises(TypeError, ffi.new, "char", u"x")
         py.test.raises(TypeError, ffi.new, "char", "foo")
         #
         p = ffi.new("char[]", ['a', 'b', '\x9c'])
@@ -297,6 +297,63 @@
         assert [p[i] for i in range(2)] == ['a', 'b']
         py.test.raises(IndexError, ffi.new, "char[2]", "abc")
 
+    def check_wchar_t(self, ffi):   # skips the calling test when casting to wchar_t raises NotImplementedError
+        try:
+            ffi.cast("wchar_t", 0)
+        except NotImplementedError:
+            py.test.skip("NotImplementedError: wchar_t")
+
+    def test_wchar_t(self):   # wchar_t scalars and arrays created with ffi.new()
+        ffi = FFI(backend=self.Backend())
+        self.check_wchar_t(ffi)
+        assert ffi.new("wchar_t", u'x')[0] == u'x'
+        assert ffi.new("wchar_t", unichr(1234))[0] == unichr(1234)
+        if SIZE_OF_WCHAR > 2:
+            assert ffi.new("wchar_t", u'\U00012345')[0] == u'\U00012345'
+        else:
+            py.test.raises(TypeError, ffi.new, "wchar_t", u'\U00012345')
+        assert ffi.new("wchar_t")[0] == u'\x00'
+        assert int(ffi.cast("wchar_t", 300)) == 300
+        assert bool(ffi.cast("wchar_t", 0))
+        py.test.raises(TypeError, ffi.new, "wchar_t", 32)
+        py.test.raises(TypeError, ffi.new, "wchar_t", "foo")
+        # arrays: from a list of characters, then from unicode strings
+        p = ffi.new("wchar_t[]", [u'a', u'b', unichr(1234)])
+        assert len(p) == 3
+        assert p[0] == u'a'
+        assert p[1] == u'b' and type(p[1]) is unicode
+        assert p[2] == unichr(1234)
+        p[0] = u'x'
+        assert p[0] == u'x' and type(p[0]) is unicode
+        p[1] = unichr(1357)
+        assert p[1] == unichr(1357)
+        p = ffi.new("wchar_t[]", u"abcd")
+        assert len(p) == 5
+        assert p[4] == u'\x00'
+        p = ffi.new("wchar_t[]", u"a\u1234b")
+        assert len(p) == 4
+        assert p[1] == unichr(0x1234)
+        # a character above U+FFFF: two items if wchar_t is 2 bytes, else one
+        p = ffi.new("wchar_t[]", u'\U00023456')
+        if SIZE_OF_WCHAR == 2:
+            assert sys.maxunicode == 0xffff
+            assert len(p) == 3
+            assert p[0] == u'\ud84d'
+            assert p[1] == u'\udc56'
+            assert p[2] == u'\x00'
+        else:
+            assert len(p) == 2
+            assert p[0] == u'\U00023456'
+            assert p[1] == u'\x00'
+        # fixed-length arrays: shorter, exact-fit and too-long initializers
+        p = ffi.new("wchar_t[4]", u"ab")
+        assert len(p) == 4
+        assert [p[i] for i in range(4)] == [u'a', u'b', u'\x00', u'\x00']
+        p = ffi.new("wchar_t[2]", u"ab")
+        assert len(p) == 2
+        assert [p[i] for i in range(2)] == [u'a', u'b']
+        py.test.raises(IndexError, ffi.new, "wchar_t[2]", u"abc")
+
     def test_none_as_null_doesnt_work(self):
         ffi = FFI(backend=self.Backend())
         p = ffi.new("int*[1]")
@@ -492,6 +549,14 @@
         assert str(ffi.new("char", "x")) == "x"
         assert str(ffi.new("char", "\x00")) == ""
 
+    def test_unicode_from_wchar_pointer(self):   # unicode() and str() on the result of ffi.new("wchar_t", ...)
+        ffi = FFI(backend=self.Backend())
+        self.check_wchar_t(ffi)
+        assert unicode(ffi.new("wchar_t", u"x")) == u"x"
+        assert unicode(ffi.new("wchar_t", u"\x00")) == u""
+        x = ffi.new("wchar_t", u"\x00")
+        assert str(x) == repr(x)   # str() falls back to the repr for wchar_t data
+
     def test_string_from_char_array(self):
         ffi = FFI(backend=self.Backend())
         assert str(ffi.cast("char", "x")) == "x"
@@ -509,6 +574,28 @@
         p = ffi.cast("char *", a)
         assert str(p) == 'hello'
 
+    def test_string_from_wchar_array(self):   # unicode() on wchar_t casts, arrays and pointers
+        ffi = FFI(backend=self.Backend())
+        self.check_wchar_t(ffi)
+        assert unicode(ffi.cast("wchar_t", "x")) == u"x"
+        assert unicode(ffi.cast("wchar_t", u"x")) == u"x"
+        x = ffi.cast("wchar_t", "x")
+        assert str(x) == repr(x)
+        # unicode() on an array stops at the first null or at the array's end
+        p = ffi.new("wchar_t[]", u"hello.")
+        p[5] = u'!'
+        assert unicode(p) == u"hello!"
+        p[6] = unichr(1234)
+        assert unicode(p) == u"hello!\u04d2"
+        p[3] = u'\x00'
+        assert unicode(p) == u"hel"
+        py.test.raises(IndexError, "p[7] = u'X'")
+        # unicode() on a pointer stops at the first null
+        a = ffi.new("wchar_t[]", u"hello\x00world")
+        assert len(a) == 12
+        p = ffi.cast("wchar_t *", a)
+        assert unicode(p) == u'hello'
+
     def test_fetch_const_char_p_field(self):
         # 'const' is ignored so far
         ffi = FFI(backend=self.Backend())
@@ -521,6 +608,18 @@
         s.name = ffi.NULL
         assert s.name == ffi.NULL
 
+    def test_fetch_const_wchar_p_field(self):
+        # 'const' is ignored so far
+        ffi = FFI(backend=self.Backend())
+        self.check_wchar_t(ffi)
+        ffi.cdef("struct foo { const wchar_t *name; };")
+        t = ffi.new("const wchar_t[]", u"testing")
+        s = ffi.new("struct foo", [t])
+        assert type(s.name) not in (str, unicode)   # the field reads back as a non-string object
+        assert unicode(s.name) == u"testing"
+        s.name = ffi.NULL
+        assert s.name == ffi.NULL
+
     def test_voidp(self):
         ffi = FFI(backend=self.Backend())
         py.test.raises(TypeError, ffi.new, "void")
@@ -630,6 +729,19 @@
         p = ffi.cast("int", "\x81")
         assert int(p) == 0x81
 
+    def test_wchar_cast(self):   # casts between wchar_t and integer types
+        ffi = FFI(backend=self.Backend())
+        self.check_wchar_t(ffi)
+        p = ffi.cast("int", ffi.cast("wchar_t", unichr(1234)))
+        assert int(p) == 1234
+        p = ffi.cast("long long", ffi.cast("wchar_t", -1))
+        if SIZE_OF_WCHAR == 2:      # 2 bytes, unsigned
+            assert int(p) == 0xffff
+        else:                       # 4 bytes, signed
+            assert int(p) == -1
+        p = ffi.cast("int", unichr(1234))   # a unicode character cast directly to int
+        assert int(p) == 1234
+
     def test_cast_array_to_charp(self):
         ffi = FFI(backend=self.Backend())
         a = ffi.new("short int[]", [0x1234, 0x5678])
diff --git a/testing/test_verify.py b/testing/test_verify.py
--- a/testing/test_verify.py
+++ b/testing/test_verify.py
@@ -68,9 +68,9 @@
 all_float_types = ['float', 'double']
 
 def test_primitive_category():
-    for typename in all_integer_types + all_float_types + ['char']:
+    for typename in all_integer_types + all_float_types + ['char', 'wchar_t']:
         tp = model.PrimitiveType(typename)
-        assert tp.is_char_type() == (typename == 'char')
+        assert tp.is_char_type() == (typename in ('char', 'wchar_t'))
         assert tp.is_signed_type() == (typename in all_signed_integer_types)
         assert tp.is_unsigned_type()== (typename in all_unsigned_integer_types)
         assert tp.is_integer_type() == (typename in all_integer_types)
@@ -104,6 +104,19 @@
     assert lib.foo("A") == "B"
     py.test.raises(TypeError, lib.foo, "bar")
 
+def test_wchar_type():   # a verify()-compiled function taking and returning wchar_t
+    ffi = FFI()
+    if ffi.sizeof('wchar_t') == 2:
+        uniexample1 = u'\u1234'
+        uniexample2 = u'\u1235'
+    else:
+        uniexample1 = u'\U00012345'
+        uniexample2 = u'\U00012346'
+    # foo() returns its argument plus one, so example1 must map to example2
+    ffi.cdef("wchar_t foo(wchar_t);")
+    lib = ffi.verify("wchar_t foo(wchar_t x) { return x+1; }")
+    assert lib.foo(uniexample1) == uniexample2
+
 def test_no_argument():
     ffi = FFI()
     ffi.cdef("int foo(void);")


More information about the pypy-commit mailing list