[pypy-commit] cffi wchar_t: in-progress
arigo
noreply at buildbot.pypy.org
Mon Jul 9 12:31:46 CEST 2012
Author: Armin Rigo <arigo at tunes.org>
Branch: wchar_t
Changeset: r603:c64975b8743a
Date: 2012-07-09 12:31 +0200
http://bitbucket.org/cffi/cffi/changeset/c64975b8743a/
Log: in-progress
diff --git a/c/_cffi_backend.c b/c/_cffi_backend.c
--- a/c/_cffi_backend.c
+++ b/c/_cffi_backend.c
@@ -157,6 +157,10 @@
# endif
#endif
+#ifdef HAVE_WCHAR_H
+# include "wchar_helper.h"
+#endif
+
/************************************************************/
static CTypeDescrObject *
@@ -604,8 +608,10 @@
else if (ct->ct_flags & CT_PRIMITIVE_CHAR) {
if (ct->ct_size == sizeof(char))
return PyString_FromStringAndSize(data, 1);
+#ifdef HAVE_WCHAR_H
else
- return PyUnicode_FromWideChar((wchar_t *)data, 1);
+ return _my_PyUnicode_FromWideChar((wchar_t *)data, 1);
+#endif
}
PyErr_Format(PyExc_SystemError,
@@ -677,10 +683,13 @@
return -1;
}
+#ifdef HAVE_WCHAR_H
static wchar_t _convert_to_wchar_t(PyObject *init)
{
- if (PyUnicode_Check(init) && PyUnicode_GET_SIZE(init) == 1) {
- return (wchar_t)(PyUnicode_AS_UNICODE(init)[0]);
+ if (PyUnicode_Check(init)) {
+ wchar_t ordinal;
+ if (_my_PyUnicode_AsSingleWideChar(init, &ordinal) == 0)
+ return ordinal;
}
if (CData_Check(init) &&
(((CDataObject *)init)->c_type->ct_flags & CT_PRIMITIVE_CHAR) &&
@@ -692,6 +701,7 @@
"of length 1, not %.200s", Py_TYPE(init)->tp_name);
return (wchar_t)-1;
}
+#endif
static int _convert_error(PyObject *init, const char *ct_name,
const char *expected)
@@ -855,12 +865,14 @@
return -1;
data[0] = res;
}
+#ifdef HAVE_WCHAR_H
else {
wchar_t res = _convert_to_wchar_t(init);
if (res == (wchar_t)-1 && PyErr_Occurred())
return -1;
*(wchar_t *)data = res;
}
+#endif
return 0;
}
if (ct->ct_flags & (CT_STRUCT|CT_UNION)) {
@@ -1092,11 +1104,13 @@
static PyObject *cdata_str(CDataObject *cd)
{
- if (cd->c_type->ct_flags & CT_PRIMITIVE_CHAR) {
+ if (cd->c_type->ct_flags & CT_PRIMITIVE_CHAR &&
+ cd->c_type->ct_size == sizeof(char)) {
return PyString_FromStringAndSize(cd->c_data, 1);
}
else if (cd->c_type->ct_itemdescr != NULL &&
- cd->c_type->ct_itemdescr->ct_flags & CT_PRIMITIVE_CHAR) {
+ cd->c_type->ct_itemdescr->ct_flags & CT_PRIMITIVE_CHAR &&
+ cd->c_type->ct_itemdescr->ct_size == sizeof(char)) {
Py_ssize_t length;
if (cd->c_type->ct_flags & CT_ARRAY) {
@@ -1129,6 +1143,48 @@
return cdata_repr(cd);
}
+#ifdef HAVE_WCHAR_H
+/* Implements CData.__unicode__().
+ *
+ * - For a single wide character (a CT_PRIMITIVE_CHAR type wider than
+ *   'char'), returns a unicode object holding that one character.
+ * - For an array of, or pointer to, wide characters, returns the unicode
+ *   string up to (not including) the first zero wchar_t.  For arrays the
+ *   scan is additionally bounded by get_array_length(cd).
+ * - For anything else, falls back to cdata_repr(cd).
+ *
+ * Returns a new reference, or NULL with an exception set (RuntimeError
+ * when asked to read through a NULL pointer).
+ */
+static PyObject *cdata_unicode(CDataObject *cd)
+{
+    if (cd->c_type->ct_flags & CT_PRIMITIVE_CHAR &&
+        cd->c_type->ct_size > sizeof(char)) {
+        return _my_PyUnicode_FromWideChar((wchar_t *)cd->c_data, 1);
+    }
+    else if (cd->c_type->ct_itemdescr != NULL &&
+             cd->c_type->ct_itemdescr->ct_flags & CT_PRIMITIVE_CHAR &&
+             cd->c_type->ct_itemdescr->ct_size > sizeof(char)) {
+        Py_ssize_t length;
+        const wchar_t *start = (const wchar_t *)cd->c_data;
+
+        if (cd->c_type->ct_flags & CT_ARRAY) {
+            /* NOTE(review): assumes get_array_length() returns a number
+               of items (not bytes), as cdata_str() relies on for char
+               arrays -- confirm against its definition. */
+            const Py_ssize_t lenmax = get_array_length(cd);
+
+            /* Stop at the first zero wchar_t, but never read past the
+               end of the array.  memchr() cannot be used here: it
+               searches for a zero *byte*, which occurs inside most
+               wide characters. */
+            length = 0;
+            while (length < lenmax && start[length] != 0)
+                length++;
+        }
+        else {
+            if (cd->c_data == NULL) {
+                PyObject *s = cdata_repr(cd);
+                if (s != NULL) {
+                    PyErr_Format(PyExc_RuntimeError,
+                                 "cannot use unicode() on %s",
+                                 PyString_AS_STRING(s));
+                    Py_DECREF(s);
+                }
+                return NULL;
+            }
+            /* Plain pointer: no known bound, so scan for the
+               terminating zero wchar_t (strlen() would count bytes). */
+            length = 0;
+            while (start[length] != 0)
+                length++;
+        }
+
+        return _my_PyUnicode_FromWideChar(start, length);
+    }
+    else
+        return cdata_repr(cd);
+}
+#endif
+
static PyObject *cdataowning_repr(CDataObject *cd)
{
Py_ssize_t size;
@@ -1670,6 +1726,11 @@
(objobjargproc)cdata_ass_sub, /*mp_ass_subscript*/
};
+/* Method table for CData objects.  cdata_unicode() is only compiled when
+   HAVE_WCHAR_H is defined, so its entry must be guarded the same way;
+   otherwise the build fails on platforms without <wchar.h>. */
+static PyMethodDef CData_methods[] = {
+#ifdef HAVE_WCHAR_H
+    {"__unicode__", (PyCFunction)cdata_unicode, METH_NOARGS},
+#endif
+    {NULL,          NULL}           /* sentinel */
+};
+
static PyTypeObject CData_Type = {
PyVarObject_HEAD_INIT(NULL, 0)
"_cffi_backend.CData",
@@ -1697,6 +1758,8 @@
cdata_richcompare, /* tp_richcompare */
0, /* tp_weaklistoffset */
(getiterfunc)cdata_iter, /* tp_iter */
+ 0, /* tp_iternext */
+ CData_methods, /* tp_methods */
};
static PyTypeObject CDataOwning_Type = {
@@ -2307,12 +2370,18 @@
EPTYPE(ul, unsigned long, CT_PRIMITIVE_UNSIGNED ) \
EPTYPE(ull, unsigned long long, CT_PRIMITIVE_UNSIGNED ) \
EPTYPE(f, float, CT_PRIMITIVE_FLOAT ) \
- EPTYPE(d, double, CT_PRIMITIVE_FLOAT ) \
+ EPTYPE(d, double, CT_PRIMITIVE_FLOAT )
+#ifdef HAVE_WCHAR_H
+# define ENUM_PRIMITIVE_TYPES_WCHAR \
EPTYPE(wc, wchar_t, CT_PRIMITIVE_CHAR )
+#else
+# define ENUM_PRIMITIVE_TYPES_WCHAR /* nothing */
+#endif
#define EPTYPE(code, typename, flags) \
struct aligncheck_##code { char x; typename y; };
ENUM_PRIMITIVE_TYPES
+ ENUM_PRIMITIVE_TYPES_WCHAR
#undef EPTYPE
CTypeDescrObject *td;
@@ -2326,7 +2395,9 @@
flags \
},
ENUM_PRIMITIVE_TYPES
+ ENUM_PRIMITIVE_TYPES_WCHAR
#undef EPTYPE
+#undef ENUM_PRIMITIVE_TYPES_WCHAR
#undef ENUM_PRIMITIVE_TYPES
{ NULL }
};
diff --git a/c/test_c.py b/c/test_c.py
--- a/c/test_c.py
+++ b/c/test_c.py
@@ -1278,6 +1278,14 @@
BWChar = new_primitive_type("wchar_t")
pyuni4 = {1: True, 2: False}[len(u'\U00012345')]
wchar4 = {2: False, 4: True}[sizeof(BWChar)]
+ assert str(cast(BWChar, 0x45)) == "<cdata 'wchar_t' u'E'>"
+ assert str(cast(BWChar, 0x1234)) == "<cdata 'wchar_t' u'\u1234'>"
+ if wchar4:
+ x = cast(BWChar, 0x12345)
+ assert str(x) == "<cdata 'wchar_t' u'\U00012345'>"
+ assert unicode(x) == u'\U00012345'
+ else:
+ assert not pyuni4
#
BWCharP = new_pointer_type(BWChar)
BStruct = new_struct_type("foo_s")
diff --git a/c/wchar_helper.h b/c/wchar_helper.h
new file mode 100644
--- /dev/null
+++ b/c/wchar_helper.h
@@ -0,0 +1,82 @@
+/*
+ * wchar_t helpers
+ */
+
+#if (Py_UNICODE_SIZE == 2) && (SIZEOF_WCHAR_T == 4)
+# define CONVERT_WCHAR_TO_SURROGATES
+#endif
+
+
+#if PY_VERSION_HEX < 0x02070000 && defined(CONVERT_WCHAR_TO_SURROGATES)
+
+/* Replacement for PyUnicode_FromWideChar() on Python < 2.7 builds where
+   Py_UNICODE is 2 bytes and wchar_t is 4 bytes: there the stock function
+   cannot turn a wchar_t value above 0xFFFF into a surrogate pair.
+
+   'w' points to 'size' wchar_t items.  Returns a new unicode object, or
+   NULL with an exception set if 'w' is NULL or the allocation fails.
+*/
+static PyObject *
+_my_PyUnicode_FromWideChar(register const wchar_t *w,
+                           Py_ssize_t size)
+{
+    PyObject *result;
+    Py_UNICODE *out;
+    Py_ssize_t idx;
+    Py_ssize_t extra = 0;
+
+    if (w == NULL) {
+        PyErr_BadInternalCall();
+        return NULL;
+    }
+
+    /* First pass: each value above 0xFFFF needs one additional
+       Py_UNICODE slot, because it is stored as two surrogates. */
+    for (idx = 0; idx < size; idx++) {
+        if (w[idx] > 0xFFFF)
+            extra++;
+    }
+
+    result = PyUnicode_FromUnicode(NULL, size + extra);
+    if (!result)
+        return NULL;
+
+    /* Second pass: copy the data, splitting large values into a
+       high (0xD800..) and a low (0xDC00..) surrogate. */
+    out = PyUnicode_AS_UNICODE(result);
+    for (idx = 0; idx < size; idx++) {
+        wchar_t ch = w[idx];
+        if (ch > 0xFFFF) {
+            ch -= 0x10000;
+            *out++ = 0xD800 | (ch >> 10);
+            *out++ = 0xDC00 | (ch & 0x3FF);
+        }
+        else {
+            *out++ = ch;
+        }
+    }
+    return result;
+}
+
+#else
+
+# define _my_PyUnicode_FromWideChar PyUnicode_FromWideChar
+
+#endif
+
+
+/* Extract the single character held by the unicode object 'unicode'
+   into '*result'.  Accepts a unicode string of length 1 and, when
+   CONVERT_WCHAR_TO_SURROGATES is defined, also a length-2 string that
+   is a valid high/low surrogate pair (combined into one wchar_t).
+   Returns 0 on success, -1 otherwise; no Python exception is set here
+   and '*result' is left untouched on failure. */
+static int _my_PyUnicode_AsSingleWideChar(PyObject *unicode, wchar_t *result)
+{
+    Py_UNICODE *chars = PyUnicode_AS_UNICODE(unicode);
+    const Py_ssize_t len = PyUnicode_GET_SIZE(unicode);
+
+    if (len == 1) {
+        *result = (wchar_t)(chars[0]);
+        return 0;
+    }
+#ifdef CONVERT_WCHAR_TO_SURROGATES
+    if (len == 2) {
+        const Py_UNICODE hi = chars[0];
+        const Py_UNICODE lo = chars[1];
+
+        if (0xD800 <= hi && hi <= 0xDBFF &&
+            0xDC00 <= lo && lo <= 0xDFFF) {
+            *result = 0x10000 + ((hi - 0xD800) << 10) + (lo - 0xDC00);
+            return 0;
+        }
+    }
+#endif
+    return -1;
+}
More information about the pypy-commit
mailing list