[pypy-svn] pypy xapian: encode utf8
iko
commits-noreply at bitbucket.org
Thu Apr 28 16:13:57 CEST 2011
Author: Anders Hammarquist <iko at iko.pp.se>
Branch: xapian
Changeset: r43711:ce4a4022a789
Date: 2011-04-26 20:16 +0200
http://bitbucket.org/pypy/pypy/changeset/ce4a4022a789/
Log: encode utf8
diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py
--- a/pypy/module/cpyext/test/test_unicodeobject.py
+++ b/pypy/module/cpyext/test/test_unicodeobject.py
@@ -188,6 +188,20 @@
assert space.unwrap(w_u) == 'sp'
rffi.free_charp(u)
+ def test_encode_utf8(self, space, api):
+ uni = u'abcdefg'
+ data = rffi.unicode2wcharp(uni)
+ w_s = api.PyUnicode_EncodeUTF8(data, len(uni), lltype.nullptr(rffi.CCHARP.TO))
+ assert space.eq_w(space.wrap("abcdefg"), w_s)
+ rffi.free_wcharp(data)
+
+ uni = u'räksmörgås'
+ data = rffi.unicode2wcharp(uni)
+ w_s = api.PyUnicode_EncodeUTF8(data, len(uni), lltype.nullptr(rffi.CCHARP.TO))
+ assert space.eq_w(space.wrap("r\xc3\xa4ksm\xc3\xb6rg\xc3\xa5s"), w_s)
+ rffi.free_wcharp(data)
+
+
def test_IS(self, space, api):
for char in [0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x1c, 0x1d, 0x1e, 0x1f,
0x20, 0x85, 0xa0, 0x1680, 0x2000, 0x2001, 0x2002,
diff --git a/pypy/module/cpyext/stubs.py b/pypy/module/cpyext/stubs.py
--- a/pypy/module/cpyext/stubs.py
+++ b/pypy/module/cpyext/stubs.py
@@ -1,4 +1,4 @@
-from pypy.module.cpyext.api import (
+xfrom pypy.module.cpyext.api import (
cpython_api, PyObject, PyObjectP, CANNOT_FAIL
)
from pypy.module.cpyext.complexobject import Py_complex_ptr as Py_complex
@@ -2545,15 +2545,6 @@
changes in your code for properly supporting 64-bit systems."""
raise NotImplementedError
-@cpython_api([rffi.CWCHARP, Py_ssize_t, rffi.CCHARP], PyObject)
-def PyUnicode_EncodeUTF8(space, s, size, errors):
- """Encode the Py_UNICODE buffer of the given size using UTF-8 and return a
- Python string object. Return NULL if an exception was raised by the codec.
-
- This function used an int type for size. This might require
- changes in your code for properly supporting 64-bit systems."""
- raise NotImplementedError
-
@cpython_api([rffi.CCHARP, Py_ssize_t, rffi.CCHARP, rffi.INTP], PyObject)
def PyUnicode_DecodeUTF32(space, s, size, errors, byteorder):
"""Decode length bytes from a UTF-32 encoded buffer string and return the
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -406,6 +406,22 @@
w_errors = space.w_None
return space.call_method(w_str, 'decode', space.wrap("utf-8"), w_errors)
+@cpython_api([rffi.CWCHARP, Py_ssize_t, rffi.CCHARP], PyObject)
+def PyUnicode_EncodeUTF8(space, s, size, errors):
+ """Encode the Py_UNICODE buffer of the given size using UTF-8 and return a
+ Python string object. Return NULL if an exception was raised by the codec.
+
+ This function used an int type for size. This might require
+ changes in your code for properly supporting 64-bit systems."""
+
+ w_s = space.wrap(rffi.wcharpsize2unicode(s, size))
+ if errors:
+ w_errors = space.wrap(rffi.charp2str(errors))
+ else:
+ w_errors = space.w_None
+ return space.call_method(w_s, 'encode', space.wrap('utf-8'), w_errors)
+
+
@cpython_api([rffi.CCHARP, Py_ssize_t, rffi.CCHARP, rffi.INTP], PyObject)
def PyUnicode_DecodeUTF16(space, s, size, llerrors, pbyteorder):
"""Decode length bytes from a UTF-16 encoded buffer string and return the
More information about the Pypy-commit
mailing list