[pypy-svn] pypy xapian: encode utf8

iko commits-noreply at bitbucket.org
Thu Apr 28 16:13:57 CEST 2011


Author: Anders Hammarquist <iko at iko.pp.se>
Branch: xapian
Changeset: r43711:ce4a4022a789
Date: 2011-04-26 20:16 +0200
http://bitbucket.org/pypy/pypy/changeset/ce4a4022a789/

Log:	encode utf8

diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py
--- a/pypy/module/cpyext/test/test_unicodeobject.py
+++ b/pypy/module/cpyext/test/test_unicodeobject.py
@@ -188,6 +188,20 @@
         assert space.unwrap(w_u) == 'sp'
         rffi.free_charp(u)
 
+    def test_encode_utf8(self, space, api):
+        uni = u'abcdefg'
+        data = rffi.unicode2wcharp(uni)
+        w_s = api.PyUnicode_EncodeUTF8(data, len(uni), lltype.nullptr(rffi.CCHARP.TO))
+        assert space.eq_w(space.wrap("abcdefg"), w_s)
+        rffi.free_wcharp(data)
+        
+        uni = u'räksmörgås'
+        data = rffi.unicode2wcharp(uni)
+        w_s = api.PyUnicode_EncodeUTF8(data, len(uni), lltype.nullptr(rffi.CCHARP.TO))
+        assert space.eq_w(space.wrap("r\xc3\xa4ksm\xc3\xb6rg\xc3\xa5s"), w_s)
+        rffi.free_wcharp(data)
+        
+
     def test_IS(self, space, api):
         for char in [0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x1c, 0x1d, 0x1e, 0x1f,
                      0x20, 0x85, 0xa0, 0x1680, 0x2000, 0x2001, 0x2002,

diff --git a/pypy/module/cpyext/stubs.py b/pypy/module/cpyext/stubs.py
--- a/pypy/module/cpyext/stubs.py
+++ b/pypy/module/cpyext/stubs.py
@@ -1,4 +1,4 @@
-from pypy.module.cpyext.api import (
+xfrom pypy.module.cpyext.api import (
     cpython_api, PyObject, PyObjectP, CANNOT_FAIL
     )
 from pypy.module.cpyext.complexobject import Py_complex_ptr as Py_complex
@@ -2545,15 +2545,6 @@
     changes in your code for properly supporting 64-bit systems."""
     raise NotImplementedError
 
-@cpython_api([rffi.CWCHARP, Py_ssize_t, rffi.CCHARP], PyObject)
-def PyUnicode_EncodeUTF8(space, s, size, errors):
-    """Encode the Py_UNICODE buffer of the given size using UTF-8 and return a
-    Python string object.  Return NULL if an exception was raised by the codec.
-    
-    This function used an int type for size. This might require
-    changes in your code for properly supporting 64-bit systems."""
-    raise NotImplementedError
-
 @cpython_api([rffi.CCHARP, Py_ssize_t, rffi.CCHARP, rffi.INTP], PyObject)
 def PyUnicode_DecodeUTF32(space, s, size, errors, byteorder):
     """Decode length bytes from a UTF-32 encoded buffer string and return the

diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -406,6 +406,22 @@
         w_errors = space.w_None
     return space.call_method(w_str, 'decode', space.wrap("utf-8"), w_errors)
 
+@cpython_api([rffi.CWCHARP, Py_ssize_t, rffi.CCHARP], PyObject)
+def PyUnicode_EncodeUTF8(space, s, size, errors):
+    """Encode the Py_UNICODE buffer of the given size using UTF-8 and return a
+    Python string object.  Return NULL if an exception was raised by the codec.
+    
+    This function used an int type for size. This might require
+    changes in your code for properly supporting 64-bit systems."""
+
+    w_s = space.wrap(rffi.wcharpsize2unicode(s, size))
+    if errors:
+        w_errors = space.wrap(rffi.charp2str(errors))
+    else:
+        w_errors = space.w_None
+    return space.call_method(w_s, 'encode', space.wrap('utf-8'), w_errors)
+
+
 @cpython_api([rffi.CCHARP, Py_ssize_t, rffi.CCHARP, rffi.INTP], PyObject)
 def PyUnicode_DecodeUTF16(space, s, size, llerrors, pbyteorder):
     """Decode length bytes from a UTF-16 encoded buffer string and return the


More information about the Pypy-commit mailing list