[pypy-svn] r73999 - in pypy/branch/cpython-extension/pypy/module/cpyext: . test

Fri Apr 23 00:29:39 CEST 2010

Author: afa
Date: Fri Apr 23 00:29:37 2010
New Revision: 73999

Modified:
   pypy/branch/cpython-extension/pypy/module/cpyext/test/test_unicodeobject.py
   pypy/branch/cpython-extension/pypy/module/cpyext/unicodeobject.py
Log:
Add PyUnicode_GetSize, plus PyUnicode_EncodeMBCS and PyUnicode_DecodeMBCS on Windows.

The detection of leaks is not perfect; see test_unicodeobject.test_leak.


Modified: pypy/branch/cpython-extension/pypy/module/cpyext/test/test_unicodeobject.py
==============================================================================

--- pypy/branch/cpython-extension/pypy/module/cpyext/test/test_unicodeobject.py	(original)
+++ pypy/branch/cpython-extension/pypy/module/cpyext/test/test_unicodeobject.py	Fri Apr 23 00:29:37 2010
@@ -2,10 +2,12 @@
 from pypy.module.cpyext.test.test_api import BaseApiTest
 from pypy.module.cpyext.unicodeobject import Py_UNICODE
 from pypy.rpython.lltypesystem import rffi, lltype
+import sys, py
 
 class TestUnicode(BaseApiTest):
     def test_unicodeobject(self, space, api):
         assert api.PyUnicode_GET_SIZE(space.wrap(u'späm')) == 4
+        assert api.PyUnicode_GetSize(space.wrap(u'späm')) == 4
         unichar = rffi.sizeof(Py_UNICODE)
         assert api.PyUnicode_GET_DATA_SIZE(space.wrap(u'späm')) == 4 * unichar
 
@@ -94,3 +96,27 @@
             api.PyUnicode_Decode(b_text, 4, b_encoding, None)) == u'caf\xe9'
         rffi.free_charp(b_text)
         rffi.free_charp(b_encoding)
+
+    def test_leak(self):
+        py.test.skip("This test seems to leak memory")
+        size = 50
+        raw_buf, gc_buf = rffi.alloc_buffer(size)
+        for i in range(size): raw_buf[i] = 'a'
+        str = rffi.str_from_buffer(raw_buf, gc_buf, size, size)
+        rffi.keep_buffer_alive_until_here(raw_buf, gc_buf)
+
+    def test_mbcs(self, space, api):
+        if sys.platform != 'win32':
+            py.test.skip("mcbs encoding only exists on Windows")
+        # unfortunately, mbcs is locale-dependent.
+        # This tests works at least on a Western Windows.
+        unichars = u"abc" + unichr(12345)
+        wbuf = rffi.unicode2wcharp(unichars)
+        w_str = api.PyUnicode_EncodeMBCS(wbuf, 4, None)
+        rffi.free_wcharp(wbuf)
+        assert space.type(w_str) is space.w_str
+        assert space.str_w(w_str) == "abc?"
+
+        # XXX this test seems to leak references, see test_leak above
+        from pypy.module.cpyext.test.test_cpyext import freeze_refcnts
+        freeze_refcnts(self)

Modified: pypy/branch/cpython-extension/pypy/module/cpyext/unicodeobject.py
==============================================================================
--- pypy/branch/cpython-extension/pypy/module/cpyext/unicodeobject.py	(original)
+++ pypy/branch/cpython-extension/pypy/module/cpyext/unicodeobject.py	Fri Apr 23 00:29:37 2010
@@ -10,6 +10,7 @@
 from pypy.module.cpyext.stringobject import PyString_Check
 from pypy.module.sys.interp_encoding import setdefaultencoding
 from pypy.objspace.std import unicodeobject, unicodetype
+import sys
 
 PyUnicodeObjectStruct = lltype.ForwardReference()
 PyUnicodeObject = lltype.Ptr(PyUnicodeObjectStruct)
@@ -142,6 +143,15 @@
                              space.wrap("expected unicode object"))
     return PyUnicode_AS_UNICODE(space, ref)
 
+@cpython_api([PyObject], Py_ssize_t, error=-1)
+def PyUnicode_GetSize(space, ref):
+    if from_ref(space, rffi.cast(PyObject, ref.c_ob_type)) is space.w_unicode:
+        ref = rffi.cast(PyUnicodeObject, ref)
+        return ref.c_size
+    else:
+        w_obj = from_ref(space, ref)
+        return space.int_w(space.len(w_obj))
+
 @cpython_api([PyUnicodeObject, rffi.CWCHARP, Py_ssize_t], Py_ssize_t, error=-1)
 def PyUnicode_AsWideChar(space, ref, buf, size):
     """Copy the Unicode object contents into the wchar_t buffer w.  At most
@@ -245,3 +255,31 @@
     else:
         w_errors = space.w_None
     return space.call_method(w_str, 'decode', w_encoding, w_errors)
+
+
+if sys.platform == 'win32':
+    @cpython_api([CONST_WSTRING, Py_ssize_t, CONST_STRING], PyObject)
+    def PyUnicode_EncodeMBCS(space, wchar_p, length, errors):
+        """Encode the Py_UNICODE buffer of the given size using MBCS and return a
+        Python string object.  Return NULL if an exception was raised by the codec.
+        """
+        w_unicode = space.wrap(rffi.wcharpsize2unicode(wchar_p, length))
+        if errors:
+            w_errors = space.wrap(rffi.charp2str(errors))
+        else:
+            w_errors = space.w_None
+        return space.call_method(w_unicode, "encode",
+                                 space.wrap("mbcs"), w_errors)
+
+    @cpython_api([CONST_STRING, Py_ssize_t, CONST_STRING], PyObject)
+    def PyUnicode_DecodeMBCS(space, s, size, errors):
+        """Create a Unicode object by decoding size bytes of the MBCS encoded string s.
+        Return NULL if an exception was raised by the codec.
+        """
+        w_str = space.wrap(rffi.charpsize2str(s, size))
+        w_encoding = space.wrap("mbcs")
+        if errors:
+            w_errors = space.wrap(rffi.charp2str(errors))
+        else:
+            w_errors = space.w_None
+        return space.call_method(w_str, 'decode', w_encoding, w_errors)