[pypy-commit] pypy default: cpyext: implement PyUnicode_Split and PyUnicode_Splitlines

Tue Apr 10 23:55:16 CEST 2012

Author: Amaury Forgeot d'Arc <amauryfa at gmail.com>
Branch: 
Changeset: r54279:859f1579f2bd
Date: 2012-04-10 23:19 +0200
http://bitbucket.org/pypy/pypy/changeset/859f1579f2bd/

Log:	cpyext: implement PyUnicode_Split and PyUnicode_Splitlines

diff --git a/pypy/module/cpyext/stubs.py b/pypy/module/cpyext/stubs.py
--- a/pypy/module/cpyext/stubs.py
+++ b/pypy/module/cpyext/stubs.py
@@ -2253,24 +2253,6 @@
     """Concat two strings giving a new Unicode string."""
     raise NotImplementedError
 
- at cpython_api([PyObject, PyObject, Py_ssize_t], PyObject)
-def PyUnicode_Split(space, s, sep, maxsplit):
-    """Split a string giving a list of Unicode strings.  If sep is NULL, splitting
-    will be done at all whitespace substrings.  Otherwise, splits occur at the given
-    separator.  At most maxsplit splits will be done.  If negative, no limit is
-    set.  Separators are not included in the resulting list.
-
-    This function used an int type for maxsplit. This might require
-    changes in your code for properly supporting 64-bit systems."""
-    raise NotImplementedError
-
- at cpython_api([PyObject, rffi.INT_real], PyObject)
-def PyUnicode_Splitlines(space, s, keepend):
-    """Split a Unicode string at line breaks, returning a list of Unicode strings.
-    CRLF is considered to be one line break.  If keepend is 0, the Line break
-    characters are not included in the resulting strings."""
-    raise NotImplementedError
-
 @cpython_api([PyObject, PyObject, rffi.CCHARP], PyObject)
 def PyUnicode_Translate(space, str, table, errors):
     """Translate a string by applying a character mapping table to it and return the
diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py
--- a/pypy/module/cpyext/test/test_unicodeobject.py
+++ b/pypy/module/cpyext/test/test_unicodeobject.py
@@ -472,3 +472,14 @@
         assert api.PyUnicode_Find(w_str, space.wrap(u"c"), 3, 7, -1) == 5
         assert api.PyUnicode_Find(w_str, space.wrap(u"c"), 0, 4, -1) == 2
         assert api.PyUnicode_Find(w_str, space.wrap(u"z"), 0, 4, -1) == -1
+
+    def test_split(self, space, api):
+        w_str = space.wrap(u"a\nb\nc\nd")
+        assert "[u'a', u'b', u'c', u'd']" == space.unwrap(space.repr(
+                api.PyUnicode_Split(w_str, space.wrap('\n'), -1)))
+        assert r"[u'a', u'b', u'c\nd']" == space.unwrap(space.repr(
+                api.PyUnicode_Split(w_str, space.wrap('\n'), 2)))
+        assert "[u'a', u'b', u'c', u'd']" == space.unwrap(space.repr(
+                api.PyUnicode_Splitlines(w_str, 0)))
+        assert r"[u'a\n', u'b\n', u'c\n', u'd']" == space.unwrap(space.repr(
+                api.PyUnicode_Splitlines(w_str, 1)))
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -622,3 +622,20 @@
         w_pos = space.call_method(w_str, "rfind", w_substr,
                                   space.wrap(start), space.wrap(end))
     return space.int_w(w_pos)
+
+ at cpython_api([PyObject, PyObject, Py_ssize_t], PyObject)
+def PyUnicode_Split(space, w_str, w_sep, maxsplit):
+    """Split a string giving a list of Unicode strings.  If sep is
+    NULL, splitting will be done at all whitespace substrings.
+    Otherwise, splits occur at the given separator.  At most maxsplit
+    splits will be done.  If negative, no limit is set.  Separators
+    are not included in the resulting list."""
+    return space.call_method(w_str, "split", w_sep, space.wrap(maxsplit))
+
+ at cpython_api([PyObject, rffi.INT_real], PyObject)
+def PyUnicode_Splitlines(space, w_str, keepend):
+    """Split a Unicode string at line breaks, returning a list of
+    Unicode strings.  CRLF is considered to be one line break.  If
+    keepend is 0, the Line break characters are not included in the
+    resulting strings."""
+    return space.call_method(w_str, "splitlines", space.wrap(keepend))