[pypy-commit] pypy default: cpyext: implement PyUnicode_Split and PyUnicode_Splitlines
amauryfa
noreply at buildbot.pypy.org
Tue Apr 10 23:55:16 CEST 2012
Author: Amaury Forgeot d'Arc <amauryfa at gmail.com>
Branch:
Changeset: r54279:859f1579f2bd
Date: 2012-04-10 23:19 +0200
http://bitbucket.org/pypy/pypy/changeset/859f1579f2bd/
Log: cpyext: implement PyUnicode_Split and PyUnicode_Splitlines
diff --git a/pypy/module/cpyext/stubs.py b/pypy/module/cpyext/stubs.py
--- a/pypy/module/cpyext/stubs.py
+++ b/pypy/module/cpyext/stubs.py
@@ -2253,24 +2253,6 @@
"""Concat two strings giving a new Unicode string."""
raise NotImplementedError
-@cpython_api([PyObject, PyObject, Py_ssize_t], PyObject)
-def PyUnicode_Split(space, s, sep, maxsplit):
- """Split a string giving a list of Unicode strings. If sep is NULL, splitting
- will be done at all whitespace substrings. Otherwise, splits occur at the given
- separator. At most maxsplit splits will be done. If negative, no limit is
- set. Separators are not included in the resulting list.
-
- This function used an int type for maxsplit. This might require
- changes in your code for properly supporting 64-bit systems."""
- raise NotImplementedError
-
-@cpython_api([PyObject, rffi.INT_real], PyObject)
-def PyUnicode_Splitlines(space, s, keepend):
- """Split a Unicode string at line breaks, returning a list of Unicode strings.
- CRLF is considered to be one line break. If keepend is 0, the Line break
- characters are not included in the resulting strings."""
- raise NotImplementedError
-
@cpython_api([PyObject, PyObject, rffi.CCHARP], PyObject)
def PyUnicode_Translate(space, str, table, errors):
"""Translate a string by applying a character mapping table to it and return the
diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py
--- a/pypy/module/cpyext/test/test_unicodeobject.py
+++ b/pypy/module/cpyext/test/test_unicodeobject.py
@@ -472,3 +472,14 @@
assert api.PyUnicode_Find(w_str, space.wrap(u"c"), 3, 7, -1) == 5
assert api.PyUnicode_Find(w_str, space.wrap(u"c"), 0, 4, -1) == 2
assert api.PyUnicode_Find(w_str, space.wrap(u"z"), 0, 4, -1) == -1
+
+ def test_split(self, space, api):
+ w_str = space.wrap(u"a\nb\nc\nd")
+ assert "[u'a', u'b', u'c', u'd']" == space.unwrap(space.repr(
+ api.PyUnicode_Split(w_str, space.wrap('\n'), -1)))
+ assert r"[u'a', u'b', u'c\nd']" == space.unwrap(space.repr(
+ api.PyUnicode_Split(w_str, space.wrap('\n'), 2)))
+ assert "[u'a', u'b', u'c', u'd']" == space.unwrap(space.repr(
+ api.PyUnicode_Splitlines(w_str, 0)))
+ assert r"[u'a\n', u'b\n', u'c\n', u'd']" == space.unwrap(space.repr(
+ api.PyUnicode_Splitlines(w_str, 1)))
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -622,3 +622,20 @@
w_pos = space.call_method(w_str, "rfind", w_substr,
space.wrap(start), space.wrap(end))
return space.int_w(w_pos)
+
+@cpython_api([PyObject, PyObject, Py_ssize_t], PyObject)
+def PyUnicode_Split(space, w_str, w_sep, maxsplit):
+ """Split a string giving a list of Unicode strings. If sep is
+ NULL, splitting will be done at all whitespace substrings.
+ Otherwise, splits occur at the given separator. At most maxsplit
+ splits will be done. If negative, no limit is set. Separators
+ are not included in the resulting list."""
+ return space.call_method(w_str, "split", w_sep, space.wrap(maxsplit))
+
+@cpython_api([PyObject, rffi.INT_real], PyObject)
+def PyUnicode_Splitlines(space, w_str, keepend):
+ """Split a Unicode string at line breaks, returning a list of
+ Unicode strings. CRLF is considered to be one line break. If
+ keepend is 0, the Line break characters are not included in the
+ resulting strings."""
+ return space.call_method(w_str, "splitlines", space.wrap(keepend))
More information about the pypy-commit
mailing list