[Jython-checkins] jython: buffer() support in (str|unicode).partition, .rpartition
jeff.allen
jython-checkins at python.org
Sun Oct 27 23:49:34 CET 2013
http://hg.python.org/jython/rev/82fe8cbfc238
changeset: 7141:82fe8cbfc238
user: Jeff Allen <ja.py at farowl.co.uk>
date: Tue Oct 22 23:46:17 2013 +0100
summary:
buffer() support in (str|unicode).partition, .rpartition
Tests added to string_tests.py for buffer arguments.
Includes some re-work of coerceToUnicode that incidentally fixes
a null pointer exception in unicode.replace() with None as argument.
files:
Lib/test/string_tests.py | 40 +++++++
src/org/python/core/PyString.java | 96 +++++++++--------
src/org/python/core/PyUnicode.java | 43 +++++--
3 files changed, 123 insertions(+), 56 deletions(-)
diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py
--- a/Lib/test/string_tests.py
+++ b/Lib/test/string_tests.py
@@ -1218,6 +1218,26 @@
# mixed use of str and unicode
self.assertEqual('a/b/c'.partition(u'/'), ('a', '/', 'b/c'))
+ # with buffer arg (Jython addition)
+ b = buffer('ti')
+ if self.__class__.type2test is unicode:
+ self.checkequal(('this is the par', u'ti', 'tion method'),
+ 'this is the partition method', 'partition', b)
+ else:
+ self.checkequal(('this is the par', b, 'tion method'),
+ 'this is the partition method', 'partition', b)
+
+ # with memoryview arg (Jython addition)
+ if test_support.is_jython:
+ # CPython does not support until v3.2
+ with memoryview('ti') as m:
+ if self.__class__.type2test is unicode:
+ self.checkequal(('this is the par', u'ti', 'tion method'),
+ 'this is the partition method', 'partition', m)
+ else:
+ self.checkequal(('this is the par', m, 'tion method'),
+ 'this is the partition method', 'partition', m)
+
def test_rpartition(self):
self.checkequal(('this is the rparti', 'ti', 'on method'),
@@ -1236,6 +1256,26 @@
# mixed use of str and unicode
self.assertEqual('a/b/c'.rpartition(u'/'), ('a/b', '/', 'c'))
+ # with buffer arg (Jython addition)
+ b = buffer('ti')
+ if self.__class__.type2test is unicode:
+ self.checkequal(('this is the parti', u'ti', 'on method'),
+ 'this is the partition method', 'rpartition', b)
+ else:
+ self.checkequal(('this is the parti', b, 'on method'),
+ 'this is the partition method', 'rpartition', b)
+
+ # with memoryview arg (Jython addition)
+ if test_support.is_jython:
+ # CPython does not support until v3.2
+ with memoryview('ti') as m:
+ if self.__class__.type2test is unicode:
+ self.checkequal(('this is the parti', u'ti', 'on method'),
+ 'this is the partition method', 'rpartition', m)
+ else:
+ self.checkequal(('this is the parti', m, 'on method'),
+ 'this is the partition method', 'rpartition', m)
+
def test_none_arguments(self):
# issue 11828
s = 'hello'
diff --git a/src/org/python/core/PyString.java b/src/org/python/core/PyString.java
--- a/src/org/python/core/PyString.java
+++ b/src/org/python/core/PyString.java
@@ -1126,7 +1126,7 @@
// Promote the problem to a Unicode one
return ((PyUnicode)decode()).unicode_strip(chars);
} else {
- // It ought to be None, null, some kind of bytes the with buffer API.
+ // It ought to be None, null, some kind of bytes with the buffer API.
String stripChars = asStripSepOrError(chars, "strip");
// Strip specified characters or whitespace if stripChars == null
return new PyString(_strip(stripChars));
@@ -1296,7 +1296,7 @@
// Promote the problem to a Unicode one
return ((PyUnicode)decode()).unicode_lstrip(chars);
} else {
- // It ought to be None, null, some kind of bytes the with buffer API.
+ // It ought to be None, null, some kind of bytes with the buffer API.
String stripChars = asStripSepOrError(chars, "lstrip");
// Strip specified characters or whitespace if stripChars == null
return new PyString(_lstrip(stripChars));
@@ -1385,7 +1385,7 @@
// Promote the problem to a Unicode one
return ((PyUnicode)decode()).unicode_rstrip(chars);
} else {
- // It ought to be None, null, some kind of bytes the with buffer API.
+ // It ought to be None, null, some kind of bytes with the buffer API.
String stripChars = asStripSepOrError(chars, "rstrip");
// Strip specified characters or whitespace if stripChars == null
return new PyString(_rstrip(stripChars));
@@ -1506,7 +1506,7 @@
// Promote the problem to a Unicode one
return ((PyUnicode)decode()).unicode_split(sepObj, maxsplit);
} else {
- // It ought to be None, null, some kind of bytes the with buffer API.
+ // It ought to be None, null, some kind of bytes with the buffer API.
String sep = asStripSepOrError(sepObj, "split");
// Split on specified string or whitespace if sep == null
return _split(sep, maxsplit);
@@ -1757,7 +1757,7 @@
// Promote the problem to a Unicode one
return ((PyUnicode)decode()).unicode_rsplit(sepObj, maxsplit);
} else {
- // It ought to be None, null, some kind of bytes the with buffer API.
+ // It ought to be None, null, some kind of bytes with the buffer API.
String sep = asStripSepOrError(sepObj, "rsplit");
// Split on specified string or whitespace if sep == null
return _rsplit(sep, maxsplit);
@@ -1925,6 +1925,14 @@
return list;
}
+ /**
+ * Equivalent to Python <code>str.partition()</code>, splits the <code>PyString</code> at the
+ * first occurrence of <code>sepObj</code> returning a {@link PyTuple} containing the part
+ * before the separator, the separator itself, and the part after the separator.
+ *
+ * @param sepObj str, unicode or object implementing {@link BufferProtocol}
+ * @return tuple of parts
+ */
public PyTuple partition(PyObject sepObj) {
return str_partition(sepObj);
}
@@ -1932,28 +1940,25 @@
@ExposedMethod(doc = BuiltinDocs.str_partition_doc)
final PyTuple str_partition(PyObject sepObj) {
- // XXX Accept PyObject that may be BufferProtocol or PyUnicode
-
- String sep;
-
if (sepObj instanceof PyUnicode) {
+ // Deal with Unicode separately
return unicodePartition(sepObj);
- } else if (sepObj instanceof PyString) {
- sep = ((PyString)sepObj).getString();
+
} else {
- throw Py.TypeError("expected a character buffer object");
- }
-
- if (sep.length() == 0) {
- throw Py.ValueError("empty separator");
- }
-
- int index = getString().indexOf(sep);
- if (index != -1) {
- return new PyTuple(fromSubstring(0, index), sepObj, fromSubstring(index + sep.length(),
- getString().length()));
- } else {
- return new PyTuple(this, Py.EmptyString, Py.EmptyString);
+ // It ought to be some kind of bytes with the buffer API.
+ String sep = asStringOrError(sepObj);
+
+ if (sep.length() == 0) {
+ throw Py.ValueError("empty separator");
+ }
+
+ int index = getString().indexOf(sep);
+ if (index != -1) {
+ return new PyTuple(fromSubstring(0, index), sepObj, fromSubstring(
+ index + sep.length(), getString().length()));
+ } else {
+ return new PyTuple(this, Py.EmptyString, Py.EmptyString);
+ }
}
}
@@ -1979,6 +1984,14 @@
}
}
+ /**
+ * Equivalent to Python <code>str.rpartition()</code>, splits the <code>PyString</code> at the
+ * last occurrence of <code>sepObj</code> returning a {@link PyTuple} containing the part
+ * before the separator, the separator itself, and the part after the separator.
+ *
+ * @param sepObj str, unicode or object implementing {@link BufferProtocol}
+ * @return tuple of parts
+ */
public PyTuple rpartition(PyObject sepObj) {
return str_rpartition(sepObj);
}
@@ -1986,28 +1999,25 @@
@ExposedMethod(doc = BuiltinDocs.str_rpartition_doc)
final PyTuple str_rpartition(PyObject sepObj) {
- // XXX Accept PyObject that may be BufferProtocol or PyUnicode
-
- String sep;
-
if (sepObj instanceof PyUnicode) {
+ // Deal with Unicode separately
return unicodeRpartition(sepObj);
- } else if (sepObj instanceof PyString) {
- sep = ((PyString)sepObj).getString();
+
} else {
- throw Py.TypeError("expected a character buffer object");
- }
-
- if (sep.length() == 0) {
- throw Py.ValueError("empty separator");
- }
-
- int index = getString().lastIndexOf(sep);
- if (index != -1) {
- return new PyTuple(fromSubstring(0, index), sepObj, fromSubstring(index + sep.length(),
- getString().length()));
- } else {
- return new PyTuple(Py.EmptyString, Py.EmptyString, this);
+ // It ought to be some kind of bytes with the buffer API.
+ String sep = asStringOrError(sepObj);
+
+ if (sep.length() == 0) {
+ throw Py.ValueError("empty separator");
+ }
+
+ int index = getString().lastIndexOf(sep);
+ if (index != -1) {
+ return new PyTuple(fromSubstring(0, index), sepObj, fromSubstring(
+ index + sep.length(), getString().length()));
+ } else {
+ return new PyTuple(Py.EmptyString, Py.EmptyString, this);
+ }
}
}
diff --git a/src/org/python/core/PyUnicode.java b/src/org/python/core/PyUnicode.java
--- a/src/org/python/core/PyUnicode.java
+++ b/src/org/python/core/PyUnicode.java
@@ -432,19 +432,15 @@
/**
* Helper used many times to "coerce" a method argument into a <code>PyUnicode</code> (which it
- * may already be). A <code>null</code> argument or a <code>PyNone</code> causes
- * <code>null</code> to be returned.
+ * may already be). A <code>null</code> or incoercible argument will raise a
+ * <code>TypeError</code>.
*
* @param o the object to coerce
- * @return an equivalent <code>PyUnicode</code> (or o itself, or <code>null</code>)
+ * @return an equivalent <code>PyUnicode</code> (or o itself)
*/
private PyUnicode coerceToUnicode(PyObject o) {
- if (o == null) {
- return null;
- } else if (o instanceof PyUnicode) {
+ if (o instanceof PyUnicode) {
return (PyUnicode)o;
- } else if (o == Py.None) {
- return null;
} else if (o instanceof BufferProtocol) {
// PyString or PyByteArray, PyMemoryView, Py2kBuffer ...
PyBuffer buf = ((BufferProtocol)o).getBuffer(PyBUF.FULL_RO);
@@ -454,11 +450,32 @@
buf.release();
}
} else {
+ // o is some type not allowed:
+ if (o == null) {
+ // Do something safe and approximately right
+ o = Py.None;
+ }
throw Py.TypeError("coercing to Unicode: need string or buffer, "
+ o.getType().fastGetName() + " found");
}
}
+ /**
+ * Helper used many times to "coerce" a method argument into a <code>PyUnicode</code> (which it
+ * may already be). A <code>null</code> argument or a <code>PyNone</code> causes
+ * <code>null</code> to be returned.
+ *
+ * @param o the object to coerce
+ * @return an equivalent <code>PyUnicode</code> (or o itself, or <code>null</code>)
+ */
+ private PyUnicode coerceToUnicodeOrNull(PyObject o) {
+ if (o == null || o == Py.None) {
+ return null;
+ } else {
+ return coerceToUnicode(o);
+ }
+ }
+
@ExposedMethod(doc = BuiltinDocs.unicode___contains___doc)
final boolean unicode___contains__(PyObject o) {
return str___contains__(o);
@@ -605,7 +622,7 @@
* <code>PyUnicode</code> (which it may already be). A <code>null</code> argument or a
* <code>PyNone</code> causes <code>null</code> to be returned. A buffer type is not acceptable
* to (Unicode) <code>.strip()</code>. This is the difference from
- * {@link #coerceToUnicode(PyObject)}.
+ * {@link #coerceToUnicodeOrNull(PyObject)}.
*
* @param o the object to coerce
* @return an equivalent <code>PyUnicode</code> (or o itself, or <code>null</code>)
@@ -693,7 +710,7 @@
@ExposedMethod(doc = BuiltinDocs.unicode_partition_doc)
final PyTuple unicode_partition(PyObject sep) {
- return unicodePartition(sep);
+ return unicodePartition(coerceToUnicode(sep));
}
private abstract class SplitIterator implements Iterator {
@@ -947,12 +964,12 @@
@ExposedMethod(doc = BuiltinDocs.unicode_rpartition_doc)
final PyTuple unicode_rpartition(PyObject sep) {
- return unicodeRpartition(sep);
+ return unicodeRpartition(coerceToUnicode(sep));
}
@ExposedMethod(defaults = {"null", "-1"}, doc = BuiltinDocs.unicode_split_doc)
final PyList unicode_split(PyObject sepObj, int maxsplit) {
- PyUnicode sep = coerceToUnicode(sepObj);
+ PyUnicode sep = coerceToUnicodeOrNull(sepObj);
if (sep != null) {
return _split(sep.getString(), maxsplit);
} else {
@@ -962,7 +979,7 @@
@ExposedMethod(defaults = {"null", "-1"}, doc = BuiltinDocs.unicode_rsplit_doc)
final PyList unicode_rsplit(PyObject sepObj, int maxsplit) {
- PyUnicode sep = coerceToUnicode(sepObj);
+ PyUnicode sep = coerceToUnicodeOrNull(sepObj);
if (sep != null) {
return _rsplit(sep.getString(), maxsplit);
} else {
--
Repository URL: http://hg.python.org/jython
More information about the Jython-checkins
mailing list