[Jython-checkins] jython: buffer() support in str.replace, unicode.replace and str.translate.
jeff.allen
jython-checkins at python.org
Sun Oct 27 23:49:41 CET 2013
http://hg.python.org/jython/rev/4af2f4241912
changeset: 7146:4af2f4241912
user: Jeff Allen <ja.py at farowl.co.uk>
date: Sun Oct 27 21:46:26 2013 +0000
summary:
buffer() support in str.replace, unicode.replace and str.translate.
Java API and some tests in string_tests augmented. Removed probably unused code
PyString.translate(PyObject) in favour of the new code with same signature.
files:
Lib/test/string_tests.py | 52 +++-
src/org/python/core/PyString.java | 249 ++++++++++------
src/org/python/core/PyUnicode.java | 61 ++-
3 files changed, 230 insertions(+), 132 deletions(-)
diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py
--- a/Lib/test/string_tests.py
+++ b/Lib/test/string_tests.py
@@ -817,13 +817,11 @@
EQ("bobobXbobob", "bobobobXbobobob", "replace", "bobob", "bob")
EQ("BOBOBOB", "BOBOBOB", "replace", "bob", "bobby")
- # buffer not supported in Jython.
- if not test_support.is_jython:
- with test_support.check_py3k_warnings():
- ba = buffer('a')
- bb = buffer('b')
- EQ("bbc", "abc", "replace", ba, bb)
- EQ("aac", "abc", "replace", bb, ba)
+ with test_support.check_py3k_warnings():
+ ba = buffer('a')
+ bb = buffer('b')
+ EQ("bbc", "abc", "replace", ba, bb)
+ EQ("aac", "abc", "replace", bb, ba)
#
self.checkequal('one@two!three!', 'one!two!three!', 'replace', '!', '@', 1)
@@ -853,6 +851,40 @@
self.checkraises(TypeError, 'hello', 'replace', 42, 'h')
self.checkraises(TypeError, 'hello', 'replace', 'h', 42)
+ # Repeat some tests including buffer API objects (Jython addition)
+ if test_support.is_jython:
+ for buftype in (buffer, memoryview, bytearray):
+ # Buffer type as sought argument
+ EQ("", "", "replace", buftype(""), "")
+ EQ("", "", "replace", buftype("A"), "A")
+ EQ("*-A*-A*-", "AA", "replace", buftype(""), "*-")
+ EQ("", "AAA", "replace", buftype("A"), "")
+ EQ("BCD", "ABCADAA", "replace", buftype("A"), "")
+ EQ("ater", "theater", "replace", buftype("the"), "")
+ EQ("", "thethethethe", "replace", buftype("the"), "")
+ EQ("aaaa", "theatheatheathea", "replace", buftype("the"), "")
+ EQ("WhO gOes there?", "Who goes there?", "replace", buftype("o"), "O")
+ EQ("Th** ** a t**sue", "This is a tissue", "replace", buftype("is"), "**")
+ EQ("cobobXcobocob", "bobobXbobobob", "replace", buftype("bob"), "cob")
+ EQ("ReyKKjaviKK", "Reykjavik", "replace", buftype("k"), "KK")
+ EQ("ham, ham, eggs and ham", "spam, spam, eggs and spam",
+ "replace", buftype("spam"), "ham")
+ # Buffer type as replacement argument
+ EQ("", "", "replace", "", buftype(""))
+ EQ("", "", "replace", "A", buftype("A"))
+ EQ("*-A*-A*-", "AA", "replace", "", buftype("*-"))
+ EQ("", "AAA", "replace", "A", buftype(""))
+ EQ("BCD", "ABCADAA", "replace", "A", buftype(""))
+ EQ("ater", "theater", "replace", "the", buftype(""))
+ EQ("", "thethethethe", "replace", "the", buftype(""))
+ EQ("aaaa", "theatheatheathea", "replace", "the", buftype(""))
+ EQ("WhO gOes there?", "Who goes there?", "replace", "o", buftype("O"))
+ EQ("Th** ** a t**sue", "This is a tissue", "replace", "is", buftype("**"))
+ EQ("cobobXcobocob", "bobobXbobobob", "replace", "bob", buftype("cob"))
+ EQ("ReyKKjaviKK", "Reykjavik", "replace", "k", buftype("KK"))
+ EQ("ham, ham, eggs and ham", "spam, spam, eggs and spam",
+ "replace", "spam", buftype("ham"))
+
def test_replace_overflow(self):
# Check for overflow checking on 32 bit machines
if sys.maxint != 2147483647 or struct.calcsize("P") > 4:
@@ -1428,6 +1460,12 @@
table = string.maketrans('abc', 'xyz')
self.checkequal('xyzxyz', 'xyzabcdef', 'translate', table, 'def')
+ # Repeat using buffer API objects (Jython addition)
+ if test_support.is_jython:
+ for buftype in (buffer, memoryview, bytearray):
+ self.checkequal('xyzxyz', 'xyzabcdef', 'translate', buftype(table), 'def')
+ self.checkequal('xyzxyz', 'xyzabcdef', 'translate', table, buftype('def'))
+
table = string.maketrans('a', 'A')
self.checkequal('Abc', 'abc', 'translate', table)
self.checkequal('xyz', 'xyz', 'translate', table)
diff --git a/src/org/python/core/PyString.java b/src/org/python/core/PyString.java
--- a/src/org/python/core/PyString.java
+++ b/src/org/python/core/PyString.java
@@ -708,23 +708,23 @@
if (ret != null) {
return ret;
} else {
- throw Py.TypeError("expected str, bytearray or buffer compatible object");
+ throw Py.TypeError("expected str, bytearray or other buffer compatible object");
}
}
/**
- * Return a String equivalent to the argument according to the calling conventions of the
- * <code>strip</code> and <code>split</code> methods of <code>str</code>. Those methods accept
- * anything bearing the buffer interface as a byte string, but also PyNone (or the argument may
- * be omitted, showing up here as null) to indicate that the criterion is whitespace. They also
- * accept a unicode argument, not dealt with here.
+ * Return a String equivalent to the argument according to the calling conventions of methods
+ * that accept anything bearing the buffer interface as a byte string, but also
+ * <code>PyNone</code>. (Or the argument may be omitted, showing up here as null.) These include
+ * the <code>strip</code> and <code>split</code> methods of <code>str</code>, where a null
+ * indicates that the criterion is whitespace, and <code>str.translate</code>.
*
* @param obj to coerce to a String or null
* @param name of method
* @return coerced value or null
* @throws PyException if the coercion fails
*/
- private static String asStripSepOrError(PyObject obj, String name) throws PyException {
+ private static String asStringNullOrError(PyObject obj, String name) throws PyException {
if (obj == null || obj == Py.None) {
return null;
@@ -732,9 +732,12 @@
String ret = asStringOrNull(obj);
if (ret != null) {
return ret;
+ } else if (name == null) {
+ // A nameless method is the client
+ throw Py.TypeError("expected None, str or buffer compatible object");
} else {
- throw Py.TypeError(name
- + " arg must be None, str, unicode, buffer compatible object");
+ // Tuned for .strip and its relations, which supply their name
+ throw Py.TypeError(name + " arg must be None, str or buffer compatible object");
}
}
}
@@ -1148,7 +1151,7 @@
return ((PyUnicode)decode()).unicode_strip(chars);
} else {
// It ought to be None, null, some kind of bytes with the buffer API.
- String stripChars = asStripSepOrError(chars, "strip");
+ String stripChars = asStringNullOrError(chars, "strip");
// Strip specified characters or whitespace if stripChars == null
return new PyString(_strip(stripChars));
}
@@ -1318,7 +1321,7 @@
return ((PyUnicode)decode()).unicode_lstrip(chars);
} else {
// It ought to be None, null, some kind of bytes with the buffer API.
- String stripChars = asStripSepOrError(chars, "lstrip");
+ String stripChars = asStringNullOrError(chars, "lstrip");
// Strip specified characters or whitespace if stripChars == null
return new PyString(_lstrip(stripChars));
}
@@ -1407,7 +1410,7 @@
return ((PyUnicode)decode()).unicode_rstrip(chars);
} else {
// It ought to be None, null, some kind of bytes with the buffer API.
- String stripChars = asStripSepOrError(chars, "rstrip");
+ String stripChars = asStringNullOrError(chars, "rstrip");
// Strip specified characters or whitespace if stripChars == null
return new PyString(_rstrip(stripChars));
}
@@ -1528,7 +1531,7 @@
return ((PyUnicode)decode()).unicode_split(sepObj, maxsplit);
} else {
// It ought to be None, null, some kind of bytes with the buffer API.
- String sep = asStripSepOrError(sepObj, "split");
+ String sep = asStringNullOrError(sepObj, "split");
// Split on specified string or whitespace if sep == null
return _split(sep, maxsplit);
}
@@ -1779,7 +1782,7 @@
return ((PyUnicode)decode()).unicode_rsplit(sepObj, maxsplit);
} else {
// It ought to be None, null, some kind of bytes with the buffer API.
- String sep = asStripSepOrError(sepObj, "rsplit");
+ String sep = asStringNullOrError(sepObj, "rsplit");
// Split on specified string or whitespace if sep == null
return _rsplit(sep, maxsplit);
}
@@ -2921,54 +2924,93 @@
return first.concat(getString().substring(1).toLowerCase());
}
- @ExposedMethod(defaults = "null", doc = BuiltinDocs.str_replace_doc)
- final PyString str_replace(PyObject oldPiece, PyObject newPiece, PyObject maxsplit) {
-
- // XXX Accept PyObjects that may be BufferProtocol or PyUnicode
-
- if (!(oldPiece instanceof PyString) || !(newPiece instanceof PyString)) {
- throw Py.TypeError("str or unicode required for replace");
+ /**
+ * Equivalent to Python str.replace(old, new), returning a copy of the string with all
+ * occurrences of substring old replaced by new. If either argument is a {@link PyUnicode} (or
+ * this object is), the result will be a <code>PyUnicode</code>.
+ *
+ * @param oldPiece to replace where found.
+ * @param newPiece replacement text.
+ * @param count maximum number of replacements to make, or -1 meaning all of them.
+ * @return PyString (or PyUnicode if any string is one), this string after replacements.
+ */
+ public PyString replace(PyObject oldPieceObj, PyObject newPieceObj) {
+ return str_replace(oldPieceObj, newPieceObj, -1);
+ }
+
+ /**
+ * Equivalent to Python str.replace(old, new[, count]), returning a copy of the string with all
+ * occurrences of substring old replaced by new. If argument <code>count</code> is nonnegative,
+ * only the first <code>count</code> occurrences are replaced. If either argument is a
+ * {@link PyUnicode} (or this object is), the result will be a <code>PyUnicode</code>.
+ *
+ * @param oldPiece to replace where found.
+ * @param newPiece replacement text.
+ * @param count maximum number of replacements to make, or -1 meaning all of them.
+ * @return PyString (or PyUnicode if any string is one), this string after replacements.
+ */
+ public PyString replace(PyObject oldPieceObj, PyObject newPieceObj, int count) {
+ return str_replace(oldPieceObj, newPieceObj, count);
+ }
+
+ @ExposedMethod(defaults = "-1", doc = BuiltinDocs.str_replace_doc)
+ final PyString str_replace(PyObject oldPieceObj, PyObject newPieceObj, int count) {
+ if (oldPieceObj instanceof PyUnicode || newPieceObj instanceof PyUnicode) {
+ // Promote the problem to a Unicode one
+ return ((PyUnicode)decode()).unicode_replace(oldPieceObj, newPieceObj, count);
+ } else {
+ // Neither is a PyUnicode: both ought to be some kind of bytes with the buffer API.
+ String oldPiece = asStringOrError(oldPieceObj);
+ String newPiece = asStringOrError(newPieceObj);
+ return _replace(oldPiece, newPiece, count);
}
-
- return replace((PyString)oldPiece, (PyString)newPiece,
- maxsplit == null ? -1 : maxsplit.asInt());
- }
-
- protected PyString replace(PyString oldPiece, PyString newPiece, int maxsplit) {
- int len = getString().length();
- int old_len = oldPiece.getString().length();
+ }
+
+ /**
+ * Helper common to the Python and Java API for <code>str.replace</code>, returning a new string
+ * equal to this string with occurrences of <code>oldPiece</code> replaced by
+ * <code>newPiece</code>, up to a maximum of <code>count</code> occurrences, or all of them.
+ * This method also supports {@link PyUnicode#unicode_replace(PyObject, PyObject, int)}, in
+ * which context it returns a <code>PyUnicode</code>.
+ *
+ * @param oldPiece to replace where found.
+ * @param newPiece replacement text.
+ * @param count maximum number of replacements to make, or -1 meaning all of them.
+ * @return PyString (or PyUnicode if this string is one), this string after replacements.
+ */
+ protected final PyString _replace(String oldPiece, String newPiece, int count) {
+
+ String s = getString();
+ int len = s.length();
+ int oldLen = oldPiece.length();
+ int newLen = newPiece.length();
+
if (len == 0) {
- if (maxsplit == -1 && old_len == 0) {
- return createInstance(newPiece.getString(), true);
+ if (count < 0 && oldLen == 0) {
+ return createInstance(newPiece, true);
}
- return createInstance(getString(), true);
- }
-
- if (old_len == 0 && newPiece.getString().length() != 0 && maxsplit != 0) {
+ return createInstance(s, true);
+
+ } else if (oldLen == 0 && newLen != 0 && count != 0) {
/*
- * old="" and new != "", interleave new piece with each char in original, taking in
- * effect maxsplit
+ * old="" and new != "", interleave new piece with each char in original, taking into
+ * account count
*/
StringBuilder buffer = new StringBuilder();
int i = 0;
- buffer.append(newPiece.getString());
- for (; i < len && (i < maxsplit - 1 || maxsplit == -1); i++) {
- buffer.append(getString().charAt(i));
- buffer.append(newPiece.getString());
+ buffer.append(newPiece);
+ for (; i < len && (count < 0 || i < count - 1); i++) {
+ buffer.append(s.charAt(i)).append(newPiece);
}
- buffer.append(getString().substring(i));
+ buffer.append(s.substring(i));
return createInstance(buffer.toString(), true);
+
+ } else {
+ if (count < 0) {
+ count = (oldLen == 0) ? len + 1 : len;
+ }
+ return createInstance(newPiece).join(splitfields(oldPiece, count));
}
-
- if (maxsplit == -1) {
- if (old_len == 0) {
- maxsplit = len + 1;
- } else {
- maxsplit = len;
- }
- }
-
- return newPiece.join(splitfields(oldPiece.getString(), maxsplit));
}
public PyString join(PyObject seq) {
@@ -3286,28 +3328,74 @@
return new int[] {iStart, iEnd, iStartUnadjusted};
}
- public String translate() {
- return str_translate(null, null);
- }
-
+ /**
+ * Equivalent to Python <code>str.translate</code> returning a copy of this string where the
+ * characters have been mapped through the translation <code>table</code>. <code>table</code>
+ * must be equivalent to a string of length 256 (if it is not <code>null</code>).
+ *
+ * @param table of character (byte) translations (or <code>null</code>)
+ * @return transformed byte string
+ */
+ public String translate(PyObject table) {
+ return translate(table, null);
+ }
+
+ /**
+ * Equivalent to Python <code>str.translate</code> returning a copy of this string where all
+ * characters (bytes) occurring in the argument <code>deletechars</code> are removed (if it is
+ * not <code>null</code>), and the remaining characters have been mapped through the translation
+ * <code>table</code>. <code>table</code> must be equivalent to a string of length 256 (if it is
+ * not <code>null</code>).
+ *
+ * @param table of character (byte) translations (or <code>null</code>)
+ * @param deletechars set of characters to remove (or <code>null</code>)
+ * @return transformed byte string
+ */
+ public String translate(PyObject table, PyObject deletechars) {
+ return str_translate(table, deletechars);
+ }
+
+ /**
+ * Equivalent to {@link #translate(PyObject)} specialized to <code>String</code>.
+ */
public String translate(String table) {
- return str_translate(table, null);
- }
-
+ return _translate(table, null);
+ }
+
+ /**
+ * Equivalent to {@link #translate(PyObject, PyObject)} specialized to <code>String</code>.
+ */
public String translate(String table, String deletechars) {
- return str_translate(table, deletechars);
+ return _translate(table, deletechars);
}
@ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.str_translate_doc)
- final String str_translate(String table, String deletechars) {
-
- // XXX Accept PyObjects that may be BufferProtocol
+ final String str_translate(PyObject tableObj, PyObject deletecharsObj) {
+ // Accept anything with the buffer API or null
+ String table = asStringNullOrError(tableObj, null);
+ String deletechars = asStringNullOrError(deletecharsObj, null);
+ return _translate(table, deletechars);
+ }
+
+ /**
+ * Helper common to the Python and Java API implementing <code>str.translate</code> returning a
+ * copy of this string where all characters (bytes) occurring in the argument
+ * <code>deletechars</code> are removed (if it is not <code>null</code>), and the remaining
+ * characters have been mapped through the translation <code>table</code>, which must be
+ * equivalent to a string of length 256 (if it is not <code>null</code>).
+ *
+ * @param table of character (byte) translations (or <code>null</code>)
+ * @param deletechars set of characters to remove (or <code>null</code>)
+ * @return transformed byte string
+ */
+ private final String _translate(String table, String deletechars) {
if (table != null && table.length() != 256) {
throw Py.ValueError("translation table must be 256 characters long");
}
StringBuilder buf = new StringBuilder(getString().length());
+
for (int i = 0; i < getString().length(); i++) {
char c = getString().charAt(i);
if (deletechars != null && deletechars.indexOf(c) >= 0) {
@@ -3326,41 +3414,6 @@
return buf.toString();
}
- // XXX: is this needed?
- public String translate(PyObject table) {
- StringBuilder v = new StringBuilder(getString().length());
- for (int i = 0; i < getString().length(); i++) {
- char ch = getString().charAt(i);
-
- PyObject w = Py.newInteger(ch);
- PyObject x = table.__finditem__(w);
- if (x == null) {
- /* No mapping found: default to 1-1 mapping */
- v.append(ch);
- continue;
- }
-
- /* Apply mapping */
- if (x instanceof PyInteger) {
- int value = ((PyInteger)x).getValue();
- v.append((char)value);
- } else if (x == Py.None) {
- // Do nothing
- } else if (x instanceof PyString) {
- if (x.__len__() != 1) {
- /* 1-n mapping */
- throw new PyException(Py.NotImplementedError,
- "1-n mappings are currently not implemented");
- }
- v.append(x.toString());
- } else {
- /* wrong return value */
- throw Py.TypeError("character mapping must return integer, None or unicode");
- }
- }
- return v.toString();
- }
-
public boolean islower() {
return str_islower();
}
diff --git a/src/org/python/core/PyUnicode.java b/src/org/python/core/PyUnicode.java
--- a/src/org/python/core/PyUnicode.java
+++ b/src/org/python/core/PyUnicode.java
@@ -1169,43 +1169,50 @@
return new PyUnicode(buffer);
}
- @ExposedMethod(defaults = "-1", doc = BuiltinDocs.unicode___getslice___doc)
- final PyObject unicode_replace(PyObject oldPieceObj, PyObject newPieceObj, int maxsplit) {
+ @ExposedMethod(defaults = "-1", doc = BuiltinDocs.unicode_replace_doc)
+ final PyString unicode_replace(PyObject oldPieceObj, PyObject newPieceObj, int count) {
+
+ // Convert other argument types to PyUnicode (or error)
PyUnicode newPiece = coerceToUnicode(newPieceObj);
PyUnicode oldPiece = coerceToUnicode(oldPieceObj);
+
if (isBasicPlane() && newPiece.isBasicPlane() && oldPiece.isBasicPlane()) {
- return replace(oldPiece, newPiece, maxsplit);
- }
+ // Use the mechanics of PyString, since all is basic plane
+ return _replace(oldPiece.getString(), newPiece.getString(), count);
- StringBuilder buffer = new StringBuilder();
+ } else {
+ // A Unicode-specific implementation is needed working in code points
+ StringBuilder buffer = new StringBuilder();
- if (oldPiece.getCodePointCount() == 0) {
- Iterator<Integer> iter = newSubsequenceIterator();
- for (int i = 1; (maxsplit == -1 || i < maxsplit) && iter.hasNext(); i++) {
- if (i == 1) {
+ if (oldPiece.getCodePointCount() == 0) {
+ Iterator<Integer> iter = newSubsequenceIterator();
+ for (int i = 1; (count == -1 || i < count) && iter.hasNext(); i++) {
+ if (i == 1) {
+ buffer.append(newPiece.getString());
+ }
+ buffer.appendCodePoint(iter.next());
buffer.append(newPiece.getString());
}
- buffer.appendCodePoint(iter.next());
- buffer.append(newPiece.getString());
- }
- while (iter.hasNext()) {
- buffer.appendCodePoint(iter.next());
- }
- return new PyUnicode(buffer);
- } else {
- SplitIterator iter = newSplitIterator(oldPiece, maxsplit);
- int numSplits = 0;
- while (iter.hasNext()) {
- buffer.append(((PyUnicode)iter.next()).getString());
- if (iter.hasNext()) {
+ while (iter.hasNext()) {
+ buffer.appendCodePoint(iter.next());
+ }
+ return new PyUnicode(buffer);
+
+ } else {
+ SplitIterator iter = newSplitIterator(oldPiece, count);
+ int numSplits = 0;
+ while (iter.hasNext()) {
+ buffer.append(((PyUnicode)iter.next()).getString());
+ if (iter.hasNext()) {
+ buffer.append(newPiece.getString());
+ }
+ numSplits++;
+ }
+ if (iter.getEndsWithSeparator() && (count == -1 || numSplits <= count)) {
buffer.append(newPiece.getString());
}
- numSplits++;
+ return new PyUnicode(buffer);
}
- if (iter.getEndsWithSeparator() && (maxsplit == -1 || numSplits <= maxsplit)) {
- buffer.append(newPiece.getString());
- }
- return new PyUnicode(buffer);
}
}
--
Repository URL: http://hg.python.org/jython
More information about the Jython-checkins
mailing list