[Jython-checkins] jython: buffer() support in (str|unicode).partition, .rpartition

jeff.allen jython-checkins at python.org
Sun Oct 27 23:49:34 CET 2013


http://hg.python.org/jython/rev/82fe8cbfc238
changeset:   7141:82fe8cbfc238
user:        Jeff Allen <ja.py at farowl.co.uk>
date:        Tue Oct 22 23:46:17 2013 +0100
summary:
  buffer() support in (str|unicode).partition, .rpartition
Tests added to string_tests.py for buffer arguments.
Includes some re-work of coerceToUnicode that incidentally fixes
a null pointer exception in unicode.replace() with None as argument.

files:
  Lib/test/string_tests.py           |  40 +++++++
  src/org/python/core/PyString.java  |  96 +++++++++--------
  src/org/python/core/PyUnicode.java |  43 +++++--
  3 files changed, 123 insertions(+), 56 deletions(-)


diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py
--- a/Lib/test/string_tests.py
+++ b/Lib/test/string_tests.py
@@ -1218,6 +1218,26 @@
         # mixed use of str and unicode
         self.assertEqual('a/b/c'.partition(u'/'), ('a', '/', 'b/c'))
 
+        # with buffer arg (Jython addition)
+        b = buffer('ti')
+        if self.__class__.type2test is unicode:
+            self.checkequal(('this is the par', u'ti', 'tion method'),
+                'this is the partition method', 'partition', b)
+        else:
+            self.checkequal(('this is the par', b, 'tion method'),
+                'this is the partition method', 'partition', b)
+
+        # with memoryview arg (Jython addition)
+        if test_support.is_jython:
+            # CPython does not support until v3.2
+            with memoryview('ti') as m:
+                if self.__class__.type2test is unicode:
+                    self.checkequal(('this is the par', u'ti', 'tion method'),
+                        'this is the partition method', 'partition', m)
+                else:
+                    self.checkequal(('this is the par', m, 'tion method'),
+                        'this is the partition method', 'partition', m)
+
     def test_rpartition(self):
 
         self.checkequal(('this is the rparti', 'ti', 'on method'),
@@ -1236,6 +1256,26 @@
         # mixed use of str and unicode
         self.assertEqual('a/b/c'.rpartition(u'/'), ('a/b', '/', 'c'))
 
+        # with buffer arg (Jython addition)
+        b = buffer('ti')
+        if self.__class__.type2test is unicode:
+            self.checkequal(('this is the parti', u'ti', 'on method'),
+                'this is the partition method', 'rpartition', b)
+        else:
+            self.checkequal(('this is the parti', b, 'on method'),
+                'this is the partition method', 'rpartition', b)
+
+        # with memoryview arg (Jython addition)
+        if test_support.is_jython:
+            # CPython does not support until v3.2
+            with memoryview('ti') as m:
+                if self.__class__.type2test is unicode:
+                    self.checkequal(('this is the parti', u'ti', 'on method'),
+                        'this is the partition method', 'rpartition', m)
+                else:
+                    self.checkequal(('this is the parti', m, 'on method'),
+                        'this is the partition method', 'rpartition', m)
+
     def test_none_arguments(self):
         # issue 11828
         s = 'hello'
diff --git a/src/org/python/core/PyString.java b/src/org/python/core/PyString.java
--- a/src/org/python/core/PyString.java
+++ b/src/org/python/core/PyString.java
@@ -1126,7 +1126,7 @@
             // Promote the problem to a Unicode one
             return ((PyUnicode)decode()).unicode_strip(chars);
         } else {
-            // It ought to be None, null, some kind of bytes the with buffer API.
+            // It ought to be None, null, some kind of bytes with the buffer API.
             String stripChars = asStripSepOrError(chars, "strip");
             // Strip specified characters or whitespace if stripChars == null
             return new PyString(_strip(stripChars));
@@ -1296,7 +1296,7 @@
             // Promote the problem to a Unicode one
             return ((PyUnicode)decode()).unicode_lstrip(chars);
         } else {
-            // It ought to be None, null, some kind of bytes the with buffer API.
+            // It ought to be None, null, some kind of bytes with the buffer API.
             String stripChars = asStripSepOrError(chars, "lstrip");
             // Strip specified characters or whitespace if stripChars == null
             return new PyString(_lstrip(stripChars));
@@ -1385,7 +1385,7 @@
             // Promote the problem to a Unicode one
             return ((PyUnicode)decode()).unicode_rstrip(chars);
         } else {
-            // It ought to be None, null, some kind of bytes the with buffer API.
+            // It ought to be None, null, some kind of bytes with the buffer API.
             String stripChars = asStripSepOrError(chars, "rstrip");
             // Strip specified characters or whitespace if stripChars == null
             return new PyString(_rstrip(stripChars));
@@ -1506,7 +1506,7 @@
             // Promote the problem to a Unicode one
             return ((PyUnicode)decode()).unicode_split(sepObj, maxsplit);
         } else {
-            // It ought to be None, null, some kind of bytes the with buffer API.
+            // It ought to be None, null, some kind of bytes with the buffer API.
             String sep = asStripSepOrError(sepObj, "split");
             // Split on specified string or whitespace if sep == null
             return _split(sep, maxsplit);
@@ -1757,7 +1757,7 @@
             // Promote the problem to a Unicode one
             return ((PyUnicode)decode()).unicode_rsplit(sepObj, maxsplit);
         } else {
-            // It ought to be None, null, some kind of bytes the with buffer API.
+            // It ought to be None, null, some kind of bytes with the buffer API.
             String sep = asStripSepOrError(sepObj, "rsplit");
             // Split on specified string or whitespace if sep == null
             return _rsplit(sep, maxsplit);
@@ -1925,6 +1925,14 @@
         return list;
     }
 
+    /**
+     * Equivalent to Python <code>str.partition()</code>, splits the <code>PyString</code> at the
+     * first occurrence of <code>sepObj</code> returning a {@link PyTuple} containing the part
+     * before the separator, the separator itself, and the part after the separator.
+     *
+     * @param sepObj str, unicode or object implementing {@link BufferProtocol}
+     * @return tuple of parts
+     */
     public PyTuple partition(PyObject sepObj) {
         return str_partition(sepObj);
     }
@@ -1932,28 +1940,25 @@
     @ExposedMethod(doc = BuiltinDocs.str_partition_doc)
     final PyTuple str_partition(PyObject sepObj) {
 
-        // XXX Accept PyObject that may be BufferProtocol or PyUnicode
-
-        String sep;
-
         if (sepObj instanceof PyUnicode) {
+            // Deal with Unicode separately
             return unicodePartition(sepObj);
-        } else if (sepObj instanceof PyString) {
-            sep = ((PyString)sepObj).getString();
+
         } else {
-            throw Py.TypeError("expected a character buffer object");
-        }
-
-        if (sep.length() == 0) {
-            throw Py.ValueError("empty separator");
-        }
-
-        int index = getString().indexOf(sep);
-        if (index != -1) {
-            return new PyTuple(fromSubstring(0, index), sepObj, fromSubstring(index + sep.length(),
-                    getString().length()));
-        } else {
-            return new PyTuple(this, Py.EmptyString, Py.EmptyString);
+            // It ought to be some kind of bytes with the buffer API.
+            String sep = asStringOrError(sepObj);
+
+            if (sep.length() == 0) {
+                throw Py.ValueError("empty separator");
+            }
+
+            int index = getString().indexOf(sep);
+            if (index != -1) {
+                return new PyTuple(fromSubstring(0, index), sepObj, fromSubstring(
+                        index + sep.length(), getString().length()));
+            } else {
+                return new PyTuple(this, Py.EmptyString, Py.EmptyString);
+            }
         }
     }
 
@@ -1979,6 +1984,14 @@
         }
     }
 
+    /**
+     * Equivalent to Python <code>str.rpartition()</code>, splits the <code>PyString</code> at the
+     * last occurrence of <code>sepObj</code> returning a {@link PyTuple} containing the part
+     * before the separator, the separator itself, and the part after the separator.
+     *
+     * @param sepObj str, unicode or object implementing {@link BufferProtocol}
+     * @return tuple of parts
+     */
     public PyTuple rpartition(PyObject sepObj) {
         return str_rpartition(sepObj);
     }
@@ -1986,28 +1999,25 @@
     @ExposedMethod(doc = BuiltinDocs.str_rpartition_doc)
     final PyTuple str_rpartition(PyObject sepObj) {
 
-        // XXX Accept PyObject that may be BufferProtocol or PyUnicode
-
-        String sep;
-
         if (sepObj instanceof PyUnicode) {
+            // Deal with Unicode separately
             return unicodeRpartition(sepObj);
-        } else if (sepObj instanceof PyString) {
-            sep = ((PyString)sepObj).getString();
+
         } else {
-            throw Py.TypeError("expected a character buffer object");
-        }
-
-        if (sep.length() == 0) {
-            throw Py.ValueError("empty separator");
-        }
-
-        int index = getString().lastIndexOf(sep);
-        if (index != -1) {
-            return new PyTuple(fromSubstring(0, index), sepObj, fromSubstring(index + sep.length(),
-                    getString().length()));
-        } else {
-            return new PyTuple(Py.EmptyString, Py.EmptyString, this);
+            // It ought to be some kind of bytes with the buffer API.
+            String sep = asStringOrError(sepObj);
+
+            if (sep.length() == 0) {
+                throw Py.ValueError("empty separator");
+            }
+
+            int index = getString().lastIndexOf(sep);
+            if (index != -1) {
+                return new PyTuple(fromSubstring(0, index), sepObj, fromSubstring(
+                        index + sep.length(), getString().length()));
+            } else {
+                return new PyTuple(Py.EmptyString, Py.EmptyString, this);
+            }
         }
     }
 
diff --git a/src/org/python/core/PyUnicode.java b/src/org/python/core/PyUnicode.java
--- a/src/org/python/core/PyUnicode.java
+++ b/src/org/python/core/PyUnicode.java
@@ -432,19 +432,15 @@
 
     /**
      * Helper used many times to "coerce" a method argument into a <code>PyUnicode</code> (which it
-     * may already be). A <code>null</code> argument or a <code>PyNone</code> causes
-     * <code>null</code> to be returned.
+     * may already be). A <code>null</code> or incoercible argument will raise a
+     * <code>TypeError</code>.
      *
      * @param o the object to coerce
-     * @return an equivalent <code>PyUnicode</code> (or o itself, or <code>null</code>)
+     * @return an equivalent <code>PyUnicode</code> (or o itself)
      */
     private PyUnicode coerceToUnicode(PyObject o) {
-        if (o == null) {
-            return null;
-        } else if (o instanceof PyUnicode) {
+        if (o instanceof PyUnicode) {
             return (PyUnicode)o;
-        } else if (o == Py.None) {
-            return null;
         } else if (o instanceof BufferProtocol) {
             // PyString or PyByteArray, PyMemoryView, Py2kBuffer ...
             PyBuffer buf = ((BufferProtocol)o).getBuffer(PyBUF.FULL_RO);
@@ -454,11 +450,32 @@
                 buf.release();
             }
         } else {
+            // o is some type not allowed:
+            if (o == null) {
+                // Do something safe and approximately right
+                o = Py.None;
+            }
             throw Py.TypeError("coercing to Unicode: need string or buffer, "
                     + o.getType().fastGetName() + " found");
         }
     }
 
+    /**
+     * Helper used many times to "coerce" a method argument into a <code>PyUnicode</code> (which it
+     * may already be). A <code>null</code> argument or a <code>PyNone</code> causes
+     * <code>null</code> to be returned.
+     *
+     * @param o the object to coerce
+     * @return an equivalent <code>PyUnicode</code> (or o itself, or <code>null</code>)
+     */
+    private PyUnicode coerceToUnicodeOrNull(PyObject o) {
+        if (o == null || o == Py.None) {
+            return null;
+        } else {
+            return coerceToUnicode(o);
+        }
+    }
+
     @ExposedMethod(doc = BuiltinDocs.unicode___contains___doc)
     final boolean unicode___contains__(PyObject o) {
         return str___contains__(o);
@@ -605,7 +622,7 @@
      * <code>PyUnicode</code> (which it may already be). A <code>null</code> argument or a
      * <code>PyNone</code> causes <code>null</code> to be returned. A buffer type is not acceptable
      * to (Unicode) <code>.strip()</code>. This is the difference from
-     * {@link #coerceToUnicode(PyObject)}.
+     * {@link #coerceToUnicodeOrNull(PyObject)}.
      *
      * @param o the object to coerce
      * @return an equivalent <code>PyUnicode</code> (or o itself, or <code>null</code>)
@@ -693,7 +710,7 @@
 
     @ExposedMethod(doc = BuiltinDocs.unicode_partition_doc)
     final PyTuple unicode_partition(PyObject sep) {
-        return unicodePartition(sep);
+        return unicodePartition(coerceToUnicode(sep));
     }
 
     private abstract class SplitIterator implements Iterator {
@@ -947,12 +964,12 @@
 
     @ExposedMethod(doc = BuiltinDocs.unicode_rpartition_doc)
     final PyTuple unicode_rpartition(PyObject sep) {
-        return unicodeRpartition(sep);
+        return unicodeRpartition(coerceToUnicode(sep));
     }
 
     @ExposedMethod(defaults = {"null", "-1"}, doc = BuiltinDocs.unicode_split_doc)
     final PyList unicode_split(PyObject sepObj, int maxsplit) {
-        PyUnicode sep = coerceToUnicode(sepObj);
+        PyUnicode sep = coerceToUnicodeOrNull(sepObj);
         if (sep != null) {
             return _split(sep.getString(), maxsplit);
         } else {
@@ -962,7 +979,7 @@
 
     @ExposedMethod(defaults = {"null", "-1"}, doc = BuiltinDocs.unicode_rsplit_doc)
     final PyList unicode_rsplit(PyObject sepObj, int maxsplit) {
-        PyUnicode sep = coerceToUnicode(sepObj);
+        PyUnicode sep = coerceToUnicodeOrNull(sepObj);
         if (sep != null) {
             return _rsplit(sep.getString(), maxsplit);
         } else {

-- 
Repository URL: http://hg.python.org/jython


More information about the Jython-checkins mailing list