[Jython-checkins] jython: Implement bytearray.is* methods (isalnum, isalpha, isdigit, islower, isspace,

frank.wierzbicki jython-checkins at python.org
Wed Jun 13 20:44:04 CEST 2012


http://hg.python.org/jython/rev/7e68dde41018
changeset:   6706:7e68dde41018
user:        Jeff Allen <ja...py at farowl.co.uk>
date:        Sun Jun 10 20:03:32 2012 +0100
summary:
  Implement bytearray.is* methods (isalnum, isalpha, isdigit, islower, isspace, istitle and isupper).
Choices of character class in the upper 128 may not match CPython, as these methods make a Unicode interpretation rather than a locale-dependent one.
 Now scoring 1 failure and 14 errors in test_bytes.py.

files:
  src/org/python/core/BaseBytes.java   |  267 ++++++++++++++-
  src/org/python/core/PyByteArray.java |   39 ++
  2 files changed, 305 insertions(+), 1 deletions(-)


diff --git a/src/org/python/core/BaseBytes.java b/src/org/python/core/BaseBytes.java
--- a/src/org/python/core/BaseBytes.java
+++ b/src/org/python/core/BaseBytes.java
@@ -3253,7 +3253,7 @@
 
     /**
      * Ready-to-expose implementation of Python <code>splitlines(keepends)</code>, returning a list
-     * of the lines in the string, breaking at line boundaries. Line breaks are not included in the
+     * of the lines in the array, breaking at line boundaries. Line breaks are not included in the
      * resulting list unless keepends is given and true.
      *
      * @param keepends if true, include the end of line bytes(s)
@@ -3480,6 +3480,259 @@
         return builder.getResult();
     }
 
+    //
+    // Character class operations
+    //
+
+    /**
+     * Java API equivalent of Python <code>isalnum()</code>. This method treats the bytes as Unicode
+     * point codes and is consistent with Java's {@link Character#isLetterOrDigit(char)}.
+     *
+     * @return true if all bytes in the array are point codes for alphanumerics and there is at
+     *         least one byte, false otherwise.
+     */
+    public boolean isalnum() {
+        return basebytes_isalnum();
+    }
+
+    /**
+     * Ready-to-expose implementation of Python <code>isalnum()</code>.
+     *
+     * @return true if all bytes in the array are point codes for alphanumerics and there is at
+     *         least one byte, false otherwise.
+     */
+    final boolean basebytes_isalnum() {
+        if (size <= 0) {
+            // Treat empty string as special case
+            return false;
+        } else {
+            // Test the bytes
+            for (int i = 0; i < size; i++) {
+                if (!Character.isLetterOrDigit(charAt(i))) {
+                    return false;
+                }
+            }
+            return true;
+        }
+    }
+
+    /**
+     * Java API equivalent of Python <code>isalpha()</code>. This method treats the bytes as Unicode
+     * point codes and is consistent with Java's {@link Character#isLetter(char)}.
+     *
+     * @return true if all bytes in the array are alphabetic and there is at least one byte, false
+     *         otherwise
+     */
+    public boolean isalpha() {
+        return basebytes_isalpha();
+    }
+
+    /**
+     * Ready-to-expose implementation of Python <code>isalpha()</code>.
+     *
+     * @return true if all bytes in the array are alphabetic and there is at least one byte, false
+     *         otherwise
+     */
+    final boolean basebytes_isalpha() {
+        if (size <= 0) {
+            // Treat empty string as special case
+            return false;
+        } else {
+            // Test the bytes
+            for (int i = 0; i < size; i++) {
+                if (!Character.isLetter(charAt(i))) {
+                    return false;
+                }
+            }
+            return true;
+        }
+    }
+
+    /**
+     * Java API equivalent of Python <code>isdigit()</code>. This method treats the bytes as Unicode
+     * point codes and is consistent with Java's {@link Character#isDigit(char)}.
+     *
+     * @return true if all bytes in the array are point codes for digits and there is at least one
+     *         byte, false otherwise.
+     */
+    public boolean isdigit() {
+        return basebytes_isdigit();
+    }
+
+    /**
+     * Ready-to-expose implementation of Python <code>isdigit()</code>.
+     *
+     * @return true if all bytes in the array are point codes for digits and there is at least one
+     *         byte, false otherwise.
+     */
+    final boolean basebytes_isdigit() {
+        if (size <= 0) {
+            // Treat empty string as special case
+            return false;
+        } else {
+            // Test the bytes
+            for (int i = 0; i < size; i++) {
+                if (!Character.isDigit(charAt(i))) {
+                    return false;
+                }
+            }
+            return true;
+        }
+    }
+
+    /**
+     * Java API equivalent of Python <code>islower()</code>. This method treats the bytes as Unicode
+     * point codes and is consistent with Java's {@link Character#isLowerCase(char)}.
+     *
+     * @return true if all cased bytes in the array are point codes for lowercase characters and
+     *         there is at least one cased byte, false otherwise.
+     */
+    public boolean islower() {
+        return basebytes_islower();
+    }
+
+    /**
+     * Ready-to-expose implementation of Python <code>islower()</code>.
+     *
+     * @return true if all cased bytes in the array are point codes for lowercase characters and
+     *         there is at least one cased byte, false otherwise.
+     */
+    final boolean basebytes_islower() {
+        boolean hasCased = false;
+        // Test the bytes
+        for (int i = 0; i < size; i++) {
+            char c = charAt(i);
+            if (Character.isUpperCase(c)) {
+                return false;
+            } else if (hasCased) {
+                continue;   // Don't need to keep checking for cased characters
+            } else if (Character.isLowerCase(c)) {
+                hasCased = true;
+            }
+        }
+        // Found no upper case bytes, but did we find any cased bytes at all?
+        return hasCased;
+    }
+
+    /**
+     * Java API equivalent of Python <code>isspace()</code>. This method treats the bytes as Unicode
+     * point codes and is consistent with Java's {@link Character#isWhitespace(char)}.
+     *
+     * @return true if all the bytes in the array are point codes for whitespace characters and
+     *         there is at least one byte, false otherwise.
+     */
+    public boolean isspace() {
+        return basebytes_isspace();
+    }
+
+    /**
+     * Ready-to-expose implementation of Python <code>isspace()</code>.
+     *
+     * @return true if all the bytes in the array are point codes for whitespace characters and
+     *         there is at least one byte, false otherwise.
+     */
+    final boolean basebytes_isspace() {
+        if (size <= 0) {
+            // Treat empty string as special case
+            return false;
+        } else {
+            // Test the bytes
+            for (int i = 0; i < size; i++) {
+                if (!Character.isWhitespace(charAt(i))) {
+                    return false;
+                }
+            }
+            return true;
+        }
+    }
+
+    /**
+     * Java API equivalent of Python <code>istitle()</code>. This method treats the bytes as Unicode
+     * point codes and is consistent with Java's {@link Character#isUpperCase(char)} and
+     * {@link Character#isLowerCase(char)}.
+     *
+     * @return true if the string is a titlecased string and there is at least one cased byte, for example
+     *         uppercase characters may only follow uncased bytes and lowercase characters only
+     *         cased ones. Return false otherwise.
+     */
+    public boolean istitle() {
+        return basebytes_istitle();
+    }
+
+    /**
+     * Ready-to-expose implementation of Python <code>istitle()</code>.
+     *
+     * @return true if the string is a titlecased string and there is at least one cased byte, for example
+     *         uppercase characters may only follow uncased bytes and lowercase characters only
+     *         cased ones. Return false otherwise.
+     */
+    final boolean basebytes_istitle() {
+
+        int state = 0;
+        // 0 = have seen no cased characters (can't be in a word)
+        // 1 = have seen cased character, but am not in a word
+        // 2 = in a word (hence have seen cased character)
+
+        for (int i = 0; i < size; i++) {
+            char c = charAt(i);
+            if (Character.isUpperCase(c)) {
+                if (state == 2) {
+                    // Violation: can't continue a word in upper case
+                    return false;
+                } else {
+                    // Validly in a word
+                    state = 2;
+                }
+            } else if (Character.isLowerCase(c)) {
+                if (state != 2) {
+                    // Violation: can't start a word in lower case
+                    return false;
+                }
+            } else {
+                if (state == 2) {
+                    // Uncased character: end of the word as we know it
+                    state = 1;
+                }
+            }
+        }
+        // Found no case violations, but did we find any cased bytes at all?
+        return state != 0;
+    }
+
+    /**
+     * Java API equivalent of Python <code>isupper()</code>. This method treats the bytes as Unicode
+     * point codes and is consistent with Java's {@link Character#isUpperCase(char)}.
+     *
+     * @return true if all cased bytes in the array are point codes for uppercase characters and
+     *         there is at least one cased byte, false otherwise.
+     */
+    public boolean isupper() {
+        return basebytes_isupper();
+    }
+
+    /**
+     * Ready-to-expose implementation of Python <code>isupper()</code>.
+     *
+     * @return true if all cased bytes in the array are point codes for uppercase characters and
+     *         there is at least one cased byte, false otherwise.
+     */
+    final boolean basebytes_isupper() {
+        boolean hasCased = false;
+        // Test the bytes
+        for (int i = 0; i < size; i++) {
+            char c = charAt(i);
+            if (Character.isLowerCase(c)) {
+                return false;
+            } else if (hasCased) {
+                continue;   // Don't need to keep checking for cased characters
+            } else if (Character.isUpperCase(c)) {
+                hasCased = true;
+            }
+        }
+        // Found no lower case bytes, but did we find any cased bytes at all?
+        return hasCased;
+    }
+
     /*
      * ============================================================================================
      * Java API for access as byte[]
@@ -3511,6 +3764,18 @@
     }
 
     /**
+     * Return the Python byte (in range 0 to 255 inclusive) at the given index, interpreted as an
+     * unsigned point code, without checking the index.
+     *
+     * @param index of value in byte array
+     * @return the char value at the index
+     * @throws IndexOutOfBoundsException if outside storage array
+     */
+    private final char charAt(int index) throws IndexOutOfBoundsException {
+        return (char)(0xff & storage[index + offset]);
+    }
+
+    /**
      * Helper to implement {@link #repeat(int)}. Use something like:
      *
      * <pre>
diff --git a/src/org/python/core/PyByteArray.java b/src/org/python/core/PyByteArray.java
--- a/src/org/python/core/PyByteArray.java
+++ b/src/org/python/core/PyByteArray.java
@@ -1270,6 +1270,45 @@
         pyinsert(boundToSequence(index.asIndex()), value);
     }
 
+    //
+    // Character class operations
+    //
+
+    @ExposedMethod(doc = BuiltinDocs.bytearray_isalnum_doc)
+    final boolean bytearray_isalnum() {
+        return basebytes_isalnum();
+    }
+
+    @ExposedMethod(doc = BuiltinDocs.bytearray_isalpha_doc)
+    final boolean bytearray_isalpha() {
+        return basebytes_isalpha();
+    }
+
+    @ExposedMethod(doc = BuiltinDocs.bytearray_isdigit_doc)
+    final boolean bytearray_isdigit() {
+        return basebytes_isdigit();
+    }
+
+    @ExposedMethod(doc = BuiltinDocs.bytearray_islower_doc)
+    final boolean bytearray_islower() {
+        return basebytes_islower();
+    }
+
+    @ExposedMethod(doc = BuiltinDocs.bytearray_isspace_doc)
+    final boolean bytearray_isspace() {
+        return basebytes_isspace();
+    }
+
+    @ExposedMethod(doc = BuiltinDocs.bytearray_istitle_doc)
+    final boolean bytearray_istitle() {
+        return basebytes_istitle();
+    }
+
+    @ExposedMethod(doc = BuiltinDocs.bytearray_isupper_doc)
+    final boolean bytearray_isupper() {
+        return basebytes_isupper();
+    }
+
     /**
      * Implementation of Python <code>join(iterable)</code>. Return a bytearray which is the
      * concatenation of the byte arrays in the iterable <code>iterable</code>. The separator between

-- 
Repository URL: http://hg.python.org/jython


More information about the Jython-checkins mailing list