[Jython-checkins] jython: Implement bytearray.is* methods (isalnum, isalpha, isdigit, islower, isspace,
frank.wierzbicki
jython-checkins at python.org
Wed Jun 13 20:44:04 CEST 2012
http://hg.python.org/jython/rev/7e68dde41018
changeset: 6706:7e68dde41018
user: Jeff Allen <ja...py at farowl.co.uk>
date: Sun Jun 10 20:03:32 2012 +0100
summary:
Implement bytearray.is* methods (isalnum, isalpha, isdigit, islower, isspace, istitle and isupper).
Choices of character class in the upper 128 may not match CPython, as these methods make a Unicode interpretation rather than a locale-dependent one.
Now scoring 1 failure and 14 errors in test_bytes.py.
files:
src/org/python/core/BaseBytes.java | 267 ++++++++++++++-
src/org/python/core/PyByteArray.java | 39 ++
2 files changed, 305 insertions(+), 1 deletions(-)
diff --git a/src/org/python/core/BaseBytes.java b/src/org/python/core/BaseBytes.java
--- a/src/org/python/core/BaseBytes.java
+++ b/src/org/python/core/BaseBytes.java
@@ -3253,7 +3253,7 @@
/**
* Ready-to-expose implementation of Python <code>splitlines(keepends)</code>, returning a list
- * of the lines in the string, breaking at line boundaries. Line breaks are not included in the
+ * of the lines in the array, breaking at line boundaries. Line breaks are not included in the
* resulting list unless keepends is given and true.
*
* @param keepends if true, include the end of line bytes(s)
@@ -3480,6 +3480,259 @@
return builder.getResult();
}
+ //
+ // Character class operations
+ //
+
+ /**
+ * Java API equivalent of Python <code>isalnum()</code>. This method treats the bytes as Unicode
+ * code points and is consistent with Java's {@link Character#isLetterOrDigit(char)}.
+ *
+ * @return true if all bytes in the array are code points for alphanumerics and there is at
+ * least one byte, false otherwise.
+ */
+ public boolean isalnum() {
+ return basebytes_isalnum();
+ }
+
+ /**
+ * Ready-to-expose implementation of Python <code>isalnum()</code>.
+ *
+ * @return true if all bytes in the array are code points for alphanumerics and there is at
+ * least one byte, false otherwise.
+ */
+ final boolean basebytes_isalnum() {
+ if (size <= 0) {
+ // Treat empty array as special case: there is no byte to satisfy the test
+ return false;
+ } else {
+ // Test the bytes: the first one that is not a letter or digit decides the result
+ for (int i = 0; i < size; i++) {
+ if (!Character.isLetterOrDigit(charAt(i))) {
+ return false;
+ }
+ }
+ return true;
+ }
+ }
+
+ /**
+ * Java API equivalent of Python <code>isalpha()</code>. This method treats the bytes as Unicode
+ * code points and is consistent with Java's {@link Character#isLetter(char)}.
+ *
+ * @return true if all bytes in the array are alphabetic and there is at least one byte, false
+ * otherwise
+ */
+ public boolean isalpha() {
+ return basebytes_isalpha();
+ }
+
+ /**
+ * Ready-to-expose implementation of Python <code>isalpha()</code>.
+ *
+ * @return true if all bytes in the array are alphabetic and there is at least one byte, false
+ * otherwise
+ */
+ final boolean basebytes_isalpha() {
+ if (size <= 0) {
+ // Treat empty array as special case: there is no byte to satisfy the test
+ return false;
+ } else {
+ // Test the bytes: the first one that is not a letter decides the result
+ for (int i = 0; i < size; i++) {
+ if (!Character.isLetter(charAt(i))) {
+ return false;
+ }
+ }
+ return true;
+ }
+ }
+
+ /**
+ * Java API equivalent of Python <code>isdigit()</code>. This method treats the bytes as Unicode
+ * code points and is consistent with Java's {@link Character#isDigit(char)}.
+ *
+ * @return true if all bytes in the array are code points for digits and there is at least one
+ * byte, false otherwise.
+ */
+ public boolean isdigit() {
+ return basebytes_isdigit();
+ }
+
+ /**
+ * Ready-to-expose implementation of Python <code>isdigit()</code>.
+ *
+ * @return true if all bytes in the array are code points for digits and there is at least one
+ * byte, false otherwise.
+ */
+ final boolean basebytes_isdigit() {
+ if (size <= 0) {
+ // Treat empty array as special case: there is no byte to satisfy the test
+ return false;
+ } else {
+ // Test the bytes: the first one that is not a digit decides the result
+ for (int i = 0; i < size; i++) {
+ if (!Character.isDigit(charAt(i))) {
+ return false;
+ }
+ }
+ return true;
+ }
+ }
+
+ /**
+ * Java API equivalent of Python <code>islower()</code>. This method treats the bytes as Unicode
+ * code points and is consistent with Java's {@link Character#isLowerCase(char)}.
+ *
+ * @return true if all cased bytes in the array are code points for lowercase characters and
+ * there is at least one cased byte, false otherwise.
+ */
+ public boolean islower() {
+ return basebytes_islower();
+ }
+
+ /**
+ * Ready-to-expose implementation of Python <code>islower()</code>.
+ *
+ * @return true if all cased bytes in the array are code points for lowercase characters and
+ * there is at least one cased byte, false otherwise.
+ */
+ final boolean basebytes_islower() {
+ boolean hasCased = false;
+ // Test the bytes: a single upper case byte is enough to return false
+ for (int i = 0; i < size; i++) {
+ char c = charAt(i);
+ if (Character.isUpperCase(c)) {
+ return false;
+ } else if (hasCased) {
+ continue; // Don't need to keep checking for cased characters
+ } else if (Character.isLowerCase(c)) {
+ hasCased = true;
+ }
+ }
+ // Found no upper case bytes, but did we find any cased bytes at all?
+ return hasCased;
+ }
+
+ /**
+ * Java API equivalent of Python <code>isspace()</code>. This method treats the bytes as Unicode
+ * code points and is consistent with Java's {@link Character#isWhitespace(char)}.
+ *
+ * @return true if all the bytes in the array are code points for whitespace characters and
+ * there is at least one byte, false otherwise.
+ */
+ public boolean isspace() {
+ return basebytes_isspace();
+ }
+
+ /**
+ * Ready-to-expose implementation of Python <code>isspace()</code>.
+ *
+ * @return true if all the bytes in the array are code points for whitespace characters and
+ * there is at least one byte, false otherwise.
+ */
+ final boolean basebytes_isspace() {
+ if (size <= 0) {
+ // Treat empty array as special case: there is no byte to satisfy the test
+ return false;
+ } else {
+ // Test the bytes: the first one that is not whitespace decides the result
+ for (int i = 0; i < size; i++) {
+ if (!Character.isWhitespace(charAt(i))) {
+ return false;
+ }
+ }
+ return true;
+ }
+ }
+
+ /**
+ * Java API equivalent of Python <code>istitle()</code>. This method treats the bytes as Unicode
+ * code points and is consistent with Java's {@link Character#isUpperCase(char)} and
+ * {@link Character#isLowerCase(char)}.
+ *
+ * @return true if the array is title-cased and there is at least one cased byte, that is,
+ * uppercase characters may only follow uncased bytes and lowercase characters only
+ * cased ones. Return false otherwise.
+ */
+ public boolean istitle() {
+ return basebytes_istitle();
+ }
+
+ /**
+ * Ready-to-expose implementation of Python <code>istitle()</code>.
+ *
+ * @return true if the array is title-cased and there is at least one cased byte, that is,
+ * uppercase characters may only follow uncased bytes and lowercase characters only
+ * cased ones. Return false otherwise.
+ */
+ final boolean basebytes_istitle() {
+
+ int state = 0;
+ // 0 = have seen no cased characters (can't be in a word)
+ // 1 = have seen cased character, but am not in a word
+ // 2 = in a word (hence have seen cased character)
+
+ for (int i = 0; i < size; i++) {
+ char c = charAt(i);
+ if (Character.isUpperCase(c)) {
+ if (state == 2) {
+ // Violation: can't continue a word in upper case
+ return false;
+ } else {
+ // Validly in a word
+ state = 2;
+ }
+ } else if (Character.isLowerCase(c)) {
+ if (state != 2) {
+ // Violation: can't start a word in lower case
+ return false;
+ }
+ } else {
+ if (state == 2) {
+ // Uncased character: end of the word as we know it
+ state = 1;
+ }
+ }
+ }
+ // Found no case violations, but did we find any cased bytes at all?
+ return state != 0;
+ }
+
+ /**
+ * Java API equivalent of Python <code>isupper()</code>. This method treats the bytes as Unicode
+ * code points and is consistent with Java's {@link Character#isUpperCase(char)}.
+ *
+ * @return true if all cased bytes in the array are code points for uppercase characters and
+ * there is at least one cased byte, false otherwise.
+ */
+ public boolean isupper() {
+ return basebytes_isupper();
+ }
+
+ /**
+ * Ready-to-expose implementation of Python <code>isupper()</code>.
+ *
+ * @return true if all cased bytes in the array are code points for uppercase characters and
+ * there is at least one cased byte, false otherwise.
+ */
+ final boolean basebytes_isupper() {
+ boolean hasCased = false;
+ // Test the bytes: a single lower case byte is enough to return false
+ for (int i = 0; i < size; i++) {
+ char c = charAt(i);
+ if (Character.isLowerCase(c)) {
+ return false;
+ } else if (hasCased) {
+ continue; // Don't need to keep checking for cased characters
+ } else if (Character.isUpperCase(c)) {
+ hasCased = true;
+ }
+ }
+ // Found no lower case bytes, but did we find any cased bytes at all?
+ return hasCased;
+ }
+
/*
* ============================================================================================
* Java API for access as byte[]
@@ -3511,6 +3764,18 @@
}
/**
+ * Return the Python byte (in range 0 to 255 inclusive) at the given index, interpreted as an
+ * unsigned code point, without checking the index.
+ *
+ * @param index of value in byte array
+ * @return the char value at the index
+ * @throws IndexOutOfBoundsException if outside storage array
+ */
+ private final char charAt(int index) throws IndexOutOfBoundsException {
+ return (char)(0xff & storage[index + offset]); // the 0xff mask keeps the result in 0 to 255
+ }
+
+ /**
* Helper to implement {@link #repeat(int)}. Use something like:
*
* <pre>
diff --git a/src/org/python/core/PyByteArray.java b/src/org/python/core/PyByteArray.java
--- a/src/org/python/core/PyByteArray.java
+++ b/src/org/python/core/PyByteArray.java
@@ -1270,6 +1270,45 @@
pyinsert(boundToSequence(index.asIndex()), value);
}
+ //
+ // Character class operations
+ //
+
+ @ExposedMethod(doc = BuiltinDocs.bytearray_isalnum_doc)
+ final boolean bytearray_isalnum() {
+ return basebytes_isalnum(); // delegate to the shared implementation in BaseBytes
+ }
+
+ @ExposedMethod(doc = BuiltinDocs.bytearray_isalpha_doc)
+ final boolean bytearray_isalpha() {
+ return basebytes_isalpha(); // delegate to the shared implementation in BaseBytes
+ }
+
+ @ExposedMethod(doc = BuiltinDocs.bytearray_isdigit_doc)
+ final boolean bytearray_isdigit() {
+ return basebytes_isdigit(); // delegate to the shared implementation in BaseBytes
+ }
+
+ @ExposedMethod(doc = BuiltinDocs.bytearray_islower_doc)
+ final boolean bytearray_islower() {
+ return basebytes_islower(); // delegate to the shared implementation in BaseBytes
+ }
+
+ @ExposedMethod(doc = BuiltinDocs.bytearray_isspace_doc)
+ final boolean bytearray_isspace() {
+ return basebytes_isspace(); // delegate to the shared implementation in BaseBytes
+ }
+
+ @ExposedMethod(doc = BuiltinDocs.bytearray_istitle_doc)
+ final boolean bytearray_istitle() {
+ return basebytes_istitle(); // delegate to the shared implementation in BaseBytes
+ }
+
+ @ExposedMethod(doc = BuiltinDocs.bytearray_isupper_doc)
+ final boolean bytearray_isupper() {
+ return basebytes_isupper(); // delegate to the shared implementation in BaseBytes
+ }
+
/**
* Implementation of Python <code>join(iterable)</code>. Return a bytearray which is the
* concatenation of the byte arrays in the iterable <code>iterable</code>. The separator between
--
Repository URL: http://hg.python.org/jython
More information about the Jython-checkins
mailing list