[Jython-checkins] jython: Make binascii methods take objects implementing the BufferProtocol interface.

jim.baker jython-checkins at python.org
Sat May 10 17:38:01 CEST 2014


http://hg.python.org/jython/rev/355bb70327e0
changeset:   7236:355bb70327e0
user:        Indra Talip <indra.talip at gmail.com>
date:        Sat May 10 15:52:54 2014 +1000
summary:
  Make binascii methods take objects implementing the BufferProtocol interface.
Change return type on various binascii methods to PyString to prevent automatic
conversion to PyUnicode.

Unlike the other converted methods, binascii.b2a_qp does not operate on the PyBuffer
object directly; instead, it converts the buffer to a Java String so that it can reuse
the existing behaviour. binascii.b2a_qp could probably be made more efficient by
operating on the PyBuffer directly rather than converting to a Java String first.

files:
  src/org/python/modules/binascii.java |  668 ++++++++------
  1 files changed, 379 insertions(+), 289 deletions(-)


diff --git a/src/org/python/modules/binascii.java b/src/org/python/modules/binascii.java
--- a/src/org/python/modules/binascii.java
+++ b/src/org/python/modules/binascii.java
@@ -12,7 +12,10 @@
 import java.util.regex.Pattern;
 
 import org.python.core.ArgParser;
+import org.python.core.BufferProtocol;
 import org.python.core.Py;
+import org.python.core.PyBUF;
+import org.python.core.PyBuffer;
 import org.python.core.PyException;
 import org.python.core.PyObject;
 import org.python.core.PyString;
@@ -272,65 +275,70 @@
      * binary data. Lines normally contain 45 (binary) bytes, except for the
      * last line. Line data may be followed by whitespace.
      */
-    public static PyString a2b_uu(String ascii_data) {
+    public static PyString a2b_uu(BufferProtocol bp) {
         int leftbits = 0;
         int leftchar = 0;
 
-        if (ascii_data.length() == 0)
-            return new PyString("");
+        StringBuilder bin_data = new StringBuilder();
+        PyBuffer ascii_data = bp.getBuffer(PyBUF.SIMPLE);
+        try {
+            if (ascii_data.getLen() == 0)
+                return new PyString("");
+
+
+            char this_ch;
+            int i;
+
+            int ascii_len = ascii_data.getLen()-1;
+
+            int bin_len = (ascii_data.intAt(0) - ' ') & 077;
+
+            for (i = 0; bin_len > 0 && ascii_len > 0; i++, ascii_len--) {
+                this_ch = (char) ascii_data.intAt(i+1);
+                if (this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
+                    // Whitespace. Assume some spaces got eaten at
+                    // end-of-line. (We check this later)
+                    this_ch = 0;
+                } else {
+                    // Check the character for legality
+                    // The 64 in stead of the expected 63 is because
+                    // there are a few uuencodes out there that use
+                    // '@' as zero instead of space.
+                    if ( this_ch < ' ' || this_ch > (' ' + 64)) {
+                        throw new PyException(Error, "Illegal char");
+                    }
+                    this_ch = (char)((this_ch - ' ') & 077);
+                }
+                // Shift it in on the low end, and see if there's
+                // a byte ready for output.
+                leftchar = (leftchar << 6) | (this_ch);
+                leftbits += 6;
+                if (leftbits >= 8) {
+                    leftbits -= 8;
+                    bin_data.append((char)((leftchar >> leftbits) & 0xff));
+                    leftchar &= ((1 << leftbits) - 1);
+                    bin_len--;
+                }
+            }
+
+            // Finally, check that if there's anything left on the line
+            // that it's whitespace only.
+            while (ascii_len-- > 0) {
+                this_ch = (char) ascii_data.intAt(++i);
+                // Extra '@' may be written as padding in some cases
+                if (this_ch != ' ' && this_ch != '@' &&
+                         this_ch != '\n' && this_ch != '\r') {
+                    throw new PyException(Error, "Trailing garbage");
+                }
+            }
         
-        StringBuilder bin_data = new StringBuilder();
-
-        char this_ch;
-        int i;
-
-        int ascii_len = ascii_data.length()-1;
-
-        int bin_len = (ascii_data.charAt(0) - ' ') & 077;
-
-        for (i = 0; bin_len > 0 && ascii_len > 0; i++, ascii_len--) {
-            this_ch = ascii_data.charAt(i+1);
-            if (this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
-                // Whitespace. Assume some spaces got eaten at
-                // end-of-line. (We check this later)
-                this_ch = 0;
-            } else {
-                // Check the character for legality
-                // The 64 in stead of the expected 63 is because
-                // there are a few uuencodes out there that use
-                // '@' as zero instead of space.
-                if ( this_ch < ' ' || this_ch > (' ' + 64)) {
-                    throw new PyException(Error, "Illegal char");
-                }
-                this_ch = (char)((this_ch - ' ') & 077);
-            }
-            // Shift it in on the low end, and see if there's
-            // a byte ready for output.
-            leftchar = (leftchar << 6) | (this_ch);
-            leftbits += 6;
-            if (leftbits >= 8) {
-                leftbits -= 8;
-                bin_data.append((char)((leftchar >> leftbits) & 0xff));
-                leftchar &= ((1 << leftbits) - 1);
-                bin_len--;
-            }
+            // finally, if we haven't decoded enough stuff, fill it up with zeros
+            for (; i<bin_len; i++)
+                    bin_data.append((char)0);
+        } finally {
+            ascii_data.release();
         }
         
-        // Finally, check that if there's anything left on the line
-        // that it's whitespace only.
-        while (ascii_len-- > 0) {
-            this_ch = ascii_data.charAt(++i);
-            // Extra '@' may be written as padding in some cases
-            if (this_ch != ' ' && this_ch != '@' &&
-                     this_ch != '\n' && this_ch != '\r') {
-                throw new PyException(Error, "Trailing garbage");
-            }
-        }
-        
-        // finally, if we haven't decoded enough stuff, fill it up with zeros
-        for (; i<bin_len; i++)
-        	bin_data.append((char)0);
-        
         return new PyString(bin_data.toString());
     }
 
@@ -346,36 +354,41 @@
      * is the converted line, including a newline char. The length of
      * <i>data</i> should be at most 45.
      */
-    public static PyString b2a_uu(String bin_data) {
+    public static PyString b2a_uu(BufferProtocol bp) {
         int leftbits = 0;
         char this_ch;
         int leftchar = 0;
 
-        int bin_len = bin_data.length();
-        if (bin_len > 45) {
-            // The 45 is a limit that appears in all uuencode's
-            throw new PyException(Error, "At most 45 bytes at once");
-        }
+        PyBuffer bin_data = bp.getBuffer(PyBUF.SIMPLE);
 
         StringBuilder ascii_data = new StringBuilder();
+        try {
+            int bin_len = bin_data.getLen();
+            if (bin_len > 45) {
+                // The 45 is a limit that appears in all uuencode's
+                throw new PyException(Error, "At most 45 bytes at once");
+            }
 
-        // Store the length */
-        ascii_data.append((char)(' ' + (bin_len & 077)));
+            // Store the length */
+            ascii_data.append((char)(' ' + (bin_len & 077)));
 
-        for (int i = 0; bin_len > 0 || leftbits != 0; i++, bin_len--) {
-            // Shift the data (or padding) into our buffer
-            if (bin_len > 0)    // Data
-                leftchar = (leftchar << 8) | bin_data.charAt(i);
-            else  // Padding
-                leftchar <<= 8;
-            leftbits += 8;
+            for (int i = 0; bin_len > 0 || leftbits != 0; i++, bin_len--) {
+                // Shift the data (or padding) into our buffer
+                if (bin_len > 0)    // Data
+                    leftchar = (leftchar << 8) | (char) bin_data.intAt(i);
+                else  // Padding
+                    leftchar <<= 8;
+                leftbits += 8;
 
-            // See if there are 6-bit groups ready
-            while (leftbits >= 6) {
-                this_ch = (char)((leftchar >> (leftbits-6)) & 0x3f);
-                leftbits -= 6;
-                ascii_data.append((char)(this_ch + ' '));
+                // See if there are 6-bit groups ready
+                while (leftbits >= 6) {
+                    this_ch = (char)((leftchar >> (leftbits-6)) & 0x3f);
+                    leftbits -= 6;
+                    ascii_data.append((char)(this_ch + ' '));
+                }
             }
+        } finally {
+            bin_data.release();
         }
         ascii_data.append('\n'); // Append a courtesy newline
 
@@ -383,8 +396,8 @@
     }
 
 
-    private static int binascii_find_valid(String s, int offset, int num) {
-        int slen = s.length() - offset;
+    private static int binascii_find_valid(PyBuffer b, int offset, int num) {
+        int blen = b.getLen() - offset;
 
         /* Finds & returns the (num+1)th
         ** valid character for base64, or -1 if none.
@@ -392,8 +405,8 @@
 
         int ret = -1;
 
-        while ((slen > 0) && (ret == -1)) {
-            int c = s.charAt(offset);
+        while ((blen > 0) && (ret == -1)) {
+            int c = b.intAt(offset);
             short b64val = table_a2b_base64[c & 0x7f];
             if (((c <= 0x7f) && (b64val != -1)) ) {
                 if (num == 0)
@@ -402,7 +415,7 @@
             }
 
             offset++;
-            slen--;
+            blen--;
         }
         return ret;
     }
@@ -417,52 +430,57 @@
      * Convert a block of base64 data back to binary and return the
      * binary data. More than one line may be passed at a time.
      */
-    public static PyString a2b_base64(String ascii_data) {
+    public static PyString a2b_base64(BufferProtocol bp) {
         int leftbits = 0;
         char this_ch;
         int leftchar = 0;
         int quad_pos = 0;
 
-        int ascii_len = ascii_data.length();
+        PyBuffer ascii_data = bp.getBuffer(PyBUF.SIMPLE);
+        int ascii_len = ascii_data.getLen();
 
         int bin_len = 0;
         StringBuilder bin_data = new StringBuilder();
 
-        for(int i = 0; ascii_len > 0 ; ascii_len--, i++) {
-            // Skip some punctuation
-            this_ch = ascii_data.charAt(i);
-            if (this_ch > 0x7F || this_ch == '\r' ||
-                      this_ch == '\n' || this_ch == ' ')
-                continue;
+        try {
+            for(int i = 0; ascii_len > 0 ; ascii_len--, i++) {
+                // Skip some punctuation
+                this_ch = (char) ascii_data.intAt(i);
+                if (this_ch > 0x7F || this_ch == '\r' ||
+                          this_ch == '\n' || this_ch == ' ')
+                    continue;
 
-            if (this_ch == BASE64_PAD) {
-                if (quad_pos < 2 || (quad_pos == 2 &&
-                         binascii_find_valid(ascii_data, i, 1) != BASE64_PAD))
+                if (this_ch == BASE64_PAD) {
+                    if (quad_pos < 2 || (quad_pos == 2 &&
+                             binascii_find_valid(ascii_data, i, 1) != BASE64_PAD))
+                        continue;
+                    else {
+                        // A pad sequence means no more input.
+                        // We've already interpreted the data
+                        // from the quad at this point.
+                        leftbits = 0;
+                        break;
+                    }
+                }
+
+                short this_v = table_a2b_base64[this_ch];
+                if (this_v == -1)
                     continue;
-                else {
-                    // A pad sequence means no more input.
-                    // We've already interpreted the data
-                    // from the quad at this point.
-                    leftbits = 0;
-                    break;
+
+                // Shift it in on the low end, and see if there's
+                // a byte ready for output.
+                quad_pos = (quad_pos + 1) & 0x03;
+                leftchar = (leftchar << 6) | (this_v);
+                leftbits += 6;
+                if (leftbits >= 8) {
+                    leftbits -= 8;
+                    bin_data.append((char)((leftchar >> leftbits) & 0xff));
+                    bin_len++;
+                    leftchar &= ((1 << leftbits) - 1);
                 }
             }
-
-            short this_v = table_a2b_base64[this_ch];
-            if (this_v == -1)
-                continue;
-
-            // Shift it in on the low end, and see if there's
-            // a byte ready for output.
-            quad_pos = (quad_pos + 1) & 0x03;
-            leftchar = (leftchar << 6) | (this_v);
-            leftbits += 6;
-            if (leftbits >= 8) {
-                leftbits -= 8;
-                bin_data.append((char)((leftchar >> leftbits) & 0xff));
-                bin_len++;
-                leftchar &= ((1 << leftbits) - 1);
-            }
+        } finally {
+            ascii_data.release();
         }
         // Check that no bits are left
         if (leftbits != 0) {
@@ -482,29 +500,34 @@
      * Convert binary data to a line of ASCII characters in base64 coding.
      * The return value is the converted line, including a newline char.
      */
-    public static PyString b2a_base64(String bin_data) {
+    public static PyString b2a_base64(BufferProtocol bp) {
         int leftbits = 0;
         char this_ch;
         int leftchar = 0;
 
         StringBuilder ascii_data = new StringBuilder();
 
-        int bin_len = bin_data.length();
-        if (bin_len > BASE64_MAXBIN) {
-            throw new PyException(Error,"Too much data for base64 line");
-        }
+        PyBuffer bin_data = bp.getBuffer(PyBUF.SIMPLE);
+        try {
+            int bin_len = bin_data.getLen();
+            if (bin_len > BASE64_MAXBIN) {
+                throw new PyException(Error,"Too much data for base64 line");
+            }
 
-        for (int i = 0; bin_len > 0 ; bin_len--, i++) {
-            // Shift the data into our buffer
-            leftchar = (leftchar << 8) | bin_data.charAt(i);
-            leftbits += 8;
+            for (int i = 0; bin_len > 0 ; bin_len--, i++) {
+                // Shift the data into our buffer
+                leftchar = (leftchar << 8) | (char) bin_data.intAt(i); //charAt(i);
+                leftbits += 8;
 
-            // See if there are 6-bit groups ready
-            while (leftbits >= 6) {
-                this_ch = (char)((leftchar >> (leftbits-6)) & 0x3f);
-                leftbits -= 6;
-                ascii_data.append((char)table_b2a_base64[this_ch]);
+                // See if there are 6-bit groups ready
+                while (leftbits >= 6) {
+                    this_ch = (char)((leftchar >> (leftbits-6)) & 0x3f);
+                    leftbits -= 6;
+                    ascii_data.append((char)table_b2a_base64[this_ch]);
+                }
             }
+        } finally {
+            bin_data.release();
         }
         if (leftbits == 2) {
             ascii_data.append((char)table_b2a_base64[(leftchar&3) << 4]);
@@ -530,39 +553,43 @@
      * binary bytes, or (in case of the last portion of the binhex4 data)
      * have the remaining bits zero.
      */
-    public static PyTuple a2b_hqx(String ascii_data) {
+    public static PyTuple a2b_hqx(BufferProtocol bp) {
         int leftbits = 0;
         char this_ch;
         int leftchar = 0;
 
         boolean done = false;
-
-        int len = ascii_data.length();
+        PyBuffer ascii_data = bp.getBuffer(PyBUF.SIMPLE);
+        int len = ascii_data.getLen();
 
         StringBuilder bin_data = new StringBuilder();
 
-        for(int i = 0; len > 0 ; len--, i++) {
-            // Get the byte and look it up
-            this_ch = (char) table_a2b_hqx[ascii_data.charAt(i)];
-            if (this_ch == SKIP)
-                continue;
-            if (this_ch == FAIL) {
-                throw new PyException(Error, "Illegal char");
+        try {
+            for(int i = 0; len > 0 ; len--, i++) {
+                // Get the byte and look it up
+                this_ch = (char) table_a2b_hqx[ascii_data.intAt(i)];
+                if (this_ch == SKIP)
+                    continue;
+                if (this_ch == FAIL) {
+                    throw new PyException(Error, "Illegal char");
+                }
+                if (this_ch == DONE) {
+                    // The terminating colon
+                    done = true;
+                    break;
+                }
+
+                // Shift it into the buffer and see if any bytes are ready
+                leftchar = (leftchar << 6) | (this_ch);
+                leftbits += 6;
+                if (leftbits >= 8) {
+                    leftbits -= 8;
+                    bin_data.append((char)((leftchar >> leftbits) & 0xff));
+                    leftchar &= ((1 << leftbits) - 1);
+                }
             }
-            if (this_ch == DONE) {
-                // The terminating colon
-                done = true;
-                break;
-            }
-
-            // Shift it into the buffer and see if any bytes are ready
-            leftchar = (leftchar << 6) | (this_ch);
-            leftbits += 6;
-            if (leftbits >= 8) {
-                leftbits -= 8;
-                bin_data.append((char)((leftchar >> leftbits) & 0xff));
-                leftchar &= ((1 << leftbits) - 1);
-            }
+        } finally {
+            ascii_data.release();
         }
 
         if (leftbits != 0 && !done) {
@@ -570,7 +597,7 @@
                                   "String has incomplete number of bytes");
         }
 
-        return new PyTuple(Py.java2py(bin_data.toString()), Py.newInteger(done ? 1 : 0));
+        return new PyTuple(new PyString(bin_data.toString()), Py.newInteger(done ? 1 : 0));
     }
 
 
@@ -582,37 +609,42 @@
      * Perform binhex4 style RLE-compression on <i>data</i> and return the
      * result.
      */
-    static public String rlecode_hqx(String in_data) {
-        int len = in_data.length();
+    static public PyString rlecode_hqx(BufferProtocol bp) {
+        PyBuffer in_data = bp.getBuffer(PyBUF.SIMPLE);
+        int len = in_data.getLen();
 
         StringBuilder out_data = new StringBuilder();
 
-        for (int in=0; in < len; in++) {
-            char ch = in_data.charAt(in);
-            if (ch == RUNCHAR) {
-                // RUNCHAR. Escape it.
-                out_data.append(RUNCHAR);
-                out_data.append(0);
-            } else {
-                // Check how many following are the same
-                int inend;
-                for (inend=in+1; inend < len &&
-                                 in_data.charAt(inend) == ch &&
-                                 inend < in+255; inend++)
-                    ;
-                if (inend - in > 3) {
-                    // More than 3 in a row. Output RLE.
-                    out_data.append(ch);
+        try {
+            for (int in=0; in < len; in++) {
+                char ch = (char) in_data.intAt(in);
+                if (ch == RUNCHAR) {
+                    // RUNCHAR. Escape it.
                     out_data.append(RUNCHAR);
-                    out_data.append((char) (inend-in));
-                    in = inend-1;
+                    out_data.append(0);
                 } else {
-                    // Less than 3. Output the byte itself
-                    out_data.append(ch);
+                    // Check how many following are the same
+                    int inend;
+                    for (inend=in+1; inend < len &&
+                                     (char) in_data.intAt(inend) == ch &&
+                                     inend < in+255; inend++)
+                        ;
+                    if (inend - in > 3) {
+                        // More than 3 in a row. Output RLE.
+                        out_data.append(ch);
+                        out_data.append(RUNCHAR);
+                        out_data.append((char) (inend-in));
+                        in = inend-1;
+                    } else {
+                        // Less than 3. Output the byte itself
+                        out_data.append(ch);
+                    }
                 }
             }
+        } finally {
+            in_data.release();
         }
-        return out_data.toString();
+        return new PyString(out_data.toString());
     }
 
 
@@ -625,25 +657,31 @@
      * resulting string. The argument should already be RLE-coded, and have a
      * length divisible by 3 (except possibly the last fragment).
      */
-    public static PyString b2a_hqx(String bin_data) {
+    public static PyString b2a_hqx(BufferProtocol bp) {
         int leftbits = 0;
         char this_ch;
         int leftchar = 0;
 
-        int len = bin_data.length();
+        PyBuffer bin_data = bp.getBuffer(PyBUF.SIMPLE);
+        int len = bin_data.getLen();
 
         StringBuilder ascii_data = new StringBuilder();
 
-        for(int i = 0; len > 0; len--, i++) {
-            // Shift into our buffer, and output any 6bits ready
-            leftchar = (leftchar << 8) | bin_data.charAt(i);
-            leftbits += 8;
-            while (leftbits >= 6) {
-                this_ch = (char) ((leftchar >> (leftbits-6)) & 0x3f);
-                leftbits -= 6;
-                ascii_data.append((char) table_b2a_hqx[this_ch]);
+        try {
+            for(int i = 0; len > 0; len--, i++) {
+                // Shift into our buffer, and output any 6bits ready
+                leftchar = (leftchar << 8) | (char) bin_data.intAt(i);
+                leftbits += 8;
+                while (leftbits >= 6) {
+                    this_ch = (char) ((leftchar >> (leftbits-6)) & 0x3f);
+                    leftbits -= 6;
+                    ascii_data.append((char) table_b2a_hqx[this_ch]);
+                }
             }
+        } finally {
+            bin_data.release();
         }
+
         // Output a possible runt byte
         if (leftbits != 0) {
             leftchar <<= (6-leftbits);
@@ -667,60 +705,65 @@
      * unless data input data ends in an orphaned repeat indicator, in which
      * case the <tt>Incomplete</tt> exception is raised.
      */
-    static public String rledecode_hqx(String in_data) {
+    static public PyString rledecode_hqx(BufferProtocol bp) {
         char in_byte, in_repeat;
+        
+        PyBuffer in_data = bp.getBuffer(PyBUF.SIMPLE);
+        int in_len = in_data.getLen();
+        int i = 0;
+        
+        StringBuilder out_data = new StringBuilder();
+        try {
+            // Empty string is a special case
+            if (in_len == 0)
+                return Py.EmptyString;
 
-        int in_len = in_data.length();
-        int i = 0;
 
-        // Empty string is a special case
-        if (in_len == 0)
-            return "";
-
-        StringBuilder out_data = new StringBuilder();
-
-        // Handle first byte separately (since we have to get angry
-        // in case of an orphaned RLE code).
-        if (--in_len < 0) throw new PyException(Incomplete);
-        in_byte = in_data.charAt(i++);
-
-        if (in_byte == RUNCHAR) {
+            // Handle first byte separately (since we have to get angry
+            // in case of an orphaned RLE code).
             if (--in_len < 0) throw new PyException(Incomplete);
-            in_repeat = in_data.charAt(i++);
-
-            if (in_repeat != 0) {
-                // Note Error, not Incomplete (which is at the end
-                // of the string only). This is a programmer error.
-                throw new PyException(Error, "Orphaned RLE code at start");
-            }
-            out_data.append(RUNCHAR);
-        } else {
-            out_data.append(in_byte);
-        }
-
-        while (in_len > 0) {
-            if (--in_len < 0) throw new PyException(Incomplete);
-            in_byte = in_data.charAt(i++);
+            in_byte = (char) in_data.intAt(i++);
 
             if (in_byte == RUNCHAR) {
                 if (--in_len < 0) throw new PyException(Incomplete);
-                in_repeat = in_data.charAt(i++);
+                in_repeat = (char) in_data.intAt(i++);
 
-                if (in_repeat == 0) {
-                    // Just an escaped RUNCHAR value
-                    out_data.append(RUNCHAR);
-                } else {
-                    // Pick up value and output a sequence of it
-                    in_byte = out_data.charAt(out_data.length()-1);
-                    while (--in_repeat > 0)
-                        out_data.append(in_byte);
+                if (in_repeat != 0) {
+                    // Note Error, not Incomplete (which is at the end
+                    // of the string only). This is a programmer error.
+                    throw new PyException(Error, "Orphaned RLE code at start");
                 }
+                out_data.append(RUNCHAR);
             } else {
-                // Normal byte
                 out_data.append(in_byte);
             }
+
+            while (in_len > 0) {
+                if (--in_len < 0) throw new PyException(Incomplete);
+                in_byte = (char) in_data.intAt(i++);
+
+                if (in_byte == RUNCHAR) {
+                    if (--in_len < 0) throw new PyException(Incomplete);
+                    in_repeat = (char) in_data.intAt(i++);
+
+                    if (in_repeat == 0) {
+                        // Just an escaped RUNCHAR value
+                        out_data.append(RUNCHAR);
+                    } else {
+                        // Pick up value and output a sequence of it
+                        in_byte = out_data.charAt(out_data.length()-1);
+                        while (--in_repeat > 0)
+                            out_data.append(in_byte);
+                    }
+                } else {
+                    // Normal byte
+                    out_data.append(in_byte);
+                }
+            }
+        } finally {
+            in_data.release();
         }
-        return out_data.toString();
+        return new PyString(out_data.toString());
     }
 
 
@@ -734,13 +777,18 @@
      * Compute the binhex4 crc value of <i>data</i>, starting with an initial
      * <i>crc</i> and returning the result.
      */
-    public static int crc_hqx(String bin_data, int crc) {
-        int len = bin_data.length();
+    public static int crc_hqx(BufferProtocol bp, int crc) {
+        PyBuffer bin_data = bp.getBuffer(PyBUF.SIMPLE);
+        int len = bin_data.getLen();
         int i = 0;
 
-        while(len-- > 0) {
-            crc=((crc<<8)&0xff00) ^
-                       crctab_hqx[((crc>>8)&0xff)^bin_data.charAt(i++)];
+        try {
+            while(len-- > 0) {
+                crc=((crc<<8)&0xff00) ^
+                           crctab_hqx[((crc>>8)&0xff)^ (char) bin_data.intAt(i++)];
+            }
+        } finally {
+            bin_data.release();
         }
 
         return crc;
@@ -804,20 +852,25 @@
 0x2d02ef8dL
 };
 
-    public static int crc32(String bin_data) {
-        return crc32(bin_data, 0);
+    public static int crc32(BufferProtocol bp) {
+        return crc32(bp, 0);
     }
 
-    public static int crc32(String bin_data, long crc) {
-        int len = bin_data.length();
+    public static int crc32(BufferProtocol bp, long crc) {
+        PyBuffer bin_data = bp.getBuffer(PyBUF.SIMPLE);
+        int len = bin_data.getLen();
 
         crc &= 0xFFFFFFFFL;
         crc = crc ^ 0xFFFFFFFFL;
-        for (int i = 0; i < len; i++) {
-            char ch = bin_data.charAt(i);
-            crc = (int)crc_32_tab[(int) ((crc ^ ch) & 0xffL)] ^ (crc >> 8);
-            /* Note:  (crc >> 8) MUST zero fill on left */
-            crc &= 0xFFFFFFFFL;
+        try {
+            for (int i = 0; i < len; i++) {
+                char ch = (char) bin_data.intAt(i);
+                crc = (int)crc_32_tab[(int) ((crc ^ ch) & 0xffL)] ^ (crc >> 8);
+                /* Note:  (crc >> 8) MUST zero fill on left */
+                crc &= 0xFFFFFFFFL;
+            }
+        } finally {
+            bin_data.release();
         }
         if (crc >= 0x80000000)
             return -(int)(crc+1 & 0xFFFFFFFF);
@@ -834,23 +887,27 @@
         "This function is also available as \"hexlify()\"."
     );
 
-    public static PyString b2a_hex(String argbuf) {
-        int arglen = argbuf.length();
+    public static PyString b2a_hex(BufferProtocol bp) {
+        PyBuffer argbuf = bp.getBuffer(PyBUF.SIMPLE);
+        int arglen = argbuf.getLen();
 
         StringBuilder retbuf = new StringBuilder(arglen*2);
 
-        /* make hex version of string, taken from shamodule.c */
-        for (int i = 0; i < arglen; i++) {
-            char ch = argbuf.charAt(i);
-            retbuf.append(hexdigit[(ch >>> 4) & 0xF]);
-            retbuf.append(hexdigit[ch & 0xF]);
+        try {
+            /* make hex version of string, taken from shamodule.c */
+            for (int i = 0; i < arglen; i++) {
+                char ch = (char) argbuf.intAt(i);
+                retbuf.append(hexdigit[(ch >>> 4) & 0xF]);
+                retbuf.append(hexdigit[ch & 0xF]);
+            }
+        } finally {
+            argbuf.release();
         }
         return new PyString(retbuf.toString());
-
     }
 
 
-    public static PyString hexlify(String argbuf) {
+    public static PyString hexlify(BufferProtocol argbuf) {
         return b2a_hex(argbuf);
     }
 
@@ -864,30 +921,34 @@
     );
 
 
-    public static PyString a2b_hex(String argbuf) {
-        int arglen = argbuf.length();
+    public static PyString a2b_hex(BufferProtocol bp) {
+        PyBuffer argbuf = bp.getBuffer(PyBUF.SIMPLE);
+        int arglen = argbuf.getLen();
 
+        StringBuilder retbuf = new StringBuilder(arglen/2);
         /* XXX What should we do about strings with an odd length?  Should
          * we add an implicit leading zero, or a trailing zero?  For now,
          * raise an exception.
          */
-        if (arglen % 2 != 0)
-            throw Py.TypeError("Odd-length string");
+        try {
+            if (arglen % 2 != 0)
+                throw Py.TypeError("Odd-length string");
 
-        StringBuilder retbuf = new StringBuilder(arglen/2);
-
-        for (int i = 0; i < arglen; i += 2) {
-            int top = Character.digit(argbuf.charAt(i), 16);
-            int bot = Character.digit(argbuf.charAt(i+1), 16);
-            if (top == -1 || bot == -1)
-                throw Py.TypeError("Non-hexadecimal digit found");
-            retbuf.append((char) ((top << 4) + bot));
+            for (int i = 0; i < arglen; i += 2) {
+                int top = Character.digit(argbuf.intAt(i), 16);
+                int bot = Character.digit(argbuf.intAt(i+1), 16);
+                if (top == -1 || bot == -1)
+                    throw Py.TypeError("Non-hexadecimal digit found");
+                retbuf.append((char) ((top << 4) + bot));
+            }
+        } finally {
+            argbuf.release();
         }
         return new PyString(retbuf.toString());
     }
 
 
-    public static PyString unhexlify(String argbuf) {
+    public static PyString unhexlify(BufferProtocol argbuf) {
         return a2b_hex(argbuf);
     }
 
@@ -920,36 +981,49 @@
     public static PyString a2b_qp(PyObject[] arg, String[] kws)
     {
         ArgParser ap = new ArgParser("a2b_qp", arg, kws, new String[] {"s", "header"});
-        String s = ap.getString(0);
+
+        PyObject pyObject = ap.getPyObject(0);
+        BufferProtocol bp;
+        if (pyObject instanceof BufferProtocol) {
+            bp = (BufferProtocol) pyObject;
+        } else {
+            throw Py.TypeError("expected something conforming to the buffer protocol, got "
+                    + pyObject.getType().fastGetName());
+        }
+
         StringBuilder sb = new StringBuilder();
         boolean header = getIntFlagAsBool(ap, 1, 0, "an integer is required");
 
-        if (header)
-        	s = UNDERSCORE.matcher(s).replaceAll(" ");
-        
-        for (int i=0, m=s.length(); i<m;) {
-        	char c = s.charAt(i++);
-        	if (c == '=') {
-        		if (i < m) {
-        			c = s.charAt(i++);
-        			if (c == '=') {
-        				sb.append(c);
-                                } else if (c == ' ') {
-                                    sb.append("= ");     
-        			} else if ((c >= '0' && c <= '9' || c >= 'A' && c <= 'F') && i < m) {
-        				char nc = s.charAt(i++);
-        				if ((nc >= '0' && nc <= '9' || nc >= 'A' && nc <= 'F')) {
-        					sb.append((char)(Character.digit(c, 16) << 4 | Character.digit(nc, 16)));
-        				} else {
-        					sb.append('=').append(c).append(nc);
-        				}
-        			} else if (c != '\n') {
-        				sb.append('=').append(c);
-        			}
-        		}
-        	} else {
-        		sb.append(c);
-        	}
+        PyBuffer ascii_data = bp.getBuffer(PyBUF.SIMPLE);
+        try {
+            for (int i=0, m=ascii_data.getLen(); i<m;) {
+                    char c = (char) ascii_data.intAt(i++);
+                    if (header && c == '_') {
+                        sb.append(' ');
+                    } else if (c == '=') {
+                            if (i < m) {
+                                    c = (char) ascii_data.intAt(i++);
+                                    if (c == '=') {
+                                            sb.append(c);
+                                    } else if (c == ' ') {
+                                        sb.append("= ");
+                                    } else if ((c >= '0' && c <= '9' || c >= 'A' && c <= 'F') && i < m) {
+                                            char nc = (char) ascii_data.intAt(i++);
+                                            if ((nc >= '0' && nc <= '9' || nc >= 'A' && nc <= 'F')) {
+                                                    sb.append((char)(Character.digit(c, 16) << 4 | Character.digit(nc, 16)));
+                                            } else {
+                                                    sb.append('=').append(c).append(nc);
+                                            }
+                                    } else if (c != '\n') {
+                                            sb.append('=').append(c);
+                                    }
+                            }
+                    } else {
+                            sb.append(c);
+                    }
+            }
+        } finally {
+            ascii_data.release();
         }
         return new PyString(sb.toString());
     }
@@ -965,11 +1039,25 @@
 
     public static PyString b2a_qp(PyObject[] arg, String[] kws) {
         ArgParser ap = new ArgParser("b2a_qp", arg, kws, new String[] {"s", "quotetabs", "istext", "header"});
-        String s = ap.getString(0);
         boolean quotetabs = getIntFlagAsBool(ap, 1, 0, "an integer is required");
         boolean istext = getIntFlagAsBool(ap, 2, 1, "an integer is required");
         boolean header = getIntFlagAsBool(ap, 3, 0, "an integer is required");
 
+        PyObject pyObject = ap.getPyObject(0);
+        BufferProtocol bp;
+        if (pyObject instanceof BufferProtocol) {
+            bp = (BufferProtocol) pyObject;
+        } else {
+            throw Py.TypeError("expected something conforming to the buffer protocol, got "
+                    + pyObject.getType().fastGetName());
+        }
+
+        PyBuffer bin_data = bp.getBuffer(PyBUF.SIMPLE);
+
+        // TODO make this operate on the raw buffer rather than converting it to a Java String.
+        String s = bin_data.toString();
+        bin_data.release();
+
         String lineEnd;
         int pos = s.indexOf('\n');
         if (pos > 0 && s.charAt(pos-1) == '\r') {
@@ -979,11 +1067,13 @@
         	lineEnd = "\n";
         	s = RN_TO_N.matcher(s).replaceAll("\n");
         }
+
         StringBuilder sb = new StringBuilder();
         int count = 0;
+
         for (int i=0, m=s.length(); i<m; i++) {
         	char c = s.charAt(i);
-                
+
                 // RFC 1521 requires that the line ending in a space or tab must have
                 // that trailing character encoded.
                 if (lineEnding(s, lineEnd, i)) {

-- 
Repository URL: http://hg.python.org/jython


More information about the Jython-checkins mailing list