Adding info to Python Buglist
Jason Trowbridge
ratman at nmt.edu
Sat Oct 16 21:11:03 EDT 1999
I've located the problem. It's a bug in the "binascii.c" module.
The binascii base64 decoding function assumes that, for every pad
character ('='), there is one less byte present in the output. There
are only two valid quad sequences with the pad symbol: 'XXX=' and
'XX=='. Using the binascii assumption with '====', 4 characters are
removed from a string with 3 characters. _PyString_Resize() is called
without checking whether the calling argument is a positive, and odd
behavior can follow.
The RFC* says that invalid sequences and characters should be ignored.
This includes invalid pad sequences and invalid characters. The
"binascii.c" file that I got for Python 1.5.2 does not ignore valid
characters or invalid pad sequences. Characters are masked with 0x7f;
the binary value 0xc1 is mapped to the letter 'A' in this case.
I've fixed the problem in my copy of Python, and have included the patch
to "Modules/binascii.c" with this message. To apply the patch:
1) Go into the directory containing source code for Python 1.5.2.
2) Go into the Modules directory.
3) Apply the patch: patch -N binascii.c binascii.patch
4) Recompile.
*The RFC I'm talking about is RFC 1521. Section 5.2 details base64
encoding.
Jason Trowbridge
ratman at nmt.edu
-------------- next part --------------
diff -Naur older/Modules/binascii.c newer/Modules/binascii.c
--- older/Modules/binascii.c Sat Oct 16 18:44:21 1999
+++ newer/Modules/binascii.c Sat Oct 16 18:49:46 1999
@@ -75,6 +75,21 @@
** Jack Jansen, CWI, July 1995.
*/
+/* I don't know about the other encodings, but base64 is detailed in the MIME
+ * RFC. Take a look at Section 5.2 of RFC 1521 at
+ * http://www.faqs.org/rfcs/rfc1521.html for details on what's valid
+ * and what's not.
+ *
+ * Fixed a bug in base64 decoding involving illegal pad sequences. Illegal
+ * pad sequences would actually shorten the result string. If there were
+ * too many illegal pad sequences, the string could end up with a negative
+ * length, resulting in Bad Things.
+ *
+ * Fixed a bug where illegal characters could be remapped into legal
+ * characters (instead of being ignored).
+ *
+ * --Jason Trowbridge
+ */
#include "Python.h"
@@ -328,6 +343,29 @@
return rv;
}
+
+int find_valid( char *s, int slen, int num )
+{
+ /* Finds & returns the (num+1)th valid character for base64, or -1 if none. */
+
+ int ret = -1, tmp;
+ unsigned char c, b64val;
+
+ while ((slen > 0) && (ret == -1)) {
+ c = *s;
+ b64val = table_a2b_base64[c & 0x7f];
+ if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
+ num--;
+ if (num == 0)
+ ret = *s;
+ }
+
+ s++;
+ slen--;
+ }
+ return ret;
+}
+
static char doc_a2b_base64[] = "(ascii) -> bin. Decode a line of base64 data";
static PyObject *
@@ -339,9 +377,10 @@
int leftbits = 0;
unsigned char this_ch;
unsigned int leftchar = 0;
- int npad = 0;
+ int npadfills = 0;
PyObject *rv;
int ascii_len, bin_len;
+ int quad_pos = 0;
if ( !PyArg_ParseTuple(args, "t#", &ascii_data, &ascii_len) )
return NULL;
@@ -353,37 +392,54 @@
return NULL;
bin_data = (unsigned char *)PyString_AsString(rv);
bin_len = 0;
- for( ; ascii_len > 0 ; ascii_len--, ascii_data++ ) {
- /* Skip some punctuation */
- this_ch = (*ascii_data & 0x7f);
- if ( this_ch == '\r' || this_ch == '\n' || this_ch == ' ' )
+
+ for( ; ascii_len > 0 ; ascii_len--, ascii_data++) {
+ this_ch = *ascii_data;
+
+ if (this_ch > 0x7f || this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
continue;
-
- if ( this_ch == BASE64_PAD )
- npad++;
- this_ch = table_a2b_base64[(*ascii_data) & 0x7f];
- if ( this_ch == (unsigned char) -1 ) continue;
+
+ if (this_ch == BASE64_PAD) { // Ignore invalid pad sequences
+ if ( (quad_pos < 2) ||
+ (quad_pos == 2) && (find_valid(ascii_data, ascii_len, 1) != '=') ) {
+ continue;
+ } else {
+ npadfills = 4 - quad_pos;
+ }
+ }
+
+ this_ch = table_a2b_base64[*ascii_data];
+ if ( this_ch == (unsigned char) -1 )
+ continue;
+
/*
** Shift it in on the low end, and see if there's
** a byte ready for output.
*/
+ quad_pos = (quad_pos + 1) & 0x03;
leftchar = (leftchar << 6) | (this_ch);
leftbits += 6;
+
if ( leftbits >= 8 ) {
leftbits -= 8;
- *bin_data++ = (leftchar >> leftbits) & 0xff;
+
+ if (npadfills == 0) {
+ *bin_data++ = (leftchar >> leftbits) & 0xff;
+ bin_len++;
+ } else {
+ npadfills--;
+ }
+
leftchar &= ((1 << leftbits) - 1);
- bin_len++;
}
- }
- /* Check that no bits are left */
- if ( leftbits ) {
+ }
+
+ if (leftbits != 0) {
PyErr_SetString(Error, "Incorrect padding");
Py_DECREF(rv);
return NULL;
}
- /* and remove any padding */
- bin_len -= npad;
+
/* and set string size correctly */
_PyString_Resize(&rv, bin_len);
return rv;
More information about the Python-list
mailing list