[Python-checkins] CVS: python/dist/src/Modules binascii.c,2.29,2.30
Martin v. Löwis
loewis@users.sourceforge.net
Sun, 30 Sep 2001 13:32:13 -0700
Update of /cvsroot/python/python/dist/src/Modules
In directory usw-pr-cvs1:/tmp/cvs-serv10858/Modules
Modified Files:
binascii.c
Log Message:
Patch #462190, patch #464070: Support quoted printable in the binascii module.
Decode and encode underscores for header style encoding. Fixes bug #463996.
Index: binascii.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/binascii.c,v
retrieving revision 2.29
retrieving revision 2.30
diff -C2 -d -r2.29 -r2.30
*** binascii.c 2001/06/07 05:51:36 2.29
--- binascii.c 2001/09/30 20:32:11 2.30
***************
*** 43,46 ****
--- 43,55 ----
**
** Jack Jansen, CWI, July 1995.
+ **
+ ** Added support for quoted-printable encoding, based on rfc 1521 et al
+ ** quoted-printable encoding specifies that non printable characters (anything
+ ** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
+ ** of the character. It also specifies some other behavior to enable 8bit data
+ ** in a mail message with little difficulty (maximum line sizes, protecting
+ ** some cases of whitespace, etc).
+ **
+ ** Brandon Long, September 2001.
*/
***************
*** 972,975 ****
--- 981,1267 ----
This function is also available as \"unhexlify()\"";
+ /* Lookup table indexed by a 7-bit ASCII code.  Entries for '0'-'9',
+  * 'A'-'F' and 'a'-'f' hold the value of that hexadecimal digit (0-15);
+  * every other entry is -1.  The table is read-only. */
+ static const int table_hex[128] = {
+     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+      0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
+     -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+     -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
+ };
+
+ /* Value of the hexadecimal digit c, or -1 when c is not a hex digit.
+  * Codes of 128 and above (including negative plain chars, which convert
+  * to large unsigned values) yield -1 instead of indexing past the end of
+  * table_hex.  c is evaluated more than once, so it must be free of side
+  * effects. */
+ #define hexval(c) \
+     (((unsigned int)(c) < 128) ? table_hex[(unsigned int)(c)] : -1)
+
+ /* Line length at which b2a_qp starts a new line with a soft line break */
+ #define MAXLINESIZE 76
+
+ static char doc_a2b_qp[] = "Decode a string of qp-encoded data";
+
+ /* Decode quoted-printable data; Python signature a2b_qp(data, header=0).
+  *
+  * Returns a new string object, or NULL with an exception set when the
+  * arguments cannot be parsed or memory cannot be allocated.
+  *
+  * Decoding rules, applied left to right:
+  *   "=" + two hex digits       the byte with that value
+  *   "=" + LF, CR, space or tab soft line break: everything up to and
+  *                              including the next LF is dropped
+  *   "=="                       a single '=' (tolerates a broken encoder)
+  *   "=" + anything else        the '=' is kept and decoding resumes at
+  *                              the following character
+  *   "=" as the last byte       dropped
+  *   "_" when header is true    a space
+  *   any other byte             copied unchanged
+  */
+ static PyObject*
+ binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs)
+ {
+     unsigned int in, out;
+     unsigned char *data, *odata;
+     unsigned int datalen = 0;
+     PyObject *rv;
+     static char *kwlist[] = {"data", "header", NULL};
+     int header = 0;
+
+     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i", kwlist, &data,
+                                      &datalen, &header))
+         return NULL;
+
+     /* The decoded form is never longer than the input, so an input-sized
+      * buffer is always enough.  Request at least one byte: calloc(1, 0)
+      * is allowed to return NULL, which would turn empty input into a
+      * bogus MemoryError. */
+     odata = (unsigned char *) calloc(1, datalen ? datalen : 1);
+
+     if (odata == NULL) {
+         PyErr_NoMemory();
+         return NULL;
+     }
+
+     in = out = 0;
+     while (in < datalen) {
+         if (data[in] == '=') {
+             in++;
+             if (in >= datalen) break;
+             /* Soft line breaks */
+             if ((data[in] == '\n') || (data[in] == '\r') ||
+                 (data[in] == ' ') || (data[in] == '\t')) {
+                 if (data[in] != '\n') {
+                     while (in < datalen && data[in] != '\n') in++;
+                 }
+                 if (in < datalen) in++;
+             }
+             else if (data[in] == '=') {
+                 /* broken case from broken python qp */
+                 odata[out++] = '=';
+                 in++;
+             }
+             else {
+                 /* Look up both candidate digits.  The "< 128" tests keep
+                  * the table index in range, and the second digit is only
+                  * read when it lies inside the input. */
+                 int hi = -1, lo = -1;
+
+                 if (data[in] < 128)
+                     hi = hexval(data[in]);
+                 if ((in + 1 < datalen) && (data[in+1] < 128))
+                     lo = hexval(data[in+1]);
+
+                 if ((hi >= 0) && (lo >= 0)) {
+                     odata[out++] = (unsigned char)((hi << 4) | lo);
+                     in += 2;
+                 }
+                 else {
+                     /* Not an escape: keep the '=' and let the next loop
+                      * iteration handle data[in] as ordinary input. */
+                     odata[out++] = '=';
+                 }
+             }
+         }
+         else if (header && data[in] == '_') {
+             odata[out++] = ' ';
+             in++;
+         }
+         else {
+             odata[out++] = data[in++];
+         }
+     }
+
+     /* rv is NULL on failure; the scratch buffer is released either way. */
+     rv = PyString_FromStringAndSize((char *)odata, out);
+     free (odata);
+     return rv;
+ }
+
+ /* Store the two uppercase hexadecimal digits of ch at s[0] (high nibble)
+  * and s[1] (low nibble).  No terminating NUL is written.  Always
+  * returns 0. */
+ static int
+ to_hex (unsigned char ch, unsigned char *s)
+ {
+     static const char digits[] = "0123456789ABCDEF";
+
+     s[0] = digits[(ch >> 4) & 0x0F];
+     s[1] = digits[ch & 0x0F];
+     return 0;
+ }
+
+ static char doc_b2a_qp[] =
+ "b2a_qp(data, quotetabs=0, istext=1, header=0) -> s; \n\
+ Encode a string using quoted-printable encoding. \n\
+ \n\
+ On encoding, when istext is set, newlines are not encoded, and white \n\
+ space at end of lines is. When istext is not set, \\r and \\n (CR/LF) are \n\
+ both encoded. When quotetabs is set, space and tabs are encoded.";
+
+ /* Return nonzero when data[in] has to be written as an "=XX" escape.
+  *
+  * Both passes of binascii_b2a_qp call this, so the size computed by the
+  * first pass always matches what the second pass writes.
+  *
+  * A byte is escaped when it is:
+  *   - above 126, or '=' itself;
+  *   - '_' while header encoding is on;
+  *   - '.' when linelen is 1;
+  *   - CR or LF while istext is off;
+  *   - a tab or space that is the very last byte of the data;
+  *   - below 33 and not CR/LF, where tab and space only count when
+  *     quotetabs is on.
+  */
+ static int
+ qp_must_encode (const unsigned char *data, unsigned int in,
+                 unsigned int datalen, unsigned int linelen,
+                 int quotetabs, int istext, int header)
+ {
+     unsigned char c = data[in];
+
+     if ((c > 126) || (c == '='))
+         return 1;
+     if (header && (c == '_'))
+         return 1;
+     if ((c == '.') && (linelen == 1))
+         return 1;
+     if (!istext && ((c == '\r') || (c == '\n')))
+         return 1;
+     if (((c == '\t') || (c == ' ')) && (in + 1 == datalen))
+         return 1;
+     /* The earlier form "quotetabs && (c != '\t' || c != ' ')" was a
+      * tautology inside the parentheses, so control characters were only
+      * escaped when quotetabs was set. */
+     if ((c < 33) && (c != '\r') && (c != '\n') &&
+         (quotetabs || ((c != '\t') && (c != ' '))))
+         return 1;
+     return 0;
+ }
+
+ /* XXX: This is ridiculously complicated to be backward compatible
+  * (mostly) with the quopri module. It doesn't re-create the quopri
+  * module bug where text ending in CRLF has the CR encoded */
+
+ /* Encode data as quoted-printable; Python signature
+  * b2a_qp(data, quotetabs=0, istext=1, header=0).
+  *
+  * Returns a new string object, or NULL with an exception set when the
+  * arguments cannot be parsed or memory cannot be allocated.
+  *
+  * The work is done in two passes over the input: the first computes the
+  * size of the output buffer, the second fills it.  The two loops must
+  * make identical decisions, otherwise the buffer could overflow.
+  */
+ static PyObject*
+ binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs)
+ {
+     unsigned int in, out;
+     unsigned char *data, *odata;
+     unsigned int datalen = 0, odatalen = 0;
+     PyObject *rv;
+     unsigned int linelen = 0;
+     static char *kwlist[] = {"data", "quotetabs", "istext", "header", NULL};
+     int istext = 1;
+     int quotetabs = 0;
+     int header = 0;
+     unsigned char ch;
+     int crlf = 0;
+     unsigned char *p;
+
+     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|iii", kwlist, &data,
+                                      &datalen, &quotetabs, &istext, &header))
+         return NULL;
+
+     /* See if this string is using CRLF line ends */
+     /* XXX: this function has the side effect of converting all of
+      * the end of lines to be the same depending on this detection
+      * here */
+     /* memchr, not strchr: the data is length-delimited and may contain
+      * NUL bytes ahead of the first newline. */
+     p = (unsigned char *) memchr(data, '\n', datalen);
+     if ((p != NULL) && (p > data) && (*(p-1) == '\r'))
+         crlf = 1;
+
+     /* First, scan to see how many characters need to be encoded */
+     in = 0;
+     while (in < datalen) {
+         if (qp_must_encode(data, in, datalen, linelen,
+                            quotetabs, istext, header))
+         {
+             if ((linelen + 3) >= MAXLINESIZE) {
+                 /* soft line break: '=' plus the line ending */
+                 linelen = 0;
+                 if (crlf)
+                     odatalen += 3;
+                 else
+                     odatalen += 2;
+             }
+             linelen += 3;
+             odatalen += 3;
+             in++;
+         }
+         else {
+             if (istext &&
+                 ((data[in] == '\n') ||
+                  ((in+1 < datalen) && (data[in] == '\r') &&
+                   (data[in+1] == '\n'))))
+             {
+                 linelen = 0;
+                 /* Protect against whitespace on end of line.  This looks
+                  * at the input, so it may reserve two bytes the second
+                  * pass does not use; it never reserves too few. */
+                 if (in && ((data[in-1] == ' ') || (data[in-1] == '\t')))
+                     odatalen += 2;
+                 if (crlf)
+                     odatalen += 2;
+                 else
+                     odatalen += 1;
+                 if (data[in] == '\r')
+                     in += 2;
+                 else
+                     in++;
+             }
+             else {
+                 if ((in + 1 != datalen) &&
+                     (data[in+1] != '\n') &&
+                     (linelen + 1) >= MAXLINESIZE) {
+                     linelen = 0;
+                     if (crlf)
+                         odatalen += 3;
+                     else
+                         odatalen += 2;
+                 }
+                 linelen++;
+                 odatalen++;
+                 in++;
+             }
+         }
+     }
+
+     /* Request at least one byte: calloc(1, 0) is allowed to return NULL,
+      * which would turn empty input into a bogus MemoryError. */
+     odata = (unsigned char *) calloc(1, odatalen ? odatalen : 1);
+
+     if (odata == NULL) {
+         PyErr_NoMemory();
+         return NULL;
+     }
+
+     /* Second pass: same decisions as above, now writing the output */
+     in = out = linelen = 0;
+     while (in < datalen) {
+         if (qp_must_encode(data, in, datalen, linelen,
+                            quotetabs, istext, header))
+         {
+             if ((linelen + 3) >= MAXLINESIZE) {
+                 odata[out++] = '=';
+                 if (crlf) odata[out++] = '\r';
+                 odata[out++] = '\n';
+                 linelen = 0;
+             }
+             odata[out++] = '=';
+             to_hex(data[in], &odata[out]);
+             out += 2;
+             in++;
+             linelen += 3;
+         }
+         else {
+             if (istext &&
+                 ((data[in] == '\n') ||
+                  ((in+1 < datalen) && (data[in] == '\r') &&
+                   (data[in+1] == '\n'))))
+             {
+                 linelen = 0;
+                 /* Protect against whitespace on end of line: rewrite the
+                  * blank just emitted as its =XX escape */
+                 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
+                     ch = odata[out-1];
+                     odata[out-1] = '=';
+                     to_hex(ch, &odata[out]);
+                     out += 2;
+                 }
+
+                 if (crlf) odata[out++] = '\r';
+                 odata[out++] = '\n';
+                 if (data[in] == '\r')
+                     in += 2;
+                 else
+                     in++;
+             }
+             else {
+                 if ((in + 1 != datalen) &&
+                     (data[in+1] != '\n') &&
+                     (linelen + 1) >= MAXLINESIZE) {
+                     odata[out++] = '=';
+                     if (crlf) odata[out++] = '\r';
+                     odata[out++] = '\n';
+                     linelen = 0;
+                 }
+                 linelen++;
+                 if (header && data[in] == ' ') {
+                     /* header style encoding writes a space as '_' */
+                     odata[out++] = '_';
+                     in++;
+                 }
+                 else {
+                     odata[out++] = data[in++];
+                 }
+             }
+         }
+     }
+
+     /* rv is NULL on failure; the scratch buffer is released either way. */
+     rv = PyString_FromStringAndSize((char *)odata, out);
+     free (odata);
+     return rv;
+ }
/* List of functions defined in the module */
***************
*** 991,994 ****
--- 1283,1290 ----
{"crc_hqx", binascii_crc_hqx, METH_VARARGS, doc_crc_hqx},
{"crc32", binascii_crc32, METH_VARARGS, doc_crc32},
+ {"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS | METH_KEYWORDS,
+ doc_a2b_qp},
+ {"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS | METH_KEYWORDS,
+ doc_b2a_qp},
{NULL, NULL} /* sentinel */
};