Simple encryption proposal. Comments ?
Bengt Richter
bokr at oz.net
Thu Jan 2 17:02:43 EST 2003
On 02 Jan 2003 06:58:20 -0800, Paul Rubin <phr-n2002b at NOSPAMnightsong.com> wrote:
>bokr at oz.net (Bengt Richter) writes:
>> Thanks for posting, mine was in C++ and not so pretty (though fast enough ;-)
>>
>> BTW, (untested)
>>
>> k1, k2, k3 = map(_inv_letters.__getitem__, x[i:i+3])
>>
>> might be an interesting alternative to the list comprehension?
>
>I don't know, I wasn't really trying to make that function fast, I was
>just playing around. Base52 is a clever idea (I've gotten burned a
>number of times by non-alphanumeric base64 characters needing escaping
>for some purpose) so I wanted to try it. A more serious
>implementation should be done as a C patch to the binascii module.
>
Ok. Below is a diff. I haven't tested it extensively, and it is called binasciiXXX.c
to kludge it into being an independently importable module for testing. I didn't
rebuild python (2.2.2 on windows nt4) yet.
It was linked with (just relinked for log) the following, which warns of my test kludging to
make the DLL export the init name.
[13:57] C:\pywk\ut\binascii>cl -nologo -LD -DUSE_DL_IMPORT /ID:\python22\include binasciiXXX.c -
link -LIBPATH:d:\python22\libs
binasciiXXX.c
D:\python22\include\pyconfig.h(105) : warning C4005: 'USE_DL_IMPORT' : macro redefinition
unknown(0) : see previous definition of 'USE_DL_IMPORT'
Creating library binasciiXXX.lib and object binasciiXXX.exp
===================================================================================
[13:58] C:\pywk\ut\binascii>diff -u D:\Python-2.2.2\Modules\binascii.c binasciiXXX.c
--- D:\Python-2.2.2\Modules\binascii.c Wed Sep 25 02:25:14 2002
+++ binasciiXXX.c Thu Jan 02 13:57:38 2003
@@ -51,6 +51,23 @@
** some cases of whitespace, etc).
**
** Brandon Long, September 2001.
+**
+** Added support for "base52" encoding and decoding analogous to base64, but
+** using only the first 52 characters of encoding letters (A-Z plus a-z).
+** It encodes pairs of binary bytes and special cases an odd length by
+** processing the first byte specially if so. It takes 3 base52 characters
+** to encode an input pair and also 3 chars for the odd byte. It is based
+** on the fact that 52**3 > 2**17, meaning that three code characters can
+** specify two bytes and a flag. The flag is bit 16, set to 1 to mark an odd byte.
+** Input pairs xy become a number ord(x)*256+ord(y), or ord(x)+65536 for the
+** odd byte. Base52 coefficients hi,mid,lo are then found such that
+** hi*52*52+mid*52+lo = the aforementioned number. Hi, mid, and lo are then
+** used to index into the A-Za-z string to get respective code character.
+** Decoding is the inverse of encoding, but ignores non-code-alphabet characters, so
+** base52 code strings can be arbitrarily broken up with spaces and newlines etc.
+**
+** Bengt Richter, January 2003
+**
*/
@@ -467,6 +484,108 @@
return rv;
}
+static char doc_b2a_base52[] = "(bin) -> ascii. Base52-code line of data";
+
+/*
+** b2a_base52(bin) -> ascii
+**
+** Encode a binary string using only the letters A-Z and a-z. Every pair
+** of input bytes becomes three output characters. When the input length
+** is odd, the FIRST byte is encoded on its own (also as three characters)
+** with 65536 added to mark it as a lone byte.
+**
+** Returns a new string of exactly ((len+1)/2)*3 characters, or NULL with
+** an exception set if argument parsing fails, the input is longer than
+** BASE64_MAXBIN, or the result string cannot be allocated.
+*/
+static PyObject *
+binascii_b2a_base52(PyObject *self, PyObject *args)
+{
+    /* base52 shares the base64 alphabet table (only the first 52 entries are indexed) */
+    #define table_b2a_base52 table_b2a_base64
+    unsigned char *src, *dst;
+    PyObject *result;
+    int remaining, value, digit_hi, digit_mid, digit_lo;
+
+    if ( !PyArg_ParseTuple(args, "s#:b2a_base52", &src, &remaining) )
+        return NULL;
+    if ( remaining > BASE64_MAXBIN ) {
+        PyErr_SetString(Error, "Too much data for base52 line");
+        return NULL;
+    }
+
+    /* Three characters per input pair, plus three for a lone odd byte */
+    result = PyString_FromStringAndSize(NULL, ((remaining+1)/2)*3);
+    if ( result == NULL )
+        return NULL;
+    dst = (unsigned char *)PyString_AsString(result);
+
+    while ( remaining > 0 ) {
+        if ( remaining % 2 != 0 ) {
+            /* Odd count left: only possible on the first pass, so the
+            ** lone byte is the leading one. Flag it at bit 16. */
+            value = 65536 + src[0];
+            src += 1;
+            remaining -= 1;
+        } else {
+            /* Regular pair, first byte is the high-order one */
+            value = src[0]*256 + src[1];
+            src += 2;
+            remaining -= 2;
+        }
+
+        /* Split the value into three base52 digits */
+        digit_lo = value % 52;
+        value /= 52;
+        digit_mid = value % 52;
+        digit_hi = value / 52;
+
+        dst[0] = table_b2a_base52[digit_hi];
+        dst[1] = table_b2a_base52[digit_mid];
+        dst[2] = table_b2a_base52[digit_lo];
+        dst += 3;
+    }
+
+    return result;
+}
+
+static char doc_a2b_base52[] = "(ascii) -> bin. Decode a line of base52 data";
+
+/*
+** a2b_base52(ascii) -> bin
+**
+** Decode base52 text. Only A-Z (digits 0..25) and a-z (digits 26..51)
+** are code characters; every other character is skipped, so the text may
+** be broken up with spaces, newlines etc. Code characters are consumed
+** three at a time, most significant digit first. A triplet whose value has
+** bit 16 set yields one byte (the low 8 bits); any other triplet yields
+** two bytes, high-order byte first.
+**
+** Returns a new string with the decoded bytes (the empty string when no
+** triplet was decoded), or NULL with an exception set if argument parsing
+** or allocation fails, or if the number of code characters is not a
+** multiple of 3.
+**
+** NOTE(review): triplet values above 65536+255 are not rejected; bits that
+** do not fit are silently dropped. Confirm whether malformed input should
+** raise an error instead.
+*/
+static PyObject *
+binascii_a2b_base52(PyObject *self, PyObject *args)
+{
+    unsigned char *ascii_data, *bin_data, *bin_beg;
+    unsigned char this_ch;
+    PyObject *rv;
+    int ascii_len, bin_len;
+    int nc = 0;     /* code chars collected for the current triplet */
+    int num17 = 0;  /* possibly 17-bit value of the current triplet */
+
+    if ( !PyArg_ParseTuple(args, "t#:a2b_base52", &ascii_data, &ascii_len) )
+        return NULL;
+
+    bin_len = (ascii_len/3)*2; /* Upper bound, corrected later */
+
+    /* Allocate the buffer */
+    if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
+        return NULL;
+    bin_data = bin_beg = (unsigned char *)PyString_AsString(rv);
+
+    for( ; ascii_len > 0; ascii_len--) {
+        this_ch = *ascii_data++;
+
+        /* Map code letters to their digit value; skip anything else
+        ** (whitespace, punctuation, bytes above 0x7f, ...) */
+        if(this_ch >= 'A' && this_ch <= 'Z'){
+            this_ch -= 'A';
+        } else if(this_ch >= 'a' && this_ch <= 'z'){
+            this_ch -= ('a'-26);
+        } else {
+            continue;
+        }
+
+        num17 = num17*52 + this_ch;
+        if( ++nc == 3){
+            nc = 0; /* we should always get triplets or encoding is bad */
+            if( num17 & 65536 ){
+                /* flagged lone byte */
+                *bin_data++ = num17&0xff;
+            } else {
+                *bin_data++ = (num17>>8)&0xff;
+                *bin_data++ = num17&0xff;
+            }
+            num17 = 0;
+        }
+    }
+
+    if (nc != 0) {
+        PyErr_SetString(Error, "Number of base52 characters not a multiple of 3");
+        Py_DECREF(rv);
+        return NULL;
+    }
+
+    /* Set the string size correctly. When nothing was decoded (e.g. the
+    ** input held only skipped characters) the upper-bound buffer must not
+    ** be returned as is: it would expose uninitialized bytes. Hand back an
+    ** empty string instead.
+    */
+    if (bin_data > bin_beg) {
+        _PyString_Resize(&rv, bin_data - bin_beg);
+    } else {
+        Py_DECREF(rv);
+        rv = PyString_FromString("");
+    }
+    return rv;
+}
+
static char doc_a2b_hqx[] = "ascii -> bin, done. Decode .hqx coding";
static PyObject *
@@ -1286,6 +1405,8 @@
static struct PyMethodDef binascii_module_methods[] = {
{"a2b_uu", binascii_a2b_uu, METH_VARARGS, doc_a2b_uu},
{"b2a_uu", binascii_b2a_uu, METH_VARARGS, doc_b2a_uu},
+ {"a2b_base52", binascii_a2b_base52, METH_VARARGS, doc_a2b_base52},
+ {"b2a_base52", binascii_b2a_base52, METH_VARARGS, doc_b2a_base52},
{"a2b_base64", binascii_a2b_base64, METH_VARARGS, doc_a2b_base64},
{"b2a_base64", binascii_b2a_base64, METH_VARARGS, doc_b2a_base64},
{"a2b_hqx", binascii_a2b_hqx, METH_VARARGS, doc_a2b_hqx},
@@ -1308,15 +1429,18 @@
/* Initialization function for the module (*must* be called initbinascii) */
-static char doc_binascii[] = "Conversion between binary data and ASCII";
+static char doc_binascii[] = "XXX Conversion between binary data and ASCII";
+
+/* #define DL_IMPORT(RTYPE) __declspec(dllexport) RTYPE */
+#define DL_EXPORT_XXX(RTYPE) __declspec(dllexport) RTYPE
-DL_EXPORT(void)
-initbinascii(void)
+DL_EXPORT_XXX(void)
+initbinasciiXXX(void)
{
PyObject *m, *d, *x;
/* Create the module and add the functions */
- m = Py_InitModule("binascii", binascii_module_methods);
+ m = Py_InitModule("binasciiXXX", binascii_module_methods);
d = PyModule_GetDict(m);
x = PyString_FromString(doc_binascii);
===================================================================================
To use this in python itself, change and/or get rid of XXX kludges. The doc string
only has that so I could tell I was importing the separate test module at first.
Regards,
Bengt Richter
More information about the Python-list
mailing list