pickle alternative

Wed Jun 1 04:09:34 EDT 2005

simonwittber posted his test code.

I took the code from the cookbook, called it "sencode" and
added these two lines

dumps = encode
loads = decode

I then ran your test code (unchanged except that my newsreader
folded the "value = ..." line) and got

marshal enc T: 0.21
marshal dec T: 0.4
sencode enc T: 7.76
sencode dec T: 11.56

This is with Python 2.3; the stock one provided by Apple
for my Mac.

I expected the numbers to be like this because the marshal
code is used to make and read the .pyc files and is supposed
to be pretty fast.

BTW, I tried the performance approach I outlined earlier.
The numbers aren't much better

marshal enc T: 0.2
marshal dec T: 0.38
sencode2 enc T: 7.16
sencode2 dec T: 9.49

I changed the format a little bit; dicts are treated a bit
differently.

from struct import pack, unpack
from cStringIO import StringIO

class EncodeError(Exception):
    """Raised by the encoder when a value's type is not supported."""
class DecodeError(Exception):
    """Raised by the decoder when it meets an unrecognized type code."""

def encode(data):
    """
    Encode a Python value into a binary string.

    The chunks produced by _encode are collected in an in-memory
    buffer, and the buffer's full contents are returned.
    """
    sink = StringIO()
    emit = sink.write
    _encode(data, emit)
    return sink.getvalue()

def _encode(data, write, pack = pack):
    """
    Serialize data by calling write() with successive string chunks.

    Every value is written as a one-character type code followed by
    its payload.  Numbers in the payload are packed in network byte
    order (the "!" struct prefix):

        "I"  int    4-byte signed integer
        "L"  list   4-byte unsigned item count, then each item
        "T"  tuple  4-byte unsigned item count, then each item
        "S"  str    4-byte unsigned length, then the string itself
        "B"  long   4-byte unsigned length, then signed hex digits
        "N"  None   no payload
        "F"  float  4-byte float ("!f", so single precision only)
        "D"  dict   4-byte unsigned pair count, then key, value, ...

    data  -- the value to encode; containers are encoded recursively
    write -- callable taking one string, e.g. a file's write method
    pack  -- struct.pack, bound as a default argument

    Raises EncodeError((data, type(data))) for any other type.
    """
    # The original code used the equivalent of "type(data) is list".
    # That behavior is preserved: dispatch is on the exact type, so
    # instances of subclasses end up in the EncodeError branch.

    T = type(data)

    if T is int:
        write("I")
        write(pack("!i", data))
    elif T is list:
        write("L")
        write(pack("!L", len(data)))
        # Assumes len and 'for ... in' aren't lying
        for item in data:
            _encode(item, write)
    elif T is tuple:
        write("T")
        write(pack("!L", len(data)))
        # Assumes len and 'for ... in' aren't lying
        for item in data:
            _encode(item, write)
    elif T is str:
        write("S")
        write(pack("!L", len(data)))
        write(data)
    elif T is long:
        # "%x" puts the sign in front of the bare hex digits ("-ff"),
        # which is the form long(s, 16) in the decoder accepts.
        # Slicing hex(data)[2:-1] assumed a leading "0x" and therefore
        # turned -255L ("-0xffL") into the undecodable "xff".
        s = "%x" % data
        write("B")
        # Unsigned length, matching the "!L" the decoder unpacks.
        write(pack("!L", len(s)))
        write(s)
    elif T is type(None):
        write("N")
    elif T is float:
        write("F")
        write(pack("!f", data))
    elif T is dict:
        write("D")
        write(pack("!L", len(data)))
        for k, v in data.items():
            _encode(k, write)
            _encode(v, write)
    else:
        raise EncodeError((data, T))

def decode(s):
    """
    Turn a binary string back into the Python value it encodes.

    The string is wrapped in an in-memory file object and a single
    value is read from it by _decode.
    """
    source = StringIO(s)
    fetch = source.read
    return _decode(fetch)

def _decode(read, unpack = unpack):
    """
    Read one encoded value through read() and return it.

    read   -- callable taking a byte count and returning that many
              characters, e.g. a file's read method
    unpack -- struct.unpack, bound as a default argument

    A one-character type code is read first and selects how the
    payload that follows is interpreted; containers decode their
    items recursively.  Raises DecodeError(code) when the type code
    is not one of "I", "F", "N", "S", "B", "L", "T" or "D".
    """
    tag = read(1)

    # Scalars with a fixed-size (or empty) payload.
    if tag == "I":
        return unpack("!i", read(4))[0]
    elif tag == "F":
        return unpack("!f", read(4))[0]
    elif tag == "N":
        return None

    # Length-prefixed character data.
    elif tag == "S":
        length = unpack("!L", read(4))[0]
        return read(length)
    elif tag == "B":
        length = unpack("!L", read(4))[0]
        digits = read(length)
        return long(digits, 16)

    # Count-prefixed containers.
    elif tag == "L":
        count = unpack("!L", read(4))[0]
        return [_decode(read) for n in range(count)]
    elif tag == "T":
        count = unpack("!L", read(4))[0]
        members = [_decode(read) for n in range(count)]
        return tuple(members)
    elif tag == "D":
        count = unpack("!L", read(4))[0]
        # Keys and values alternate in the stream: k0, v0, k1, v1, ...
        flat = [_decode(read) for n in range(count * 2)]
        keys = flat[0::2]
        values = flat[1::2]
        return dict(zip(keys, values))

    else:
        raise DecodeError(tag)

# Aliases under the dumps()/loads() names that marshal also provides,
# so code written against that interface can call this module instead.
dumps = encode
loads = decode

I wonder if this could be improved by a "struct2" module
which could compile a pack/unpack format once.  Eg,

float_struct = struct2.struct("!f")

float_struct.pack(f)
return float_struct.unpack('?\x80\x00\x00')[0]
  which might be the same as
return float_struct.unpack1('?\x80\x00\x00')

				Andrew
				dalke at dalkescientific.com