pickle alternative
Andrew Dalke
dalke at dalkescientific.com
Wed Jun 1 04:09:34 EDT 2005
simonwittber posted his test code.
I took the code from the cookbook, called it "sencode" and
added these two lines
dumps = encode
loads = decode
I then ran your test code (unchanged except that my newsreader
folded the "value = ..." line) and got
marshal enc T: 0.21
marshal dec T: 0.4
sencode enc T: 7.76
sencode dec T: 11.56
This is with Python 2.3; the stock one provided by Apple
for my Mac.
I expected the numbers to be like this because the marshal
code is used to make and read the .pyc files and is supposed
to be pretty fast.
BTW, I tried the performance approach I outlined earlier.
The numbers aren't much better
marshal enc T: 0.2
marshal dec T: 0.38
sencode2 enc T: 7.16
sencode2 dec T: 9.49
I changed the format a little bit; dicts are treated a bit
differently.
from struct import pack, unpack
from cStringIO import StringIO
class EncodeError(Exception):
    """Raised by the encoder for an object whose type it cannot serialize."""
class DecodeError(Exception):
    """Raised by the decoder when the input contains a type tag it does not know."""
def encode(data):
    """Serialize `data` and return the result as a single binary string.

    The chunks produced by `_encode` are collected in an in-memory
    StringIO buffer, whose accumulated contents are returned.
    """
    sink = StringIO()
    emit = sink.write
    _encode(data, emit)
    return sink.getvalue()
def _encode(data, write, pack = pack):
    """Serialize `data` by handing tagged binary chunks to `write`.

    Each value is emitted as a one-character type tag followed by its
    payload; every struct field uses network byte order:

      "I"  int    4-byte signed value ("!i")
      "B"  long   4-byte length ("!L"), then that many hex digits,
                  with a leading "-" for negative values
      "S"  str    4-byte length ("!L"), then the string itself
      "F"  float  4-byte field ("!f"), so only single precision survives
      "N"  None   no payload
      "L"  list   4-byte item count ("!L"), then each item in order
      "T"  tuple  same layout as a list
      "D"  dict   4-byte pair count ("!L"), then key, value, key, value...

    An int that does not fit the signed 32-bit "I" field is written in
    the "B" form instead, so it decodes to an equal value of type long.

    Parameters:
      data  -- the object to serialize
      write -- callable accepting one string; called once per chunk
      pack  -- struct.pack, bound as a default argument (presumably so
               the lookup is a fast local one)

    Raises EncodeError((data, type(data))) for any other type.  Dispatch
    is on the exact type, so instances of subclasses are rejected too.
    """
    # The original cookbook code used the equivalent of "type(data) is list";
    # that exact-type dispatch is preserved here.
    T = type(data)
    # Decimal bounds of a signed 32-bit integer, the range "!i" can hold.
    if T is int and -2147483648 <= data <= 2147483647:
        write("I")
        write(pack("!i", data))
    elif T is list:
        write("L")
        write(pack("!L", len(data)))
        # Assumes len and 'for ... in' aren't lying
        for item in data:
            _encode(item, write)
    elif T is tuple:
        write("T")
        write(pack("!L", len(data)))
        # Assumes len and 'for ... in' aren't lying
        for item in data:
            _encode(item, write)
    elif T is str:
        write("S")
        write(pack("!L", len(data)))
        write(data)
    elif T is type(None):
        write("N")
    elif T is float:
        write("F")
        write(pack("!f", data))
    elif T is dict:
        write("D")
        write(pack("!L", len(data)))
        for k, v in data.items():
            _encode(k, write)
            _encode(v, write)
    elif T is int or T is long:
        # Slicing hex(data) mangles negative values (hex(-5L) is '-0x5L'),
        # so format the magnitude and put the sign back by hand.  The
        # decoder's long(digits, 16) accepts the leading "-".
        digits = "%x" % abs(data)
        if data < 0:
            digits = "-" + digits
        write("B")
        # "!L" matches the format the decoder uses to read this length.
        write(pack("!L", len(digits)))
        write(digits)
    else:
        raise EncodeError((data, T))
def decode(s):
    """
    Rebuild the Python object serialized in the binary string `s`.

    The string is wrapped in a StringIO so that `_decode` can consume
    it through successive read calls.
    """
    return _decode(StringIO(s).read)
def _decode(read, unpack = unpack):
    """Read one encoded value through `read` and return it.

    The first byte is a type tag that selects how the rest is parsed:

      "I"  4-byte signed int ("!i")
      "F"  4-byte float ("!f")
      "N"  None, no payload
      "S"  4-byte length ("!L"), then that many bytes returned as-is
      "B"  4-byte length ("!L"), then hex digits parsed with long(..., 16)
      "L"  4-byte item count ("!L"), then that many nested values -> list
      "T"  same layout as "L" -> tuple
      "D"  4-byte pair count ("!L"), then alternating keys and values -> dict

    Parameters:
      read   -- callable taking a byte count and returning up to that many
                bytes (for example the read method of a StringIO)
      unpack -- struct.unpack, bound as a default argument (presumably so
                the lookup is a fast local one)

    Raises DecodeError(code) for an unknown tag; an empty read at end of
    input lands here too, since "" matches no tag.  Raises DecodeError
    with a message when an "S" or "B" payload is shorter than its
    declared length.  A short read of a fixed 4-byte field is not
    intercepted and surfaces as whatever unpack raises.
    """
    code = read(1)
    if code == "I":
        return unpack("!i", read(4))[0]
    if code == "D":
        size = unpack("!L", read(4))[0]
        result = {}
        for i in range(size):
            # The key precedes its value in the stream.
            key = _decode(read)
            result[key] = _decode(read)
        return result
    if code == "T":
        size = unpack("!L", read(4))[0]
        return tuple([_decode(read) for i in range(size)])
    if code == "L":
        size = unpack("!L", read(4))[0]
        return [_decode(read) for i in range(size)]
    if code == "N":
        return None
    if code == "S":
        size = unpack("!L", read(4))[0]
        payload = read(size)
        # read() may return fewer bytes than requested at end of input;
        # report that instead of handing back a silently shortened string.
        if len(payload) != size:
            raise DecodeError("truncated %r payload: expected %d bytes, got %d"
                              % (code, size, len(payload)))
        return payload
    if code == "F":
        return unpack("!f", read(4))[0]
    if code == "B":
        size = unpack("!L", read(4))[0]
        digits = read(size)
        # Same end-of-input guard as for strings: a shortened digit run
        # would otherwise parse to a different number.
        if len(digits) != size:
            raise DecodeError("truncated %r payload: expected %d bytes, got %d"
                              % (code, size, len(digits)))
        return long(digits, 16)
    raise DecodeError(code)
# marshal/pickle-style aliases for the two entry points.
dumps, loads = encode, decode
I wonder if this could be improved by a "struct2" module
which could compile a pack/unpack format once. Eg,
float_struct = struct2.struct("!f")
float_struct.pack(f)
return float_struct.unpack('?\x80\x00\x00')[0]
which might be the same as
return float_struct.unpack1('?\x80\x00\x00')
Andrew
dalke at dalkescientific.com
More information about the Python-list
mailing list