Forgiving marshal module

Mon Aug 23 14:57:35 EDT 1999

Guido wrote a version of marshal in Python for an early version of JPython.
(I think it may have been supplanted by a Java version by now.)  I modified
its Unmarshaller class to be forgiving in the face of truncated marshal
strings.  I call it marshalp.py so as to allow it to be imported alongside
the real thing.  Perhaps someone else will find it useful in an emergency.

half-a-loaf-is-better-than-none-ly y'rs...

Skip Montanaro | http://www.mojam.com/
skip at mojam.com | http://www.musi-cal.com/~skip/
847-971-7098   | Python: Programming the way Guido indented...

"""Marshal module written in Python.

This module will marshal code objects; unmarshalling them additionally
requires the new module.  Neither performance nor careful error checking
is a goal.

In the face of truncated input, the Unmarshaller object will (should) return
what it can instead of simply raising an exception.  While this is generally
not the desired behavior, it can be helpful when trying to recover the
contents of damaged files.

"""

import sys, string
try:
    import cStringIO
    StringIO = cStringIO
except ImportError:
    import StringIO

from types import *
try:
    import new
except ImportError:
    new = None

# One-character type codes that prefix every marshalled value.
TYPE_NULL = '0'      # written after the last key/value pair of a dict
TYPE_NONE = 'N'      # None
TYPE_ELLIPSIS = '.'  # the Ellipsis object
TYPE_INT = 'i'       # integer written as 4 bytes
TYPE_INT64 = 'I'     # integer written as 8 bytes
TYPE_FLOAT = 'f'     # float, stored as a length-prefixed repr string
TYPE_COMPLEX = 'x'   # complex, stored as two length-prefixed repr strings
TYPE_LONG = 'l'      # long integer, stored as 15-bit digits
TYPE_STRING = 's'    # string with a 4-byte length prefix
TYPE_TUPLE = '('     # tuple with a 4-byte item count
TYPE_LIST = '['      # list with a 4-byte item count
TYPE_DICT = '{'      # dict; pairs follow until TYPE_NULL
TYPE_CODE = 'c'      # code object
TYPE_UNKNOWN = '?'   # not referenced by the classes in this module

class Marshaller:
    """Write Python values to a file-like object in marshal format.

    Every supported type has a ``dump_*`` method which is registered in the
    class-level ``dispatch`` table under the exact type it handles.
    """

    # type object -> plain function called as handler(marshaller, value)
    dispatch = {}

    def __init__(self, f):
        """Keep *f*; only its ``write`` method is ever used."""
        self.f = f

    def dump(self, x):
        """Marshal *x* with the handler registered for ``type(x)``.

        A type without a registered handler surfaces as the KeyError raised
        by the dispatch-table lookup.
        """
        handler = self.dispatch[type(x)]
        handler(self, x)

    def w_long64(self, x):
        """Write *x* as two 32-bit little-endian words, low word first."""
        low_word = x
        high_word = x >> 32
        self.w_long(low_word)
        self.w_long(high_word)

    def w_long(self, x):
        """Write the low 32 bits of *x*, least significant byte first."""
        emit = self.f.write
        for shift in (0, 8, 16, 24):
            emit(chr((x >> shift) & 0xff))

    def w_short(self, x):
        """Write the low 16 bits of *x*, least significant byte first."""
        emit = self.f.write
        for shift in (0, 8):
            emit(chr((x >> shift) & 0xff))

    def dump_none(self, x):
        """Write the type code for None; there is no payload."""
        self.f.write(TYPE_NONE)
    dispatch[NoneType] = dump_none

    def dump_ellipsis(self, x):
        """Write the type code for Ellipsis; there is no payload."""
        self.f.write(TYPE_ELLIPSIS)
    try:
        dispatch[EllipsisType] = dump_ellipsis
    except NameError:
        pass

    def dump_int(self, x):
        """Write an int as TYPE_INT, or as TYPE_INT64 when it needs more bits."""
        # Shifting out the low 31 bits leaves 0 or -1 exactly when x fits
        # in a signed 32-bit word.
        upper = x >> 31
        if upper == 0 or upper == -1:
            self.f.write(TYPE_INT)
            self.w_long(x)
        else:
            self.f.write(TYPE_INT64)
            self.w_long64(x)
    dispatch[IntType] = dump_int

    def dump_long(self, x):
        """Write a long as a signed digit count followed by 15-bit digits."""
        self.f.write(TYPE_LONG)
        if x < 0:
            sign, magnitude = -1, -x
        else:
            sign, magnitude = 1, x
        # Peel off 15 bits at a time, least significant digit first.
        digits = []
        while magnitude:
            digits.append(magnitude & 0x7FFF)
            magnitude = magnitude >> 15
        # The sign of the value is carried by the sign of the digit count.
        self.w_long(sign * len(digits))
        for digit in digits:
            self.w_short(digit)
    dispatch[LongType] = dump_long

    def dump_float(self, x):
        """Write a float as a one-byte length followed by its repr."""
        emit = self.f.write
        emit(TYPE_FLOAT)
        text = repr(x)
        emit(chr(len(text)))
        emit(text)
    dispatch[FloatType] = dump_float

    def dump_complex(self, x):
        """Write a complex as two length-prefixed reprs: real, then imag."""
        emit = self.f.write
        emit(TYPE_COMPLEX)
        for attr in ('real', 'imag'):
            text = repr(getattr(x, attr))
            emit(chr(len(text)))
            emit(text)
    try:
        dispatch[ComplexType] = dump_complex
    except NameError:
        pass

    def dump_string(self, x):
        """Write a string as a 32-bit length followed by the raw characters."""
        out = self.f
        out.write(TYPE_STRING)
        self.w_long(len(x))
        out.write(x)
    dispatch[StringType] = dump_string

    def _dump_sequence(self, typecode, items):
        """Write *typecode*, the item count, then each item via dump()."""
        self.f.write(typecode)
        self.w_long(len(items))
        for element in items:
            self.dump(element)

    def dump_tuple(self, x):
        """Write a tuple: type code, 32-bit count, then the items."""
        self._dump_sequence(TYPE_TUPLE, x)
    dispatch[TupleType] = dump_tuple

    def dump_list(self, x):
        """Write a list: type code, 32-bit count, then the items."""
        self._dump_sequence(TYPE_LIST, x)
    dispatch[ListType] = dump_list

    def dump_dict(self, x):
        """Write a dict as alternating keys and values ended by TYPE_NULL."""
        self.f.write(TYPE_DICT)
        for k, v in x.items():
            self.dump(k)
            self.dump(v)
        # No count is stored; the NULL marker tells the reader to stop.
        self.f.write(TYPE_NULL)
    dispatch[DictionaryType] = dump_dict

    def dump_code(self, x):
        """Write a code object: four 16-bit fields, then its components."""
        self.f.write(TYPE_CODE)
        for attr in ('co_argcount', 'co_nlocals', 'co_stacksize', 'co_flags'):
            self.w_short(getattr(x, attr))
        for attr in ('co_code', 'co_consts', 'co_names', 'co_varnames',
                     'co_filename', 'co_name'):
            self.dump(getattr(x, attr))
        self.w_short(x.co_firstlineno)
        self.dump(x.co_lnotab)
    try:
        dispatch[CodeType] = dump_code
    except NameError:
        pass

class NULL:
    """Sentinel the Unmarshaller returns for a TYPE_NULL marker.

    The class object itself is the sentinel; it is compared with ``is``
    and never instantiated by this module.
    """

class Unmarshaller:

    dispatch = {}

    def __init__(self, f):
	self.f = f

    def read(self,n):
	bytes = self.f.read(n)
	if len(bytes) < n: raise EOFError
	return bytes

    def load(self):
	c = self.read(1)
	return self.dispatch[c](self)

    def r_short(self):
	read = self.read
	lo = ord(read(1))
	hi = ord(read(1))
	x = lo | (hi<<8)
	if x & 0x8000:
	    x = x - 0x10000
	return x

    def r_long(self):
	read = self.read
	a = ord(read(1))
	b = ord(read(1))
	c = ord(read(1))
	d = ord(read(1))
	x = a | (b<<8) | (c<<16) | (d<<24)
	if x & 0x80000000 and x > 0:
	    x = string.atoi(x - 0x100000000L)
	return x

    def r_long64(self):
	a = self.r_long()
	b = self.r_long()
	return a | (b<<32)

    def load_null(self):
	return NULL
    dispatch[TYPE_NULL] = load_null

    def load_none(self):
	return None
    dispatch[TYPE_NONE] = load_none

    def load_ellipsis(self):
	return EllipsisType
    dispatch[TYPE_ELLIPSIS] = load_ellipsis

    def load_int(self):
	return self.r_long()
    dispatch[TYPE_INT] = load_int

    def load_int64(self):
	return self.r_long64()
    dispatch[TYPE_INT64] = load_int64

    def load_long(self):
	size = self.r_long()
	sign = 1
	if size < 0:
	    sign = -1
	    size = -size
	x = 0L
	for i in range(size):
	    d = self.r_short()
	    x = x | (d<<(i*15L))
	return x * sign
    dispatch[TYPE_LONG] = load_long

    def load_float(self):
	n = ord(self.read(1))
	s = self.read(n)
	return string.atof(s)
    dispatch[TYPE_FLOAT] = load_float

    def load_complex(self):
	n = ord(self.read(1))
	s = self.read(n)
	real = float(s)
	n = ord(self.read(1))
	s = self.read(n)
	imag = float(s)
	return complex(real, imag)
    dispatch[TYPE_COMPLEX] = load_complex

    def load_string(self):
	n = self.r_long()
	return self.read(n)
    dispatch[TYPE_STRING] = load_string

    def load_tuple(self):
	return tuple(self.load_list())
    dispatch[TYPE_TUPLE] = load_tuple

    def load_list(self):
	n = self.r_long()
	list = []
	for i in range(n):
	    try:
		list.append(self.load())
	    except EOFError:
		sys.stderr.write("EOF on input ... list incomplete\n")
		break
	return list
    dispatch[TYPE_LIST] = load_list

    def load_dict(self):
	d = {}
	while 1:
	    try:
		key = self.load()
	    except EOFError:
		sys.stderr.write("EOF on input ... dict incomplete\n")
		break
	    if key is NULL:
		break
	    try:
		value = self.load()
	    except EOFError:
		sys.stderr.write("EOF on input ... dict incomplete\n")
		break
	    d[key] = value
	return d
    dispatch[TYPE_DICT] = load_dict

    def load_code(self):
	try:
	    argcount = self.r_short()
	    nlocals = self.r_short()
	    stacksize = self.r_short()
	    flags = self.r_short()
	    code = self.load()
	    consts = self.load()
	    names = self.load()
	    varnames = self.load()
	    filename = self.load()
	    name = self.load()
	    firstlineno = self.r_short()
	    lnotab = self.load()
	except EOFError:
	    sys.stderr.write("EOF on input ... broken code object\n")
	    return NULL

	if not new:
	    raise RuntimeError, "can't unmarshal code objects; no 'new' module"
	return new.code(argcount, nlocals, stacksize, flags, code, consts,
			names, varnames, filename, name, firstlineno, lnotab)
    dispatch[TYPE_CODE] = load_code

def dump(x, f):
    """Marshal *x* onto the file-like object *f*."""
    writer = Marshaller(f)
    writer.dump(x)

def load(f):
    """Read one marshalled value from the file-like object *f*."""
    reader = Unmarshaller(f)
    return reader.load()

def dumps(x):
    """Return the marshalled form of *x* as a string."""
    # Marshal into an in-memory buffer and hand back its contents.
    buf = StringIO.StringIO()
    dump(x, buf)
    return buf.getvalue()

def loads(s):
    """Read one marshalled value from the string *s*."""
    return load(StringIO.StringIO(s))