[Patches] marshalling recursive objects

Vladimir Marangozov Vladimir.Marangozov@inrialpes.fr
Fri, 12 May 2000 03:19:58 +0200 (CEST)


I saw recently in c.l.py a discussion about recursive objects. It was
noted that recursive comparisons were fixed, but marshalling recursive
objects still core dumps.

>>> l = []
>>> l.append(l)
>>> marshal.dumps(l)
Paf!

The following patch aims at fixing this. The approach is the same as the
one used in pickle (I believe). Container objects are kept in an internal
list. When serializing, on recursion branches, the list index of the container
is stored in the byte stream, instead of its contents. This index should
be the same when deserializing, because of the strict object ordering.
Recursions are thus restored, and we have the invariant:

   o == marshal.loads(marshal.dumps(o))

even when o involves cyclic references.

-- 
       Vladimir MARANGOZOV          | Vladimir.Marangozov@inrialpes.fr
http://sirac.inrialpes.fr/~marangoz | tel:(+33-4)76615277 fax:76615252

--

Disclaimer:

I confirm that, to the best of my knowledge and belief, this contribution is
free of any claims of third parties under copyright, patent or other rights
or interests ("claims").  To the extent that I have any such claims, I
hereby grant to CNRI a nonexclusive, irrevocable, royalty-free, worldwide
license to reproduce, distribute, perform and/or display publicly, prepare
derivative versions, and otherwise use this contribution as part of the
Python software and its related documentation, or any derivative versions
thereof, at no cost to CNRI or its licensed users, and to authorize others
to do so.

I acknowledge that CNRI may, at its sole discretion, decide whether or not
to incorporate this contribution in the Python software and its related
documentation.  I further grant CNRI permission to use my name and other
identifying information provided to CNRI by me for use in connection with
the Python software and its related documentation.

-------------------------------[ cut here ]---------------------------
*** marshal.c-CVS	Fri May 12 02:51:29 2000
--- marshal.c	Fri May 12 02:46:17 2000
***************
*** 52,57 ****
--- 52,58 ----
  #define TYPE_LIST	'['
  #define TYPE_DICT	'{'
  #define TYPE_CODE	'c'
+ #define TYPE_OBJECT	'o'
  #define TYPE_UNICODE	'u'
  #define TYPE_UNKNOWN	'?'
  
***************
*** 68,73 ****
--- 69,114 ----
  		      else if ((p)->ptr != (p)->end) *(p)->ptr++ = (c); \
  			   else w_more(c, p)
  
+ static PyObject *victims = NULL; /* processed containers while in progress */
+ static int _PyMarshalState_nesting = 0;
+ 
+ static int
+ set_victim(op)
+ 	PyObject *op;
+ {
+ 	if (op == NULL)
+ 		return -1;
+ 	if (victims == NULL) {
+ 		victims = PyList_New(0);
+ 		if (victims == NULL)
+ 			return -1;
+ 	}
+ 	return PyList_Append(victims, op);
+ }
+ 
+ static PyObject *
+ get_victim(n)
+ 	int n;
+ {
+ 	if (victims == NULL)
+ 		return NULL;
+ 	return PyList_GetItem(victims, n);
+ }
+ 
+ static int
+ victim_index(op)
+ 	PyObject *op;
+ {
+ 	int i;
+ 	if (op == NULL || victims == NULL)
+ 		return -1;
+ 	for (i = 0; i < PyList_GET_SIZE(victims); i++) {
+ 		if (op == PyList_GET_ITEM(victims, i))
+ 			return i;
+ 	}
+ 	return -1;
+ }
+ 
  static void
  w_more(c, p)
  	char c;
***************
*** 144,150 ****
  {
  	int i, n;
  	PyBufferProcs *pb;
! 	
  	if (v == NULL) {
  		w_byte(TYPE_NULL, p);
  	}
--- 185,193 ----
  {
  	int i, n;
  	PyBufferProcs *pb;
! 
! 	++_PyMarshalState_nesting;
! 
  	if (v == NULL) {
  		w_byte(TYPE_NULL, p);
  	}
***************
*** 223,229 ****
  		utf8 = PyUnicode_AsUTF8String(v);
  		if (utf8 == NULL) {
  		    p->error = 1;
! 		    return;
  		}
  		w_byte(TYPE_UNICODE, p);
  		n = PyString_GET_SIZE(utf8);
--- 266,272 ----
  		utf8 = PyUnicode_AsUTF8String(v);
  		if (utf8 == NULL) {
  		    p->error = 1;
! 		    goto finally;
  		}
  		w_byte(TYPE_UNICODE, p);
  		n = PyString_GET_SIZE(utf8);
***************
*** 232,263 ****
  		Py_DECREF(utf8);
  	}
  	else if (PyTuple_Check(v)) {
! 		w_byte(TYPE_TUPLE, p);
! 		n = PyTuple_Size(v);
! 		w_long((long)n, p);
! 		for (i = 0; i < n; i++) {
! 			w_object(PyTuple_GET_ITEM(v, i), p);
  		}
  	}
  	else if (PyList_Check(v)) {
! 		w_byte(TYPE_LIST, p);
! 		n = PyList_GET_SIZE(v);
! 		w_long((long)n, p);
! 		for (i = 0; i < n; i++) {
! 			w_object(PyList_GET_ITEM(v, i), p);
  		}
  	}
  	else if (PyDict_Check(v)) {
  		int pos;
  		PyObject *key, *value;
! 		w_byte(TYPE_DICT, p);
! 		/* This one is NULL object terminated! */
! 		pos = 0;
! 		while (PyDict_Next(v, &pos, &key, &value)) {
! 			w_object(key, p);
! 			w_object(value, p);
  		}
- 		w_object((PyObject *)NULL, p);
  	}
  	else if (PyCode_Check(v)) {
  		PyCodeObject *co = (PyCodeObject *)v;
--- 275,333 ----
  		Py_DECREF(utf8);
  	}
  	else if (PyTuple_Check(v)) {
! 		int x = victim_index(v);
! 		if (x >= 0) {
! 			w_byte(TYPE_OBJECT, p);
! 			w_long((long)x, p);
! 		}
! 		else {
! 			if (set_victim(v) < 0)
! 				goto finally;
! 			w_byte(TYPE_TUPLE, p);
! 			n = PyTuple_Size(v);
! 			w_long((long)n, p);
! 			for (i = 0; i < n; i++) {
! 				w_object(PyTuple_GET_ITEM(v, i), p);
! 			}
  		}
  	}
  	else if (PyList_Check(v)) {
! 		int x = victim_index(v);
! 		if (x >= 0) {
! 			w_byte(TYPE_OBJECT, p);
! 			w_long((long)x, p);
! 		}
! 		else {
! 			if (set_victim(v) < 0)
! 				goto finally;
! 			w_byte(TYPE_LIST, p);
! 			n = PyList_GET_SIZE(v);
! 			w_long((long)n, p);
! 			for (i = 0; i < n; i++) {
! 				w_object(PyList_GET_ITEM(v, i), p);
! 			}
  		}
  	}
  	else if (PyDict_Check(v)) {
  		int pos;
  		PyObject *key, *value;
! 		int x = victim_index(v);
! 		if (x >= 0) {
! 			w_byte(TYPE_OBJECT, p);
! 			w_long((long)x, p);
! 		}
! 		else {
! 			if (set_victim(v) < 0)
! 				goto finally;
! 			w_byte(TYPE_DICT, p);
! 			/* This one is NULL object terminated! */
! 			pos = 0;
! 			while (PyDict_Next(v, &pos, &key, &value)) {
! 				w_object(key, p);
! 				w_object(value, p);
! 			}
! 			w_object((PyObject *)NULL, p);
  		}
  	}
  	else if (PyCode_Check(v)) {
  		PyCodeObject *co = (PyCodeObject *)v;
***************
*** 291,296 ****
--- 361,373 ----
  		w_byte(TYPE_UNKNOWN, p);
  		p->error = 1;
  	}
+ 
+  finally:
+ 	--_PyMarshalState_nesting;
+ 	if (_PyMarshalState_nesting == 0) {
+ 		Py_XDECREF(victims);
+ 		victims = NULL;
+ 	}
  }
  
  void
***************
*** 397,429 ****
  r_object(p)
  	RFILE *p;
  {
! 	PyObject *v, *v2;
  	long i, n;
  	int type = r_byte(p);
  	
  	switch (type) {
  	
  	case EOF:
  		PyErr_SetString(PyExc_EOFError,
  				"EOF read where object expected");
! 		return NULL;
  	
  	case TYPE_NULL:
! 		return NULL;
  	
  	case TYPE_NONE:
  		Py_INCREF(Py_None);
! 		return Py_None;
  	
  	case TYPE_ELLIPSIS:
  		Py_INCREF(Py_Ellipsis);
! 		return Py_Ellipsis;
  	
  	case TYPE_INT:
! 		return PyInt_FromLong(r_long(p));
  	
  	case TYPE_INT64:
! 		return PyInt_FromLong(r_long64(p));
  	
  	case TYPE_LONG:
  		{
--- 474,514 ----
  r_object(p)
  	RFILE *p;
  {
! 	PyObject *v, *v2, *ret;
  	long i, n;
  	int type = r_byte(p);
  	
+ 	++_PyMarshalState_nesting;
+ 
  	switch (type) {
  	
  	case EOF:
  		PyErr_SetString(PyExc_EOFError,
  				"EOF read where object expected");
! 		ret = NULL;
! 		break;
  	
  	case TYPE_NULL:
! 		ret = NULL;
! 		break;
  	
  	case TYPE_NONE:
  		Py_INCREF(Py_None);
! 		ret = Py_None;
! 		break;
  	
  	case TYPE_ELLIPSIS:
  		Py_INCREF(Py_Ellipsis);
! 		ret = Py_Ellipsis;
! 		break;
  	
  	case TYPE_INT:
! 		ret = PyInt_FromLong(r_long(p));
! 		break;
  	
  	case TYPE_INT64:
! 		ret = PyInt_FromLong(r_long64(p));
! 		break;
  	
  	case TYPE_LONG:
  		{
***************
*** 437,443 ****
  			ob->ob_size = n;
  			for (i = 0; i < size; i++)
  				ob->ob_digit[i] = r_short(p);
! 			return (PyObject *)ob;
  		}
  	
  	case TYPE_FLOAT:
--- 522,529 ----
  			ob->ob_size = n;
  			for (i = 0; i < size; i++)
  				ob->ob_digit[i] = r_short(p);
! 			ret = (PyObject *)ob;
! 			break;
  		}
  	
  	case TYPE_FLOAT:
***************
*** 449,461 ****
  			if (r_string(buf, (int)n, p) != n) {
  				PyErr_SetString(PyExc_EOFError,
  					"EOF read where object expected");
! 				return NULL;
  			}
  			buf[n] = '\0';
! 			PyFPE_START_PROTECT("atof", return 0)
  			dx = atof(buf);
  			PyFPE_END_PROTECT(dx)
! 			return PyFloat_FromDouble(dx);
  		}
  	
  #ifndef WITHOUT_COMPLEX
--- 535,549 ----
  			if (r_string(buf, (int)n, p) != n) {
  				PyErr_SetString(PyExc_EOFError,
  					"EOF read where object expected");
! 				ret = NULL;
! 				break;
  			}
  			buf[n] = '\0';
! 			PyFPE_START_PROTECT("atof", break)
  			dx = atof(buf);
  			PyFPE_END_PROTECT(dx)
! 			ret = PyFloat_FromDouble(dx);
! 			break;
  		}
  	
  #ifndef WITHOUT_COMPLEX
***************
*** 468,490 ****
  			if (r_string(buf, (int)n, p) != n) {
  				PyErr_SetString(PyExc_EOFError,
  					"EOF read where object expected");
! 				return NULL;
  			}
  			buf[n] = '\0';
! 			PyFPE_START_PROTECT("atof", return 0)
  			c.real = atof(buf);
  			PyFPE_END_PROTECT(c)
  			n = r_byte(p);
  			if (r_string(buf, (int)n, p) != n) {
  				PyErr_SetString(PyExc_EOFError,
  					"EOF read where object expected");
! 				return NULL;
  			}
  			buf[n] = '\0';
! 			PyFPE_START_PROTECT("atof", return 0)
  			c.imag = atof(buf);
  			PyFPE_END_PROTECT(c)
! 			return PyComplex_FromCComplex(c);
  		}
  #endif
  	
--- 556,581 ----
  			if (r_string(buf, (int)n, p) != n) {
  				PyErr_SetString(PyExc_EOFError,
  					"EOF read where object expected");
! 				ret = NULL;
! 				break;
  			}
  			buf[n] = '\0';
! 			PyFPE_START_PROTECT("atof", break)
  			c.real = atof(buf);
  			PyFPE_END_PROTECT(c)
  			n = r_byte(p);
  			if (r_string(buf, (int)n, p) != n) {
  				PyErr_SetString(PyExc_EOFError,
  					"EOF read where object expected");
! 				ret = NULL;
! 				break;
  			}
  			buf[n] = '\0';
! 			PyFPE_START_PROTECT("atof", break)
  			c.imag = atof(buf);
  			PyFPE_END_PROTECT(c)
! 			ret = PyComplex_FromCComplex(c);
! 			break;
  		}
  #endif
  	
***************
*** 492,498 ****
  		n = r_long(p);
  		if (n < 0) {
  			PyErr_SetString(PyExc_ValueError, "bad marshal data");
! 			return NULL;
  		}
  		v = PyString_FromStringAndSize((char *)NULL, n);
  		if (v != NULL) {
--- 583,590 ----
  		n = r_long(p);
  		if (n < 0) {
  			PyErr_SetString(PyExc_ValueError, "bad marshal data");
! 			ret = NULL;
! 			break;
  		}
  		v = PyString_FromStringAndSize((char *)NULL, n);
  		if (v != NULL) {
***************
*** 503,509 ****
  					"EOF read where object expected");
  			}
  		}
! 		return v;
  	
  	case TYPE_UNICODE:
  	    {
--- 595,602 ----
  					"EOF read where object expected");
  			}
  		}
! 		ret = v;
! 		break;
  	
  	case TYPE_UNICODE:
  	    {
***************
*** 512,542 ****
  		n = r_long(p);
  		if (n < 0) {
  			PyErr_SetString(PyExc_ValueError, "bad marshal data");
! 			return NULL;
  		}
  		buffer = PyMem_NEW(char, n);
! 		if (buffer == NULL)
! 			return PyErr_NoMemory();
  		if (r_string(buffer, (int)n, p) != n) {
  			PyMem_DEL(buffer);
  			PyErr_SetString(PyExc_EOFError,
  				"EOF read where object expected");
! 			return NULL;
  		}
  		v = PyUnicode_DecodeUTF8(buffer, n, NULL);
  		PyMem_DEL(buffer);
! 		return v;
  	    }
  	    
  	case TYPE_TUPLE:
  		n = r_long(p);
  		if (n < 0) {
  			PyErr_SetString(PyExc_ValueError, "bad marshal data");
! 			return NULL;
  		}
  		v = PyTuple_New((int)n);
! 		if (v == NULL)
! 			return v;
  		for (i = 0; i < n; i++) {
  			v2 = r_object(p);
  			if ( v2 == NULL ) {
--- 605,644 ----
  		n = r_long(p);
  		if (n < 0) {
  			PyErr_SetString(PyExc_ValueError, "bad marshal data");
! 			ret = NULL;
! 			break;
  		}
  		buffer = PyMem_NEW(char, n);
! 		if (buffer == NULL) {
! 			ret = PyErr_NoMemory();
! 			break;
! 		}
  		if (r_string(buffer, (int)n, p) != n) {
  			PyMem_DEL(buffer);
  			PyErr_SetString(PyExc_EOFError,
  				"EOF read where object expected");
! 			ret = NULL;
! 			break;
  		}
  		v = PyUnicode_DecodeUTF8(buffer, n, NULL);
  		PyMem_DEL(buffer);
! 		ret = v;
! 		break;
  	    }
  	    
  	case TYPE_TUPLE:
  		n = r_long(p);
  		if (n < 0) {
  			PyErr_SetString(PyExc_ValueError, "bad marshal data");
! 			ret = NULL;
! 			break;
  		}
  		v = PyTuple_New((int)n);
! 		if (v == NULL ||
! 		    set_victim(v) < 0) {
! 			ret = NULL;
! 			break;
! 		}
  		for (i = 0; i < n; i++) {
  			v2 = r_object(p);
  			if ( v2 == NULL ) {
***************
*** 546,562 ****
  			}
  			PyTuple_SET_ITEM(v, (int)i, v2);
  		}
! 		return v;
  	
  	case TYPE_LIST:
  		n = r_long(p);
  		if (n < 0) {
  			PyErr_SetString(PyExc_ValueError, "bad marshal data");
! 			return NULL;
  		}
  		v = PyList_New((int)n);
! 		if (v == NULL)
! 			return v;
  		for (i = 0; i < n; i++) {
  			v2 = r_object(p);
  			if ( v2 == NULL ) {
--- 648,669 ----
  			}
  			PyTuple_SET_ITEM(v, (int)i, v2);
  		}
! 		ret = v;
! 		break;
  	
  	case TYPE_LIST:
  		n = r_long(p);
  		if (n < 0) {
  			PyErr_SetString(PyExc_ValueError, "bad marshal data");
! 			ret = NULL;
! 			break;
  		}
  		v = PyList_New((int)n);
! 		if (v == NULL ||
! 		    set_victim(v) < 0) {
! 			ret = NULL;
! 			break;
! 		}
  		for (i = 0; i < n; i++) {
  			v2 = r_object(p);
  			if ( v2 == NULL ) {
***************
*** 566,577 ****
  			}
  			PyList_SetItem(v, (int)i, v2);
  		}
! 		return v;
  	
  	case TYPE_DICT:
  		v = PyDict_New();
! 		if (v == NULL)
! 			return NULL;
  		for (;;) {
  			PyObject *key, *val;
  			key = r_object(p);
--- 673,688 ----
  			}
  			PyList_SetItem(v, (int)i, v2);
  		}
! 		ret = v;
! 		break;
  	
  	case TYPE_DICT:
  		v = PyDict_New();
! 		if (v == NULL ||
! 		    set_victim(v) < 0) {
! 			ret = NULL;
! 			break;
! 		}
  		for (;;) {
  			PyObject *key, *val;
  			key = r_object(p);
***************
*** 583,589 ****
  			Py_DECREF(key);
  			Py_XDECREF(val);
  		}
! 		return v;
  	
  	case TYPE_CODE:
  		{
--- 694,712 ----
  			Py_DECREF(key);
  			Py_XDECREF(val);
  		}
! 		ret = v;
! 		break;
! 	
! 	case TYPE_OBJECT:
! 		n = r_long(p);
! 		v = get_victim((int)n);
! 		if (v == NULL) {
! 			ret = NULL;
! 			break;
! 		}
! 		Py_INCREF(v);
! 		ret = v;
! 		break;
  	
  	case TYPE_CODE:
  		{
***************
*** 628,642 ****
  			Py_XDECREF(lnotab);
  
  		}
! 		return v;
  	
  	default:
  		/* Bogus data got written, which isn't ideal.
  		   This will let you keep working and recover. */
  		PyErr_SetString(PyExc_ValueError, "bad marshal data");
! 		return NULL;
  	
  	}
  }
  
  long
--- 751,775 ----
  			Py_XDECREF(lnotab);
  
  		}
! 		ret = v;
! 		break;
  	
  	default:
  		/* Bogus data got written, which isn't ideal.
  		   This will let you keep working and recover. */
  		PyErr_SetString(PyExc_ValueError, "bad marshal data");
! 		ret = NULL;
! 		break;
  	
  	}
+ 
+ 	--_PyMarshalState_nesting;
+ 	if (_PyMarshalState_nesting == 0) {
+ 		Py_XDECREF(victims);
+ 		victims = NULL;
+ 	}
+ 
+ 	return ret;
  }
  
  long