[Python-Dev] very slow compare of recursive objects

Tim Peters tim.one@comcast.net
Sun, 19 Jan 2003 19:38:16 -0500


This is a multi-part message in MIME format.

--Boundary_(ID_LbvhV0B4RROIr+t2rcTs3Q)
Content-type: text/plain; charset=iso-8859-1
Content-transfer-encoding: 7BIT

[Neal Norwitz]
> Could someone take a look at the patch attached to this bug report?
>
>         http://python.org/sf/625698

-1:  it doesn't really solve the problem, and it's not necessarily the case
anymore that

    x is y

implies

    x == y

> It fixes a problem comparing recursive objects by adding a check in
> PyObject_RichCompare.

The bug report is confusing:  the original report claimed crashes, but
nobody was able to reproduce that, and jaw-dropping slowness isn't a bug.

> Seems to work fine for the specific problem,
> however, it still doesn't help this case:
>
>         a = []
>         b = []
>         for i in range(2): a.append(a)
>         for i in range(2): b.append(b)
>         print a == b

That runs in an eyeblink with the attached patch, but this is delicate stuff
and the attached is pure hackery.  I think there are two "deep" problems
with the recursive compare gimmicks:

1. Entries in the inprogress dict are cleared too early, requiring
   exponential time to rebuild them over & over & ... & over again.

2. By the time check_recursion() is called, compare_nesting is already
   larger than NESTING_LIMIT.  As a result, the tuple rich comparisons
   *implied by* the PyDict_GetItem and PyDict_SetItem calls within
   check_recursion also trigger the "recursive compare" gimmicks:  the
   very machinery used to check whether recursion is in progress ends
   up exacerbating the problem by doing "hidden" comparisons on the
   (address#1, address#2, comparison_op) tuples it uses to index the
   dict.  So, the addresses of those tuples also end up in the inprogress
   dict in new tuples of their own, and so on -- it's something of
   a miracle that it ever stops <0.9 wink>.

The attached worms around both of those without any grac, so I'm -1 on the
attached too.  But seeing one way that seems to get close to solving the
problem "for real" may inspire someone to do it gracefully.



--Boundary_(ID_LbvhV0B4RROIr+t2rcTs3Q)
Content-type: text/plain; name=patch.txt
Content-transfer-encoding: 7BIT
Content-disposition: attachment; filename=patch.txt

Index: object.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/object.c,v
retrieving revision 2.195
diff -u -r2.195 object.c
--- object.c	13 Jan 2003 20:13:04 -0000	2.195
+++ object.c	20 Jan 2003 00:32:21 -0000
@@ -708,6 +708,7 @@
 #define NESTING_LIMIT 20
 
 static int compare_nesting = 0;
+static int use_dict = 0;
 
 static PyObject*
 get_inprogress_dict(void)
@@ -746,18 +747,21 @@
 check_recursion(PyObject *v, PyObject *w, int op)
 {
 	PyObject *inprogress;
-	PyObject *token;
+	PyObject *token = NULL;
 	Py_uintptr_t iv = (Py_uintptr_t)v;
 	Py_uintptr_t iw = (Py_uintptr_t)w;
 	PyObject *x, *y, *z;
+	int save_use_dict = use_dict;
+
+	use_dict = 0;
 
 	inprogress = get_inprogress_dict();
 	if (inprogress == NULL)
-		return NULL;
+		goto Done;
 
 	token = PyTuple_New(3);
 	if (token == NULL)
-		return NULL;
+		goto Done;
 
 	if (iv <= iw) {
 		PyTuple_SET_ITEM(token, 0, x = PyLong_FromVoidPtr((void *)v));
@@ -771,19 +775,24 @@
 	PyTuple_SET_ITEM(token, 2, z = PyInt_FromLong((long)op));
 	if (x == NULL || y == NULL || z == NULL) {
 		Py_DECREF(token);
-		return NULL;
+		token = NULL;
+		goto Done;;
 	}
 
 	if (PyDict_GetItem(inprogress, token) != NULL) {
 		Py_DECREF(token);
-		return Py_None; /* Without INCREF! */
+		token = Py_None; /* Without INCREF! */
+		goto Done;
 	}
 
 	if (PyDict_SetItem(inprogress, token, token) < 0) {
 		Py_DECREF(token);
-		return NULL;
+		token = NULL;
+		goto Done;
 	}
 
+Done:
+	use_dict = save_use_dict;
 	return token;
 }
 
@@ -802,6 +811,18 @@
 	Py_DECREF(token);
 }
 
+static void
+clear_inprogress_dict()
+{
+	PyObject *inprogress;
+
+	inprogress = get_inprogress_dict();
+	if (inprogress == NULL)
+		PyErr_Clear();
+	else
+		PyDict_Clear(inprogress);
+}
+
 /* Compare v to w.  Return
    -1 if v <  w or exception (PyErr_Occurred() true in latter case).
     0 if v == w.
@@ -926,18 +947,21 @@
 	assert(Py_LT <= op && op <= Py_GE);
 
 	compare_nesting++;
-	if (compare_nesting > NESTING_LIMIT &&
+	if ((use_dict || compare_nesting > NESTING_LIMIT) &&
 		(v->ob_type->tp_as_mapping
 		 || (v->ob_type->tp_as_sequence
 		     && !PyString_Check(v)
 		     && !PyTuple_Check(v)))) {
 
 		/* try to detect circular data structures */
-		PyObject *token = check_recursion(v, w, op);
-		if (token == NULL) {
+		int save_compare_nesting = compare_nesting;
+		PyObject *token;
+
+		use_dict = 1;
+		compare_nesting = NESTING_LIMIT - 2;
+		token = check_recursion(v, w, op);
+		if (token == NULL)
 			res = NULL;
-			goto Done;
-		}
 		else if (token == Py_None) {
 			/* already comparing these objects with this operator.
 			   assume they're equal until shown otherwise */
@@ -952,10 +976,9 @@
 			}
 			Py_XINCREF(res);
 		}
-		else {
+		else
 			res = do_richcmp(v, w, op);
-			delete_token(token);
-		}
+		compare_nesting = save_compare_nesting;
 		goto Done;
 	}
 
@@ -993,6 +1016,10 @@
 	res = do_richcmp(v, w, op);
 Done:
 	compare_nesting--;
+	if (use_dict && compare_nesting <= 0) {
+		clear_inprogress_dict();
+		use_dict = 0;
+	}
 	return res;
 }
 

--Boundary_(ID_LbvhV0B4RROIr+t2rcTs3Q)--