[Python-Dev] very slow compare of recursive objects
Tim Peters
tim.one@comcast.net
Sun, 19 Jan 2003 19:38:16 -0500
This is a multi-part message in MIME format.
--Boundary_(ID_LbvhV0B4RROIr+t2rcTs3Q)
Content-type: text/plain; charset=iso-8859-1
Content-transfer-encoding: 7BIT
[Neal Norwitz]
> Could someone take a look at the patch attached to this bug report?
>
> http://python.org/sf/625698
-1: it doesn't really solve the problem, and it's not necessarily the case
anymore that
x is y
implies
x == y
> It fixes a problem comparing recursive objects by adding a check in
> PyObject_RichCompare.
The bug report is confusing: the original report claimed crashes, but
nobody was able to reproduce that, and jaw-dropping slowness isn't a bug.
> Seems to work fine for the specific problem,
> however, it still doesn't help this case:
>
> a = []
> b = []
> for i in range(2): a.append(a)
> for i in range(2): b.append(b)
> print a == b
That runs in an eyeblink with the attached patch, but this is delicate stuff
and the attached is pure hackery. I think there are two "deep" problems
with the recursive compare gimmicks:
1. Entries in the inprogress dict are cleared too early, requiring
exponential time to rebuild them over & over & ... & over again.
2. By the time check_recursion() is called, compare_nesting is already
larger than NESTING_LIMIT. As a result, the tuple rich comparisons
*implied by* the PyDict_GetItem and PyDict_SetItem calls within
check_recursion also trigger the "recursive compare" gimmicks: the
very machinery used to check whether recursion is in progress ends
up exacerbating the problem by doing "hidden" comparisons on the
(address#1, address#2, comparison_op) tuples it uses to index the
dict. So, the addresses of those tuples also end up in the inprogress
dict in new tuples of their own, and so on -- it's something of
a miracle that it ever stops <0.9 wink>.
The attached worms around both of those without any grac, so I'm -1 on the
attached too. But seeing one way that seems to get close to solving the
problem "for real" may inspire someone to do it gracefully.
--Boundary_(ID_LbvhV0B4RROIr+t2rcTs3Q)
Content-type: text/plain; name=patch.txt
Content-transfer-encoding: 7BIT
Content-disposition: attachment; filename=patch.txt
Index: object.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/object.c,v
retrieving revision 2.195
diff -u -r2.195 object.c
--- object.c 13 Jan 2003 20:13:04 -0000 2.195
+++ object.c 20 Jan 2003 00:32:21 -0000
@@ -708,6 +708,7 @@
#define NESTING_LIMIT 20
static int compare_nesting = 0;
+static int use_dict = 0;
static PyObject*
get_inprogress_dict(void)
@@ -746,18 +747,21 @@
check_recursion(PyObject *v, PyObject *w, int op)
{
PyObject *inprogress;
- PyObject *token;
+ PyObject *token = NULL;
Py_uintptr_t iv = (Py_uintptr_t)v;
Py_uintptr_t iw = (Py_uintptr_t)w;
PyObject *x, *y, *z;
+ int save_use_dict = use_dict;
+
+ use_dict = 0;
inprogress = get_inprogress_dict();
if (inprogress == NULL)
- return NULL;
+ goto Done;
token = PyTuple_New(3);
if (token == NULL)
- return NULL;
+ goto Done;
if (iv <= iw) {
PyTuple_SET_ITEM(token, 0, x = PyLong_FromVoidPtr((void *)v));
@@ -771,19 +775,24 @@
PyTuple_SET_ITEM(token, 2, z = PyInt_FromLong((long)op));
if (x == NULL || y == NULL || z == NULL) {
Py_DECREF(token);
- return NULL;
+ token = NULL;
+ goto Done;;
}
if (PyDict_GetItem(inprogress, token) != NULL) {
Py_DECREF(token);
- return Py_None; /* Without INCREF! */
+ token = Py_None; /* Without INCREF! */
+ goto Done;
}
if (PyDict_SetItem(inprogress, token, token) < 0) {
Py_DECREF(token);
- return NULL;
+ token = NULL;
+ goto Done;
}
+Done:
+ use_dict = save_use_dict;
return token;
}
@@ -802,6 +811,18 @@
Py_DECREF(token);
}
+static void
+clear_inprogress_dict()
+{
+ PyObject *inprogress;
+
+ inprogress = get_inprogress_dict();
+ if (inprogress == NULL)
+ PyErr_Clear();
+ else
+ PyDict_Clear(inprogress);
+}
+
/* Compare v to w. Return
-1 if v < w or exception (PyErr_Occurred() true in latter case).
0 if v == w.
@@ -926,18 +947,21 @@
assert(Py_LT <= op && op <= Py_GE);
compare_nesting++;
- if (compare_nesting > NESTING_LIMIT &&
+ if ((use_dict || compare_nesting > NESTING_LIMIT) &&
(v->ob_type->tp_as_mapping
|| (v->ob_type->tp_as_sequence
&& !PyString_Check(v)
&& !PyTuple_Check(v)))) {
/* try to detect circular data structures */
- PyObject *token = check_recursion(v, w, op);
- if (token == NULL) {
+ int save_compare_nesting = compare_nesting;
+ PyObject *token;
+
+ use_dict = 1;
+ compare_nesting = NESTING_LIMIT - 2;
+ token = check_recursion(v, w, op);
+ if (token == NULL)
res = NULL;
- goto Done;
- }
else if (token == Py_None) {
/* already comparing these objects with this operator.
assume they're equal until shown otherwise */
@@ -952,10 +976,9 @@
}
Py_XINCREF(res);
}
- else {
+ else
res = do_richcmp(v, w, op);
- delete_token(token);
- }
+ compare_nesting = save_compare_nesting;
goto Done;
}
@@ -993,6 +1016,10 @@
res = do_richcmp(v, w, op);
Done:
compare_nesting--;
+ if (use_dict && compare_nesting <= 0) {
+ clear_inprogress_dict();
+ use_dict = 0;
+ }
return res;
}
--Boundary_(ID_LbvhV0B4RROIr+t2rcTs3Q)--