[Python-checkins] r46589 - in python/trunk: Lib/test/crashers/dictresize_attack.py Lib/test/output/test_operations Lib/test/test_operations.py Misc/NEWS Objects/dictobject.c Python/ceval.c

armin.rigo python-checkins at python.org
Thu Jun 1 15:19:13 CEST 2006


Author: armin.rigo
Date: Thu Jun  1 15:19:12 2006
New Revision: 46589

Removed:
   python/trunk/Lib/test/crashers/dictresize_attack.py
Modified:
   python/trunk/Lib/test/output/test_operations
   python/trunk/Lib/test/test_operations.py
   python/trunk/Misc/NEWS
   python/trunk/Objects/dictobject.c
   python/trunk/Python/ceval.c
Log:
[ 1497053 ] Let dicts propagate the exceptions in user __eq__().

[ 1456209 ] dictresize() vulnerability ( <- backport candidate ).



Deleted: /python/trunk/Lib/test/crashers/dictresize_attack.py
==============================================================================
--- /python/trunk/Lib/test/crashers/dictresize_attack.py	Thu Jun  1 15:19:12 2006
+++ (empty file)
@@ -1,32 +0,0 @@
-# http://www.python.org/sf/1456209
-
-# A dictresize() attack.  If oldtable == mp->ma_smalltable then pure
-# Python code can mangle with mp->ma_smalltable while it is being walked
-# over.
-
-class X(object):
-
-    def __hash__(self):
-        return 5
-
-    def __eq__(self, other):
-        if resizing:
-            d.clear()
-        return False
-
-
-d = {}
-
-resizing = False
-
-d[X()] = 1
-d[X()] = 2
-d[X()] = 3
-d[X()] = 4
-d[X()] = 5
-
-# now trigger a resize
-resizing = True
-d[9] = 6
-
-# ^^^ I get Segmentation fault or Illegal instruction here.

Modified: python/trunk/Lib/test/output/test_operations
==============================================================================
--- python/trunk/Lib/test/output/test_operations	(original)
+++ python/trunk/Lib/test/output/test_operations	Thu Jun  1 15:19:12 2006
@@ -1,6 +1,21 @@
 test_operations
 3. Operations
 XXX Mostly not yet implemented
-3.1 Dictionary lookups succeed even if __cmp__() raises an exception
+3.1 Dictionary lookups fail if __cmp__() raises an exception
 raising error
-No exception passed through.
+d[x2] = 2: caught the RuntimeError outside
+raising error
+z = d[x2]: caught the RuntimeError outside
+raising error
+x2 in d: caught the RuntimeError outside
+raising error
+d.has_key(x2): caught the RuntimeError outside
+raising error
+d.get(x2): caught the RuntimeError outside
+raising error
+d.setdefault(x2, 42): caught the RuntimeError outside
+raising error
+d.pop(x2): caught the RuntimeError outside
+raising error
+d.update({x2: 2}): caught the RuntimeError outside
+resize bugs not triggered.

Modified: python/trunk/Lib/test/test_operations.py
==============================================================================
--- python/trunk/Lib/test/test_operations.py	(original)
+++ python/trunk/Lib/test/test_operations.py	Thu Jun  1 15:19:12 2006
@@ -5,27 +5,16 @@
 print 'XXX Mostly not yet implemented'
 
 
-print '3.1 Dictionary lookups succeed even if __cmp__() raises an exception'
-
-# SourceForge bug #112558:
-# http://sourceforge.net/bugs/?func=detailbug&bug_id=112558&group_id=5470
+print '3.1 Dictionary lookups fail if __cmp__() raises an exception'
 
 class BadDictKey:
-    already_printed_raising_error = 0
 
     def __hash__(self):
         return hash(self.__class__)
 
     def __cmp__(self, other):
         if isinstance(other, self.__class__):
-            if not BadDictKey.already_printed_raising_error:
-                # How many times __cmp__ gets called depends on the hash
-                # code and the internals of the dict implementation; we
-                # know it will be called at least once, but that's it.
-                # already_printed_raising_error makes sure the expected-
-                # output file prints the msg at most once.
-                BadDictKey.already_printed_raising_error = 1
-                print "raising error"
+            print "raising error"
             raise RuntimeError, "gotcha"
         return other
 
@@ -33,8 +22,21 @@
 x1 = BadDictKey()
 x2 = BadDictKey()
 d[x1] = 1
-d[x2] = 2
-print "No exception passed through."
+for stmt in ['d[x2] = 2',
+             'z = d[x2]',
+             'x2 in d',
+             'd.has_key(x2)',
+             'd.get(x2)',
+             'd.setdefault(x2, 42)',
+             'd.pop(x2)',
+             'd.update({x2: 2})']:
+    try:
+        exec stmt
+    except RuntimeError:
+        print "%s: caught the RuntimeError outside" % (stmt,)
+    else:
+        print "%s: No exception passed through!"     # old CPython behavior
+
 
 # Dict resizing bug, found by Jack Jansen in 2.2 CVS development.
 # This version got an assert failure in debug build, infinite loop in
@@ -50,3 +52,27 @@
     del d[i]
 for i in range(5, 9):  # i==8 was the problem
     d[i] = i
+
+
+# Another dict resizing bug (SF bug #1456209).
+# This caused Segmentation faults or Illegal instructions.
+
+class X(object):
+    def __hash__(self):
+        return 5
+    def __eq__(self, other):
+        if resizing:
+            d.clear()
+        return False
+d = {}
+resizing = False
+d[X()] = 1
+d[X()] = 2
+d[X()] = 3
+d[X()] = 4
+d[X()] = 5
+# now trigger a resize
+resizing = True
+d[9] = 6
+
+print 'resize bugs not triggered.'

Modified: python/trunk/Misc/NEWS
==============================================================================
--- python/trunk/Misc/NEWS	(original)
+++ python/trunk/Misc/NEWS	Thu Jun  1 15:19:12 2006
@@ -59,6 +59,11 @@
 
 - Patch #1479181: split open() and file() from being aliases for each other.
 
+- Patch #1497053: Exceptions occurring in __eq__() methods were always
+  silently ignored by dictionaries when comparing keys.  They are now
+  passed through (except when using the C API function PyDict_GetItem(),
+  whose semantics did not change).
+
 Extension Modules
 -----------------
 

Modified: python/trunk/Objects/dictobject.c
==============================================================================
--- python/trunk/Objects/dictobject.c	(original)
+++ python/trunk/Objects/dictobject.c	Thu Jun  1 15:19:12 2006
@@ -241,10 +241,7 @@
 	register Py_ssize_t mask = mp->ma_mask;
 	dictentry *ep0 = mp->ma_table;
 	register dictentry *ep;
-	register int restore_error;
-	register int checked_error;
 	register int cmp;
-	PyObject *err_type, *err_value, *err_tb;
 	PyObject *startkey;
 
 	i = hash & mask;
@@ -252,24 +249,17 @@
 	if (ep->me_key == NULL || ep->me_key == key)
 		return ep;
 
-	restore_error = checked_error = 0;
 	if (ep->me_key == dummy)
 		freeslot = ep;
 	else {
 		if (ep->me_hash == hash) {
-			/* error can't have been checked yet */
-			checked_error = 1;
-			if (PyErr_Occurred()) {
-				restore_error = 1;
-				PyErr_Fetch(&err_type, &err_value, &err_tb);
-			}
 			startkey = ep->me_key;
 			cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
 			if (cmp < 0)
-				PyErr_Clear();
+				return NULL;
 			if (ep0 == mp->ma_table && ep->me_key == startkey) {
 				if (cmp > 0)
-					goto Done;
+					return ep;
 			}
 			else {
 				/* The compare did major nasty stuff to the
@@ -277,8 +267,7 @@
 				 * XXX A clever adversary could prevent this
 				 * XXX from terminating.
  				 */
- 				ep = lookdict(mp, key, hash);
- 				goto Done;
+ 				return lookdict(mp, key, hash);
  			}
 		}
 		freeslot = NULL;
@@ -289,29 +278,18 @@
 	for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
 		i = (i << 2) + i + perturb + 1;
 		ep = &ep0[i & mask];
-		if (ep->me_key == NULL) {
-			if (freeslot != NULL)
-				ep = freeslot;
-			break;
-		}
+		if (ep->me_key == NULL)
+			return freeslot == NULL ? ep : freeslot;
 		if (ep->me_key == key)
-			break;
+			return ep;
 		if (ep->me_hash == hash && ep->me_key != dummy) {
-			if (!checked_error) {
-				checked_error = 1;
-				if (PyErr_Occurred()) {
-					restore_error = 1;
-					PyErr_Fetch(&err_type, &err_value,
-						    &err_tb);
-				}
-			}
 			startkey = ep->me_key;
 			cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
 			if (cmp < 0)
-				PyErr_Clear();
+				return NULL;
 			if (ep0 == mp->ma_table && ep->me_key == startkey) {
 				if (cmp > 0)
-					break;
+					return ep;
 			}
 			else {
 				/* The compare did major nasty stuff to the
@@ -319,18 +297,12 @@
 				 * XXX A clever adversary could prevent this
 				 * XXX from terminating.
  				 */
- 				ep = lookdict(mp, key, hash);
- 				break;
+ 				return lookdict(mp, key, hash);
  			}
 		}
 		else if (ep->me_key == dummy && freeslot == NULL)
 			freeslot = ep;
 	}
-
-Done:
-	if (restore_error)
-		PyErr_Restore(err_type, err_value, err_tb);
-	return ep;
 }
 
 /*
@@ -400,7 +372,7 @@
 Used both by the internal resize routine and by the public insert routine.
 Eats a reference to key and one to value.
 */
-static void
+static int
 insertdict(register dictobject *mp, PyObject *key, long hash, PyObject *value)
 {
 	PyObject *old_value;
@@ -409,6 +381,11 @@
 
 	assert(mp->ma_lookup != NULL);
 	ep = mp->ma_lookup(mp, key, hash);
+	if (ep == NULL) {
+		Py_DECREF(key);
+		Py_DECREF(value);
+		return -1;
+	}
 	if (ep->me_value != NULL) {
 		old_value = ep->me_value;
 		ep->me_value = value;
@@ -427,6 +404,36 @@
 		ep->me_value = value;
 		mp->ma_used++;
 	}
+	return 0;
+}
+
+/*
+Internal routine used by dictresize() to insert an item which is
+known to be absent from the dict.  This routine also assumes that
+the dict contains no deleted entries.  Besides the performance benefit,
+using insertdict() in dictresize() is dangerous (SF bug #1456209).
+*/
+static void
+insertdict_clean(register dictobject *mp, PyObject *key, long hash,
+		 PyObject *value)
+{
+	register Py_ssize_t i;
+	register size_t perturb;
+	register unsigned int mask = mp->ma_mask;
+	dictentry *ep0 = mp->ma_table;
+	register dictentry *ep;
+
+	i = hash & mask;
+	ep = &ep0[i];
+	for (perturb = hash; ep->me_key != NULL; perturb >>= PERTURB_SHIFT) {
+		i = (i << 2) + i + perturb + 1;
+		ep = &ep0[i & mask];
+	}
+	mp->ma_fill++;
+	ep->me_key = key;
+	ep->me_hash = (Py_ssize_t)hash;
+	ep->me_value = value;
+	mp->ma_used++;
 }
 
 /*
@@ -501,7 +508,8 @@
 	for (ep = oldtable; i > 0; ep++) {
 		if (ep->me_value != NULL) {	/* active entry */
 			--i;
-			insertdict(mp, ep->me_key, ep->me_hash, ep->me_value);
+			insertdict_clean(mp, ep->me_key, (long)ep->me_hash,
+					 ep->me_value);
 		}
 		else if (ep->me_key != NULL) {	/* dummy entry */
 			--i;
@@ -521,6 +529,8 @@
 {
 	long hash;
 	dictobject *mp = (dictobject *)op;
+	dictentry *ep;
+	PyThreadState *tstate;
 	if (!PyDict_Check(op)) {
 		return NULL;
 	}
@@ -533,7 +543,29 @@
 			return NULL;
 		}
 	}
-	return (mp->ma_lookup)(mp, key, hash)->me_value;
+
+	/* We can arrive here with a NULL tstate during initialization:
+	   try running "python -Wi" for an example related to string
+	   interning.  Let's just hope that no exception occurs then... */
+	tstate = PyThreadState_GET();
+	if (tstate != NULL && tstate->curexc_type != NULL) {
+		/* preserve the existing exception */
+		PyObject *err_type, *err_value, *err_tb;
+		PyErr_Fetch(&err_type, &err_value, &err_tb);
+		ep = (mp->ma_lookup)(mp, key, hash);
+		/* ignore errors */
+		PyErr_Restore(err_type, err_value, err_tb);
+		if (ep == NULL)
+			return NULL;
+	}
+	else {
+		ep = (mp->ma_lookup)(mp, key, hash);
+		if (ep == NULL) {
+			PyErr_Clear();
+			return NULL;
+		}
+	}
+	return ep->me_value;
 }
 
 /* CAUTION: PyDict_SetItem() must guarantee that it won't resize the
@@ -568,7 +600,8 @@
 	n_used = mp->ma_used;
 	Py_INCREF(value);
 	Py_INCREF(key);
-	insertdict(mp, key, hash, value);
+	if (insertdict(mp, key, hash, value) != 0)
+		return -1;
 	/* If we added a key, we can safely resize.  Otherwise just return!
 	 * If fill >= 2/3 size, adjust size.  Normally, this doubles or
 	 * quaduples the size, but it's also possible for the dict to shrink
@@ -608,6 +641,8 @@
 	}
 	mp = (dictobject *)op;
 	ep = (mp->ma_lookup)(mp, key, hash);
+	if (ep == NULL)
+		return -1;
 	if (ep->me_value == NULL) {
 		PyErr_SetObject(PyExc_KeyError, key);
 		return -1;
@@ -893,6 +928,7 @@
 {
 	PyObject *v;
 	long hash;
+	dictentry *ep;
 	assert(mp->ma_table != NULL);
 	if (!PyString_CheckExact(key) ||
 	    (hash = ((PyStringObject *) key)->ob_shash) == -1) {
@@ -900,7 +936,10 @@
 		if (hash == -1)
 			return NULL;
 	}
-	v = (mp->ma_lookup)(mp, key, hash) -> me_value;
+	ep = (mp->ma_lookup)(mp, key, hash);
+	if (ep == NULL)
+		return NULL;
+	v = ep->me_value;
 	if (v == NULL) {
 		if (!PyDict_CheckExact(mp)) {
 			/* Look up __missing__ method if we're a subclass. */
@@ -1258,9 +1297,10 @@
 			     PyDict_GetItem(a, entry->me_key) == NULL)) {
 				Py_INCREF(entry->me_key);
 				Py_INCREF(entry->me_value);
-				insertdict(mp, entry->me_key,
-					   (long)entry->me_hash,
-					   entry->me_value);
+				if (insertdict(mp, entry->me_key,
+					       (long)entry->me_hash,
+					       entry->me_value) != 0)
+					return -1;
 			}
 		}
 	}
@@ -1568,13 +1608,17 @@
 {
 	long hash;
 	register long ok;
+	dictentry *ep;
 	if (!PyString_CheckExact(key) ||
 	    (hash = ((PyStringObject *) key)->ob_shash) == -1) {
 		hash = PyObject_Hash(key);
 		if (hash == -1)
 			return NULL;
 	}
-	ok = (mp->ma_lookup)(mp, key, hash)->me_value != NULL;
+	ep = (mp->ma_lookup)(mp, key, hash);
+	if (ep == NULL)
+		return NULL;
+	ok = ep->me_value != NULL;
 	return PyBool_FromLong(ok);
 }
 
@@ -1585,6 +1629,7 @@
 	PyObject *failobj = Py_None;
 	PyObject *val = NULL;
 	long hash;
+	dictentry *ep;
 
 	if (!PyArg_UnpackTuple(args, "get", 1, 2, &key, &failobj))
 		return NULL;
@@ -1595,8 +1640,10 @@
 		if (hash == -1)
 			return NULL;
 	}
-	val = (mp->ma_lookup)(mp, key, hash)->me_value;
-
+	ep = (mp->ma_lookup)(mp, key, hash);
+	if (ep == NULL)
+		return NULL;
+	val = ep->me_value;
 	if (val == NULL)
 		val = failobj;
 	Py_INCREF(val);
@@ -1611,6 +1658,7 @@
 	PyObject *failobj = Py_None;
 	PyObject *val = NULL;
 	long hash;
+	dictentry *ep;
 
 	if (!PyArg_UnpackTuple(args, "setdefault", 1, 2, &key, &failobj))
 		return NULL;
@@ -1621,7 +1669,10 @@
 		if (hash == -1)
 			return NULL;
 	}
-	val = (mp->ma_lookup)(mp, key, hash)->me_value;
+	ep = (mp->ma_lookup)(mp, key, hash);
+	if (ep == NULL)
+		return NULL;
+	val = ep->me_value;
 	if (val == NULL) {
 		val = failobj;
 		if (PyDict_SetItem((PyObject*)mp, key, failobj))
@@ -1665,6 +1716,8 @@
 			return NULL;
 	}
 	ep = (mp->ma_lookup)(mp, key, hash);
+	if (ep == NULL)
+		return NULL;
 	if (ep->me_value == NULL) {
 		if (deflt) {
 			Py_INCREF(deflt);
@@ -1884,6 +1937,7 @@
 {
 	long hash;
 	dictobject *mp = (dictobject *)op;
+	dictentry *ep;
 
 	if (!PyString_CheckExact(key) ||
 	    (hash = ((PyStringObject *) key)->ob_shash) == -1) {
@@ -1891,7 +1945,10 @@
 		if (hash == -1)
 			return -1;
 	}
-	return (mp->ma_lookup)(mp, key, hash)->me_value != NULL;
+	ep = (mp->ma_lookup)(mp, key, hash);
+	if (ep == NULL)
+		return -1;
+	return ep->me_value != NULL;
 }
 
 /* Hack to implement "key in dict" */

Modified: python/trunk/Python/ceval.c
==============================================================================
--- python/trunk/Python/ceval.c	(original)
+++ python/trunk/Python/ceval.c	Thu Jun  1 15:19:12 2006
@@ -1855,15 +1855,26 @@
 				long hash = ((PyStringObject *)w)->ob_shash;
 				if (hash != -1) {
 					PyDictObject *d;
+					PyDictEntry *e;
 					d = (PyDictObject *)(f->f_globals);
-					x = d->ma_lookup(d, w, hash)->me_value;
+					e = d->ma_lookup(d, w, hash);
+					if (e == NULL) {
+						x = NULL;
+						break;
+					}
+					x = e->me_value;
 					if (x != NULL) {
 						Py_INCREF(x);
 						PUSH(x);
 						continue;
 					}
 					d = (PyDictObject *)(f->f_builtins);
-					x = d->ma_lookup(d, w, hash)->me_value;
+					e = d->ma_lookup(d, w, hash);
+					if (e == NULL) {
+						x = NULL;
+						break;
+					}
+					x = e->me_value;
 					if (x != NULL) {
 						Py_INCREF(x);
 						PUSH(x);


More information about the Python-checkins mailing list