[Python-checkins] cpython: Close #14205: dict lookup raises a RuntimeError if the dict is modified during

Tue Mar 6 01:13:02 CET 2012

http://hg.python.org/cpython/rev/934aaf2191d0
changeset:   75445:934aaf2191d0
user:        Victor Stinner <victor.stinner at gmail.com>
date:        Tue Mar 06 01:03:13 2012 +0100
summary:
  Close #14205: dict lookup raises a RuntimeError if the dict is modified during
a lookup.

"if you want to make a sandbox on top of CPython, you have to fix segfaults"
so let's fix segfaults!

files:
  Lib/test/crashers/nasty_eq_vs_dict.py |   47 --
  Lib/test/test_dict.py                 |   22 +-
  Lib/test/test_mutants.py              |  291 --------------
  Misc/NEWS                             |    5 +-
  Objects/dictobject.c                  |   18 +-
  5 files changed, 31 insertions(+), 352 deletions(-)

diff --git a/Lib/test/crashers/nasty_eq_vs_dict.py b/Lib/test/crashers/nasty_eq_vs_dict.py
deleted file mode 100644
--- a/Lib/test/crashers/nasty_eq_vs_dict.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# from http://mail.python.org/pipermail/python-dev/2001-June/015239.html
-
-# if you keep changing a dictionary while looking up a key, you can
-# provoke an infinite recursion in C
-
-# At the time neither Tim nor Michael could be bothered to think of a
-# way to fix it.
-
-class Yuck:
-    def __init__(self):
-        self.i = 0
-
-    def make_dangerous(self):
-        self.i = 1
-
-    def __hash__(self):
-        # direct to slot 4 in table of size 8; slot 12 when size 16
-        return 4 + 8
-
-    def __eq__(self, other):
-        if self.i == 0:
-            # leave dict alone
-            pass
-        elif self.i == 1:
-            # fiddle to 16 slots
-            self.__fill_dict(6)
-            self.i = 2
-        else:
-            # fiddle to 8 slots
-            self.__fill_dict(4)
-            self.i = 1
-
-        return 1
-
-    def __fill_dict(self, n):
-        self.i = 0
-        dict.clear()
-        for i in range(n):
-            dict[i] = i
-        dict[self] = "OK!"
-
-y = Yuck()
-dict = {y: "OK!"}
-
-z = Yuck()
-y.make_dangerous()
-print(dict[z])
diff --git a/Lib/test/test_dict.py b/Lib/test/test_dict.py
--- a/Lib/test/test_dict.py
+++ b/Lib/test/test_dict.py
@@ -379,7 +379,7 @@
         x.fail = True
         self.assertRaises(Exc, d.pop, x)
 
-    def test_mutatingiteration(self):
+    def test_mutating_iteration(self):
         # changing dict size during iteration
         d = {}
         d[1] = 1
@@ -387,6 +387,26 @@
             for i in d:
                 d[i+1] = 1
 
+    def test_mutating_lookup(self):
+        # changing dict during a lookup
+        class NastyKey:
+            mutate_dict = None
+
+            def __hash__(self):
+                # hash collision!
+                return 1
+
+            def __eq__(self, other):
+                if self.mutate_dict:
+                    self.mutate_dict[self] = 1
+                return self == other
+
+        d = {}
+        d[NastyKey()] = 0
+        NastyKey.mutate_dict = d
+        with self.assertRaises(RuntimeError):
+            d[NastyKey()] = None
+
     def test_repr(self):
         d = {}
         self.assertEqual(repr(d), '{}')
diff --git a/Lib/test/test_mutants.py b/Lib/test/test_mutants.py
deleted file mode 100644
--- a/Lib/test/test_mutants.py
+++ /dev/null
@@ -1,291 +0,0 @@
-from test.support import verbose, TESTFN
-import random
-import os
-
-# From SF bug #422121:  Insecurities in dict comparison.
-
-# Safety of code doing comparisons has been an historical Python weak spot.
-# The problem is that comparison of structures written in C *naturally*
-# wants to hold on to things like the size of the container, or "the
-# biggest" containee so far, across a traversal of the container; but
-# code to do containee comparisons can call back into Python and mutate
-# the container in arbitrary ways while the C loop is in midstream.  If the
-# C code isn't extremely paranoid about digging things out of memory on
-# each trip, and artificially boosting refcounts for the duration, anything
-# from infinite loops to OS crashes can result (yes, I use Windows <wink>).
-#
-# The other problem is that code designed to provoke a weakness is usually
-# white-box code, and so catches only the particular vulnerabilities the
-# author knew to protect against.  For example, Python's list.sort() code
-# went thru many iterations as one "new" vulnerability after another was
-# discovered.
-#
-# So the dict comparison test here uses a black-box approach instead,
-# generating dicts of various sizes at random, and performing random
-# mutations on them at random times.  This proved very effective,
-# triggering at least six distinct failure modes the first 20 times I
-# ran it.  Indeed, at the start, the driver never got beyond 6 iterations
-# before the test died.
-
-# The dicts are global to make it easy to mutate tham from within functions.
-dict1 = {}
-dict2 = {}
-
-# The current set of keys in dict1 and dict2.  These are materialized as
-# lists to make it easy to pick a dict key at random.
-dict1keys = []
-dict2keys = []
-
-# Global flag telling maybe_mutate() whether to *consider* mutating.
-mutate = 0
-
-# If global mutate is true, consider mutating a dict.  May or may not
-# mutate a dict even if mutate is true.  If it does decide to mutate a
-# dict, it picks one of {dict1, dict2} at random, and deletes a random
-# entry from it; or, more rarely, adds a random element.
-
-def maybe_mutate():
-    global mutate
-    if not mutate:
-        return
-    if random.random() < 0.5:
-        return
-
-    if random.random() < 0.5:
-        target, keys = dict1, dict1keys
-    else:
-        target, keys = dict2, dict2keys
-
-    if random.random() < 0.2:
-        # Insert a new key.
-        mutate = 0   # disable mutation until key inserted
-        while 1:
-            newkey = Horrid(random.randrange(100))
-            if newkey not in target:
-                break
-        target[newkey] = Horrid(random.randrange(100))
-        keys.append(newkey)
-        mutate = 1
-
-    elif keys:
-        # Delete a key at random.
-        mutate = 0   # disable mutation until key deleted
-        i = random.randrange(len(keys))
-        key = keys[i]
-        del target[key]
-        del keys[i]
-        mutate = 1
-
-# A horrid class that triggers random mutations of dict1 and dict2 when
-# instances are compared.
-
-class Horrid:
-    def __init__(self, i):
-        # Comparison outcomes are determined by the value of i.
-        self.i = i
-
-        # An artificial hashcode is selected at random so that we don't
-        # have any systematic relationship between comparison outcomes
-        # (based on self.i and other.i) and relative position within the
-        # hash vector (based on hashcode).
-        # XXX This is no longer effective.
-        ##self.hashcode = random.randrange(1000000000)
-
-    def __hash__(self):
-        return 42
-        return self.hashcode
-
-    def __eq__(self, other):
-        maybe_mutate()   # The point of the test.
-        return self.i == other.i
-
-    def __ne__(self, other):
-        raise RuntimeError("I didn't expect some kind of Spanish inquisition!")
-
-    __lt__ = __le__ = __gt__ = __ge__ = __ne__
-
-    def __repr__(self):
-        return "Horrid(%d)" % self.i
-
-# Fill dict d with numentries (Horrid(i), Horrid(j)) key-value pairs,
-# where i and j are selected at random from the candidates list.
-# Return d.keys() after filling.
-
-def fill_dict(d, candidates, numentries):
-    d.clear()
-    for i in range(numentries):
-        d[Horrid(random.choice(candidates))] = \
-            Horrid(random.choice(candidates))
-    return list(d.keys())
-
-# Test one pair of randomly generated dicts, each with n entries.
-# Note that dict comparison is trivial if they don't have the same number
-# of entires (then the "shorter" dict is instantly considered to be the
-# smaller one, without even looking at the entries).
-
-def test_one(n):
-    global mutate, dict1, dict2, dict1keys, dict2keys
-
-    # Fill the dicts without mutating them.
-    mutate = 0
-    dict1keys = fill_dict(dict1, range(n), n)
-    dict2keys = fill_dict(dict2, range(n), n)
-
-    # Enable mutation, then compare the dicts so long as they have the
-    # same size.
-    mutate = 1
-    if verbose:
-        print("trying w/ lengths", len(dict1), len(dict2), end=' ')
-    while dict1 and len(dict1) == len(dict2):
-        if verbose:
-            print(".", end=' ')
-        c = dict1 == dict2
-    if verbose:
-        print()
-
-# Run test_one n times.  At the start (before the bugs were fixed), 20
-# consecutive runs of this test each blew up on or before the sixth time
-# test_one was run.  So n doesn't have to be large to get an interesting
-# test.
-# OTOH, calling with large n is also interesting, to ensure that the fixed
-# code doesn't hold on to refcounts *too* long (in which case memory would
-# leak).
-
-def test(n):
-    for i in range(n):
-        test_one(random.randrange(1, 100))
-
-# See last comment block for clues about good values for n.
-test(100)
-
-##########################################################################
-# Another segfault bug, distilled by Michael Hudson from a c.l.py post.
-
-class Child:
-    def __init__(self, parent):
-        self.__dict__['parent'] = parent
-    def __getattr__(self, attr):
-        self.parent.a = 1
-        self.parent.b = 1
-        self.parent.c = 1
-        self.parent.d = 1
-        self.parent.e = 1
-        self.parent.f = 1
-        self.parent.g = 1
-        self.parent.h = 1
-        self.parent.i = 1
-        return getattr(self.parent, attr)
-
-class Parent:
-    def __init__(self):
-        self.a = Child(self)
-
-# Hard to say what this will print!  May vary from time to time.  But
-# we're specifically trying to test the tp_print slot here, and this is
-# the clearest way to do it.  We print the result to a temp file so that
-# the expected-output file doesn't need to change.
-
-f = open(TESTFN, "w")
-print(Parent().__dict__, file=f)
-f.close()
-os.unlink(TESTFN)
-
-##########################################################################
-# And another core-dumper from Michael Hudson.
-
-dict = {}
-
-# Force dict to malloc its table.
-for i in range(1, 10):
-    dict[i] = i
-
-f = open(TESTFN, "w")
-
-class Machiavelli:
-    def __repr__(self):
-        dict.clear()
-
-        # Michael sez:  "doesn't crash without this.  don't know why."
-        # Tim sez:  "luck of the draw; crashes with or without for me."
-        print(file=f)
-
-        return repr("machiavelli")
-
-    def __hash__(self):
-        return 0
-
-dict[Machiavelli()] = Machiavelli()
-
-print(str(dict), file=f)
-f.close()
-os.unlink(TESTFN)
-del f, dict
-
-
-##########################################################################
-# And another core-dumper from Michael Hudson.
-
-dict = {}
-
-# let's force dict to malloc its table
-for i in range(1, 10):
-    dict[i] = i
-
-class Machiavelli2:
-    def __eq__(self, other):
-        dict.clear()
-        return 1
-
-    def __hash__(self):
-        return 0
-
-dict[Machiavelli2()] = Machiavelli2()
-
-try:
-    dict[Machiavelli2()]
-except KeyError:
-    pass
-
-del dict
-
-##########################################################################
-# And another core-dumper from Michael Hudson.
-
-dict = {}
-
-# let's force dict to malloc its table
-for i in range(1, 10):
-    dict[i] = i
-
-class Machiavelli3:
-    def __init__(self, id):
-        self.id = id
-
-    def __eq__(self, other):
-        if self.id == other.id:
-            dict.clear()
-            return 1
-        else:
-            return 0
-
-    def __repr__(self):
-        return "%s(%s)"%(self.__class__.__name__, self.id)
-
-    def __hash__(self):
-        return 0
-
-dict[Machiavelli3(1)] = Machiavelli3(0)
-dict[Machiavelli3(2)] = Machiavelli3(0)
-
-f = open(TESTFN, "w")
-try:
-    try:
-        print(dict[Machiavelli3(2)], file=f)
-    except KeyError:
-        pass
-finally:
-    f.close()
-    os.unlink(TESTFN)
-
-del dict
-del dict1, dict2, dict1keys, dict2keys
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,10 +10,13 @@
 Core and Builtins
 -----------------
 
+- Issue #14205: dict lookup raises a RuntimeError if the dict is modified
+  during a lookup.
+
 Library
 -------
 
-- Issue #14168: Check for presence of Element._attrs in minidom before 
+- Issue #14168: Check for presence of Element._attrs in minidom before
   accessing it.
 
 - Issue #12328: Fix multiprocessing's use of overlapped I/O on Windows.
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -347,12 +347,9 @@
                     return ep;
             }
             else {
-                /* The compare did major nasty stuff to the
-                 * dict:  start over.
-                 * XXX A clever adversary could prevent this
-                 * XXX from terminating.
-                 */
-                return lookdict(mp, key, hash);
+                PyErr_SetString(PyExc_RuntimeError,
+                                "dictionary changed size during lookup");
+                return NULL;
             }
         }
         freeslot = NULL;
@@ -379,12 +376,9 @@
                     return ep;
             }
             else {
-                /* The compare did major nasty stuff to the
-                 * dict:  start over.
-                 * XXX A clever adversary could prevent this
-                 * XXX from terminating.
-                 */
-                return lookdict(mp, key, hash);
+                PyErr_SetString(PyExc_RuntimeError,
+                                "dictionary changed size during lookup");
+                return NULL;
             }
         }
         else if (ep->me_key == dummy && freeslot == NULL)

-- 
Repository URL: http://hg.python.org/cpython