[issue13703] Hash collision security issue

Dave Malcolm report at bugs.python.org
Wed Jan 25 18:49:19 CET 2012


Dave Malcolm <dmalcolm at redhat.com> added the comment:

On Wed, 2012-01-25 at 12:45 +0000, Dave Malcolm wrote:
> Dave Malcolm <dmalcolm at redhat.com> added the comment:
> 
> I've found a bug in my patch; insertdict writes the old non-randomized
> hash value into me_hash at:
>         ep->me_hash = hash;
> rather than using the randomized hash, leading to issues when tested
> against a real attack.

I'm attaching a revised version of the patch that should fix the above
issue:
  hybrid-approach-dmalcolm-2012-01-25-002.patch

Changes relative to -001.patch:
* updated insertdict() so that when it writes ep->me_hash, it uses the
correct hash value.  Unfortunately there doesn't seem to be a good way
of reusing the value we calculated in the "paranoid" ma_lookup
callbacks, without breaking ABI (suggestions welcome), so we call
PyObject_RandomizedHash again.
* slightly reworked the two _paranoid ma_lookup callbacks to capture the
randomized hash as a local variable, in case there's a way of reusing it
in insertdict()
* when lookdict() calls into itself, it now calls mp->ma_lookup instead
* don't generate a fatal error with an unknown ma_lookup callback.

With this, I'm able to insert 200,000 non-equal PyUnicodeObject instances
with hash()==0 into a dict on a 32-bit build --with-pydebug in 2.2 seconds;
it can retrieve all the values correctly in about 4 seconds [compare
with ~1.5 hours of CPU churn for inserting the same data on an optimized
build without the patch on the same guest].

The amortized ratio of total work done per modification increases
linearly when under an O(N^2) attack, and the dict switches itself to
paranoid mode 56 insertions after ma_table stops using ma_smalltable
(that's when we start tracking stats).  After the transition to paranoid
mode, it drops to an average of a little under 2 probes per insertion
(the amortized ratio seems to be converging to about 1.9 probes per key
insertion at the point where my hostile test data runs out).

----------
Added file: http://bugs.python.org/file24324/hybrid-approach-dmalcolm-2012-01-25-002.patch

_______________________________________
Python tracker <report at bugs.python.org>
<http://bugs.python.org/issue13703>
_______________________________________
-------------- next part --------------
diff -r 3be60a4c8c63 Doc/using/cmdline.rst
--- a/Doc/using/cmdline.rst	Fri Jan 20 11:01:06 2012 -0500
+++ b/Doc/using/cmdline.rst	Wed Jan 25 12:28:05 2012 -0500
@@ -460,6 +460,12 @@
    option.
 
 
+.. envvar:: PYTHONHASHSEED
+
+   If this is set, it is used as a fixed seed when randomizing hashes.
+   It should be a number in the range [0; 4294967295].  The value 0 disables
+   the hash randomization.
+
 .. envvar:: PYTHONIOENCODING
 
    If this is set before running the interpreter, it overrides the encoding used
diff -r 3be60a4c8c63 Include/bytesobject.h
--- a/Include/bytesobject.h	Fri Jan 20 11:01:06 2012 -0500
+++ b/Include/bytesobject.h	Wed Jan 25 12:28:05 2012 -0500
@@ -124,6 +124,8 @@
 #define F_ALT	(1<<3)
 #define F_ZERO	(1<<4)
 
+PyAPI_FUNC(Py_hash_t) _PyBytes_RandomizedHash(PyObject *bytes);
+
 #ifdef __cplusplus
 }
 #endif
diff -r 3be60a4c8c63 Include/object.h
--- a/Include/object.h	Fri Jan 20 11:01:06 2012 -0500
+++ b/Include/object.h	Wed Jan 25 12:28:05 2012 -0500
@@ -488,6 +488,8 @@
 PyAPI_FUNC(int) PyObject_GenericSetAttr(PyObject *,
                                               PyObject *, PyObject *);
 PyAPI_FUNC(Py_hash_t) PyObject_Hash(PyObject *);
+PyAPI_FUNC(Py_hash_t) PyObject_RandomizedHash(PyObject *);
+
 PyAPI_FUNC(Py_hash_t) PyObject_HashNotImplemented(PyObject *);
 PyAPI_FUNC(int) PyObject_IsTrue(PyObject *);
 PyAPI_FUNC(int) PyObject_Not(PyObject *);
@@ -521,6 +523,12 @@
 PyAPI_FUNC(Py_hash_t) _Py_HashDouble(double);
 PyAPI_FUNC(Py_hash_t) _Py_HashPointer(void*);
 PyAPI_FUNC(Py_hash_t) _Py_HashBytes(unsigned char*, Py_ssize_t);
+PyAPI_FUNC(Py_hash_t) _Py_RandomizedHashBytes(unsigned char*, Py_ssize_t);
+typedef struct {
+    Py_hash_t prefix;
+    Py_hash_t suffix;
+} _Py_HashSecret_t;
+PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret;
 #endif
 
 /* Helper for passing objects to printf and the like */
diff -r 3be60a4c8c63 Include/pythonrun.h
--- a/Include/pythonrun.h	Fri Jan 20 11:01:06 2012 -0500
+++ b/Include/pythonrun.h	Wed Jan 25 12:28:05 2012 -0500
@@ -246,6 +246,8 @@
 PyAPI_FUNC(PyOS_sighandler_t) PyOS_getsig(int);
 PyAPI_FUNC(PyOS_sighandler_t) PyOS_setsig(int, PyOS_sighandler_t);
 
+/* Random */
+PyAPI_FUNC(int) _PyOS_URandom (void *buffer, Py_ssize_t size);
 
 #ifdef __cplusplus
 }
diff -r 3be60a4c8c63 Include/unicodeobject.h
--- a/Include/unicodeobject.h	Fri Jan 20 11:01:06 2012 -0500
+++ b/Include/unicodeobject.h	Wed Jan 25 12:28:05 2012 -0500
@@ -2154,6 +2154,8 @@
 /* Clear all static strings. */
 PyAPI_FUNC(void) _PyUnicode_ClearStaticStrings(void);
 
+PyAPI_FUNC(Py_hash_t) _PyUnicode_RandomizedHash(PyObject *unicode);
+
 #ifdef __cplusplus
 }
 #endif
diff -r 3be60a4c8c63 Lib/os.py
--- a/Lib/os.py	Fri Jan 20 11:01:06 2012 -0500
+++ b/Lib/os.py	Wed Jan 25 12:28:05 2012 -0500
@@ -761,23 +761,6 @@
 except NameError: # statvfs_result may not exist
     pass
 
-if not _exists("urandom"):
-    def urandom(n):
-        """urandom(n) -> str
-
-        Return a string of n random bytes suitable for cryptographic use.
-
-        """
-        try:
-            _urandomfd = open("/dev/urandom", O_RDONLY)
-        except (OSError, IOError):
-            raise NotImplementedError("/dev/urandom (or equivalent) not found")
-        bs = b""
-        while len(bs) < n:
-            bs += read(_urandomfd, n - len(bs))
-        close(_urandomfd)
-        return bs
-
 # Supply os.popen()
 def popen(cmd, mode="r", buffering=-1):
     if not isinstance(cmd, str):
diff -r 3be60a4c8c63 Lib/test/regrtest.py
--- a/Lib/test/regrtest.py	Fri Jan 20 11:01:06 2012 -0500
+++ b/Lib/test/regrtest.py	Wed Jan 25 12:28:05 2012 -0500
@@ -559,6 +559,11 @@
         except ValueError:
             print("Couldn't find starting test (%s), using all tests" % start)
     if randomize:
+        hashseed = os.getenv('PYTHONHASHSEED')
+        if not hashseed:
+            os.environ['PYTHONHASHSEED'] = str(random_seed)
+            os.execv(sys.executable, [sys.executable] + sys.argv)
+            return
         random.seed(random_seed)
         print("Using random seed", random_seed)
         random.shuffle(selected)
diff -r 3be60a4c8c63 Lib/test/test_dict.py
--- a/Lib/test/test_dict.py	Fri Jan 20 11:01:06 2012 -0500
+++ b/Lib/test/test_dict.py	Wed Jan 25 12:28:05 2012 -0500
@@ -3,6 +3,8 @@
 
 import collections, random, string
 import gc, weakref
+import sys
+import time
 
 
 class DictTest(unittest.TestCase):
@@ -757,6 +759,301 @@
         self._tracked(MyDict())
 
 
+# Support classes for HashCollisionTests:
+class ChosenHash:
+    """
+    Use this to create arbitrary collections of keys that are non-equal
+    but have equal hashes, without needing to include hostile data
+    within the test suite.
+    """
+    def __init__(self, variability, hash):
+        self.variability = variability
+        self.hash = hash
+
+    def __eq__(self, other):
+        # The variability field is used to handle non-equalness:
+        return self.variability == other.variability
+
+    def __hash__(self):
+        return self.hash
+
+    def __repr__(self):
+        return 'ChosenHash(%r, %r)' % (self.variability,
+                                       self.hash)
+
+class Timer:
+    """
+    Simple way to measure time elapsed during a test case
+    """
+    def __init__(self):
+        self.starttime = time.time()
+
+    def get_elapsed_time(self):
+        """Get elapsed time in seconds as a float"""
+        curtime = time.time()
+        return curtime - self.starttime
+
+    def elapsed_time_as_str(self):
+        """Get elapsed time as a string (with units)"""
+        return '%0.3f seconds' % self.get_elapsed_time()
+
+class TookTooLong(RuntimeError):
+    def __init__(self, timelimit, elapsed, itercount=None):
+        self.timelimit = timelimit
+        self.elapsed = elapsed
+        self.itercount = itercount
+
+    def __str__(self):
+        result = 'took >= %s seconds' % self.timelimit
+        if self.itercount is not None:
+            result += (' (%0.3f seconds elapsed after %i iterations)'
+                       % (self.elapsed, self.itercount))
+        else:
+            result += ' (%0.3f seconds elapsed)' % self.elapsed
+        return result
+
+# Some of the tests involve constructing large dictionaries.  How big
+# should they be?
+ITEM_COUNT = 100000
+
+# Arbitrary threshold (in seconds) for a "reasonable amount of time"
+# that it should take to work with ITEM_COUNT items:
+TIME_LIMIT = 5
+
+class _FasterThanContext(object):
+    """
+    A context manager for implementing assertFasterThan
+    """
+    def __init__(self, test_case, **kwargs):
+        self.test_case = test_case
+        if 'seconds' in kwargs:
+            self.timelimit = kwargs['seconds']
+        else:
+            raise ValueError()
+
+    def __enter__(self):
+        self.timer = Timer()
+        return self
+
+    def __exit__(self, exc_type, exc_value, tb):
+        if exc_type is not None:
+            # let unexpected exceptions pass through
+            return
+
+        self.check_for_timeout()
+        if 1:
+            print('timer within %s took %s'
+                  % (self.test_case, self.timer.elapsed_time_as_str()))
+
+    def handle(self, callable_obj, args, kwargs):
+        """
+        If callable_obj is None, assertRaises/Warns is being used as a
+        context manager, so check for a 'msg' kwarg and return self.
+        If callable_obj is not None, call it passing args and kwargs.
+        """
+        if callable_obj is None:
+            self.msg = kwargs.pop('msg', None)
+            return self
+        with self:
+            callable_obj(*args, **kwargs)
+
+    def check_for_timeout(self, itercount=None):
+        """
+        Allow directly checking for timeouts,  potentially supplying an
+        iteration count if within a loop.  If the timer has elapsed, this will
+        raise a TookTooLong exception (potentially indicating how many
+        iterations were completed when the time limit was reached).
+
+        Otherwise, it does nothing.
+        """
+        elapsed_time = self.timer.get_elapsed_time()
+        if elapsed_time > self.timelimit:
+            raise TookTooLong(self.timelimit,
+                              elapsed_time,
+                              itercount)
+        
+ at support.cpython_only
+class HashCollisionTests(unittest.TestCase):
+    """
+    Issue 13703: tests about the behavior of dicts in the face of hostile data
+    """
+
+    def assertFasterThan(self, callableObj=None, *args, **kwargs):
+        context = _FasterThanContext(self, *args, **kwargs)
+        return context.handle(callableObj, args, kwargs)
+
+    def test_timings_with_benign_data(self):
+        # Verify that inserting many keys into a dict only takes a few seconds
+        d = dict()
+        with self.assertFasterThan(seconds=TIME_LIMIT) as cm:
+            for i in range(ITEM_COUNT):
+                d[i] = 0
+
+        # Verify that we can also retrieve the values quickly:
+        with self.assertFasterThan(seconds=TIME_LIMIT) as cm:
+            d[i]
+
+        # Verify that we can quickly insert the same item many times
+        # (overwriting each time):
+        d = dict()
+        with self.assertFasterThan(seconds=TIME_LIMIT) as cm:
+            for i in range(ITEM_COUNT):
+                d[0] = 0
+
+    def assert_ma_lookup(self, d, name):
+        # verify that a dict's ma_lookup hook has a specific value
+        if hasattr(d, '_stats'):
+            self.assertEqual(d._stats()['ma_lookup'],
+                             name)
+
+    def test_dict_stats(self):
+        # test for the diagnostic _stats() method, if it exists:
+        if hasattr(dict, '_stats'):
+            # Simple cases in which no probing should happen at all:
+            for d in [dict(zip(range(100), range(100))),
+                      dict(zip(range(1000), range(1000))),
+                      dict(zip(range(10000), range(10000)))]:
+                stats = d._stats()
+                self.assertIn('ma_mask', stats)
+                # ma_mask will be large for these dicts
+
+                # We don't expect any probing happened during construction:
+                self.assertEqual(stats['insertion_iter_count'], 0)
+                # They transition from ma_smalltable at size 6:
+                self.assertEqual(stats['num_insertions'], len(d) - 6)
+                self.assertEqual(stats['iter_count_per_insertion'], 0.0)
+
+                # We shouldn't have transitioned to paranoid mode:
+                self.assert_ma_lookup(d, 'lookdict')
+
+            # Cases in which we expect some hash collisions:
+            from itertools import permutations
+
+            for name, d, exp_length in [
+                ('All strings of length 4',
+                 dict([(''.join(chars), 1)
+                       for chars in permutations('abcdefghijklmnopqrstuvwxyz', 4)]),
+                 358800),
+                ]:
+
+                self.assertEqual(len(d), exp_length)
+                stats = d._stats()
+
+                # ma_mask will be large for this dict:
+                self.assertGreater(stats['ma_mask'], 7)
+
+                self.assertEqual(stats['num_insertions'], len(d) - 6)
+
+                # We expect significant probing happened during construction:
+                self.assertGreater(stats['insertion_iter_count'], 100000)
+                # ...but it should be sane relative to the amount of work we were
+                # asked to do:
+                self.assertLessEqual(stats['iter_count_per_insertion'], 2.0)
+                # and so we didn't transition:
+                self.assert_ma_lookup(d, 'lookdict_unicode')
+            
+    def test_not_reaching_limit(self):
+        # Ensure that we can insert equal-hashed keys up to (but not reaching)
+        # the collision limit:
+        with self.assertFasterThan(seconds=TIME_LIMIT) as cm:
+            d = dict()
+            for i in range(50):
+                key = ChosenHash(variability=i, hash=42)
+                d[key] = 0
+            self.assert_ma_lookup(d, 'lookdict')
+
+    def test_many_hash_collisions(self):
+        """
+        Ensure we gracefully handle inserting a large number of non-equal keys
+        with the same hash that transition the dict to "paranoid" mode:
+        """
+        with self.assertFasterThan(seconds=TIME_LIMIT) as cm:
+            d = dict()
+            for i in range(60):
+                key = ChosenHash(variability=i, hash=42)
+                d[key] = 0
+
+            # Verify that we exceeded the safety threshold:
+            if hasattr(d, '_stats'):
+                self.assertGreaterEqual(d._stats()['iter_count_per_insertion'],
+                                        32.0)
+
+            # Verify that the dict transitioned to paranoid mode:
+            self.assert_ma_lookup(d, 'lookdict_paranoid')
+
+            # Verify key lookup:
+            for i in range(60):
+                key = ChosenHash(variability=i, hash=42)
+                self.assertEqual(d[key], 0)
+
+    # Frank Sievertsen found scenarios in which the collision-counting
+    # scheme could be attacked:
+    #   http://mail.python.org/pipermail/python-dev/2012-January/115726.html
+
+    def test_scenario_b_from_Frank_Sievertsen(self):
+        # This typically takes about 3 seconds on my box (with a debug build):
+        with self.assertFasterThan(seconds=TIME_LIMIT * 2) as cm:
+            d = dict()
+
+            # Insert non-equal hash collisions up to but not reaching the
+            # limit where we take effect:
+            for i in range(50):
+                key = ChosenHash(variability=i, hash=42)
+                d[key] = 0
+                cm.check_for_timeout(i)
+
+            # We shouldn't have transitioned yet:
+            self.assert_ma_lookup(d, 'lookdict')
+
+            # Now try to add many equal values that collide
+            # with the hash, and see how long it takes
+            for i in range(ITEM_COUNT):
+                key = ChosenHash(variability=0, hash=42)
+                d[key] = 0
+                cm.check_for_timeout(i)
+
+            # We still shouldn't have transitioned:
+            self.assert_ma_lookup(d, 'lookdict')
+
+    def test_paranoid_str_dict(self):
+        # Exercise the handling of pure-str paranoid dict:
+        if hasattr(dict, '_make_paranoid'):
+            from itertools import permutations
+
+            d = dict([(''.join(chars), 1)
+                       for chars in permutations('abcdefghijklmnopqrstuvwxyz', 4)])
+
+            # It shouldn't be in paranoid mode:
+            self.assert_ma_lookup(d, 'lookdict_unicode')
+
+            with self.assertFasterThan(seconds=TIME_LIMIT) as cm:
+                # Force it into paranoid mode:
+                d._make_paranoid()
+                self.assert_ma_lookup(d, 'lookdict_unicode_paranoid')
+                self.assertIn('abcd', d)
+                self.assertEqual(d['zyxw'], 1)
+
+                # Now empty the dict:
+                d.clear()
+
+                # We should now have a smalltable dict, but it still uses the
+                # paranoid ma_lookup:
+                if hasattr(d, '_stats'):
+                    self.assertEqual(d._stats()['ma_mask'], 7)
+                    self.assert_ma_lookup(d, 'lookdict_unicode_paranoid')
+                
+                # Verify that the now-empty dict is still usable:
+                self.assertNotIn('abcd', d)
+                d['foo'] = 'bar'
+                self.assertIn('foo', d)
+                self.assertEqual(d['foo'], 'bar')
+
+                # Transition it from pure-str to mixed mode:
+                d[1] = 'not a str'
+                self.assert_ma_lookup(d, 'lookdict_paranoid')
+                self.assertIn(1, d)
+                self.assertEqual(d[1], 'not a str')
+
 from test import mapping_tests
 
 class GeneralMappingTests(mapping_tests.BasicTestMappingProtocol):
@@ -771,6 +1068,7 @@
 def test_main():
     support.run_unittest(
         DictTest,
+        HashCollisionTests,
         GeneralMappingTests,
         SubclassMappingTests,
     )
diff -r 3be60a4c8c63 Lib/test/test_hash.py
--- a/Lib/test/test_hash.py	Fri Jan 20 11:01:06 2012 -0500
+++ b/Lib/test/test_hash.py	Wed Jan 25 12:28:05 2012 -0500
@@ -4,9 +4,12 @@
 # Also test that hash implementations are inherited as expected
 
 import unittest
+import struct
 from test import support
+from test.script_helper import assert_python_ok
 from collections import Hashable
 
+IS_64BIT = (struct.calcsize('l') == 8)
 
 class HashEqualityTestCase(unittest.TestCase):
 
@@ -117,10 +120,56 @@
         for obj in self.hashes_to_check:
             self.assertEqual(hash(obj), _default_hash(obj))
 
+ at support.cpython_only
+class HashRandomizationTest(unittest.TestCase):
+    types_to_check = [str,
+                      lambda s: bytes(s, encoding='ascii')]
+
+    def get_randomized_hash(self, text, seed=None):
+        env = {}
+        if seed is not None:
+            env['PYTHONHASHSEED'] = str(seed)
+        out = assert_python_ok(
+            '-c', 'import sys; print(sys._getrandomizedhash(%r))' % text,
+            **env)
+        stdout = out[1].strip()
+        return int(stdout)
+
+    def test_empty_string(self):
+        for t in self.types_to_check:
+            self.assertEqual(hash(t("")), 0)
+
+    def test_null_hash(self):
+        # PYTHONHASHSEED=0 disables the randomized hash
+        if IS_64BIT:
+            h = 1453079729188098211
+        else:
+            h = -1600925533
+        for t in self.types_to_check:
+            self.assertEqual(self.get_randomized_hash(t("abc"), seed=0), h)
+
+    def test_fixed_hash(self):
+        # test a fixed seed for the randomized hash
+        if IS_64BIT:
+            h = -4410911502303878509
+        else:
+            h = -206076799
+        for t in self.types_to_check:
+            self.assertEqual(self.get_randomized_hash(t("abc"), seed=42), h)
+
+    def test_randomized_hash(self):
+        # two runs should return different hashes
+        for t in self.types_to_check:
+            run1 = self.get_randomized_hash(t("abc"))
+            run2 = self.get_randomized_hash(t("abc"))
+            self.assertNotEqual(run1, run2)
+
+
 def test_main():
     support.run_unittest(HashEqualityTestCase,
-                              HashInheritanceTestCase,
-                              HashBuiltinsTestCase)
+                         HashInheritanceTestCase,
+                         HashBuiltinsTestCase,
+                         HashRandomizationTest)
 
 
 if __name__ == "__main__":
diff -r 3be60a4c8c63 Lib/test/test_os.py
--- a/Lib/test/test_os.py	Fri Jan 20 11:01:06 2012 -0500
+++ b/Lib/test/test_os.py	Wed Jan 25 12:28:05 2012 -0500
@@ -12,6 +12,7 @@
 import time
 import shutil
 from test import support
+from test.script_helper import assert_python_ok
 import contextlib
 import mmap
 import platform
@@ -630,14 +631,32 @@
             self.assertEqual(f.read(), b'')
 
 class URandomTests(unittest.TestCase):
-    def test_urandom(self):
-        try:
-            self.assertEqual(len(os.urandom(1)), 1)
-            self.assertEqual(len(os.urandom(10)), 10)
-            self.assertEqual(len(os.urandom(100)), 100)
-            self.assertEqual(len(os.urandom(1000)), 1000)
-        except NotImplementedError:
-            pass
+    def test_urandom_length(self):
+        self.assertEqual(len(os.urandom(1)), 1)
+        self.assertEqual(len(os.urandom(10)), 10)
+        self.assertEqual(len(os.urandom(100)), 100)
+        self.assertEqual(len(os.urandom(1000)), 1000)
+
+    def test_urandom_value(self):
+        data1 = os.urandom(16)
+        data2 = os.urandom(16)
+        self.assertNotEqual(data1, data2)
+
+    def get_urandom_subprocess(self, count):
+        code = '\n'.join((
+            'import os, sys',
+            'data = os.urandom(%s)' % count,
+            'sys.stdout.buffer.write(data)',
+            'sys.stdout.buffer.flush()'))
+        out = assert_python_ok('-c', code)
+        stdout = out[1]
+        self.assertEqual(len(stdout), 16)
+        return stdout
+
+    def test_urandom_subprocess(self):
+        data1 = self.get_urandom_subprocess(16)
+        data2 = self.get_urandom_subprocess(16)
+        self.assertNotEqual(data1, data2)
 
 @contextlib.contextmanager
 def _execvpe_mockup(defpath=None):
diff -r 3be60a4c8c63 Makefile.pre.in
--- a/Makefile.pre.in	Fri Jan 20 11:01:06 2012 -0500
+++ b/Makefile.pre.in	Wed Jan 25 12:28:05 2012 -0500
@@ -322,6 +322,7 @@
 		Python/pystate.o \
 		Python/pythonrun.o \
 		Python/pytime.o \
+		Python/random.o \
 		Python/structmember.o \
 		Python/symtable.o \
 		Python/sysmodule.o \
diff -r 3be60a4c8c63 Modules/posixmodule.c
--- a/Modules/posixmodule.c	Fri Jan 20 11:01:06 2012 -0500
+++ b/Modules/posixmodule.c	Wed Jan 25 12:28:05 2012 -0500
@@ -9329,81 +9329,36 @@
 }
 #endif
 
-#ifdef MS_WINDOWS
-
-PyDoc_STRVAR(win32_urandom__doc__,
+PyDoc_STRVAR(posix_urandom__doc__,
 "urandom(n) -> str\n\n\
-Return n random bytes suitable for cryptographic use.");
-
-typedef BOOL (WINAPI *CRYPTACQUIRECONTEXTA)(HCRYPTPROV *phProv,\
-              LPCSTR pszContainer, LPCSTR pszProvider, DWORD dwProvType,\
-              DWORD dwFlags );
-typedef BOOL (WINAPI *CRYPTGENRANDOM)(HCRYPTPROV hProv, DWORD dwLen,\
-              BYTE *pbBuffer );
-
-static CRYPTGENRANDOM pCryptGenRandom = NULL;
-/* This handle is never explicitly released. Instead, the operating
-   system will release it when the process terminates. */
-static HCRYPTPROV hCryptProv = 0;
+Return n pseudo-random bytes.");
 
 static PyObject*
-win32_urandom(PyObject *self, PyObject *args)
-{
-    int howMany;
-    PyObject* result;
+posix_urandom(PyObject *self, PyObject *args)
+{
+    Py_ssize_t size;
+    PyObject *result;
+    int ret;
 
     /* Read arguments */
-    if (! PyArg_ParseTuple(args, "i:urandom", &howMany))
-        return NULL;
-    if (howMany < 0)
+    if (!PyArg_ParseTuple(args, "n:urandom", &size))
+        return NULL;
+    if (size < 0)
         return PyErr_Format(PyExc_ValueError,
                             "negative argument not allowed");
 
-    if (hCryptProv == 0) {
-        HINSTANCE hAdvAPI32 = NULL;
-        CRYPTACQUIRECONTEXTA pCryptAcquireContext = NULL;
-
-        /* Obtain handle to the DLL containing CryptoAPI
-           This should not fail         */
-        hAdvAPI32 = GetModuleHandleW(L"advapi32.dll");
-        if(hAdvAPI32 == NULL)
-            return win32_error("GetModuleHandle", NULL);
-
-        /* Obtain pointers to the CryptoAPI functions
-           This will fail on some early versions of Win95 */
-        pCryptAcquireContext = (CRYPTACQUIRECONTEXTA)GetProcAddress(
-                                        hAdvAPI32,
-                                        "CryptAcquireContextA");
-        if (pCryptAcquireContext == NULL)
-            return PyErr_Format(PyExc_NotImplementedError,
-                                "CryptAcquireContextA not found");
-
-        pCryptGenRandom = (CRYPTGENRANDOM)GetProcAddress(
-                                        hAdvAPI32, "CryptGenRandom");
-        if (pCryptGenRandom == NULL)
-            return PyErr_Format(PyExc_NotImplementedError,
-                                "CryptGenRandom not found");
-
-        /* Acquire context */
-        if (! pCryptAcquireContext(&hCryptProv, NULL, NULL,
-                                   PROV_RSA_FULL, CRYPT_VERIFYCONTEXT))
-            return win32_error("CryptAcquireContext", NULL);
-    }
-
-    /* Allocate bytes */
-    result = PyBytes_FromStringAndSize(NULL, howMany);
-    if (result != NULL) {
-        /* Get random data */
-        memset(PyBytes_AS_STRING(result), 0, howMany); /* zero seed */
-        if (! pCryptGenRandom(hCryptProv, howMany, (unsigned char*)
-                              PyBytes_AS_STRING(result))) {
-            Py_DECREF(result);
-            return win32_error("CryptGenRandom", NULL);
-        }
+    result = PyBytes_FromStringAndSize(NULL, size);
+    if (result == NULL)
+        return NULL;
+
+    ret = _PyOS_URandom(PyBytes_AS_STRING(result),
+                        PyBytes_GET_SIZE(result));
+    if (ret == -1) {
+        Py_DECREF(result);
+        return NULL;
     }
     return result;
 }
-#endif
 
 PyDoc_STRVAR(device_encoding__doc__,
 "device_encoding(fd) -> str\n\n\
@@ -9445,42 +9400,6 @@
     return Py_None;
 }
 
-#ifdef __VMS
-/* Use openssl random routine */
-#include <openssl/rand.h>
-PyDoc_STRVAR(vms_urandom__doc__,
-"urandom(n) -> str\n\n\
-Return n random bytes suitable for cryptographic use.");
-
-static PyObject*
-vms_urandom(PyObject *self, PyObject *args)
-{
-    int howMany;
-    PyObject* result;
-
-    /* Read arguments */
-    if (! PyArg_ParseTuple(args, "i:urandom", &howMany))
-        return NULL;
-    if (howMany < 0)
-        return PyErr_Format(PyExc_ValueError,
-                            "negative argument not allowed");
-
-    /* Allocate bytes */
-    result = PyBytes_FromStringAndSize(NULL, howMany);
-    if (result != NULL) {
-        /* Get random data */
-        if (RAND_pseudo_bytes((unsigned char*)
-                              PyBytes_AS_STRING(result),
-                              howMany) < 0) {
-            Py_DECREF(result);
-            return PyErr_Format(PyExc_ValueError,
-                                "RAND_pseudo_bytes");
-        }
-    }
-    return result;
-}
-#endif
-
 #ifdef HAVE_SETRESUID
 PyDoc_STRVAR(posix_setresuid__doc__,
 "setresuid(ruid, euid, suid)\n\n\
@@ -10887,12 +10806,7 @@
 #ifdef HAVE_GETLOADAVG
     {"getloadavg",      posix_getloadavg, METH_NOARGS, posix_getloadavg__doc__},
 #endif
- #ifdef MS_WINDOWS
-    {"urandom", win32_urandom, METH_VARARGS, win32_urandom__doc__},
- #endif
- #ifdef __VMS
-    {"urandom", vms_urandom, METH_VARARGS, vms_urandom__doc__},
- #endif
+    {"urandom",         posix_urandom,   METH_VARARGS, posix_urandom__doc__},
 #ifdef HAVE_SETRESUID
     {"setresuid",       posix_setresuid, METH_VARARGS, posix_setresuid__doc__},
 #endif
diff -r 3be60a4c8c63 Objects/bytesobject.c
--- a/Objects/bytesobject.c	Fri Jan 20 11:01:06 2012 -0500
+++ b/Objects/bytesobject.c	Wed Jan 25 12:28:05 2012 -0500
@@ -867,6 +867,13 @@
     return a->ob_shash;
 }
 
+Py_hash_t
+_PyBytes_RandomizedHash(PyObject *a)
+{
+    return _Py_RandomizedHashBytes((unsigned char *)((PyBytesObject *)a)->ob_sval,
+                                   Py_SIZE(a));
+}
+
 static PyObject*
 bytes_subscript(PyBytesObject* self, PyObject* item)
 {
diff -r 3be60a4c8c63 Objects/dictobject.c
--- a/Objects/dictobject.c	Fri Jan 20 11:01:06 2012 -0500
+++ b/Objects/dictobject.c	Wed Jan 25 12:28:05 2012 -0500
@@ -10,6 +10,32 @@
 #include "Python.h"
 #include "stringlib/eq.h"
 
+/* undefine this: */
+#define DICT_PROTECTION_TRACKING 1
+
+#define Py_MAX_AVERAGE_PROBES_PER_INSERT 32
+/* power-of-two to make multiplication fast */
+
+/* Avoid overflow by dividing stats when they exceed this: */
+#define Py_DICT_STATS_ROUNDING_THRESHOLD 0x40000000
+
+/* For large dictionaries, reuse the space allocated for ma_smalltable */
+typedef struct _Py_LargeDictFields {
+    int is_inserting;
+
+    /* Check the ratio between these, to track amortized cost per operation */
+    size_t num_insertions;
+    size_t insertion_iter_count;
+
+} _Py_LargeDictFields;
+
+#define DICT_IS_PARANOID(mp) \
+  ((mp)->ma_lookup == lookdict_paranoid \
+   || ((mp)->ma_lookup == lookdict_unicode_paranoid))
+
+#define PyDict_LARGEDICTFIELDS(mp) \
+  ((_Py_LargeDictFields*)&(mp)->ma_smalltable)
+/* FIXME: add assert(mp->ma_table != mp->ma_smalltable) */
 
 /* Set a key error with the specified argument, wrapping it in a
  * tuple automatically so that tuple keys are not unpacked as the
@@ -139,11 +165,28 @@
 
 /* forward declarations */
 static PyDictEntry *
+lookdict(PyDictObject *mp, PyObject *key, Py_hash_t hash);
+
+static PyDictEntry *
 lookdict_unicode(PyDictObject *mp, PyObject *key, Py_hash_t hash);
 
+static PyDictEntry *
+lookdict_paranoid(PyDictObject *mp, PyObject *key, Py_hash_t hash);
+
+static PyDictEntry *
+lookdict_unicode_paranoid(PyDictObject *mp, PyObject *key, Py_hash_t hash);
+
+static int
+transition_to_paranoid_dict(PyDictObject *mp);
+
+static int
+check_iter_count(PyDictObject *mp, PyObject *key, Py_hash_t hash);
+
 #ifdef SHOW_CONVERSION_COUNTS
 static long created = 0L;
-static long converted = 0L;
+static long converted_unicode_to_general = 0L;
+static long converted_unicode_to_paranoid = 0L;
+static long converted_general_to_paranoid = 0L;
 
 static void
 show_counts(void)
@@ -352,7 +395,7 @@
                  * XXX A clever adversary could prevent this
                  * XXX from terminating.
                  */
-                return lookdict(mp, key, hash);
+                return mp->ma_lookup(mp, key, hash);
             }
         }
         freeslot = NULL;
@@ -384,11 +427,21 @@
                  * XXX A clever adversary could prevent this
                  * XXX from terminating.
                  */
-                return lookdict(mp, key, hash);
+                return mp->ma_lookup(mp, key, hash);
             }
         }
         else if (ep->me_key == dummy && freeslot == NULL)
             freeslot = ep;
+
+        if (check_iter_count(mp, key, hash)) {
+            if (-1 == transition_to_paranoid_dict(mp)) {
+                return NULL;
+            }
+            
+            /* Everything has changed.
+               Restart the lookup using the new function: */
+            return mp->ma_lookup(mp, key, hash);
+        }
     }
     assert(0);          /* NOT REACHED */
     return 0;
@@ -422,7 +475,14 @@
 #ifdef SHOW_CONVERSION_COUNTS
         ++converted;
 #endif
-        mp->ma_lookup = lookdict;
+        if (mp->ma_lookup == lookdict_unicode) {
+            /* transition from lookdict_unicode -> lookdict */
+            mp->ma_lookup = lookdict;
+        } else {
+            /* transition from lookdict_unicode_paranoid -> lookdict_paranoid */
+            assert(mp->ma_lookup == lookdict_unicode_paranoid);
+            mp->ma_lookup = lookdict_paranoid;
+        }
         return lookdict(mp, key, hash);
     }
     i = (size_t)hash & mask;
@@ -451,11 +511,66 @@
             return ep;
         if (ep->me_key == dummy && freeslot == NULL)
             freeslot = ep;
+        if (check_iter_count(mp, key, hash)) {
+            if (-1 == transition_to_paranoid_dict(mp)) {
+                return NULL;
+            }
+            
+            /* Everything has changed.
+               Restart the lookup using the new function: */
+            return mp->ma_lookup(mp, key, hash);
+        }
     }
     assert(0);          /* NOT REACHED */
     return 0;
 }
 
+/* Version of lookdict which ignores the hash value passed in, and instead
+   generates an alternate hash for each object using a secret.
+   Note that if the object can't do this, it simply reuses its cached hash
+*/
+static PyDictEntry *
+lookdict_paranoid(PyDictObject *mp, PyObject *key, Py_hash_t hash)
+{
+    /*
+      Ignore the object's cached hash value, and instead compute and use a
+      "secret" one.  This won't be cached, meaning that the per-object cost
+      of the call gets a lot larger.  However, it ought to still be O(1)
+    */
+    Py_hash_t randomized_hash = PyObject_RandomizedHash(key);
+    PyDictEntry *ep = lookdict(mp, key, randomized_hash);
+    return ep;
+}
+
+/*
+  Similar to lookdict_paranoid and lookdict_unicode: used when a dict has
+  received hostile data, where all keys are PyUnicodeObject
+*/
+static PyDictEntry *
+lookdict_unicode_paranoid(PyDictObject *mp, PyObject *key,
+                          Py_hash_t hash)
+{
+    /*
+      As per lookdict_paranoid, we ignore the object's cached hash value
+    */
+    Py_hash_t randomized_hash;
+    PyDictEntry *ep;
+
+    if (!PyUnicode_CheckExact(key)) {
+        randomized_hash = PyObject_RandomizedHash(key);
+        /* this call will transition internally to lookdict_paranoid: */
+        ep = lookdict_unicode(mp, key, randomized_hash); 
+    } else {
+        /* use optimized implementation when obtaining randomized hash, since
+           we know it's a PyUnicodeObject: */
+        randomized_hash = _PyUnicode_RandomizedHash(key);
+        ep = lookdict_unicode(mp, key, randomized_hash);
+    }
+
+    return ep;
+}
+
+
 int
 _PyDict_HasOnlyStringKeys(PyObject *dict)
 {
@@ -463,7 +578,8 @@
     PyObject *key, *value;
     assert(PyDict_Check(dict));
     /* Shortcut */
-    if (((PyDictObject *)dict)->ma_lookup == lookdict_unicode)
+    if (((PyDictObject *)dict)->ma_lookup == lookdict_unicode || 
+        ((PyDictObject *)dict)->ma_lookup == lookdict_unicode_paranoid)
         return 1;
     while (PyDict_Next(dict, &pos, &key, &value))
         if (!PyUnicode_Check(key))
@@ -530,9 +646,23 @@
     PyObject *old_value;
     register PyDictEntry *ep;
     typedef PyDictEntry *(*lookupfunc)(PyDictObject *, PyObject *, Py_hash_t);
+    _Py_LargeDictFields *large_fields = PyDict_LARGEDICTFIELDS(mp);
 
     assert(mp->ma_lookup != NULL);
+    if (mp->ma_mask >= PyDict_MINSIZE) {
+        large_fields->is_inserting = 1;
+
+        /* Increment num_insertions, protecting against stats overflow: */
+        if (++large_fields->num_insertions > Py_DICT_STATS_ROUNDING_THRESHOLD) {
+            /* Preserve the ratio between these two: */
+            large_fields->insertion_iter_count >>= 4;
+            large_fields->num_insertions >>= 4;
+        }
+    }
     ep = mp->ma_lookup(mp, key, hash);
+    if (mp->ma_mask >= PyDict_MINSIZE) {
+        large_fields->is_inserting = 0;
+    }
     if (ep == NULL) {
         Py_DECREF(key);
         Py_DECREF(value);
@@ -553,7 +683,15 @@
             Py_DECREF(dummy);
         }
         ep->me_key = key;
-        ep->me_hash = hash;
+        if (!DICT_IS_PARANOID(mp)) {
+            ep->me_hash = hash;
+        } else {
+            /* Ideally we need some way of getting back the randomized hash
+	       from the ma_lookup function, but there doesn't seem to be a
+	       way of doing that without breaking ABI.
+               For now, recalculate the randomized hash: */
+            ep->me_hash = PyObject_RandomizedHash(key);
+        }
         ep->me_value = value;
         mp->ma_used++;
     }
@@ -675,8 +813,15 @@
         /* else key == value == NULL:  nothing to do */
     }
 
-    if (is_oldtable_malloced)
+    if (is_oldtable_malloced) {
         PyMem_DEL(oldtable);
+    } else {
+        if (mp->ma_table != mp->ma_smalltable) {
+            /* clean ma_smalltable for use as _Py_LargeDictFields: */
+            memset(mp->ma_smalltable, 0, sizeof(mp->ma_smalltable));
+        }
+    }
+
     return 0;
 }
 
@@ -1996,6 +2141,91 @@
 PyDoc_STRVAR(copy__doc__,
 "D.copy() -> a shallow copy of D");
 
+#ifdef DICT_PROTECTION_TRACKING
+/* Debug code for use by selftests: */
+static const char *
+get_name_of_ma_lookup(PyDictObject *mp)
+{
+    /* can't do this as a switch statement */
+    if (mp->ma_lookup == lookdict) {
+        return "lookdict";
+    } else if (mp->ma_lookup == lookdict_unicode) {
+        return "lookdict_unicode";
+    } else if (mp->ma_lookup == lookdict_paranoid) {
+        return "lookdict_paranoid";
+    } else if (mp->ma_lookup == lookdict_unicode_paranoid) {
+        return "lookdict_unicode_paranoid";
+    } else {
+        return "unknown ma_lookup";
+    }
+}
+
+static PyObject *
+dict__stats(PyDictObject *mp)
+{
+    PyObject *result = PyDict_New();
+    if (!result) goto error;
+
+#define ADD_STAT(name, expr) \
+    {                                                             \
+        PyObject *value = (expr);                                 \
+        if (!value)                                               \
+            goto error;                                           \
+        if (-1 == PyDict_SetItemString(result, (name), value)) {  \
+            Py_DECREF(value);                                     \
+            goto error;                                           \
+        }                                                         \
+        Py_DECREF(value);                                         \
+    }
+
+    ADD_STAT("ma_mask", PyLong_FromLong(mp->ma_mask));
+    ADD_STAT("ma_lookup", PyUnicode_FromString(get_name_of_ma_lookup(mp)));
+    if (mp->ma_mask >= PyDict_MINSIZE) {
+        _Py_LargeDictFields *large_fields = PyDict_LARGEDICTFIELDS(mp);
+        ADD_STAT("num_insertions", PyLong_FromLong(large_fields->num_insertions));
+        ADD_STAT("insertion_iter_count", PyLong_FromLong(large_fields->insertion_iter_count));
+        if (large_fields->num_insertions > 0) {
+          ADD_STAT("iter_count_per_insertion",
+                   PyFloat_FromDouble((double)large_fields->insertion_iter_count
+                                      / (double)large_fields->num_insertions));
+        }
+    }
+
+    return result;
+
+ error:
+    Py_XDECREF(result);
+    return NULL;
+}
+
+static PyObject *
+dict__make_paranoid(register PyDictObject *mp)
+{
+    if (mp->ma_mask >= PyDict_MINSIZE) {
+        if (DICT_IS_PARANOID(mp)) {
+            PyErr_SetString(PyExc_RuntimeError,
+                            "dict is already in paranoid mode");
+            return NULL;
+        } else {
+            transition_to_paranoid_dict(mp);
+            Py_RETURN_NONE;
+        }
+    } else {
+        PyErr_SetString(PyExc_RuntimeError,
+                        ("dict is not large enough to be forced"
+                         " into paranoid mode"));
+        return NULL;        
+    }
+}
+
+
+PyDoc_STRVAR(_stats__doc__,
+"D._stats() -> for CPython selftests: obtain debugging stats on D");
+
+PyDoc_STRVAR(_make_paranoid__doc__,
+"D._make_paranoid() -> for CPython selftests: forcibly transition this dict into paranoid mode");
+#endif
+
 /* Forward */
 static PyObject *dictkeys_new(PyObject *);
 static PyObject *dictitems_new(PyObject *);
@@ -2037,6 +2267,12 @@
      clear__doc__},
     {"copy",            (PyCFunction)dict_copy,         METH_NOARGS,
      copy__doc__},
+#ifdef DICT_PROTECTION_TRACKING
+    {"_stats",          (PyCFunction)dict__stats,       METH_NOARGS,
+     _stats__doc__},
+    {"_make_paranoid",  (PyCFunction)dict__make_paranoid, METH_NOARGS,
+     _make_paranoid__doc__},
+#endif
     {NULL,              NULL}   /* sentinel */
 };
 
@@ -3067,3 +3303,144 @@
 {
     return dictview_new(dict, &PyDictValues_Type);
 }
+
+/*
+  Swap the hash values for the active entries with the values
+  in the corresponding indices in the given array (of size
+  (mp->ma_mask + 1) values)
+ */
+static void
+swap_hash_values(PyDictObject *mp, Py_hash_t *hashes)
+{
+    Py_ssize_t i;
+
+    assert(mp);
+    assert(hashes);
+
+    for (i=0; i <= mp->ma_mask;  i++) {
+        PyDictEntry *ep = &mp->ma_table[i];
+        if (ep->me_value != NULL) { /* active entry */
+            Py_hash_t old_hash = ep->me_hash;
+            ep->me_hash = hashes[i];
+            hashes[i] = old_hash;
+        }
+    }
+}
+
/*
  Switch to new lookup implementation, using secret hashes, reordering
  all of the me_hash slots.

  Returns -1 for failure (with an exception set), 0 for success.

  Order matters here: new hashes are computed into a side buffer first
  (so any error leaves the dict untouched), then swapped into the table,
  then the table is re-bucketed via dictresize(); on resize failure the
  old hashes are swapped back, restoring the original state.
*/
static int
transition_to_paranoid_dict(PyDictObject *mp)
{
    Py_hash_t *hashes;
    Py_ssize_t i;

    /* Only large dicts can transition: paranoid mode repurposes the
       ma_smalltable space for _Py_LargeDictFields. */
    assert(mp->ma_table != mp->ma_smalltable);

    assert(!DICT_IS_PARANOID(mp));

    /*
      Recalculate all of the me_hash values within ma_table
      using the randomized new algorithm.

      An error could occur during this process, so allocate
      a temporary buffer:
    */
    hashes = PyMem_NEW(Py_hash_t, mp->ma_mask + 1);
    if (hashes == NULL) {
        PyErr_NoMemory();
        return -1;
    }

    /* Populate "hashes" with new values for the active entries,
       leaving the other indices untouched (their buffer slots stay
       uninitialized, but they are never read): */
    for (i=0; i <= mp->ma_mask;  i++) {
        PyDictEntry *ep = &mp->ma_table[i];
        if (ep->me_value != NULL) { /* active entry */
            Py_hash_t new_hash = PyObject_RandomizedHash(ep->me_key);
            if (new_hash != -1) {
                hashes[i] = new_hash;
            } else {
                /* Error: an exception is set; nothing has been modified
                   yet, so just release the buffer: */
                PyMem_DEL(hashes);
                return -1;
            }
        }
    }

    /* OK, we have new hashes for all active entries, and no errors
       occurred.  Copy them up into the ma_table, swapping the old
       value into the buffer in case we need to reconstruct things
       later: */
    swap_hash_values(mp, hashes);

    /*
       "hashes" now holds the old hash values ; mp->ma_table holds
       the new hash values

       Now use dictresize to reorganize the order of the entries.
       We use ma_mask here as the minimum size to ensure that the dict
       doesn't start using ma_smalltable again
    */
    assert(mp->ma_mask >= PyDict_MINSIZE);
    if (-1 == dictresize(mp, mp->ma_mask)) {
        /* This can fail under low memory conditions, before changing
           anything.  If so, swap the old hash values back in: */
        swap_hash_values(mp, hashes);
        PyMem_DEL(hashes);
        return -1;
    }

    /* Success.  We're done with the "hashes" buffer: */
    PyMem_DEL(hashes);

    /* Change ma_lookup so that we use randomized hashes: */
    if (mp->ma_lookup == lookdict_unicode) {
        /* transition from lookdict_unicode -> lookdict_unicode_paranoid */
        mp->ma_lookup = lookdict_unicode_paranoid;
    } else {
        /* transition from lookdict -> lookdict_paranoid */
        assert(mp->ma_lookup == lookdict);
        mp->ma_lookup = lookdict_paranoid;
    }

    /* FIXME: track conversions? */
    return 0;
}
+
+/* may want to hand-inline this one
+
+  Returns 1 when the transition-to-paranoid should occur, 0 otherwise
+ */
+static int
+check_iter_count(PyDictObject *mp, PyObject *key, Py_hash_t hash)
+{
+    if (mp->ma_mask >= PyDict_MINSIZE) {
+        _Py_LargeDictFields *large_fields = PyDict_LARGEDICTFIELDS(mp);
+        assert(mp->ma_table != mp->ma_smalltable);
+        if (large_fields->is_inserting) {
+
+            /* Increment insertion_iter_count,
+               protecting against stats overflow: */
+            if (++large_fields->insertion_iter_count
+                >= Py_DICT_STATS_ROUNDING_THRESHOLD) {
+               /* Preserve the ratio between these two: */
+               large_fields->insertion_iter_count >>= 4;
+               large_fields->num_insertions >>= 4;
+            }
+
+            /* Transition into paranoid mode if the ratio is too bad: */
+            if (large_fields->insertion_iter_count
+                > (large_fields->num_insertions * Py_MAX_AVERAGE_PROBES_PER_INSERT) ) {
+                if (!DICT_IS_PARANOID(mp)) {
+                    return 1;
+                }
+            }
+        }
+    }
+    return 0;
+}
diff -r 3be60a4c8c63 Objects/object.c
--- a/Objects/object.c	Fri Jan 20 11:01:06 2012 -0500
+++ b/Objects/object.c	Wed Jan 25 12:28:05 2012 -0500
@@ -769,6 +769,22 @@
 }
 
 Py_hash_t
+_Py_RandomizedHashBytes(unsigned char *p, Py_ssize_t len)
+{
+    Py_uhash_t x;
+    Py_ssize_t i;
+
+    x = _Py_HashSecret.prefix ^ (Py_uhash_t) *p << 7;
+    for (i = 0; i < len; i++)
+        x = (_PyHASH_MULTIPLIER * x) ^ (Py_uhash_t) *p++;
+    x ^= (Py_uhash_t) len;
+    x ^= _Py_HashSecret.suffix;
+    if (x == -1)
+        x = -2;
+    return x;
+}
+
+Py_hash_t
 PyObject_HashNotImplemented(PyObject *v)
 {
     PyErr_Format(PyExc_TypeError, "unhashable type: '%.200s'",
@@ -797,6 +813,27 @@
     return PyObject_HashNotImplemented(v);
 }
 
+_Py_HashSecret_t _Py_HashSecret;
+
+Py_hash_t
+PyObject_RandomizedHash(PyObject *v)
+{
+    /*
+      We only randomize the hash for a few specific types for now, to ease
+      backporting.  Ultimately this could be a new entry in PyTypeObject
+    */
+    if (Py_TYPE(v) == &PyUnicode_Type) {
+        return _PyUnicode_RandomizedHash(v);
+    } else if (Py_TYPE(v) == &PyBytes_Type) {
+        return _PyBytes_RandomizedHash(v);
+    }
+    /* FIXME: for Python 2 backport extend to cover PyBuffer_Type */
+
+    /* Otherwise, just use the regular hash value: */
+    return PyObject_Hash(v);
+}
+
+
 PyObject *
 PyObject_GetAttrString(PyObject *v, const char *name)
 {
diff -r 3be60a4c8c63 Objects/unicodeobject.c
--- a/Objects/unicodeobject.c	Fri Jan 20 11:01:06 2012 -0500
+++ b/Objects/unicodeobject.c	Wed Jan 25 12:28:05 2012 -0500
@@ -11242,6 +11242,57 @@
 }
 #undef HASH
 
/* Randomized variant of unicode_hash(): mixes the string's code units
   with the process-wide _Py_HashSecret prefix/suffix.  Unlike the regular
   hash, the result is deliberately NOT cached in the object (see the
   comment at the end).  Returns -1 (with an exception set) on error. */
Py_hash_t
_PyUnicode_RandomizedHash(PyObject *self)
{
    Py_ssize_t len;
    Py_uhash_t x;

    if (PyUnicode_READY(self) == -1)
        return -1;
    len = PyUnicode_GET_LENGTH(self);
    if (len == 0) {
        /* NOTE(review): this stores 0 into the object's cached hash even
           though the comment at the end says we do *not* update
           _PyUnicode_HASH() here.  Harmless in practice, since the regular
           hash of an empty string is also 0, but confirm it's intended. */
        _PyUnicode_HASH(self) = 0;
        return 0;
    }

    /* Alternate definition of the hash function as a macro to above */
#define HASH(P) \
    x = _Py_HashSecret.prefix ^ (Py_uhash_t)*P << 7; \
    while (--len >= 0) \
        x = (_PyHASH_MULTIPLIER*x) ^ (Py_uhash_t)*P++;

    /* Dispatch on the canonical representation's code-unit width: */
    switch (PyUnicode_KIND(self)) {
    case PyUnicode_1BYTE_KIND: {
        const unsigned char *c = PyUnicode_1BYTE_DATA(self);
        HASH(c);
        break;
    }
    case PyUnicode_2BYTE_KIND: {
        const Py_UCS2 *s = PyUnicode_2BYTE_DATA(self);
        HASH(s);
        break;
    }
    default: {
        Py_UCS4 *l;
        assert(PyUnicode_KIND(self) == PyUnicode_4BYTE_KIND &&
               "Impossible switch case in unicode_hash");
        l = PyUnicode_4BYTE_DATA(self);
        HASH(l);
        break;
    }
    }
    /* HASH consumed len, so re-read the length for the final mix: */
    x ^= (Py_uhash_t)PyUnicode_GET_LENGTH(self);
    x ^= _Py_HashSecret.suffix;

    /* -1 is reserved as an error indicator for hash functions: */
    if (x == -1)
        x = -2;
    /* we do *not* update _PyUnicode_HASH(self) here */
    return x;
}
#undef HASH
+
+
 PyDoc_STRVAR(index__doc__,
              "S.index(sub[, start[, end]]) -> int\n\
 \n\
diff -r 3be60a4c8c63 PCbuild/pythoncore.vcproj
--- a/PCbuild/pythoncore.vcproj	Fri Jan 20 11:01:06 2012 -0500
+++ b/PCbuild/pythoncore.vcproj	Wed Jan 25 12:28:05 2012 -0500
@@ -1891,6 +1891,10 @@
 				>
 			</File>
 			<File
+				RelativePath="..\Python\random.c"
+				>
+			</File>
+			<File
 				RelativePath="..\Python\structmember.c"
 				>
 			</File>
diff -r 3be60a4c8c63 Python/pythonrun.c
--- a/Python/pythonrun.c	Fri Jan 20 11:01:06 2012 -0500
+++ b/Python/pythonrun.c	Wed Jan 25 12:28:05 2012 -0500
@@ -71,6 +71,7 @@
 extern void _PyUnicode_Fini(void);
 extern int _PyLong_Init(void);
 extern void PyLong_Fini(void);
+extern void _PyRandom_Init(void);
 extern int _PyFaulthandler_Init(void);
 extern void _PyFaulthandler_Fini(void);
 
@@ -219,6 +220,8 @@
     if ((p = Py_GETENV("PYTHONDONTWRITEBYTECODE")) && *p != '\0')
         Py_DontWriteBytecodeFlag = add_flag(Py_DontWriteBytecodeFlag, p);
 
+    _PyRandom_Init();
+
     interp = PyInterpreterState_New();
     if (interp == NULL)
         Py_FatalError("Py_Initialize: can't make first interpreter");
diff -r 3be60a4c8c63 Python/random.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Python/random.c	Wed Jan 25 12:28:05 2012 -0500
@@ -0,0 +1,268 @@
+#include "Python.h"
+#ifdef MS_WINDOWS
+#include <windows.h>
+#else
+#include <fcntl.h>
+#endif
+
+static int random_initialized = 0;
+
+#ifdef MS_WINDOWS
+typedef BOOL (WINAPI *CRYPTACQUIRECONTEXTA)(HCRYPTPROV *phProv,\
+              LPCSTR pszContainer, LPCSTR pszProvider, DWORD dwProvType,\
+              DWORD dwFlags );
+typedef BOOL (WINAPI *CRYPTGENRANDOM)(HCRYPTPROV hProv, DWORD dwLen,\
+              BYTE *pbBuffer );
+
+static CRYPTGENRANDOM pCryptGenRandom = NULL;
+/* This handle is never explicitly released. Instead, the operating
+   system will release it when the process terminates. */
+static HCRYPTPROV hCryptProv = 0;
+
/* Lazily acquire a Windows CryptoAPI context for CryptGenRandom, looking
   the entry points up dynamically from advapi32.dll.
   On failure: if "raise" is non-zero, set a Python exception; otherwise
   abort via Py_FatalError (used during interpreter startup, before
   exceptions can be raised).
   Return 0 on success, -1 on error. */
static int
win32_urandom_init(int raise)
{
    HINSTANCE hAdvAPI32 = NULL;
    CRYPTACQUIRECONTEXTA pCryptAcquireContext = NULL;

    /* Obtain handle to the DLL containing CryptoAPI. This should not fail. */
    hAdvAPI32 = GetModuleHandle("advapi32.dll");
    if(hAdvAPI32 == NULL)
        goto error;

    /* Obtain pointers to the CryptoAPI functions. This will fail on some early
       versions of Win95. */
    pCryptAcquireContext = (CRYPTACQUIRECONTEXTA)GetProcAddress(
                               hAdvAPI32, "CryptAcquireContextA");
    if (pCryptAcquireContext == NULL)
        goto error;

    pCryptGenRandom = (CRYPTGENRANDOM)GetProcAddress(hAdvAPI32,
                                                     "CryptGenRandom");
    if (pCryptGenRandom == NULL)
        goto error;

    /* Acquire context (CRYPT_VERIFYCONTEXT: no persistent key container
       is needed, we only generate random bytes): */
    if (! pCryptAcquireContext(&hCryptProv, NULL, NULL,
                               PROV_RSA_FULL, CRYPT_VERIFYCONTEXT))
        goto error;

    return 0;

error:
    if (raise)
        PyErr_SetFromWindowsErr(0);
    else
        Py_FatalError("Fail to initialize Windows random API (CryptoGen)");
    return -1;
}
+
+/* Fill buffer with size pseudo-random bytes generated by the Windows CryptoGen
+   API. Return 0 on success, or -1 on error. */
+static int
+win32_urandom(unsigned char *buffer, Py_ssize_t size, int raise)
+{
+    Py_ssize_t orig_size = size;
+    Py_ssize_t chunk;
+
+    if (hCryptProv == 0)
+    {
+        if (win32_urandom_init(raise) == -1)
+            return -1;
+    }
+
+    while (size > 0)
+    {
+        chunk = Py_MIN(size, INT_MAX);
+        if (!pCryptGenRandom(hCryptProv, chunk, buffer))
+        {
+            /* CryptGenRandom() failed */
+            if (raise)
+                PyErr_SetFromWindowsErr(0);
+            else
+                Py_FatalError("Fail to initialized the randomized hash "
+                        "secret using CryptoGen)");
+            return -1;
+        }
+        buffer += chunk;
+        size -= chunk;
+    }
+    return 0;
+}
+
+#else
+
+/* Read size bytes from /dev/urandom into buffer.
+   Call Py_FatalError() on error. */
+static void
+dev_urandom_noraise(char *buffer, Py_ssize_t size)
+{
+    int fd;
+    Py_ssize_t n;
+
+    assert (0 < size);
+
+    fd = open("/dev/urandom", O_RDONLY);
+    if (fd < 0)
+        Py_FatalError("Fail to open /dev/urandom");
+
+    while (0 < size)
+    {
+        do {
+            n = read(fd, buffer, (size_t)size);
+        } while (n < 0 && errno == EINTR);
+        if (n <= 0)
+        {
+            /* stop on error or if read(size) returned 0 */
+            Py_FatalError("Fail to read bytes from /dev/urandom");
+            break;
+        }
+        buffer += n;
+        size -= (Py_ssize_t)n;
+    }
+    close(fd);
+}
+
/* Read size bytes from /dev/urandom into buffer.
   Return 0 on success, raise an exception and return -1 on error. */
static int
dev_urandom_python(char *buffer, Py_ssize_t size)
{
    int fd;
    Py_ssize_t n;

    if (size <= 0)
        return 0;

    /* Release the GIL around the potentially-blocking open(2): */
    Py_BEGIN_ALLOW_THREADS
    fd = open("/dev/urandom", O_RDONLY);
    Py_END_ALLOW_THREADS
    if (fd < 0)
    {
        PyErr_SetFromErrnoWithFilename(PyExc_OSError, "/dev/urandom");
        return -1;
    }

    /* Read in a loop: read(2) may return fewer bytes than requested.
       Reads interrupted by a signal (EINTR) are retried; the GIL stays
       released for the whole loop.  On exit, n <= 0 indicates an error
       or premature EOF (size is guaranteed > 0 on entry to the loop). */
    Py_BEGIN_ALLOW_THREADS
    do {
        do {
            n = read(fd, buffer, (size_t)size);
        } while (n < 0 && errno == EINTR);
        if (n <= 0)
            break;
        buffer += n;
        size -= (Py_ssize_t)n;
    } while (0 < size);
    Py_END_ALLOW_THREADS

    if (n <= 0)
    {
        /* stop on error or if read(size) returned 0 */
        if (n < 0)
            PyErr_SetFromErrno(PyExc_OSError);
        else
            PyErr_Format(PyExc_RuntimeError,
                         "Fail to read %zi bytes from /dev/urandom",
                         size);
        close(fd);
        return -1;
    }
    close(fd);
    return 0;
}
+#endif
+
/* Fill buffer with pseudo-random bytes generated by a linear congruential
   generator (LCG):

       x(n+1) = (x(n) * 214013 + 2531011) % 2^32

   Use bits 23..16 of x(n) to generate a byte. */
static void
lcg_urandom(unsigned int x0, unsigned char *buffer, size_t size)
{
    unsigned int state = x0;
    size_t i;

    for (i = 0; i < size; i++) {
        /* Advance the LCG; unsigned arithmetic wraps modulo
           2 ^ (8 * sizeof(int)) automatically. */
        state = state * 214013 + 2531011;
        /* Emit bits 23..16 of the state as the next byte: */
        buffer[i] = (unsigned char)((state >> 16) & 0xff);
    }
}
+
+/* Fill buffer with size pseudo-random bytes, not suitable for cryptographic
+   use, from the operating random number generator (RNG).
+
+   Return 0 on success, raise an exception and return -1 on error. */
+int
+_PyOS_URandom(void *buffer, Py_ssize_t size)
+{
+    if (size < 0) {
+        PyErr_Format(PyExc_ValueError,
+                     "negative argument not allowed");
+        return -1;
+    }
+    if (size == 0)
+        return 0;
+
+#ifdef MS_WINDOWS
+    return win32_urandom((unsigned char *)buffer, size, 1);
+#else
+    return dev_urandom_python((char*)buffer, size);
+#endif
+}
+
+void
+_PyRandom_Init(void)
+{
+    char *env;
+    void *secret = &_Py_HashSecret;
+    Py_ssize_t secret_size = sizeof(_Py_HashSecret);
+
+    if (random_initialized)
+        return;
+    random_initialized = 1;
+
+    env = Py_GETENV("PYTHONHASHSEED");
+    if (env && *env != '\0') {
+        char *endptr = env;
+        unsigned long seed;
+        seed = strtoul(env, &endptr, 10);
+        if (*endptr != '\0'
+            || seed > 4294967295UL
+            || (errno == ERANGE && seed == ULONG_MAX))
+        {
+            Py_FatalError("PYTHONHASHSEED must be an integer "
+                          "in range [0; 4294967295]");
+        }
+        if (seed == 0) {
+            /* disable the randomized Unicode hash */
+            memset(secret, 0, secret_size);
+        }
+        else {
+            lcg_urandom(seed, (unsigned char*)secret, secret_size);
+        }
+    }
+    else {
+#ifdef MS_WINDOWS
+#if 1
+        (void)win32_urandom((unsigned char *)secret, secret_size, 0);
+#else
+        /* fast but weak RNG (fast initialization, weak seed) */
+        _PyTime_timeval t;
+        unsigned int seed;
+        _PyTime_gettimeofday(&t);
+        seed = (unsigned int)t.tv_sec;
+        seed ^= t.tv_usec;
+        seed ^= getpid();
+        lcg_urandom(seed, (unsigned char*)secret, secret_size);
+#endif
+#else
+        dev_urandom_noraise((char*)secret, secret_size);
+#endif
+    }
+}
+
diff -r 3be60a4c8c63 Python/sysmodule.c
--- a/Python/sysmodule.c	Fri Jan 20 11:01:06 2012 -0500
+++ b/Python/sysmodule.c	Wed Jan 25 12:28:05 2012 -0500
@@ -993,6 +993,20 @@
 10. Number of stack pops performed by call_function()"
 );
 
+PyDoc_STRVAR(_getrandomizedhash_doc,
+"_getrandomizedhash(object) -> int\n\
+\n\
+For selftests only: return PyObject_RandomizedHash for the given object.");
+
+static PyObject *
+sys__getrandomizedhash(PyObject *self, PyObject *arg)
+{
+    return PyLong_FromSsize_t(PyObject_RandomizedHash(arg));
+    /* FIXME: when backporting, note the Py_hash_t to long change,
+      and make it just PyLong_FromLong on Python 2.* */
+}
+
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -1093,6 +1107,8 @@
     {"settrace",        sys_settrace, METH_O, settrace_doc},
     {"gettrace",        sys_gettrace, METH_NOARGS, gettrace_doc},
     {"call_tracing", sys_call_tracing, METH_VARARGS, call_tracing_doc},
+    {"_getrandomizedhash",   (PyCFunction)sys__getrandomizedhash,
+     METH_O, _getrandomizedhash_doc},
     {NULL,              NULL}           /* sentinel */
 };
 


More information about the Python-bugs-list mailing list