[issue13703] Hash collision security issue
Dave Malcolm
report at bugs.python.org
Wed Jan 25 18:49:19 CET 2012
Dave Malcolm <dmalcolm at redhat.com> added the comment:
On Wed, 2012-01-25 at 12:45 +0000, Dave Malcolm wrote:
> Dave Malcolm <dmalcolm at redhat.com> added the comment:
>
> I've found a bug in my patch; insertdict writes the old non-randomized
> hash value into me_hash at:
> ep->me_hash = hash;
> rather than using the randomized hash, leading to issues when tested
> against a real attack.
I'm attaching a revised version of the patch that should fix the above
issue:
hybrid-approach-dmalcolm-2012-01-25-002.patch
Changes relative to -001.patch:
* updated insertdict() so that when it writes ep->me_hash, it uses the
correct hash value. Unfortunately there doesn't seem to be a good way
of reusing the value we calculated in the "paranoid" ma_lookup
callbacks, without breaking ABI (suggestions welcome), so we call
PyObject_RandomizedHash again.
* slightly reworked the two _paranoid ma_lookup callbacks to capture the
randomized hash as a local variable, in case there's a way of reusing it
in insertdict()
* when lookdict() calls into itself, it now calls mp->ma_lookup instead
* don't generate a fatal error with an unknown ma_lookup callback.
With this, I'm able to insert 200,000 non-equal PyUnicodeObject with
hash()==0 into a dict on a 32-bit build --with-pydebug in 2.2 seconds;
it can retrieve all the values correctly in about 4 seconds [compare
with ~1.5 hours of CPU churn for inserting the same data on an optimized
build without the patch on the same guest].
The amortized ratio of total work done per modification increases
linearly when under an O(N^2) attack, and the dict switches itself to
paranoid mode 56 insertions after ma_table stops using ma_smalltable
(that's when we start tracking stats). After the transition to paranoid
mode, it drops to an average of a little under 2 probes per insertion
(the amortized ratio seems to be converging to about 1.9 probes per key
insertion at the point where my hostile test data runs out).
----------
Added file: http://bugs.python.org/file24324/hybrid-approach-dmalcolm-2012-01-25-002.patch
_______________________________________
Python tracker <report at bugs.python.org>
<http://bugs.python.org/issue13703>
_______________________________________
-------------- next part --------------
diff -r 3be60a4c8c63 Doc/using/cmdline.rst
--- a/Doc/using/cmdline.rst Fri Jan 20 11:01:06 2012 -0500
+++ b/Doc/using/cmdline.rst Wed Jan 25 12:28:05 2012 -0500
@@ -460,6 +460,12 @@
option.
+.. envvar:: PYTHONHASHSEED
+
+ If this is set, it is used as a fixed seed when randomizing hashes.
+ It should be a number in the range [0; 4294967295]. The value 0 disables
+ the hash randomization.
+
.. envvar:: PYTHONIOENCODING
If this is set before running the interpreter, it overrides the encoding used
diff -r 3be60a4c8c63 Include/bytesobject.h
--- a/Include/bytesobject.h Fri Jan 20 11:01:06 2012 -0500
+++ b/Include/bytesobject.h Wed Jan 25 12:28:05 2012 -0500
@@ -124,6 +124,8 @@
#define F_ALT (1<<3)
#define F_ZERO (1<<4)
+PyAPI_FUNC(Py_hash_t) _PyBytes_RandomizedHash(PyObject *bytes);
+
#ifdef __cplusplus
}
#endif
diff -r 3be60a4c8c63 Include/object.h
--- a/Include/object.h Fri Jan 20 11:01:06 2012 -0500
+++ b/Include/object.h Wed Jan 25 12:28:05 2012 -0500
@@ -488,6 +488,8 @@
PyAPI_FUNC(int) PyObject_GenericSetAttr(PyObject *,
PyObject *, PyObject *);
PyAPI_FUNC(Py_hash_t) PyObject_Hash(PyObject *);
+PyAPI_FUNC(Py_hash_t) PyObject_RandomizedHash(PyObject *);
+
PyAPI_FUNC(Py_hash_t) PyObject_HashNotImplemented(PyObject *);
PyAPI_FUNC(int) PyObject_IsTrue(PyObject *);
PyAPI_FUNC(int) PyObject_Not(PyObject *);
@@ -521,6 +523,12 @@
PyAPI_FUNC(Py_hash_t) _Py_HashDouble(double);
PyAPI_FUNC(Py_hash_t) _Py_HashPointer(void*);
PyAPI_FUNC(Py_hash_t) _Py_HashBytes(unsigned char*, Py_ssize_t);
+PyAPI_FUNC(Py_hash_t) _Py_RandomizedHashBytes(unsigned char*, Py_ssize_t);
+typedef struct {
+ Py_hash_t prefix;
+ Py_hash_t suffix;
+} _Py_HashSecret_t;
+PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret;
#endif
/* Helper for passing objects to printf and the like */
diff -r 3be60a4c8c63 Include/pythonrun.h
--- a/Include/pythonrun.h Fri Jan 20 11:01:06 2012 -0500
+++ b/Include/pythonrun.h Wed Jan 25 12:28:05 2012 -0500
@@ -246,6 +246,8 @@
PyAPI_FUNC(PyOS_sighandler_t) PyOS_getsig(int);
PyAPI_FUNC(PyOS_sighandler_t) PyOS_setsig(int, PyOS_sighandler_t);
+/* Random */
+PyAPI_FUNC(int) _PyOS_URandom (void *buffer, Py_ssize_t size);
#ifdef __cplusplus
}
diff -r 3be60a4c8c63 Include/unicodeobject.h
--- a/Include/unicodeobject.h Fri Jan 20 11:01:06 2012 -0500
+++ b/Include/unicodeobject.h Wed Jan 25 12:28:05 2012 -0500
@@ -2154,6 +2154,8 @@
/* Clear all static strings. */
PyAPI_FUNC(void) _PyUnicode_ClearStaticStrings(void);
+PyAPI_FUNC(Py_hash_t) _PyUnicode_RandomizedHash(PyObject *unicode);
+
#ifdef __cplusplus
}
#endif
diff -r 3be60a4c8c63 Lib/os.py
--- a/Lib/os.py Fri Jan 20 11:01:06 2012 -0500
+++ b/Lib/os.py Wed Jan 25 12:28:05 2012 -0500
@@ -761,23 +761,6 @@
except NameError: # statvfs_result may not exist
pass
-if not _exists("urandom"):
- def urandom(n):
- """urandom(n) -> str
-
- Return a string of n random bytes suitable for cryptographic use.
-
- """
- try:
- _urandomfd = open("/dev/urandom", O_RDONLY)
- except (OSError, IOError):
- raise NotImplementedError("/dev/urandom (or equivalent) not found")
- bs = b""
- while len(bs) < n:
- bs += read(_urandomfd, n - len(bs))
- close(_urandomfd)
- return bs
-
# Supply os.popen()
def popen(cmd, mode="r", buffering=-1):
if not isinstance(cmd, str):
diff -r 3be60a4c8c63 Lib/test/regrtest.py
--- a/Lib/test/regrtest.py Fri Jan 20 11:01:06 2012 -0500
+++ b/Lib/test/regrtest.py Wed Jan 25 12:28:05 2012 -0500
@@ -559,6 +559,11 @@
except ValueError:
print("Couldn't find starting test (%s), using all tests" % start)
if randomize:
+ hashseed = os.getenv('PYTHONHASHSEED')
+ if not hashseed:
+ os.environ['PYTHONHASHSEED'] = str(random_seed)
+ os.execv(sys.executable, [sys.executable] + sys.argv)
+ return
random.seed(random_seed)
print("Using random seed", random_seed)
random.shuffle(selected)
diff -r 3be60a4c8c63 Lib/test/test_dict.py
--- a/Lib/test/test_dict.py Fri Jan 20 11:01:06 2012 -0500
+++ b/Lib/test/test_dict.py Wed Jan 25 12:28:05 2012 -0500
@@ -3,6 +3,8 @@
import collections, random, string
import gc, weakref
+import sys
+import time
class DictTest(unittest.TestCase):
@@ -757,6 +759,301 @@
self._tracked(MyDict())
+# Support classes for HashCollisionTests:
+class ChosenHash:
+ """
+ Use this to create arbitrary collections of keys that are non-equal
+ but have equal hashes, without needing to include hostile data
+ within the test suite.
+ """
+ def __init__(self, variability, hash):
+ self.variability = variability
+ self.hash = hash
+
+ def __eq__(self, other):
+ # The variability field is used to handle non-equalness:
+ return self.variability == other.variability
+
+ def __hash__(self):
+ return self.hash
+
+ def __repr__(self):
+ return 'ChosenHash(%r, %r)' % (self.variability,
+ self.hash)
+
+class Timer:
+ """
+ Simple way to measure time elapsed during a test case
+ """
+ def __init__(self):
+ self.starttime = time.time()
+
+ def get_elapsed_time(self):
+ """Get elapsed time in seconds as a float"""
+ curtime = time.time()
+ return curtime - self.starttime
+
+ def elapsed_time_as_str(self):
+ """Get elapsed time as a string (with units)"""
+ return '%0.3f seconds' % self.get_elapsed_time()
+
+class TookTooLong(RuntimeError):
+ def __init__(self, timelimit, elapsed, itercount=None):
+ self.timelimit = timelimit
+ self.elapsed = elapsed
+ self.itercount = itercount
+
+ def __str__(self):
+ result = 'took >= %s seconds' % self.timelimit
+ if self.itercount is not None:
+ result += (' (%0.3f seconds elapsed after %i iterations)'
+ % (self.elapsed, self.itercount))
+ else:
+ result += ' (%0.3f seconds elapsed)' % self.elapsed
+ return result
+
+# Some of the tests involve constructing large dictionaries. How big
+# should they be?
+ITEM_COUNT = 100000
+
+# Arbitrary threshold (in seconds) for a "reasonable amount of time"
+# that it should take to work with ITEM_COUNT items:
+TIME_LIMIT = 5
+
+class _FasterThanContext(object):
+ """
+ A context manager for implementing assertFasterThan
+ """
+ def __init__(self, test_case, **kwargs):
+ self.test_case = test_case
+ if 'seconds' in kwargs:
+ self.timelimit = kwargs['seconds']
+ else:
+ raise ValueError()
+
+ def __enter__(self):
+ self.timer = Timer()
+ return self
+
+ def __exit__(self, exc_type, exc_value, tb):
+ if exc_type is not None:
+ # let unexpected exceptions pass through
+ return
+
+ self.check_for_timeout()
+ if 1:
+ print('timer within %s took %s'
+ % (self.test_case, self.timer.elapsed_time_as_str()))
+
+ def handle(self, callable_obj, args, kwargs):
+ """
+ If callable_obj is None, assertRaises/Warns is being used as a
+ context manager, so check for a 'msg' kwarg and return self.
+ If callable_obj is not None, call it passing args and kwargs.
+ """
+ if callable_obj is None:
+ self.msg = kwargs.pop('msg', None)
+ return self
+ with self:
+ callable_obj(*args, **kwargs)
+
+ def check_for_timeout(self, itercount=None):
+ """
+ Allow directly checking for timeouts, potentially supplying an
+ iteration count if within a loop. If the timer has elapsed, this will
+ raise a TookTooLong exception (potentially indicating how many
+ iterations were completed when the time limit was reached).
+
+ Otherwise, it does nothing.
+ """
+ elapsed_time = self.timer.get_elapsed_time()
+ if elapsed_time > self.timelimit:
+ raise TookTooLong(self.timelimit,
+ elapsed_time,
+ itercount)
+
+ at support.cpython_only
+class HashCollisionTests(unittest.TestCase):
+ """
+ Issue 13703: tests about the behavior of dicts in the face of hostile data
+ """
+
+ def assertFasterThan(self, callableObj=None, *args, **kwargs):
+ context = _FasterThanContext(self, *args, **kwargs)
+ return context.handle(callableObj, args, kwargs)
+
+ def test_timings_with_benign_data(self):
+ # Verify that inserting many keys into a dict only takes a few seconds
+ d = dict()
+ with self.assertFasterThan(seconds=TIME_LIMIT) as cm:
+ for i in range(ITEM_COUNT):
+ d[i] = 0
+
+ # Verify that we can also retrieve the values quickly:
+ with self.assertFasterThan(seconds=TIME_LIMIT) as cm:
+ d[i]
+
+ # Verify that we can quickly insert the same item many times
+ # (overwriting each time):
+ d = dict()
+ with self.assertFasterThan(seconds=TIME_LIMIT) as cm:
+ for i in range(ITEM_COUNT):
+ d[0] = 0
+
+ def assert_ma_lookup(self, d, name):
+ # verify that a dict's ma_lookup hook has a specific value
+ if hasattr(d, '_stats'):
+ self.assertEqual(d._stats()['ma_lookup'],
+ name)
+
+ def test_dict_stats(self):
+ # test for the diagnostic _stats() method, if it exists:
+ if hasattr(dict, '_stats'):
+ # Simple cases in which no probing should happen at all:
+ for d in [dict(zip(range(100), range(100))),
+ dict(zip(range(1000), range(1000))),
+ dict(zip(range(10000), range(10000)))]:
+ stats = d._stats()
+ self.assertIn('ma_mask', stats)
+ # ma_mask will be large for these dicts
+
+ # We don't expect any probing happened during construction:
+ self.assertEqual(stats['insertion_iter_count'], 0)
+ # They transition from ma_smalltable at size 6:
+ self.assertEqual(stats['num_insertions'], len(d) - 6)
+ self.assertEqual(stats['iter_count_per_insertion'], 0.0)
+
+ # We shouldn't have transitioned to paranoid mode:
+ self.assert_ma_lookup(d, 'lookdict')
+
+ # Cases in which we expect some hash collisions:
+ from itertools import permutations
+
+ for name, d, exp_length in [
+ ('All strings of length 4',
+ dict([(''.join(chars), 1)
+ for chars in permutations('abcdefghijklmnopqrstuvwxyz', 4)]),
+ 358800),
+ ]:
+
+ self.assertEqual(len(d), exp_length)
+ stats = d._stats()
+
+ # ma_mask will be large for this dict:
+ self.assertGreater(stats['ma_mask'], 7)
+
+ self.assertEqual(stats['num_insertions'], len(d) - 6)
+
+ # We expect significant probing happened during construction:
+ self.assertGreater(stats['insertion_iter_count'], 100000)
+ # ...but it should be sane relative to the amount of work we were
+ # asked to do:
+ self.assertLessEqual(stats['iter_count_per_insertion'], 2.0)
+ # and so we didn't transition:
+ self.assert_ma_lookup(d, 'lookdict_unicode')
+
+ def test_not_reaching_limit(self):
+ # Ensure that we can insert equal-hashed keys up to (but not reaching)
+ # the collision limit:
+ with self.assertFasterThan(seconds=TIME_LIMIT) as cm:
+ d = dict()
+ for i in range(50):
+ key = ChosenHash(variability=i, hash=42)
+ d[key] = 0
+ self.assert_ma_lookup(d, 'lookdict')
+
+ def test_many_hash_collisions(self):
+ """
+ Ensure we gracefully handle inserting a large number of non-equal keys
+ with the same hash that transition the dict to "paranoid" mode:
+ """
+ with self.assertFasterThan(seconds=TIME_LIMIT) as cm:
+ d = dict()
+ for i in range(60):
+ key = ChosenHash(variability=i, hash=42)
+ d[key] = 0
+
+ # Verify that we exceeded the safety threshold:
+ if hasattr(d, '_stats'):
+ self.assertGreaterEqual(d._stats()['iter_count_per_insertion'],
+ 32.0)
+
+ # Verify that the dict transitioned to paranoid mode:
+ self.assert_ma_lookup(d, 'lookdict_paranoid')
+
+ # Verify key lookup:
+ for i in range(60):
+ key = ChosenHash(variability=i, hash=42)
+ self.assertEqual(d[key], 0)
+
+ # Frank Sievertsen found scenarios in which the collision-counting
+ # scheme could be attacked:
+ # http://mail.python.org/pipermail/python-dev/2012-January/115726.html
+
+ def test_scenario_b_from_Frank_Sievertsen(self):
+ # This typically takes about 3 seconds on my box (with a debug build):
+ with self.assertFasterThan(seconds=TIME_LIMIT * 2) as cm:
+ d = dict()
+
+ # Insert non-equal hash collisions up to but not reaching the
+ # limit where we take effect:
+ for i in range(50):
+ key = ChosenHash(variability=i, hash=42)
+ d[key] = 0
+ cm.check_for_timeout(i)
+
+ # We shouldn't have transitioned yet:
+ self.assert_ma_lookup(d, 'lookdict')
+
+ # Now try to add many equal values that collide
+ # with the hash, and see how long it takes
+ for i in range(ITEM_COUNT):
+ key = ChosenHash(variability=0, hash=42)
+ d[key] = 0
+ cm.check_for_timeout(i)
+
+ # We still shouldn't have transitioned:
+ self.assert_ma_lookup(d, 'lookdict')
+
+ def test_paranoid_str_dict(self):
+ # Exercise the handling of pure-str paranoid dict:
+ if hasattr(dict, '_make_paranoid'):
+ from itertools import permutations
+
+ d = dict([(''.join(chars), 1)
+ for chars in permutations('abcdefghijklmnopqrstuvwxyz', 4)])
+
+ # It shouldn't be in paranoid mode:
+ self.assert_ma_lookup(d, 'lookdict_unicode')
+
+ with self.assertFasterThan(seconds=TIME_LIMIT) as cm:
+ # Force it into paranoid mode:
+ d._make_paranoid()
+ self.assert_ma_lookup(d, 'lookdict_unicode_paranoid')
+ self.assertIn('abcd', d)
+ self.assertEqual(d['zyxw'], 1)
+
+ # Now empty the dict:
+ d.clear()
+
+ # We should now have a smalltable dict, but it still uses the
+ # paranoid ma_lookup:
+ if hasattr(d, '_stats'):
+ self.assertEqual(d._stats()['ma_mask'], 7)
+ self.assert_ma_lookup(d, 'lookdict_unicode_paranoid')
+
+ # Verify that the now-empty dict is still usable:
+ self.assertNotIn('abcd', d)
+ d['foo'] = 'bar'
+ self.assertIn('foo', d)
+ self.assertEqual(d['foo'], 'bar')
+
+ # Transition it from pure-str to mixed mode:
+ d[1] = 'not a str'
+ self.assert_ma_lookup(d, 'lookdict_paranoid')
+ self.assertIn(1, d)
+ self.assertEqual(d[1], 'not a str')
+
from test import mapping_tests
class GeneralMappingTests(mapping_tests.BasicTestMappingProtocol):
@@ -771,6 +1068,7 @@
def test_main():
support.run_unittest(
DictTest,
+ HashCollisionTests,
GeneralMappingTests,
SubclassMappingTests,
)
diff -r 3be60a4c8c63 Lib/test/test_hash.py
--- a/Lib/test/test_hash.py Fri Jan 20 11:01:06 2012 -0500
+++ b/Lib/test/test_hash.py Wed Jan 25 12:28:05 2012 -0500
@@ -4,9 +4,12 @@
# Also test that hash implementations are inherited as expected
import unittest
+import struct
from test import support
+from test.script_helper import assert_python_ok
from collections import Hashable
+IS_64BIT = (struct.calcsize('l') == 8)
class HashEqualityTestCase(unittest.TestCase):
@@ -117,10 +120,56 @@
for obj in self.hashes_to_check:
self.assertEqual(hash(obj), _default_hash(obj))
+ at support.cpython_only
+class HashRandomizationTest(unittest.TestCase):
+ types_to_check = [str,
+ lambda s: bytes(s, encoding='ascii')]
+
+ def get_randomized_hash(self, text, seed=None):
+ env = {}
+ if seed is not None:
+ env['PYTHONHASHSEED'] = str(seed)
+ out = assert_python_ok(
+ '-c', 'import sys; print(sys._getrandomizedhash(%r))' % text,
+ **env)
+ stdout = out[1].strip()
+ return int(stdout)
+
+ def test_empty_string(self):
+ for t in self.types_to_check:
+ self.assertEqual(hash(t("")), 0)
+
+ def test_null_hash(self):
+ # PYTHONHASHSEED=0 disables the randomized hash
+ if IS_64BIT:
+ h = 1453079729188098211
+ else:
+ h = -1600925533
+ for t in self.types_to_check:
+ self.assertEqual(self.get_randomized_hash(t("abc"), seed=0), h)
+
+ def test_fixed_hash(self):
+ # test a fixed seed for the randomized hash
+ if IS_64BIT:
+ h = -4410911502303878509
+ else:
+ h = -206076799
+ for t in self.types_to_check:
+ self.assertEqual(self.get_randomized_hash(t("abc"), seed=42), h)
+
+ def test_randomized_hash(self):
+ # two runs should return different hashes
+ for t in self.types_to_check:
+ run1 = self.get_randomized_hash(t("abc"))
+ run2 = self.get_randomized_hash(t("abc"))
+ self.assertNotEqual(run1, run2)
+
+
def test_main():
support.run_unittest(HashEqualityTestCase,
- HashInheritanceTestCase,
- HashBuiltinsTestCase)
+ HashInheritanceTestCase,
+ HashBuiltinsTestCase,
+ HashRandomizationTest)
if __name__ == "__main__":
diff -r 3be60a4c8c63 Lib/test/test_os.py
--- a/Lib/test/test_os.py Fri Jan 20 11:01:06 2012 -0500
+++ b/Lib/test/test_os.py Wed Jan 25 12:28:05 2012 -0500
@@ -12,6 +12,7 @@
import time
import shutil
from test import support
+from test.script_helper import assert_python_ok
import contextlib
import mmap
import platform
@@ -630,14 +631,32 @@
self.assertEqual(f.read(), b'')
class URandomTests(unittest.TestCase):
- def test_urandom(self):
- try:
- self.assertEqual(len(os.urandom(1)), 1)
- self.assertEqual(len(os.urandom(10)), 10)
- self.assertEqual(len(os.urandom(100)), 100)
- self.assertEqual(len(os.urandom(1000)), 1000)
- except NotImplementedError:
- pass
+ def test_urandom_length(self):
+ self.assertEqual(len(os.urandom(1)), 1)
+ self.assertEqual(len(os.urandom(10)), 10)
+ self.assertEqual(len(os.urandom(100)), 100)
+ self.assertEqual(len(os.urandom(1000)), 1000)
+
+ def test_urandom_value(self):
+ data1 = os.urandom(16)
+ data2 = os.urandom(16)
+ self.assertNotEqual(data1, data2)
+
+ def get_urandom_subprocess(self, count):
+ code = '\n'.join((
+ 'import os, sys',
+ 'data = os.urandom(%s)' % count,
+ 'sys.stdout.buffer.write(data)',
+ 'sys.stdout.buffer.flush()'))
+ out = assert_python_ok('-c', code)
+ stdout = out[1]
+ self.assertEqual(len(stdout), 16)
+ return stdout
+
+ def test_urandom_subprocess(self):
+ data1 = self.get_urandom_subprocess(16)
+ data2 = self.get_urandom_subprocess(16)
+ self.assertNotEqual(data1, data2)
@contextlib.contextmanager
def _execvpe_mockup(defpath=None):
diff -r 3be60a4c8c63 Makefile.pre.in
--- a/Makefile.pre.in Fri Jan 20 11:01:06 2012 -0500
+++ b/Makefile.pre.in Wed Jan 25 12:28:05 2012 -0500
@@ -322,6 +322,7 @@
Python/pystate.o \
Python/pythonrun.o \
Python/pytime.o \
+ Python/random.o \
Python/structmember.o \
Python/symtable.o \
Python/sysmodule.o \
diff -r 3be60a4c8c63 Modules/posixmodule.c
--- a/Modules/posixmodule.c Fri Jan 20 11:01:06 2012 -0500
+++ b/Modules/posixmodule.c Wed Jan 25 12:28:05 2012 -0500
@@ -9329,81 +9329,36 @@
}
#endif
-#ifdef MS_WINDOWS
-
-PyDoc_STRVAR(win32_urandom__doc__,
+PyDoc_STRVAR(posix_urandom__doc__,
"urandom(n) -> str\n\n\
-Return n random bytes suitable for cryptographic use.");
-
-typedef BOOL (WINAPI *CRYPTACQUIRECONTEXTA)(HCRYPTPROV *phProv,\
- LPCSTR pszContainer, LPCSTR pszProvider, DWORD dwProvType,\
- DWORD dwFlags );
-typedef BOOL (WINAPI *CRYPTGENRANDOM)(HCRYPTPROV hProv, DWORD dwLen,\
- BYTE *pbBuffer );
-
-static CRYPTGENRANDOM pCryptGenRandom = NULL;
-/* This handle is never explicitly released. Instead, the operating
- system will release it when the process terminates. */
-static HCRYPTPROV hCryptProv = 0;
+Return n pseudo-random bytes.");
static PyObject*
-win32_urandom(PyObject *self, PyObject *args)
-{
- int howMany;
- PyObject* result;
+posix_urandom(PyObject *self, PyObject *args)
+{
+ Py_ssize_t size;
+ PyObject *result;
+ int ret;
/* Read arguments */
- if (! PyArg_ParseTuple(args, "i:urandom", &howMany))
- return NULL;
- if (howMany < 0)
+ if (!PyArg_ParseTuple(args, "n:urandom", &size))
+ return NULL;
+ if (size < 0)
return PyErr_Format(PyExc_ValueError,
"negative argument not allowed");
- if (hCryptProv == 0) {
- HINSTANCE hAdvAPI32 = NULL;
- CRYPTACQUIRECONTEXTA pCryptAcquireContext = NULL;
-
- /* Obtain handle to the DLL containing CryptoAPI
- This should not fail */
- hAdvAPI32 = GetModuleHandleW(L"advapi32.dll");
- if(hAdvAPI32 == NULL)
- return win32_error("GetModuleHandle", NULL);
-
- /* Obtain pointers to the CryptoAPI functions
- This will fail on some early versions of Win95 */
- pCryptAcquireContext = (CRYPTACQUIRECONTEXTA)GetProcAddress(
- hAdvAPI32,
- "CryptAcquireContextA");
- if (pCryptAcquireContext == NULL)
- return PyErr_Format(PyExc_NotImplementedError,
- "CryptAcquireContextA not found");
-
- pCryptGenRandom = (CRYPTGENRANDOM)GetProcAddress(
- hAdvAPI32, "CryptGenRandom");
- if (pCryptGenRandom == NULL)
- return PyErr_Format(PyExc_NotImplementedError,
- "CryptGenRandom not found");
-
- /* Acquire context */
- if (! pCryptAcquireContext(&hCryptProv, NULL, NULL,
- PROV_RSA_FULL, CRYPT_VERIFYCONTEXT))
- return win32_error("CryptAcquireContext", NULL);
- }
-
- /* Allocate bytes */
- result = PyBytes_FromStringAndSize(NULL, howMany);
- if (result != NULL) {
- /* Get random data */
- memset(PyBytes_AS_STRING(result), 0, howMany); /* zero seed */
- if (! pCryptGenRandom(hCryptProv, howMany, (unsigned char*)
- PyBytes_AS_STRING(result))) {
- Py_DECREF(result);
- return win32_error("CryptGenRandom", NULL);
- }
+ result = PyBytes_FromStringAndSize(NULL, size);
+ if (result == NULL)
+ return NULL;
+
+ ret = _PyOS_URandom(PyBytes_AS_STRING(result),
+ PyBytes_GET_SIZE(result));
+ if (ret == -1) {
+ Py_DECREF(result);
+ return NULL;
}
return result;
}
-#endif
PyDoc_STRVAR(device_encoding__doc__,
"device_encoding(fd) -> str\n\n\
@@ -9445,42 +9400,6 @@
return Py_None;
}
-#ifdef __VMS
-/* Use openssl random routine */
-#include <openssl/rand.h>
-PyDoc_STRVAR(vms_urandom__doc__,
-"urandom(n) -> str\n\n\
-Return n random bytes suitable for cryptographic use.");
-
-static PyObject*
-vms_urandom(PyObject *self, PyObject *args)
-{
- int howMany;
- PyObject* result;
-
- /* Read arguments */
- if (! PyArg_ParseTuple(args, "i:urandom", &howMany))
- return NULL;
- if (howMany < 0)
- return PyErr_Format(PyExc_ValueError,
- "negative argument not allowed");
-
- /* Allocate bytes */
- result = PyBytes_FromStringAndSize(NULL, howMany);
- if (result != NULL) {
- /* Get random data */
- if (RAND_pseudo_bytes((unsigned char*)
- PyBytes_AS_STRING(result),
- howMany) < 0) {
- Py_DECREF(result);
- return PyErr_Format(PyExc_ValueError,
- "RAND_pseudo_bytes");
- }
- }
- return result;
-}
-#endif
-
#ifdef HAVE_SETRESUID
PyDoc_STRVAR(posix_setresuid__doc__,
"setresuid(ruid, euid, suid)\n\n\
@@ -10887,12 +10806,7 @@
#ifdef HAVE_GETLOADAVG
{"getloadavg", posix_getloadavg, METH_NOARGS, posix_getloadavg__doc__},
#endif
- #ifdef MS_WINDOWS
- {"urandom", win32_urandom, METH_VARARGS, win32_urandom__doc__},
- #endif
- #ifdef __VMS
- {"urandom", vms_urandom, METH_VARARGS, vms_urandom__doc__},
- #endif
+ {"urandom", posix_urandom, METH_VARARGS, posix_urandom__doc__},
#ifdef HAVE_SETRESUID
{"setresuid", posix_setresuid, METH_VARARGS, posix_setresuid__doc__},
#endif
diff -r 3be60a4c8c63 Objects/bytesobject.c
--- a/Objects/bytesobject.c Fri Jan 20 11:01:06 2012 -0500
+++ b/Objects/bytesobject.c Wed Jan 25 12:28:05 2012 -0500
@@ -867,6 +867,13 @@
return a->ob_shash;
}
+Py_hash_t
+_PyBytes_RandomizedHash(PyObject *a)
+{
+ return _Py_RandomizedHashBytes((unsigned char *)((PyBytesObject *)a)->ob_sval,
+ Py_SIZE(a));
+}
+
static PyObject*
bytes_subscript(PyBytesObject* self, PyObject* item)
{
diff -r 3be60a4c8c63 Objects/dictobject.c
--- a/Objects/dictobject.c Fri Jan 20 11:01:06 2012 -0500
+++ b/Objects/dictobject.c Wed Jan 25 12:28:05 2012 -0500
@@ -10,6 +10,32 @@
#include "Python.h"
#include "stringlib/eq.h"
+/* undefine this: */
+#define DICT_PROTECTION_TRACKING 1
+
+#define Py_MAX_AVERAGE_PROBES_PER_INSERT 32
+/* power-of-two to make multiplication fast */
+
+/* Avoid overflow by dividing stats when they exceed this: */
+#define Py_DICT_STATS_ROUNDING_THRESHOLD 0x40000000
+
+/* For large dictionaries, reuse the space allocated for ma_smalltable */
+typedef struct _Py_LargeDictFields {
+ int is_inserting;
+
+ /* Check the ratio between these, to track amortized cost per operation */
+ size_t num_insertions;
+ size_t insertion_iter_count;
+
+} _Py_LargeDictFields;
+
+#define DICT_IS_PARANOID(mp) \
+ ((mp)->ma_lookup == lookdict_paranoid \
+ || ((mp)->ma_lookup == lookdict_unicode_paranoid))
+
+#define PyDict_LARGEDICTFIELDS(mp) \
+ ((_Py_LargeDictFields*)&(mp)->ma_smalltable)
+/* FIXME: add assert(mp->ma_table != mp->ma_smalltable) */
/* Set a key error with the specified argument, wrapping it in a
* tuple automatically so that tuple keys are not unpacked as the
@@ -139,11 +165,28 @@
/* forward declarations */
static PyDictEntry *
+lookdict(PyDictObject *mp, PyObject *key, Py_hash_t hash);
+
+static PyDictEntry *
lookdict_unicode(PyDictObject *mp, PyObject *key, Py_hash_t hash);
+static PyDictEntry *
+lookdict_paranoid(PyDictObject *mp, PyObject *key, Py_hash_t hash);
+
+static PyDictEntry *
+lookdict_unicode_paranoid(PyDictObject *mp, PyObject *key, Py_hash_t hash);
+
+static int
+transition_to_paranoid_dict(PyDictObject *mp);
+
+static int
+check_iter_count(PyDictObject *mp, PyObject *key, Py_hash_t hash);
+
#ifdef SHOW_CONVERSION_COUNTS
static long created = 0L;
-static long converted = 0L;
+static long converted_unicode_to_general = 0L;
+static long converted_unicode_to_paranoid = 0L;
+static long converted_general_to_paranoid = 0L;
static void
show_counts(void)
@@ -352,7 +395,7 @@
* XXX A clever adversary could prevent this
* XXX from terminating.
*/
- return lookdict(mp, key, hash);
+ return mp->ma_lookup(mp, key, hash);
}
}
freeslot = NULL;
@@ -384,11 +427,21 @@
* XXX A clever adversary could prevent this
* XXX from terminating.
*/
- return lookdict(mp, key, hash);
+ return mp->ma_lookup(mp, key, hash);
}
}
else if (ep->me_key == dummy && freeslot == NULL)
freeslot = ep;
+
+ if (check_iter_count(mp, key, hash)) {
+ if (-1 == transition_to_paranoid_dict(mp)) {
+ return NULL;
+ }
+
+ /* Everything has changed.
+ Restart the lookup using the new function: */
+ return mp->ma_lookup(mp, key, hash);
+ }
}
assert(0); /* NOT REACHED */
return 0;
@@ -422,7 +475,14 @@
#ifdef SHOW_CONVERSION_COUNTS
++converted;
#endif
- mp->ma_lookup = lookdict;
+ if (mp->ma_lookup == lookdict_unicode) {
+ /* transition from lookdict_unicode -> lookdict */
+ mp->ma_lookup = lookdict;
+ } else {
+ /* transition from lookdict_unicode_paranoid -> lookdict_paranoid */
+ assert(mp->ma_lookup == lookdict_unicode_paranoid);
+ mp->ma_lookup = lookdict_paranoid;
+ }
return lookdict(mp, key, hash);
}
i = (size_t)hash & mask;
@@ -451,11 +511,66 @@
return ep;
if (ep->me_key == dummy && freeslot == NULL)
freeslot = ep;
+ if (check_iter_count(mp, key, hash)) {
+ if (-1 == transition_to_paranoid_dict(mp)) {
+ return NULL;
+ }
+
+ /* Everything has changed.
+ Restart the lookup using the new function: */
+ return mp->ma_lookup(mp, key, hash);
+ }
}
assert(0); /* NOT REACHED */
return 0;
}
+/* Version of lookdict which ignores the hash value passed in, and instead
+ generates an alternate hash for each object using a secret.
+ Note that if the object can't do this, it simply reuses its cached hash
+*/
+static PyDictEntry *
+lookdict_paranoid(PyDictObject *mp, PyObject *key, Py_hash_t hash)
+{
+ /*
+ Ignore the object's cached hash value, and instead compute and use a
+ "secret" one. This won't be cached, meaning that the per-object cost
+ of the call gets a lot larger. However, it ought to still be O(1)
+ */
+ Py_hash_t randomized_hash = PyObject_RandomizedHash(key);
+ PyDictEntry *ep = lookdict(mp, key, randomized_hash);
+ return ep;
+}
+
+/*
+ Similar to lookdict_paranoid and lookdict_unicode: used when a dict has
+ received hostile data, where all keys are PyUnicodeObject
+*/
+static PyDictEntry *
+lookdict_unicode_paranoid(PyDictObject *mp, PyObject *key,
+ Py_hash_t hash)
+{
+ /*
+ As per lookdict_paranoid, we ignore the object's cached hash value
+ */
+ Py_hash_t randomized_hash;
+ PyDictEntry *ep;
+
+ if (!PyUnicode_CheckExact(key)) {
+ randomized_hash = PyObject_RandomizedHash(key);
+ /* this call will transition internally to lookdict_paranoid: */
+ ep = lookdict_unicode(mp, key, randomized_hash);
+ } else {
+ /* use optimized implementation when obtaining randomized hash, since
+ we know it's a PyUnicodeObject: */
+ randomized_hash = _PyUnicode_RandomizedHash(key);
+ ep = lookdict_unicode(mp, key, randomized_hash);
+ }
+
+ return ep;
+}
+
+
int
_PyDict_HasOnlyStringKeys(PyObject *dict)
{
@@ -463,7 +578,8 @@
PyObject *key, *value;
assert(PyDict_Check(dict));
/* Shortcut */
- if (((PyDictObject *)dict)->ma_lookup == lookdict_unicode)
+ if (((PyDictObject *)dict)->ma_lookup == lookdict_unicode ||
+ ((PyDictObject *)dict)->ma_lookup == lookdict_unicode_paranoid)
return 1;
while (PyDict_Next(dict, &pos, &key, &value))
if (!PyUnicode_Check(key))
@@ -530,9 +646,23 @@
PyObject *old_value;
register PyDictEntry *ep;
typedef PyDictEntry *(*lookupfunc)(PyDictObject *, PyObject *, Py_hash_t);
+ _Py_LargeDictFields *large_fields = PyDict_LARGEDICTFIELDS(mp);
assert(mp->ma_lookup != NULL);
+ if (mp->ma_mask >= PyDict_MINSIZE) {
+ large_fields->is_inserting = 1;
+
+ /* Increment num_insertions, protecting against stats overflow: */
+ if (++large_fields->num_insertions > Py_DICT_STATS_ROUNDING_THRESHOLD) {
+ /* Preserve the ratio between these two: */
+ large_fields->insertion_iter_count >>= 4;
+ large_fields->num_insertions >>= 4;
+ }
+ }
ep = mp->ma_lookup(mp, key, hash);
+ if (mp->ma_mask >= PyDict_MINSIZE) {
+ large_fields->is_inserting = 0;
+ }
if (ep == NULL) {
Py_DECREF(key);
Py_DECREF(value);
@@ -553,7 +683,15 @@
Py_DECREF(dummy);
}
ep->me_key = key;
- ep->me_hash = hash;
+ if (!DICT_IS_PARANOID(mp)) {
+ ep->me_hash = hash;
+ } else {
+ /* Ideally we need some way of getting back the randomized hash
+ from the ma_lookup function, but there doesn't seem to be a
+ way of doing that without breaking ABI.
+ For now, recalculate the randomized hash: */
+ ep->me_hash = PyObject_RandomizedHash(key);
+ }
ep->me_value = value;
mp->ma_used++;
}
@@ -675,8 +813,15 @@
/* else key == value == NULL: nothing to do */
}
- if (is_oldtable_malloced)
+ if (is_oldtable_malloced) {
PyMem_DEL(oldtable);
+ } else {
+ if (mp->ma_table != mp->ma_smalltable) {
+ /* clean ma_smalltable for use as _Py_LargeDictFields: */
+ memset(mp->ma_smalltable, 0, sizeof(mp->ma_smalltable));
+ }
+ }
+
return 0;
}
@@ -1996,6 +2141,91 @@
PyDoc_STRVAR(copy__doc__,
"D.copy() -> a shallow copy of D");
+#ifdef DICT_PROTECTION_TRACKING
+/* Debug code for use by selftests: */
+static const char *
+get_name_of_ma_lookup(PyDictObject *mp)
+{
+ /* can't do this as a switch statement */
+ if (mp->ma_lookup == lookdict) {
+ return "lookdict";
+ } else if (mp->ma_lookup == lookdict_unicode) {
+ return "lookdict_unicode";
+ } else if (mp->ma_lookup == lookdict_paranoid) {
+ return "lookdict_paranoid";
+ } else if (mp->ma_lookup == lookdict_unicode_paranoid) {
+ return "lookdict_unicode_paranoid";
+ } else {
+ return "unknown ma_lookup";
+ }
+}
+
+static PyObject *
+dict__stats(PyDictObject *mp)
+{
+ PyObject *result = PyDict_New();
+ if (!result) goto error;
+
+#define ADD_STAT(name, expr) \
+ { \
+ PyObject *value = (expr); \
+ if (!value) \
+ goto error; \
+ if (-1 == PyDict_SetItemString(result, (name), value)) { \
+ Py_DECREF(value); \
+ goto error; \
+ } \
+ Py_DECREF(value); \
+ }
+
+ ADD_STAT("ma_mask", PyLong_FromLong(mp->ma_mask));
+ ADD_STAT("ma_lookup", PyUnicode_FromString(get_name_of_ma_lookup(mp)));
+ if (mp->ma_mask >= PyDict_MINSIZE) {
+ _Py_LargeDictFields *large_fields = PyDict_LARGEDICTFIELDS(mp);
+ ADD_STAT("num_insertions", PyLong_FromLong(large_fields->num_insertions));
+ ADD_STAT("insertion_iter_count", PyLong_FromLong(large_fields->insertion_iter_count));
+ if (large_fields->num_insertions > 0) {
+ ADD_STAT("iter_count_per_insertion",
+ PyFloat_FromDouble((double)large_fields->insertion_iter_count
+ / (double)large_fields->num_insertions));
+ }
+ }
+
+ return result;
+
+ error:
+ Py_XDECREF(result);
+ return NULL;
+}
+
+static PyObject *
+dict__make_paranoid(register PyDictObject *mp)
+{
+ if (mp->ma_mask >= PyDict_MINSIZE) {
+ if (DICT_IS_PARANOID(mp)) {
+ PyErr_SetString(PyExc_RuntimeError,
+ "dict is already in paranoid mode");
+ return NULL;
+ } else {
+ transition_to_paranoid_dict(mp);
+ Py_RETURN_NONE;
+ }
+ } else {
+ PyErr_SetString(PyExc_RuntimeError,
+ ("dict is not large enough to be forced"
+ " into paranoid mode"));
+ return NULL;
+ }
+}
+
+
+PyDoc_STRVAR(_stats__doc__,
+"D._stats() -> for CPython selftests: obtain debugging stats on D");
+
+PyDoc_STRVAR(_make_paranoid__doc__,
+"D._make_paranoid() -> for CPython selftests: forcibly transition this dict into paranoid mode");
+#endif
+
/* Forward */
static PyObject *dictkeys_new(PyObject *);
static PyObject *dictitems_new(PyObject *);
@@ -2037,6 +2267,12 @@
clear__doc__},
{"copy", (PyCFunction)dict_copy, METH_NOARGS,
copy__doc__},
+#ifdef DICT_PROTECTION_TRACKING
+ {"_stats", (PyCFunction)dict__stats, METH_NOARGS,
+ _stats__doc__},
+ {"_make_paranoid", (PyCFunction)dict__make_paranoid, METH_NOARGS,
+ _make_paranoid__doc__},
+#endif
{NULL, NULL} /* sentinel */
};
@@ -3067,3 +3303,144 @@
{
return dictview_new(dict, &PyDictValues_Type);
}
+
+/*
+ Swap the hash values for the active entries with the values
+ in the corresponding indices in the given array (of size
+ (mp->ma_mask + 1) values)
+ */
+static void
+swap_hash_values(PyDictObject *mp, Py_hash_t *hashes)
+{
+ Py_ssize_t i;
+
+ assert(mp);
+ assert(hashes);
+
+ for (i=0; i <= mp->ma_mask; i++) {
+ PyDictEntry *ep = &mp->ma_table[i];
+ if (ep->me_value != NULL) { /* active entry */
+ Py_hash_t old_hash = ep->me_hash;
+ ep->me_hash = hashes[i];
+ hashes[i] = old_hash;
+ }
+ }
+}
+
+/*
+ Switch to new lookup implementation, using secret hashes, reordering
+ all of the me_hash slots.
+
+ Returns -1 for failure, 0 for success
+*/
/*
  Switch "mp" to the paranoid lookup implementation, which uses the
  secret (randomized) hash algorithm: recompute the hash of every active
  entry, reorder the table for the new hashes, and install the matching
  *_paranoid ma_lookup callback.

  Preconditions: the dict is large (not using ma_smalltable) and not
  already paranoid.

  Returns -1 for failure (with an exception set), 0 for success.
  On failure the dict is left in its original state.
*/
static int
transition_to_paranoid_dict(PyDictObject *mp)
{
    Py_hash_t *hashes;
    Py_ssize_t i;

    assert(mp->ma_table != mp->ma_smalltable);

    assert(!DICT_IS_PARANOID(mp));

    /*
      Recalculate all of the me_hash values within ma_table
      using the randomized new algorithm.

      An error could occur during this process, so allocate
      a temporary buffer:
    */
    hashes = PyMem_NEW(Py_hash_t, mp->ma_mask + 1);
    if (hashes == NULL) {
        PyErr_NoMemory();
        return -1;
    }

    /* Populate "hashes" with new values for the active entries,
       leaving the other indices untouched: */
    for (i = 0; i <= mp->ma_mask; i++) {
        PyDictEntry *ep = &mp->ma_table[i];
        if (ep->me_value != NULL) { /* active entry */
            Py_hash_t new_hash = PyObject_RandomizedHash(ep->me_key);
            if (new_hash != -1) {
                hashes[i] = new_hash;
            } else {
                /* Error hashing a key (exception already set); bail
                   out before the table itself has been modified: */
                PyMem_DEL(hashes);
                return -1;
            }
        }
    }

    /* OK, we have new hashes for all active entries, and no errors
       occurred. Copy them up into the ma_table, swapping the old
       value into the buffer in case we need to reconstruct things
       later: */
    swap_hash_values(mp, hashes);

    /*
      "hashes" now holds the old hash values; mp->ma_table holds
      the new hash values.

      Now use dictresize to reorganize the order of the entries.
      We use ma_mask here as the minimum size to ensure that the dict
      doesn't start using ma_smalltable again:
    */
    assert(mp->ma_mask >= PyDict_MINSIZE);
    if (-1 == dictresize(mp, mp->ma_mask)) {
        /* This can fail under low memory conditions, before changing
           anything. If so, swap the old hash values back in: */
        swap_hash_values(mp, hashes);
        PyMem_DEL(hashes);
        return -1;
    }

    /* Success. We're done with the "hashes" buffer: */
    PyMem_DEL(hashes);

    /* Change ma_lookup so that we use randomized hashes: */
    if (mp->ma_lookup == lookdict_unicode) {
        /* transition from lookdict_unicode -> lookdict_unicode_paranoid */
        mp->ma_lookup = lookdict_unicode_paranoid;
    } else {
        /* transition from lookdict -> lookdict_paranoid */
        assert(mp->ma_lookup == lookdict);
        mp->ma_lookup = lookdict_paranoid;
    }

    /* FIXME: track conversions? */
    return 0;
}
+
+/* may want to hand-inline this one
+
+ Returns 1 when the transition-to-paranoid should occur, 0 otherwise
+ */
+static int
+check_iter_count(PyDictObject *mp, PyObject *key, Py_hash_t hash)
+{
+ if (mp->ma_mask >= PyDict_MINSIZE) {
+ _Py_LargeDictFields *large_fields = PyDict_LARGEDICTFIELDS(mp);
+ assert(mp->ma_table != mp->ma_smalltable);
+ if (large_fields->is_inserting) {
+
+ /* Increment insertion_iter_count,
+ protecting against stats overflow: */
+ if (++large_fields->insertion_iter_count
+ >= Py_DICT_STATS_ROUNDING_THRESHOLD) {
+ /* Preserve the ratio between these two: */
+ large_fields->insertion_iter_count >>= 4;
+ large_fields->num_insertions >>= 4;
+ }
+
+ /* Transition into paranoid mode if the ratio is too bad: */
+ if (large_fields->insertion_iter_count
+ > (large_fields->num_insertions * Py_MAX_AVERAGE_PROBES_PER_INSERT) ) {
+ if (!DICT_IS_PARANOID(mp)) {
+ return 1;
+ }
+ }
+ }
+ }
+ return 0;
+}
diff -r 3be60a4c8c63 Objects/object.c
--- a/Objects/object.c Fri Jan 20 11:01:06 2012 -0500
+++ b/Objects/object.c Wed Jan 25 12:28:05 2012 -0500
@@ -769,6 +769,22 @@
}
Py_hash_t
+_Py_RandomizedHashBytes(unsigned char *p, Py_ssize_t len)
+{
+ Py_uhash_t x;
+ Py_ssize_t i;
+
+ x = _Py_HashSecret.prefix ^ (Py_uhash_t) *p << 7;
+ for (i = 0; i < len; i++)
+ x = (_PyHASH_MULTIPLIER * x) ^ (Py_uhash_t) *p++;
+ x ^= (Py_uhash_t) len;
+ x ^= _Py_HashSecret.suffix;
+ if (x == -1)
+ x = -2;
+ return x;
+}
+
+Py_hash_t
PyObject_HashNotImplemented(PyObject *v)
{
PyErr_Format(PyExc_TypeError, "unhashable type: '%.200s'",
@@ -797,6 +813,27 @@
return PyObject_HashNotImplemented(v);
}
+_Py_HashSecret_t _Py_HashSecret;
+
+Py_hash_t
+PyObject_RandomizedHash(PyObject *v)
+{
+ /*
+ We only randomize the hash for a few specific types for now, to ease
+ backporting. Ultimately this could be a new entry in PyTypeObject
+ */
+ if (Py_TYPE(v) == &PyUnicode_Type) {
+ return _PyUnicode_RandomizedHash(v);
+ } else if (Py_TYPE(v) == &PyBytes_Type) {
+ return _PyBytes_RandomizedHash(v);
+ }
+ /* FIXME: for Python 2 backport extend to cover PyBuffer_Type */
+
+ /* Otherwise, just use the regular hash value: */
+ return PyObject_Hash(v);
+}
+
+
PyObject *
PyObject_GetAttrString(PyObject *v, const char *name)
{
diff -r 3be60a4c8c63 Objects/unicodeobject.c
--- a/Objects/unicodeobject.c Fri Jan 20 11:01:06 2012 -0500
+++ b/Objects/unicodeobject.c Wed Jan 25 12:28:05 2012 -0500
@@ -11242,6 +11242,57 @@
}
#undef HASH
/* Compute the randomized (secret-keyed) hash of the unicode object
   "self": the same multiply/xor algorithm as the regular unicode hash,
   but seeded with _Py_HashSecret.prefix and folding in the length and
   _Py_HashSecret.suffix at the end.

   Returns the hash, or -1 on error (PyUnicode_READY failure).

   NOTE(review): the len == 0 path writes 0 into the cached
   _PyUnicode_HASH slot, while the non-empty path deliberately does not
   cache -- presumably safe because hash("") is 0 under both algorithms;
   confirm. */
Py_hash_t
_PyUnicode_RandomizedHash(PyObject *self)
{
    Py_ssize_t len;
    Py_uhash_t x;

    if (PyUnicode_READY(self) == -1)
        return -1;
    len = PyUnicode_GET_LENGTH(self);
    if (len == 0) {
        _PyUnicode_HASH(self) = 0;
        return 0;
    }

    /* Alternate definition of the hash function as a macro to above */
#define HASH(P) \
    x = _Py_HashSecret.prefix ^ (Py_uhash_t)*P << 7; \
    while (--len >= 0) \
        x = (_PyHASH_MULTIPLIER*x) ^ (Py_uhash_t)*P++;

    /* Dispatch on the character width of the canonical representation: */
    switch (PyUnicode_KIND(self)) {
    case PyUnicode_1BYTE_KIND: {
        const unsigned char *c = PyUnicode_1BYTE_DATA(self);
        HASH(c);
        break;
    }
    case PyUnicode_2BYTE_KIND: {
        const Py_UCS2 *s = PyUnicode_2BYTE_DATA(self);
        HASH(s);
        break;
    }
    default: {
        Py_UCS4 *l;
        assert(PyUnicode_KIND(self) == PyUnicode_4BYTE_KIND &&
               "Impossible switch case in unicode_hash");
        l = PyUnicode_4BYTE_DATA(self);
        HASH(l);
        break;
    }
    }
    /* Fold in the length and the secret suffix: */
    x ^= (Py_uhash_t)PyUnicode_GET_LENGTH(self);
    x ^= _Py_HashSecret.suffix;

    /* -1 is reserved as the error sentinel for hash functions: */
    if (x == -1)
        x = -2;
    /* we do *not* update _PyUnicode_HASH(self) here */
    return x;
}
#undef HASH
+
+
PyDoc_STRVAR(index__doc__,
"S.index(sub[, start[, end]]) -> int\n\
\n\
diff -r 3be60a4c8c63 PCbuild/pythoncore.vcproj
--- a/PCbuild/pythoncore.vcproj Fri Jan 20 11:01:06 2012 -0500
+++ b/PCbuild/pythoncore.vcproj Wed Jan 25 12:28:05 2012 -0500
@@ -1891,6 +1891,10 @@
>
</File>
<File
+ RelativePath="..\Python\random.c"
+ >
+ </File>
+ <File
RelativePath="..\Python\structmember.c"
>
</File>
diff -r 3be60a4c8c63 Python/pythonrun.c
--- a/Python/pythonrun.c Fri Jan 20 11:01:06 2012 -0500
+++ b/Python/pythonrun.c Wed Jan 25 12:28:05 2012 -0500
@@ -71,6 +71,7 @@
extern void _PyUnicode_Fini(void);
extern int _PyLong_Init(void);
extern void PyLong_Fini(void);
+extern void _PyRandom_Init(void);
extern int _PyFaulthandler_Init(void);
extern void _PyFaulthandler_Fini(void);
@@ -219,6 +220,8 @@
if ((p = Py_GETENV("PYTHONDONTWRITEBYTECODE")) && *p != '\0')
Py_DontWriteBytecodeFlag = add_flag(Py_DontWriteBytecodeFlag, p);
+ _PyRandom_Init();
+
interp = PyInterpreterState_New();
if (interp == NULL)
Py_FatalError("Py_Initialize: can't make first interpreter");
diff -r 3be60a4c8c63 Python/random.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Python/random.c Wed Jan 25 12:28:05 2012 -0500
@@ -0,0 +1,268 @@
+#include "Python.h"
+#ifdef MS_WINDOWS
+#include <windows.h>
+#else
+#include <fcntl.h>
+#endif
+
+static int random_initialized = 0;
+
+#ifdef MS_WINDOWS
+typedef BOOL (WINAPI *CRYPTACQUIRECONTEXTA)(HCRYPTPROV *phProv,\
+ LPCSTR pszContainer, LPCSTR pszProvider, DWORD dwProvType,\
+ DWORD dwFlags );
+typedef BOOL (WINAPI *CRYPTGENRANDOM)(HCRYPTPROV hProv, DWORD dwLen,\
+ BYTE *pbBuffer );
+
+static CRYPTGENRANDOM pCryptGenRandom = NULL;
+/* This handle is never explicitly released. Instead, the operating
+ system will release it when the process terminates. */
+static HCRYPTPROV hCryptProv = 0;
+
/* Locate the CryptoAPI entry points in advapi32.dll and acquire the
   process-wide crypto context (hCryptProv) used by win32_urandom().

   Return 0 on success.  On failure, return -1 after either raising a
   Python exception (raise != 0) or aborting via Py_FatalError()
   (raise == 0, for use during early interpreter startup). */
static int
win32_urandom_init(int raise)
{
    HINSTANCE hAdvAPI32 = NULL;
    CRYPTACQUIRECONTEXTA pCryptAcquireContext = NULL;

    /* Obtain handle to the DLL containing CryptoAPI. This should not fail. */
    hAdvAPI32 = GetModuleHandle("advapi32.dll");
    if(hAdvAPI32 == NULL)
        goto error;

    /* Obtain pointers to the CryptoAPI functions. This will fail on some early
       versions of Win95. */
    pCryptAcquireContext = (CRYPTACQUIRECONTEXTA)GetProcAddress(
        hAdvAPI32, "CryptAcquireContextA");
    if (pCryptAcquireContext == NULL)
        goto error;

    pCryptGenRandom = (CRYPTGENRANDOM)GetProcAddress(hAdvAPI32,
                                                     "CryptGenRandom");
    if (pCryptGenRandom == NULL)
        goto error;

    /* Acquire context */
    if (! pCryptAcquireContext(&hCryptProv, NULL, NULL,
                               PROV_RSA_FULL, CRYPT_VERIFYCONTEXT))
        goto error;

    return 0;

error:
    if (raise)
        PyErr_SetFromWindowsErr(0);
    else
        Py_FatalError("Fail to initialize Windows random API (CryptoGen)");
    return -1;
}
+
+/* Fill buffer with size pseudo-random bytes generated by the Windows CryptoGen
+ API. Return 0 on success, or -1 on error. */
+static int
+win32_urandom(unsigned char *buffer, Py_ssize_t size, int raise)
+{
+ Py_ssize_t orig_size = size;
+ Py_ssize_t chunk;
+
+ if (hCryptProv == 0)
+ {
+ if (win32_urandom_init(raise) == -1)
+ return -1;
+ }
+
+ while (size > 0)
+ {
+ chunk = Py_MIN(size, INT_MAX);
+ if (!pCryptGenRandom(hCryptProv, chunk, buffer))
+ {
+ /* CryptGenRandom() failed */
+ if (raise)
+ PyErr_SetFromWindowsErr(0);
+ else
+ Py_FatalError("Fail to initialized the randomized hash "
+ "secret using CryptoGen)");
+ return -1;
+ }
+ buffer += chunk;
+ size -= chunk;
+ }
+ return 0;
+}
+
+#else
+
+/* Read size bytes from /dev/urandom into buffer.
+ Call Py_FatalError() on error. */
+static void
+dev_urandom_noraise(char *buffer, Py_ssize_t size)
+{
+ int fd;
+ Py_ssize_t n;
+
+ assert (0 < size);
+
+ fd = open("/dev/urandom", O_RDONLY);
+ if (fd < 0)
+ Py_FatalError("Fail to open /dev/urandom");
+
+ while (0 < size)
+ {
+ do {
+ n = read(fd, buffer, (size_t)size);
+ } while (n < 0 && errno == EINTR);
+ if (n <= 0)
+ {
+ /* stop on error or if read(size) returned 0 */
+ Py_FatalError("Fail to read bytes from /dev/urandom");
+ break;
+ }
+ buffer += n;
+ size -= (Py_ssize_t)n;
+ }
+ close(fd);
+}
+
/* Read "size" bytes from /dev/urandom into "buffer", releasing the GIL
   around the blocking I/O and retrying reads interrupted by signals.

   Return 0 on success; raise an exception and return -1 on error
   (including a zero-length read, which would mean /dev/urandom is
   misbehaving). */
static int
dev_urandom_python(char *buffer, Py_ssize_t size)
{
    int fd;
    Py_ssize_t n;

    if (size <= 0)
        return 0;

    Py_BEGIN_ALLOW_THREADS
    fd = open("/dev/urandom", O_RDONLY);
    Py_END_ALLOW_THREADS
    if (fd < 0)
    {
        PyErr_SetFromErrnoWithFilename(PyExc_OSError, "/dev/urandom");
        return -1;
    }

    Py_BEGIN_ALLOW_THREADS
    do {
        /* EINTR: a signal interrupted the read; just retry: */
        do {
            n = read(fd, buffer, (size_t)size);
        } while (n < 0 && errno == EINTR);
        if (n <= 0)
            break;
        buffer += n;
        size -= (Py_ssize_t)n;
    } while (0 < size);
    Py_END_ALLOW_THREADS

    /* "n" holds the result of the last read; <= 0 means the loop
       stopped early on error or EOF: */
    if (n <= 0)
    {
        /* stop on error or if read(size) returned 0 */
        if (n < 0)
            PyErr_SetFromErrno(PyExc_OSError);
        else
            PyErr_Format(PyExc_RuntimeError,
                         "Fail to read %zi bytes from /dev/urandom",
                         size);
        close(fd);
        return -1;
    }
    close(fd);
    return 0;
}
+#endif
+
/* Fill "buffer" with "size" pseudo-random bytes from a linear
   congruential generator (LCG) seeded with x0:

       x(n+1) = (x(n) * 214013 + 2531011) % 2^32

   Bits 23..16 of each state yield one output byte.  This is a weak,
   deterministic generator (used e.g. to expand a user-supplied seed);
   it is NOT suitable for cryptographic use. */
static void
lcg_urandom(unsigned int x0, unsigned char *buffer, size_t size)
{
    unsigned int state = x0;
    size_t i;

    for (i = 0; i < size; i++) {
        /* unsigned arithmetic wraps modulo 2 ** (8 * sizeof(int)) */
        state = state * 214013 + 2531011;
        buffer[i] = (state >> 16) & 0xff;
    }
}
+
/* Fill buffer with size pseudo-random bytes, not suitable for cryptographic
   use, from the operating system random number generator (RNG).

   A size of 0 is a no-op; a negative size raises ValueError.

   Return 0 on success, raise an exception and return -1 on error. */
int
_PyOS_URandom(void *buffer, Py_ssize_t size)
{
    if (size < 0) {
        PyErr_Format(PyExc_ValueError,
                     "negative argument not allowed");
        return -1;
    }
    if (size == 0)
        return 0;

#ifdef MS_WINDOWS
    /* raise=1: report CryptoGen failures as Python exceptions */
    return win32_urandom((unsigned char *)buffer, size, 1);
#else
    return dev_urandom_python((char*)buffer, size);
#endif
}
+
+void
+_PyRandom_Init(void)
+{
+ char *env;
+ void *secret = &_Py_HashSecret;
+ Py_ssize_t secret_size = sizeof(_Py_HashSecret);
+
+ if (random_initialized)
+ return;
+ random_initialized = 1;
+
+ env = Py_GETENV("PYTHONHASHSEED");
+ if (env && *env != '\0') {
+ char *endptr = env;
+ unsigned long seed;
+ seed = strtoul(env, &endptr, 10);
+ if (*endptr != '\0'
+ || seed > 4294967295UL
+ || (errno == ERANGE && seed == ULONG_MAX))
+ {
+ Py_FatalError("PYTHONHASHSEED must be an integer "
+ "in range [0; 4294967295]");
+ }
+ if (seed == 0) {
+ /* disable the randomized Unicode hash */
+ memset(secret, 0, secret_size);
+ }
+ else {
+ lcg_urandom(seed, (unsigned char*)secret, secret_size);
+ }
+ }
+ else {
+#ifdef MS_WINDOWS
+#if 1
+ (void)win32_urandom((unsigned char *)secret, secret_size, 0);
+#else
+ /* fast but weak RNG (fast initialization, weak seed) */
+ _PyTime_timeval t;
+ unsigned int seed;
+ _PyTime_gettimeofday(&t);
+ seed = (unsigned int)t.tv_sec;
+ seed ^= t.tv_usec;
+ seed ^= getpid();
+ lcg_urandom(seed, (unsigned char*)secret, secret_size);
+#endif
+#else
+ dev_urandom_noraise((char*)secret, secret_size);
+#endif
+ }
+}
+
diff -r 3be60a4c8c63 Python/sysmodule.c
--- a/Python/sysmodule.c Fri Jan 20 11:01:06 2012 -0500
+++ b/Python/sysmodule.c Wed Jan 25 12:28:05 2012 -0500
@@ -993,6 +993,20 @@
10. Number of stack pops performed by call_function()"
);
+PyDoc_STRVAR(_getrandomizedhash_doc,
+"_getrandomizedhash(object) -> int\n\
+\n\
+For selftests only: return PyObject_RandomizedHash for the given object.");
+
+static PyObject *
+sys__getrandomizedhash(PyObject *self, PyObject *arg)
+{
+ return PyLong_FromSsize_t(PyObject_RandomizedHash(arg));
+ /* FIXME: when backporting, note the Py_hash_t to long change,
+ and make it just PyLong_FromLong on Python 2.* */
+}
+
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -1093,6 +1107,8 @@
{"settrace", sys_settrace, METH_O, settrace_doc},
{"gettrace", sys_gettrace, METH_NOARGS, gettrace_doc},
{"call_tracing", sys_call_tracing, METH_VARARGS, call_tracing_doc},
+ {"_getrandomizedhash", (PyCFunction)sys__getrandomizedhash,
+ METH_O, _getrandomizedhash_doc},
{NULL, NULL} /* sentinel */
};
More information about the Python-bugs-list
mailing list