[issue13703] Hash collision security issue

STINNER Victor report at bugs.python.org
Tue Jan 17 13:21:33 CET 2012


STINNER Victor <victor.stinner at haypocalc.com> added the comment:

Patch version 8: the whole test suite now pass successfully.

The remaining question is if CryptoGen should be used instead of the
weak LCG initialized by gettimeofday() and getpid(). According to
Martin von Loewis, we must link statically Python to advapi32.dll. It
should speed up the startup.

----------
Added file: http://bugs.python.org/file24259/random-8.patch

_______________________________________
Python tracker <report at bugs.python.org>
<http://bugs.python.org/issue13703>
_______________________________________
-------------- next part --------------
diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst
--- a/Doc/using/cmdline.rst
+++ b/Doc/using/cmdline.rst
@@ -460,6 +460,13 @@ These environment variables influence Py
    option.
 
 
+.. envvar:: PYTHONHASHSEED
+
+   If this is set, it is used as a fixed seed for the Unicode randomized hash:
+   number in range [0; 4294967295]. The value 0 disables the Unicode randomized
+   hash.
+
+
 .. envvar:: PYTHONIOENCODING
 
    If this is set before running the interpreter, it overrides the encoding used
diff --git a/Include/pythonrun.h b/Include/pythonrun.h
--- a/Include/pythonrun.h
+++ b/Include/pythonrun.h
@@ -246,6 +246,8 @@ typedef void (*PyOS_sighandler_t)(int);
 PyAPI_FUNC(PyOS_sighandler_t) PyOS_getsig(int);
 PyAPI_FUNC(PyOS_sighandler_t) PyOS_setsig(int, PyOS_sighandler_t);
 
+/* Random */
+PyAPI_FUNC(int) _PyOS_URandom (void *buffer, Py_ssize_t size);
 
 #ifdef __cplusplus
 }
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -376,6 +376,12 @@ typedef struct {
 PyAPI_DATA(PyTypeObject) PyUnicode_Type;
 PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
 
+typedef struct {
+    Py_hash_t prefix;
+    Py_hash_t suffix;
+} _Py_unicode_hash_secret_t;
+PyAPI_DATA(_Py_unicode_hash_secret_t) _Py_unicode_hash_secret;
+
 #define PyUnicode_Check(op) \
                  PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS)
 #define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type)
diff --git a/Lib/json/__init__.py b/Lib/json/__init__.py
--- a/Lib/json/__init__.py
+++ b/Lib/json/__init__.py
@@ -31,7 +31,9 @@ Encoding basic Python object hierarchies
 Compact encoding::
 
     >>> import json
-    >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',', ':'))
+    >>> from collections import OrderedDict
+    >>> mydict = OrderedDict([('4', 5), ('6', 7)])
+    >>> json.dumps([1,2,3,mydict], separators=(',', ':'))
     '[1,2,3,{"4":5,"6":7}]'
 
 Pretty printing::
diff --git a/Lib/os.py b/Lib/os.py
--- a/Lib/os.py
+++ b/Lib/os.py
@@ -761,23 +761,6 @@ try:
 except NameError: # statvfs_result may not exist
     pass
 
-if not _exists("urandom"):
-    def urandom(n):
-        """urandom(n) -> str
-
-        Return a string of n random bytes suitable for cryptographic use.
-
-        """
-        try:
-            _urandomfd = open("/dev/urandom", O_RDONLY)
-        except (OSError, IOError):
-            raise NotImplementedError("/dev/urandom (or equivalent) not found")
-        bs = b""
-        while len(bs) < n:
-            bs += read(_urandomfd, n - len(bs))
-        close(_urandomfd)
-        return bs
-
 # Supply os.popen()
 def popen(cmd, mode="r", buffering=-1):
     if not isinstance(cmd, str):
diff --git a/Lib/packaging/create.py b/Lib/packaging/create.py
--- a/Lib/packaging/create.py
+++ b/Lib/packaging/create.py
@@ -311,8 +311,11 @@ class MainProgram:
                          'package_data', 'extra_files'):
                 if not(name in self.data and self.data[name]):
                     continue
-                fp.write('%s = %s\n'
-                         % (name, '\n    '.join(self.data[name]).strip()))
+                data = self.data[name]
+                if name == "extra_files":
+                    data.sort()
+                data = '\n    '.join(data).strip()
+                fp.write('%s = %s\n' % (name, data))
             fp.write('\nresources =\n')
             for src, dest in self.data['resources']:
                 fp.write('    %s = %s\n' % (src, dest))
diff --git a/Lib/packaging/tests/test_create.py b/Lib/packaging/tests/test_create.py
--- a/Lib/packaging/tests/test_create.py
+++ b/Lib/packaging/tests/test_create.py
@@ -150,16 +150,16 @@ class CreateTestCase(support.TempdirMana
                 mymodule
             scripts = my_script
                 bin/run
-            extra_files = Martinique/Lamentin/dady
+            extra_files = Alexander
+                Flora
+                Martinique/Lamentin/bro
+                Martinique/Lamentin/dady
                 Martinique/Lamentin/mumy
                 Martinique/Lamentin/sys
-                Martinique/Lamentin/bro
+                Pom
+                README
+                pyxfoil/fengine.so
                 setup.py
-                README
-                Pom
-                Flora
-                Alexander
-                pyxfoil/fengine.so
 
             resources =
                 README.rst = {doc}
@@ -217,8 +217,8 @@ ho, baby!
 
             [files]
             packages = pyxfoil
-            extra_files = pyxfoil/fengine.so
-                pyxfoil/babar.so
+            extra_files = pyxfoil/babar.so
+                pyxfoil/fengine.so
 
             resources =
                 README.rst = {doc}
diff --git a/Lib/test/mapping_tests.py b/Lib/test/mapping_tests.py
--- a/Lib/test/mapping_tests.py
+++ b/Lib/test/mapping_tests.py
@@ -14,7 +14,7 @@ class BasicTestMappingProtocol(unittest.
     def _reference(self):
         """Return a dictionary of values which are invariant by storage
         in the object under test."""
-        return {1:2, "key1":"value1", "key2":(1,2,3)}
+        return {"1": "2", "key1":"value1", "key2":(1,2,3)}
     def _empty_mapping(self):
         """Return an empty mapping object"""
         return self.type2test()
diff --git a/Lib/test/regrtest.py b/Lib/test/regrtest.py
--- a/Lib/test/regrtest.py
+++ b/Lib/test/regrtest.py
@@ -559,6 +559,11 @@ def main(tests=None, testdir=None, verbo
         except ValueError:
             print("Couldn't find starting test (%s), using all tests" % start)
     if randomize:
+        hashseed = os.getenv('PYTHONHASHSEED')
+        if not hashseed:
+            os.environ['PYTHONHASHSEED'] = str(random_seed)
+            os.execv(sys.executable, [sys.executable] + sys.argv)
+            return
         random.seed(random_seed)
         print("Using random seed", random_seed)
         random.shuffle(selected)
diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py
--- a/Lib/test/test_builtin.py
+++ b/Lib/test/test_builtin.py
@@ -597,7 +597,6 @@ class BuiltinTest(unittest.TestCase):
         self.assertEqual(hash(1), hash(1))
         self.assertEqual(hash(1), hash(1.0))
         hash('spam')
-        self.assertEqual(hash('spam'), hash(b'spam'))
         hash((0,1,2,3))
         def f(): pass
         self.assertRaises(TypeError, hash, [])
diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py
--- a/Lib/test/test_dis.py
+++ b/Lib/test/test_dis.py
@@ -6,6 +6,7 @@ import unittest
 import sys
 import dis
 import io
+from test.script_helper import assert_python_ok
 
 class _C:
     def __init__(self, x):
@@ -322,12 +323,6 @@ Variable names:
    0: x""" % (('Formatted details of methods, functions, or code.', '   6: None\n')
               if sys.flags.optimize < 2 else (None, ''))
 
- at staticmethod
-def tricky(x, y, z=True, *args, c, d, e=[], **kwds):
-    def f(c=c):
-        print(x, y, z, c, d, e, f)
-    yield x, y, z, c, d, e, f
-
 code_info_tricky = """\
 Name:              tricky
 Filename:          (.*)
@@ -357,8 +352,6 @@ Cell variables:
    4: x
    5: z"""
 
-co_tricky_nested_f = tricky.__func__.__code__.co_consts[1]
-
 code_info_tricky_nested_f = """\
 Name:              f
 Filename:          (.*)
@@ -426,13 +419,36 @@ Names:
 class CodeInfoTests(unittest.TestCase):
     test_pairs = [
       (dis.code_info, code_info_code_info),
-      (tricky, code_info_tricky),
-      (co_tricky_nested_f, code_info_tricky_nested_f),
       (expr_str, code_info_expr_str),
       (simple_stmt_str, code_info_simple_stmt_str),
       (compound_stmt_str, code_info_compound_stmt_str),
     ]
 
+    def get_tricky_output(self, func):
+        code = """
+import dis
+
+ at staticmethod
+def tricky(x, y, z=True, *args, c, d, e=[], **kwds):
+    def f(c=c):
+        print(x, y, z, c, d, e, f)
+    yield x, y, z, c, d, e, f
+
+%s
+        """ % func
+        err, stdout, stderr = assert_python_ok('-c', code, PYTHONHASHSEED='0')
+        return (stdout + stderr).decode('ascii', 'surrogateescape')
+
+    def test_tricky(self):
+        stdout = self.get_tricky_output('dis.show_code(tricky)')
+        self.assertRegex(stdout, code_info_tricky)
+
+    def test_tricky_nested(self):
+        stdout = self.get_tricky_output(
+            'co_tricky_nested_f = tricky.__func__.__code__.co_consts[1]; '
+            'dis.show_code(co_tricky_nested_f)')
+        self.assertRegex(stdout, code_info_tricky_nested_f)
+
     def test_code_info(self):
         self.maxDiff = 1000
         for x, expected in self.test_pairs:
diff --git a/Lib/test/test_gdb.py b/Lib/test/test_gdb.py
--- a/Lib/test/test_gdb.py
+++ b/Lib/test/test_gdb.py
@@ -52,13 +52,15 @@ class DebuggerTests(unittest.TestCase):
 
     """Test that the debugger can debug Python."""
 
-    def run_gdb(self, *args):
+    def run_gdb(self, *args, **env):
         """Runs gdb with the command line given by *args.
 
         Returns its stdout, stderr
         """
+        if not env:
+            env = None
         out, err = subprocess.Popen(
-            args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+            args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env,
             ).communicate()
         return out.decode('utf-8', 'replace'), err.decode('utf-8', 'replace')
 
@@ -118,7 +120,7 @@ class DebuggerTests(unittest.TestCase):
         # print ' '.join(args)
 
         # Use "args" to invoke gdb, capturing stdout, stderr:
-        out, err = self.run_gdb(*args)
+        out, err = self.run_gdb(*args, PYTHONHASHSEED='0')
 
         # Ignore some noise on stderr due to the pending breakpoint:
         err = err.replace('Function "%s" not defined.\n' % breakpoint, '')
@@ -207,7 +209,8 @@ class PrettyPrintTests(DebuggerTests):
         'Verify the pretty-printing of dictionaries'
         self.assertGdbRepr({})
         self.assertGdbRepr({'foo': 'bar'})
-        self.assertGdbRepr({'foo': 'bar', 'douglas':42})
+        self.assertGdbRepr({'foo': 'bar', 'douglas': 42},
+                           "{'foo': 'bar', 'douglas': 42}")
 
     def test_lists(self):
         'Verify the pretty-printing of lists'
diff --git a/Lib/test/test_inspect.py b/Lib/test/test_inspect.py
--- a/Lib/test/test_inspect.py
+++ b/Lib/test/test_inspect.py
@@ -799,7 +799,6 @@ class TestGetcallargsFunctions(unittest.
             self.assertEqualException(f, '2, 3, 4')
             self.assertEqualException(f, '1, 2, 3, a=1')
             self.assertEqualException(f, '2, 3, 4, c=5')
-            self.assertEqualException(f, '2, 3, 4, a=1, c=5')
             # f got an unexpected keyword argument
             self.assertEqualException(f, 'c=2')
             self.assertEqualException(f, '2, c=3')
diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py
--- a/Lib/test/test_os.py
+++ b/Lib/test/test_os.py
@@ -12,6 +12,7 @@ import subprocess
 import time
 import shutil
 from test import support
+from test.script_helper import assert_python_ok
 import contextlib
 import mmap
 import platform
@@ -630,14 +631,32 @@ class DevNullTests(unittest.TestCase):
             self.assertEqual(f.read(), b'')
 
 class URandomTests(unittest.TestCase):
-    def test_urandom(self):
-        try:
-            self.assertEqual(len(os.urandom(1)), 1)
-            self.assertEqual(len(os.urandom(10)), 10)
-            self.assertEqual(len(os.urandom(100)), 100)
-            self.assertEqual(len(os.urandom(1000)), 1000)
-        except NotImplementedError:
-            pass
+    def test_urandom_length(self):
+        self.assertEqual(len(os.urandom(1)), 1)
+        self.assertEqual(len(os.urandom(10)), 10)
+        self.assertEqual(len(os.urandom(100)), 100)
+        self.assertEqual(len(os.urandom(1000)), 1000)
+
+    def test_urandom_value(self):
+        data1 = os.urandom(16)
+        data2 = os.urandom(16)
+        self.assertNotEqual(data1, data2)
+
+    def get_urandom_subprocess(self, count):
+        code = '\n'.join((
+            'import os, sys',
+            'data = os.urandom(%s)' % count,
+            'sys.stdout.buffer.write(data)',
+            'sys.stdout.buffer.flush()'))
+        out = assert_python_ok('-c', code)
+        stdout = out[1]
+        self.assertEqual(len(stdout), 16)
+        return stdout
+
+    def test_urandom_subprocess(self):
+        data1 = self.get_urandom_subprocess(16)
+        data2 = self.get_urandom_subprocess(16)
+        self.assertNotEqual(data1, data2)
 
 @contextlib.contextmanager
 def _execvpe_mockup(defpath=None):
diff --git a/Lib/test/test_set.py b/Lib/test/test_set.py
--- a/Lib/test/test_set.py
+++ b/Lib/test/test_set.py
@@ -891,7 +891,17 @@ class TestBasicOpsString(TestBasicOps):
         self.set    = set(self.values)
         self.dup    = set(self.values)
         self.length = 3
-        self.repr   = "{'a', 'c', 'b'}"
+
+    def test_repr(self):
+        text = repr(self.set)
+        self.assertTrue(text.startswith('{'))
+        self.assertTrue(text.endswith('}'))
+
+        result = text[1:-1].split(', ')
+        result.sort()
+        sorted_repr_values = [repr(value) for value in self.values]
+        sorted_repr_values.sort()
+        self.assertEqual(result, sorted_repr_values)
 
 #------------------------------------------------------------------------------
 
@@ -916,11 +926,22 @@ class TestBasicOpsMixedStringBytes(TestB
         self.set    = set(self.values)
         self.dup    = set(self.values)
         self.length = 4
-        self.repr   = "{'a', b'a', 'b', b'b'}"
 
     def tearDown(self):
         self._warning_filters.__exit__(None, None, None)
 
+    def test_repr(self):
+        text = repr(self.set)
+        self.assertTrue(text.startswith('{'))
+        self.assertTrue(text.endswith('}'))
+
+        result = text[1:-1].split(', ')
+        result.sort()
+        sorted_repr_values = [repr(value) for value in self.values]
+        sorted_repr_values.sort()
+        self.assertEqual(result, sorted_repr_values)
+
+
 #==============================================================================
 
 def baditer():
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -11,8 +11,11 @@ import struct
 import sys
 import unittest
 import warnings
+from test.script_helper import assert_python_ok
 from test import support, string_tests
 
+IS_64BIT = (struct.calcsize('l') == 8)
+
 # Error handling (bad decoder return)
 def search_function(encoding):
     def decode1(input, errors="strict"):
@@ -1943,6 +1946,42 @@ class StringModuleTest(unittest.TestCase
         self.assertRaises(TypeError, _string.formatter_field_name_split, 1)
 
 
+class HashTest(unittest.TestCase):
+    def get_hash(self, text, seed=None):
+        env = {}
+        if seed is not None:
+            env['PYTHONHASHSEED'] = str(seed)
+        out = assert_python_ok(
+            '-c', 'print(hash(%r))' % text,
+            **env)
+        stdout = out[1].strip()
+        return int(stdout)
+
+    def test_empty_string(self):
+        self.assertEqual(hash(""), 0)
+
+    def test_null_hash(self):
+        # PYTHONHASHSEED=0 disables the randomized hash
+        if IS_64BIT:
+            hash_empty = 1453079729188098211
+        else:
+            hash_empty = -1600925533
+        self.assertEqual(self.get_hash("abc", 0), hash_empty)
+
+    def test_fixed_hash(self):
+        # test a fixed seed for the randomized hash
+        if IS_64BIT:
+            h = -4410911502303878509
+        else:
+            h = -206076799
+        self.assertEqual(self.get_hash("abc", 42), h)
+
+    def test_randomized_hash(self):
+        # two runs should return different hashes
+        run1 = self.get_hash("abc")
+        run2 = self.get_hash("abc")
+        self.assertNotEqual(run1, run2)
+
 def test_main():
     support.run_unittest(__name__)
 
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -1,16 +1,17 @@
 """Regresssion tests for urllib"""
 
-import urllib.parse
-import urllib.request
-import urllib.error
+import collections
+import email.message
 import http.client
-import email.message
 import io
-import unittest
-from test import support
 import os
 import sys
 import tempfile
+from test import support
+import unittest
+import urllib.error
+import urllib.parse
+import urllib.request
 
 from base64 import b64encode
 
@@ -950,8 +951,9 @@ class urlencode_Tests(unittest.TestCase)
         self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True))
         self.assertEqual("a=None&a=a",
                          urllib.parse.urlencode({"a": [None, "a"]}, True))
+        data = collections.OrderedDict([("a", 1), ("b", 1)])
         self.assertEqual("a=a&a=b",
-                         urllib.parse.urlencode({"a": {"a": 1, "b": 1}}, True))
+                         urllib.parse.urlencode({"a": data}, True))
 
     def test_urlencode_encoding(self):
         # ASCII encoding. Expect %3F with errors="replace'
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -1,6 +1,7 @@
 #! /usr/bin/env python3
 
 from test import support
+import collections
 import unittest
 import urllib.parse
 
@@ -768,7 +769,10 @@ class UrlParseTestCase(unittest.TestCase
     def test_urlencode_sequences(self):
         # Other tests incidentally urlencode things; test non-covered cases:
         # Sequence and object values.
-        result = urllib.parse.urlencode({'a': [1, 2], 'b': (3, 4, 5)}, True)
+        data = collections.OrderedDict()
+        data['a'] = [1, 2]
+        data['b'] = (3, 4, 5)
+        result = urllib.parse.urlencode(data, True)
         self.assertEqual(result, 'a=1&a=2&b=3&b=4&b=5')
 
         class Trivial:
diff --git a/Lib/tkinter/test/test_ttk/test_functions.py b/Lib/tkinter/test/test_ttk/test_functions.py
--- a/Lib/tkinter/test/test_ttk/test_functions.py
+++ b/Lib/tkinter/test/test_ttk/test_functions.py
@@ -143,7 +143,7 @@ class InternalFunctionsTest(unittest.Tes
             ('a', 'b', 'c')), ("test {a b} c", ()))
         # state spec and options
         self.assertEqual(ttk._format_elemcreate('image', False, 'test',
-            ('a', 'b'), a='x', b='y'), ("test a b", ("-a", "x", "-b", "y")))
+            ('a', 'b'), a='x'), ("test a b", ("-a", "x")))
         # format returned values as a tcl script
         # state spec with multiple states and an option with a multivalue
         self.assertEqual(ttk._format_elemcreate('image', True, 'test',
diff --git a/Makefile.pre.in b/Makefile.pre.in
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -322,6 +322,7 @@ PYTHON_OBJS=	\
 		Python/pystate.o \
 		Python/pythonrun.o \
 		Python/pytime.o \
+		Python/random.o \
 		Python/structmember.o \
 		Python/symtable.o \
 		Python/sysmodule.o \
diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c
--- a/Modules/posixmodule.c
+++ b/Modules/posixmodule.c
@@ -9329,81 +9329,36 @@ posix_getloadavg(PyObject *self, PyObjec
 }
 #endif
 
-#ifdef MS_WINDOWS
-
-PyDoc_STRVAR(win32_urandom__doc__,
+PyDoc_STRVAR(posix_urandom__doc__,
 "urandom(n) -> str\n\n\
-Return n random bytes suitable for cryptographic use.");
-
-typedef BOOL (WINAPI *CRYPTACQUIRECONTEXTA)(HCRYPTPROV *phProv,\
-              LPCSTR pszContainer, LPCSTR pszProvider, DWORD dwProvType,\
-              DWORD dwFlags );
-typedef BOOL (WINAPI *CRYPTGENRANDOM)(HCRYPTPROV hProv, DWORD dwLen,\
-              BYTE *pbBuffer );
-
-static CRYPTGENRANDOM pCryptGenRandom = NULL;
-/* This handle is never explicitly released. Instead, the operating
-   system will release it when the process terminates. */
-static HCRYPTPROV hCryptProv = 0;
+Return n pseudo-random bytes.");
 
 static PyObject*
-win32_urandom(PyObject *self, PyObject *args)
-{
-    int howMany;
-    PyObject* result;
+posix_urandom(PyObject *self, PyObject *args)
+{
+    Py_ssize_t size;
+    PyObject *result;
+    int ret;
 
     /* Read arguments */
-    if (! PyArg_ParseTuple(args, "i:urandom", &howMany))
-        return NULL;
-    if (howMany < 0)
+    if (!PyArg_ParseTuple(args, "n:urandom", &size))
+        return NULL;
+    if (size < 0)
         return PyErr_Format(PyExc_ValueError,
                             "negative argument not allowed");
 
-    if (hCryptProv == 0) {
-        HINSTANCE hAdvAPI32 = NULL;
-        CRYPTACQUIRECONTEXTA pCryptAcquireContext = NULL;
-
-        /* Obtain handle to the DLL containing CryptoAPI
-           This should not fail         */
-        hAdvAPI32 = GetModuleHandleW(L"advapi32.dll");
-        if(hAdvAPI32 == NULL)
-            return win32_error("GetModuleHandle", NULL);
-
-        /* Obtain pointers to the CryptoAPI functions
-           This will fail on some early versions of Win95 */
-        pCryptAcquireContext = (CRYPTACQUIRECONTEXTA)GetProcAddress(
-                                        hAdvAPI32,
-                                        "CryptAcquireContextA");
-        if (pCryptAcquireContext == NULL)
-            return PyErr_Format(PyExc_NotImplementedError,
-                                "CryptAcquireContextA not found");
-
-        pCryptGenRandom = (CRYPTGENRANDOM)GetProcAddress(
-                                        hAdvAPI32, "CryptGenRandom");
-        if (pCryptGenRandom == NULL)
-            return PyErr_Format(PyExc_NotImplementedError,
-                                "CryptGenRandom not found");
-
-        /* Acquire context */
-        if (! pCryptAcquireContext(&hCryptProv, NULL, NULL,
-                                   PROV_RSA_FULL, CRYPT_VERIFYCONTEXT))
-            return win32_error("CryptAcquireContext", NULL);
-    }
-
-    /* Allocate bytes */
-    result = PyBytes_FromStringAndSize(NULL, howMany);
-    if (result != NULL) {
-        /* Get random data */
-        memset(PyBytes_AS_STRING(result), 0, howMany); /* zero seed */
-        if (! pCryptGenRandom(hCryptProv, howMany, (unsigned char*)
-                              PyBytes_AS_STRING(result))) {
-            Py_DECREF(result);
-            return win32_error("CryptGenRandom", NULL);
-        }
+    result = PyBytes_FromStringAndSize(NULL, size);
+    if (result == NULL)
+        return NULL;
+
+    ret = _PyOS_URandom(PyBytes_AS_STRING(result),
+                        PyBytes_GET_SIZE(result));
+    if (ret == -1) {
+        Py_DECREF(result);
+        return NULL;
     }
     return result;
 }
-#endif
 
 PyDoc_STRVAR(device_encoding__doc__,
 "device_encoding(fd) -> str\n\n\
@@ -9445,42 +9400,6 @@ device_encoding(PyObject *self, PyObject
     return Py_None;
 }
 
-#ifdef __VMS
-/* Use openssl random routine */
-#include <openssl/rand.h>
-PyDoc_STRVAR(vms_urandom__doc__,
-"urandom(n) -> str\n\n\
-Return n random bytes suitable for cryptographic use.");
-
-static PyObject*
-vms_urandom(PyObject *self, PyObject *args)
-{
-    int howMany;
-    PyObject* result;
-
-    /* Read arguments */
-    if (! PyArg_ParseTuple(args, "i:urandom", &howMany))
-        return NULL;
-    if (howMany < 0)
-        return PyErr_Format(PyExc_ValueError,
-                            "negative argument not allowed");
-
-    /* Allocate bytes */
-    result = PyBytes_FromStringAndSize(NULL, howMany);
-    if (result != NULL) {
-        /* Get random data */
-        if (RAND_pseudo_bytes((unsigned char*)
-                              PyBytes_AS_STRING(result),
-                              howMany) < 0) {
-            Py_DECREF(result);
-            return PyErr_Format(PyExc_ValueError,
-                                "RAND_pseudo_bytes");
-        }
-    }
-    return result;
-}
-#endif
-
 #ifdef HAVE_SETRESUID
 PyDoc_STRVAR(posix_setresuid__doc__,
 "setresuid(ruid, euid, suid)\n\n\
@@ -10887,12 +10806,7 @@ static PyMethodDef posix_methods[] = {
 #ifdef HAVE_GETLOADAVG
     {"getloadavg",      posix_getloadavg, METH_NOARGS, posix_getloadavg__doc__},
 #endif
- #ifdef MS_WINDOWS
-    {"urandom", win32_urandom, METH_VARARGS, win32_urandom__doc__},
- #endif
- #ifdef __VMS
-    {"urandom", vms_urandom, METH_VARARGS, vms_urandom__doc__},
- #endif
+    {"urandom",         posix_urandom,   METH_VARARGS, posix_urandom__doc__},
 #ifdef HAVE_SETRESUID
     {"setresuid",       posix_setresuid, METH_VARARGS, posix_setresuid__doc__},
 #endif
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -166,6 +166,8 @@ extern "C" {
             *_to++ = (to_type) *_iter++;                \
     } while (0)
 
+_Py_unicode_hash_secret_t _Py_unicode_hash_secret;
+
 /* This dictionary holds all interned unicode strings.  Note that references
    to strings in this dictionary are *not* counted in the string's ob_refcnt.
    When the interned string reaches a refcnt of 0 the string deallocation
@@ -11193,8 +11195,6 @@ unicode_getitem(PyObject *self, Py_ssize
     return PyUnicode_FromOrdinal(ch);
 }
 
-/* Believe it or not, this produces the same value for ASCII strings
-   as bytes_hash(). */
 static Py_hash_t
 unicode_hash(PyObject *self)
 {
@@ -11206,10 +11206,23 @@ unicode_hash(PyObject *self)
     if (PyUnicode_READY(self) == -1)
         return -1;
     len = PyUnicode_GET_LENGTH(self);
+    if (len == 0) {
+        _PyUnicode_HASH(self) = 0;
+        return 0;
+    }
+
+    /* Issue #13703: add 2 x sizeof(Py_hash_t) random bytes (prefix and suffix)
+       to the output of hash(str) to protect Python against the hash collision
+       attack on dictionary. Without these random bytes, an attacker can
+       computes N strings with the same hash value to exploit to worst case of
+       a dict lookup, O(n^2) complexity, to cause a denial of service. See
+       oCERT advisory for the details:
+       http://www.ocert.org/advisories/ocert-2011-003.html */
+    x = _Py_unicode_hash_secret.prefix;
 
     /* The hash function as a macro, gets expanded three times below. */
 #define HASH(P) \
-    x = (Py_uhash_t)*P << 7; \
+    x ^= (Py_uhash_t)*P << 7; \
     while (--len >= 0) \
         x = (_PyHASH_MULTIPLIER*x) ^ (Py_uhash_t)*P++;
 
@@ -11234,6 +11247,7 @@ unicode_hash(PyObject *self)
     }
     }
     x ^= (Py_uhash_t)PyUnicode_GET_LENGTH(self);
+    x ^= _Py_unicode_hash_secret.suffix;
 
     if (x == -1)
         x = -2;
diff --git a/PCbuild/pythoncore.vcproj b/PCbuild/pythoncore.vcproj
--- a/PCbuild/pythoncore.vcproj
+++ b/PCbuild/pythoncore.vcproj
@@ -1891,6 +1891,10 @@
 				>
 			</File>
 			<File
+				RelativePath="..\Python\random.c"
+				>
+			</File>
+			<File
 				RelativePath="..\Python\structmember.c"
 				>
 			</File>
diff --git a/Python/pythonrun.c b/Python/pythonrun.c
--- a/Python/pythonrun.c
+++ b/Python/pythonrun.c
@@ -71,6 +71,7 @@ extern int _PyUnicode_Init(void);
 extern void _PyUnicode_Fini(void);
 extern int _PyLong_Init(void);
 extern void PyLong_Fini(void);
+extern void _PyRandom_Init(void);
 extern int _PyFaulthandler_Init(void);
 extern void _PyFaulthandler_Fini(void);
 
@@ -219,6 +220,8 @@ Py_InitializeEx(int install_sigs)
     if ((p = Py_GETENV("PYTHONDONTWRITEBYTECODE")) && *p != '\0')
         Py_DontWriteBytecodeFlag = add_flag(Py_DontWriteBytecodeFlag, p);
 
+    _PyRandom_Init();
+
     interp = PyInterpreterState_New();
     if (interp == NULL)
         Py_FatalError("Py_Initialize: can't make first interpreter");
diff --git a/Python/random.c b/Python/random.c
new file mode 100644
--- /dev/null
+++ b/Python/random.c
@@ -0,0 +1,268 @@
+#include "Python.h"
+#ifdef MS_WINDOWS
+#include <windows.h>
+#else
+#include <fcntl.h>
+#endif
+
+static int random_initialized = 0;
+
+#ifdef MS_WINDOWS
+typedef BOOL (WINAPI *CRYPTACQUIRECONTEXTA)(HCRYPTPROV *phProv,\
+              LPCSTR pszContainer, LPCSTR pszProvider, DWORD dwProvType,\
+              DWORD dwFlags );
+typedef BOOL (WINAPI *CRYPTGENRANDOM)(HCRYPTPROV hProv, DWORD dwLen,\
+              BYTE *pbBuffer );
+
+static CRYPTGENRANDOM pCryptGenRandom = NULL;
+/* This handle is never explicitly released. Instead, the operating
+   system will release it when the process terminates. */
+static HCRYPTPROV hCryptProv = 0;
+
+static int
+win32_urandom_init(int raise)
+{
+    HINSTANCE hAdvAPI32 = NULL;
+    CRYPTACQUIRECONTEXTA pCryptAcquireContext = NULL;
+
+    /* Obtain handle to the DLL containing CryptoAPI. This should not fail. */
+    hAdvAPI32 = GetModuleHandle("advapi32.dll");
+    if(hAdvAPI32 == NULL)
+        goto error;
+
+    /* Obtain pointers to the CryptoAPI functions. This will fail on some early
+       versions of Win95. */
+    pCryptAcquireContext = (CRYPTACQUIRECONTEXTA)GetProcAddress(
+                               hAdvAPI32, "CryptAcquireContextA");
+    if (pCryptAcquireContext == NULL)
+        goto error;
+
+    pCryptGenRandom = (CRYPTGENRANDOM)GetProcAddress(hAdvAPI32,
+                                                     "CryptGenRandom");
+    if (pCryptGenRandom == NULL)
+        goto error;
+
+    /* Acquire context */
+    if (! pCryptAcquireContext(&hCryptProv, NULL, NULL,
+                               PROV_RSA_FULL, CRYPT_VERIFYCONTEXT))
+        goto error;
+
+    return 0;
+
+error:
+    if (raise)
+        PyErr_SetFromWindowsErr(0);
+    else
+        Py_FatalError("Fail to initialize Windows random API (CryptoGen)");
+    return -1;
+}
+
+/* Fill buffer with size pseudo-random bytes generated by the Windows CryptoGen
+   API. Return 0 on success, or -1 on error. */
+static int
+win32_urandom(unsigned char *buffer, Py_ssize_t size, int raise)
+{
+    Py_ssize_t orig_size = size;
+    Py_ssize_t chunk;
+
+    if (hCryptProv == 0)
+    {
+        if (win32_urandom_init(raise) == -1)
+            return -1;
+    }
+
+    while (size > 0)
+    {
+        chunk = Py_MIN(size, INT_MAX);
+        if (!pCryptGenRandom(hCryptProv, chunk, buffer))
+        {
+            /* CryptGenRandom() failed */
+            if (raise)
+                PyErr_SetFromWindowsErr(0);
+            else
+                Py_FatalError("Fail to initialized the randomized hash "
+                        "secret using CryptoGen)");
+            return -1;
+        }
+        buffer += chunk;
+        size -= chunk;
+    }
+    return 0;
+}
+
+#else
+
+/* Read size bytes from /dev/urandom into buffer.
+   Call Py_FatalError() on error. */
+static void
+dev_urandom_noraise(char *buffer, Py_ssize_t size)
+{
+    int fd;
+    Py_ssize_t n;
+
+    assert (0 < size);
+
+    fd = open("/dev/urandom", O_RDONLY);
+    if (fd < 0)
+        Py_FatalError("Fail to open /dev/urandom");
+
+    while (0 < size)
+    {
+        do {
+            n = read(fd, buffer, (size_t)size);
+        } while (n < 0 && errno == EINTR);
+        if (n <= 0)
+        {
+            /* stop on error or if read(size) returned 0 */
+            Py_FatalError("Fail to read bytes from /dev/urandom");
+            break;
+        }
+        buffer += n;
+        size -= (Py_ssize_t)n;
+    }
+    close(fd);
+}
+
+/* Read size bytes from /dev/urandom into buffer.
+   Return 0 on success, raise an exception and return -1 on error. */
+static int
+dev_urandom_python(char *buffer, Py_ssize_t size)
+{
+    int fd;
+    Py_ssize_t n;
+
+    if (size <= 0)
+        return 0;
+
+    Py_BEGIN_ALLOW_THREADS
+    fd = open("/dev/urandom", O_RDONLY);
+    Py_END_ALLOW_THREADS
+    if (fd < 0)
+    {
+        PyErr_SetFromErrnoWithFilename(PyExc_OSError, "/dev/urandom");
+        return -1;
+    }
+
+    Py_BEGIN_ALLOW_THREADS
+    do {
+        do {
+            n = read(fd, buffer, (size_t)size);
+        } while (n < 0 && errno == EINTR);
+        if (n <= 0)
+            break;
+        buffer += n;
+        size -= (Py_ssize_t)n;
+    } while (0 < size);
+    Py_END_ALLOW_THREADS
+
+    if (n <= 0)
+    {
+        /* stop on error or if read(size) returned 0 */
+        if (n < 0)
+            PyErr_SetFromErrno(PyExc_OSError);
+        else
+            PyErr_Format(PyExc_RuntimeError,
+                         "Fail to read %zi bytes from /dev/urandom",
+                         size);
+        close(fd);
+        return -1;
+    }
+    close(fd);
+    return 0;
+}
+#endif
+
+/* Fill buffer with pseudo-random bytes generated by a linear congruent
+   generator (LCG):
+
+       x(n+1) = (x(n) * 214013 + 2531011) % 2^32
+
+   Use bits 23..16 of x(n) to generate a byte. */
+static void
+lcg_urandom(unsigned int x0, unsigned char *buffer, size_t size)
+{
+    size_t index;
+    unsigned int x;
+
+    x = x0;
+    for (index=0; index < size; index++) {
+        x *= 214013;
+        x += 2531011;
+        /* modulo 2 ^ (8 * sizeof(int)) */
+        buffer[index] = (x >> 16) & 0xff;
+    }
+}
+
+/* Fill buffer with size pseudo-random bytes, not suitable for cryptographic
+   use, from the operating random number generator (RNG).
+
+   Return 0 on success, raise an exception and return -1 on error. */
+int
+_PyOS_URandom(void *buffer, Py_ssize_t size)
+{
+    if (size < 0) {
+        PyErr_Format(PyExc_ValueError,
+                     "negative argument not allowed");
+        return -1;
+    }
+    if (size == 0)
+        return 0;
+
+#ifdef MS_WINDOWS
+    return win32_urandom((unsigned char *)buffer, size, 1);
+#else
+    return dev_urandom_python((char*)buffer, size);
+#endif
+}
+
+void
+_PyRandom_Init(void)
+{
+    char *env;
+    void *secret = &_Py_unicode_hash_secret;
+    Py_ssize_t secret_size = sizeof _Py_unicode_hash_secret;
+
+    if (random_initialized)
+        return;
+    random_initialized = 1;
+
+    env = Py_GETENV("PYTHONHASHSEED");
+    if (env && *env != '\0') {
+        char *endptr = env;
+        unsigned long seed;
+        seed = strtoul(env, &endptr, 10);
+        if (*endptr != '\0'
+            || seed > 4294967295UL
+            || (errno == ERANGE && seed == ULONG_MAX))
+        {
+            Py_FatalError("PYTHONHASHSEED must be an integer "
+                          "in range [0; 4294967295]");
+        }
+        if (seed == 0) {
+            /* disable the randomized Unicode hash */
+            memset(secret, 0, secret_size);
+        }
+        else {
+            lcg_urandom(seed, (unsigned char*)secret, secret_size);
+        }
+    }
+    else {
+#ifdef MS_WINDOWS
+#if 1
+        (void)win32_urandom((unsigned char *)secret, secret_size, 0);
+#else
+        /* fast but weak RNG (fast initialization, weak seed) */
+        _PyTime_timeval t;
+        unsigned int seed;
+        _PyTime_gettimeofday(&t);
+        seed = (unsigned int)t.tv_sec;
+        seed ^= t.tv_usec;
+        seed ^= getpid();
+        lcg_urandom(seed, (unsigned char*)secret, secret_size);
+#endif
+#else
+        dev_urandom_noraise((char*)secret, secret_size);
+#endif
+    }
+}
+


More information about the Python-bugs-list mailing list