[issue13703] Hash collision security issue

STINNER Victor report at bugs.python.org
Tue Jan 10 12:38:00 CET 2012


STINNER Victor <victor.stinner at haypocalc.com> added the comment:

Version 3 of my patch:
 - Add PYTHONHASHSEED environment variable to get a fixed seed or to
disable the randomized hash function (PYTHONHASHSEED=0)
 - Add tests on the randomized hash function
 - Add more tests on os.urandom()

----------
Added file: http://bugs.python.org/file24193/random-3.patch

_______________________________________
Python tracker <report at bugs.python.org>
<http://bugs.python.org/issue13703>
_______________________________________
-------------- next part --------------
diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst
--- a/Doc/using/cmdline.rst
+++ b/Doc/using/cmdline.rst
@@ -460,6 +460,13 @@ These environment variables influence Py
    option.
 
 
+.. envvar:: PYTHONHASHSEED
+
+   If this is set, it is used as a fixed seed for the Unicode randomized hash:
+   number in range [0; 4294967295]. The value 0 disables the Unicode randomized
+   hash.
+
+
 .. envvar:: PYTHONIOENCODING
 
    If this is set before running the interpreter, it overrides the encoding used
diff --git a/Include/pythonrun.h b/Include/pythonrun.h
--- a/Include/pythonrun.h
+++ b/Include/pythonrun.h
@@ -246,6 +246,8 @@ typedef void (*PyOS_sighandler_t)(int);
 PyAPI_FUNC(PyOS_sighandler_t) PyOS_getsig(int);
 PyAPI_FUNC(PyOS_sighandler_t) PyOS_setsig(int, PyOS_sighandler_t);
 
+/* Random */
+PyAPI_FUNC(int PyOS_URandom) (void *buffer, Py_ssize_t size);
 
 #ifdef __cplusplus
 }
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -376,6 +376,18 @@ typedef struct {
 PyAPI_DATA(PyTypeObject) PyUnicode_Type;
 PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
 
+/* Issue #13703: add 2 x sizeof(Py_hash_t) random bytes to the output of
+   hash(str) to make the dict hash attack more complex. The attack computes N
+   strings with the same hash value to exploit to worst case of a dict lookup:
+   O(n^2) complexity. Without these random bytes, an attacker can precompute
+   the N keys only once to cause a denial of service. See oCERT advisory for the
+   details: http://www.ocert.org/advisories/ocert-2011-003.html */
+typedef struct {
+    Py_hash_t prefix;
+    Py_hash_t suffix;
+} _Py_unicode_hash_secret_t;
+PyAPI_DATA(_Py_unicode_hash_secret_t) _Py_unicode_hash_secret;
+
 #define PyUnicode_Check(op) \
                  PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS)
 #define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type)
diff --git a/Lib/os.py b/Lib/os.py
--- a/Lib/os.py
+++ b/Lib/os.py
@@ -761,23 +761,6 @@ try:
 except NameError: # statvfs_result may not exist
     pass
 
-if not _exists("urandom"):
-    def urandom(n):
-        """urandom(n) -> str
-
-        Return a string of n random bytes suitable for cryptographic use.
-
-        """
-        try:
-            _urandomfd = open("/dev/urandom", O_RDONLY)
-        except (OSError, IOError):
-            raise NotImplementedError("/dev/urandom (or equivalent) not found")
-        bs = b""
-        while len(bs) < n:
-            bs += read(_urandomfd, n - len(bs))
-        close(_urandomfd)
-        return bs
-
 # Supply os.popen()
 def popen(cmd, mode="r", buffering=-1):
     if not isinstance(cmd, str):
diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py
--- a/Lib/test/test_os.py
+++ b/Lib/test/test_os.py
@@ -12,6 +12,7 @@ import subprocess
 import time
 import shutil
 from test import support
+from test.script_helper import assert_python_ok
 import contextlib
 import mmap
 import platform
@@ -630,14 +631,32 @@ class DevNullTests(unittest.TestCase):
             self.assertEqual(f.read(), b'')
 
 class URandomTests(unittest.TestCase):
-    def test_urandom(self):
-        try:
-            self.assertEqual(len(os.urandom(1)), 1)
-            self.assertEqual(len(os.urandom(10)), 10)
-            self.assertEqual(len(os.urandom(100)), 100)
-            self.assertEqual(len(os.urandom(1000)), 1000)
-        except NotImplementedError:
-            pass
+    def test_urandom_length(self):
+        self.assertEqual(len(os.urandom(1)), 1)
+        self.assertEqual(len(os.urandom(10)), 10)
+        self.assertEqual(len(os.urandom(100)), 100)
+        self.assertEqual(len(os.urandom(1000)), 1000)
+
+    def test_urandom_value(self):
+        data1 = os.urandom(16)
+        data2 = os.urandom(16)
+        self.assertNotEqual(data1, data2)
+
+    def get_urandom_subprocess(self, count):
+        code = '\n'.join((
+            'import os, sys',
+            'data = os.urandom(%s)' % count,
+            'sys.stdout.buffer.write(data)',
+            'sys.stdout.buffer.flush()'))
+        out = assert_python_ok('-c', code)
+        stdout = out[1]
+        self.assertEqual(len(stdout), 16)
+        return stdout
+
+    def test_urandom_subprocess(self):
+        data1 = self.get_urandom_subprocess(16)
+        data2 = self.get_urandom_subprocess(16)
+        self.assertNotEqual(data1, data2)
 
 @contextlib.contextmanager
 def _execvpe_mockup(defpath=None):
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -11,6 +11,7 @@ import struct
 import sys
 import unittest
 import warnings
+from test.script_helper import assert_python_ok
 from test import support, string_tests
 
 # Error handling (bad decoder return)
@@ -1891,6 +1892,34 @@ class StringModuleTest(unittest.TestCase
         self.assertRaises(TypeError, _string.formatter_field_name_split, 1)
 
 
+class HashTest(unittest.TestCase):
+    def get_hash(self, text, seed=None):
+        env = {}
+        if seed is not None:
+            env['PYTHONHASHSEED'] = str(seed)
+        out = assert_python_ok(
+            '-c', 'print(hash(%r))' % text,
+            **env)
+        stdout = out[1].strip()
+        return int(stdout)
+
+    def test_empty_string(self):
+        self.assertEqual(hash(""), 0)
+
+    def test_null_hash(self):
+        # PYTHONHASHSEED=0 disables the randomized hash
+        self.assertEqual(self.get_hash("abc", 0), -1600925533)
+
+    def test_fixed_hash(self):
+        # test a fixed seed for the randomized hash
+        self.assertEqual(self.get_hash("abc", 42), -206076799)
+
+    def test_randomized_hash(self):
+        # two runs should return different hashes
+        run1 = self.get_hash("abc")
+        run2 = self.get_hash("abc")
+        self.assertNotEqual(run1, run2)
+
 def test_main():
     support.run_unittest(__name__)
 
diff --git a/Makefile.pre.in b/Makefile.pre.in
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -322,6 +322,7 @@ PYTHON_OBJS=	\
 		Python/pystate.o \
 		Python/pythonrun.o \
 		Python/pytime.o \
+		Python/random.o \
 		Python/structmember.o \
 		Python/symtable.o \
 		Python/sysmodule.o \
diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c
--- a/Modules/posixmodule.c
+++ b/Modules/posixmodule.c
@@ -9326,81 +9326,37 @@ posix_getloadavg(PyObject *self, PyObjec
 }
 #endif
 
-#ifdef MS_WINDOWS
-
-PyDoc_STRVAR(win32_urandom__doc__,
-"urandom(n) -> str\n\n\
+PyDoc_STRVAR(posix_urandom__doc__,
+"urandom(n) -> bytes\n\n\
 Return n random bytes suitable for cryptographic use.");
 
-typedef BOOL (WINAPI *CRYPTACQUIRECONTEXTA)(HCRYPTPROV *phProv,\
-              LPCSTR pszContainer, LPCSTR pszProvider, DWORD dwProvType,\
-              DWORD dwFlags );
-typedef BOOL (WINAPI *CRYPTGENRANDOM)(HCRYPTPROV hProv, DWORD dwLen,\
-              BYTE *pbBuffer );
-
-static CRYPTGENRANDOM pCryptGenRandom = NULL;
-/* This handle is never explicitly released. Instead, the operating
-   system will release it when the process terminates. */
-static HCRYPTPROV hCryptProv = 0;
-
 static PyObject*
-win32_urandom(PyObject *self, PyObject *args)
-{
-    int howMany;
-    PyObject* result;
+posix_urandom(PyObject *self, PyObject *args)
+{
+    Py_ssize_t howMany;
+    PyObject *result;
+    int ret;
 
     /* Read arguments */
-    if (! PyArg_ParseTuple(args, "i:urandom", &howMany))
+    if (! PyArg_ParseTuple(args, "n:urandom", &howMany))
         return NULL;
     if (howMany < 0)
         return PyErr_Format(PyExc_ValueError,
                             "negative argument not allowed");
 
-    if (hCryptProv == 0) {
-        HINSTANCE hAdvAPI32 = NULL;
-        CRYPTACQUIRECONTEXTA pCryptAcquireContext = NULL;
-
-        /* Obtain handle to the DLL containing CryptoAPI
-           This should not fail         */
-        hAdvAPI32 = GetModuleHandle("advapi32.dll");
-        if(hAdvAPI32 == NULL)
-            return win32_error("GetModuleHandle", NULL);
-
-        /* Obtain pointers to the CryptoAPI functions
-           This will fail on some early versions of Win95 */
-        pCryptAcquireContext = (CRYPTACQUIRECONTEXTA)GetProcAddress(
-                                        hAdvAPI32,
-                                        "CryptAcquireContextA");
-        if (pCryptAcquireContext == NULL)
-            return PyErr_Format(PyExc_NotImplementedError,
-                                "CryptAcquireContextA not found");
-
-        pCryptGenRandom = (CRYPTGENRANDOM)GetProcAddress(
-                                        hAdvAPI32, "CryptGenRandom");
-        if (pCryptGenRandom == NULL)
-            return PyErr_Format(PyExc_NotImplementedError,
-                                "CryptGenRandom not found");
-
-        /* Acquire context */
-        if (! pCryptAcquireContext(&hCryptProv, NULL, NULL,
-                                   PROV_RSA_FULL, CRYPT_VERIFYCONTEXT))
-            return win32_error("CryptAcquireContext", NULL);
-    }
-
-    /* Allocate bytes */
     result = PyBytes_FromStringAndSize(NULL, howMany);
-    if (result != NULL) {
-        /* Get random data */
-        memset(PyBytes_AS_STRING(result), 0, howMany); /* zero seed */
-        if (! pCryptGenRandom(hCryptProv, howMany, (unsigned char*)
-                              PyBytes_AS_STRING(result))) {
-            Py_DECREF(result);
-            return win32_error("CryptGenRandom", NULL);
-        }
+    if (result == NULL)
+        return NULL;
+
+    ret = PyOS_URandom(PyBytes_AS_STRING(result),
+                       PyBytes_GET_SIZE(result));
+    if (ret == -1) {
+        Py_DECREF(result);
+        PyErr_SetString(PyExc_RuntimeError, "Fail to generate random bytes");
+        return NULL;
     }
     return result;
 }
-#endif
 
 PyDoc_STRVAR(device_encoding__doc__,
 "device_encoding(fd) -> str\n\n\
@@ -9442,42 +9398,6 @@ device_encoding(PyObject *self, PyObject
     return Py_None;
 }
 
-#ifdef __VMS
-/* Use openssl random routine */
-#include <openssl/rand.h>
-PyDoc_STRVAR(vms_urandom__doc__,
-"urandom(n) -> str\n\n\
-Return n random bytes suitable for cryptographic use.");
-
-static PyObject*
-vms_urandom(PyObject *self, PyObject *args)
-{
-    int howMany;
-    PyObject* result;
-
-    /* Read arguments */
-    if (! PyArg_ParseTuple(args, "i:urandom", &howMany))
-        return NULL;
-    if (howMany < 0)
-        return PyErr_Format(PyExc_ValueError,
-                            "negative argument not allowed");
-
-    /* Allocate bytes */
-    result = PyBytes_FromStringAndSize(NULL, howMany);
-    if (result != NULL) {
-        /* Get random data */
-        if (RAND_pseudo_bytes((unsigned char*)
-                              PyBytes_AS_STRING(result),
-                              howMany) < 0) {
-            Py_DECREF(result);
-            return PyErr_Format(PyExc_ValueError,
-                                "RAND_pseudo_bytes");
-        }
-    }
-    return result;
-}
-#endif
-
 #ifdef HAVE_SETRESUID
 PyDoc_STRVAR(posix_setresuid__doc__,
 "setresuid(ruid, euid, suid)\n\n\
@@ -10884,12 +10804,7 @@ static PyMethodDef posix_methods[] = {
 #ifdef HAVE_GETLOADAVG
     {"getloadavg",      posix_getloadavg, METH_NOARGS, posix_getloadavg__doc__},
 #endif
- #ifdef MS_WINDOWS
-    {"urandom", win32_urandom, METH_VARARGS, win32_urandom__doc__},
- #endif
- #ifdef __VMS
-    {"urandom", vms_urandom, METH_VARARGS, vms_urandom__doc__},
- #endif
+    {"urandom",         posix_urandom,   METH_VARARGS, posix_urandom__doc__},
 #ifdef HAVE_SETRESUID
     {"setresuid",       posix_setresuid, METH_VARARGS, posix_setresuid__doc__},
 #endif
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -165,6 +165,8 @@ extern "C" {
             *_to++ = (to_type) *_iter++;                \
     } while (0)
 
+_Py_unicode_hash_secret_t _Py_unicode_hash_secret;
+
 /* This dictionary holds all interned unicode strings.  Note that references
    to strings in this dictionary are *not* counted in the string's ob_refcnt.
    When the interned string reaches a refcnt of 0 the string deallocation
@@ -11143,8 +11145,6 @@ unicode_getitem(PyObject *self, Py_ssize
     return PyUnicode_FromOrdinal(ch);
 }
 
-/* Believe it or not, this produces the same value for ASCII strings
-   as bytes_hash(). */
 static Py_hash_t
 unicode_hash(PyObject *self)
 {
@@ -11156,13 +11156,19 @@ unicode_hash(PyObject *self)
     if (PyUnicode_READY(self) == -1)
         return -1;
     len = PyUnicode_GET_LENGTH(self);
+    if (len == 0) {
+        _PyUnicode_HASH(self) = 0;
+        return 0;
+    }
 
     /* The hash function as a macro, gets expanded three times below. */
 #define HASH(P) \
-    x = (Py_uhash_t)*P << 7; \
+    x ^= (Py_uhash_t)*P << 7; \
     while (--len >= 0) \
         x = (1000003*x) ^ (Py_uhash_t)*P++;
 
+    x = _Py_unicode_hash_secret.prefix;
+
     switch (PyUnicode_KIND(self)) {
     case PyUnicode_1BYTE_KIND: {
         const unsigned char *c = PyUnicode_1BYTE_DATA(self);
@@ -11184,6 +11190,7 @@ unicode_hash(PyObject *self)
     }
     }
     x ^= (Py_uhash_t)PyUnicode_GET_LENGTH(self);
+    x ^= _Py_unicode_hash_secret.suffix;
 
     if (x == -1)
         x = -2;
diff --git a/Python/pythonrun.c b/Python/pythonrun.c
--- a/Python/pythonrun.c
+++ b/Python/pythonrun.c
@@ -71,6 +71,7 @@ extern int _PyUnicode_Init(void);
 extern void _PyUnicode_Fini(void);
 extern int _PyLong_Init(void);
 extern void PyLong_Fini(void);
+extern void _PyRandom_Init(void);
 extern int _PyFaulthandler_Init(void);
 extern void _PyFaulthandler_Fini(void);
 
@@ -219,6 +220,8 @@ Py_InitializeEx(int install_sigs)
     if ((p = Py_GETENV("PYTHONDONTWRITEBYTECODE")) && *p != '\0')
         Py_DontWriteBytecodeFlag = add_flag(Py_DontWriteBytecodeFlag, p);
 
+    _PyRandom_Init();
+
     interp = PyInterpreterState_New();
     if (interp == NULL)
         Py_FatalError("Py_Initialize: can't make first interpreter");


More information about the Python-bugs-list mailing list