[Python-checkins] cpython: enable hash randomization by default

benjamin.peterson python-checkins at python.org
Tue Feb 21 22:08:11 CET 2012


http://hg.python.org/cpython/rev/198e31774f0f
changeset:   75152:198e31774f0f
parent:      75146:75990a013d4d
user:        Benjamin Peterson <benjamin at python.org>
date:        Tue Feb 21 16:08:05 2012 -0500
summary:
  enable hash randomization by default

files:
  Doc/reference/datamodel.rst |  24 +++++++++++++++++++++++-
  Doc/using/cmdline.rst       |  10 ++++------
  Lib/test/test_cmd_line.py   |   4 ++--
  Lib/test/test_hash.py       |   4 ++--
  Misc/NEWS                   |   7 +++----
  Misc/python.man             |  23 +++--------------------
  Modules/main.c              |  25 +++++++------------------
  Python/random.c             |  11 -----------
  Tools/scripts/run_tests.py  |   1 -
  9 files changed, 44 insertions(+), 65 deletions(-)


diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst
--- a/Doc/reference/datamodel.rst
+++ b/Doc/reference/datamodel.rst
@@ -1277,7 +1277,29 @@
    inheritance of :meth:`__hash__` will be blocked, just as if :attr:`__hash__`
    had been explicitly set to :const:`None`.
 
-   See also the :option:`-R` command-line option.
+
+   .. note::
+
+      By default, the :meth:`__hash__` values of str, bytes and datetime
+      objects are "salted" with an unpredictable random value.  Although they
+      remain constant within an individual Python process, they are not
+      predictable between repeated invocations of Python.
+
+      This is intended to provide protection against a denial-of-service caused
+      by carefully-chosen inputs that exploit the worst case performance of a
+      dict insertion, O(n^2) complexity.  See
+      http://www.ocert.org/advisories/ocert-2011-003.html for details.
+
+      Changing hash values affects the order in which keys are retrieved from a
+      dict.  Python has never made guarantees about this ordering (and it
+      typically varies between 32-bit and 64-bit builds), so code must not rely
+      on it.  Code that implicitly depends on this non-guaranteed behavior can
+      set :envvar:`PYTHONHASHSEED` to ``0`` to disable the randomization.
+
+      See also :envvar:`PYTHONHASHSEED`.
+
+   .. versionchanged:: 3.3
+      Hash randomization is enabled by default.
 
 
 .. method:: object.__bool__(self)
diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst
--- a/Doc/using/cmdline.rst
+++ b/Doc/using/cmdline.rst
@@ -24,7 +24,7 @@
 
 When invoking Python, you may specify any of these options::
 
-    python [-bBdEhiORqsSuvVWx?] [-c command | -m module-name | script | - ] [args]
+    python [-bBdEhiOqsSuvVWx?] [-c command | -m module-name | script | - ] [args]
 
 The most common use case is, of course, a simple invocation of a script::
 
@@ -486,9 +486,8 @@
 
 .. envvar:: PYTHONHASHSEED
 
-   If this variable is set to ``random``, the effect is the same as specifying
-   the :option:`-R` option: a random value is used to seed the hashes of str,
-   bytes and datetime objects.
+   If this variable is not set or is set to ``random``, a random value is used
+   to seed the hashes of str, bytes and datetime objects.
 
    If :envvar:`PYTHONHASHSEED` is set to an integer value, it is used as a fixed
    seed for generating the hash() of the types covered by the hash
@@ -499,8 +498,7 @@
    values.
 
    The integer must be a decimal number in the range [0,4294967295].  Specifying
-   the value 0 will lead to the same hash values as when hash randomization is
-   disabled.
+   the value 0 will disable hash randomization.
 
    .. versionadded:: 3.2.3
 
diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py
--- a/Lib/test/test_cmd_line.py
+++ b/Lib/test/test_cmd_line.py
@@ -330,14 +330,14 @@
         hashes = []
         for i in range(2):
             code = 'print(hash("spam"))'
-            rc, out, err = assert_python_ok('-R', '-c', code)
+            rc, out, err = assert_python_ok('-c', code)
             self.assertEqual(rc, 0)
             hashes.append(out)
         self.assertNotEqual(hashes[0], hashes[1])
 
         # Verify that sys.flags contains hash_randomization
         code = 'import sys; print("random is", sys.flags.hash_randomization)'
-        rc, out, err = assert_python_ok('-R', '-c', code)
+        rc, out, err = assert_python_ok('-c', code)
         self.assertEqual(rc, 0)
         self.assertIn(b'random is 1', out)
 
diff --git a/Lib/test/test_hash.py b/Lib/test/test_hash.py
--- a/Lib/test/test_hash.py
+++ b/Lib/test/test_hash.py
@@ -159,8 +159,8 @@
         else:
             known_hash_of_obj = -1600925533
 
-        # Randomization is disabled by default:
-        self.assertEqual(self.get_hash(self.repr_), known_hash_of_obj)
+        # Randomization is enabled by default:
+        self.assertNotEqual(self.get_hash(self.repr_), known_hash_of_obj)
 
         # It can also be disabled by setting the seed to 0:
         self.assertEqual(self.get_hash(self.repr_, seed=0), known_hash_of_obj)
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -18,10 +18,9 @@
 - Issue #14051: Allow arbitrary attributes to be set of classmethod and
   staticmethod.
 
-- Issue #13703: oCERT-2011-003: add -R command-line option and PYTHONHASHSEED
-  environment variable, to provide an opt-in way to protect against denial of
-  service attacks due to hash collisions within the dict and set types.  Patch
-  by David Malcolm, based on work by Victor Stinner.
+- Issue #13703: oCERT-2011-003: Randomize hashes of str and bytes to protect
+  against denial of service attacks due to hash collisions within the dict and
+  set types.  Patch by David Malcolm, based on work by Victor Stinner.
 
 - Issue #13020: Fix a reference leak when allocating a structsequence object
   fails.  Patch by Suman Saha.
diff --git a/Misc/python.man b/Misc/python.man
--- a/Misc/python.man
+++ b/Misc/python.man
@@ -37,9 +37,6 @@
 .B \-OO
 ]
 [
-.B \-R
-]
-[
 .B \-s
 ]
 [
@@ -151,18 +148,6 @@
 Do not print the version and copyright messages. These messages are 
 also suppressed in non-interactive mode.
 .TP
-.B \-R
-Turn on "hash randomization", so that the hash() values of str, bytes and
-datetime objects are "salted" with an unpredictable pseudo-random value.
-Although they remain constant within an individual Python process, they are
-not predictable between repeated invocations of Python.
-.IP
-This is intended to provide protection against a denial of service
-caused by carefully-chosen inputs that exploit the worst case performance
-of a dict insertion, O(n^2) complexity.  See
-http://www.ocert.org/advisories/ocert-2011-003.html
-for details.
-.TP
 .B \-s
 Don't add user site directory to sys.path.
 .TP
@@ -418,9 +403,8 @@
 If this is set to a comma-separated string it is equivalent to
 specifying the \fB\-W\fP option for each separate value.
 .IP PYTHONHASHSEED
-If this variable is set to "random", the effect is the same as specifying
-the \fB-R\fP option: a random value is used to seed the hashes of str,
-bytes and datetime objects.
+If this variable is set to "random", a random value is used to seed the hashes
+of str, bytes and datetime objects.
 
 If PYTHONHASHSEED is set to an integer value, it is used as a fixed seed for
 generating the hash() of the types covered by the hash randomization.  Its
@@ -429,8 +413,7 @@
 values.
 
 The integer must be a decimal number in the range [0,4294967295].  Specifying
-the value 0 will lead to the same hash values as when hash randomization is
-disabled.
+the value 0 will disable hash randomization.
 .SH AUTHOR
 The Python Software Foundation: http://www.python.org/psf
 .SH INTERNET RESOURCES
diff --git a/Modules/main.c b/Modules/main.c
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -73,9 +73,6 @@
 -O     : optimize generated bytecode slightly; also PYTHONOPTIMIZE=x\n\
 -OO    : remove doc-strings in addition to the -O optimizations\n\
 -q     : don't print version and copyright messages on interactive startup\n\
--R     : use a pseudo-random salt to make hash() values of various types be\n\
-         unpredictable between separate invocations of the interpreter, as\n\
-         a defence against denial-of-service attacks\n\
 -s     : don't add user site directory to sys.path; also PYTHONNOUSERSITE\n\
 -S     : don't imply 'import site' on initialization\n\
 ";
@@ -107,10 +104,10 @@
 "PYTHONFAULTHANDLER: dump the Python traceback on fatal errors.\n\
 ";
 static char *usage_6 = "\
-PYTHONHASHSEED: if this variable is set to ``random``, the effect is the same \n\
-   as specifying the :option:`-R` option: a random value is used to seed the\n\
-   hashes of str, bytes and datetime objects.  It can also be set to an integer\n\
-   in the range [0,4294967295] to get hash values with a predictable seed.\n\
+PYTHONHASHSEED: if this variable is set to ``random``, a random value is used\n\
+   to seed the hashes of str, bytes and datetime objects.  It can also be\n\
+   set to an integer in the range [0,4294967295] to get hash values with a\n\
+   predictable seed.\n\
 ";
 
 static int
@@ -347,21 +344,13 @@
                not interpreter options. */
             break;
         }
-        switch (c) {
-        case 'E':
+        if (c == 'E') {
             Py_IgnoreEnvironmentFlag++;
             break;
-        case 'R':
-            Py_HashRandomizationFlag++;
-            break;
         }
     }
-    /* The variable is only tested for existence here; _PyRandom_Init will
-       check its value further. */
-    if (!Py_HashRandomizationFlag &&
-        (p = Py_GETENV("PYTHONHASHSEED")) && *p != '\0')
-        Py_HashRandomizationFlag = 1;
 
+    Py_HashRandomizationFlag = 1;
     _PyRandom_Init();
 
     PySys_ResetWarnOptions();
@@ -468,7 +457,7 @@
             break;
 
         case 'R':
-            /* Already handled above */
+            /* Ignored */
             break;
 
         /* This space reserved for other options */
diff --git a/Python/random.c b/Python/random.c
--- a/Python/random.c
+++ b/Python/random.c
@@ -257,17 +257,6 @@
     _Py_HashSecret_Initialized = 1;
 
     /*
-      By default, hash randomization is disabled, and only
-      enabled if PYTHONHASHSEED is set to non-empty or if
-      "-R" is provided at the command line:
-    */
-    if (!Py_HashRandomizationFlag) {
-        /* Disable the randomized hash: */
-        memset(secret, 0, secret_size);
-        return;
-    }
-
-    /*
       Hash randomization is enabled.  Generate a per-process secret,
       using PYTHONHASHSEED if provided.
     */
diff --git a/Tools/scripts/run_tests.py b/Tools/scripts/run_tests.py
--- a/Tools/scripts/run_tests.py
+++ b/Tools/scripts/run_tests.py
@@ -25,7 +25,6 @@
             '-W', 'default',      # Warnings set to 'default'
             '-bb',                # Warnings about bytes/bytearray
             '-E',                 # Ignore environment variables
-            '-R',                 # Randomize hashing
             ]
     # Allow user-specified interpreter options to override our defaults.
     args.extend(test.support.args_from_interpreter_flags())

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list