[Python-checkins] cpython (merge 3.2 -> 3.3): Fix the internals of our hash functions to used unsigned values during hash

gregory.p.smith python-checkins at python.org
Tue Dec 11 04:53:01 CET 2012


http://hg.python.org/cpython/rev/8b4fff41dee5
changeset:   80821:8b4fff41dee5
branch:      3.3
parent:      80816:e47a547e63dc
parent:      80820:5c4e440852db
user:        Gregory P. Smith <greg at krypto.org>
date:        Mon Dec 10 18:32:53 2012 -0800
summary:
  Fix the internals of our hash functions to used unsigned values during hash
computation as the overflow behavior of signed integers is undefined.

NOTE: This change is smaller compared to 3.2 as much of this cleanup had
already been done.  I added the comment that my change in 3.2 added so that the
code would match up.  Otherwise this just adds or synchronizes appropriate UL
designations on some constants to be pedantic.

In practice we require compiling everything with -fwrapv which forces overflow
to be defined as twos compliment but this keeps the code cleaner for checkers
or in the case where someone has compiled it without -fwrapv or their
compiler's equivalent.

Found by Clang trunk's Undefined Behavior Sanitizer (UBSan).

Cleanup only - no functionality or hash values change.

files:
  Include/pyport.h        |   2 +-
  Objects/setobject.c     |  12 ++++++------
  Objects/tupleobject.c   |   8 ++++----
  Objects/unicodeobject.c |   2 +-
  4 files changed, 12 insertions(+), 12 deletions(-)


diff --git a/Include/pyport.h b/Include/pyport.h
--- a/Include/pyport.h
+++ b/Include/pyport.h
@@ -145,7 +145,7 @@
 #endif
 
 /* Prime multiplier used in string and various other hashes. */
-#define _PyHASH_MULTIPLIER 1000003  /* 0xf4243 */
+#define _PyHASH_MULTIPLIER 1000003UL  /* 0xf4243 */
 
 /* Parameters used for the numeric hash implementation.  See notes for
    _Py_HashDouble in Objects/object.c.  Numeric hashes are based on
diff --git a/Objects/setobject.c b/Objects/setobject.c
--- a/Objects/setobject.c
+++ b/Objects/setobject.c
@@ -77,7 +77,7 @@
 static setentry *
 set_lookkey(PySetObject *so, PyObject *key, register Py_hash_t hash)
 {
-    register size_t i;
+    register size_t i;  /* Unsigned for defined overflow behavior. */
     register size_t perturb;
     register setentry *freeslot;
     register size_t mask = so->mask;
@@ -159,7 +159,7 @@
 static setentry *
 set_lookkey_unicode(PySetObject *so, PyObject *key, register Py_hash_t hash)
 {
-    register size_t i;
+    register size_t i;  /* Unsigned for defined overflow behavior. */
     register size_t perturb;
     register setentry *freeslot;
     register size_t mask = so->mask;
@@ -760,7 +760,7 @@
 frozenset_hash(PyObject *self)
 {
     PySetObject *so = (PySetObject *)self;
-    Py_uhash_t h, hash = 1927868237U;
+    Py_uhash_t h, hash = 1927868237UL;
     setentry *entry;
     Py_ssize_t pos = 0;
 
@@ -775,11 +775,11 @@
            hashes so that many distinct combinations collapse to only
            a handful of distinct hash values. */
         h = entry->hash;
-        hash ^= (h ^ (h << 16) ^ 89869747U)  * 3644798167U;
+        hash ^= (h ^ (h << 16) ^ 89869747UL)  * 3644798167UL;
     }
-    hash = hash * 69069U + 907133923U;
+    hash = hash * 69069U + 907133923UL;
     if (hash == -1)
-        hash = 590923713U;
+        hash = 590923713UL;
     so->hash = hash;
     return hash;
 }
diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c
--- a/Objects/tupleobject.c
+++ b/Objects/tupleobject.c
@@ -327,12 +327,12 @@
 static Py_hash_t
 tuplehash(PyTupleObject *v)
 {
-    register Py_uhash_t x;
+    register Py_uhash_t x;  /* Unsigned for defined overflow behavior. */
     register Py_hash_t y;
     register Py_ssize_t len = Py_SIZE(v);
     register PyObject **p;
     Py_uhash_t mult = _PyHASH_MULTIPLIER;
-    x = 0x345678;
+    x = 0x345678UL;
     p = v->ob_item;
     while (--len >= 0) {
         y = PyObject_Hash(*p++);
@@ -340,9 +340,9 @@
             return -1;
         x = (x ^ y) * mult;
         /* the cast might truncate len; that doesn't change hash stability */
-        mult += (Py_hash_t)(82520L + len + len);
+        mult += (Py_hash_t)(82520UL + len + len);
     }
-    x += 97531L;
+    x += 97531UL;
     if (x == (Py_uhash_t)-1)
         x = -2;
     return x;
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -11008,7 +11008,7 @@
 unicode_hash(PyObject *self)
 {
     Py_ssize_t len;
-    Py_uhash_t x;
+    Py_uhash_t x;  /* Unsigned for defined overflow behavior. */
 
 #ifdef Py_DEBUG
     assert(_Py_HashSecret_Initialized);

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list