[pypy-svn] pypy default: Test and "fix" for hash(bigint), whose value is supposed to be invariant modulo 2**64-1 to make decimal.py happy.

arigo commits-noreply at bitbucket.org
Thu Jan 27 17:35:25 CET 2011


Author: Armin Rigo <arigo at tunes.org>
Branch: 
Changeset: r41400:fcb6010db6ed
Date: 2011-01-27 17:34 +0100
http://bitbucket.org/pypy/pypy/changeset/fcb6010db6ed/

Log:	Test and "fix" for hash(bigint), whose value is supposed to be
	invariant modulo 2**64-1 to make decimal.py happy.

diff --git a/pypy/rlib/test/test_rbigint.py b/pypy/rlib/test/test_rbigint.py
--- a/pypy/rlib/test/test_rbigint.py
+++ b/pypy/rlib/test/test_rbigint.py
@@ -370,6 +370,23 @@
         assert rbigint.fromlong(-4).bit_length() == 3
         assert rbigint.fromlong(1<<40).bit_length() == 41
 
+    def test_hash(self):
+        for i in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+                  sys.maxint-3, sys.maxint-2, sys.maxint-1, sys.maxint,
+                  ] + [randint(0, sys.maxint) for _ in range(100)]:
+            # hash of machine-sized integers
+            assert rbigint.fromint(i).hash() == i
+            # hash of negative machine-sized integers
+            assert rbigint.fromint(-i-1).hash() == -i-1
+        #
+        for i in range(200):
+            # hash of large integers: should be equal to the hash of the
+            # integer reduced modulo 2**64-1, to make decimal.py happy
+            x = randint(0, sys.maxint**5)
+            y = x % (2**64-1)
+            assert rbigint.fromlong(x).hash() == rbigint.fromlong(y).hash()
+            assert rbigint.fromlong(-x).hash() == rbigint.fromlong(-y).hash()
+
 class TestInternalFunctions(object):
     def test__inplace_divrem1(self):
         # signs are not handled in the helpers!

diff --git a/pypy/rlib/rbigint.py b/pypy/rlib/rbigint.py
--- a/pypy/rlib/rbigint.py
+++ b/pypy/rlib/rbigint.py
@@ -1633,15 +1633,23 @@
 def _hash(v):
     # This is designed so that Python ints and longs with the
     # same value hash to the same value, otherwise comparisons
-    # of mapping keys will turn out weird
+    # of mapping keys will turn out weird.  Moreover, purely
+    # to please decimal.py, we return a hash that satisfies
+    # hash(x) == hash(x % ULONG_MAX).  In particular, this
+    # implies that hash(x) == hash(x % (2**64-1)).
     i = v._numdigits() - 1
     sign = v.sign
-    x = 0
+    x = r_uint(0)
     LONG_BIT_SHIFT = LONG_BIT - SHIFT
     while i >= 0:
         # Force a native long #-bits (32 or 64) circular shift
-        x = ((x << SHIFT) & ~MASK) | ((x >> LONG_BIT_SHIFT) & MASK)
-        x += v.digits[i]
+        x = (x << SHIFT) | (x >> LONG_BIT_SHIFT)
+        x += r_uint(v.digits[i])
+        # If the addition above overflowed we compensate by
+        # incrementing.  This preserves the value modulo
+        # ULONG_MAX.
+        if x < r_uint(v.digits[i]):
+            x += 1
         i -= 1
     x = intmask(x * sign)
     return x


More information about the Pypy-commit mailing list