[pypy-commit] pypy py3.5: hg merge py3.5-siphash24

arigo pypy.commits at gmail.com
Tue Jan 31 11:44:17 EST 2017


Author: Armin Rigo <arigo at tunes.org>
Branch: py3.5
Changeset: r89858:94325768daae
Date: 2017-01-31 17:37 +0100
http://bitbucket.org/pypy/pypy/changeset/94325768daae/

Log:	hg merge py3.5-siphash24

diff too long, truncating to 2000 out of 2956 lines

diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py
--- a/pypy/config/pypyoption.py
+++ b/pypy/config/pypyoption.py
@@ -201,6 +201,13 @@
                "issue, you can disable them here",
                default=True),
 
+    ChoiceOption("hash",
+                 "The hash function to use for strings: fnv from CPython 2.7"
+                 " or siphash24 from CPython >= 3.4",
+                 ["fnv", "siphash24"],
+                 default="siphash24",
+                 cmdline="--hash"),
+
     OptionDescription("std", "Standard Object Space Options", [
         BoolOption("withtproxy", "support transparent proxies",
                    default=True),
diff --git a/pypy/goal/targetpypystandalone.py b/pypy/goal/targetpypystandalone.py
--- a/pypy/goal/targetpypystandalone.py
+++ b/pypy/goal/targetpypystandalone.py
@@ -36,6 +36,7 @@
         w_run_toplevel = space.getitem(w_dict, space.wrap('run_toplevel'))
         w_initstdio = space.getitem(w_dict, space.wrap('initstdio'))
         withjit = space.config.objspace.usemodules.pypyjit
+        hashfunc = space.config.objspace.hash
     else:
         w_initstdio = space.appexec([], """():
             return lambda unbuffered: None
@@ -46,6 +47,10 @@
             from rpython.jit.backend.hlinfo import highleveljitinfo
             highleveljitinfo.sys_executable = argv[0]
 
+        if hashfunc == "siphash24":
+            from rpython.rlib import rsiphash
+            rsiphash.enable_siphash24()
+
         #debug("entry point starting")
         #for arg in argv:
         #    debug(" argv -> " + arg)
diff --git a/pypy/module/_cffi_backend/newtype.py b/pypy/module/_cffi_backend/newtype.py
--- a/pypy/module/_cffi_backend/newtype.py
+++ b/pypy/module/_cffi_backend/newtype.py
@@ -23,13 +23,34 @@
 # ____________________________________________________________
 
 class UniqueCache:
+    for_testing = False    # set to True on the class level in test_c.py
+
     def __init__(self, space):
         self.ctvoid = None      # Cache for the 'void' type
         self.ctvoidp = None     # Cache for the 'void *' type
         self.ctchara = None     # Cache for the 'char[]' type
         self.primitives = {}    # Cache for {name: primitive_type}
         self.functions = []     # see _new_function_type()
-        self.for_testing = False
+        self.functions_packed = None     # only across translation
+
+    def _cleanup_(self):
+        import gc
+        assert self.functions_packed is None
+        # Note: a full PyPy translation may still have
+        # 'self.functions == []' at this point, possibly depending
+        # on details.  Code tested directly in test_ffi_obj
+        gc.collect()
+        funcs = []
+        for weakdict in self.functions:
+            funcs += weakdict._dict.values()
+        del self.functions[:]
+        self.functions_packed = funcs if len(funcs) > 0 else None
+
+    def unpack_functions(self):
+        for fct in self.functions_packed:
+            _record_function_type(self, fct)
+        self.functions_packed = None
+
 
 def _clean_cache(space):
     "NOT_RPYTHON"
@@ -622,7 +643,7 @@
     for w_arg in fargs:
         y = compute_identity_hash(w_arg)
         x = intmask((1000003 * x) ^ y)
-    x ^= (ellipsis - abi)
+    x ^= ellipsis + 2 * abi
     if unique_cache.for_testing:    # constant-folded to False in translation;
         x &= 3                      # but for test, keep only 2 bits of hash
     return x
@@ -646,6 +667,8 @@
     # one such dict, but in case of hash collision, there might be
     # more.
     unique_cache = space.fromcache(UniqueCache)
+    if unique_cache.functions_packed is not None:
+        unique_cache.unpack_functions()
     func_hash = _func_key_hash(unique_cache, fargs, fresult, ellipsis, abi)
     for weakdict in unique_cache.functions:
         ctype = weakdict.get(func_hash)
@@ -674,13 +697,18 @@
     #
     fct = ctypefunc.W_CTypeFunc(space, fargs, fresult, ellipsis, abi)
     unique_cache = space.fromcache(UniqueCache)
-    func_hash = _func_key_hash(unique_cache, fargs, fresult, ellipsis, abi)
+    _record_function_type(unique_cache, fct)
+    return fct
+
+def _record_function_type(unique_cache, fct):
+    from pypy.module._cffi_backend import ctypefunc
+    #
+    func_hash = _func_key_hash(unique_cache, fct.fargs, fct.ctitem,
+                               fct.ellipsis, fct.abi)
     for weakdict in unique_cache.functions:
         if weakdict.get(func_hash) is None:
-            weakdict.set(func_hash, fct)
             break
     else:
         weakdict = rweakref.RWeakValueDictionary(int, ctypefunc.W_CTypeFunc)
         unique_cache.functions.append(weakdict)
-        weakdict.set(func_hash, fct)
-    return fct
+    weakdict.set(func_hash, fct)
diff --git a/pypy/module/_cffi_backend/test/test_c.py b/pypy/module/_cffi_backend/test/test_c.py
--- a/pypy/module/_cffi_backend/test/test_c.py
+++ b/pypy/module/_cffi_backend/test/test_c.py
@@ -37,6 +37,7 @@
     def setup_class(cls):
         testfuncs_w = []
         keepalive_funcs = []
+        UniqueCache.for_testing = True
 
         test_lib_c = tmpdir.join('_test_lib.c')
         src_test_lib_c = py.path.local(__file__).dirpath().join('_test_lib.c')
@@ -100,11 +101,12 @@
             _all_test_c.find_and_load_library = func
             _all_test_c._testfunc = testfunc
         """)
-        UniqueCache.for_testing = True
 
     def teardown_method(self, method):
+        _clean_cache(self.space)
+
+    def teardown_class(cls):
         UniqueCache.for_testing = False
-        _clean_cache(self.space)
 
 
 all_names = ', '.join(Module.interpleveldefs.keys())
diff --git a/pypy/module/_cffi_backend/test/test_ffi_obj.py b/pypy/module/_cffi_backend/test/test_ffi_obj.py
--- a/pypy/module/_cffi_backend/test/test_ffi_obj.py
+++ b/pypy/module/_cffi_backend/test/test_ffi_obj.py
@@ -1,5 +1,23 @@
+from pypy.module._cffi_backend import newtype
 from pypy.module._cffi_backend.newtype import _clean_cache
 
+
+class TestFFIObj:
+    spaceconfig = dict(usemodules=('_cffi_backend', 'array'))
+
+    def teardown_method(self, meth):
+        _clean_cache(self.space)
+
+    def test_new_function_type_during_translation(self):
+        space = self.space
+        BInt = newtype.new_primitive_type(space, "int")
+        BFunc = newtype.new_function_type(space, space.wrap([BInt]), BInt)
+        assert BFunc is newtype.new_function_type(space,space.wrap([BInt]),BInt)
+        unique_cache = space.fromcache(newtype.UniqueCache)
+        unique_cache._cleanup_()
+        assert BFunc is newtype.new_function_type(space,space.wrap([BInt]),BInt)
+
+
 class AppTestFFIObj:
     spaceconfig = dict(usemodules=('_cffi_backend', 'array'))
 
diff --git a/pypy/module/_weakref/interp__weakref.py b/pypy/module/_weakref/interp__weakref.py
--- a/pypy/module/_weakref/interp__weakref.py
+++ b/pypy/module/_weakref/interp__weakref.py
@@ -194,6 +194,15 @@
         W_WeakrefBase.__init__(self, space, w_obj, w_callable)
         self.w_hash = None
 
+    def _cleanup_(self):
+        # When a prebuilt weakref is frozen inside a translation, if
+        # this weakref has got an already-cached w_hash, then throw it
+        # away.  That's because the hash value will change after
+        # translation.  It will be recomputed the first time we ask for
+        # it.  Note that such a frozen weakref, if not dead, will point
+        # to a frozen object, so it will never die.
+        self.w_hash = None
+
     def descr__init__weakref(self, space, w_obj, w_callable=None,
                              __args__=None):
         if __args__.arguments_w:
diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py
--- a/pypy/module/posix/interp_posix.py
+++ b/pypy/module/posix/interp_posix.py
@@ -2127,6 +2127,12 @@
         raise wrap_oserror(space, e, eintr_retry=False)
     return space.wrap(res)
 
+class SigCheck:
+    pass
+_sigcheck = SigCheck()
+def _signal_checker():
+    _sigcheck.space.getexecutioncontext().checksignals()
+
 @unwrap_spec(size=int)
 def urandom(space, size):
     """urandom(size) -> str
@@ -2134,9 +2140,12 @@
     Return a string of 'size' random bytes suitable for cryptographic use.
     """
     context = get(space).random_context
-    signal_checker = space.getexecutioncontext().checksignals
     try:
-        return space.newbytes(rurandom.urandom(context, n, signal_checker))
+        # urandom() takes a final argument that should be a regular function,
+        # not a bound method like 'getexecutioncontext().checksignals'.
+        # Otherwise, we can't use it from several independent places.
+        _sigcheck.space = space
+        return space.newbytes(rurandom.urandom(context, n, _signal_checker))
     except OSError as e:
         # 'rurandom' should catch and retry internally if it gets EINTR
         # (at least in os.read(), which is probably enough in practice)
diff --git a/pypy/module/sys/system.py b/pypy/module/sys/system.py
--- a/pypy/module/sys/system.py
+++ b/pypy/module/sys/system.py
@@ -5,7 +5,6 @@
 from pypy.objspace.std.complexobject import HASH_IMAG
 from pypy.objspace.std.floatobject import HASH_INF, HASH_NAN
 from pypy.objspace.std.intobject import HASH_MODULUS
-from pypy.objspace.std.bytesobject import HASH_ALGORITHM
 from pypy.interpreter import gateway
 from rpython.rlib import rbigint, rfloat
 from rpython.rtyper.lltypesystem import lltype, rffi
@@ -79,11 +78,22 @@
     return space.call_function(w_int_info, space.newtuple(info_w))
 
 def get_hash_info(space):
-    HASH_HASH_BITS = 8 * rffi.sizeof(lltype.Signed)
-    HASH_SEED_BITS = 0    # XXX don't know what this is supposed to be
+    HASH_ALGORITHM = space.config.objspace.hash
+    if space.config.objspace.hash == "fnv":
+        HASH_HASH_BITS = 8 * rffi.sizeof(lltype.Signed)
+        HASH_SEED_BITS = 0
+        #   CPython has  ^ > 0  here, but the seed of "fnv" is of limited
+        #   use, so we don't implement it
+    elif space.config.objspace.hash == "siphash24":
+        HASH_HASH_BITS = 64
+        HASH_SEED_BITS = 128
+    else:
+        assert 0, "please add the parameters for this different hash function"
+
+    HASH_WIDTH = 8 * rffi.sizeof(lltype.Signed)
     HASH_CUTOFF = 0
     info_w = [
-        space.wrap(8 * rffi.sizeof(lltype.Signed)),
+        space.wrap(HASH_WIDTH),
         space.wrap(HASH_MODULUS),
         space.wrap(HASH_INF),
         space.wrap(HASH_NAN),
diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py
--- a/pypy/objspace/std/bytesobject.py
+++ b/pypy/objspace/std/bytesobject.py
@@ -3,7 +3,7 @@
 from rpython.rlib import jit
 from rpython.rlib.objectmodel import (
     compute_hash, compute_unique_id, import_from_mixin, newlist_hint,
-    resizelist_hint, HASH_ALGORITHM)
+    resizelist_hint)
 from rpython.rlib.buffer import StringBuffer
 from rpython.rlib.rstring import StringBuilder
 
diff --git a/pypy/objspace/std/setobject.py b/pypy/objspace/std/setobject.py
--- a/pypy/objspace/std/setobject.py
+++ b/pypy/objspace/std/setobject.py
@@ -563,6 +563,11 @@
 class W_FrozensetObject(W_BaseSetObject):
     hash = 0
 
+    def _cleanup_(self):
+        # in case there are frozenset objects existing during
+        # translation, make sure we don't translate a cached hash
+        self.hash = 0
+
     def is_w(self, space, w_other):
         if not isinstance(w_other, W_FrozensetObject):
             return False
diff --git a/rpython/annotator/bookkeeper.py b/rpython/annotator/bookkeeper.py
--- a/rpython/annotator/bookkeeper.py
+++ b/rpython/annotator/bookkeeper.py
@@ -287,7 +287,7 @@
                     for ek, ev in items:
                         result.dictdef.generalize_key(self.immutablevalue(ek))
                         result.dictdef.generalize_value(self.immutablevalue(ev))
-                        result.dictdef.seen_prebuilt_key(ek)
+                        #dictdef.seen_prebuilt_key(ek)---not needed any more
                     seen_elements = len(items)
                     # if the dictionary grew during the iteration,
                     # start over again
diff --git a/rpython/annotator/dictdef.py b/rpython/annotator/dictdef.py
--- a/rpython/annotator/dictdef.py
+++ b/rpython/annotator/dictdef.py
@@ -115,13 +115,5 @@
     def generalize_value(self, s_value):
         self.dictvalue.generalize(s_value)
 
-    def seen_prebuilt_key(self, x):
-        # In case we are an r_dict, we don't ask for the hash ourselves.
-        # Note that if the custom hashing function ends up asking for
-        # the hash of x, then it must use compute_hash() itself, so it
-        # works out.
-        if not self.dictkey.custom_eq_hash:
-            compute_hash(x)
-
     def __repr__(self):
         return '<{%r: %r}>' % (self.dictkey.s_value, self.dictvalue.s_value)
diff --git a/rpython/annotator/test/test_annrpython.py b/rpython/annotator/test/test_annrpython.py
--- a/rpython/annotator/test/test_annrpython.py
+++ b/rpython/annotator/test/test_annrpython.py
@@ -3704,25 +3704,6 @@
         s = a.build_types(f, [int])
         assert s.const == 0
 
-    def test_hash_sideeffect(self):
-        class X:
-            pass
-        x1 = X()
-        x2 = X()
-        x3 = X()
-        d = {(2, x1): 5, (3, x2): 7}
-        def f(n, m):
-            if   m == 1: x = x1
-            elif m == 2: x = x2
-            else:        x = x3
-            return d[n, x]
-        a = self.RPythonAnnotator()
-        s = a.build_types(f, [int, int])
-        assert s.knowntype == int
-        assert hasattr(x1, '__precomputed_identity_hash')
-        assert hasattr(x2, '__precomputed_identity_hash')
-        assert not hasattr(x3, '__precomputed_identity_hash')
-
     def test_contains_of_empty_dict(self):
         class A(object):
             def meth(self):
diff --git a/rpython/config/translationoption.py b/rpython/config/translationoption.py
--- a/rpython/config/translationoption.py
+++ b/rpython/config/translationoption.py
@@ -201,10 +201,6 @@
     StrOption("icon", "Path to the (Windows) icon to use for the executable"),
     StrOption("libname",
               "Windows: name and possibly location of the lib file to create"),
-    ChoiceOption("hash",
-                 "The hash to use for strings",
-                 ["rpython", "siphash24"],
-                 default="rpython", cmdline="--hash"),
 
     OptionDescription("backendopt", "Backend Optimization Options", [
         # control inlining
@@ -394,12 +390,6 @@
         if sys.platform == "darwin" or sys.platform =="win32":
             raise ConfigError("'asmgcc' not supported on this platform")
 
-def apply_extra_settings(config):
-    # make the setting of config.hash definitive
-    from rpython.rlib.objectmodel import set_hash_algorithm
-    config.translation.hash = config.translation.hash
-    set_hash_algorithm(config.translation.hash)
-
 # ----------------------------------------------------------------
 
 def set_platform(config):
diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py
--- a/rpython/memory/gc/incminimark.py
+++ b/rpython/memory/gc/incminimark.py
@@ -117,9 +117,7 @@
 
 # The following flag is set on nursery objects of which we asked the id
 # or the identityhash.  It means that a space of the size of the object
-# has already been allocated in the nonmovable part.  The same flag is
-# abused to mark prebuilt objects whose hash has been taken during
-# translation and is statically recorded.
+# has already been allocated in the nonmovable part.
 GCFLAG_HAS_SHADOW   = first_gcflag << 3
 
 # The following flag is set temporarily on some objects during a major
@@ -208,10 +206,6 @@
     # by GCFLAG_xxx above.
     HDR = lltype.Struct('header', ('tid', lltype.Signed))
     typeid_is_in_field = 'tid'
-    withhash_flag_is_in_field = 'tid', GCFLAG_HAS_SHADOW
-    # ^^^ prebuilt objects may have the flag GCFLAG_HAS_SHADOW;
-    #     then they are one word longer, the extra word storing the hash.
-
 
     # During a minor collection, the objects in the nursery that are
     # moved outside are changed in-place: their header is replaced with
@@ -2640,40 +2634,22 @@
         return shadow
     _find_shadow._dont_inline_ = True
 
-    @specialize.arg(2)
-    def id_or_identityhash(self, gcobj, is_hash):
+    def id_or_identityhash(self, gcobj):
         """Implement the common logic of id() and identityhash()
         of an object, given as a GCREF.
         """
         obj = llmemory.cast_ptr_to_adr(gcobj)
-        #
         if self.is_valid_gc_object(obj):
             if self.is_in_nursery(obj):
                 obj = self._find_shadow(obj)
-            elif is_hash:
-                if self.header(obj).tid & GCFLAG_HAS_SHADOW:
-                    #
-                    # For identityhash(), we need a special case for some
-                    # prebuilt objects: their hash must be the same before
-                    # and after translation.  It is stored as an extra word
-                    # after the object.  But we cannot use it for id()
-                    # because the stored value might clash with a real one.
-                    size = self.get_size(obj)
-                    i = (obj + size).signed[0]
-                    # Important: the returned value is not mangle_hash()ed!
-                    return i
-        #
-        i = llmemory.cast_adr_to_int(obj)
-        if is_hash:
-            i = mangle_hash(i)
-        return i
+        return llmemory.cast_adr_to_int(obj)
     id_or_identityhash._always_inline_ = True
 
     def id(self, gcobj):
-        return self.id_or_identityhash(gcobj, False)
+        return self.id_or_identityhash(gcobj)
 
     def identityhash(self, gcobj):
-        return self.id_or_identityhash(gcobj, True)
+        return mangle_hash(self.id_or_identityhash(gcobj))
 
     # ----------
     # Finalizers
diff --git a/rpython/memory/gc/minimark.py b/rpython/memory/gc/minimark.py
--- a/rpython/memory/gc/minimark.py
+++ b/rpython/memory/gc/minimark.py
@@ -104,9 +104,7 @@
 
 # The following flag is set on nursery objects of which we asked the id
 # or the identityhash.  It means that a space of the size of the object
-# has already been allocated in the nonmovable part.  The same flag is
-# abused to mark prebuilt objects whose hash has been taken during
-# translation and is statically recorded.
+# has already been allocated in the nonmovable part.
 GCFLAG_HAS_SHADOW   = first_gcflag << 3
 
 # The following flag is set temporarily on some objects during a major
@@ -149,9 +147,6 @@
     # by GCFLAG_xxx above.
     HDR = lltype.Struct('header', ('tid', lltype.Signed))
     typeid_is_in_field = 'tid'
-    withhash_flag_is_in_field = 'tid', GCFLAG_HAS_SHADOW
-    # ^^^ prebuilt objects may have the flag GCFLAG_HAS_SHADOW;
-    #     then they are one word longer, the extra word storing the hash.
 
     _ADDRARRAY = lltype.Array(llmemory.Address, hints={'nolength': True})
 
@@ -1868,40 +1863,22 @@
         return shadow
     _find_shadow._dont_inline_ = True
 
-    @specialize.arg(2)
-    def id_or_identityhash(self, gcobj, is_hash):
+    def id_or_identityhash(self, gcobj):
         """Implement the common logic of id() and identityhash()
         of an object, given as a GCREF.
         """
         obj = llmemory.cast_ptr_to_adr(gcobj)
-        #
         if self.is_valid_gc_object(obj):
             if self.is_in_nursery(obj):
                 obj = self._find_shadow(obj)
-            elif is_hash:
-                if self.header(obj).tid & GCFLAG_HAS_SHADOW:
-                    #
-                    # For identityhash(), we need a special case for some
-                    # prebuilt objects: their hash must be the same before
-                    # and after translation.  It is stored as an extra word
-                    # after the object.  But we cannot use it for id()
-                    # because the stored value might clash with a real one.
-                    size = self.get_size(obj)
-                    i = (obj + size).signed[0]
-                    # Important: the returned value is not mangle_hash()ed!
-                    return i
-        #
-        i = llmemory.cast_adr_to_int(obj)
-        if is_hash:
-            i = mangle_hash(i)
-        return i
+        return llmemory.cast_adr_to_int(obj)
     id_or_identityhash._always_inline_ = True
 
     def id(self, gcobj):
-        return self.id_or_identityhash(gcobj, False)
+        return self.id_or_identityhash(gcobj)
 
     def identityhash(self, gcobj):
-        return self.id_or_identityhash(gcobj, True)
+        return mangle_hash(self.id_or_identityhash(gcobj))
 
     # ----------
     # Finalizers
diff --git a/rpython/memory/gc/semispace.py b/rpython/memory/gc/semispace.py
--- a/rpython/memory/gc/semispace.py
+++ b/rpython/memory/gc/semispace.py
@@ -48,9 +48,6 @@
 
     HDR = lltype.Struct('header', ('tid', lltype.Signed))   # XXX or rffi.INT?
     typeid_is_in_field = 'tid'
-    withhash_flag_is_in_field = 'tid', _GCFLAG_HASH_BASE * 0x2
-    # ^^^ prebuilt objects either have GC_HASH_TAKEN_ADDR or they
-    #     have GC_HASH_HASFIELD (and then they are one word longer).
     FORWARDSTUB = lltype.GcStruct('forwarding_stub',
                                   ('forw', llmemory.Address))
     FORWARDSTUBPTR = lltype.Ptr(FORWARDSTUB)
diff --git a/rpython/memory/gctransform/boehm.py b/rpython/memory/gctransform/boehm.py
--- a/rpython/memory/gctransform/boehm.py
+++ b/rpython/memory/gctransform/boehm.py
@@ -11,7 +11,7 @@
 class BoehmGCTransformer(GCTransformer):
     malloc_zero_filled = True
     FINALIZER_PTR = lltype.Ptr(lltype.FuncType([llmemory.Address], lltype.Void))
-    HDR = lltype.Struct("header", ("hash", lltype.Signed))
+    NO_HEADER = True
 
     def __init__(self, translator, inline=False):
         super(BoehmGCTransformer, self).__init__(translator, inline=inline)
@@ -29,13 +29,8 @@
         ll_malloc_varsize_no_length = mh.ll_malloc_varsize_no_length
         ll_malloc_varsize = mh.ll_malloc_varsize
 
-        HDRPTR = lltype.Ptr(self.HDR)
-
         def ll_identityhash(addr):
-            obj = llmemory.cast_adr_to_ptr(addr, HDRPTR)
-            h = obj.hash
-            if h == 0:
-                obj.hash = h = ~llmemory.cast_adr_to_int(addr)
+            h = ~llmemory.cast_adr_to_int(addr)
             return h
 
         if self.translator:
@@ -194,11 +189,6 @@
                           resulttype = lltype.Signed)
         hop.genop('int_invert', [v_int], resultvar=hop.spaceop.result)
 
-    def gcheader_initdata(self, obj):
-        hdr = lltype.malloc(self.HDR, immortal=True)
-        hdr.hash = lltype.identityhash_nocache(obj._as_ptr())
-        return hdr._obj
-
 
 ########## weakrefs ##########
 # Boehm: weakref objects are small structures containing only a Boehm
diff --git a/rpython/memory/gctransform/framework.py b/rpython/memory/gctransform/framework.py
--- a/rpython/memory/gctransform/framework.py
+++ b/rpython/memory/gctransform/framework.py
@@ -610,25 +610,6 @@
     def special_funcptr_for_type(self, TYPE):
         return self.layoutbuilder.special_funcptr_for_type(TYPE)
 
-    def gc_header_for(self, obj, needs_hash=False):
-        hdr = self.gcdata.gc.gcheaderbuilder.header_of_object(obj)
-        withhash, flag = self.gcdata.gc.withhash_flag_is_in_field
-        x = getattr(hdr, withhash)
-        TYPE = lltype.typeOf(x)
-        x = lltype.cast_primitive(lltype.Signed, x)
-        if needs_hash:
-            x |= flag       # set the flag in the header
-        else:
-            x &= ~flag      # clear the flag in the header
-        x = lltype.cast_primitive(TYPE, x)
-        setattr(hdr, withhash, x)
-        return hdr
-
-    def get_hash_offset(self, T):
-        type_id = self.get_type_id(T)
-        assert not self.gcdata.q_is_varsize(type_id)
-        return self.gcdata.q_fixed_size(type_id)
-
     def finish_tables(self):
         group = self.layoutbuilder.close_table()
         log.info("assigned %s typeids" % (len(group.members), ))
@@ -1514,22 +1495,9 @@
 
     def gcheader_initdata(self, obj):
         o = lltype.top_container(obj)
-        needs_hash = self.get_prebuilt_hash(o) is not None
-        hdr = self.gc_header_for(o, needs_hash)
+        hdr = self.gcdata.gc.gcheaderbuilder.header_of_object(o)
         return hdr._obj
 
-    def get_prebuilt_hash(self, obj):
-        # for prebuilt objects that need to have their hash stored and
-        # restored.  Note that only structures that are StructNodes all
-        # the way have their hash stored (and not e.g. structs with var-
-        # sized arrays at the end).  'obj' must be the top_container.
-        TYPE = lltype.typeOf(obj)
-        if not isinstance(TYPE, lltype.GcStruct):
-            return None
-        if TYPE._is_varsize():
-            return None
-        return getattr(obj, '_hash_cache_', None)
-
     def get_finalizer_queue_index(self, hop):
         fq_tag = hop.spaceop.args[0].value
         assert 'FinalizerQueue TAG' in fq_tag.expr
diff --git a/rpython/memory/gctransform/refcounting.py b/rpython/memory/gctransform/refcounting.py
--- a/rpython/memory/gctransform/refcounting.py
+++ b/rpython/memory/gctransform/refcounting.py
@@ -18,8 +18,7 @@
 class RefcountingGCTransformer(GCTransformer):
     malloc_zero_filled = True
 
-    HDR = lltype.Struct("header", ("refcount", lltype.Signed),
-                                  ("hash", lltype.Signed))
+    HDR = lltype.Struct("header", ("refcount", lltype.Signed))
 
     def __init__(self, translator):
         super(RefcountingGCTransformer, self).__init__(translator, inline=True)
@@ -77,10 +76,7 @@
         ll_malloc_varsize = mh.ll_malloc_varsize
 
         def ll_identityhash(addr):
-            obj = llmemory.cast_adr_to_ptr(addr, HDRPTR)
-            h = obj.hash
-            if h == 0:
-                obj.hash = h = llmemory.cast_adr_to_int(addr)
+            h = llmemory.cast_adr_to_int(addr)
             return h
 
         if self.translator:
@@ -178,7 +174,6 @@
             if not self.gcheaderbuilder.get_header(p):
                 hdr = self.gcheaderbuilder.new_header(p)
                 hdr.refcount = sys.maxint // 2
-                hdr.hash = lltype.identityhash_nocache(p)
 
     def static_deallocation_funcptr_for_type(self, TYPE):
         if TYPE in self.static_deallocator_funcptrs:
diff --git a/rpython/memory/gctransform/transform.py b/rpython/memory/gctransform/transform.py
--- a/rpython/memory/gctransform/transform.py
+++ b/rpython/memory/gctransform/transform.py
@@ -374,9 +374,6 @@
         return hop.cast_result(rmodel.inputconst(lltype.Ptr(ARRAY_TYPEID_MAP),
                                         lltype.nullptr(ARRAY_TYPEID_MAP)))
 
-    def get_prebuilt_hash(self, obj):
-        return None
-
 
 class MinimalGCTransformer(BaseGCTransformer):
     def __init__(self, parenttransformer):
diff --git a/rpython/rlib/_rweakvaldict.py b/rpython/rlib/_rweakvaldict.py
--- a/rpython/rlib/_rweakvaldict.py
+++ b/rpython/rlib/_rweakvaldict.py
@@ -76,12 +76,16 @@
             bk = self.rtyper.annotator.bookkeeper
             classdef = bk.getuniqueclassdef(weakdict._valueclass)
             r_value = getinstancerepr(self.rtyper, classdef)
+            any_value = False
             for dictkey, dictvalue in weakdict._dict.items():
                 llkey = self.r_key.convert_const(dictkey)
                 llvalue = r_value.convert_const(dictvalue)
                 if llvalue:
                     llvalue = lltype.cast_pointer(rclass.OBJECTPTR, llvalue)
                     self.ll_set_nonnull(l_dict, llkey, llvalue)
+                    any_value = True
+            if any_value:
+                l_dict.resize_counter = -1
             return l_dict
 
     def rtype_method_get(self, hop):
@@ -114,6 +118,8 @@
 
     @jit.dont_look_inside
     def ll_get(self, d, llkey):
+        if d.resize_counter < 0:
+            self.ll_weakdict_resize(d)  # initialize prebuilt dicts at runtime
         hash = self.ll_keyhash(llkey)
         i = rdict.ll_dict_lookup(d, llkey, hash) & rdict.MASK
         #llop.debug_print(lltype.Void, i, 'get')
@@ -132,6 +138,8 @@
 
     @jit.dont_look_inside
     def ll_set_nonnull(self, d, llkey, llvalue):
+        if d.resize_counter < 0:
+            self.ll_weakdict_resize(d)  # initialize prebuilt dicts at runtime
         hash = self.ll_keyhash(llkey)
         valueref = weakref_create(llvalue)    # GC effects here, before the rest
         i = rdict.ll_dict_lookup(d, llkey, hash) & rdict.MASK
@@ -147,6 +155,8 @@
 
     @jit.dont_look_inside
     def ll_set_null(self, d, llkey):
+        if d.resize_counter < 0:
+            self.ll_weakdict_resize(d)  # initialize prebuilt dicts at runtime
         hash = self.ll_keyhash(llkey)
         i = rdict.ll_dict_lookup(d, llkey, hash) & rdict.MASK
         if d.entries.everused(i):
diff --git a/rpython/rlib/debug.py b/rpython/rlib/debug.py
--- a/rpython/rlib/debug.py
+++ b/rpython/rlib/debug.py
@@ -441,7 +441,7 @@
             except OSError as e:
                 os.write(2, "Could not start GDB: %s" % (
                     os.strerror(e.errno)))
-                raise SystemExit
+                os._exit(1)
         else:
             time.sleep(1)  # give the GDB time to attach
 
diff --git a/rpython/rlib/objectmodel.py b/rpython/rlib/objectmodel.py
--- a/rpython/rlib/objectmodel.py
+++ b/rpython/rlib/objectmodel.py
@@ -465,8 +465,14 @@
 
     Note that this can return 0 or -1 too.
 
-    It returns the same number, both before and after translation.
-    Dictionaries don't need to be rehashed after translation.
+    NOTE: It returns a different number before and after translation!
+    Dictionaries will be rehashed when the translated program starts.
+    Be careful about other places that store or depend on a hash value:
+    if such a place can exist before translation, you should add for
+    example a _cleanup_() method to clear this cache during translation.
+
+    (Nowadays we could completely remove compute_hash() and decide that
+    hash(x) is valid RPython instead, at least for the types listed here.)
     """
     if isinstance(x, (str, unicode)):
         return _hash_string(x)
@@ -484,17 +490,11 @@
     """RPython equivalent of object.__hash__(x).  This returns the
     so-called 'identity hash', which is the non-overridable default hash
     of Python.  Can be called for any RPython-level object that turns
-    into a GC object, but not NULL.  The value is not guaranteed to be the
-    same before and after translation, except for RPython instances on the
-    lltypesystem.
+    into a GC object, but not NULL.  The value will be different before
+    and after translation (WARNING: this is a change with older RPythons!)
     """
     assert x is not None
-    result = object.__hash__(x)
-    try:
-        x.__dict__['__precomputed_identity_hash'] = result
-    except (TypeError, AttributeError):
-        pass
-    return result
+    return object.__hash__(x)
 
 def compute_unique_id(x):
     """RPython equivalent of id(x).  The 'x' must be an RPython-level
@@ -519,21 +519,17 @@
 
 # ----------
 
-HASH_ALGORITHM = "rpython"  # XXX Is there a better name?
-HASH_ALGORITHM_FIXED = False
+def _hash_string(s):
+    """The default algorithm behind compute_hash() for a string or a unicode.
+    This is a modified Fowler-Noll-Vo (FNV) hash.  According to Wikipedia,
+    FNV needs carefully-computed constants called FNV primes and FNV offset
+    basis, which are absent from the present algorithm.  Nevertheless,
+    this matches CPython 2.7 without -R, which has proven a good hash in
+    practice (even if not cryptographic nor randomizable).
 
- at not_rpython
-def set_hash_algorithm(algo):
-    """Must be called very early, before any string is hashed with
-    compute_hash()!"""
-    global HASH_ALGORITHM
-    if HASH_ALGORITHM != algo:
-        assert not HASH_ALGORITHM_FIXED, "compute_hash() already called!"
-        assert algo in ("rpython", "siphash24")
-        HASH_ALGORITHM = algo
-
-
-def _hash_string_rpython(s):
+    There is a mechanism to use another one in programs after translation.
+    See rsiphash.py, which implements the algorithm of CPython >= 3.4.
+    """
     from rpython.rlib.rarithmetic import intmask
 
     length = len(s)
@@ -547,100 +543,8 @@
     x ^= length
     return intmask(x)
 
-
- at not_rpython
-def _hash_string_siphash24(s):
-    """This version is called when untranslated only."""
-    import array
-    from rpython.rlib.rsiphash import siphash24
-    from rpython.rtyper.lltypesystem import lltype, rffi
-    from rpython.rlib.rarithmetic import intmask
-
-    if not isinstance(s, str):
-        if isinstance(s, unicode):
-            lst = map(ord, s)
-        else:
-            lst = map(ord, s.chars)    # for rstr.STR or UNICODE
-        # NOTE: a latin-1 unicode string must have the same hash as the
-        # corresponding byte string.
-        if all(n <= 0xFF for n in lst):
-            kind = "B"
-        elif rffi.sizeof(lltype.UniChar) == 4:
-            kind = "I"
-        else:
-            kind = "H"
-        s = array.array(kind, lst).tostring()
-    ptr = rffi.str2charp(s)
-    x = siphash24(ptr, len(s))
-    rffi.free_charp(ptr)
-    return intmask(x)
-
-def ll_hash_string_siphash24(ll_s):
-    """Called from lltypesystem/rstr.py.  'll_s' is a rstr.STR or UNICODE."""
-    from rpython.rlib.rsiphash import siphash24
-    from rpython.rtyper.lltypesystem import lltype, llmemory, rffi, rstr
-    from rpython.rlib.rarithmetic import intmask
-
-    length = len(ll_s.chars)
-    if lltype.typeOf(ll_s).TO.chars.OF == lltype.Char:
-        # no GC operation from here!
-        addr = rstr._get_raw_buf_string(rstr.STR, ll_s, 0)
-    else:
-        # NOTE: a latin-1 unicode string must have the same hash as the
-        # corresponding byte string.  If the unicode is all within
-        # 0-255, then we need to allocate a byte buffer and copy the
-        # latin-1 encoding in it manually.
-        for i in range(length):
-            if ord(ll_s.chars[i]) > 0xFF:
-                # no GC operation from here!
-                addr = rstr._get_raw_buf_unicode(rstr.UNICODE, ll_s, 0)
-                length *= rffi.sizeof(rstr.UNICODE.chars.OF)
-                break
-        else:
-            p = lltype.malloc(rffi.CCHARP.TO, length, flavor='raw')
-            i = 0
-            while i < length:
-                p[i] = chr(ord(ll_s.chars[i]))
-                i += 1
-            x = siphash24(llmemory.cast_ptr_to_adr(p), length)
-            lltype.free(p, flavor='raw')
-            return intmask(x)
-    x = siphash24(addr, length)
-    keepalive_until_here(ll_s)
-    return intmask(x)
-ll_hash_string_siphash24._jit_look_inside_ = False
-
-
- at not_rpython
-def _hash_string(s):
-    """The algorithm behind compute_hash() for a string or a unicode.
-    This version is only for untranslated usage, and 's' is a str or unicode.
-    """
-    global HASH_ALGORITHM_FIXED
-    HASH_ALGORITHM_FIXED = True
-    if HASH_ALGORITHM == "rpython":
-        return _hash_string_rpython(s)
-    if HASH_ALGORITHM == "siphash24":
-        return _hash_string_siphash24(s)
-    raise NotImplementedError
-
 def ll_hash_string(ll_s):
-    """The algorithm behind compute_hash() for a string or a unicode.
-    This version is called from lltypesystem/rstr.py, and 'll_s' is a
-    rstr.STR or rstr.UNICODE.
-    """
-    if not we_are_translated():
-        global HASH_ALGORITHM_FIXED
-        HASH_ALGORITHM_FIXED = True
-    if HASH_ALGORITHM == "rpython":
-        return _hash_string_rpython(ll_s.chars)
-    if HASH_ALGORITHM == "siphash24":
-        if we_are_translated():
-            return ll_hash_string_siphash24(ll_s)
-        else:
-            return _hash_string_siphash24(ll_s)
-    raise NotImplementedError
-
+    return _hash_string(ll_s.chars)
 
 def _hash_float(f):
     """The algorithm behind compute_hash() for a float.
@@ -698,6 +602,21 @@
         return hop.gendirectcall(ll_fn, v_obj)
 
 class Entry(ExtRegistryEntry):
+    _about_ = ll_hash_string
+    # this is only used when annotating the code in rstr.py, and so
+    # it always occurs after the RPython program signalled its intent
+    # to use a different hash.  The code below overwrites the use of
+    # ll_hash_string() to make the annotator think a possibly different
+    # function was called.
+
+    def compute_annotation(self):
+        from rpython.annotator import model as annmodel
+        bk = self.bookkeeper
+        translator = bk.annotator.translator
+        fn = getattr(translator, 'll_hash_string', ll_hash_string)
+        return annmodel.SomePBC([bk.getdesc(fn)])
+
+class Entry(ExtRegistryEntry):
     _about_ = compute_identity_hash
 
     def compute_result_annotation(self, s_x):
diff --git a/rpython/rlib/rsiphash.py b/rpython/rlib/rsiphash.py
--- a/rpython/rlib/rsiphash.py
+++ b/rpython/rlib/rsiphash.py
@@ -1,12 +1,24 @@
-import sys, os, struct
+"""
+This module implements siphash-2-4, the hashing algorithm for strings
+and unicodes.  You can use it explicitly by calling siphash24() with
+a byte string, or you can use enable_siphash24() to enable the use
+of siphash-2-4 on all RPython strings and unicodes in your program
+after translation.
+"""
+import sys, os, errno
 from contextlib import contextmanager
-from rpython.rlib import rarithmetic
+from rpython.rlib import rarithmetic, rurandom
 from rpython.rlib.objectmodel import not_rpython, always_inline
-from rpython.rlib.rgc import no_collect
-from rpython.rlib.rarithmetic import r_uint64
+from rpython.rlib.objectmodel import we_are_translated, dont_inline
+from rpython.rlib.objectmodel import keepalive_until_here
+from rpython.rlib import rgc, jit, rposix
+from rpython.rlib.rarithmetic import r_uint64, r_uint32, r_uint
 from rpython.rlib.rawstorage import misaligned_is_fine
-from rpython.rtyper.lltypesystem import lltype, llmemory, rffi
+from rpython.rlib.nonconst import NonConstant
+from rpython.rtyper.lltypesystem import lltype, llmemory, rffi, rstr
 from rpython.rtyper.lltypesystem.lloperation import llop
+from rpython.rtyper.extregistry import ExtRegistryEntry
+from rpython.rtyper.annlowlevel import llhelper
 
 
 if sys.byteorder == 'little':
@@ -16,37 +28,164 @@
     _le64toh = rarithmetic.byteswap
 
 
-# Initialize the values of the secret seed: two 64-bit constants.
-# CPython picks a new seed every time 'python' starts.  PyPy cannot do
-# that as easily because many details may rely on getting the same hash
-# value before and after translation.  We can, however, pick a random
-# seed once per translation, which should already be quite good.
-#
-# XXX no, it is not: e.g. all Ubuntu installations of the same Ubuntu
-# would get the same seed.  That's not good enough.
+class Seed:
+    k0l = k1l = r_uint64(0)
+seed = Seed()
 
- at not_rpython
-def select_random_seed():
-    global k0, k1    # note: the globals k0, k1 are already byte-swapped
-    v0, v1 = struct.unpack("QQ", os.urandom(16))
-    k0 = r_uint64(v0)
-    k1 = r_uint64(v1)
 
-select_random_seed()
+def _decode64(s):
+    return (r_uint64(ord(s[0])) |
+            r_uint64(ord(s[1])) << 8 |
+            r_uint64(ord(s[2])) << 16 |
+            r_uint64(ord(s[3])) << 24 |
+            r_uint64(ord(s[4])) << 32 |
+            r_uint64(ord(s[5])) << 40 |
+            r_uint64(ord(s[6])) << 48 |
+            r_uint64(ord(s[7])) << 56)
+
+def select_random_seed(s):
+    """'s' is a string of length 16"""
+    seed.k0l = _decode64(s)
+    seed.k1l = _decode64(s[8:16])
+
+
+random_ctx = rurandom.init_urandom()
+strtoul = rffi.llexternal("strtoul", [rffi.CCHARP, rffi.CCHARPP, rffi.INT],
+                          rffi.ULONG, save_err=rffi.RFFI_SAVE_ERRNO)
+
+env_var_name = "PYTHONHASHSEED"
+
+def initialize_from_env():
+    # This uses the same algorithms as CPython 3.5.  The environment
+    # variable we read also defaults to "PYTHONHASHSEED".  If needed,
+    # a different RPython interpreter can patch the value of the
+    # global variable 'env_var_name', or just patch the whole
+    # initialize_from_env() function.
+    value = os.environ.get(env_var_name)
+    if value and value != "random":
+        with rffi.scoped_view_charp(value) as ptr:
+            with lltype.scoped_alloc(rffi.CCHARPP.TO, 1) as endptr:
+                endptr[0] = ptr
+                seed = strtoul(ptr, endptr, 10)
+                full = endptr[0][0] == '\x00'
+        seed = lltype.cast_primitive(lltype.Unsigned, seed)
+        if not full or seed > r_uint(4294967295) or (
+            rposix.get_saved_errno() == errno.ERANGE and
+            seed == lltype.cast_primitive(lltype.Unsigned,
+                                          rffi.cast(rffi.ULONG, -1))):
+            os.write(2,
+                "%s must be \"random\" or an integer "
+                "in range [0; 4294967295]\n" % (env_var_name,))
+            os._exit(1)
+        if not seed:
+            # disable the randomized hash
+            s = '\x00' * 16
+        else:
+            s = lcg_urandom(seed)
+    else:
+        try:
+            s = rurandom.urandom(random_ctx, 16)
+        except Exception as e:
+            os.write(2,
+                "%s: failed to get random numbers to initialize Python\n" %
+                (str(e),))
+            os._exit(1)
+            raise   # makes the annotator happy
+    select_random_seed(s)
+
+def lcg_urandom(x):
+    s = ''
+    for index in range(16):
+        x *= 214013
+        x += 2531011
+        s += chr((x >> 16) & 0xff)
+    return s
+
+
+_FUNC = lltype.Ptr(lltype.FuncType([], lltype.Void))
+
+def enable_siphash24():
+    """
+    Enable the use of siphash-2-4 for all RPython strings and unicodes
+    in the translated program.  You must call this function anywhere
+    from your interpreter (from a place that is annotated).  Don't call
+    it more than once.
+    """
+
+class Entry(ExtRegistryEntry):
+    _about_ = enable_siphash24
+
+    def compute_result_annotation(self):
+        translator = self.bookkeeper.annotator.translator
+        if hasattr(translator, 'll_hash_string'):
+            assert translator.ll_hash_string == ll_hash_string_siphash24
+        else:
+            translator.ll_hash_string = ll_hash_string_siphash24
+        bk = self.bookkeeper
+        s_callable = bk.immutablevalue(initialize_from_env)
+        key = (enable_siphash24,)
+        bk.emulate_pbc_call(key, s_callable, [])
+
+    def specialize_call(self, hop):
+        hop.exception_cannot_occur()
+        bk = hop.rtyper.annotator.bookkeeper
+        s_callable = bk.immutablevalue(initialize_from_env)
+        r_callable = hop.rtyper.getrepr(s_callable)
+        ll_init = r_callable.get_unique_llfn().value
+        bk.annotator.translator._call_at_startup.append(ll_init)
+
+
+ at rgc.no_collect
+def ll_hash_string_siphash24(ll_s):
+    """Called indirectly from lltypesystem/rstr.py, by redirection from
+    objectmodel.ll_hash_string().
+    """
+    from rpython.rlib.rarithmetic import intmask
+
+    # This function is entirely @rgc.no_collect.
+    length = len(ll_s.chars)
+    if lltype.typeOf(ll_s).TO.chars.OF == lltype.Char:   # regular STR
+        addr = rstr._get_raw_buf_string(rstr.STR, ll_s, 0)
+    else:
+        # NOTE: a latin-1 unicode string must have the same hash as the
+        # corresponding byte string.  If the unicode is all within
+        # 0-255, then we need to allocate a byte buffer and copy the
+        # latin-1 encoding in it manually.  Note also that we give a
+        # different hash result than CPython on ucs4 platforms, for
+        # unicode strings where CPython uses 2 bytes per character.
+        for i in range(length):
+            if ord(ll_s.chars[i]) > 0xFF:
+                addr = rstr._get_raw_buf_unicode(rstr.UNICODE, ll_s, 0)
+                length *= rffi.sizeof(rstr.UNICODE.chars.OF)
+                break
+        else:
+            p = lltype.malloc(rffi.CCHARP.TO, length, flavor='raw')
+            i = 0
+            while i < length:
+                p[i] = chr(ord(ll_s.chars[i]))
+                i += 1
+            x = _siphash24(llmemory.cast_ptr_to_adr(p), length)
+            lltype.free(p, flavor='raw')
+            return intmask(x)
+    x = _siphash24(addr, length)
+    keepalive_until_here(ll_s)
+    return intmask(x)
+
 
 @contextmanager
 def choosen_seed(new_k0, new_k1, test_misaligned_path=False):
-    global k0, k1, misaligned_is_fine
-    old = k0, k1, misaligned_is_fine
-    k0 = _le64toh(r_uint64(new_k0))
-    k1 = _le64toh(r_uint64(new_k1))
+    """For tests."""
+    global misaligned_is_fine
+    old = seed.k0l, seed.k1l, misaligned_is_fine
+    seed.k0l = _le64toh(r_uint64(new_k0))
+    seed.k1l = _le64toh(r_uint64(new_k1))
     if test_misaligned_path:
         misaligned_is_fine = False
     yield
-    k0, k1, misaligned_is_fine = old
+    seed.k0l, seed.k1l, misaligned_is_fine = old
 
 def get_current_seed():
-    return _le64toh(k0), _le64toh(k1)
+    return _le64toh(seed.k0l), _le64toh(seed.k1l)
 
 
 magic0 = r_uint64(0x736f6d6570736575)
@@ -77,20 +216,21 @@
     return v0, v1, v2, v3
 
 
- at no_collect
-def siphash24(addr_in, size):
+ at rgc.no_collect
+def _siphash24(addr_in, size):
     """Takes an address pointer and a size.  Returns the hash as a r_uint64,
     which can then be casted to the expected type."""
 
-    direct = (misaligned_is_fine or
-                 (rffi.cast(lltype.Signed, addr_in) & 7) == 0)
-
+    k0 = seed.k0l
+    k1 = seed.k1l
     b = r_uint64(size) << 56
     v0 = k0 ^ magic0
     v1 = k1 ^ magic1
     v2 = k0 ^ magic2
     v3 = k1 ^ magic3
 
+    direct = (misaligned_is_fine or
+                 (rffi.cast(lltype.Signed, addr_in) & 7) == 0)
     index = 0
     if direct:
         while size >= 8:
@@ -113,7 +253,6 @@
                 r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 6)) << 48 |
                 r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 7)) << 56
             )
-            mi = _le64toh(mi)
             size -= 8
             index += 8
             v3 ^= mi
@@ -158,3 +297,13 @@
     v0, v1, v2, v3 = _double_round(v0, v1, v2, v3)
 
     return (v0 ^ v1) ^ (v2 ^ v3)
+
+
+ at jit.dont_look_inside
+def siphash24(s):
+    """'s' is a normal string.  Returns its siphash-2-4 as a r_uint64.
+    Don't forget to cast the result to a regular integer if needed,
+    e.g. with rarithmetic.intmask().
+    """
+    with rffi.scoped_nonmovingbuffer(s) as p:
+        return _siphash24(llmemory.cast_ptr_to_adr(p), len(s))
diff --git a/rpython/rlib/rurandom.py b/rpython/rlib/rurandom.py
--- a/rpython/rlib/rurandom.py
+++ b/rpython/rlib/rurandom.py
@@ -57,6 +57,8 @@
                              immortal=True, zero=True)
 
     def urandom(context, n, signal_checker=None):
+        # NOTE: no dictionaries here: rsiphash24 calls this to
+        # initialize the random seed of string hashes
         provider = context[0]
         if not provider:
             # This handle is never explicitly released. The operating
@@ -139,6 +141,8 @@
 
     def urandom(context, n, signal_checker=None):
         "Read n bytes from /dev/urandom."
+        # NOTE: no dictionaries here: rsiphash24 calls this to
+        # initialize the random seed of string hashes
         result = []
         if SYS_getrandom is not None:
             n = _getrandom(n, result, signal_checker)
diff --git a/rpython/rlib/test/test_objectmodel.py b/rpython/rlib/test/test_objectmodel.py
--- a/rpython/rlib/test/test_objectmodel.py
+++ b/rpython/rlib/test/test_objectmodel.py
@@ -166,7 +166,6 @@
     foo = Foo()
     h = compute_hash(foo)
     assert h == object.__hash__(foo)
-    assert h == getattr(foo, '__precomputed_identity_hash')
     assert compute_hash(None) == 0
 
 def test_compute_hash_float():
@@ -182,7 +181,6 @@
     foo = Foo()
     h = compute_identity_hash(foo)
     assert h == object.__hash__(foo)
-    assert h == getattr(foo, '__precomputed_identity_hash')
 
 def test_compute_unique_id():
     from rpython.rlib.rarithmetic import intmask
@@ -410,36 +408,6 @@
         res = self.interpret(f, [])
         assert res == 1
 
-    def test_compute_hash_across_translation(self):
-        class Foo(object):
-            pass
-        q = Foo()
-
-        def f(i):
-            assert compute_hash(None) == 0
-            assert compute_hash(i) == h_42
-            assert compute_hash(i + 1.0) == h_43_dot_0
-            assert compute_hash((i + 3) / 6.0) == h_7_dot_5
-            assert compute_hash("Hello" + str(i)) == h_Hello42
-            if i == 42:
-                p = None
-            else:
-                p = Foo()
-            assert compute_hash(p) == h_None
-            assert compute_hash(("world", None, i, 7.5)) == h_tuple
-            assert compute_hash(q) == h_q
-            return i * 2
-        h_42 = compute_hash(42)
-        h_43_dot_0 = compute_hash(43.0)
-        h_7_dot_5 = compute_hash(7.5)
-        h_Hello42 = compute_hash("Hello42")
-        h_None = compute_hash(None)
-        h_tuple = compute_hash(("world", None, 42, 7.5))
-        h_q = compute_hash(q)
-
-        res = self.interpret(f, [42])
-        assert res == 84
-
     def test_fetch_translated_config(self):
         assert fetch_translated_config() is None
         def f():
diff --git a/rpython/rlib/test/test_rsiphash.py b/rpython/rlib/test/test_rsiphash.py
--- a/rpython/rlib/test/test_rsiphash.py
+++ b/rpython/rlib/test/test_rsiphash.py
@@ -1,5 +1,10 @@
-from rpython.rlib.rsiphash import siphash24, choosen_seed
+import os
+from rpython.rlib.rsiphash import siphash24, _siphash24, choosen_seed
+from rpython.rlib.rsiphash import initialize_from_env, enable_siphash24
+from rpython.rlib.objectmodel import compute_hash
+from rpython.rlib.rarithmetic import intmask
 from rpython.rtyper.lltypesystem import llmemory, rffi
+from rpython.translator.c.test.test_genc import compile
 
 
 CASES = [
@@ -28,13 +33,11 @@
 ]
 
 def check(s):
-    p = rffi.str2charp(s)
     q = rffi.str2charp('?' + s)
     with choosen_seed(0x8a9f065a358479f4, 0x11cb1e9ee7f40e1f,
                       test_misaligned_path=True):
-        x = siphash24(llmemory.cast_ptr_to_adr(p), len(s))
-        y = siphash24(llmemory.cast_ptr_to_adr(rffi.ptradd(q, 1)), len(s))
-    rffi.free_charp(p)
+        x = siphash24(s)
+        y = _siphash24(llmemory.cast_ptr_to_adr(rffi.ptradd(q, 1)), len(s))
     rffi.free_charp(q)
     assert x == y
     return x
@@ -42,3 +45,104 @@
 def test_siphash24():
     for expected, string in CASES:
         assert check(string) == expected
+
+def test_fix_seed():
+    old_val = os.environ.get('PYTHONHASHSEED', None)
+    try:
+        os.environ['PYTHONHASHSEED'] = '0'
+        initialize_from_env()
+        assert siphash24("foo") == 15988776847138518036
+        # value checked with CPython 3.5
+
+        os.environ['PYTHONHASHSEED'] = '4000000000'
+        initialize_from_env()
+        assert siphash24("foo") == 13829150778707464258
+        # value checked with CPython 3.5
+
+        for env in ['', 'random']:
+            os.environ['PYTHONHASHSEED'] = env
+            initialize_from_env()
+            hash1 = siphash24("foo")
+            initialize_from_env()
+            hash2 = siphash24("foo")
+            assert hash1 != hash2     # extremely unlikely
+    finally:
+        if old_val is None:
+            del os.environ['PYTHONHASHSEED']
+        else:
+            os.environ['PYTHONHASHSEED'] = old_val
+
+def test_translated():
+    d1 = {"foo": 123}
+    d2 = {u"foo": 456, u"\u1234\u5678": 789}
+    class G:
+        pass
+    g = G()
+    g.v1 = d1.copy()
+    g.v2 = d2.copy()
+
+    def fetch(n):
+        if n == 0: return d1.get("foo", -1)
+        if n == 1: return g.v1.get("foo", -1)
+        if n == 2: return compute_hash("foo")
+        if n == 3: return d2.get(u"foo", -1)
+        if n == 4: return g.v2.get(u"foo", -1)
+        if n == 5: return compute_hash(u"foo")
+        if n == 6: return d2.get(u"\u1234\u5678", -1)
+        if n == 7: return g.v2.get(u"\u1234\u5678", -1)
+        if n == 8: return compute_hash(u"\u1234\u5678")
+        assert 0
+
+    def entrypoint(n):
+        enable_siphash24()
+        g.v1["bar"] = -2
+        g.v2[u"bar"] = -2
+        if n >= 0:    # get items one by one, because otherwise it may
+                      # be the case that one line influences the next
+            return str(fetch(n))
+        else:
+            # ...except in random mode, because we want all results
+            # to be computed with the same seed
+            return ' '.join([str(fetch(n)) for n in range(9)])
+
+    fn = compile(entrypoint, [int])
+
+    def getall():
+        return [int(fn(i)) for i in range(9)]
+
+    old_val = os.environ.get('PYTHONHASHSEED', None)
+    try:
+        os.environ['PYTHONHASHSEED'] = '0'
+        s1 = getall()
+        assert s1[:8] == [
+            123, 123, intmask(15988776847138518036),
+            456, 456, intmask(15988776847138518036),
+            789, 789]
+        assert s1[8] in [intmask(17593683438421985039),    # ucs2 mode
+                         intmask(94801584261658677)]       # ucs4 mode
+
+        os.environ['PYTHONHASHSEED'] = '3987654321'
+        s1 = getall()
+        assert s1[:8] == [
+            123, 123, intmask(5890804383681474441),
+            456, 456, intmask(5890804383681474441),
+            789, 789]
+        assert s1[8] in [intmask(4192582507672183374),     # ucs2 mode
+                         intmask(7179255293164649778)]     # ucs4 mode
+
+        for env in ['', 'random']:
+            os.environ['PYTHONHASHSEED'] = env
+            s1 = map(int, fn(-1).split())
+            s2 = map(int, fn(-1).split())
+            assert s1[0:2]+s1[3:5]+s1[6:8] == [123, 123, 456, 456, 789, 789]
+            assert s1[2] == s1[5]
+            assert s2[0:2]+s2[3:5]+s2[6:8] == [123, 123, 456, 456, 789, 789]
+            assert s2[2] == s2[5]
+            #
+            assert len(set([s1[2], s2[2], s1[8], s2[8]])) == 4
+
+    finally:
+        if old_val is None:
+            del os.environ['PYTHONHASHSEED']
+        else:
+            os.environ['PYTHONHASHSEED'] = old_val
diff --git a/rpython/rlib/test/test_rweakvaldict.py b/rpython/rlib/test/test_rweakvaldict.py
--- a/rpython/rlib/test/test_rweakvaldict.py
+++ b/rpython/rlib/test/test_rweakvaldict.py
@@ -1,8 +1,9 @@
 import py
 from rpython.annotator.model import UnionError
-from rpython.rlib import rgc
+from rpython.rlib import rgc, nonconst
 from rpython.rlib.rweakref import RWeakValueDictionary
 from rpython.rtyper.test.test_llinterp import interpret
+from rpython.translator.c.test.test_genc import compile
 
 class X(object):
     pass
@@ -213,3 +214,33 @@
         assert d.get(keys[3]) is None
     f()
     interpret(f, [])
+
+def test_translation_prebuilt_1():
+    class K:
+        pass
+    d = RWeakValueDictionary(K, X)
+    k1 = K(); k2 = K()
+    x1 = X(); x2 = X()
+    d.set(k1, x1)
+    d.set(k2, x2)
+    def f():
+        assert d.get(k1) is x1
+        assert d.get(k2) is x2
+    f()
+    fc = compile(f, [], gcpolicy="boehm", rweakref=True)
+    fc()
+
+def _test_translation_prebuilt_2():
+    from rpython.rlib import rsiphash
+    d = RWeakValueDictionary(str, X)
+    k1 = "key1"; k2 = "key2"
+    x1 = X(); x2 = X()
+    d.set(k1, x1)
+    d.set(k2, x2)
+    def f():
+        rsiphash.enable_siphash24()
+        i = nonconst.NonConstant(1)
+        assert d.get("key%d" % (i,)) is x1
+        assert d.get("key%d" % (i+1,)) is x2
+    fc = compile(f, [], gcpolicy="boehm", rweakref=True)
+    fc()
diff --git a/rpython/rtyper/lltypesystem/lloperation.py b/rpython/rtyper/lltypesystem/lloperation.py
--- a/rpython/rtyper/lltypesystem/lloperation.py
+++ b/rpython/rtyper/lltypesystem/lloperation.py
@@ -539,6 +539,7 @@
     'decode_arg_def':       LLOp(canraise=(Exception,)),
     'getslice':             LLOp(canraise=(Exception,)),
     'check_and_clear_exc':  LLOp(),
+    'call_at_startup':      LLOp(canrun=True),
 
     'threadlocalref_addr':  LLOp(),                   # get (or make) addr of tl
     'threadlocalref_get':   LLOp(sideeffects=False),  # read field (no check)
diff --git a/rpython/rtyper/lltypesystem/lltype.py b/rpython/rtyper/lltypesystem/lltype.py
--- a/rpython/rtyper/lltypesystem/lltype.py
+++ b/rpython/rtyper/lltypesystem/lltype.py
@@ -1380,20 +1380,11 @@
             return callb(*args)
         raise TypeError("%r instance is not a function" % (self._T,))
 
-    def _identityhash(self, cache=True):
+    def _identityhash(self):
         p = normalizeptr(self)
-        try:
-            return p._obj._hash_cache_
-        except AttributeError:
-            assert self._T._gckind == 'gc'
-            assert self      # not for NULL
-            result = hash(p._obj)
-            if cache:
-                try:
-                    p._obj._hash_cache_ = result
-                except AttributeError:
-                    pass
-            return result
+        assert self._T._gckind == 'gc'
+        assert self      # not for NULL
+        return hash(p._obj)
 
 class _ptr(_abstract_ptr):
     __slots__ = ('_TYPE',
@@ -1759,7 +1750,7 @@
 class _struct(_parentable):
     _kind = "structure"
 
-    __slots__ = ('_hash_cache_', '_compilation_info')
+    __slots__ = ('_compilation_info',)
 
     def __new__(self, TYPE, n=None, initialization=None, parent=None,
                 parentindex=None):
@@ -2442,24 +2433,6 @@
     return SomeInteger()
 
 
-def identityhash_nocache(p):
-    """Version of identityhash() to use from backends that don't care about
-    caching."""
-    assert p
-    return p._identityhash(cache=False)
-
-def init_identity_hash(p, value):
-    """For a prebuilt object p, initialize its hash value to 'value'."""
-    assert isinstance(typeOf(p), Ptr)
-    p = normalizeptr(p)
-    if not p:
-        raise ValueError("cannot change hash(NULL)!")
-    if hasattr(p._obj, '_hash_cache_'):
-        raise ValueError("the hash of %r was already computed" % (p,))
-    if typeOf(p).TO._is_varsize():
-        raise ValueError("init_identity_hash(): not for varsized types")
-    p._obj._hash_cache_ = intmask(value)
-
 def isCompatibleType(TYPE1, TYPE2):
     return TYPE1._is_compatible(TYPE2)
 
diff --git a/rpython/rtyper/lltypesystem/opimpl.py b/rpython/rtyper/lltypesystem/opimpl.py
--- a/rpython/rtyper/lltypesystem/opimpl.py
+++ b/rpython/rtyper/lltypesystem/opimpl.py
@@ -742,6 +742,9 @@
 def op_gc_move_out_of_nursery(obj):
     return obj
 
+def op_call_at_startup(init_func):
+    pass    # do nothing
+
 # ____________________________________________________________
 
 def get_op_impl(opname):
diff --git a/rpython/rtyper/lltypesystem/rdict.py b/rpython/rtyper/lltypesystem/rdict.py
--- a/rpython/rtyper/lltypesystem/rdict.py
+++ b/rpython/rtyper/lltypesystem/rdict.py
@@ -236,21 +236,14 @@
                 if self.r_rdict_hashfn.lowleveltype != lltype.Void:
                     l_fn = self.r_rdict_hashfn.convert_const(dictobj.key_hash)
                     l_dict.fnkeyhash = l_fn
-
-                for dictkeycontainer, dictvalue in dictobj._dict.items():
-                    llkey = r_key.convert_const(dictkeycontainer.key)
-                    llvalue = r_value.convert_const(dictvalue)
-                    ll_dict_insertclean(l_dict, llkey, llvalue,
-                                        dictkeycontainer.hash)
-                return l_dict
-
+                any_items = dictobj._dict.items()
             else:
-                for dictkey, dictvalue in dictobj.items():
-                    llkey = r_key.convert_const(dictkey)
-                    llvalue = r_value.convert_const(dictvalue)
-                    ll_dict_insertclean(l_dict, llkey, llvalue,
-                                        l_dict.keyhash(llkey))
-                return l_dict
+                any_items = dictobj.items()
+            if any_items:
+                raise TyperError("found a prebuilt, explicitly non-ordered, "
+                                 "non-empty dict.  it would require additional"
+                                 " support to rehash it at program start-up")
+            return l_dict
 
     def rtype_len(self, hop):
         v_dict, = hop.inputargs(self)
diff --git a/rpython/rtyper/lltypesystem/rffi.py b/rpython/rtyper/lltypesystem/rffi.py
--- a/rpython/rtyper/lltypesystem/rffi.py
+++ b/rpython/rtyper/lltypesystem/rffi.py
@@ -829,7 +829,7 @@
         return assert_str0(charpsize2str(cp, size))
     charp2str._annenforceargs_ = [lltype.SomePtr(TYPEP)]
 
-    # str -> char*, bool, bool
+    # str -> char*, flag
     # Can't inline this because of the raw address manipulation.
     @jit.dont_look_inside
     def get_nonmovingbuffer(data):
diff --git a/rpython/rtyper/lltypesystem/rordereddict.py b/rpython/rtyper/lltypesystem/rordereddict.py
--- a/rpython/rtyper/lltypesystem/rordereddict.py
+++ b/rpython/rtyper/lltypesystem/rordereddict.py
@@ -5,7 +5,7 @@
 from rpython.rtyper.lltypesystem import lltype, llmemory, rffi
 from rpython.rlib import objectmodel, jit, rgc, types
 from rpython.rlib.signature import signature
-from rpython.rlib.objectmodel import specialize, likely
+from rpython.rlib.objectmodel import specialize, likely, not_rpython
 from rpython.rtyper.debug import ll_assert
 from rpython.rlib.rarithmetic import r_uint, intmask
 from rpython.rtyper import rmodel
@@ -46,20 +46,23 @@
 @jit.look_inside_iff(lambda d, key, hash, flag: jit.isvirtual(d))
 @jit.oopspec('ordereddict.lookup(d, key, hash, flag)')
 def ll_call_lookup_function(d, key, hash, flag):
-    fun = d.lookup_function_no & FUNC_MASK
-    # This likely() here forces gcc to compile the check for fun == FUNC_BYTE
-    # first.  Otherwise, this is a regular switch and gcc (at least 4.7)
-    # compiles this as a series of checks, with the FUNC_BYTE case last.
-    # It sounds minor, but it is worth 6-7% on a PyPy microbenchmark.
-    if likely(fun == FUNC_BYTE):
-        return ll_dict_lookup(d, key, hash, flag, TYPE_BYTE)
-    elif fun == FUNC_SHORT:
-        return ll_dict_lookup(d, key, hash, flag, TYPE_SHORT)
-    elif IS_64BIT and fun == FUNC_INT:
-        return ll_dict_lookup(d, key, hash, flag, TYPE_INT)
-    elif fun == FUNC_LONG:
-        return ll_dict_lookup(d, key, hash, flag, TYPE_LONG)
-    assert False
+    while True:
+        fun = d.lookup_function_no & FUNC_MASK
+        # This likely() here forces gcc to compile the check for fun==FUNC_BYTE
+        # first.  Otherwise, this is a regular switch and gcc (at least 4.7)
+        # compiles this as a series of checks, with the FUNC_BYTE case last.
+        # It sounds minor, but it is worth 6-7% on a PyPy microbenchmark.
+        if likely(fun == FUNC_BYTE):
+            return ll_dict_lookup(d, key, hash, flag, TYPE_BYTE)
+        elif fun == FUNC_SHORT:
+            return ll_dict_lookup(d, key, hash, flag, TYPE_SHORT)
+        elif IS_64BIT and fun == FUNC_INT:
+            return ll_dict_lookup(d, key, hash, flag, TYPE_INT)
+        elif fun == FUNC_LONG:
+            return ll_dict_lookup(d, key, hash, flag, TYPE_LONG)
+        else:
+            ll_dict_create_initial_index(d)
+            # then, retry
 
 def get_ll_dict(DICTKEY, DICTVALUE, get_custom_eq_hash=None, DICT=None,
                 ll_fasthash_function=None, ll_hash_function=None,
@@ -235,6 +238,7 @@
             self.setup()
             self.setup_final()
             l_dict = ll_newdict_size(self.DICT, len(dictobj))
+            ll_no_initial_index(l_dict)
             self.dict_cache[key] = l_dict
             r_key = self.key_repr
             if r_key.lowleveltype == llmemory.Address:
@@ -252,16 +256,14 @@
                 for dictkeycontainer, dictvalue in dictobj._dict.items():
                     llkey = r_key.convert_const(dictkeycontainer.key)
                     llvalue = r_value.convert_const(dictvalue)
-                    _ll_dict_insertclean(l_dict, llkey, llvalue,
-                                         dictkeycontainer.hash)
+                    _ll_dict_insert_no_index(l_dict, llkey, llvalue)
                 return l_dict
 
             else:
                 for dictkey, dictvalue in dictobj.items():
                     llkey = r_key.convert_const(dictkey)
                     llvalue = r_value.convert_const(dictvalue)
-                    _ll_dict_insertclean(l_dict, llkey, llvalue,
-                                         l_dict.keyhash(llkey))
+                    _ll_dict_insert_no_index(l_dict, llkey, llvalue)
                 return l_dict
 
     def rtype_len(self, hop):
@@ -336,11 +338,15 @@
         return DictIteratorRepr(self, "items").newiter(hop)
 
     def rtype_method_iterkeys_with_hash(self, hop):
-        hop.exception_cannot_occur()
+        v_dic, = hop.inputargs(self)
+        hop.exception_is_here()
+        hop.gendirectcall(ll_ensure_indexes, v_dic)
         return DictIteratorRepr(self, "keys_with_hash").newiter(hop)
 
     def rtype_method_iteritems_with_hash(self, hop):
-        hop.exception_cannot_occur()
+        v_dic, = hop.inputargs(self)
+        hop.exception_is_here()
+        hop.gendirectcall(ll_ensure_indexes, v_dic)
         return DictIteratorRepr(self, "items_with_hash").newiter(hop)
 
     def rtype_method_clear(self, hop):
@@ -458,17 +464,30 @@
 
 IS_64BIT = sys.maxint != 2 ** 31 - 1
 
-FUNC_SHIFT = 2
-FUNC_MASK  = 0x03  # two bits
 if IS_64BIT:
-    FUNC_BYTE, FUNC_SHORT, FUNC_INT, FUNC_LONG = range(4)
+    FUNC_SHIFT = 3
+    FUNC_MASK  = 0x07  # three bits
+    FUNC_BYTE, FUNC_SHORT, FUNC_INT, FUNC_LONG, FUNC_MUST_REINDEX = range(5)
 else:
-    FUNC_BYTE, FUNC_SHORT, FUNC_LONG = range(3)
+    FUNC_SHIFT = 2
+    FUNC_MASK  = 0x03  # two bits
+    FUNC_BYTE, FUNC_SHORT, FUNC_LONG, FUNC_MUST_REINDEX = range(4)
 TYPE_BYTE  = rffi.UCHAR
 TYPE_SHORT = rffi.USHORT
 TYPE_INT   = rffi.UINT
 TYPE_LONG  = lltype.Unsigned
 
+def ll_no_initial_index(d):
+    # Used when making new empty dicts, and when translating prebuilt dicts.
+    # Remove the index completely.  A dictionary must always have an
+    # index unless it is freshly created or freshly translated.  Most
+    # dict operations start with ll_call_lookup_function(), which will
+    # recompute the hashes and create the index.
+    ll_assert(d.num_live_items == d.num_ever_used_items, 
+         "ll_no_initial_index(): dict already in use")
+    d.lookup_function_no = FUNC_MUST_REINDEX
+    d.indexes = lltype.nullptr(llmemory.GCREF.TO)
+
 def ll_malloc_indexes_and_choose_lookup(d, n):
     # keep in sync with ll_clear_indexes() below
     if n <= 256:
@@ -508,6 +527,7 @@
 
 @jit.dont_look_inside
 def ll_call_insert_clean_function(d, hash, i):
+    assert i >= 0
     fun = d.lookup_function_no & FUNC_MASK
     if fun == FUNC_BYTE:
         ll_dict_store_clean(d, hash, i, TYPE_BYTE)
@@ -518,6 +538,8 @@
     elif fun == FUNC_LONG:
         ll_dict_store_clean(d, hash, i, TYPE_LONG)
     else:
+        # can't be still FUNC_MUST_REINDEX here
+        ll_assert(False, "ll_call_insert_clean_function(): invalid lookup_fun")
         assert False
 
 def ll_call_delete_by_entry_index(d, hash, i):
@@ -531,6 +553,8 @@
     elif fun == FUNC_LONG:
         ll_dict_delete_by_entry_index(d, hash, i, TYPE_LONG)
     else:
+        # can't be still FUNC_MUST_REINDEX here
+        ll_assert(False, "ll_call_delete_by_entry_index(): invalid lookup_fun")
         assert False
 
 def ll_valid_from_flag(entries, i):
@@ -648,15 +672,14 @@
     ll_dict_reindex(d, _ll_len_of_d_indexes(d))
 _ll_dict_rescue._dont_inline_ = True
 
-def _ll_dict_insertclean(d, key, value, hash):
+@not_rpython
+def _ll_dict_insert_no_index(d, key, value):
     # never translated
     ENTRY = lltype.typeOf(d.entries).TO.OF
-    ll_call_insert_clean_function(d, hash, d.num_ever_used_items)
     entry = d.entries[d.num_ever_used_items]
     entry.key = key
     entry.value = value
-    if hasattr(ENTRY, 'f_hash'):
-        entry.f_hash = hash
+    # note that f_hash is left uninitialized in prebuilt dicts
     if hasattr(ENTRY, 'f_valid'):
         entry.f_valid = True
     d.num_ever_used_items += 1
@@ -811,12 +834,13 @@
         # also possible that there are more dead items immediately behind the
         # last one, we reclaim all the dead items at the end of the ordereditem
         # at the same point.
-        i = d.num_ever_used_items - 2
-        while i >= 0 and not d.entries.valid(i):
+        i = index
+        while True:
             i -= 1
-        j = i + 1
-        assert j >= 0
-        d.num_ever_used_items = j
+            assert i >= 0
+            if d.entries.valid(i):    # must be at least one
+                break
+        d.num_ever_used_items = i + 1
 
     # If the dictionary is at least 87.5% dead items, then consider shrinking
     # it.
@@ -844,6 +868,50 @@
     else:
         ll_dict_reindex(d, new_size)
 
+def ll_ensure_indexes(d):
+    num = d.lookup_function_no
+    if num == FUNC_MUST_REINDEX:
+        ll_dict_create_initial_index(d)
+    else:
+        ll_assert((num & FUNC_MASK) != FUNC_MUST_REINDEX,
+                  "bad combination in lookup_function_no")
+
+def ll_dict_create_initial_index(d):
+    """Create the initial index for a dictionary.  The common case is
+    that 'd' is empty.  The uncommon case is that it is a prebuilt
+    dictionary frozen by translation, in which case we must rehash all
+    entries.  The common case must be seen by the JIT.
+    """
+    if d.num_live_items == 0:
+        ll_malloc_indexes_and_choose_lookup(d, DICT_INITSIZE)
+        d.resize_counter = DICT_INITSIZE * 2
+    else:
+        ll_dict_rehash_after_translation(d)
+
+@jit.dont_look_inside
+def ll_dict_rehash_after_translation(d):
+    assert d.num_live_items == d.num_ever_used_items
+    assert not d.indexes
+    #
+    # recompute all hashes.  Needed if they are stored in d.entries,
+    # but do it anyway: otherwise, e.g. a string-keyed dictionary
+    # won't have a fasthash on its strings if their hash is still
+    # uncomputed.
+    ENTRY = lltype.typeOf(d.entries).TO.OF
+    for i in range(d.num_ever_used_items):
+        assert d.entries.valid(i)
+        d_entry = d.entries[i]
+        h = d.keyhash(d_entry.key)
+        if hasattr(ENTRY, 'f_hash'):
+            d_entry.f_hash = h
+        #else: purely for the side-effect it can have on d_entry.key
+    #
+    # Use the smallest acceptable size for ll_dict_reindex
+    new_size = DICT_INITSIZE
+    while new_size * 2 - d.num_live_items * 3 <= 0:
+        new_size *= 2
+    ll_dict_reindex(d, new_size)
+
 def ll_dict_reindex(d, new_size):
     if bool(d.indexes) and _ll_len_of_d_indexes(d) == new_size:
         ll_clear_indexes(d, new_size)   # hack: we can reuse the same array
@@ -857,12 +925,33 @@
     entries = d.entries
     i = 0
     ibound = d.num_ever_used_items
-    while i < ibound:
-        if entries.valid(i):
-            hash = entries.hash(i)
-            ll_call_insert_clean_function(d, hash, i)
-        i += 1
-    #old_entries.delete() XXXX!
+    #
+    # Write four loops, moving the check for the value of 'fun' out of
+    # the loops.  A small speed-up over ll_call_insert_clean_function().
+    fun = d.lookup_function_no     # == lookup_function_no & FUNC_MASK
+    if fun == FUNC_BYTE:
+        while i < ibound:
+            if entries.valid(i):
+                ll_dict_store_clean(d, entries.hash(i), i, TYPE_BYTE)
+            i += 1
+    elif fun == FUNC_SHORT:
+        while i < ibound:
+            if entries.valid(i):
+                ll_dict_store_clean(d, entries.hash(i), i, TYPE_SHORT)
+            i += 1
+    elif IS_64BIT and fun == FUNC_INT:
+        while i < ibound:
+            if entries.valid(i):
+                ll_dict_store_clean(d, entries.hash(i), i, TYPE_INT)
+            i += 1
+    elif fun == FUNC_LONG:
+        while i < ibound:
+            if entries.valid(i):
+                ll_dict_store_clean(d, entries.hash(i), i, TYPE_LONG)
+            i += 1
+    else:
+        assert False
+
 
 # ------- a port of CPython's dictobject.c's lookdict implementation -------
 PERTURB_SHIFT = 5
@@ -1013,10 +1102,11 @@
 def ll_newdict(DICT):
     d = DICT.allocate()
     d.entries = _ll_empty_array(DICT)
-    ll_malloc_indexes_and_choose_lookup(d, DICT_INITSIZE)
+    # Don't allocate an 'indexes' for empty dict.  It seems a typical
+    # program contains tons of empty dicts, so this might be a memory win.
     d.num_live_items = 0
     d.num_ever_used_items = 0
-    d.resize_counter = DICT_INITSIZE * 2
+    ll_no_initial_index(d)
     return d
 OrderedDictRepr.ll_newdict = staticmethod(ll_newdict)
 
@@ -1101,6 +1191,10 @@
                 # as soon as we do something like ll_dict_reindex().
                 if index == (dict.lookup_function_no >> FUNC_SHIFT):
                     dict.lookup_function_no += (1 << FUNC_SHIFT)
+                # note that we can't have modified a FUNC_MUST_REINDEX
+                # dict here because such dicts have no invalid entries
+                ll_assert((dict.lookup_function_no & FUNC_MASK) !=
+                      FUNC_MUST_REINDEX, "bad combination in _ll_dictnext")
             index = nextindex
         # clear the reference to the dict and prevent restarts
         iter.dict = lltype.nullptr(lltype.typeOf(iter).TO.dict.TO)
@@ -1146,6 +1240,8 @@
         return dict.entries[index].value
 
 def ll_dict_copy(dict):
+    ll_ensure_indexes(dict)
+
     DICT = lltype.typeOf(dict).TO
     newdict = DICT.allocate()
     newdict.entries = DICT.entries.TO.allocate(len(dict.entries))
@@ -1180,6 +1276,10 @@
     DICT = lltype.typeOf(d).TO
     old_entries = d.entries
     d.entries = _ll_empty_array(DICT)
+    # note: we can't remove the index here, because it is possible that
+    # crazy Python code calls d.clear() from the method __eq__() called
+    # from ll_dict_lookup(d).  Instead, stick to the rule that once a
+    # dictionary has got an index, it will always have one.
     ll_malloc_indexes_and_choose_lookup(d, DICT_INITSIZE)
     d.num_live_items = 0
     d.num_ever_used_items = 0
@@ -1190,6 +1290,7 @@
 def ll_dict_update(dic1, dic2):
     if dic1 == dic2:
         return
+    ll_ensure_indexes(dic2)    # needed for entries.hash() below
     ll_prepare_dict_update(dic1, dic2.num_live_items)
     i = 0
     while i < dic2.num_ever_used_items:
@@ -1216,6 +1317,7 @@
     # the case where dict.update() actually has a lot of collisions.
     # If num_extra is much greater than d.num_live_items the conditional_call
     # will trigger anyway, which is really the goal.
+    ll_ensure_indexes(d)
     x = num_extra - d.num_live_items
     jit.conditional_call(d.resize_counter <= x * 3,
                          _ll_dict_resize_to, d, num_extra)
@@ -1275,6 +1377,7 @@
     if dic.num_live_items == 0:
         raise KeyError
 
+    ll_ensure_indexes(dic)
     entries = dic.entries
 
     # find the last entry.  It's unclear if the loop below is still
diff --git a/rpython/rtyper/lltypesystem/rstr.py b/rpython/rtyper/lltypesystem/rstr.py
--- a/rpython/rtyper/lltypesystem/rstr.py
+++ b/rpython/rtyper/lltypesystem/rstr.py
@@ -1,9 +1,9 @@
 from weakref import WeakValueDictionary
 
 from rpython.annotator import model as annmodel
-from rpython.rlib import jit, types
+from rpython.rlib import jit, types, objectmodel
 from rpython.rlib.objectmodel import (malloc_zero_filled, we_are_translated,
-    ll_hash_string, keepalive_until_here, specialize, enforceargs)
+    ll_hash_string, keepalive_until_here, specialize, enforceargs, dont_inline)
 from rpython.rlib.signature import signature
 from rpython.rlib.rarithmetic import ovfcheck
 from rpython.rtyper.error import TyperError
@@ -383,6 +383,8 @@
             return 0
 
     @staticmethod
+    @dont_inline
+    @jit.dont_look_inside
     def _ll_strhash(s):
         # unlike CPython, there is no reason to avoid to return -1
         # but our malloc initializes the memory to zero, so we use zero as the
@@ -400,6 +402,7 @@
 
     @staticmethod
     def ll_strfasthash(s):
+        ll_assert(s.hash != 0, "ll_strfasthash: hash==0")
         return s.hash     # assumes that the hash is already computed
 
     @staticmethod
@@ -1258,7 +1261,8 @@
                               'gethash': LLHelpers.ll_strhash,
                               'length': LLHelpers.ll_length,
                               'find': LLHelpers.ll_find,
-                              'rfind': LLHelpers.ll_rfind}))
+                              'rfind': LLHelpers.ll_rfind},
+                    hints={'remove_hash': True}))
 UNICODE.become(GcStruct('rpy_unicode', ('hash', Signed),
                         ('chars', Array(UniChar, hints={'immutable': True})),
                         adtmeths={'malloc' : staticAdtMethod(mallocunicode),
@@ -1266,8 +1270,8 @@
                                   'copy_contents' : staticAdtMethod(copy_unicode_contents),
                                   'copy_contents_from_str' : staticAdtMethod(copy_unicode_contents),
                                   'gethash': LLHelpers.ll_strhash,
-                                  'length': LLHelpers.ll_length}
-                        ))
+                                  'length': LLHelpers.ll_length},
+                    hints={'remove_hash': True}))
 
 
 # TODO: make the public interface of the rstr module cleaner
diff --git a/rpython/rtyper/lltypesystem/test/test_lltype.py b/rpython/rtyper/lltypesystem/test/test_lltype.py
--- a/rpython/rtyper/lltypesystem/test/test_lltype.py
+++ b/rpython/rtyper/lltypesystem/test/test_lltype.py
@@ -749,22 +749,10 @@
     assert hash3 == identityhash(s3)
     assert hash3 == identityhash(s3.super)
     assert hash3 == identityhash(s3.super.super)
-    py.test.raises(ValueError, init_identity_hash, s3, hash3^1)
-    py.test.raises(ValueError, init_identity_hash, s3.super, hash3^4)
-    py.test.raises(ValueError, init_identity_hash, s3.super.super, hash3^9)
-
-    s3 = malloc(S3)
-    init_identity_hash(s3.super, -123)
-    assert -123 == identityhash(s3)
-    assert -123 == identityhash(s3.super)
-    assert -123 == identityhash(s3.super.super)
-    py.test.raises(ValueError, init_identity_hash, s3, 4313)


More information about the pypy-commit mailing list