[pypy-commit] pypy py3.5: hg merge py3.5-siphash24
arigo
pypy.commits at gmail.com
Tue Jan 31 11:44:17 EST 2017
Author: Armin Rigo <arigo at tunes.org>
Branch: py3.5
Changeset: r89858:94325768daae
Date: 2017-01-31 17:37 +0100
http://bitbucket.org/pypy/pypy/changeset/94325768daae/
Log: hg merge py3.5-siphash24
diff too long, truncating to 2000 out of 2956 lines
diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py
--- a/pypy/config/pypyoption.py
+++ b/pypy/config/pypyoption.py
@@ -201,6 +201,13 @@
"issue, you can disable them here",
default=True),
+ ChoiceOption("hash",
+ "The hash function to use for strings: fnv from CPython 2.7"
+ " or siphash24 from CPython >= 3.4",
+ ["fnv", "siphash24"],
+ default="siphash24",
+ cmdline="--hash"),
+
OptionDescription("std", "Standard Object Space Options", [
BoolOption("withtproxy", "support transparent proxies",
default=True),
diff --git a/pypy/goal/targetpypystandalone.py b/pypy/goal/targetpypystandalone.py
--- a/pypy/goal/targetpypystandalone.py
+++ b/pypy/goal/targetpypystandalone.py
@@ -36,6 +36,7 @@
w_run_toplevel = space.getitem(w_dict, space.wrap('run_toplevel'))
w_initstdio = space.getitem(w_dict, space.wrap('initstdio'))
withjit = space.config.objspace.usemodules.pypyjit
+ hashfunc = space.config.objspace.hash
else:
w_initstdio = space.appexec([], """():
return lambda unbuffered: None
@@ -46,6 +47,10 @@
from rpython.jit.backend.hlinfo import highleveljitinfo
highleveljitinfo.sys_executable = argv[0]
+ if hashfunc == "siphash24":
+ from rpython.rlib import rsiphash
+ rsiphash.enable_siphash24()
+
#debug("entry point starting")
#for arg in argv:
# debug(" argv -> " + arg)
diff --git a/pypy/module/_cffi_backend/newtype.py b/pypy/module/_cffi_backend/newtype.py
--- a/pypy/module/_cffi_backend/newtype.py
+++ b/pypy/module/_cffi_backend/newtype.py
@@ -23,13 +23,34 @@
# ____________________________________________________________
class UniqueCache:
+ for_testing = False # set to True on the class level in test_c.py
+
def __init__(self, space):
self.ctvoid = None # Cache for the 'void' type
self.ctvoidp = None # Cache for the 'void *' type
self.ctchara = None # Cache for the 'char[]' type
self.primitives = {} # Cache for {name: primitive_type}
self.functions = [] # see _new_function_type()
- self.for_testing = False
+ self.functions_packed = None # only across translation
+
+ def _cleanup_(self):
+ import gc
+ assert self.functions_packed is None
+ # Note: a full PyPy translation may still have
+ # 'self.functions == []' at this point, possibly depending
+ # on details. Code tested directly in test_ffi_obj
+ gc.collect()
+ funcs = []
+ for weakdict in self.functions:
+ funcs += weakdict._dict.values()
+ del self.functions[:]
+ self.functions_packed = funcs if len(funcs) > 0 else None
+
+ def unpack_functions(self):
+ for fct in self.functions_packed:
+ _record_function_type(self, fct)
+ self.functions_packed = None
+
def _clean_cache(space):
"NOT_RPYTHON"
@@ -622,7 +643,7 @@
for w_arg in fargs:
y = compute_identity_hash(w_arg)
x = intmask((1000003 * x) ^ y)
- x ^= (ellipsis - abi)
+ x ^= ellipsis + 2 * abi
if unique_cache.for_testing: # constant-folded to False in translation;
x &= 3 # but for test, keep only 2 bits of hash
return x
@@ -646,6 +667,8 @@
# one such dict, but in case of hash collision, there might be
# more.
unique_cache = space.fromcache(UniqueCache)
+ if unique_cache.functions_packed is not None:
+ unique_cache.unpack_functions()
func_hash = _func_key_hash(unique_cache, fargs, fresult, ellipsis, abi)
for weakdict in unique_cache.functions:
ctype = weakdict.get(func_hash)
@@ -674,13 +697,18 @@
#
fct = ctypefunc.W_CTypeFunc(space, fargs, fresult, ellipsis, abi)
unique_cache = space.fromcache(UniqueCache)
- func_hash = _func_key_hash(unique_cache, fargs, fresult, ellipsis, abi)
+ _record_function_type(unique_cache, fct)
+ return fct
+
+def _record_function_type(unique_cache, fct):
+ from pypy.module._cffi_backend import ctypefunc
+ #
+ func_hash = _func_key_hash(unique_cache, fct.fargs, fct.ctitem,
+ fct.ellipsis, fct.abi)
for weakdict in unique_cache.functions:
if weakdict.get(func_hash) is None:
- weakdict.set(func_hash, fct)
break
else:
weakdict = rweakref.RWeakValueDictionary(int, ctypefunc.W_CTypeFunc)
unique_cache.functions.append(weakdict)
- weakdict.set(func_hash, fct)
- return fct
+ weakdict.set(func_hash, fct)
diff --git a/pypy/module/_cffi_backend/test/test_c.py b/pypy/module/_cffi_backend/test/test_c.py
--- a/pypy/module/_cffi_backend/test/test_c.py
+++ b/pypy/module/_cffi_backend/test/test_c.py
@@ -37,6 +37,7 @@
def setup_class(cls):
testfuncs_w = []
keepalive_funcs = []
+ UniqueCache.for_testing = True
test_lib_c = tmpdir.join('_test_lib.c')
src_test_lib_c = py.path.local(__file__).dirpath().join('_test_lib.c')
@@ -100,11 +101,12 @@
_all_test_c.find_and_load_library = func
_all_test_c._testfunc = testfunc
""")
- UniqueCache.for_testing = True
def teardown_method(self, method):
+ _clean_cache(self.space)
+
+ def teardown_class(cls):
UniqueCache.for_testing = False
- _clean_cache(self.space)
all_names = ', '.join(Module.interpleveldefs.keys())
diff --git a/pypy/module/_cffi_backend/test/test_ffi_obj.py b/pypy/module/_cffi_backend/test/test_ffi_obj.py
--- a/pypy/module/_cffi_backend/test/test_ffi_obj.py
+++ b/pypy/module/_cffi_backend/test/test_ffi_obj.py
@@ -1,5 +1,23 @@
+from pypy.module._cffi_backend import newtype
from pypy.module._cffi_backend.newtype import _clean_cache
+
+class TestFFIObj:
+ spaceconfig = dict(usemodules=('_cffi_backend', 'array'))
+
+ def teardown_method(self, meth):
+ _clean_cache(self.space)
+
+ def test_new_function_type_during_translation(self):
+ space = self.space
+ BInt = newtype.new_primitive_type(space, "int")
+ BFunc = newtype.new_function_type(space, space.wrap([BInt]), BInt)
+ assert BFunc is newtype.new_function_type(space,space.wrap([BInt]),BInt)
+ unique_cache = space.fromcache(newtype.UniqueCache)
+ unique_cache._cleanup_()
+ assert BFunc is newtype.new_function_type(space,space.wrap([BInt]),BInt)
+
+
class AppTestFFIObj:
spaceconfig = dict(usemodules=('_cffi_backend', 'array'))
diff --git a/pypy/module/_weakref/interp__weakref.py b/pypy/module/_weakref/interp__weakref.py
--- a/pypy/module/_weakref/interp__weakref.py
+++ b/pypy/module/_weakref/interp__weakref.py
@@ -194,6 +194,15 @@
W_WeakrefBase.__init__(self, space, w_obj, w_callable)
self.w_hash = None
+ def _cleanup_(self):
+ # When a prebuilt weakref is frozen inside a translation, if
+ # this weakref has got an already-cached w_hash, then throw it
+ # away. That's because the hash value will change after
+ # translation. It will be recomputed the first time we ask for
+ # it. Note that such a frozen weakref, if not dead, will point
+ # to a frozen object, so it will never die.
+ self.w_hash = None
+
def descr__init__weakref(self, space, w_obj, w_callable=None,
__args__=None):
if __args__.arguments_w:
diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py
--- a/pypy/module/posix/interp_posix.py
+++ b/pypy/module/posix/interp_posix.py
@@ -2127,6 +2127,12 @@
raise wrap_oserror(space, e, eintr_retry=False)
return space.wrap(res)
+class SigCheck:
+ pass
+_sigcheck = SigCheck()
+def _signal_checker():
+ _sigcheck.space.getexecutioncontext().checksignals()
+
@unwrap_spec(size=int)
def urandom(space, size):
"""urandom(size) -> str
@@ -2134,9 +2140,12 @@
Return a string of 'size' random bytes suitable for cryptographic use.
"""
context = get(space).random_context
- signal_checker = space.getexecutioncontext().checksignals
try:
- return space.newbytes(rurandom.urandom(context, n, signal_checker))
+ # urandom() takes a final argument that should be a regular function,
+ # not a bound method like 'getexecutioncontext().checksignals'.
+ # Otherwise, we can't use it from several independent places.
+ _sigcheck.space = space
+ return space.newbytes(rurandom.urandom(context, n, _signal_checker))
except OSError as e:
# 'rurandom' should catch and retry internally if it gets EINTR
# (at least in os.read(), which is probably enough in practice)
diff --git a/pypy/module/sys/system.py b/pypy/module/sys/system.py
--- a/pypy/module/sys/system.py
+++ b/pypy/module/sys/system.py
@@ -5,7 +5,6 @@
from pypy.objspace.std.complexobject import HASH_IMAG
from pypy.objspace.std.floatobject import HASH_INF, HASH_NAN
from pypy.objspace.std.intobject import HASH_MODULUS
-from pypy.objspace.std.bytesobject import HASH_ALGORITHM
from pypy.interpreter import gateway
from rpython.rlib import rbigint, rfloat
from rpython.rtyper.lltypesystem import lltype, rffi
@@ -79,11 +78,22 @@
return space.call_function(w_int_info, space.newtuple(info_w))
def get_hash_info(space):
- HASH_HASH_BITS = 8 * rffi.sizeof(lltype.Signed)
- HASH_SEED_BITS = 0 # XXX don't know what this is supposed to be
+ HASH_ALGORITHM = space.config.objspace.hash
+ if space.config.objspace.hash == "fnv":
+ HASH_HASH_BITS = 8 * rffi.sizeof(lltype.Signed)
+ HASH_SEED_BITS = 0
+ # CPython has ^ > 0 here, but the seed of "fnv" is of limited
+ # use, so we don't implement it
+ elif space.config.objspace.hash == "siphash24":
+ HASH_HASH_BITS = 64
+ HASH_SEED_BITS = 128
+ else:
+ assert 0, "please add the parameters for this different hash function"
+
+ HASH_WIDTH = 8 * rffi.sizeof(lltype.Signed)
HASH_CUTOFF = 0
info_w = [
- space.wrap(8 * rffi.sizeof(lltype.Signed)),
+ space.wrap(HASH_WIDTH),
space.wrap(HASH_MODULUS),
space.wrap(HASH_INF),
space.wrap(HASH_NAN),
diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py
--- a/pypy/objspace/std/bytesobject.py
+++ b/pypy/objspace/std/bytesobject.py
@@ -3,7 +3,7 @@
from rpython.rlib import jit
from rpython.rlib.objectmodel import (
compute_hash, compute_unique_id, import_from_mixin, newlist_hint,
- resizelist_hint, HASH_ALGORITHM)
+ resizelist_hint)
from rpython.rlib.buffer import StringBuffer
from rpython.rlib.rstring import StringBuilder
diff --git a/pypy/objspace/std/setobject.py b/pypy/objspace/std/setobject.py
--- a/pypy/objspace/std/setobject.py
+++ b/pypy/objspace/std/setobject.py
@@ -563,6 +563,11 @@
class W_FrozensetObject(W_BaseSetObject):
hash = 0
+ def _cleanup_(self):
+ # in case there are frozenset objects existing during
+ # translation, make sure we don't translate a cached hash
+ self.hash = 0
+
def is_w(self, space, w_other):
if not isinstance(w_other, W_FrozensetObject):
return False
diff --git a/rpython/annotator/bookkeeper.py b/rpython/annotator/bookkeeper.py
--- a/rpython/annotator/bookkeeper.py
+++ b/rpython/annotator/bookkeeper.py
@@ -287,7 +287,7 @@
for ek, ev in items:
result.dictdef.generalize_key(self.immutablevalue(ek))
result.dictdef.generalize_value(self.immutablevalue(ev))
- result.dictdef.seen_prebuilt_key(ek)
+ #dictdef.seen_prebuilt_key(ek)---not needed any more
seen_elements = len(items)
# if the dictionary grew during the iteration,
# start over again
diff --git a/rpython/annotator/dictdef.py b/rpython/annotator/dictdef.py
--- a/rpython/annotator/dictdef.py
+++ b/rpython/annotator/dictdef.py
@@ -115,13 +115,5 @@
def generalize_value(self, s_value):
self.dictvalue.generalize(s_value)
- def seen_prebuilt_key(self, x):
- # In case we are an r_dict, we don't ask for the hash ourselves.
- # Note that if the custom hashing function ends up asking for
- # the hash of x, then it must use compute_hash() itself, so it
- # works out.
- if not self.dictkey.custom_eq_hash:
- compute_hash(x)
-
def __repr__(self):
return '<{%r: %r}>' % (self.dictkey.s_value, self.dictvalue.s_value)
diff --git a/rpython/annotator/test/test_annrpython.py b/rpython/annotator/test/test_annrpython.py
--- a/rpython/annotator/test/test_annrpython.py
+++ b/rpython/annotator/test/test_annrpython.py
@@ -3704,25 +3704,6 @@
s = a.build_types(f, [int])
assert s.const == 0
- def test_hash_sideeffect(self):
- class X:
- pass
- x1 = X()
- x2 = X()
- x3 = X()
- d = {(2, x1): 5, (3, x2): 7}
- def f(n, m):
- if m == 1: x = x1
- elif m == 2: x = x2
- else: x = x3
- return d[n, x]
- a = self.RPythonAnnotator()
- s = a.build_types(f, [int, int])
- assert s.knowntype == int
- assert hasattr(x1, '__precomputed_identity_hash')
- assert hasattr(x2, '__precomputed_identity_hash')
- assert not hasattr(x3, '__precomputed_identity_hash')
-
def test_contains_of_empty_dict(self):
class A(object):
def meth(self):
diff --git a/rpython/config/translationoption.py b/rpython/config/translationoption.py
--- a/rpython/config/translationoption.py
+++ b/rpython/config/translationoption.py
@@ -201,10 +201,6 @@
StrOption("icon", "Path to the (Windows) icon to use for the executable"),
StrOption("libname",
"Windows: name and possibly location of the lib file to create"),
- ChoiceOption("hash",
- "The hash to use for strings",
- ["rpython", "siphash24"],
- default="rpython", cmdline="--hash"),
OptionDescription("backendopt", "Backend Optimization Options", [
# control inlining
@@ -394,12 +390,6 @@
if sys.platform == "darwin" or sys.platform =="win32":
raise ConfigError("'asmgcc' not supported on this platform")
-def apply_extra_settings(config):
- # make the setting of config.hash definitive
- from rpython.rlib.objectmodel import set_hash_algorithm
- config.translation.hash = config.translation.hash
- set_hash_algorithm(config.translation.hash)
-
# ----------------------------------------------------------------
def set_platform(config):
diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py
--- a/rpython/memory/gc/incminimark.py
+++ b/rpython/memory/gc/incminimark.py
@@ -117,9 +117,7 @@
# The following flag is set on nursery objects of which we asked the id
# or the identityhash. It means that a space of the size of the object
-# has already been allocated in the nonmovable part. The same flag is
-# abused to mark prebuilt objects whose hash has been taken during
-# translation and is statically recorded.
+# has already been allocated in the nonmovable part.
GCFLAG_HAS_SHADOW = first_gcflag << 3
# The following flag is set temporarily on some objects during a major
@@ -208,10 +206,6 @@
# by GCFLAG_xxx above.
HDR = lltype.Struct('header', ('tid', lltype.Signed))
typeid_is_in_field = 'tid'
- withhash_flag_is_in_field = 'tid', GCFLAG_HAS_SHADOW
- # ^^^ prebuilt objects may have the flag GCFLAG_HAS_SHADOW;
- # then they are one word longer, the extra word storing the hash.
-
# During a minor collection, the objects in the nursery that are
# moved outside are changed in-place: their header is replaced with
@@ -2640,40 +2634,22 @@
return shadow
_find_shadow._dont_inline_ = True
- @specialize.arg(2)
- def id_or_identityhash(self, gcobj, is_hash):
+ def id_or_identityhash(self, gcobj):
"""Implement the common logic of id() and identityhash()
of an object, given as a GCREF.
"""
obj = llmemory.cast_ptr_to_adr(gcobj)
- #
if self.is_valid_gc_object(obj):
if self.is_in_nursery(obj):
obj = self._find_shadow(obj)
- elif is_hash:
- if self.header(obj).tid & GCFLAG_HAS_SHADOW:
- #
- # For identityhash(), we need a special case for some
- # prebuilt objects: their hash must be the same before
- # and after translation. It is stored as an extra word
- # after the object. But we cannot use it for id()
- # because the stored value might clash with a real one.
- size = self.get_size(obj)
- i = (obj + size).signed[0]
- # Important: the returned value is not mangle_hash()ed!
- return i
- #
- i = llmemory.cast_adr_to_int(obj)
- if is_hash:
- i = mangle_hash(i)
- return i
+ return llmemory.cast_adr_to_int(obj)
id_or_identityhash._always_inline_ = True
def id(self, gcobj):
- return self.id_or_identityhash(gcobj, False)
+ return self.id_or_identityhash(gcobj)
def identityhash(self, gcobj):
- return self.id_or_identityhash(gcobj, True)
+ return mangle_hash(self.id_or_identityhash(gcobj))
# ----------
# Finalizers
diff --git a/rpython/memory/gc/minimark.py b/rpython/memory/gc/minimark.py
--- a/rpython/memory/gc/minimark.py
+++ b/rpython/memory/gc/minimark.py
@@ -104,9 +104,7 @@
# The following flag is set on nursery objects of which we asked the id
# or the identityhash. It means that a space of the size of the object
-# has already been allocated in the nonmovable part. The same flag is
-# abused to mark prebuilt objects whose hash has been taken during
-# translation and is statically recorded.
+# has already been allocated in the nonmovable part.
GCFLAG_HAS_SHADOW = first_gcflag << 3
# The following flag is set temporarily on some objects during a major
@@ -149,9 +147,6 @@
# by GCFLAG_xxx above.
HDR = lltype.Struct('header', ('tid', lltype.Signed))
typeid_is_in_field = 'tid'
- withhash_flag_is_in_field = 'tid', GCFLAG_HAS_SHADOW
- # ^^^ prebuilt objects may have the flag GCFLAG_HAS_SHADOW;
- # then they are one word longer, the extra word storing the hash.
_ADDRARRAY = lltype.Array(llmemory.Address, hints={'nolength': True})
@@ -1868,40 +1863,22 @@
return shadow
_find_shadow._dont_inline_ = True
- @specialize.arg(2)
- def id_or_identityhash(self, gcobj, is_hash):
+ def id_or_identityhash(self, gcobj):
"""Implement the common logic of id() and identityhash()
of an object, given as a GCREF.
"""
obj = llmemory.cast_ptr_to_adr(gcobj)
- #
if self.is_valid_gc_object(obj):
if self.is_in_nursery(obj):
obj = self._find_shadow(obj)
- elif is_hash:
- if self.header(obj).tid & GCFLAG_HAS_SHADOW:
- #
- # For identityhash(), we need a special case for some
- # prebuilt objects: their hash must be the same before
- # and after translation. It is stored as an extra word
- # after the object. But we cannot use it for id()
- # because the stored value might clash with a real one.
- size = self.get_size(obj)
- i = (obj + size).signed[0]
- # Important: the returned value is not mangle_hash()ed!
- return i
- #
- i = llmemory.cast_adr_to_int(obj)
- if is_hash:
- i = mangle_hash(i)
- return i
+ return llmemory.cast_adr_to_int(obj)
id_or_identityhash._always_inline_ = True
def id(self, gcobj):
- return self.id_or_identityhash(gcobj, False)
+ return self.id_or_identityhash(gcobj)
def identityhash(self, gcobj):
- return self.id_or_identityhash(gcobj, True)
+ return mangle_hash(self.id_or_identityhash(gcobj))
# ----------
# Finalizers
diff --git a/rpython/memory/gc/semispace.py b/rpython/memory/gc/semispace.py
--- a/rpython/memory/gc/semispace.py
+++ b/rpython/memory/gc/semispace.py
@@ -48,9 +48,6 @@
HDR = lltype.Struct('header', ('tid', lltype.Signed)) # XXX or rffi.INT?
typeid_is_in_field = 'tid'
- withhash_flag_is_in_field = 'tid', _GCFLAG_HASH_BASE * 0x2
- # ^^^ prebuilt objects either have GC_HASH_TAKEN_ADDR or they
- # have GC_HASH_HASFIELD (and then they are one word longer).
FORWARDSTUB = lltype.GcStruct('forwarding_stub',
('forw', llmemory.Address))
FORWARDSTUBPTR = lltype.Ptr(FORWARDSTUB)
diff --git a/rpython/memory/gctransform/boehm.py b/rpython/memory/gctransform/boehm.py
--- a/rpython/memory/gctransform/boehm.py
+++ b/rpython/memory/gctransform/boehm.py
@@ -11,7 +11,7 @@
class BoehmGCTransformer(GCTransformer):
malloc_zero_filled = True
FINALIZER_PTR = lltype.Ptr(lltype.FuncType([llmemory.Address], lltype.Void))
- HDR = lltype.Struct("header", ("hash", lltype.Signed))
+ NO_HEADER = True
def __init__(self, translator, inline=False):
super(BoehmGCTransformer, self).__init__(translator, inline=inline)
@@ -29,13 +29,8 @@
ll_malloc_varsize_no_length = mh.ll_malloc_varsize_no_length
ll_malloc_varsize = mh.ll_malloc_varsize
- HDRPTR = lltype.Ptr(self.HDR)
-
def ll_identityhash(addr):
- obj = llmemory.cast_adr_to_ptr(addr, HDRPTR)
- h = obj.hash
- if h == 0:
- obj.hash = h = ~llmemory.cast_adr_to_int(addr)
+ h = ~llmemory.cast_adr_to_int(addr)
return h
if self.translator:
@@ -194,11 +189,6 @@
resulttype = lltype.Signed)
hop.genop('int_invert', [v_int], resultvar=hop.spaceop.result)
- def gcheader_initdata(self, obj):
- hdr = lltype.malloc(self.HDR, immortal=True)
- hdr.hash = lltype.identityhash_nocache(obj._as_ptr())
- return hdr._obj
-
########## weakrefs ##########
# Boehm: weakref objects are small structures containing only a Boehm
diff --git a/rpython/memory/gctransform/framework.py b/rpython/memory/gctransform/framework.py
--- a/rpython/memory/gctransform/framework.py
+++ b/rpython/memory/gctransform/framework.py
@@ -610,25 +610,6 @@
def special_funcptr_for_type(self, TYPE):
return self.layoutbuilder.special_funcptr_for_type(TYPE)
- def gc_header_for(self, obj, needs_hash=False):
- hdr = self.gcdata.gc.gcheaderbuilder.header_of_object(obj)
- withhash, flag = self.gcdata.gc.withhash_flag_is_in_field
- x = getattr(hdr, withhash)
- TYPE = lltype.typeOf(x)
- x = lltype.cast_primitive(lltype.Signed, x)
- if needs_hash:
- x |= flag # set the flag in the header
- else:
- x &= ~flag # clear the flag in the header
- x = lltype.cast_primitive(TYPE, x)
- setattr(hdr, withhash, x)
- return hdr
-
- def get_hash_offset(self, T):
- type_id = self.get_type_id(T)
- assert not self.gcdata.q_is_varsize(type_id)
- return self.gcdata.q_fixed_size(type_id)
-
def finish_tables(self):
group = self.layoutbuilder.close_table()
log.info("assigned %s typeids" % (len(group.members), ))
@@ -1514,22 +1495,9 @@
def gcheader_initdata(self, obj):
o = lltype.top_container(obj)
- needs_hash = self.get_prebuilt_hash(o) is not None
- hdr = self.gc_header_for(o, needs_hash)
+ hdr = self.gcdata.gc.gcheaderbuilder.header_of_object(o)
return hdr._obj
- def get_prebuilt_hash(self, obj):
- # for prebuilt objects that need to have their hash stored and
- # restored. Note that only structures that are StructNodes all
- # the way have their hash stored (and not e.g. structs with var-
- # sized arrays at the end). 'obj' must be the top_container.
- TYPE = lltype.typeOf(obj)
- if not isinstance(TYPE, lltype.GcStruct):
- return None
- if TYPE._is_varsize():
- return None
- return getattr(obj, '_hash_cache_', None)
-
def get_finalizer_queue_index(self, hop):
fq_tag = hop.spaceop.args[0].value
assert 'FinalizerQueue TAG' in fq_tag.expr
diff --git a/rpython/memory/gctransform/refcounting.py b/rpython/memory/gctransform/refcounting.py
--- a/rpython/memory/gctransform/refcounting.py
+++ b/rpython/memory/gctransform/refcounting.py
@@ -18,8 +18,7 @@
class RefcountingGCTransformer(GCTransformer):
malloc_zero_filled = True
- HDR = lltype.Struct("header", ("refcount", lltype.Signed),
- ("hash", lltype.Signed))
+ HDR = lltype.Struct("header", ("refcount", lltype.Signed))
def __init__(self, translator):
super(RefcountingGCTransformer, self).__init__(translator, inline=True)
@@ -77,10 +76,7 @@
ll_malloc_varsize = mh.ll_malloc_varsize
def ll_identityhash(addr):
- obj = llmemory.cast_adr_to_ptr(addr, HDRPTR)
- h = obj.hash
- if h == 0:
- obj.hash = h = llmemory.cast_adr_to_int(addr)
+ h = llmemory.cast_adr_to_int(addr)
return h
if self.translator:
@@ -178,7 +174,6 @@
if not self.gcheaderbuilder.get_header(p):
hdr = self.gcheaderbuilder.new_header(p)
hdr.refcount = sys.maxint // 2
- hdr.hash = lltype.identityhash_nocache(p)
def static_deallocation_funcptr_for_type(self, TYPE):
if TYPE in self.static_deallocator_funcptrs:
diff --git a/rpython/memory/gctransform/transform.py b/rpython/memory/gctransform/transform.py
--- a/rpython/memory/gctransform/transform.py
+++ b/rpython/memory/gctransform/transform.py
@@ -374,9 +374,6 @@
return hop.cast_result(rmodel.inputconst(lltype.Ptr(ARRAY_TYPEID_MAP),
lltype.nullptr(ARRAY_TYPEID_MAP)))
- def get_prebuilt_hash(self, obj):
- return None
-
class MinimalGCTransformer(BaseGCTransformer):
def __init__(self, parenttransformer):
diff --git a/rpython/rlib/_rweakvaldict.py b/rpython/rlib/_rweakvaldict.py
--- a/rpython/rlib/_rweakvaldict.py
+++ b/rpython/rlib/_rweakvaldict.py
@@ -76,12 +76,16 @@
bk = self.rtyper.annotator.bookkeeper
classdef = bk.getuniqueclassdef(weakdict._valueclass)
r_value = getinstancerepr(self.rtyper, classdef)
+ any_value = False
for dictkey, dictvalue in weakdict._dict.items():
llkey = self.r_key.convert_const(dictkey)
llvalue = r_value.convert_const(dictvalue)
if llvalue:
llvalue = lltype.cast_pointer(rclass.OBJECTPTR, llvalue)
self.ll_set_nonnull(l_dict, llkey, llvalue)
+ any_value = True
+ if any_value:
+ l_dict.resize_counter = -1
return l_dict
def rtype_method_get(self, hop):
@@ -114,6 +118,8 @@
@jit.dont_look_inside
def ll_get(self, d, llkey):
+ if d.resize_counter < 0:
+ self.ll_weakdict_resize(d) # initialize prebuilt dicts at runtime
hash = self.ll_keyhash(llkey)
i = rdict.ll_dict_lookup(d, llkey, hash) & rdict.MASK
#llop.debug_print(lltype.Void, i, 'get')
@@ -132,6 +138,8 @@
@jit.dont_look_inside
def ll_set_nonnull(self, d, llkey, llvalue):
+ if d.resize_counter < 0:
+ self.ll_weakdict_resize(d) # initialize prebuilt dicts at runtime
hash = self.ll_keyhash(llkey)
valueref = weakref_create(llvalue) # GC effects here, before the rest
i = rdict.ll_dict_lookup(d, llkey, hash) & rdict.MASK
@@ -147,6 +155,8 @@
@jit.dont_look_inside
def ll_set_null(self, d, llkey):
+ if d.resize_counter < 0:
+ self.ll_weakdict_resize(d) # initialize prebuilt dicts at runtime
hash = self.ll_keyhash(llkey)
i = rdict.ll_dict_lookup(d, llkey, hash) & rdict.MASK
if d.entries.everused(i):
diff --git a/rpython/rlib/debug.py b/rpython/rlib/debug.py
--- a/rpython/rlib/debug.py
+++ b/rpython/rlib/debug.py
@@ -441,7 +441,7 @@
except OSError as e:
os.write(2, "Could not start GDB: %s" % (
os.strerror(e.errno)))
- raise SystemExit
+ os._exit(1)
else:
time.sleep(1) # give the GDB time to attach
diff --git a/rpython/rlib/objectmodel.py b/rpython/rlib/objectmodel.py
--- a/rpython/rlib/objectmodel.py
+++ b/rpython/rlib/objectmodel.py
@@ -465,8 +465,14 @@
Note that this can return 0 or -1 too.
- It returns the same number, both before and after translation.
- Dictionaries don't need to be rehashed after translation.
+ NOTE: It returns a different number before and after translation!
+ Dictionaries will be rehashed when the translated program starts.
+ Be careful about other places that store or depend on a hash value:
+ if such a place can exist before translation, you should add for
+ example a _cleanup_() method to clear this cache during translation.
+
+ (Nowadays we could completely remove compute_hash() and decide that
+ hash(x) is valid RPython instead, at least for the types listed here.)
"""
if isinstance(x, (str, unicode)):
return _hash_string(x)
@@ -484,17 +490,11 @@
"""RPython equivalent of object.__hash__(x). This returns the
so-called 'identity hash', which is the non-overridable default hash
of Python. Can be called for any RPython-level object that turns
- into a GC object, but not NULL. The value is not guaranteed to be the
- same before and after translation, except for RPython instances on the
- lltypesystem.
+ into a GC object, but not NULL. The value will be different before
+ and after translation (WARNING: this is a change with older RPythons!)
"""
assert x is not None
- result = object.__hash__(x)
- try:
- x.__dict__['__precomputed_identity_hash'] = result
- except (TypeError, AttributeError):
- pass
- return result
+ return object.__hash__(x)
def compute_unique_id(x):
"""RPython equivalent of id(x). The 'x' must be an RPython-level
@@ -519,21 +519,17 @@
# ----------
-HASH_ALGORITHM = "rpython" # XXX Is there a better name?
-HASH_ALGORITHM_FIXED = False
+def _hash_string(s):
+ """The default algorithm behind compute_hash() for a string or a unicode.
+ This is a modified Fowler-Noll-Vo (FNV) hash. According to Wikipedia,
+ FNV needs carefully-computed constants called FNV primes and FNV offset
+ basis, which are absent from the present algorithm. Nevertheless,
+ this matches CPython 2.7 without -R, which has proven a good hash in
+ practice (even if not cryptographical nor randomizable).
-@not_rpython
-def set_hash_algorithm(algo):
- """Must be called very early, before any string is hashed with
- compute_hash()!"""
- global HASH_ALGORITHM
- if HASH_ALGORITHM != algo:
- assert not HASH_ALGORITHM_FIXED, "compute_hash() already called!"
- assert algo in ("rpython", "siphash24")
- HASH_ALGORITHM = algo
-
-
-def _hash_string_rpython(s):
+ There is a mechanism to use another one in programs after translation.
+ See rsiphash.py, which implements the algorithm of CPython >= 3.4.
+ """
from rpython.rlib.rarithmetic import intmask
length = len(s)
@@ -547,100 +543,8 @@
x ^= length
return intmask(x)
-
-@not_rpython
-def _hash_string_siphash24(s):
- """This version is called when untranslated only."""
- import array
- from rpython.rlib.rsiphash import siphash24
- from rpython.rtyper.lltypesystem import lltype, rffi
- from rpython.rlib.rarithmetic import intmask
-
- if not isinstance(s, str):
- if isinstance(s, unicode):
- lst = map(ord, s)
- else:
- lst = map(ord, s.chars) # for rstr.STR or UNICODE
- # NOTE: a latin-1 unicode string must have the same hash as the
- # corresponding byte string.
- if all(n <= 0xFF for n in lst):
- kind = "B"
- elif rffi.sizeof(lltype.UniChar) == 4:
- kind = "I"
- else:
- kind = "H"
- s = array.array(kind, lst).tostring()
- ptr = rffi.str2charp(s)
- x = siphash24(ptr, len(s))
- rffi.free_charp(ptr)
- return intmask(x)
-
-def ll_hash_string_siphash24(ll_s):
- """Called from lltypesystem/rstr.py. 'll_s' is a rstr.STR or UNICODE."""
- from rpython.rlib.rsiphash import siphash24
- from rpython.rtyper.lltypesystem import lltype, llmemory, rffi, rstr
- from rpython.rlib.rarithmetic import intmask
-
- length = len(ll_s.chars)
- if lltype.typeOf(ll_s).TO.chars.OF == lltype.Char:
- # no GC operation from here!
- addr = rstr._get_raw_buf_string(rstr.STR, ll_s, 0)
- else:
- # NOTE: a latin-1 unicode string must have the same hash as the
- # corresponding byte string. If the unicode is all within
- # 0-255, then we need to allocate a byte buffer and copy the
- # latin-1 encoding in it manually.
- for i in range(length):
- if ord(ll_s.chars[i]) > 0xFF:
- # no GC operation from here!
- addr = rstr._get_raw_buf_unicode(rstr.UNICODE, ll_s, 0)
- length *= rffi.sizeof(rstr.UNICODE.chars.OF)
- break
- else:
- p = lltype.malloc(rffi.CCHARP.TO, length, flavor='raw')
- i = 0
- while i < length:
- p[i] = chr(ord(ll_s.chars[i]))
- i += 1
- x = siphash24(llmemory.cast_ptr_to_adr(p), length)
- lltype.free(p, flavor='raw')
- return intmask(x)
- x = siphash24(addr, length)
- keepalive_until_here(ll_s)
- return intmask(x)
-ll_hash_string_siphash24._jit_look_inside_ = False
-
-
-@not_rpython
-def _hash_string(s):
- """The algorithm behind compute_hash() for a string or a unicode.
- This version is only for untranslated usage, and 's' is a str or unicode.
- """
- global HASH_ALGORITHM_FIXED
- HASH_ALGORITHM_FIXED = True
- if HASH_ALGORITHM == "rpython":
- return _hash_string_rpython(s)
- if HASH_ALGORITHM == "siphash24":
- return _hash_string_siphash24(s)
- raise NotImplementedError
-
def ll_hash_string(ll_s):
- """The algorithm behind compute_hash() for a string or a unicode.
- This version is called from lltypesystem/rstr.py, and 'll_s' is a
- rstr.STR or rstr.UNICODE.
- """
- if not we_are_translated():
- global HASH_ALGORITHM_FIXED
- HASH_ALGORITHM_FIXED = True
- if HASH_ALGORITHM == "rpython":
- return _hash_string_rpython(ll_s.chars)
- if HASH_ALGORITHM == "siphash24":
- if we_are_translated():
- return ll_hash_string_siphash24(ll_s)
- else:
- return _hash_string_siphash24(ll_s)
- raise NotImplementedError
-
+ return _hash_string(ll_s.chars)
def _hash_float(f):
"""The algorithm behind compute_hash() for a float.
@@ -698,6 +602,21 @@
return hop.gendirectcall(ll_fn, v_obj)
class Entry(ExtRegistryEntry):
+ _about_ = ll_hash_string
+ # this is only used when annotating the code in rstr.py, and so
+ # it always occurs after the RPython program signalled its intent
+ # to use a different hash. The code below overwrites the use of
+ # ll_hash_string() to make the annotator think a possibly different
+ # function was called.
+
+ def compute_annotation(self):
+ from rpython.annotator import model as annmodel
+ bk = self.bookkeeper
+ translator = bk.annotator.translator
+ fn = getattr(translator, 'll_hash_string', ll_hash_string)
+ return annmodel.SomePBC([bk.getdesc(fn)])
+
+class Entry(ExtRegistryEntry):
_about_ = compute_identity_hash
def compute_result_annotation(self, s_x):
diff --git a/rpython/rlib/rsiphash.py b/rpython/rlib/rsiphash.py
--- a/rpython/rlib/rsiphash.py
+++ b/rpython/rlib/rsiphash.py
@@ -1,12 +1,24 @@
-import sys, os, struct
+"""
+This module implements siphash-2-4, the hashing algorithm for strings
+and unicodes. You can use it explicitly by calling siphash24() with
+a byte string, or you can use enable_siphash24() to enable the use
+of siphash-2-4 on all RPython strings and unicodes in your program
+after translation.
+"""
+import sys, os, errno
from contextlib import contextmanager
-from rpython.rlib import rarithmetic
+from rpython.rlib import rarithmetic, rurandom
from rpython.rlib.objectmodel import not_rpython, always_inline
-from rpython.rlib.rgc import no_collect
-from rpython.rlib.rarithmetic import r_uint64
+from rpython.rlib.objectmodel import we_are_translated, dont_inline
+from rpython.rlib.objectmodel import keepalive_until_here
+from rpython.rlib import rgc, jit, rposix
+from rpython.rlib.rarithmetic import r_uint64, r_uint32, r_uint
from rpython.rlib.rawstorage import misaligned_is_fine
-from rpython.rtyper.lltypesystem import lltype, llmemory, rffi
+from rpython.rlib.nonconst import NonConstant
+from rpython.rtyper.lltypesystem import lltype, llmemory, rffi, rstr
from rpython.rtyper.lltypesystem.lloperation import llop
+from rpython.rtyper.extregistry import ExtRegistryEntry
+from rpython.rtyper.annlowlevel import llhelper
if sys.byteorder == 'little':
@@ -16,37 +28,164 @@
_le64toh = rarithmetic.byteswap
-# Initialize the values of the secret seed: two 64-bit constants.
-# CPython picks a new seed every time 'python' starts. PyPy cannot do
-# that as easily because many details may rely on getting the same hash
-# value before and after translation. We can, however, pick a random
-# seed once per translation, which should already be quite good.
-#
-# XXX no, it is not: e.g. all Ubuntu installations of the same Ubuntu
-# would get the same seed. That's not good enough.
+class Seed:
+ k0l = k1l = r_uint64(0)
+seed = Seed()
-@not_rpython
-def select_random_seed():
- global k0, k1 # note: the globals k0, k1 are already byte-swapped
- v0, v1 = struct.unpack("QQ", os.urandom(16))
- k0 = r_uint64(v0)
- k1 = r_uint64(v1)
-select_random_seed()
+def _decode64(s):
+ return (r_uint64(ord(s[0])) |
+ r_uint64(ord(s[1])) << 8 |
+ r_uint64(ord(s[2])) << 16 |
+ r_uint64(ord(s[3])) << 24 |
+ r_uint64(ord(s[4])) << 32 |
+ r_uint64(ord(s[5])) << 40 |
+ r_uint64(ord(s[6])) << 48 |
+ r_uint64(ord(s[7])) << 56)
+
+def select_random_seed(s):
+ """'s' is a string of length 16"""
+ seed.k0l = _decode64(s)
+ seed.k1l = _decode64(s[8:16])
+
+
+random_ctx = rurandom.init_urandom()
+strtoul = rffi.llexternal("strtoul", [rffi.CCHARP, rffi.CCHARPP, rffi.INT],
+ rffi.ULONG, save_err=rffi.RFFI_SAVE_ERRNO)
+
+env_var_name = "PYTHONHASHSEED"
+
+def initialize_from_env():
+ # This uses the same algorithms as CPython 3.5. The environment
+ # variable we read also defaults to "PYTHONHASHSEED". If needed,
+ # a different RPython interpreter can patch the value of the
+ # global variable 'env_var_name', or just patch the whole
+ # initialize_from_env() function.
+ value = os.environ.get(env_var_name)
+ if value and value != "random":
+ with rffi.scoped_view_charp(value) as ptr:
+ with lltype.scoped_alloc(rffi.CCHARPP.TO, 1) as endptr:
+ endptr[0] = ptr
+ seed = strtoul(ptr, endptr, 10)
+ full = endptr[0][0] == '\x00'
+ seed = lltype.cast_primitive(lltype.Unsigned, seed)
+ if not full or seed > r_uint(4294967295) or (
+ rposix.get_saved_errno() == errno.ERANGE and
+ seed == lltype.cast_primitive(lltype.Unsigned,
+ rffi.cast(rffi.ULONG, -1))):
+ os.write(2,
+ "%s must be \"random\" or an integer "
+ "in range [0; 4294967295]\n" % (env_var_name,))
+ os._exit(1)
+ if not seed:
+ # disable the randomized hash
+ s = '\x00' * 16
+ else:
+ s = lcg_urandom(seed)
+ else:
+ try:
+ s = rurandom.urandom(random_ctx, 16)
+ except Exception as e:
+ os.write(2,
+ "%s: failed to get random numbers to initialize Python\n" %
+ (str(e),))
+ os._exit(1)
+ raise # makes the annotator happy
+ select_random_seed(s)
+
+def lcg_urandom(x):
+ s = ''
+ for index in range(16):
+ x *= 214013
+ x += 2531011
+ s += chr((x >> 16) & 0xff)
+ return s
+
+
+_FUNC = lltype.Ptr(lltype.FuncType([], lltype.Void))
+
+def enable_siphash24():
+ """
+ Enable the use of siphash-2-4 for all RPython strings and unicodes
+ in the translated program. You must call this function somewhere
+ in your interpreter (from a place that is annotated). Don't call
+ it more than once.
+ """
+
+class Entry(ExtRegistryEntry):
+ _about_ = enable_siphash24
+
+ def compute_result_annotation(self):
+ translator = self.bookkeeper.annotator.translator
+ if hasattr(translator, 'll_hash_string'):
+ assert translator.ll_hash_string == ll_hash_string_siphash24
+ else:
+ translator.ll_hash_string = ll_hash_string_siphash24
+ bk = self.bookkeeper
+ s_callable = bk.immutablevalue(initialize_from_env)
+ key = (enable_siphash24,)
+ bk.emulate_pbc_call(key, s_callable, [])
+
+ def specialize_call(self, hop):
+ hop.exception_cannot_occur()
+ bk = hop.rtyper.annotator.bookkeeper
+ s_callable = bk.immutablevalue(initialize_from_env)
+ r_callable = hop.rtyper.getrepr(s_callable)
+ ll_init = r_callable.get_unique_llfn().value
+ bk.annotator.translator._call_at_startup.append(ll_init)
+
+
+@rgc.no_collect
+def ll_hash_string_siphash24(ll_s):
+ """Called indirectly from lltypesystem/rstr.py, by redirection from
+ objectmodel.ll_hash_string().
+ """
+ from rpython.rlib.rarithmetic import intmask
+
+ # This function is entirely @rgc.no_collect.
+ length = len(ll_s.chars)
+ if lltype.typeOf(ll_s).TO.chars.OF == lltype.Char: # regular STR
+ addr = rstr._get_raw_buf_string(rstr.STR, ll_s, 0)
+ else:
+ # NOTE: a latin-1 unicode string must have the same hash as the
+ # corresponding byte string. If the unicode is all within
+ # 0-255, then we need to allocate a byte buffer and copy the
+ # latin-1 encoding in it manually. Note also that we give a
+ # different hash result than CPython on ucs4 platforms, for
+ # unicode strings where CPython uses 2 bytes per character.
+ for i in range(length):
+ if ord(ll_s.chars[i]) > 0xFF:
+ addr = rstr._get_raw_buf_unicode(rstr.UNICODE, ll_s, 0)
+ length *= rffi.sizeof(rstr.UNICODE.chars.OF)
+ break
+ else:
+ p = lltype.malloc(rffi.CCHARP.TO, length, flavor='raw')
+ i = 0
+ while i < length:
+ p[i] = chr(ord(ll_s.chars[i]))
+ i += 1
+ x = _siphash24(llmemory.cast_ptr_to_adr(p), length)
+ lltype.free(p, flavor='raw')
+ return intmask(x)
+ x = _siphash24(addr, length)
+ keepalive_until_here(ll_s)
+ return intmask(x)
+
@contextmanager
def choosen_seed(new_k0, new_k1, test_misaligned_path=False):
- global k0, k1, misaligned_is_fine
- old = k0, k1, misaligned_is_fine
- k0 = _le64toh(r_uint64(new_k0))
- k1 = _le64toh(r_uint64(new_k1))
+ """For tests."""
+ global misaligned_is_fine
+ old = seed.k0l, seed.k1l, misaligned_is_fine
+ seed.k0l = _le64toh(r_uint64(new_k0))
+ seed.k1l = _le64toh(r_uint64(new_k1))
if test_misaligned_path:
misaligned_is_fine = False
yield
- k0, k1, misaligned_is_fine = old
+ seed.k0l, seed.k1l, misaligned_is_fine = old
def get_current_seed():
- return _le64toh(k0), _le64toh(k1)
+ return _le64toh(seed.k0l), _le64toh(seed.k1l)
magic0 = r_uint64(0x736f6d6570736575)
@@ -77,20 +216,21 @@
return v0, v1, v2, v3
-@no_collect
-def siphash24(addr_in, size):
+@rgc.no_collect
+def _siphash24(addr_in, size):
"""Takes an address pointer and a size. Returns the hash as a r_uint64,
which can then be casted to the expected type."""
- direct = (misaligned_is_fine or
- (rffi.cast(lltype.Signed, addr_in) & 7) == 0)
-
+ k0 = seed.k0l
+ k1 = seed.k1l
b = r_uint64(size) << 56
v0 = k0 ^ magic0
v1 = k1 ^ magic1
v2 = k0 ^ magic2
v3 = k1 ^ magic3
+ direct = (misaligned_is_fine or
+ (rffi.cast(lltype.Signed, addr_in) & 7) == 0)
index = 0
if direct:
while size >= 8:
@@ -113,7 +253,6 @@
r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 6)) << 48 |
r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 7)) << 56
)
- mi = _le64toh(mi)
size -= 8
index += 8
v3 ^= mi
@@ -158,3 +297,13 @@
v0, v1, v2, v3 = _double_round(v0, v1, v2, v3)
return (v0 ^ v1) ^ (v2 ^ v3)
+
+
+@jit.dont_look_inside
+def siphash24(s):
+ """'s' is a normal string. Returns its siphash-2-4 as a r_uint64.
+ Don't forget to cast the result to a regular integer if needed,
+ e.g. with rarithmetic.intmask().
+ """
+ with rffi.scoped_nonmovingbuffer(s) as p:
+ return _siphash24(llmemory.cast_ptr_to_adr(p), len(s))
diff --git a/rpython/rlib/rurandom.py b/rpython/rlib/rurandom.py
--- a/rpython/rlib/rurandom.py
+++ b/rpython/rlib/rurandom.py
@@ -57,6 +57,8 @@
immortal=True, zero=True)
def urandom(context, n, signal_checker=None):
+ # NOTE: no dictionaries here: rsiphash calls this to
+ # initialize the random seed of string hashes
provider = context[0]
if not provider:
# This handle is never explicitly released. The operating
@@ -139,6 +141,8 @@
def urandom(context, n, signal_checker=None):
"Read n bytes from /dev/urandom."
+ # NOTE: no dictionaries here: rsiphash calls this to
+ # initialize the random seed of string hashes
result = []
if SYS_getrandom is not None:
n = _getrandom(n, result, signal_checker)
diff --git a/rpython/rlib/test/test_objectmodel.py b/rpython/rlib/test/test_objectmodel.py
--- a/rpython/rlib/test/test_objectmodel.py
+++ b/rpython/rlib/test/test_objectmodel.py
@@ -166,7 +166,6 @@
foo = Foo()
h = compute_hash(foo)
assert h == object.__hash__(foo)
- assert h == getattr(foo, '__precomputed_identity_hash')
assert compute_hash(None) == 0
def test_compute_hash_float():
@@ -182,7 +181,6 @@
foo = Foo()
h = compute_identity_hash(foo)
assert h == object.__hash__(foo)
- assert h == getattr(foo, '__precomputed_identity_hash')
def test_compute_unique_id():
from rpython.rlib.rarithmetic import intmask
@@ -410,36 +408,6 @@
res = self.interpret(f, [])
assert res == 1
- def test_compute_hash_across_translation(self):
- class Foo(object):
- pass
- q = Foo()
-
- def f(i):
- assert compute_hash(None) == 0
- assert compute_hash(i) == h_42
- assert compute_hash(i + 1.0) == h_43_dot_0
- assert compute_hash((i + 3) / 6.0) == h_7_dot_5
- assert compute_hash("Hello" + str(i)) == h_Hello42
- if i == 42:
- p = None
- else:
- p = Foo()
- assert compute_hash(p) == h_None
- assert compute_hash(("world", None, i, 7.5)) == h_tuple
- assert compute_hash(q) == h_q
- return i * 2
- h_42 = compute_hash(42)
- h_43_dot_0 = compute_hash(43.0)
- h_7_dot_5 = compute_hash(7.5)
- h_Hello42 = compute_hash("Hello42")
- h_None = compute_hash(None)
- h_tuple = compute_hash(("world", None, 42, 7.5))
- h_q = compute_hash(q)
-
- res = self.interpret(f, [42])
- assert res == 84
-
def test_fetch_translated_config(self):
assert fetch_translated_config() is None
def f():
diff --git a/rpython/rlib/test/test_rsiphash.py b/rpython/rlib/test/test_rsiphash.py
--- a/rpython/rlib/test/test_rsiphash.py
+++ b/rpython/rlib/test/test_rsiphash.py
@@ -1,5 +1,10 @@
-from rpython.rlib.rsiphash import siphash24, choosen_seed
+import os
+from rpython.rlib.rsiphash import siphash24, _siphash24, choosen_seed
+from rpython.rlib.rsiphash import initialize_from_env, enable_siphash24
+from rpython.rlib.objectmodel import compute_hash
+from rpython.rlib.rarithmetic import intmask
from rpython.rtyper.lltypesystem import llmemory, rffi
+from rpython.translator.c.test.test_genc import compile
CASES = [
@@ -28,13 +33,11 @@
]
def check(s):
- p = rffi.str2charp(s)
q = rffi.str2charp('?' + s)
with choosen_seed(0x8a9f065a358479f4, 0x11cb1e9ee7f40e1f,
test_misaligned_path=True):
- x = siphash24(llmemory.cast_ptr_to_adr(p), len(s))
- y = siphash24(llmemory.cast_ptr_to_adr(rffi.ptradd(q, 1)), len(s))
- rffi.free_charp(p)
+ x = siphash24(s)
+ y = _siphash24(llmemory.cast_ptr_to_adr(rffi.ptradd(q, 1)), len(s))
rffi.free_charp(q)
assert x == y
return x
@@ -42,3 +45,104 @@
def test_siphash24():
for expected, string in CASES:
assert check(string) == expected
+
+def test_fix_seed():
+ old_val = os.environ.get('PYTHONHASHSEED', None)
+ try:
+ os.environ['PYTHONHASHSEED'] = '0'
+ initialize_from_env()
+ assert siphash24("foo") == 15988776847138518036
+ # value checked with CPython 3.5
+
+ os.environ['PYTHONHASHSEED'] = '4000000000'
+ initialize_from_env()
+ assert siphash24("foo") == 13829150778707464258
+ # value checked with CPython 3.5
+
+ for env in ['', 'random']:
+ os.environ['PYTHONHASHSEED'] = env
+ initialize_from_env()
+ hash1 = siphash24("foo")
+ initialize_from_env()
+ hash2 = siphash24("foo")
+ assert hash1 != hash2 # extremely unlikely
+ finally:
+ if old_val is None:
+ del os.environ['PYTHONHASHSEED']
+ else:
+ os.environ['PYTHONHASHSEED'] = old_val
+
+def test_translated():
+ d1 = {"foo": 123}
+ d2 = {u"foo": 456, u"\u1234\u5678": 789}
+ class G:
+ pass
+ g = G()
+ g.v1 = d1.copy()
+ g.v2 = d2.copy()
+
+ def fetch(n):
+ if n == 0: return d1.get("foo", -1)
+ if n == 1: return g.v1.get("foo", -1)
+ if n == 2: return compute_hash("foo")
+ if n == 3: return d2.get(u"foo", -1)
+ if n == 4: return g.v2.get(u"foo", -1)
+ if n == 5: return compute_hash(u"foo")
+ if n == 6: return d2.get(u"\u1234\u5678", -1)
+ if n == 7: return g.v2.get(u"\u1234\u5678", -1)
+ if n == 8: return compute_hash(u"\u1234\u5678")
+ assert 0
+
+ def entrypoint(n):
+ enable_siphash24()
+ g.v1["bar"] = -2
+ g.v2[u"bar"] = -2
+ if n >= 0: # get items one by one, because otherwise it may
+ # be the case that one line influences the next
+ return str(fetch(n))
+ else:
+ # ...except in random mode, because we want all results
+ # to be computed with the same seed
+ return ' '.join([str(fetch(n)) for n in range(9)])
+
+ fn = compile(entrypoint, [int])
+
+ def getall():
+ return [int(fn(i)) for i in range(9)]
+
+ old_val = os.environ.get('PYTHONHASHSEED', None)
+ try:
+ os.environ['PYTHONHASHSEED'] = '0'
+ s1 = getall()
+ assert s1[:8] == [
+ 123, 123, intmask(15988776847138518036),
+ 456, 456, intmask(15988776847138518036),
+ 789, 789]
+ assert s1[8] in [intmask(17593683438421985039), # ucs2 mode
+ intmask(94801584261658677)] # ucs4 mode
+
+ os.environ['PYTHONHASHSEED'] = '3987654321'
+ s1 = getall()
+ assert s1[:8] == [
+ 123, 123, intmask(5890804383681474441),
+ 456, 456, intmask(5890804383681474441),
+ 789, 789]
+ assert s1[8] in [intmask(4192582507672183374), # ucs2 mode
+ intmask(7179255293164649778)] # ucs4 mode
+
+ for env in ['', 'random']:
+ os.environ['PYTHONHASHSEED'] = env
+ s1 = map(int, fn(-1).split())
+ s2 = map(int, fn(-1).split())
+ assert s1[0:2]+s1[3:5]+s1[6:8] == [123, 123, 456, 456, 789, 789]
+ assert s1[2] == s1[5]
+ assert s2[0:2]+s2[3:5]+s2[6:8] == [123, 123, 456, 456, 789, 789]
+ assert s2[2] == s2[5]
+ #
+ assert len(set([s1[2], s2[2], s1[8], s2[8]])) == 4
+
+ finally:
+ if old_val is None:
+ del os.environ['PYTHONHASHSEED']
+ else:
+ os.environ['PYTHONHASHSEED'] = old_val
diff --git a/rpython/rlib/test/test_rweakvaldict.py b/rpython/rlib/test/test_rweakvaldict.py
--- a/rpython/rlib/test/test_rweakvaldict.py
+++ b/rpython/rlib/test/test_rweakvaldict.py
@@ -1,8 +1,9 @@
import py
from rpython.annotator.model import UnionError
-from rpython.rlib import rgc
+from rpython.rlib import rgc, nonconst
from rpython.rlib.rweakref import RWeakValueDictionary
from rpython.rtyper.test.test_llinterp import interpret
+from rpython.translator.c.test.test_genc import compile
class X(object):
pass
@@ -213,3 +214,33 @@
assert d.get(keys[3]) is None
f()
interpret(f, [])
+
+def test_translation_prebuilt_1():
+ class K:
+ pass
+ d = RWeakValueDictionary(K, X)
+ k1 = K(); k2 = K()
+ x1 = X(); x2 = X()
+ d.set(k1, x1)
+ d.set(k2, x2)
+ def f():
+ assert d.get(k1) is x1
+ assert d.get(k2) is x2
+ f()
+ fc = compile(f, [], gcpolicy="boehm", rweakref=True)
+ fc()
+
+def _test_translation_prebuilt_2():
+ from rpython.rlib import rsiphash
+ d = RWeakValueDictionary(str, X)
+ k1 = "key1"; k2 = "key2"
+ x1 = X(); x2 = X()
+ d.set(k1, x1)
+ d.set(k2, x2)
+ def f():
+ rsiphash.enable_siphash24()
+ i = nonconst.NonConstant(1)
+ assert d.get("key%d" % (i,)) is x1
+ assert d.get("key%d" % (i+1,)) is x2
+ fc = compile(f, [], gcpolicy="boehm", rweakref=True)
+ fc()
diff --git a/rpython/rtyper/lltypesystem/lloperation.py b/rpython/rtyper/lltypesystem/lloperation.py
--- a/rpython/rtyper/lltypesystem/lloperation.py
+++ b/rpython/rtyper/lltypesystem/lloperation.py
@@ -539,6 +539,7 @@
'decode_arg_def': LLOp(canraise=(Exception,)),
'getslice': LLOp(canraise=(Exception,)),
'check_and_clear_exc': LLOp(),
+ 'call_at_startup': LLOp(canrun=True),
'threadlocalref_addr': LLOp(), # get (or make) addr of tl
'threadlocalref_get': LLOp(sideeffects=False), # read field (no check)
diff --git a/rpython/rtyper/lltypesystem/lltype.py b/rpython/rtyper/lltypesystem/lltype.py
--- a/rpython/rtyper/lltypesystem/lltype.py
+++ b/rpython/rtyper/lltypesystem/lltype.py
@@ -1380,20 +1380,11 @@
return callb(*args)
raise TypeError("%r instance is not a function" % (self._T,))
- def _identityhash(self, cache=True):
+ def _identityhash(self):
p = normalizeptr(self)
- try:
- return p._obj._hash_cache_
- except AttributeError:
- assert self._T._gckind == 'gc'
- assert self # not for NULL
- result = hash(p._obj)
- if cache:
- try:
- p._obj._hash_cache_ = result
- except AttributeError:
- pass
- return result
+ assert self._T._gckind == 'gc'
+ assert self # not for NULL
+ return hash(p._obj)
class _ptr(_abstract_ptr):
__slots__ = ('_TYPE',
@@ -1759,7 +1750,7 @@
class _struct(_parentable):
_kind = "structure"
- __slots__ = ('_hash_cache_', '_compilation_info')
+ __slots__ = ('_compilation_info',)
def __new__(self, TYPE, n=None, initialization=None, parent=None,
parentindex=None):
@@ -2442,24 +2433,6 @@
return SomeInteger()
-def identityhash_nocache(p):
- """Version of identityhash() to use from backends that don't care about
- caching."""
- assert p
- return p._identityhash(cache=False)
-
-def init_identity_hash(p, value):
- """For a prebuilt object p, initialize its hash value to 'value'."""
- assert isinstance(typeOf(p), Ptr)
- p = normalizeptr(p)
- if not p:
- raise ValueError("cannot change hash(NULL)!")
- if hasattr(p._obj, '_hash_cache_'):
- raise ValueError("the hash of %r was already computed" % (p,))
- if typeOf(p).TO._is_varsize():
- raise ValueError("init_identity_hash(): not for varsized types")
- p._obj._hash_cache_ = intmask(value)
-
def isCompatibleType(TYPE1, TYPE2):
return TYPE1._is_compatible(TYPE2)
diff --git a/rpython/rtyper/lltypesystem/opimpl.py b/rpython/rtyper/lltypesystem/opimpl.py
--- a/rpython/rtyper/lltypesystem/opimpl.py
+++ b/rpython/rtyper/lltypesystem/opimpl.py
@@ -742,6 +742,9 @@
def op_gc_move_out_of_nursery(obj):
return obj
+def op_call_at_startup(init_func):
+ pass # do nothing
+
# ____________________________________________________________
def get_op_impl(opname):
diff --git a/rpython/rtyper/lltypesystem/rdict.py b/rpython/rtyper/lltypesystem/rdict.py
--- a/rpython/rtyper/lltypesystem/rdict.py
+++ b/rpython/rtyper/lltypesystem/rdict.py
@@ -236,21 +236,14 @@
if self.r_rdict_hashfn.lowleveltype != lltype.Void:
l_fn = self.r_rdict_hashfn.convert_const(dictobj.key_hash)
l_dict.fnkeyhash = l_fn
-
- for dictkeycontainer, dictvalue in dictobj._dict.items():
- llkey = r_key.convert_const(dictkeycontainer.key)
- llvalue = r_value.convert_const(dictvalue)
- ll_dict_insertclean(l_dict, llkey, llvalue,
- dictkeycontainer.hash)
- return l_dict
-
+ any_items = dictobj._dict.items()
else:
- for dictkey, dictvalue in dictobj.items():
- llkey = r_key.convert_const(dictkey)
- llvalue = r_value.convert_const(dictvalue)
- ll_dict_insertclean(l_dict, llkey, llvalue,
- l_dict.keyhash(llkey))
- return l_dict
+ any_items = dictobj.items()
+ if any_items:
+ raise TyperError("found a prebuilt, explicitly non-ordered, "
+ "non-empty dict. it would require additional"
+ " support to rehash it at program start-up")
+ return l_dict
def rtype_len(self, hop):
v_dict, = hop.inputargs(self)
diff --git a/rpython/rtyper/lltypesystem/rffi.py b/rpython/rtyper/lltypesystem/rffi.py
--- a/rpython/rtyper/lltypesystem/rffi.py
+++ b/rpython/rtyper/lltypesystem/rffi.py
@@ -829,7 +829,7 @@
return assert_str0(charpsize2str(cp, size))
charp2str._annenforceargs_ = [lltype.SomePtr(TYPEP)]
- # str -> char*, bool, bool
+ # str -> char*, flag
# Can't inline this because of the raw address manipulation.
@jit.dont_look_inside
def get_nonmovingbuffer(data):
diff --git a/rpython/rtyper/lltypesystem/rordereddict.py b/rpython/rtyper/lltypesystem/rordereddict.py
--- a/rpython/rtyper/lltypesystem/rordereddict.py
+++ b/rpython/rtyper/lltypesystem/rordereddict.py
@@ -5,7 +5,7 @@
from rpython.rtyper.lltypesystem import lltype, llmemory, rffi
from rpython.rlib import objectmodel, jit, rgc, types
from rpython.rlib.signature import signature
-from rpython.rlib.objectmodel import specialize, likely
+from rpython.rlib.objectmodel import specialize, likely, not_rpython
from rpython.rtyper.debug import ll_assert
from rpython.rlib.rarithmetic import r_uint, intmask
from rpython.rtyper import rmodel
@@ -46,20 +46,23 @@
@jit.look_inside_iff(lambda d, key, hash, flag: jit.isvirtual(d))
@jit.oopspec('ordereddict.lookup(d, key, hash, flag)')
def ll_call_lookup_function(d, key, hash, flag):
- fun = d.lookup_function_no & FUNC_MASK
- # This likely() here forces gcc to compile the check for fun == FUNC_BYTE
- # first. Otherwise, this is a regular switch and gcc (at least 4.7)
- # compiles this as a series of checks, with the FUNC_BYTE case last.
- # It sounds minor, but it is worth 6-7% on a PyPy microbenchmark.
- if likely(fun == FUNC_BYTE):
- return ll_dict_lookup(d, key, hash, flag, TYPE_BYTE)
- elif fun == FUNC_SHORT:
- return ll_dict_lookup(d, key, hash, flag, TYPE_SHORT)
- elif IS_64BIT and fun == FUNC_INT:
- return ll_dict_lookup(d, key, hash, flag, TYPE_INT)
- elif fun == FUNC_LONG:
- return ll_dict_lookup(d, key, hash, flag, TYPE_LONG)
- assert False
+ while True:
+ fun = d.lookup_function_no & FUNC_MASK
+ # This likely() here forces gcc to compile the check for fun==FUNC_BYTE
+ # first. Otherwise, this is a regular switch and gcc (at least 4.7)
+ # compiles this as a series of checks, with the FUNC_BYTE case last.
+ # It sounds minor, but it is worth 6-7% on a PyPy microbenchmark.
+ if likely(fun == FUNC_BYTE):
+ return ll_dict_lookup(d, key, hash, flag, TYPE_BYTE)
+ elif fun == FUNC_SHORT:
+ return ll_dict_lookup(d, key, hash, flag, TYPE_SHORT)
+ elif IS_64BIT and fun == FUNC_INT:
+ return ll_dict_lookup(d, key, hash, flag, TYPE_INT)
+ elif fun == FUNC_LONG:
+ return ll_dict_lookup(d, key, hash, flag, TYPE_LONG)
+ else:
+ ll_dict_create_initial_index(d)
+ # then, retry
def get_ll_dict(DICTKEY, DICTVALUE, get_custom_eq_hash=None, DICT=None,
ll_fasthash_function=None, ll_hash_function=None,
@@ -235,6 +238,7 @@
self.setup()
self.setup_final()
l_dict = ll_newdict_size(self.DICT, len(dictobj))
+ ll_no_initial_index(l_dict)
self.dict_cache[key] = l_dict
r_key = self.key_repr
if r_key.lowleveltype == llmemory.Address:
@@ -252,16 +256,14 @@
for dictkeycontainer, dictvalue in dictobj._dict.items():
llkey = r_key.convert_const(dictkeycontainer.key)
llvalue = r_value.convert_const(dictvalue)
- _ll_dict_insertclean(l_dict, llkey, llvalue,
- dictkeycontainer.hash)
+ _ll_dict_insert_no_index(l_dict, llkey, llvalue)
return l_dict
else:
for dictkey, dictvalue in dictobj.items():
llkey = r_key.convert_const(dictkey)
llvalue = r_value.convert_const(dictvalue)
- _ll_dict_insertclean(l_dict, llkey, llvalue,
- l_dict.keyhash(llkey))
+ _ll_dict_insert_no_index(l_dict, llkey, llvalue)
return l_dict
def rtype_len(self, hop):
@@ -336,11 +338,15 @@
return DictIteratorRepr(self, "items").newiter(hop)
def rtype_method_iterkeys_with_hash(self, hop):
- hop.exception_cannot_occur()
+ v_dic, = hop.inputargs(self)
+ hop.exception_is_here()
+ hop.gendirectcall(ll_ensure_indexes, v_dic)
return DictIteratorRepr(self, "keys_with_hash").newiter(hop)
def rtype_method_iteritems_with_hash(self, hop):
- hop.exception_cannot_occur()
+ v_dic, = hop.inputargs(self)
+ hop.exception_is_here()
+ hop.gendirectcall(ll_ensure_indexes, v_dic)
return DictIteratorRepr(self, "items_with_hash").newiter(hop)
def rtype_method_clear(self, hop):
@@ -458,17 +464,30 @@
IS_64BIT = sys.maxint != 2 ** 31 - 1
-FUNC_SHIFT = 2
-FUNC_MASK = 0x03 # two bits
if IS_64BIT:
- FUNC_BYTE, FUNC_SHORT, FUNC_INT, FUNC_LONG = range(4)
+ FUNC_SHIFT = 3
+ FUNC_MASK = 0x07 # three bits
+ FUNC_BYTE, FUNC_SHORT, FUNC_INT, FUNC_LONG, FUNC_MUST_REINDEX = range(5)
else:
- FUNC_BYTE, FUNC_SHORT, FUNC_LONG = range(3)
+ FUNC_SHIFT = 2
+ FUNC_MASK = 0x03 # two bits
+ FUNC_BYTE, FUNC_SHORT, FUNC_LONG, FUNC_MUST_REINDEX = range(4)
TYPE_BYTE = rffi.UCHAR
TYPE_SHORT = rffi.USHORT
TYPE_INT = rffi.UINT
TYPE_LONG = lltype.Unsigned
+def ll_no_initial_index(d):
+ # Used when making new empty dicts, and when translating prebuilt dicts.
+ # Remove the index completely. A dictionary must always have an
+ # index unless it is freshly created or freshly translated. Most
+ # dict operations start with ll_call_lookup_function(), which will
+ # recompute the hashes and create the index.
+ ll_assert(d.num_live_items == d.num_ever_used_items,
+ "ll_no_initial_index(): dict already in use")
+ d.lookup_function_no = FUNC_MUST_REINDEX
+ d.indexes = lltype.nullptr(llmemory.GCREF.TO)
+
def ll_malloc_indexes_and_choose_lookup(d, n):
# keep in sync with ll_clear_indexes() below
if n <= 256:
@@ -508,6 +527,7 @@
@jit.dont_look_inside
def ll_call_insert_clean_function(d, hash, i):
+ assert i >= 0
fun = d.lookup_function_no & FUNC_MASK
if fun == FUNC_BYTE:
ll_dict_store_clean(d, hash, i, TYPE_BYTE)
@@ -518,6 +538,8 @@
elif fun == FUNC_LONG:
ll_dict_store_clean(d, hash, i, TYPE_LONG)
else:
+ # can't be still FUNC_MUST_REINDEX here
+ ll_assert(False, "ll_call_insert_clean_function(): invalid lookup_fun")
assert False
def ll_call_delete_by_entry_index(d, hash, i):
@@ -531,6 +553,8 @@
elif fun == FUNC_LONG:
ll_dict_delete_by_entry_index(d, hash, i, TYPE_LONG)
else:
+ # can't be still FUNC_MUST_REINDEX here
+ ll_assert(False, "ll_call_delete_by_entry_index(): invalid lookup_fun")
assert False
def ll_valid_from_flag(entries, i):
@@ -648,15 +672,14 @@
ll_dict_reindex(d, _ll_len_of_d_indexes(d))
_ll_dict_rescue._dont_inline_ = True
-def _ll_dict_insertclean(d, key, value, hash):
+@not_rpython
+def _ll_dict_insert_no_index(d, key, value):
# never translated
ENTRY = lltype.typeOf(d.entries).TO.OF
- ll_call_insert_clean_function(d, hash, d.num_ever_used_items)
entry = d.entries[d.num_ever_used_items]
entry.key = key
entry.value = value
- if hasattr(ENTRY, 'f_hash'):
- entry.f_hash = hash
+ # note that f_hash is left uninitialized in prebuilt dicts
if hasattr(ENTRY, 'f_valid'):
entry.f_valid = True
d.num_ever_used_items += 1
@@ -811,12 +834,13 @@
# also possible that there are more dead items immediately behind the
# last one, we reclaim all the dead items at the end of the ordereditem
# at the same point.
- i = d.num_ever_used_items - 2
- while i >= 0 and not d.entries.valid(i):
+ i = index
+ while True:
i -= 1
- j = i + 1
- assert j >= 0
- d.num_ever_used_items = j
+ assert i >= 0
+ if d.entries.valid(i): # must be at least one
+ break
+ d.num_ever_used_items = i + 1
# If the dictionary is at least 87.5% dead items, then consider shrinking
# it.
@@ -844,6 +868,50 @@
else:
ll_dict_reindex(d, new_size)
+def ll_ensure_indexes(d):
+ num = d.lookup_function_no
+ if num == FUNC_MUST_REINDEX:
+ ll_dict_create_initial_index(d)
+ else:
+ ll_assert((num & FUNC_MASK) != FUNC_MUST_REINDEX,
+ "bad combination in lookup_function_no")
+
+def ll_dict_create_initial_index(d):
+ """Create the initial index for a dictionary. The common case is
+ that 'd' is empty. The uncommon case is that it is a prebuilt
+ dictionary frozen by translation, in which case we must rehash all
+ entries. The common case must be seen by the JIT.
+ """
+ if d.num_live_items == 0:
+ ll_malloc_indexes_and_choose_lookup(d, DICT_INITSIZE)
+ d.resize_counter = DICT_INITSIZE * 2
+ else:
+ ll_dict_rehash_after_translation(d)
+
+@jit.dont_look_inside
+def ll_dict_rehash_after_translation(d):
+ assert d.num_live_items == d.num_ever_used_items
+ assert not d.indexes
+ #
+ # recompute all hashes. Needed if they are stored in d.entries,
+ # but do it anyway: otherwise, e.g. a string-keyed dictionary
+ # won't have a fasthash on its strings if their hash is still
+ # uncomputed.
+ ENTRY = lltype.typeOf(d.entries).TO.OF
+ for i in range(d.num_ever_used_items):
+ assert d.entries.valid(i)
+ d_entry = d.entries[i]
+ h = d.keyhash(d_entry.key)
+ if hasattr(ENTRY, 'f_hash'):
+ d_entry.f_hash = h
+ #else: purely for the side-effect it can have on d_entry.key
+ #
+ # Use the smallest acceptable size for ll_dict_reindex
+ new_size = DICT_INITSIZE
+ while new_size * 2 - d.num_live_items * 3 <= 0:
+ new_size *= 2
+ ll_dict_reindex(d, new_size)
+
def ll_dict_reindex(d, new_size):
if bool(d.indexes) and _ll_len_of_d_indexes(d) == new_size:
ll_clear_indexes(d, new_size) # hack: we can reuse the same array
@@ -857,12 +925,33 @@
entries = d.entries
i = 0
ibound = d.num_ever_used_items
- while i < ibound:
- if entries.valid(i):
- hash = entries.hash(i)
- ll_call_insert_clean_function(d, hash, i)
- i += 1
- #old_entries.delete() XXXX!
+ #
+ # Write four loops, moving the check for the value of 'fun' out of
+ # the loops. A small speed-up over ll_call_insert_clean_function().
+ fun = d.lookup_function_no # == lookup_function_no & FUNC_MASK
+ if fun == FUNC_BYTE:
+ while i < ibound:
+ if entries.valid(i):
+ ll_dict_store_clean(d, entries.hash(i), i, TYPE_BYTE)
+ i += 1
+ elif fun == FUNC_SHORT:
+ while i < ibound:
+ if entries.valid(i):
+ ll_dict_store_clean(d, entries.hash(i), i, TYPE_SHORT)
+ i += 1
+ elif IS_64BIT and fun == FUNC_INT:
+ while i < ibound:
+ if entries.valid(i):
+ ll_dict_store_clean(d, entries.hash(i), i, TYPE_INT)
+ i += 1
+ elif fun == FUNC_LONG:
+ while i < ibound:
+ if entries.valid(i):
+ ll_dict_store_clean(d, entries.hash(i), i, TYPE_LONG)
+ i += 1
+ else:
+ assert False
+
# ------- a port of CPython's dictobject.c's lookdict implementation -------
PERTURB_SHIFT = 5
@@ -1013,10 +1102,11 @@
def ll_newdict(DICT):
d = DICT.allocate()
d.entries = _ll_empty_array(DICT)
- ll_malloc_indexes_and_choose_lookup(d, DICT_INITSIZE)
+ # Don't allocate an 'indexes' for empty dict. It seems a typical
+ # program contains tons of empty dicts, so this might be a memory win.
d.num_live_items = 0
d.num_ever_used_items = 0
- d.resize_counter = DICT_INITSIZE * 2
+ ll_no_initial_index(d)
return d
OrderedDictRepr.ll_newdict = staticmethod(ll_newdict)
@@ -1101,6 +1191,10 @@
# as soon as we do something like ll_dict_reindex().
if index == (dict.lookup_function_no >> FUNC_SHIFT):
dict.lookup_function_no += (1 << FUNC_SHIFT)
+ # note that we can't have modified a FUNC_MUST_REINDEX
+ # dict here because such dicts have no invalid entries
+ ll_assert((dict.lookup_function_no & FUNC_MASK) !=
+ FUNC_MUST_REINDEX, "bad combination in _ll_dictnext")
index = nextindex
# clear the reference to the dict and prevent restarts
iter.dict = lltype.nullptr(lltype.typeOf(iter).TO.dict.TO)
@@ -1146,6 +1240,8 @@
return dict.entries[index].value
def ll_dict_copy(dict):
+ ll_ensure_indexes(dict)
+
DICT = lltype.typeOf(dict).TO
newdict = DICT.allocate()
newdict.entries = DICT.entries.TO.allocate(len(dict.entries))
@@ -1180,6 +1276,10 @@
DICT = lltype.typeOf(d).TO
old_entries = d.entries
d.entries = _ll_empty_array(DICT)
+ # note: we can't remove the index here, because it is possible that
+ # crazy Python code calls d.clear() from the method __eq__() called
+ # from ll_dict_lookup(d). Instead, stick to the rule that once a
+ # dictionary has got an index, it will always have one.
ll_malloc_indexes_and_choose_lookup(d, DICT_INITSIZE)
d.num_live_items = 0
d.num_ever_used_items = 0
@@ -1190,6 +1290,7 @@
def ll_dict_update(dic1, dic2):
if dic1 == dic2:
return
+ ll_ensure_indexes(dic2) # needed for entries.hash() below
ll_prepare_dict_update(dic1, dic2.num_live_items)
i = 0
while i < dic2.num_ever_used_items:
@@ -1216,6 +1317,7 @@
# the case where dict.update() actually has a lot of collisions.
# If num_extra is much greater than d.num_live_items the conditional_call
# will trigger anyway, which is really the goal.
+ ll_ensure_indexes(d)
x = num_extra - d.num_live_items
jit.conditional_call(d.resize_counter <= x * 3,
_ll_dict_resize_to, d, num_extra)
@@ -1275,6 +1377,7 @@
if dic.num_live_items == 0:
raise KeyError
+ ll_ensure_indexes(dic)
entries = dic.entries
# find the last entry. It's unclear if the loop below is still
diff --git a/rpython/rtyper/lltypesystem/rstr.py b/rpython/rtyper/lltypesystem/rstr.py
--- a/rpython/rtyper/lltypesystem/rstr.py
+++ b/rpython/rtyper/lltypesystem/rstr.py
@@ -1,9 +1,9 @@
from weakref import WeakValueDictionary
from rpython.annotator import model as annmodel
-from rpython.rlib import jit, types
+from rpython.rlib import jit, types, objectmodel
from rpython.rlib.objectmodel import (malloc_zero_filled, we_are_translated,
- ll_hash_string, keepalive_until_here, specialize, enforceargs)
+ ll_hash_string, keepalive_until_here, specialize, enforceargs, dont_inline)
from rpython.rlib.signature import signature
from rpython.rlib.rarithmetic import ovfcheck
from rpython.rtyper.error import TyperError
@@ -383,6 +383,8 @@
return 0
@staticmethod
+ @dont_inline
+ @jit.dont_look_inside
def _ll_strhash(s):
# unlike CPython, there is no reason to avoid to return -1
# but our malloc initializes the memory to zero, so we use zero as the
@@ -400,6 +402,7 @@
@staticmethod
def ll_strfasthash(s):
+ ll_assert(s.hash != 0, "ll_strfasthash: hash==0")
return s.hash # assumes that the hash is already computed
@staticmethod
@@ -1258,7 +1261,8 @@
'gethash': LLHelpers.ll_strhash,
'length': LLHelpers.ll_length,
'find': LLHelpers.ll_find,
- 'rfind': LLHelpers.ll_rfind}))
+ 'rfind': LLHelpers.ll_rfind},
+ hints={'remove_hash': True}))
UNICODE.become(GcStruct('rpy_unicode', ('hash', Signed),
('chars', Array(UniChar, hints={'immutable': True})),
adtmeths={'malloc' : staticAdtMethod(mallocunicode),
@@ -1266,8 +1270,8 @@
'copy_contents' : staticAdtMethod(copy_unicode_contents),
'copy_contents_from_str' : staticAdtMethod(copy_unicode_contents),
'gethash': LLHelpers.ll_strhash,
- 'length': LLHelpers.ll_length}
- ))
+ 'length': LLHelpers.ll_length},
+ hints={'remove_hash': True}))
# TODO: make the public interface of the rstr module cleaner
diff --git a/rpython/rtyper/lltypesystem/test/test_lltype.py b/rpython/rtyper/lltypesystem/test/test_lltype.py
--- a/rpython/rtyper/lltypesystem/test/test_lltype.py
+++ b/rpython/rtyper/lltypesystem/test/test_lltype.py
@@ -749,22 +749,10 @@
assert hash3 == identityhash(s3)
assert hash3 == identityhash(s3.super)
assert hash3 == identityhash(s3.super.super)
- py.test.raises(ValueError, init_identity_hash, s3, hash3^1)
- py.test.raises(ValueError, init_identity_hash, s3.super, hash3^4)
- py.test.raises(ValueError, init_identity_hash, s3.super.super, hash3^9)
-
- s3 = malloc(S3)
- init_identity_hash(s3.super, -123)
- assert -123 == identityhash(s3)
- assert -123 == identityhash(s3.super)
- assert -123 == identityhash(s3.super.super)
- py.test.raises(ValueError, init_identity_hash, s3, 4313)
More information about the pypy-commit
mailing list