[pypy-commit] pypy rpython-hash: Remove the precomputed hash systematically when writing STR/UNICODE

arigo pypy.commits at gmail.com
Sat Jan 28 10:56:29 EST 2017


Author: Armin Rigo <arigo at tunes.org>
Branch: rpython-hash
Changeset: r89820:82f0db9460ee
Date: 2017-01-28 16:51 +0100
http://bitbucket.org/pypy/pypy/changeset/82f0db9460ee/

Log:	Remove the precomputed hash systematically when writing STR/UNICODE
	objects to C, if using a non-default hash function for strings

diff --git a/rpython/rlib/objectmodel.py b/rpython/rlib/objectmodel.py
--- a/rpython/rlib/objectmodel.py
+++ b/rpython/rlib/objectmodel.py
@@ -513,7 +513,7 @@
 
 # ----------
 
-HASH_ALGORITHM = "rpython"  # XXX Is there a better name?
+HASH_ALGORITHM = "rpython"    # the default, no source of randomness possible
 HASH_ALGORITHM_FIXED = False
 
 @not_rpython
diff --git a/rpython/rtyper/lltypesystem/rstr.py b/rpython/rtyper/lltypesystem/rstr.py
--- a/rpython/rtyper/lltypesystem/rstr.py
+++ b/rpython/rtyper/lltypesystem/rstr.py
@@ -1,7 +1,7 @@
 from weakref import WeakValueDictionary
 
 from rpython.annotator import model as annmodel
-from rpython.rlib import jit, types
+from rpython.rlib import jit, types, objectmodel
 from rpython.rlib.objectmodel import (malloc_zero_filled, we_are_translated,
     ll_hash_string, keepalive_until_here, specialize, enforceargs)
 from rpython.rlib.signature import signature
@@ -169,7 +169,10 @@
             for i in range(len(value)):
                 p.chars[i] = cast_primitive(self.base, value[i])
             p.hash = 0
-            self.ll.ll_strhash(p)   # precompute the hash
+            if objectmodel.HASH_ALGORITHM == "rpython":
+                self.ll.ll_strhash(p)   # precompute the hash
+                # but it is pointless if this hash wouldn't end up in the
+                # C code anyway: see "remove_hash" in translator/c/node.py
             self.CACHE[value] = p
             return p
 
@@ -400,6 +403,7 @@
 
     @staticmethod
     def ll_strfasthash(s):
+        ll_assert(s.hash != 0, "ll_strfasthash: hash==0")
         return s.hash     # assumes that the hash is already computed
 
     @staticmethod
@@ -1258,7 +1262,8 @@
                               'gethash': LLHelpers.ll_strhash,
                               'length': LLHelpers.ll_length,
                               'find': LLHelpers.ll_find,
-                              'rfind': LLHelpers.ll_rfind}))
+                              'rfind': LLHelpers.ll_rfind},
+                    hints={'remove_hash': True}))
 UNICODE.become(GcStruct('rpy_unicode', ('hash', Signed),
                         ('chars', Array(UniChar, hints={'immutable': True})),
                         adtmeths={'malloc' : staticAdtMethod(mallocunicode),
@@ -1266,8 +1271,8 @@
                                   'copy_contents' : staticAdtMethod(copy_unicode_contents),
                                   'copy_contents_from_str' : staticAdtMethod(copy_unicode_contents),
                                   'gethash': LLHelpers.ll_strhash,
-                                  'length': LLHelpers.ll_length}
-                        ))
+                                  'length': LLHelpers.ll_length},
+                    hints={'remove_hash': True}))
 
 
 # TODO: make the public interface of the rstr module cleaner
diff --git a/rpython/translator/c/node.py b/rpython/translator/c/node.py
--- a/rpython/translator/c/node.py
+++ b/rpython/translator/c/node.py
@@ -8,7 +8,7 @@
 from rpython.translator.c.support import cdecl, forward_cdecl, somelettersfrom
 from rpython.translator.c.support import c_char_array_constant, barebonearray
 from rpython.translator.c.primitive import PrimitiveType, name_signed
-from rpython.rlib import exports
+from rpython.rlib import exports, objectmodel
 from rpython.rlib.rfloat import isfinite, isinf
 
 
@@ -585,12 +585,20 @@
         for name in defnode.fieldnames:
             data.append((name, getattr(self.obj, name)))
 
+        if T._hints.get('remove_hash'):
+            # hack for rstr.STR and UNICODE
+            if objectmodel.HASH_ALGORITHM != "rpython":
+                i = 0
+                while data[i][0] != 'hash':
+                    i += 1
+                data[i] = ('hash', 0)
+
         # Reasonably, you should only initialise one of the fields of a union
         # in C.  This is possible with the syntax '.fieldname value' or
         # '.fieldname = value'.  But here we don't know which of the
         # fields need initialization, so XXX we pick the first one
         # arbitrarily.
-        if hasattr(T, "_hints") and T._hints.get('union'):
+        if T._hints.get('union'):
             data = data[0:1]
 
         if 'get_padding_drop' in T._hints:


More information about the pypy-commit mailing list