[pypy-svn] r68335 - pypy/branch/gc-hash/pypy/rlib

arigo at codespeak.net arigo at codespeak.net
Mon Oct 12 15:47:41 CEST 2009


Author: arigo
Date: Mon Oct 12 15:47:37 2009
New Revision: 68335

Modified:
   pypy/branch/gc-hash/pypy/rlib/objectmodel.py
Log:
The object model interface that I propose to implement.


Modified: pypy/branch/gc-hash/pypy/rlib/objectmodel.py
==============================================================================
--- pypy/branch/gc-hash/pypy/rlib/objectmodel.py	(original)
+++ pypy/branch/gc-hash/pypy/rlib/objectmodel.py	Mon Oct 12 15:47:37 2009
@@ -5,6 +5,7 @@
 
 import sys
 import types
+import math
 
 # specialize is a decorator factory for attaching _annspecialcase_
 # attributes to functions: for example
@@ -127,37 +128,159 @@
     obj.__dict__ = {}
     obj.__class__ = FREED_OBJECT
 
-from pypy.rpython.extregistry import ExtRegistryEntry
-
 # ____________________________________________________________
 #
-# id-like functions.
-# In addition, RPython supports hash(x) on RPython instances,
-# returning a number that is not guaranteed to be unique but
-# that doesn't change over time for a given 'x'.
+# id-like functions.  The idea is that calling hash() or id() is not
+# allowed in RPython.  You have to call one of the following more
+# precise functions.
+
+def compute_hash(x):
+    """RPython equivalent of hash(x), where 'x' is an immutable
+    RPython-level object.  For strings or unicodes it computes the
+    hash as in Python.  For tuples it calls compute_hash()
+    recursively.  For instances it uses compute_identity_hash().
+    Note that this can return 0 or -1 too.
+
+    Behavior across translation:
+
+      * on lltypesystem, it always returns the same number, both
+        before and after translation.  Dictionaries don't need to
+        be rehashed after translation.
+
+      * on ootypesystem, the value changes because of translation.
+        Dictionaries need to be rehashed.
+    """
+    if isinstance(x, (str, unicode)):
+        return _hash_string(x)
+    if isinstance(x, int):
+        return x
+    if isinstance(x, float):
+        return _hash_float(x)
+    if isinstance(x, tuple):
+        return _hash_tuple(x)
+    if x is None:
+        return 0
+    return compute_identity_hash(x)
+
+def compute_identity_hash(x):
+    """RPython equivalent of object.__hash__(x).  This returns the
+    so-called 'identity hash', which is the non-overridable default
+    hash of Python.  Can be called for any RPython-level object
+    that turns into a GC object, or for any low-level GC object.
+    The value is not guaranteed to be the same before and after
+    translation, except for RPython instances on the lltypesystem.
+    """
+    result = object.__hash__(x)
+    try:
+        x.__dict__['__precomputed_identity_hash'] = result
+    except (TypeError, AttributeError):
+        pass
+    return result
 
 def compute_unique_id(x):
-    """RPython equivalent of id(x).  The 'x' must be an RPython instance.
-    This operation can be very costly depending on the garbage collector.
-    To remind you of this fact, we don't support id(x) directly.
+    """RPython equivalent of id(x).  The 'x' must be an RPython-level
+    object that turns into a GC object.  This operation can be very
+    costly depending on the garbage collector.  To remind you of this
+    fact, we don't support id(x) directly.
+    (XXX not implemented on ootype, falls back to compute_identity_hash)
     """
     return id(x)      # XXX need to return r_longlong on some platforms
 
 def current_object_addr_as_int(x):
     """A cheap version of id(x).  The current memory location of an
-    instance can change over time for moving GCs.  Also note that on
+    object can change over time for moving GCs.  Also note that on
     ootypesystem this typically doesn't return the real address but
-    just the same as hash(x).
+    just the same as compute_identity_hash(x).
     """
     from pypy.rlib.rarithmetic import intmask
     return intmask(id(x))
 
+# ----------
+
+def _hash_string(s):
+    """The algorithm behind compute_hash() for a string or a unicode."""
+    from pypy.rlib.rarithmetic import intmask
+    length = len(s)
+    if length == 0:
+        x = 0
+    else:
+        x = ord(s[0]) << 7
+        i = 0
+        while i < length:
+            x = (1000003*x) ^ ord(s[i])
+            i += 1
+        x ^= length
+    return intmask(x)
+
+def _hash_float(f):
+    """The algorithm behind compute_hash() for a float.
+    This implementation is identical to the CPython implementation,
+    except for the fact that the integer case is not treated specially.
+    In RPython, floats cannot be used with ints in dicts, anyway.
+    """
+    v, expo = math.frexp(f)
+    v *= TAKE_NEXT
+    hipart = int(v)
+    v = (v - float(hipart)) * TAKE_NEXT
+    x = hipart + int(v) + (expo << 15)
+    return x
+TAKE_NEXT = float(2**31)
+
+def _hash_tuple(t):
+    """NOT_RPYTHON.  The algorithm behind compute_hash() for a tuple."""
+    from pypy.rlib.rarithmetic import intmask   # local import, as elsewhere
+    x = 0x345678
+    mult = 1000003
+    length = len(t)
+    for item in t:
+        # mask like _hash_string() so the result stays a machine-sized int
+        x = intmask((x ^ compute_hash(item)) * mult)
+        mult += intmask(82520 + length + length)  # tuple length, not len()
+
+# ----------
+
+from pypy.rpython.extregistry import ExtRegistryEntry
+
+class Entry(ExtRegistryEntry):
+    _about_ = compute_hash
+
+    def compute_result_annotation(self, s_x):
+        from pypy.annotation import model as annmodel
+        return annmodel.SomeInteger()
+
+    def specialize_call(self, hop):
+        r_obj, = hop.args_r
+        v_obj, = hop.inputargs(r_obj)
+        ll_fn = r_obj.get_ll_hash_function()
+        return hop.gendirectcall(ll_fn, v_obj)
+
+class Entry(ExtRegistryEntry):
+    _about_ = compute_identity_hash
+
+    def compute_result_annotation(self, s_x):
+        from pypy.annotation import model as annmodel
+        return annmodel.SomeInteger()
+
+    def specialize_call(self, hop):
+        # lltype is needed on both type systems (for the Signed result type)
+        from pypy.rpython.lltypesystem import lltype
+        vobj, = hop.inputargs(hop.args_r[0])
+        if hop.rtyper.type_system.name == 'lltypesystem':
+            ok = (isinstance(vobj.concretetype, lltype.Ptr) and
+                  vobj.concretetype.TO._gckind == 'gc')
+        else:
+            from pypy.rpython.ootypesystem import ootype
+            ok = isinstance(vobj.concretetype, ootype.OOType)
+        if not ok:
+            from pypy.rpython.error import TyperError
+            raise TyperError("compute_identity_hash() cannot be applied to"
+                             " %r" % (vobj.concretetype,))
+
 class Entry(ExtRegistryEntry):
     _about_ = compute_unique_id
 
     def compute_result_annotation(self, s_x):
         from pypy.annotation import model as annmodel
-        assert isinstance(s_x, annmodel.SomeInstance)
         return annmodel.SomeInteger()
 
     def specialize_call(self, hop):
@@ -173,7 +296,7 @@
                 # XXX wrong implementation for now, fix me
                 from pypy.rpython.rmodel import warning
                 warning("compute_unique_id() is not fully supported on ootype")
-                return hop.genop('ooidentityhash', [vobj],
+                return hop.genop('identityhash', [vobj],
                                  resulttype = ootype.Signed)
         from pypy.rpython.error import TyperError
         raise TyperError("compute_unique_id() cannot be applied to %r" % (
@@ -184,7 +307,6 @@
 
     def compute_result_annotation(self, s_x):
         from pypy.annotation import model as annmodel
-        assert isinstance(s_x, annmodel.SomeInstance)
         return annmodel.SomeInteger()
 
     def specialize_call(self, hop):
@@ -197,12 +319,14 @@
         elif hop.rtyper.type_system.name == 'ootypesystem':
             from pypy.rpython.ootypesystem import ootype
             if isinstance(vobj.concretetype, ootype.Instance):
-                return hop.genop('ooidentityhash', [vobj],
+                return hop.genop('identityhash', [vobj],
                                  resulttype = ootype.Signed)
         from pypy.rpython.error import TyperError
         raise TyperError("current_object_addr_as_int() cannot be applied to"
                          " %r" % (vobj.concretetype,))
 
+# ____________________________________________________________
+
 def hlinvoke(repr, llcallable, *args):
     raise TypeError, "hlinvoke is meant to be rtyped and not called direclty"
 



More information about the Pypy-commit mailing list