[pypy-svn] r68335 - pypy/branch/gc-hash/pypy/rlib
arigo at codespeak.net
arigo at codespeak.net
Mon Oct 12 15:47:41 CEST 2009
Author: arigo
Date: Mon Oct 12 15:47:37 2009
New Revision: 68335
Modified:
pypy/branch/gc-hash/pypy/rlib/objectmodel.py
Log:
The object model interface that I propose to implement.
Modified: pypy/branch/gc-hash/pypy/rlib/objectmodel.py
==============================================================================
--- pypy/branch/gc-hash/pypy/rlib/objectmodel.py (original)
+++ pypy/branch/gc-hash/pypy/rlib/objectmodel.py Mon Oct 12 15:47:37 2009
@@ -5,6 +5,7 @@
import sys
import types
+import math
# specialize is a decorator factory for attaching _annspecialcase_
# attributes to functions: for example
@@ -127,37 +128,159 @@
obj.__dict__ = {}
obj.__class__ = FREED_OBJECT
-from pypy.rpython.extregistry import ExtRegistryEntry
-
# ____________________________________________________________
#
-# id-like functions.
-# In addition, RPython supports hash(x) on RPython instances,
-# returning a number that is not guaranteed to be unique but
-# that doesn't change over time for a given 'x'.
+# id-like functions. The idea is that calling hash() or id() is not
+# allowed in RPython. You have to call one of the following more
+# precise functions.
+
+def compute_hash(x):
+    """RPython-level replacement for hash(x), where 'x' is an immutable
+    RPython-level object.
+
+    Dispatch performed below:
+
+    * None           -> 0
+    * str / unicode  -> _hash_string(x), the same hash as in Python
+    * int            -> x itself
+    * float          -> _hash_float(x)
+    * tuple          -> _hash_tuple(x), which calls compute_hash()
+                        again on every item
+    * anything else  -> compute_identity_hash(x)
+
+    No value is reserved: the result can be 0 or -1 too.
+
+    Behavior across translation:
+
+    * on lltypesystem, it always returns the same number, both
+      before and after translation.  Dictionaries don't need to
+      be rehashed after translation.
+
+    * on ootypesystem, the value changes because of translation.
+      Dictionaries need to be rehashed.
+    """
+    # None matches none of the isinstance() tests, so it can be
+    # handled up front as a guard.
+    if x is None:
+        return 0
+    if isinstance(x, (str, unicode)):
+        result = _hash_string(x)
+    elif isinstance(x, int):
+        result = x
+    elif isinstance(x, float):
+        result = _hash_float(x)
+    elif isinstance(x, tuple):
+        result = _hash_tuple(x)
+    else:
+        result = compute_identity_hash(x)
+    return result
+
+def compute_identity_hash(x):
+    """RPython equivalent of object.__hash__(x).
+
+    Returns the so-called 'identity hash', i.e. the default hash of
+    Python that cannot be overridden.  Can be called for any
+    RPython-level object that turns into a GC object, or for any
+    low-level GC object.  The value is not guaranteed to be the same
+    before and after translation, except for RPython instances on the
+    lltypesystem.
+
+    As a side effect the hash is recorded in the object's __dict__
+    under the key '__precomputed_identity_hash' whenever that is
+    possible (presumably so that it can be reused later -- confirm
+    against the code that reads this key).
+    """
+    identity_hash = object.__hash__(x)
+    try:
+        instance_dict = x.__dict__
+        instance_dict['__precomputed_identity_hash'] = identity_hash
+    except (AttributeError, TypeError):
+        # best effort only: no __dict__ at all, or a read-only one
+        pass
+    return identity_hash
def compute_unique_id(x):
- """RPython equivalent of id(x). The 'x' must be an RPython instance.
- This operation can be very costly depending on the garbage collector.
- To remind you of this fact, we don't support id(x) directly.
+ """RPython equivalent of id(x). The 'x' must be an RPython-level
+ object that turns into a GC object. This operation can be very
+ costly depending on the garbage collector. To remind you of this
+ fact, we don't support id(x) directly.
+ (XXX not implemented on ootype, falls back to compute_identity_hash)
"""
return id(x) # XXX need to return r_longlong on some platforms
def current_object_addr_as_int(x):
"""A cheap version of id(x). The current memory location of an
- instance can change over time for moving GCs. Also note that on
+ object can change over time for moving GCs. Also note that on
ootypesystem this typically doesn't return the real address but
- just the same as hash(x).
+ just the same as compute_hash(x).
"""
from pypy.rlib.rarithmetic import intmask
return intmask(id(x))
+# ----------
+
+def _hash_string(s):
+    """The algorithm behind compute_hash() for a string or a unicode.
+
+    Starts from the code of the first character shifted left by 7 (or
+    from 0 for an empty string), mixes in every character with
+    'acc = (1000003 * acc) ^ ord(ch)', xors the length in, and passes
+    the final value through intmask().
+    """
+    from pypy.rlib.rarithmetic import intmask
+    size = len(s)
+    acc = 0
+    if size > 0:
+        acc = ord(s[0]) << 7
+    for ch in s:
+        acc = (1000003 * acc) ^ ord(ch)
+    return intmask(acc ^ size)
+
+def _hash_float(f):
+    """The algorithm behind compute_hash() for a float.
+
+    This follows the CPython implementation, except that floats
+    holding an integral value are not treated specially: in RPython,
+    floats cannot be used together with ints in dicts anyway.
+
+    Non-finite values cannot go through frexp() + int() (int() raises
+    on them), so they get the fixed hashes that CPython uses:
+    314159 for +inf, -271828 for -inf and 0 for NaN.
+    """
+    if f != f:
+        # NaN is the only float that compares unequal to itself
+        return 0
+    if f == _HASH_INFINITY:
+        return 314159
+    if f == -_HASH_INFINITY:
+        return -271828
+    v, expo = math.frexp(f)
+    # take the next 31 bits of the mantissa, twice
+    v *= TAKE_NEXT
+    hipart = int(v)
+    v = (v - float(hipart)) * TAKE_NEXT
+    x = hipart + int(v) + (expo << 15)
+    return x
+TAKE_NEXT = float(2**31)
+_HASH_INFINITY = 1e200 * 1e200     # the product overflows to +inf
+
+def _hash_tuple(t):
+    """NOT_RPYTHON.  The algorithm behind compute_hash() for a tuple.
+
+    Combines compute_hash() of every item, in order, using a
+    multiplier that starts at 1000003 and grows after each item by
+    82520 plus twice the number of items still to be processed.
+    Intermediate values go through intmask() (presumably truncation
+    to a machine-sized signed integer -- see pypy.rlib.rarithmetic).
+    No final adjustment is applied, so the result can be 0 or -1.
+    """
+    from pypy.rlib.rarithmetic import intmask
+    x = 0x345678
+    mult = 1000003
+    remaining = len(t)
+    for item in t:
+        # count down first: the multiplier update uses the number of
+        # items left *after* the current one
+        remaining -= 1
+        y = compute_hash(item)
+        x = intmask((x ^ y) * mult)
+        mult = intmask(mult + 82520 + remaining + remaining)
+    return x
+
+# ----------
+
+from pypy.rpython.extregistry import ExtRegistryEntry
+
+class Entry(ExtRegistryEntry):
+    """Extregistry entry attached to compute_hash()."""
+    _about_ = compute_hash
+
+    def compute_result_annotation(self, s_x):
+        """The result is annotated as an integer, whatever 's_x' is."""
+        from pypy.annotation.model import SomeInteger
+        return SomeInteger()
+
+    def specialize_call(self, hop):
+        """Emit a direct call to the low-level hash function supplied
+        by the repr of the (single) argument."""
+        # the one-element unpackings insist on exactly one argument
+        [r_arg] = hop.args_r
+        [v_arg] = hop.inputargs(r_arg)
+        return hop.gendirectcall(r_arg.get_ll_hash_function(), v_arg)
+
+class Entry(ExtRegistryEntry):
+    """Extregistry entry attached to compute_identity_hash()."""
+    _about_ = compute_identity_hash
+
+    def compute_result_annotation(self, s_x):
+        """The result is annotated as an integer, whatever 's_x' is."""
+        from pypy.annotation import model as annmodel
+        return annmodel.SomeInteger()
+
+    def specialize_call(self, hop):
+        """Emit an 'identityhash' operation on the argument.
+
+        On lltypesystem the argument must be a pointer to a 'gc'
+        object; on any other type system it must be an ootype.OOType.
+        Raises TyperError otherwise.
+        """
+        vobj, = hop.inputargs(hop.args_r[0])
+        if hop.rtyper.type_system.name == 'lltypesystem':
+            from pypy.rpython.lltypesystem import lltype
+            ok = (isinstance(vobj.concretetype, lltype.Ptr) and
+                  vobj.concretetype.TO._gckind == 'gc')
+            signed = lltype.Signed
+        else:
+            # ootype lives in the ootypesystem package, like in the
+            # other entries of this file
+            from pypy.rpython.ootypesystem import ootype
+            ok = isinstance(vobj.concretetype, ootype.OOType)
+            signed = ootype.Signed
+        if not ok:
+            from pypy.rpython.error import TyperError
+            raise TyperError("compute_identity_hash() cannot be applied to"
+                             " %r" % (vobj.concretetype,))
+        # 'signed' is bound on both paths; the previous code used
+        # lltype.Signed even where lltype had not been imported
+        return hop.genop('identityhash', [vobj], resulttype=signed)
+
class Entry(ExtRegistryEntry):
_about_ = compute_unique_id
def compute_result_annotation(self, s_x):
from pypy.annotation import model as annmodel
- assert isinstance(s_x, annmodel.SomeInstance)
return annmodel.SomeInteger()
def specialize_call(self, hop):
@@ -173,7 +296,7 @@
# XXX wrong implementation for now, fix me
from pypy.rpython.rmodel import warning
warning("compute_unique_id() is not fully supported on ootype")
- return hop.genop('ooidentityhash', [vobj],
+ return hop.genop('identityhash', [vobj],
resulttype = ootype.Signed)
from pypy.rpython.error import TyperError
raise TyperError("compute_unique_id() cannot be applied to %r" % (
@@ -184,7 +307,6 @@
def compute_result_annotation(self, s_x):
from pypy.annotation import model as annmodel
- assert isinstance(s_x, annmodel.SomeInstance)
return annmodel.SomeInteger()
def specialize_call(self, hop):
@@ -197,12 +319,14 @@
elif hop.rtyper.type_system.name == 'ootypesystem':
from pypy.rpython.ootypesystem import ootype
if isinstance(vobj.concretetype, ootype.Instance):
- return hop.genop('ooidentityhash', [vobj],
+ return hop.genop('identityhash', [vobj],
resulttype = ootype.Signed)
from pypy.rpython.error import TyperError
raise TyperError("current_object_addr_as_int() cannot be applied to"
" %r" % (vobj.concretetype,))
+# ____________________________________________________________
+
def hlinvoke(repr, llcallable, *args):
    """Placeholder that must never be executed directly.

    Always raises TypeError; as the message says, hlinvoke is meant
    to be rtyped, not called.
    """
    raise TypeError("hlinvoke is meant to be rtyped and not called directly")
More information about the Pypy-commit
mailing list