[pypy-svn] r68525 - in pypy/branch/gc-hash-merge/pypy: annotation annotation/test config doc/config interpreter jit/backend/cli jit/metainterp jit/metainterp/test module/__builtin__ objspace objspace/std objspace/std/test rlib rlib/test rpython rpython/lltypesystem rpython/lltypesystem/test rpython/memory/gc rpython/memory/gctransform rpython/ootypesystem rpython/ootypesystem/test rpython/test translator/c translator/c/src translator/c/test translator/cli translator/jvm

arigo at codespeak.net
Fri Oct 16 11:47:21 CEST 2009


Author: arigo
Date: Fri Oct 16 11:47:18 2009
New Revision: 68525

Added:
   pypy/branch/gc-hash-merge/pypy/jit/metainterp/test/test_typesystem.py
      - copied unchanged from r68524, pypy/branch/gc-hash/pypy/jit/metainterp/test/test_typesystem.py
   pypy/branch/gc-hash-merge/pypy/translator/c/src/align.h
      - copied unchanged from r68524, pypy/branch/gc-hash/pypy/translator/c/src/align.h
Removed:
   pypy/branch/gc-hash-merge/pypy/doc/config/objspace.std.withsmalldicts.txt
Modified:
   pypy/branch/gc-hash-merge/pypy/annotation/bookkeeper.py
   pypy/branch/gc-hash-merge/pypy/annotation/builtin.py
   pypy/branch/gc-hash-merge/pypy/annotation/dictdef.py
   pypy/branch/gc-hash-merge/pypy/annotation/test/test_annrpython.py
   pypy/branch/gc-hash-merge/pypy/annotation/unaryop.py
   pypy/branch/gc-hash-merge/pypy/config/pypyoption.py
   pypy/branch/gc-hash-merge/pypy/interpreter/buffer.py
   pypy/branch/gc-hash-merge/pypy/interpreter/pycode.py
   pypy/branch/gc-hash-merge/pypy/interpreter/typedef.py
   pypy/branch/gc-hash-merge/pypy/jit/backend/cli/method.py
   pypy/branch/gc-hash-merge/pypy/jit/metainterp/codewriter.py
   pypy/branch/gc-hash-merge/pypy/jit/metainterp/executor.py
   pypy/branch/gc-hash-merge/pypy/jit/metainterp/history.py
   pypy/branch/gc-hash-merge/pypy/jit/metainterp/optimizeopt.py
   pypy/branch/gc-hash-merge/pypy/jit/metainterp/pyjitpl.py
   pypy/branch/gc-hash-merge/pypy/jit/metainterp/resoperation.py
   pypy/branch/gc-hash-merge/pypy/jit/metainterp/resume.py
   pypy/branch/gc-hash-merge/pypy/jit/metainterp/support.py
   pypy/branch/gc-hash-merge/pypy/jit/metainterp/test/test_basic.py
   pypy/branch/gc-hash-merge/pypy/jit/metainterp/test/test_compile.py
   pypy/branch/gc-hash-merge/pypy/jit/metainterp/test/test_loop.py
   pypy/branch/gc-hash-merge/pypy/jit/metainterp/test/test_optimizeopt.py
   pypy/branch/gc-hash-merge/pypy/jit/metainterp/test/test_resume.py
   pypy/branch/gc-hash-merge/pypy/jit/metainterp/test/test_warmspot.py
   pypy/branch/gc-hash-merge/pypy/jit/metainterp/typesystem.py
   pypy/branch/gc-hash-merge/pypy/jit/metainterp/warmspot.py
   pypy/branch/gc-hash-merge/pypy/module/__builtin__/interp_classobj.py
   pypy/branch/gc-hash-merge/pypy/objspace/descroperation.py
   pypy/branch/gc-hash-merge/pypy/objspace/std/complextype.py
   pypy/branch/gc-hash-merge/pypy/objspace/std/dictmultiobject.py
   pypy/branch/gc-hash-merge/pypy/objspace/std/floattype.py
   pypy/branch/gc-hash-merge/pypy/objspace/std/frozensettype.py
   pypy/branch/gc-hash-merge/pypy/objspace/std/inttype.py
   pypy/branch/gc-hash-merge/pypy/objspace/std/longtype.py
   pypy/branch/gc-hash-merge/pypy/objspace/std/objecttype.py
   pypy/branch/gc-hash-merge/pypy/objspace/std/stringobject.py
   pypy/branch/gc-hash-merge/pypy/objspace/std/stringtype.py
   pypy/branch/gc-hash-merge/pypy/objspace/std/test/test_dictmultiobject.py
   pypy/branch/gc-hash-merge/pypy/objspace/std/test/test_floatobject.py
   pypy/branch/gc-hash-merge/pypy/objspace/std/test/test_userobject.py
   pypy/branch/gc-hash-merge/pypy/objspace/std/tupletype.py
   pypy/branch/gc-hash-merge/pypy/objspace/std/typeobject.py
   pypy/branch/gc-hash-merge/pypy/objspace/std/unicodeobject.py
   pypy/branch/gc-hash-merge/pypy/objspace/std/unicodetype.py
   pypy/branch/gc-hash-merge/pypy/rlib/jit.py
   pypy/branch/gc-hash-merge/pypy/rlib/objectmodel.py
   pypy/branch/gc-hash-merge/pypy/rlib/rarithmetic.py
   pypy/branch/gc-hash-merge/pypy/rlib/rope.py
   pypy/branch/gc-hash-merge/pypy/rlib/rweakrefimpl.py
   pypy/branch/gc-hash-merge/pypy/rlib/test/test_objectmodel.py
   pypy/branch/gc-hash-merge/pypy/rpython/llinterp.py
   pypy/branch/gc-hash-merge/pypy/rpython/lltypesystem/lloperation.py
   pypy/branch/gc-hash-merge/pypy/rpython/lltypesystem/lltype.py
   pypy/branch/gc-hash-merge/pypy/rpython/lltypesystem/rclass.py
   pypy/branch/gc-hash-merge/pypy/rpython/lltypesystem/rstr.py
   pypy/branch/gc-hash-merge/pypy/rpython/lltypesystem/test/test_lltype.py
   pypy/branch/gc-hash-merge/pypy/rpython/memory/gc/generation.py
   pypy/branch/gc-hash-merge/pypy/rpython/memory/gc/hybrid.py
   pypy/branch/gc-hash-merge/pypy/rpython/memory/gc/markcompact.py
   pypy/branch/gc-hash-merge/pypy/rpython/memory/gc/marksweep.py
   pypy/branch/gc-hash-merge/pypy/rpython/memory/gc/semispace.py
   pypy/branch/gc-hash-merge/pypy/rpython/memory/gctransform/boehm.py
   pypy/branch/gc-hash-merge/pypy/rpython/memory/gctransform/framework.py
   pypy/branch/gc-hash-merge/pypy/rpython/memory/gctransform/refcounting.py
   pypy/branch/gc-hash-merge/pypy/rpython/memory/gctransform/transform.py
   pypy/branch/gc-hash-merge/pypy/rpython/ootypesystem/ooregistry.py
   pypy/branch/gc-hash-merge/pypy/rpython/ootypesystem/ootype.py
   pypy/branch/gc-hash-merge/pypy/rpython/ootypesystem/rbuiltin.py
   pypy/branch/gc-hash-merge/pypy/rpython/ootypesystem/rclass.py
   pypy/branch/gc-hash-merge/pypy/rpython/ootypesystem/rstr.py
   pypy/branch/gc-hash-merge/pypy/rpython/ootypesystem/rtupletype.py
   pypy/branch/gc-hash-merge/pypy/rpython/ootypesystem/test/test_ooclean.py
   pypy/branch/gc-hash-merge/pypy/rpython/ootypesystem/test/test_oorecord.py
   pypy/branch/gc-hash-merge/pypy/rpython/ootypesystem/test/test_oortype.py
   pypy/branch/gc-hash-merge/pypy/rpython/ootypesystem/test/test_ootype.py
   pypy/branch/gc-hash-merge/pypy/rpython/rbuiltin.py
   pypy/branch/gc-hash-merge/pypy/rpython/rclass.py
   pypy/branch/gc-hash-merge/pypy/rpython/rdict.py
   pypy/branch/gc-hash-merge/pypy/rpython/rfloat.py
   pypy/branch/gc-hash-merge/pypy/rpython/rtuple.py
   pypy/branch/gc-hash-merge/pypy/rpython/rtyper.py
   pypy/branch/gc-hash-merge/pypy/rpython/test/test_rclass.py
   pypy/branch/gc-hash-merge/pypy/rpython/test/test_rfloat.py
   pypy/branch/gc-hash-merge/pypy/rpython/test/test_rstr.py
   pypy/branch/gc-hash-merge/pypy/rpython/test/test_rtuple.py
   pypy/branch/gc-hash-merge/pypy/rpython/test/test_runicode.py
   pypy/branch/gc-hash-merge/pypy/translator/c/gc.py
   pypy/branch/gc-hash-merge/pypy/translator/c/node.py
   pypy/branch/gc-hash-merge/pypy/translator/c/src/g_prerequisite.h
   pypy/branch/gc-hash-merge/pypy/translator/c/src/mem.h
   pypy/branch/gc-hash-merge/pypy/translator/c/test/test_boehm.py
   pypy/branch/gc-hash-merge/pypy/translator/c/test/test_lltyped.py
   pypy/branch/gc-hash-merge/pypy/translator/c/test/test_newgc.py
   pypy/branch/gc-hash-merge/pypy/translator/c/test/test_typed.py
   pypy/branch/gc-hash-merge/pypy/translator/cli/opcodes.py
   pypy/branch/gc-hash-merge/pypy/translator/jvm/builtin.py
   pypy/branch/gc-hash-merge/pypy/translator/jvm/opcodes.py
   pypy/branch/gc-hash-merge/pypy/translator/jvm/typesystem.py
Log:
Merge the gc-hash branch.


Modified: pypy/branch/gc-hash-merge/pypy/annotation/bookkeeper.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/annotation/bookkeeper.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/annotation/bookkeeper.py	Fri Oct 16 11:47:18 2009
@@ -169,7 +169,6 @@
         self.pending_specializations = []   # list of callbacks
         self.external_class_cache = {}      # cache of ExternalType classes
 
-        self.needs_hash_support = {}
         self.needs_generic_instantiate = {}
 
         self.stats = Stats(self)
@@ -219,12 +218,6 @@
                 self.consider_call_site_for_pbc(pbc, 'simple_call', 
                                                 args_s, s_ImpossibleValue)
             self.emulated_pbc_calls = {}
-
-            for clsdef in self.needs_hash_support.keys():
-                for clsdef2 in self.needs_hash_support:
-                    if clsdef.issubclass(clsdef2) and clsdef is not clsdef2:
-                        del self.needs_hash_support[clsdef]
-                        break
         finally:
             self.leave()
 
@@ -399,6 +392,7 @@
                         for ek, ev in items:
                             result.dictdef.generalize_key(self.immutablevalue(ek))
                             result.dictdef.generalize_value(self.immutablevalue(ev))
+                            result.dictdef.seen_prebuilt_key(ek)
                         seen_elements = len(items)
                         # if the dictionary grew during the iteration,
                         # start over again
@@ -417,6 +411,7 @@
                 for ek, ev in x.iteritems():
                     dictdef.generalize_key(self.immutablevalue(ek, False))
                     dictdef.generalize_value(self.immutablevalue(ev, False))
+                    dictdef.seen_prebuilt_key(ek)
                 result = SomeDict(dictdef)
         elif tp is weakref.ReferenceType:
             x1 = x()

Modified: pypy/branch/gc-hash-merge/pypy/annotation/builtin.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/annotation/builtin.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/annotation/builtin.py	Fri Oct 16 11:47:18 2009
@@ -493,6 +493,10 @@
     assert PtrT.is_constant()
     return SomePtr(ll_ptrtype=PtrT.const)
 
+def identityhash(s_obj):
+    assert isinstance(s_obj, (SomePtr, SomeOOObject, SomeOOInstance))
+    return SomeInteger()
+
 def getRuntimeTypeInfo(T):
     assert T.is_constant()
     return immutablevalue(lltype.getRuntimeTypeInfo(T.const))
@@ -517,6 +521,7 @@
 BUILTIN_ANALYZERS[lltype.direct_ptradd] = direct_ptradd
 BUILTIN_ANALYZERS[lltype.cast_ptr_to_int] = cast_ptr_to_int
 BUILTIN_ANALYZERS[lltype.cast_int_to_ptr] = cast_int_to_ptr
+BUILTIN_ANALYZERS[lltype.identityhash] = identityhash
 BUILTIN_ANALYZERS[lltype.getRuntimeTypeInfo] = getRuntimeTypeInfo
 BUILTIN_ANALYZERS[lltype.runtime_type_info] = runtime_type_info
 BUILTIN_ANALYZERS[lltype.Ptr] = constPtr
@@ -562,10 +567,6 @@
     else:
         return SomeOOInstance(c.ootype)
 
-def ooidentityhash(i):
-    assert isinstance(i, (SomeOOInstance, SomeOOObject))
-    return SomeInteger()
-
 def ooupcast(I, i):
     assert isinstance(I.const, ootype.Instance)
     if ootype.isSubclass(i.ootype, I.const):
@@ -606,7 +607,6 @@
 BUILTIN_ANALYZERS[ootype.runtimenew] = runtimenew
 BUILTIN_ANALYZERS[ootype.classof] = classof
 BUILTIN_ANALYZERS[ootype.subclassof] = subclassof
-BUILTIN_ANALYZERS[ootype.ooidentityhash] = ooidentityhash
 BUILTIN_ANALYZERS[ootype.ooupcast] = ooupcast
 BUILTIN_ANALYZERS[ootype.oodowncast] = oodowncast
 BUILTIN_ANALYZERS[ootype.cast_to_object] = cast_to_object

Modified: pypy/branch/gc-hash-merge/pypy/annotation/dictdef.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/annotation/dictdef.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/annotation/dictdef.py	Fri Oct 16 11:47:18 2009
@@ -2,15 +2,16 @@
 from pypy.annotation.model import SomeInteger, s_Bool, unionof
 from pypy.annotation.model import SomeInstance
 from pypy.annotation.listdef import ListItem
+from pypy.rlib.objectmodel import compute_hash
 
 
 class DictKey(ListItem):
-    custom_eq_hash = False
+    s_rdict_eqfn = s_ImpossibleValue
+    s_rdict_hashfn = s_ImpossibleValue
 
     def __init__(self, bookkeeper, s_value, is_r_dict=False):
         ListItem.__init__(self, bookkeeper, s_value)
-        self.is_r_dict = is_r_dict
-        self.enable_hashing()
+        self.custom_eq_hash = is_r_dict
 
     def patch(self):
         for dictdef in self.itemof:
@@ -26,25 +27,16 @@
                                               other.s_rdict_hashfn,
                                               other=other)
 
-    def enable_hashing(self):
-        # r_dicts don't need the RPython hash of their keys
-        if isinstance(self.s_value, SomeInstance) and not self.is_r_dict:
-            self.bookkeeper.needs_hash_support[self.s_value.classdef] = True
-
     def generalize(self, s_other_value):
         updated = ListItem.generalize(self, s_other_value)
-        if updated:
-            self.enable_hashing()
         if updated and self.custom_eq_hash:
             self.emulate_rdict_calls()
         return updated
 
     def update_rdict_annotations(self, s_eqfn, s_hashfn, other=None):
-        if not self.custom_eq_hash:
-            self.custom_eq_hash = True
-        else:
-            s_eqfn = unionof(s_eqfn, self.s_rdict_eqfn)
-            s_hashfn = unionof(s_hashfn, self.s_rdict_hashfn)
+        assert self.custom_eq_hash
+        s_eqfn = unionof(s_eqfn, self.s_rdict_eqfn)
+        s_hashfn = unionof(s_hashfn, self.s_rdict_hashfn)
         self.s_rdict_eqfn = s_eqfn
         self.s_rdict_hashfn = s_hashfn
         self.emulate_rdict_calls(other=other)
@@ -139,6 +131,14 @@
     def generalize_value(self, s_value):
         self.dictvalue.generalize(s_value)
 
+    def seen_prebuilt_key(self, x):
+        # In case we are an r_dict, we don't ask for the hash ourselves.
+        # Note that if the custom hashing function ends up asking for
+        # the hash of x, then it must use compute_hash() itself, so it
+        # works out.
+        if not self.dictkey.custom_eq_hash:
+            compute_hash(x)
+
     def __repr__(self):
         return '<{%r: %r}>' % (self.dictkey.s_value, self.dictvalue.s_value)
 

Modified: pypy/branch/gc-hash-merge/pypy/annotation/test/test_annrpython.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/annotation/test/test_annrpython.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/annotation/test/test_annrpython.py	Fri Oct 16 11:47:18 2009
@@ -3187,14 +3187,24 @@
         s = a.build_types(f, [int])
         assert s.const == 0
 
-    def test_hash(self):
-        class A(object):
+    def test_hash_sideeffect(self):
+        class X:
             pass
-        def f():
-            return hash(A()) + hash(None)
+        x1 = X()
+        x2 = X()
+        x3 = X()
+        d = {(2, x1): 5, (3, x2): 7}
+        def f(n, m):
+            if   m == 1: x = x1
+            elif m == 2: x = x2
+            else:        x = x3
+            return d[n, x]
         a = self.RPythonAnnotator()
-        s = a.build_types(f, [])
+        s = a.build_types(f, [int, int])
         assert s.knowntype == int
+        assert hasattr(x1, '__precomputed_identity_hash')
+        assert hasattr(x2, '__precomputed_identity_hash')
+        assert not hasattr(x3, '__precomputed_identity_hash')
 
     def test_contains_of_empty_dict(self):
         class A(object):

Modified: pypy/branch/gc-hash-merge/pypy/annotation/unaryop.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/annotation/unaryop.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/annotation/unaryop.py	Fri Oct 16 11:47:18 2009
@@ -20,11 +20,12 @@
 def immutablevalue(x):
     return getbookkeeper().immutablevalue(x)
 
-UNARY_OPERATIONS = set(['len', 'is_true', 'getattr', 'setattr', 'delattr', 'hash',
+UNARY_OPERATIONS = set(['len', 'is_true', 'getattr', 'setattr', 'delattr',
                         'simple_call', 'call_args', 'str', 'repr',
                         'iter', 'next', 'invert', 'type', 'issubtype',
                         'pos', 'neg', 'nonzero', 'abs', 'hex', 'oct',
-                        'ord', 'int', 'float', 'long', 'id',
+                        'ord', 'int', 'float', 'long',
+                        'hash', 'id',    # <== not supported any more
                         'getslice', 'setslice', 'delslice',
                         'neg_ovf', 'abs_ovf', 'hint', 'unicode', 'unichr'])
 
@@ -98,7 +99,8 @@
         return obj.is_true()
 
     def hash(obj):
-        raise TypeError, "hash() is not generally supported"
+        raise TypeError, ("cannot use hash() in RPython; "
+                          "see objectmodel.compute_xxx()")
 
     def str(obj):
         getbookkeeper().count('str', obj)
@@ -121,10 +123,8 @@
         return SomeString()
 
     def id(obj):
-        raise Exception("cannot use id() in RPython; pick one of:\n"
-                        "\t\t objectmodel.compute_unique_id()\n"
-                        "\t\t hash()\n"
-                        "\t\t objectmodel.current_object_addr_as_int()")
+        raise Exception("cannot use id() in RPython; "
+                        "see objectmodel.compute_xxx()")
 
     def int(obj):
         return SomeInteger()
@@ -203,9 +203,6 @@
             return getbookkeeper().immutablevalue(bool(self.const))
         return s_Bool
 
-    def hash(flt):
-        return SomeInteger()
-
 class __extend__(SomeInteger):
 
     def invert(self):
@@ -272,11 +269,6 @@
     def getanyitem(tup):
         return unionof(*tup.items)
 
-    def hash(tup):
-        for s_item in tup.items:
-            s_item.hash()    # record that we need the hash of each item
-        return SomeInteger()
-
     def getslice(tup, s_start, s_stop):
         assert s_start.is_immutable_constant(),"tuple slicing: needs constants"
         assert s_stop.is_immutable_constant(), "tuple slicing: needs constants"
@@ -501,9 +493,6 @@
     def ord(str):
         return SomeInteger(nonneg=True)
 
-    def hash(str):
-        return SomeInteger()
-
     def method_split(str, patt): # XXX
         getbookkeeper().count("str_split", str, patt)
         return getbookkeeper().newlist(str.basestringclass())
@@ -632,10 +621,6 @@
             # create or update the attribute in clsdef
             clsdef.generalize_attr(attr, s_value)
 
-    def hash(ins):
-        getbookkeeper().needs_hash_support[ins.classdef] = True
-        return SomeInteger()
-
     def is_true_behavior(ins, s):
         if not ins.can_be_None:
             s.const = True
@@ -694,13 +679,6 @@
         else:
             return SomeObject()    # len() on a pbc? no chance
 
-    def hash(pbc):
-        if pbc.isNone():
-            # only supports hash(None) as part of hash(<SomeInstance>)
-            return SomeInteger()
-        else:
-            return SomeObject.hash(pbc)
-
 class __extend__(SomeGenericCallable):
     def call(self, args):
         bookkeeper = getbookkeeper()

Modified: pypy/branch/gc-hash-merge/pypy/config/pypyoption.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/config/pypyoption.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/config/pypyoption.py	Fri Oct 16 11:47:18 2009
@@ -250,11 +250,6 @@
                    default=False,
                    requires=[("objspace.std.withmultidict", True)]),
 
-        BoolOption("withsmalldicts",
-                   "handle small dictionaries differently",
-                   default=False,
-                   requires=[("objspace.std.withmultidict", True)]),
-
         BoolOption("withrangelist",
                    "enable special range list implementation that does not "
                    "actually create the full list until the resulting "

Modified: pypy/branch/gc-hash-merge/pypy/interpreter/buffer.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/interpreter/buffer.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/interpreter/buffer.py	Fri Oct 16 11:47:18 2009
@@ -19,6 +19,7 @@
 from pypy.interpreter.typedef import TypeDef
 from pypy.interpreter.gateway import interp2app, ObjSpace, W_Root
 from pypy.interpreter.error import OperationError
+from pypy.rlib.objectmodel import compute_hash
 
 
 class Buffer(Wrappable):
@@ -117,7 +118,7 @@
     descr_ge = _make_descr__cmp('ge')
 
     def descr_hash(self, space):
-        return space.wrap(hash(self.as_str()))
+        return space.wrap(compute_hash(self.as_str()))
     descr_hash.unwrap_spec = ['self', ObjSpace]
 
     def descr_mul(self, space, w_times):

Modified: pypy/branch/gc-hash-merge/pypy/interpreter/pycode.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/interpreter/pycode.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/interpreter/pycode.py	Fri Oct 16 11:47:18 2009
@@ -14,6 +14,7 @@
 from pypy.rlib.rarithmetic import intmask
 from pypy.rlib.debug import make_sure_not_resized, make_sure_not_modified
 from pypy.rlib import jit
+from pypy.rlib.objectmodel import compute_hash
 
 # helper
 
@@ -301,15 +302,15 @@
 
     def descr_code__hash__(self):
         space = self.space
-        result =  hash(self.co_name)
+        result =  compute_hash(self.co_name)
         result ^= self.co_argcount
         result ^= self.co_nlocals
         result ^= self.co_flags
         result ^= self.co_firstlineno
-        result ^= hash(self.co_code)
-        for name in self.co_varnames:  result ^= hash(name)
-        for name in self.co_freevars:  result ^= hash(name)
-        for name in self.co_cellvars:  result ^= hash(name)
+        result ^= compute_hash(self.co_code)
+        for name in self.co_varnames:  result ^= compute_hash(name)
+        for name in self.co_freevars:  result ^= compute_hash(name)
+        for name in self.co_cellvars:  result ^= compute_hash(name)
         w_result = space.wrap(intmask(result))
         for w_name in self.co_names_w:
             w_result = space.xor(w_result, space.hash(w_name))

Modified: pypy/branch/gc-hash-merge/pypy/interpreter/typedef.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/interpreter/typedef.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/interpreter/typedef.py	Fri Oct 16 11:47:18 2009
@@ -9,8 +9,7 @@
     DescrMismatch
 from pypy.interpreter.error import OperationError
 from pypy.tool.sourcetools import compile2, func_with_new_name
-from pypy.rlib.objectmodel import instantiate
-from pypy.rlib.rarithmetic import intmask
+from pypy.rlib.objectmodel import instantiate, compute_identity_hash
 from pypy.rlib.jit import hint
 
 class TypeDef:
@@ -20,12 +19,9 @@
         self.base = __base
         self.hasdict = '__dict__' in rawdict
         self.weakrefable = '__weakref__' in rawdict
-        self.custom_hash = '__hash__' in rawdict
         if __base is not None:
             self.hasdict     |= __base.hasdict
             self.weakrefable |= __base.weakrefable
-            self.custom_hash |= __base.custom_hash
-            # NB. custom_hash is sometimes overridden manually by callers
         self.rawdict = {}
         self.acceptable_as_base_class = True
         # xxx used by faking
@@ -50,39 +46,8 @@
 # ____________________________________________________________
 #  Hash support
 
-def get_default_hash_function(cls):
-    # go to the first parent class of 'cls' that has a typedef
-    while 'typedef' not in cls.__dict__:
-        cls = cls.__bases__[0]
-        if cls is object:
-            # not found: 'cls' must have been an abstract class,
-            # no hash function is needed
-            return None
-    if cls.typedef.custom_hash:
-        return None   # the typedef says that instances have their own
-                      # hash, so we don't need a default RPython-level
-                      # hash function.
-    try:
-        hashfunction = _hashfunction_cache[cls]
-    except KeyError:
-        def hashfunction(w_obj):
-            "Return the identity hash of 'w_obj'."
-            assert isinstance(w_obj, cls)
-            return hash(w_obj)   # forces a hash_cache only on 'cls' instances
-        hashfunction = func_with_new_name(hashfunction,
-                                       'hashfunction_for_%s' % (cls.__name__,))
-        _hashfunction_cache[cls] = hashfunction
-    return hashfunction
-get_default_hash_function._annspecialcase_ = 'specialize:memo'
-_hashfunction_cache = {}
-
 def default_identity_hash(space, w_obj):
-    fn = get_default_hash_function(w_obj.__class__)
-    if fn is None:
-        typename = space.type(w_obj).getname(space, '?')
-        msg = "%s objects have no default hash" % (typename,)
-        raise OperationError(space.w_TypeError, space.wrap(msg))
-    return space.wrap(intmask(fn(w_obj)))
+    return space.wrap(compute_identity_hash(w_obj))
 
 def descr__hash__unhashable(space, w_obj):
     typename = space.type(w_obj).getname(space, '?')

Modified: pypy/branch/gc-hash-merge/pypy/jit/backend/cli/method.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/jit/backend/cli/method.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/jit/backend/cli/method.py	Fri Oct 16 11:47:18 2009
@@ -576,9 +576,6 @@
         self.il.Emit(OpCodes.Call, methinfo)
         self.store_result(op)
 
-    def emit_op_ooidentityhash(self, op):
-        raise NotImplementedError
-
     def emit_op_call_impl(self, op):
         descr = op.descr
         assert isinstance(descr, runner.StaticMethDescr)

Modified: pypy/branch/gc-hash-merge/pypy/jit/metainterp/codewriter.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/jit/metainterp/codewriter.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/jit/metainterp/codewriter.py	Fri Oct 16 11:47:18 2009
@@ -1505,7 +1505,7 @@
 
     serialize_op_oostring  = handle_builtin_call
     serialize_op_oounicode = handle_builtin_call
-    serialize_op_oohash    = handle_builtin_call
+    serialize_op_gc_identityhash = handle_builtin_call
 
     # ----------
 

Modified: pypy/branch/gc-hash-merge/pypy/jit/metainterp/executor.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/jit/metainterp/executor.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/jit/metainterp/executor.py	Fri Oct 16 11:47:18 2009
@@ -145,10 +145,6 @@
         assert False
     return ConstInt(x)
 
-def do_ooidentityhash(cpu, box1):
-    obj = box1.getref_base()
-    return ConstInt(cpu.ts.ooidentityhash(obj))
-
 def do_subclassof(cpu, box1, box2):
     return ConstInt(cpu.ts.subclassOf(cpu, box1, box2))
 

Modified: pypy/branch/gc-hash-merge/pypy/jit/metainterp/history.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/jit/metainterp/history.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/jit/metainterp/history.py	Fri Oct 16 11:47:18 2009
@@ -3,6 +3,7 @@
 from pypy.rpython.lltypesystem import lltype, llmemory, rffi
 from pypy.rpython.ootypesystem import ootype
 from pypy.rlib.objectmodel import we_are_translated, r_dict, Symbolic
+from pypy.rlib.objectmodel import compute_hash
 from pypy.rlib.rarithmetic import intmask
 from pypy.tool.uid import uid
 from pypy.conftest import option
@@ -310,7 +311,7 @@
         return self.value
 
     def _get_hash_(self):
-        return hash(self.value)
+        return compute_hash(self.value)
 
     def set_future_value(self, cpu, j):
         cpu.set_future_value_float(j, self.getfloat())
@@ -350,7 +351,10 @@
     getref._annspecialcase_ = 'specialize:arg(1)'
 
     def _get_hash_(self):
-        return lltype.cast_ptr_to_int(self.value)
+        if self.value:
+            return lltype.identityhash(self.value)
+        else:
+            return 0
 
     def getaddr(self, cpu):
         return llmemory.cast_ptr_to_adr(self.value)
@@ -398,7 +402,7 @@
 
     def _get_hash_(self):
         if self.value:
-            return ootype.ooidentityhash(self.value)
+            return ootype.identityhash(self.value)
         else:
             return 0
 
@@ -530,7 +534,7 @@
         return self.value
 
     def _get_hash_(self):
-        return hash(self.value)
+        return compute_hash(self.value)
 
     def set_future_value(self, cpu, j):
         cpu.set_future_value_float(j, self.value)
@@ -566,7 +570,10 @@
         return llmemory.cast_ptr_to_adr(self.value)
 
     def _get_hash_(self):
-        return lltype.cast_ptr_to_int(self.value)
+        if self.value:
+            return lltype.identityhash(self.value)
+        else:
+            return 0
 
     def set_future_value(self, cpu, j):
         cpu.set_future_value_ref(j, self.value)
@@ -602,7 +609,7 @@
 
     def _get_hash_(self):
         if self.value:
-            return ootype.ooidentityhash(self.value)
+            return ootype.identityhash(self.value)
         else:
             return 0
 
@@ -631,11 +638,20 @@
     return c1 == c2
 
 def dc_hash(c):
+    "NOT_RPYTHON"
+    # This is called during translation only.  Avoid using identityhash(),
+    # to avoid forcing a hash, at least on lltype objects.
     if not isinstance(c, Const):
         return hash(c)
     if isinstance(c.value, Symbolic):
         return id(c.value)
     try:
+        if isinstance(c, ConstPtr):
+            p = lltype.normalizeptr(c.value)
+            if p is not None:
+                return hash(p._obj)
+            else:
+                return 0
         return c._get_hash_()
     except lltype.DelayedPointer:
         return -2      # xxx risk of changing hash...

Modified: pypy/branch/gc-hash-merge/pypy/jit/metainterp/optimizeopt.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/jit/metainterp/optimizeopt.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/jit/metainterp/optimizeopt.py	Fri Oct 16 11:47:18 2009
@@ -364,7 +364,7 @@
         self.cpu = metainterp_sd.cpu
         self.loop = loop
         self.values = {}
-        self.interned_refs = {}
+        self.interned_refs = self.cpu.ts.new_ref_dict()
         self.resumedata_memo = resume.ResumeDataLoopMemo(self.cpu)
         self.heap_op_optimizer = HeapOpOptimizer(self)
 
@@ -380,12 +380,7 @@
             value = constbox.getref_base()
             if not value:
                 return box
-            key = self.cpu.ts.cast_ref_to_hashable(self.cpu, value)
-            try:
-                return self.interned_refs[key]
-            except KeyError:
-                self.interned_refs[key] = box
-                return box
+            return self.interned_refs.setdefault(value, box)
         else:
             return box
 

Modified: pypy/branch/gc-hash-merge/pypy/jit/metainterp/pyjitpl.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/jit/metainterp/pyjitpl.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/jit/metainterp/pyjitpl.py	Fri Oct 16 11:47:18 2009
@@ -329,10 +329,6 @@
     def opimpl_subclassof(self, box1, box2):
         self.execute(rop.SUBCLASSOF, box1, box2)
 
-    @arguments("box")
-    def opimpl_ooidentityhash(self, box):
-        self.execute(rop.OOIDENTITYHASH, box)
-
     @arguments("descr", "box")
     def opimpl_new_array(self, itemsize, countbox):
         self.execute_with_descr(rop.NEW_ARRAY, itemsize, countbox)
@@ -1034,7 +1030,7 @@
     def _setup_class_sizes(self):
         class_sizes = {}
         for vtable, sizedescr in self._class_sizes:
-            vtable = self.cpu.ts.cast_baseclass_to_hashable(self.cpu, vtable)
+            vtable = self.cpu.ts.cast_vtable_to_hashable(self.cpu, vtable)
             class_sizes[vtable] = sizedescr
         self.cpu.set_class_sizes(class_sizes)
 

Modified: pypy/branch/gc-hash-merge/pypy/jit/metainterp/resoperation.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/jit/metainterp/resoperation.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/jit/metainterp/resoperation.py	Fri Oct 16 11:47:18 2009
@@ -187,7 +187,6 @@
     'UNICODEGETITEM/2',
     #
     # ootype operations
-    'OOIDENTITYHASH/1',
     'INSTANCEOF/1d',
     'SUBCLASSOF/2',
     #

Modified: pypy/branch/gc-hash-merge/pypy/jit/metainterp/resume.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/jit/metainterp/resume.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/jit/metainterp/resume.py	Fri Oct 16 11:47:18 2009
@@ -101,7 +101,7 @@
         self.cpu = cpu
         self.consts = []
         self.large_ints = {}
-        self.refs = {}
+        self.refs = cpu.ts.new_ref_dict_2()
         self.numberings = {}
 
     def getconst(self, const):
@@ -124,7 +124,6 @@
             val = const.getref_base()
             if not val:
                 return NULLREF
-            val = self.cpu.ts.cast_ref_to_hashable(self.cpu, val)
             tagged = self.refs.get(val, UNASSIGNED)
             if not tagged_eq(tagged, UNASSIGNED):
                 return tagged

Modified: pypy/branch/gc-hash-merge/pypy/jit/metainterp/support.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/jit/metainterp/support.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/jit/metainterp/support.py	Fri Oct 16 11:47:18 2009
@@ -134,6 +134,10 @@
 _ll_2_list_getitem_foldable = _ll_2_list_getitem
 _ll_1_list_len_foldable     = _ll_1_list_len
 
+def _ll_1_gc_identityhash(x):
+    return lltype.identityhash(x)
+
+
 class LLtypeHelpers:
 
     # ---------- dict ----------
@@ -268,12 +272,6 @@
     def _ll_1_oounicode_string_foldable(s):
         return ootype.oounicode(s, -1)
 
-    def _ll_1_oohash_string_foldable(s):
-        return ootype.oohash(s)
-
-    def _ll_1_oohash_unicode_foldable(u):
-        return ootype.oohash(u)
-
 # -------------------------------------------------------
 
 def setup_extra_builtin(rtyper, oopspec_name, nb_args):
@@ -334,14 +332,8 @@
         T = ootype.ROOT
     return '%s_%s_foldable' % (op.opname, T._name.lower()), args
 
-def get_oohash_oopspec(op):
-    T = op.args[0].concretetype
-    if T is ootype.String:
-        return 'oohash_string_foldable', op.args
-    elif T is ootype.Unicode:
-        return 'oohash_unicode_foldable', op.args
-    else:
-        raise Exception("oohash() of type %r" % (T,))
+def get_identityhash_oopspec(op):
+    return 'gc_identityhash', op.args
 
 
 RENAMED_ADT_NAME = {
@@ -371,8 +363,8 @@
         return get_call_oopspec_opargs(fnobj, opargs)
     elif op.opname in ('oostring', 'oounicode'):
         return get_oostring_oopspec(op)
-    elif op.opname == 'oohash':
-        return get_oohash_oopspec(op)
+    elif op.opname == 'gc_identityhash':
+        return get_identityhash_oopspec(op)
     else:
         raise ValueError(op.opname)
 

Modified: pypy/branch/gc-hash-merge/pypy/jit/metainterp/test/test_basic.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/jit/metainterp/test/test_basic.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/jit/metainterp/test/test_basic.py	Fri Oct 16 11:47:18 2009
@@ -971,19 +971,19 @@
 class TestOOtype(BasicTests, OOJitMixin):
 
     def test_oohash(self):
-        def f():
-            s = ootype.oostring(5, -1)
-            return ootype.oohash(s)
-        res = self.interp_operations(f, [])
-        # xxx can we rely on oohash() returning the same value in and out of
-        # translation?
-        assert res == ootype.oohash(ootype.oostring(5, -1))
+        def f(n):
+            s = ootype.oostring(n, -1)
+            return s.ll_hash()
+        res = self.interp_operations(f, [5])
+        assert res == ootype.oostring(5, -1).ll_hash()
 
-    def test_ooidentityhash(self):
+    def test_identityhash(self):
+        A = ootype.Instance("A", ootype.ROOT)
         def f():
-            s1 = ootype.oostring(5, -1)
-            s2 = ootype.oostring(6, -1)
-            return ootype.ooidentityhash(s1) == ootype.ooidentityhash(s2)
+            obj1 = ootype.new(A)
+            obj2 = ootype.new(A)
+            return ootype.identityhash(obj1) == ootype.identityhash(obj2)
+        assert not f()
         res = self.interp_operations(f, [])
         assert not res
 
@@ -1057,6 +1057,16 @@
 
 class BaseLLtypeTests(BasicTests):
 
+    def test_identityhash(self):
+        A = lltype.GcStruct("A")
+        def f():
+            obj1 = lltype.malloc(A)
+            obj2 = lltype.malloc(A)
+            return lltype.identityhash(obj1) == lltype.identityhash(obj2)
+        assert not f()
+        res = self.interp_operations(f, [])
+        assert not res
+
     def test_oops_on_nongc(self):
         from pypy.rpython.lltypesystem import lltype
         

Modified: pypy/branch/gc-hash-merge/pypy/jit/metainterp/test/test_compile.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/jit/metainterp/test/test_compile.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/jit/metainterp/test/test_compile.py	Fri Oct 16 11:47:18 2009
@@ -1,7 +1,7 @@
 from pypy.jit.metainterp.history import LoopToken, ConstInt, History, Stats
 from pypy.jit.metainterp.specnode import NotSpecNode, ConstantSpecNode
 from pypy.jit.metainterp.compile import insert_loop_token, compile_new_loop
-from pypy.jit.metainterp import optimize, jitprof
+from pypy.jit.metainterp import optimize, jitprof, typesystem
 from pypy.jit.metainterp.test.oparser import parse
 from pypy.jit.metainterp.test.test_optimizefindnode import LLtypeMixin
 
@@ -26,6 +26,7 @@
 
 
 class FakeCPU:
+    ts = typesystem.llhelper
     def __init__(self):
         self.seen = []
     def compile_loop(self, inputargs, operations, token):

Modified: pypy/branch/gc-hash-merge/pypy/jit/metainterp/test/test_loop.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/jit/metainterp/test/test_loop.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/jit/metainterp/test/test_loop.py	Fri Oct 16 11:47:18 2009
@@ -1,5 +1,6 @@
 import py
 from pypy.rlib.jit import JitDriver, OPTIMIZER_SIMPLE, OPTIMIZER_FULL
+from pypy.rlib.objectmodel import compute_hash
 from pypy.jit.metainterp.warmspot import ll_meta_interp, get_stats
 from pypy.rpython.lltypesystem import lltype
 from pypy.jit.metainterp.test.test_basic import LLJitMixin, OOJitMixin
@@ -349,7 +350,7 @@
                 myjitdriver.jit_merge_point(n=n, x=x)
                 x += unichr(n)
                 n -= 1
-            return hash(x)
+            return compute_hash(x)
         expected = self.run_directly(f, [100])
         res = self.meta_interp(f, [100])
         assert res == expected
@@ -363,7 +364,7 @@
                 myjitdriver.jit_merge_point(n=n, x=x)
                 x += chr(n)
                 n -= 1
-            return hash(x)
+            return compute_hash(x)
         expected = self.run_directly(f, [100])
         res = self.meta_interp(f, [100])
         assert res == expected

Modified: pypy/branch/gc-hash-merge/pypy/jit/metainterp/test/test_optimizeopt.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/jit/metainterp/test/test_optimizeopt.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/jit/metainterp/test/test_optimizeopt.py	Fri Oct 16 11:47:18 2009
@@ -1,5 +1,4 @@
 import py
-from pypy.rpython.ootypesystem import ootype
 from pypy.rlib.objectmodel import instantiate
 from pypy.jit.metainterp.test.test_resume import MyMetaInterp
 from pypy.jit.metainterp.test.test_optimizefindnode import (LLtypeMixin,
@@ -35,7 +34,8 @@
     from pypy.jit.metainterp.resume import tag, TAGBOX
     b0 = BoxInt()
     b1 = BoxInt()
-    opt = optimizeopt.Optimizer(FakeMetaInterpStaticData(None), None)
+    opt = optimizeopt.Optimizer(FakeMetaInterpStaticData(LLtypeMixin.cpu),
+                                None)
     fdescr = ResumeGuardDescr(None)
     op = ResOperation(rop.GUARD_TRUE, [], None, descr=fdescr)
     # setup rd data

Modified: pypy/branch/gc-hash-merge/pypy/jit/metainterp/test/test_resume.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/jit/metainterp/test/test_resume.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/jit/metainterp/test/test_resume.py	Fri Oct 16 11:47:18 2009
@@ -244,7 +244,7 @@
           FakeFrame("code1", 3, 7, b3, c2, b1),
           FakeFrame("code2", 9, -1, c3, b2)]
     capture_resumedata(fs, None, storage)
-    memo = ResumeDataLoopMemo(None)
+    memo = ResumeDataLoopMemo(LLtypeMixin.cpu)
     modifier = ResumeDataVirtualAdder(storage, memo)
     liveboxes = modifier.finish({})
     metainterp = MyMetaInterp()
@@ -268,7 +268,7 @@
           FakeFrame("code1", 3, 7, b3, c2, b1),
           FakeFrame("code2", 9, -1, c3, b2)]
     capture_resumedata(fs, [b4], storage)
-    memo = ResumeDataLoopMemo(None)
+    memo = ResumeDataLoopMemo(LLtypeMixin.cpu)
     modifier = ResumeDataVirtualAdder(storage, memo)
     liveboxes = modifier.finish({})
     metainterp = MyMetaInterp()
@@ -296,7 +296,7 @@
     fs = fs[:-1] + [FakeFrame("code2", 10, -1, c3, b2, b4)]
     capture_resumedata(fs, None, storage2)
     
-    memo = ResumeDataLoopMemo(None)
+    memo = ResumeDataLoopMemo(LLtypeMixin.cpu)
     modifier = ResumeDataVirtualAdder(storage, memo)
     liveboxes = modifier.finish({})
 
@@ -421,7 +421,7 @@
 
 
 def test_ResumeDataLoopMemo_ints():
-    memo = ResumeDataLoopMemo(None)
+    memo = ResumeDataLoopMemo(LLtypeMixin.cpu)
     tagged = memo.getconst(ConstInt(44))
     assert untag(tagged) == (44, TAGINT)
     tagged = memo.getconst(ConstInt(-3))
@@ -461,7 +461,7 @@
     assert tagged == NULLREF
 
 def test_ResumeDataLoopMemo_other():
-    memo = ResumeDataLoopMemo(None)
+    memo = ResumeDataLoopMemo(LLtypeMixin.cpu)
     const = ConstFloat(-1.0)
     tagged = memo.getconst(const)
     index, tagbits = untag(tagged)
@@ -479,7 +479,7 @@
     env2 = [c3, b3, b1, c3]
     snap2 = Snapshot(snap, env2)
 
-    memo = ResumeDataLoopMemo(None)
+    memo = ResumeDataLoopMemo(LLtypeMixin.cpu)
 
     numb, liveboxes, v = memo.number({}, snap1)
     assert v == 0

Modified: pypy/branch/gc-hash-merge/pypy/jit/metainterp/test/test_warmspot.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/jit/metainterp/test/test_warmspot.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/jit/metainterp/test/test_warmspot.py	Fri Oct 16 11:47:18 2009
@@ -1,5 +1,5 @@
 import py
-from pypy.jit.metainterp.warmspot import ll_meta_interp, cast_whatever_to_int
+from pypy.jit.metainterp.warmspot import ll_meta_interp, hash_whatever
 from pypy.jit.metainterp.warmspot import get_stats
 from pypy.rlib.jit import JitDriver, OPTIMIZER_FULL, OPTIMIZER_SIMPLE
 from pypy.rlib.jit import unroll_safe
@@ -8,11 +8,11 @@
 from pypy.jit.metainterp.test.test_basic import LLJitMixin, OOJitMixin
 
 
-def test_translate_cast_whatever_to_int():
+def test_translate_hash_whatever():
     from pypy.rpython.test.test_llinterp import interpret
     from pypy.rpython.lltypesystem import lltype
     def fn(x):
-        return cast_whatever_to_int(lltype.typeOf(x), x)
+        return hash_whatever(lltype.typeOf(x), x)
     for type_system in ('lltype', 'ootype'):
         res = interpret(fn, [42], type_system=type_system)
         assert res == 42

Modified: pypy/branch/gc-hash-merge/pypy/jit/metainterp/typesystem.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/jit/metainterp/typesystem.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/jit/metainterp/typesystem.py	Fri Oct 16 11:47:18 2009
@@ -4,7 +4,7 @@
 from pypy.rpython.annlowlevel import cast_instance_to_base_ptr
 from pypy.rpython.annlowlevel import cast_instance_to_base_obj
 from pypy.jit.metainterp import history
-from pypy.jit.metainterp import history
+from pypy.rlib.objectmodel import r_dict
 
 def deref(T):
     if isinstance(T, lltype.Ptr):
@@ -114,11 +114,15 @@
         ll = llstr(str)
         return history.ConstPtr(lltype.cast_opaque_ptr(llmemory.GCREF, ll))
 
-    def cast_ref_to_hashable(self, cpu, ptr):
-        adr = llmemory.cast_ptr_to_adr(ptr)
-        return cpu.cast_adr_to_int(adr)
+    # A dict whose keys are refs (like the .value of BoxPtr).
+    # It is an r_dict on lltype.  Two copies, to avoid conflicts with
+    # the value type.  Note that NULL is not allowed as a key.
+    def new_ref_dict(self):
+        return r_dict(rd_eq, rd_hash)
+    def new_ref_dict_2(self):
+        return r_dict(rd_eq, rd_hash)
 
-    def cast_baseclass_to_hashable(self, cpu, ptr):
+    def cast_vtable_to_hashable(self, cpu, ptr):
         adr = llmemory.cast_ptr_to_adr(ptr)
         return cpu.cast_adr_to_int(adr)
 
@@ -133,8 +137,14 @@
     def getaddr_for_box(self, cpu, box):
         return box.getaddr(cpu)
 
-    def ooidentityhash(self, x):
-        raise NotImplementedError
+def rd_eq(ref1, ref2):
+    return ref1 == ref2
+
+def rd_hash(ref):
+    assert ref
+    return lltype.identityhash(ref)
+
+# ____________________________________________________________
 
 class OOTypeHelper(TypeSystemHelper):
 
@@ -212,10 +222,15 @@
         oo = oostr(str)
         return history.ConstObj(ootype.cast_to_object(oo))
 
-    def cast_ref_to_hashable(self, cpu, obj):
-        return ootype.cast_to_object(obj)
+    # A dict whose keys are refs (like the .value of BoxObj).
+    # It is a normal dict on ootype.  Two copies, to avoid conflicts
+    # with the value type.
+    def new_ref_dict(self):
+        return {}
+    def new_ref_dict_2(self):
+        return {}
 
-    def cast_baseclass_to_hashable(self, cpu, obj):
+    def cast_vtable_to_hashable(self, cpu, obj):
         return ootype.cast_to_object(obj)
 
     def cast_from_ref(self, TYPE, value):
@@ -228,8 +243,6 @@
 
     def getaddr_for_box(self, cpu, box):
         return box.getref_base()
-
-    ooidentityhash = staticmethod(ootype.ooidentityhash)
     
 llhelper = LLTypeHelper()
 oohelper = OOTypeHelper()

Modified: pypy/branch/gc-hash-merge/pypy/jit/metainterp/warmspot.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/jit/metainterp/warmspot.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/jit/metainterp/warmspot.py	Fri Oct 16 11:47:18 2009
@@ -637,18 +637,28 @@
     return x == y
 equal_whatever._annspecialcase_ = 'specialize:arg(0)'
 
-def cast_whatever_to_int(TYPE, x):
+def hash_whatever(TYPE, x):
+    # Hash of lltype or ootype object.
+    # Only supports strings, unicodes and regular instances,
+    # as well as primitives that can meaningfully be cast to Signed.
     if isinstance(TYPE, lltype.Ptr):
-        # only supports strings, unicodes and regular instances *with a hash
-        # cache*.  The 'jit_merge_point' hint forces a hash cache to appear.
-        return x.gethash()
+        if TYPE.TO is rstr.STR or TYPE.TO is rstr.UNICODE:
+            return rstr.LLHelpers.ll_strhash(x)    # assumed not null
+        else:
+            if x:
+                return lltype.identityhash(x)
+            else:
+                return 0
     elif TYPE is ootype.String or TYPE is ootype.Unicode:
-        return ootype.oohash(x)
+        return x.ll_hash()
     elif isinstance(TYPE, ootype.OOType):
-        return ootype.ooidentityhash(x)
+        if x:
+            return ootype.identityhash(x)
+        else:
+            return 0
     else:
         return lltype.cast_primitive(lltype.Signed, x)
-cast_whatever_to_int._annspecialcase_ = 'specialize:arg(0)'
+hash_whatever._annspecialcase_ = 'specialize:arg(0)'
 
 # ____________________________________________________________
 
@@ -914,7 +924,7 @@
                     result = result * mult
                     mult = mult + 82520 + 2*len(greenargs)
                 item = greenargs[i]
-                result = result ^ cast_whatever_to_int(TYPE, item)
+                result = result ^ hash_whatever(TYPE, item)
                 i = i + 1
             return result         # returns a r_uint
         getkeyhash._always_inline_ = True

Modified: pypy/branch/gc-hash-merge/pypy/module/__builtin__/interp_classobj.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/module/__builtin__/interp_classobj.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/module/__builtin__/interp_classobj.py	Fri Oct 16 11:47:18 2009
@@ -6,6 +6,7 @@
 from pypy.interpreter.argument import Arguments
 from pypy.interpreter.baseobjspace import Wrappable
 from pypy.rlib.rarithmetic import r_uint, intmask
+from pypy.rlib.objectmodel import compute_identity_hash
 
 
 def raise_type_err(space, argument, expected, w_obj):
@@ -569,7 +570,7 @@
                 raise OperationError(space.w_TypeError,
                                      space.wrap("unhashable instance"))
             else:
-                return space.wrap(hash(self))
+                return space.wrap(compute_identity_hash(self))
         w_ret = space.call_function(w_func)
         if (not space.is_true(space.isinstance(w_ret, space.w_int)) and
             not space.is_true(space.isinstance(w_ret, space.w_long))):

Modified: pypy/branch/gc-hash-merge/pypy/objspace/descroperation.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/objspace/descroperation.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/objspace/descroperation.py	Fri Oct 16 11:47:18 2009
@@ -320,10 +320,10 @@
     def hash(space, w_obj):
         w_hash = space.lookup(w_obj, '__hash__')
         if w_hash is None:
-            if space.lookup(w_obj, '__eq__') is not None or \
-               space.lookup(w_obj, '__cmp__') is not None: 
-                raise OperationError(space.w_TypeError, 
-                                     space.wrap("unhashable type"))
+            # xxx there used to be logic about "do we have __eq__ or __cmp__"
+            # here, but it does not really make sense, as 'object' has a
+            # default __hash__.  This path should only be taken under very
+            # obscure circumstances.
             return default_identity_hash(space, w_obj)
         # XXX CPython has a special case for types with "__hash__ = None"
         # to produce a nicer error message, namely "unhashable type: 'X'".

Modified: pypy/branch/gc-hash-merge/pypy/objspace/std/complextype.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/objspace/std/complextype.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/objspace/std/complextype.py	Fri Oct 16 11:47:18 2009
@@ -222,5 +222,4 @@
     imag = complexwprop('imagval'),
     )
 
-complex_typedef.custom_hash = True
 complex_typedef.registermethods(globals())

Modified: pypy/branch/gc-hash-merge/pypy/objspace/std/dictmultiobject.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/objspace/std/dictmultiobject.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/objspace/std/dictmultiobject.py	Fri Oct 16 11:47:18 2009
@@ -30,7 +30,6 @@
 #
 #              EmptyDictImplementation
 #                /                 \
-#  SmallStrDictImplementation   SmallDictImplementation
 #               |                   |
 #   StrDictImplementation           |
 #                \                 /
@@ -164,15 +163,9 @@
     def setitem(self, w_key, w_value):
         space = self.space
         if _is_str(space, w_key):
-            if space.config.objspace.std.withsmalldicts:
-                return SmallStrDictImplementation(space, w_key, w_value)
-            else:
-                return StrDictImplementation(space).setitem_str(w_key, w_value)
+            return StrDictImplementation(space).setitem_str(w_key, w_value)
         else:
-            if space.config.objspace.std.withsmalldicts:
-                return SmallDictImplementation(space, w_key, w_value)
-            else:
-                return space.DefaultDictImpl(space).setitem(w_key, w_value)
+            return space.DefaultDictImpl(space).setitem(w_key, w_value)
     def setitem_str(self, w_key, w_value, shadows_type=True):
         return StrDictImplementation(self.space).setitem_str(w_key, w_value)
         #return SmallStrDictImplementation(self.space, w_key, w_value)
@@ -202,228 +195,10 @@
     def items(self):
         return []
 
-
 class EmptyIteratorImplementation(IteratorImplementation):
     def next_entry(self):
         return None
 
-class Entry(object):
-    def __init__(self):
-        self.hash = 0
-        self.w_key = None
-        self.w_value = None
-    def __repr__(self):
-        return '<%r, %r, %r>'%(self.hash, self.w_key, self.w_value)
-
-class SmallDictImplementation(DictImplementation):
-    # XXX document the invariants here!
-    
-    def __init__(self, space, w_key, w_value):
-        self.space = space
-        self.entries = [Entry(), Entry(), Entry(), Entry(), Entry()]
-        self.entries[0].hash = space.hash_w(w_key)
-        self.entries[0].w_key = w_key
-        self.entries[0].w_value = w_value
-        self.valid = 1
-
-    def _lookup(self, w_key):
-        hash = self.space.hash_w(w_key)
-        i = 0
-        last = self.entries[self.valid]
-        last.hash = hash
-        last.w_key = w_key
-        while 1:
-            look_entry = self.entries[i]
-            if look_entry.hash == hash and self.space.eq_w(look_entry.w_key, w_key):
-                return look_entry
-            i += 1
-
-    def _convert_to_rdict(self):
-        newimpl = self.space.DefaultDictImpl(self.space)
-        i = 0
-        while 1:
-            entry = self.entries[i]
-            if entry.w_value is None:
-                break
-            newimpl.setitem(entry.w_key, entry.w_value)
-            i += 1
-        return newimpl
-
-    def setitem(self, w_key, w_value):
-        entry = self._lookup(w_key)
-        if entry.w_value is None:
-            if self.valid == 4:
-                return self._convert_to_rdict().setitem(w_key, w_value)
-            self.valid += 1
-        entry.w_value = w_value
-        return self
-
-    def setitem_str(self, w_key, w_value, shadows_type=True):
-        return self.setitem(w_key, w_value)
-
-    def delitem(self, w_key):
-        entry = self._lookup(w_key)
-        if entry.w_value is not None:
-            for i in range(self.entries.index(entry), self.valid):
-                self.entries[i] = self.entries[i+1]
-            self.entries[self.valid] = entry
-            entry.w_value = None
-            self.valid -= 1
-            if self.valid == 0:
-                return self.space.emptydictimpl
-            return self
-        else:
-            entry.w_key = None
-            raise KeyError
-
-    def length(self):
-        return self.valid
-    def get(self, w_lookup):
-        entry = self._lookup(w_lookup)
-        val = entry.w_value
-        if val is None:
-            entry.w_key = None
-        return val
-
-    def iteritems(self):
-        return self._convert_to_rdict().iteritems()
-    def iterkeys(self):
-        return self._convert_to_rdict().iterkeys()
-    def itervalues(self):
-        return self._convert_to_rdict().itervalues()
-
-    def keys(self):
-        return [self.entries[i].w_key for i in range(self.valid)]
-    def values(self):
-        return [self.entries[i].w_value for i in range(self.valid)]
-    def items(self):
-        return [self.space.newtuple([e.w_key, e.w_value])
-                    for e in [self.entries[i] for i in range(self.valid)]]
-
-
-class StrEntry(object):
-    def __init__(self):
-        self.key = None
-        self.w_value = None
-    def __repr__(self):
-        return '<%r, %r, %r>'%(self.hash, self.key, self.w_value)
-
-class SmallStrDictImplementation(DictImplementation):
-    # XXX document the invariants here!
-
-    def __init__(self, space, w_key, w_value):
-        self.space = space
-        self.entries = [StrEntry(), StrEntry(), StrEntry(), StrEntry(), StrEntry()]
-        key = space.str_w(w_key)
-        self.entries[0].key = key
-        self.entries[0].w_value = w_value
-        self.valid = 1
-
-    def _lookup(self, key):
-        assert isinstance(key, str)
-        _hash = hash(key)
-        i = 0
-        last = self.entries[self.valid]
-        last.key = key
-        while 1:
-            look_entry = self.entries[i]
-            if hash(look_entry.key) == _hash and look_entry.key == key:
-                return look_entry
-            i += 1
-
-    def _convert_to_rdict(self):
-        newimpl = self.space.DefaultDictImpl(self.space)
-        i = 0
-        while 1:
-            entry = self.entries[i]
-            if entry.w_value is None:
-                break
-            newimpl.setitem(self.space.wrap(entry.key), entry.w_value)
-            i += 1
-        return newimpl
-
-    def _convert_to_sdict(self, w_value):
-        # this relies on the fact that the new key is in the entries
-        # list already.
-        newimpl = StrDictImplementation(self.space)
-        i = 0
-        while 1:
-            entry = self.entries[i]
-            if entry.w_value is None:
-                newimpl.content[entry.key] = w_value
-                break
-            newimpl.content[entry.key] = entry.w_value
-            i += 1
-        return newimpl
-
-    def setitem(self, w_key, w_value):
-        if not _is_str(self.space, w_key):
-            return self._convert_to_rdict().setitem(w_key, w_value)
-        return self.setitem_str(w_key, w_value)
-
-    def setitem_str(self, w_key, w_value, shadows_type=True):
-        entry = self._lookup(self.space.str_w(w_key))
-        if entry.w_value is None:
-            if self.valid == 4:
-                return self._convert_to_sdict(w_value)
-            self.valid += 1
-        entry.w_value = w_value
-        return self
-
-    def delitem(self, w_key):
-        space = self.space
-        w_key_type = space.type(w_key)
-        if space.is_w(w_key_type, space.w_str):
-            entry = self._lookup(space.str_w(w_key))
-            if entry.w_value is not None:
-                for i in range(self.entries.index(entry), self.valid):
-                    self.entries[i] = self.entries[i+1]
-                self.entries[self.valid] = entry
-                entry.w_value = None
-                self.valid -= 1
-                if self.valid == 0:
-                    return self.space.emptydictimpl
-                return self
-            else:
-                entry.key = None
-                raise KeyError
-        elif _is_sane_hash(self.space, w_key_type):
-            raise KeyError
-        else:
-            return self._convert_to_rdict().delitem(w_key)
-
-    def length(self):
-        return self.valid
-
-    def get(self, w_lookup):
-        space = self.space
-        w_lookup_type = space.type(w_lookup)
-        if space.is_w(w_lookup_type, space.w_str):
-            entry = self._lookup(space.str_w(w_lookup))
-            val = entry.w_value
-            if val is None:
-                entry.key = None
-            return val
-        elif _is_sane_hash(self.space, w_lookup_type):
-            return None
-        else:
-            return self._convert_to_rdict().get(w_lookup)
-
-    def iteritems(self):
-        return self._convert_to_rdict().iteritems()
-    def iterkeys(self):
-        return self._convert_to_rdict().iterkeys()
-    def itervalues(self):
-        return self._convert_to_rdict().itervalues()
-
-    def keys(self):
-        return [self.space.wrap(self.entries[i].key) for i in range(self.valid)]
-    def values(self):
-        return [self.entries[i].w_value for i in range(self.valid)]
-    def items(self):
-        return [self.space.newtuple([self.space.wrap(e.key), e.w_value])
-                    for e in [self.entries[i] for i in range(self.valid)]]
-
 
 class StrDictImplementation(DictImplementation):
     def __init__(self, space):

Modified: pypy/branch/gc-hash-merge/pypy/objspace/std/floattype.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/objspace/std/floattype.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/objspace/std/floattype.py	Fri Oct 16 11:47:18 2009
@@ -50,4 +50,3 @@
 Convert a string or number to a floating point number, if possible.''',
     __new__ = newmethod(descr__new__),
     )
-float_typedef.custom_hash = True

Modified: pypy/branch/gc-hash-merge/pypy/objspace/std/frozensettype.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/objspace/std/frozensettype.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/objspace/std/frozensettype.py	Fri Oct 16 11:47:18 2009
@@ -55,5 +55,4 @@
     __new__ = newmethod(descr__frozenset__new__),
     )
 
-frozenset_typedef.custom_hash = True
 frozenset_typedef.registermethods(globals())

Modified: pypy/branch/gc-hash-merge/pypy/objspace/std/inttype.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/objspace/std/inttype.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/objspace/std/inttype.py	Fri Oct 16 11:47:18 2009
@@ -145,4 +145,3 @@
 will be returned instead.''',
     __new__ = newmethod(descr__new__),
     )
-int_typedef.custom_hash = True

Modified: pypy/branch/gc-hash-merge/pypy/objspace/std/longtype.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/objspace/std/longtype.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/objspace/std/longtype.py	Fri Oct 16 11:47:18 2009
@@ -77,4 +77,3 @@
 converting a non-string.''',
     __new__ = newmethod(descr__new__),
     )
-long_typedef.custom_hash = True

Modified: pypy/branch/gc-hash-merge/pypy/objspace/std/objecttype.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/objspace/std/objecttype.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/objspace/std/objecttype.py	Fri Oct 16 11:47:18 2009
@@ -182,5 +182,3 @@
     __init__ = gateway.interp2app(descr__init__,
                                   unwrap_spec=[gateway.ObjSpace,gateway.W_Root,gateway.Arguments]),
     )
-
-object_typedef.custom_hash = False    # object.__hash__ is not a custom hash

Modified: pypy/branch/gc-hash-merge/pypy/objspace/std/stringobject.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/objspace/std/stringobject.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/objspace/std/stringobject.py	Fri Oct 16 11:47:18 2009
@@ -2,8 +2,8 @@
 
 from pypy.objspace.std.objspace import *
 from pypy.interpreter import gateway
-from pypy.rlib.rarithmetic import ovfcheck, _hash_string
-from pypy.rlib.objectmodel import we_are_translated
+from pypy.rlib.rarithmetic import ovfcheck
+from pypy.rlib.objectmodel import we_are_translated, compute_hash
 from pypy.objspace.std.inttype import wrapint
 from pypy.objspace.std.sliceobject import W_SliceObject, normalize_simple_slice
 from pypy.objspace.std import slicetype
@@ -755,12 +755,7 @@
 
 def hash__String(space, w_str):
     s = w_str._value
-    if we_are_translated():
-        x = hash(s)            # to use the hash cache in rpython strings
-    else:
-        x = _hash_string(s)    # to make sure we get the same hash as rpython
-        # (otherwise translation will freeze W_DictObjects where we can't find
-        #  the keys any more!)
+    x = compute_hash(s)
     return wrapint(space, x)
 
 def lt__String_String(space, w_str1, w_str2):

Modified: pypy/branch/gc-hash-merge/pypy/objspace/std/stringtype.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/objspace/std/stringtype.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/objspace/std/stringtype.py	Fri Oct 16 11:47:18 2009
@@ -301,7 +301,6 @@
 If the argument is a string, the return value is the same object.'''
     )
 
-str_typedef.custom_hash = True
 str_typedef.registermethods(globals())
 
 # ____________________________________________________________

Modified: pypy/branch/gc-hash-merge/pypy/objspace/std/test/test_dictmultiobject.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/objspace/std/test/test_dictmultiobject.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/objspace/std/test/test_dictmultiobject.py	Fri Oct 16 11:47:18 2009
@@ -2,7 +2,7 @@
 from pypy.objspace.std.dictmultiobject import \
      W_DictMultiObject, setitem__DictMulti_ANY_ANY, getitem__DictMulti_ANY, \
      EmptyDictImplementation, RDictImplementation, StrDictImplementation, \
-     SmallDictImplementation, SmallStrDictImplementation, MeasuringDictImplementation
+     MeasuringDictImplementation
 
 from pypy.objspace.std.celldict import ModuleDictImplementation
 from pypy.conftest import gettestobjspace
@@ -91,16 +91,6 @@
         raises(KeyError, "d['def']")
 
 
-
-class TestW_DictSmall(test_dictobject.TestW_DictObject):
-    def setup_class(cls):
-        cls.space = gettestobjspace(**{"objspace.std.withsmalldicts": True})
-
-class AppTest_DictSmall(test_dictobject.AppTest_DictObject):
-    def setup_class(cls):
-        cls.space = gettestobjspace(**{"objspace.std.withsmalldicts": True})
-
-
 class C: pass
 
 class FakeSpace(test_dictobject.FakeSpace):
@@ -250,23 +240,11 @@
 class TestStrDictImplementation(TestRDictImplementation):
     ImplementionClass = StrDictImplementation
 
-class TestSmallDictImplementation(TestRDictImplementation):
-    ImplementionClass = SmallDictImplementation
-
-    def get_impl(self):
-        return self.ImplementionClass(self.space, self.string, self.string2)
-
 class TestMeasuringDictImplementation(TestRDictImplementation):
     ImplementionClass = MeasuringDictImplementation
     DevolvedClass = MeasuringDictImplementation
     EmptyClass = MeasuringDictImplementation
 
-class TestSmallStrDictImplementation(TestRDictImplementation):
-    ImplementionClass = SmallStrDictImplementation
-
-    def get_impl(self):
-        return self.ImplementionClass(self.space, self.string, self.string2)
-
 class TestModuleDictImplementation(TestRDictImplementation):
     ImplementionClass = ModuleDictImplementation
     EmptyClass = ModuleDictImplementation

Modified: pypy/branch/gc-hash-merge/pypy/objspace/std/test/test_floatobject.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/objspace/std/test/test_floatobject.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/objspace/std/test/test_floatobject.py	Fri Oct 16 11:47:18 2009
@@ -56,8 +56,13 @@
             32,              # answer on 32-bit machines
             137438953472)    # answer on 64-bit machines
         # testing special overflow values
-        assert hash(1e200 * 1e200) == 314159
-        assert hash(-1e200 * 1e200) == -271828
+        inf = 1e200 * 1e200
+        assert hash(inf) == 314159
+        assert hash(-inf) == -271828
+        x = hash(inf/inf)
+        # ^^^ assert did not crash, even though the result is a bit random
+        #     e.g. it appears to be -32768 on Win32 and 0 on Linux
+        assert x == hash(inf/inf)
 
     def test_int_float(self):
         assert int(42.1234) == 42

Modified: pypy/branch/gc-hash-merge/pypy/objspace/std/test/test_userobject.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/objspace/std/test/test_userobject.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/objspace/std/test/test_userobject.py	Fri Oct 16 11:47:18 2009
@@ -1,3 +1,4 @@
+from pypy.interpreter import gateway
 
 
 class AppTestUserObject:
@@ -6,6 +7,16 @@
     def setup_class(cls):
         from pypy import conftest
         cls.space = conftest.gettestobjspace(**cls.OPTIONS)
+        #
+        import random
+        def fn_rand():
+            return cls.space.wrap(random.randrange(0, 5))
+        fn_rand.unwrap_spec = []
+        if conftest.option.runappdirect:
+            cls.w_rand = fn_rand
+        else:
+            cls.w_rand = cls.space.wrap(gateway.interp2app(fn_rand))
+        cls.w_runappdirect = cls.space.wrap(bool(conftest.option.runappdirect))
 
     def test_emptyclass(self):
         class empty(object): pass
@@ -231,6 +242,38 @@
 
         raises(AttributeError, "del Foo.x")
 
+    def test_hash(self):
+        if not hasattr(self, 'runappdirect'):
+            skip("disabled")
+        if self.runappdirect:
+            total = 500000
+        else:
+            total = 50
+        #
+        class A(object):
+            hash = None
+        tail = any = A()
+        tail.next = tail
+        i = 0
+        while i < total:
+            a = A()
+            a.next = tail.next
+            tail.next = a
+            for j in range(self.rand()):
+                any = any.next
+            if any.hash is None:
+                any.hash = hash(any)
+            else:
+                assert any.hash == hash(any)
+            i += 1
+        i = 0
+        while i < total:
+            if any.hash is not None:
+                assert any.hash == hash(any)
+            any = any.next
+            i += 1
+
+
 class AppTestWithMultiMethodVersion2(AppTestUserObject):
     OPTIONS = {}    # for test_builtinshortcut.py
 

Modified: pypy/branch/gc-hash-merge/pypy/objspace/std/tupletype.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/objspace/std/tupletype.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/objspace/std/tupletype.py	Fri Oct 16 11:47:18 2009
@@ -27,4 +27,3 @@
 If the argument is a tuple, the return value is the same object.''',
     __new__ = newmethod(descr__new__),
     )
-tuple_typedef.custom_hash = True

Modified: pypy/branch/gc-hash-merge/pypy/objspace/std/typeobject.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/objspace/std/typeobject.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/objspace/std/typeobject.py	Fri Oct 16 11:47:18 2009
@@ -6,7 +6,7 @@
 from pypy.objspace.std.objecttype import object_typedef
 from pypy.objspace.std.dictproxyobject import W_DictProxyObject
 from pypy.rlib.objectmodel import we_are_translated
-from pypy.rlib.objectmodel import current_object_addr_as_int
+from pypy.rlib.objectmodel import current_object_addr_as_int, compute_hash
 from pypy.rlib.jit import hint, purefunction, we_are_jitted, dont_look_inside
 from pypy.rlib.rarithmetic import intmask, r_uint
 
@@ -231,7 +231,8 @@
         # assumption is that the version_tag object won't keep moving all
         # the time - so using the fast current_object_addr_as_int() instead
         # of a slower solution like hash() is still a good trade-off.
-        method_hash = r_uint(intmask(version_tag_as_int * hash(name))) >> SHIFT
+        hash_name = compute_hash(name)
+        method_hash = r_uint(intmask(version_tag_as_int * hash_name)) >> SHIFT
         cached_version_tag = space.method_cache_versions[method_hash]
         if cached_version_tag is version_tag:
             cached_name = space.method_cache_names[method_hash]

Modified: pypy/branch/gc-hash-merge/pypy/objspace/std/unicodeobject.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/objspace/std/unicodeobject.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/objspace/std/unicodeobject.py	Fri Oct 16 11:47:18 2009
@@ -7,6 +7,7 @@
 from pypy.objspace.std import slicetype
 from pypy.objspace.std.tupleobject import W_TupleObject
 from pypy.rlib.rarithmetic import intmask, ovfcheck
+from pypy.rlib.objectmodel import compute_hash
 from pypy.module.unicodedata import unicodedb_4_1_0 as unicodedb
 from pypy.tool.sourcetools import func_with_new_name
 
@@ -211,13 +212,7 @@
         x ^= ord(s[0])
         h = intmask(x)
         return space.wrap(h)
-    if we_are_translated():
-        x = hash(s)            # to use the hash cache in rpython strings
-    else:
-        from pypy.rlib.rarithmetic import _hash_string
-        x = _hash_string(s)    # to make sure we get the same hash as rpython
-        # (otherwise translation will freeze W_DictObjects where we can't find
-        #  the keys any more!)
+    x = compute_hash(s)
     return space.wrap(x)
 
 def len__Unicode(space, w_uni):

Modified: pypy/branch/gc-hash-merge/pypy/objspace/std/unicodetype.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/objspace/std/unicodetype.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/objspace/std/unicodetype.py	Fri Oct 16 11:47:18 2009
@@ -298,7 +298,6 @@
 errors can be 'strict', 'replace' or 'ignore' and defaults to 'strict'.'''
     )
 
-unicode_typedef.custom_hash = True
 unicode_typedef.registermethods(globals())
 
 unitypedef = unicode_typedef

Modified: pypy/branch/gc-hash-merge/pypy/rlib/jit.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rlib/jit.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rlib/jit.py	Fri Oct 16 11:47:18 2009
@@ -210,9 +210,6 @@
             raise JitHintError("%s expects the following keyword "
                                "arguments: %s" % (self.instance,
                                                   expected))
-        for name in driver.greens:
-            s_green_key = kwds_s['s_' + name]
-            s_green_key.hash()      # force the hash cache to appear
 
         if self.instance.__name__ == 'jit_merge_point':
             self.annotate_hooks(**kwds_s)

Modified: pypy/branch/gc-hash-merge/pypy/rlib/objectmodel.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rlib/objectmodel.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rlib/objectmodel.py	Fri Oct 16 11:47:18 2009
@@ -5,6 +5,7 @@
 
 import sys
 import types
+import math
 
 # specialize is a decorator factory for attaching _annspecialcase_
 # attributes to functions: for example
@@ -127,64 +128,192 @@
     obj.__dict__ = {}
     obj.__class__ = FREED_OBJECT
 
-from pypy.rpython.extregistry import ExtRegistryEntry
-
 # ____________________________________________________________
 #
-# id-like functions.
-# In addition, RPython supports hash(x) on RPython instances,
-# returning a number that is not guaranteed to be unique but
-# that doesn't change over time for a given 'x'.
+# id-like functions.  The idea is that calling hash() or id() is not
+# allowed in RPython.  You have to call one of the following more
+# precise functions.
+
+def compute_hash(x):
+    """RPython equivalent of hash(x), where 'x' is an immutable
+    RPython-level object.  For strings or unicodes it computes the hash
+    as in Python.  For tuples it calls compute_hash() recursively.
+    For instances it uses compute_identity_hash().
+
+    Note that this can return 0 or -1 too.
+
+    Behavior across translation:
+
+      * on lltypesystem, it always returns the same number, both
+        before and after translation.  Dictionaries don't need to
+        be rehashed after translation.
+
+      * on ootypesystem, the value changes because of translation.
+        Dictionaries need to be rehashed.
+    """
+    if isinstance(x, (str, unicode)):
+        return _hash_string(x)
+    if isinstance(x, int):
+        return x
+    if isinstance(x, float):
+        return _hash_float(x)
+    if isinstance(x, tuple):
+        return _hash_tuple(x)
+    if x is None:
+        return 0
+    return compute_identity_hash(x)
+
+def compute_identity_hash(x):
+    """RPython equivalent of object.__hash__(x).  This returns the
+    so-called 'identity hash', which is the non-overridable default hash
+    of Python.  Can be called for any RPython-level object that turns
+    into a GC object, but not NULL.  The value is not guaranteed to be the
+    same before and after translation, except for RPython instances on the
+    lltypesystem.
+    """
+    result = object.__hash__(x)
+    try:
+        x.__dict__['__precomputed_identity_hash'] = result
+    except (TypeError, AttributeError):
+        pass
+    return result
 
 def compute_unique_id(x):
-    """RPython equivalent of id(x).  The 'x' must be an RPython instance.
-    This operation can be very costly depending on the garbage collector.
-    To remind you of this fact, we don't support id(x) directly.
+    """RPython equivalent of id(x).  The 'x' must be an RPython-level
+    object that turns into a GC object.  This operation can be very
+    costly depending on the garbage collector.  To remind you of this
+    fact, we don't support id(x) directly.
+    (XXX not implemented on ootype, falls back to compute_identity_hash)
     """
     return id(x)      # XXX need to return r_longlong on some platforms
 
 def current_object_addr_as_int(x):
     """A cheap version of id(x).  The current memory location of an
-    instance can change over time for moving GCs.  Also note that on
+    object can change over time for moving GCs.  Also note that on
     ootypesystem this typically doesn't return the real address but
-    just the same as hash(x).
+    just the same as compute_hash(x).
     """
     from pypy.rlib.rarithmetic import intmask
     return intmask(id(x))
 
+# ----------
+
+def _hash_string(s):
+    """The algorithm behind compute_hash() for a string or a unicode."""
+    from pypy.rlib.rarithmetic import intmask
+    length = len(s)
+    if length == 0:
+        return -1
+    x = ord(s[0]) << 7
+    i = 0
+    while i < length:
+        x = (1000003*x) ^ ord(s[i])
+        i += 1
+    x ^= length
+    return intmask(x)
+
+def _hash_float(f):
+    """The algorithm behind compute_hash() for a float.
+    This implementation is identical to the CPython implementation,
+    except for the fact that the integer case is not treated specially.
+    In RPython, floats cannot be used with ints in dicts, anyway.
+    """
+    from pypy.rlib.rarithmetic import intmask, isinf, isnan
+    if isinf(f):
+        if f < 0.0:
+            return -271828
+        else:
+            return 314159
+    elif isnan(f):
+        return 0
+    v, expo = math.frexp(f)
+    v *= TAKE_NEXT
+    hipart = int(v)
+    v = (v - float(hipart)) * TAKE_NEXT
+    x = hipart + int(v) + (expo << 15)
+    return intmask(x)
+TAKE_NEXT = float(2**31)
+
+def _hash_tuple(t):
+    """NOT_RPYTHON.  The algorithm behind compute_hash() for a tuple.
+    It is modelled after the old algorithm of Python 2.3, which is
+    a bit faster than the one introduced by Python 2.4.  We assume
+    that nested tuples are very uncommon in RPython, making the bad
+    case unlikely.
+    """
+    from pypy.rlib.rarithmetic import intmask
+    x = 0x345678
+    for item in t:
+        y = compute_hash(item)
+        x = intmask((1000003 * x) ^ y)
+    return x
+
+# ----------
+
+from pypy.rpython.extregistry import ExtRegistryEntry
+
+class Entry(ExtRegistryEntry):
+    _about_ = compute_hash
+
+    def compute_result_annotation(self, s_x):
+        from pypy.annotation import model as annmodel
+        return annmodel.SomeInteger()
+
+    def specialize_call(self, hop):
+        r_obj, = hop.args_r
+        v_obj, = hop.inputargs(r_obj)
+        ll_fn = r_obj.get_ll_hash_function()
+        return hop.gendirectcall(ll_fn, v_obj)
+
+class Entry(ExtRegistryEntry):
+    _about_ = compute_identity_hash
+
+    def compute_result_annotation(self, s_x):
+        from pypy.annotation import model as annmodel
+        return annmodel.SomeInteger()
+
+    def specialize_call(self, hop):
+        from pypy.rpython.lltypesystem import lltype
+        vobj, = hop.inputargs(hop.args_r[0])
+        if hop.rtyper.type_system.name == 'lltypesystem':
+            ok = (isinstance(vobj.concretetype, lltype.Ptr) and
+                  vobj.concretetype.TO._gckind == 'gc')
+        else:
+            from pypy.rpython.ootypesystem import ootype
+            ok = isinstance(vobj.concretetype, ootype.OOType)
+        if not ok:
+            from pypy.rpython.error import TyperError
+            raise TyperError("compute_identity_hash() cannot be applied to"
+                             " %r" % (vobj.concretetype,))
+        return hop.genop('gc_identityhash', [vobj], resulttype=lltype.Signed)
+
 class Entry(ExtRegistryEntry):
     _about_ = compute_unique_id
 
     def compute_result_annotation(self, s_x):
         from pypy.annotation import model as annmodel
-        assert isinstance(s_x, annmodel.SomeInstance)
         return annmodel.SomeInteger()
 
     def specialize_call(self, hop):
+        from pypy.rpython.lltypesystem import lltype
         vobj, = hop.inputargs(hop.args_r[0])
         if hop.rtyper.type_system.name == 'lltypesystem':
-            from pypy.rpython.lltypesystem import lltype
-            if isinstance(vobj.concretetype, lltype.Ptr):
-                return hop.genop('gc_id', [vobj],
-                                 resulttype = lltype.Signed)
-        elif hop.rtyper.type_system.name == 'ootypesystem':
+            ok = (isinstance(vobj.concretetype, lltype.Ptr) and
+                  vobj.concretetype.TO._gckind == 'gc')
+        else:
             from pypy.rpython.ootypesystem import ootype
-            if isinstance(vobj.concretetype, ootype.Instance):
-                # XXX wrong implementation for now, fix me
-                from pypy.rpython.rmodel import warning
-                warning("compute_unique_id() is not fully supported on ootype")
-                return hop.genop('ooidentityhash', [vobj],
-                                 resulttype = ootype.Signed)
-        from pypy.rpython.error import TyperError
-        raise TyperError("compute_unique_id() cannot be applied to %r" % (
-            vobj.concretetype,))
+            ok = isinstance(vobj.concretetype, ootype.Instance)
+        if not ok:
+            from pypy.rpython.error import TyperError
+            raise TyperError("compute_unique_id() cannot be applied to"
+                             " %r" % (vobj.concretetype,))
+        return hop.genop('gc_id', [vobj], resulttype=lltype.Signed)
 
 class Entry(ExtRegistryEntry):
     _about_ = current_object_addr_as_int
 
     def compute_result_annotation(self, s_x):
         from pypy.annotation import model as annmodel
-        assert isinstance(s_x, annmodel.SomeInstance)
         return annmodel.SomeInteger()
 
     def specialize_call(self, hop):
@@ -197,12 +326,14 @@
         elif hop.rtyper.type_system.name == 'ootypesystem':
             from pypy.rpython.ootypesystem import ootype
             if isinstance(vobj.concretetype, ootype.Instance):
-                return hop.genop('ooidentityhash', [vobj],
+                return hop.genop('gc_identityhash', [vobj],
                                  resulttype = ootype.Signed)
         from pypy.rpython.error import TyperError
         raise TyperError("current_object_addr_as_int() cannot be applied to"
                          " %r" % (vobj.concretetype,))
 
+# ____________________________________________________________
+
 def hlinvoke(repr, llcallable, *args):
     raise TypeError, "hlinvoke is meant to be rtyped and not called direclty"
 

Modified: pypy/branch/gc-hash-merge/pypy/rlib/rarithmetic.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rlib/rarithmetic.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rlib/rarithmetic.py	Fri Oct 16 11:47:18 2009
@@ -463,23 +463,6 @@
 
     return formatd(fmt, x)
 
-# a common string hash function
-
-def _hash_string(s):
-    length = len(s)
-    if length == 0:
-        x = -1
-    else:
-        x = ord(s[0]) << 7
-        i = 0
-        while i < length:
-            x = (1000003*x) ^ ord(s[i])
-            i += 1
-        x ^= length
-        if x == 0:
-            x = -1
-    return intmask(x)
-
 # the 'float' C type
 
 class r_singlefloat(object):

Modified: pypy/branch/gc-hash-merge/pypy/rlib/rope.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rlib/rope.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rlib/rope.py	Fri Oct 16 11:47:18 2009
@@ -1,6 +1,6 @@
 import py
 import sys
-from pypy.rlib.rarithmetic import intmask, _hash_string, ovfcheck
+from pypy.rlib.rarithmetic import intmask, ovfcheck
 from pypy.rlib.rarithmetic import r_uint, LONG_BIT
 from pypy.rlib.objectmodel import we_are_translated
 import math

Modified: pypy/branch/gc-hash-merge/pypy/rlib/rweakrefimpl.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rlib/rweakrefimpl.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rlib/rweakrefimpl.py	Fri Oct 16 11:47:18 2009
@@ -91,6 +91,8 @@
                                     adtmeths=entrymeths,
                                     hints={'weakarray': 'value'})
 
+ll_strhash = rstr.LLHelpers.ll_strhash
+
 @jit.dont_look_inside
 def ll_new_weakdict():
     d = lltype.malloc(WEAKDICT)
@@ -101,7 +103,8 @@
 
 @jit.dont_look_inside
 def ll_get(d, llkey):
-    i = rdict.ll_dict_lookup(d, llkey, llkey.gethash())
+    hash = ll_strhash(llkey)
+    i = rdict.ll_dict_lookup(d, llkey, hash)
     #llop.debug_print(lltype.Void, i, 'get')
     valueref = d.entries[i].value
     if valueref:
@@ -118,8 +121,9 @@
 
 @jit.dont_look_inside
 def ll_set_nonnull(d, llkey, llvalue):
+    hash = ll_strhash(llkey)
     valueref = weakref_create(llvalue)    # GC effects here, before the rest
-    i = rdict.ll_dict_lookup(d, llkey, llkey.gethash())
+    i = rdict.ll_dict_lookup(d, llkey, hash)
     everused = d.entries.everused(i)
     d.entries[i].key = llkey
     d.entries[i].value = valueref
@@ -132,7 +136,8 @@
 
 @jit.dont_look_inside
 def ll_set_null(d, llkey):
-    i = rdict.ll_dict_lookup(d, llkey, llkey.gethash())
+    hash = ll_strhash(llkey)
+    i = rdict.ll_dict_lookup(d, llkey, hash)
     if d.entries.everused(i):
         # If the entry was ever used, clean up its key and value.
         # We don't store a NULL value, but a dead weakref, because

Modified: pypy/branch/gc-hash-merge/pypy/rlib/test/test_objectmodel.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rlib/test/test_objectmodel.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rlib/test/test_objectmodel.py	Fri Oct 16 11:47:18 2009
@@ -141,6 +141,51 @@
     py.test.raises(TypeError, "s1 < s2")
     py.test.raises(TypeError, "hash(s1)")
 
+def test_compute_hash():
+    from pypy.rlib.objectmodel import _hash_string, _hash_float, _hash_tuple
+    assert compute_hash("Hello") == _hash_string("Hello")
+    assert compute_hash(7) == 7
+    assert compute_hash(-3.5) == _hash_float(-3.5)
+    assert compute_hash(None) == 0
+    assert compute_hash(("world", None, 7)) == _hash_tuple(("world", None, 7))
+    #
+    class Foo(object):
+        def __hash__(self):
+            return 42
+    foo = Foo()
+    h = compute_hash(foo)
+    assert h == object.__hash__(foo)
+    assert h == getattr(foo, '__precomputed_identity_hash')
+    assert compute_hash(None) == 0
+
+def test_compute_hash_float():
+    from pypy.rlib.rarithmetic import INFINITY, NAN
+    assert compute_hash(INFINITY) == 314159
+    assert compute_hash(-INFINITY) == -271828
+    assert compute_hash(NAN) == 0
+
+def test_compute_identity_hash():
+    class Foo(object):
+        def __hash__(self):
+            return 42
+    foo = Foo()
+    h = compute_identity_hash(foo)
+    assert h == object.__hash__(foo)
+    assert h == getattr(foo, '__precomputed_identity_hash')
+
+def test_compute_unique_id():
+    class Foo(object):
+        pass
+    foo = Foo()
+    assert compute_unique_id(foo) == id(foo)
+
+def test_current_object_addr_as_int():
+    from pypy.rlib.rarithmetic import intmask
+    class Foo(object):
+        pass
+    foo = Foo()
+    assert current_object_addr_as_int(foo) == intmask(id(foo))
+
 class BaseTestObjectModel(BaseRtypingTest):
 
     def test_we_are_translated(self):
@@ -270,6 +315,31 @@
         res = self.interpret(g, [3])
         assert res == 77
 
+    def test_compute_hash(self):
+        class Foo(object):
+            pass
+        def f(i):
+            assert compute_hash(i) == compute_hash(42)
+            assert compute_hash(i+1.0) == compute_hash(43.0)
+            assert compute_hash("Hello" + str(i)) == compute_hash("Hello42")
+            if i == 42:
+                p = None
+            else:
+                p = Foo()
+            assert compute_hash(p) == compute_hash(None)
+            assert (compute_hash(("world", None, i, 7.5)) ==
+                    compute_hash(("world", None, 42, 7.5)))
+            q = Foo()
+            assert compute_hash(q) == compute_identity_hash(q)
+            from pypy.rlib.rarithmetic import INFINITY, NAN
+            assert compute_hash(INFINITY) == 314159
+            assert compute_hash(-INFINITY) == -271828
+            assert compute_hash(NAN) == 0
+            return i*2
+        res = self.interpret(f, [42])
+        assert res == 84
+
+
 class TestLLtype(BaseTestObjectModel, LLRtypeMixin):
 
     def test_rtype_keepalive(self):
@@ -283,6 +353,36 @@
         res = self.interpret(f, [])
         assert res == 1
 
+    def test_compute_hash_across_translation(self):
+        class Foo(object):
+            pass
+        q = Foo()
+
+        def f(i):
+            assert compute_hash(None) == 0
+            assert compute_hash(i) == h_42
+            assert compute_hash(i+1.0) == h_43_dot_0
+            assert compute_hash((i+3)/6.0) == h_7_dot_5
+            assert compute_hash("Hello" + str(i)) == h_Hello42
+            if i == 42:
+                p = None
+            else:
+                p = Foo()
+            assert compute_hash(p) == h_None
+            assert compute_hash(("world", None, i, 7.5)) == h_tuple
+            assert compute_hash(q) == h_q
+            return i*2
+        h_42       = compute_hash(42)
+        h_43_dot_0 = compute_hash(43.0)
+        h_7_dot_5  = compute_hash(7.5)
+        h_Hello42  = compute_hash("Hello42")
+        h_None     = compute_hash(None)
+        h_tuple    = compute_hash(("world", None, 42, 7.5))
+        h_q        = compute_hash(q)
+        
+        res = self.interpret(f, [42])
+        assert res == 84
+
 
 class TestOOtype(BaseTestObjectModel, OORtypeMixin):
     pass

Modified: pypy/branch/gc-hash-merge/pypy/rpython/llinterp.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rpython/llinterp.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rpython/llinterp.py	Fri Oct 16 11:47:18 2009
@@ -824,6 +824,9 @@
     def op_gc_assume_young_pointers(self, addr):
         raise NotImplementedError
 
+    def op_gc_obtain_free_space(self, size):
+        raise NotImplementedError
+
     def op_gc_can_move(self, ptr):
         addr = llmemory.cast_ptr_to_adr(ptr)
         return self.heap.can_move(addr)
@@ -881,8 +884,16 @@
             self.setvar(v_ptr, p)
     op_gc_reload_possibly_moved.specialform = True
 
-    def op_gc_id(self, v_ptr):
-        return self.heap.gc_id(v_ptr)
+    def op_gc_identityhash(self, obj):
+        return lltype.identityhash(obj)
+
+    def op_gc_id(self, ptr):
+        PTR = lltype.typeOf(ptr)
+        if isinstance(PTR, lltype.Ptr):
+            return self.heap.gc_id(ptr)
+        elif isinstance(PTR, ootype.OOType):
+            return ootype.identityhash(ptr)     # XXX imprecise
+        raise NotImplementedError("gc_id on %r" % (PTR,))
 
     def op_gc_set_max_heap_size(self, maxsize):
         raise NotImplementedError("gc_set_max_heap_size")
@@ -1186,9 +1197,6 @@
             raise RuntimeError("calling abstract method %r" % (m,))
         return self.perform_call(m, (lltype.typeOf(inst),)+lltype.typeOf(m).ARGS, [inst]+args)
 
-    def op_ooidentityhash(self, inst):
-        return ootype.ooidentityhash(inst)
-
     def op_oostring(self, obj, base):
         return ootype.oostring(obj, base)
 
@@ -1210,13 +1218,10 @@
         except ValueError:
             self.make_llexception()
 
-    def op_oohash(self, s):
-        return ootype.oohash(s)
-
 class Tracer(object):
     Counter = 0
     file = None
-    TRACE = False
+    TRACE = int(os.getenv('PYPY_TRACE') or '0')
 
     HEADER = """<html><head>
         <script language=javascript type='text/javascript'>

Modified: pypy/branch/gc-hash-merge/pypy/rpython/lltypesystem/lloperation.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rpython/lltypesystem/lloperation.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rpython/lltypesystem/lloperation.py	Fri Oct 16 11:47:18 2009
@@ -439,10 +439,18 @@
     'gc_push_alive_pyobj':  LLOp(),
     'gc_pop_alive_pyobj':   LLOp(),
     'gc_reload_possibly_moved': LLOp(),
+    # see rlib/objectmodel for gc_identityhash and gc_id
+    'gc_identityhash':      LLOp(canraise=(MemoryError,), sideeffects=False,
+                                 canunwindgc=True),
     'gc_id':                LLOp(canraise=(MemoryError,), sideeffects=False),
+                                 # ^^^ but canunwindgc=False, as it is
+                                 # allocating non-GC structures only
+    'gc_obtain_free_space': LLOp(),
     'gc_set_max_heap_size': LLOp(),
     'gc_can_move'         : LLOp(sideeffects=False),
     'gc_thread_prepare'   : LLOp(canraise=(MemoryError,)),
+                                 # ^^^ but canunwindgc=False, as it is
+                                 # allocating non-GC structures only
     'gc_thread_run'       : LLOp(),
     'gc_thread_die'       : LLOp(),
     'gc_assume_young_pointers': LLOp(),
@@ -538,11 +546,9 @@
     'instanceof':           LLOp(oo=True, canfold=True),
     'classof':              LLOp(oo=True, canfold=True),
     'subclassof':           LLOp(oo=True, canfold=True),
-    'ooidentityhash':       LLOp(oo=True, sideeffects=False),  # not an id()!
     'oostring':             LLOp(oo=True, sideeffects=False),
     'ooparse_int':          LLOp(oo=True, canraise=(ValueError,)),
     'ooparse_float':        LLOp(oo=True, canraise=(ValueError,)),
-    'oohash':               LLOp(oo=True, sideeffects=False),
     'oounicode':            LLOp(oo=True, canraise=(UnicodeDecodeError,)),
 
     # _____ read frame var support ___

Modified: pypy/branch/gc-hash-merge/pypy/rpython/lltypesystem/lltype.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rpython/lltypesystem/lltype.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rpython/lltypesystem/lltype.py	Fri Oct 16 11:47:18 2009
@@ -1119,6 +1119,19 @@
             return callb(*args)
         raise TypeError("%r instance is not a function" % (self._T,))
 
+    def _identityhash(self, cache=True):
+        p = normalizeptr(self)
+        try:
+            return p._obj._hash_cache_
+        except AttributeError:
+            result = hash(p._obj)
+            if cache:
+                try:
+                    p._obj._hash_cache_ = result
+                except AttributeError:
+                    pass
+            return result
+
 class _ptr(_abstract_ptr):
     __slots__ = ('_TYPE', 
                  '_weak', '_solid',
@@ -1390,7 +1403,7 @@
 class _struct(_parentable):
     _kind = "structure"
 
-    __slots__ = ()
+    __slots__ = ('_hash_cache_',)
 
     def __new__(self, TYPE, n=None, initialization=None, parent=None, parentindex=None):
         my_variety = _struct_variety(TYPE._names)
@@ -1840,6 +1853,27 @@
                                  "should have been: %s" % (p, result2, result))
     return result
 
+def identityhash(p):
+    """Returns the lltype-level hash of the given GcStruct.
+    Also works with most ootype objects.  Not for NULL.
+    See rlib.objectmodel.compute_identity_hash() for more
+    information about the RPython-level meaning of this.
+    """
+    assert p
+    return p._identityhash()
+
+def init_identity_hash(p, value):
+    """For a prebuilt object p, initialize its hash value to 'value'."""
+    assert isinstance(typeOf(p), Ptr)
+    p = normalizeptr(p)
+    if not p:
+        raise ValueError("cannot change hash(NULL)!")
+    if hasattr(p._obj, '_hash_cache_'):
+        raise ValueError("the hash of %r was already computed" % (p,))
+    if typeOf(p).TO._is_varsize():
+        raise ValueError("init_identity_hash(): not for varsized types")
+    p._obj._hash_cache_ = intmask(value)
+
 def isCompatibleType(TYPE1, TYPE2):
     return TYPE1._is_compatible(TYPE2)
 

Modified: pypy/branch/gc-hash-merge/pypy/rpython/lltypesystem/rclass.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rpython/lltypesystem/rclass.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rpython/lltypesystem/rclass.py	Fri Oct 16 11:47:18 2009
@@ -20,6 +20,7 @@
 from pypy.rpython.extregistry import ExtRegistryEntry
 from pypy.annotation import model as annmodel
 from pypy.rlib.rarithmetic import intmask
+from pypy.rlib import objectmodel
 
 #
 #  There is one "vtable" per user class, with the following structure:
@@ -331,12 +332,6 @@
                     mangled_name = 'inst_' + name
                     fields[name] = mangled_name, r
                     llfields.append((mangled_name, r.lowleveltype))
-            #
-            # hash() support
-            if self.rtyper.needs_hash_support(self.classdef):
-                from pypy.rpython import rint
-                fields['_hash_cache_'] = 'hash_cache', rint.signed_repr
-                llfields.append(('hash_cache', Signed))
 
             self.rbase = getinstancerepr(self.rtyper, self.classdef.basedef,
                                          self.gcflavor)
@@ -348,10 +343,6 @@
             if hints is None:
                 hints = {}
             hints = self._check_for_immutable_hints(hints)
-            if ('_hash_cache_' in fields or
-                '_hash_cache_' in self.rbase.allinstancefields):
-                adtmeths = adtmeths.copy()
-                adtmeths['gethash'] = self.get_ll_hash_function()
             object_type = MkStruct(self.classdef.name,
                                    ('super', self.rbase.object_type),
                                    hints=hints,
@@ -406,21 +397,6 @@
     def create_instance(self):
         return malloc(self.object_type, flavor=self.gcflavor)
 
-    def get_ll_hash_function(self):
-        if self.classdef is None:
-            raise TyperError, 'missing hash support flag in classdef'
-        if self.rtyper.needs_hash_support(self.classdef):
-            try:
-                return self._ll_hash_function
-            except AttributeError:
-                INSPTR = self.lowleveltype
-                def _ll_hash_function(ins):
-                    return ll_inst_hash(cast_pointer(INSPTR, ins))
-                self._ll_hash_function = _ll_hash_function
-                return _ll_hash_function
-        else:
-            return self.rbase.get_ll_hash_function()
-
     def initialize_prebuilt_data(self, value, classdef, result):
         if self.classdef is not None:
             # recursively build the parent part of the instance
@@ -429,8 +405,6 @@
             for name, (mangled_name, r) in self.fields.items():
                 if r.lowleveltype is Void:
                     llattrvalue = None
-                elif name == '_hash_cache_': # hash() support
-                    continue   # already done by initialize_prebuilt_hash()
                 else:
                     try:
                         attrvalue = getattr(value, name)
@@ -451,12 +425,9 @@
             result.typeptr = rclass.getvtable()
 
     def initialize_prebuilt_hash(self, value, result):
-        if self.classdef is not None:
-            self.rbase.initialize_prebuilt_hash(value, result.super)
-            if '_hash_cache_' in self.fields:
-                mangled_name, r = self.fields['_hash_cache_']
-                llattrvalue = hash(value)
-                setattr(result, mangled_name, llattrvalue)
+        llattrvalue = getattr(value, '__precomputed_identity_hash', None)
+        if llattrvalue is not None:
+            lltype.init_identity_hash(result, llattrvalue)
 
     def getfieldrepr(self, attr):
         """Return the repr used for the given attribute."""
@@ -523,10 +494,7 @@
                 mangled_name, r = self.allinstancefields[fldname]
                 if r.lowleveltype is Void:
                     continue
-                if fldname == '_hash_cache_':
-                    value = Constant(0, Signed)
-                else:
-                    value = self.classdef.classdesc.read_attribute(fldname, None)
+                value = self.classdef.classdesc.read_attribute(fldname, None)
                 if value is not None:
                     cvalue = inputconst(r.lowleveltype,
                                         r.convert_desc_or_const(value))
@@ -696,18 +664,6 @@
 def ll_runtime_type_info(obj):
     return obj.typeptr.rtti
 
-def ll_inst_hash(ins):
-    if not ins:
-        return 0    # for None
-    cached = ins.hash_cache
-    if cached == 0:
-        # XXX this should ideally be done in a GC-dependent way: we only
-        # need a hash_cache for moving GCs, and we only need the '~' to
-        # avoid Boehm keeping the object alive if the value is passed
-        # around
-       cached = ins.hash_cache = ~cast_ptr_to_int(ins)
-    return cached
-
 def ll_inst_type(obj):
     if obj:
         return obj.typeptr

Modified: pypy/branch/gc-hash-merge/pypy/rpython/lltypesystem/rstr.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rpython/lltypesystem/rstr.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rpython/lltypesystem/rstr.py	Fri Oct 16 11:47:18 2009
@@ -2,10 +2,10 @@
 from pypy.tool.pairtype import pairtype
 from pypy.rpython.error import TyperError
 from pypy.rlib.objectmodel import malloc_zero_filled, we_are_translated
+from pypy.rlib.objectmodel import _hash_string
 from pypy.rlib.debug import ll_assert
 from pypy.rlib.jit import purefunction
 from pypy.rpython.robject import PyObjRepr, pyobj_repr
-from pypy.rlib.rarithmetic import _hash_string
 from pypy.rpython.rmodel import inputconst, IntegerRepr
 from pypy.rpython.rstr import AbstractStringRepr,AbstractCharRepr,\
      AbstractUniCharRepr, AbstractStringIteratorRepr,\
@@ -287,6 +287,8 @@
         x = s.hash
         if x == 0:
             x = _hash_string(s.chars)
+            if x == 0:
+                x = 29872897
             s.hash = x
         return x
     ll_strhash._pure_function_ = True # it's pure but it does not look like it

Modified: pypy/branch/gc-hash-merge/pypy/rpython/lltypesystem/test/test_lltype.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rpython/lltypesystem/test/test_lltype.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rpython/lltypesystem/test/test_lltype.py	Fri Oct 16 11:47:18 2009
@@ -739,3 +739,39 @@
     del ptr
     import gc; gc.collect(); gc.collect()
     ptr2[0] = 5    # crashes if the array was deallocated
+
+def test_identityhash():
+    S = GcStruct('S', ('x', Signed))
+    S2 = GcStruct('S2', ('super', S))
+    S3 = GcStruct('S3', ('super', S2))
+
+    py.test.raises(AssertionError, identityhash, nullptr(S2))
+
+    s3 = malloc(S3)
+    hash3 = identityhash(s3.super)
+    assert hash3 == identityhash(s3)
+    assert hash3 == identityhash(s3.super)
+    assert hash3 == identityhash(s3.super.super)
+    py.test.raises(ValueError, init_identity_hash, s3, hash3^1)
+    py.test.raises(ValueError, init_identity_hash, s3.super, hash3^4)
+    py.test.raises(ValueError, init_identity_hash, s3.super.super, hash3^9)
+
+    s3 = malloc(S3)
+    init_identity_hash(s3.super, -123)
+    assert -123 == identityhash(s3)
+    assert -123 == identityhash(s3.super)
+    assert -123 == identityhash(s3.super.super)
+    py.test.raises(ValueError, init_identity_hash, s3, 4313)
+    py.test.raises(ValueError, init_identity_hash, s3.super, 0)
+    py.test.raises(ValueError, init_identity_hash, s3.super.super, -124)
+
+    from pypy.rpython.lltypesystem import llmemory
+    p3 = cast_opaque_ptr(llmemory.GCREF, s3)
+    assert -123 == identityhash(p3)
+
+    A = GcArray(Signed)
+    a = malloc(A, 3)
+    hash1 = identityhash(a)
+    assert hash1 == identityhash(a)
+    p = cast_opaque_ptr(llmemory.GCREF, a)
+    assert hash1 == identityhash(p)

Modified: pypy/branch/gc-hash-merge/pypy/rpython/memory/gc/generation.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rpython/memory/gc/generation.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rpython/memory/gc/generation.py	Fri Oct 16 11:47:18 2009
@@ -1,6 +1,7 @@
 import sys
 from pypy.rpython.memory.gc.semispace import SemiSpaceGC
 from pypy.rpython.memory.gc.semispace import GCFLAG_EXTERNAL, GCFLAG_FORWARDED
+from pypy.rpython.memory.gc.semispace import GCFLAG_HASHTAKEN
 from pypy.rpython.lltypesystem.llmemory import NULL, raw_malloc_usage
 from pypy.rpython.lltypesystem import lltype, llmemory, llarena
 from pypy.rpython.memory.support import DEFAULT_CHUNK_SIZE
@@ -220,7 +221,8 @@
     # flags exposed for the HybridGC subclass
     GCFLAGS_FOR_NEW_YOUNG_OBJECTS = 0   # NO_YOUNG_PTRS never set on young objs
     GCFLAGS_FOR_NEW_EXTERNAL_OBJECTS = (GCFLAG_EXTERNAL | GCFLAG_FORWARDED |
-                                        GCFLAG_NO_YOUNG_PTRS)
+                                        GCFLAG_NO_YOUNG_PTRS |
+                                        GCFLAG_HASHTAKEN)
 
     # ____________________________________________________________
     # Support code for full collections
@@ -243,11 +245,11 @@
             llop.debug_print(lltype.Void, "percent survived", float(self.free - self.tospace) / self.space_size)
 
     def make_a_copy(self, obj, objsize):
-        newobj = SemiSpaceGC.make_a_copy(self, obj, objsize)
+        tid = self.header(obj).tid
         # During a full collect, all copied objects might implicitly come
         # from the nursery.  In case they do, we must add this flag:
-        self.header(newobj).tid |= GCFLAG_NO_YOUNG_PTRS
-        return newobj
+        tid |= GCFLAG_NO_YOUNG_PTRS
+        return self._make_a_copy_with_tid(obj, objsize, tid)
         # history: this was missing and caused an object to become old but without the
         # flag set.  Such an object is bogus in the sense that the write_barrier doesn't
         # work on it.  So it can eventually contain a ptr to a young object but we didn't
@@ -386,7 +388,7 @@
         while scan < self.free:
             curr = scan + self.size_gc_header()
             self.trace_and_drag_out_of_nursery(curr)
-            scan += self.size_gc_header() + self.get_size(curr)
+            scan += self.size_gc_header() + self.get_size_incl_hash(curr)
         return scan
 
     def trace_and_drag_out_of_nursery(self, obj):

Modified: pypy/branch/gc-hash-merge/pypy/rpython/memory/gc/hybrid.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rpython/memory/gc/hybrid.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rpython/memory/gc/hybrid.py	Fri Oct 16 11:47:18 2009
@@ -2,6 +2,7 @@
 from pypy.rpython.memory.gc.semispace import SemiSpaceGC
 from pypy.rpython.memory.gc.generation import GenerationGC
 from pypy.rpython.memory.gc.semispace import GCFLAG_EXTERNAL, GCFLAG_FORWARDED
+from pypy.rpython.memory.gc.semispace import GCFLAG_HASHTAKEN, GCFLAG_HASHFIELD
 from pypy.rpython.memory.gc.generation import GCFLAG_NO_YOUNG_PTRS
 from pypy.rpython.memory.gc.generation import GCFLAG_NO_HEAP_PTRS
 from pypy.rpython.lltypesystem import lltype, llmemory, llarena
@@ -222,6 +223,8 @@
     def realloc(self, ptr, newlength, fixedsize, itemsize, lengthofs, grow):
         size_gc_header = self.size_gc_header()
         addr = llmemory.cast_ptr_to_adr(ptr)
+        ll_assert(self.header(addr).tid & GCFLAG_EXTERNAL,
+                  "realloc() on a non-external object")
         nonvarsize = size_gc_header + fixedsize
         try:
             varsize = ovfcheck(itemsize * newlength)
@@ -401,15 +404,18 @@
             tid &= ~GCFLAG_AGE_MASK
         # skip GenerationGC.make_a_copy() as we already did the right
         # thing about GCFLAG_NO_YOUNG_PTRS
-        newobj = SemiSpaceGC.make_a_copy(self, obj, objsize)
-        self.header(newobj).tid = tid
-        return newobj
+        return self._make_a_copy_with_tid(obj, objsize, tid)
 
     def make_a_nonmoving_copy(self, obj, objsize):
         # NB. the object can have a finalizer or be a weakref, but
         # it's not an issue.
         totalsize = self.size_gc_header() + objsize
-        newaddr = self.allocate_external_object(totalsize)
+        tid = self.header(obj).tid
+        if tid & (GCFLAG_HASHTAKEN|GCFLAG_HASHFIELD):
+            totalsize_incl_hash = totalsize + llmemory.sizeof(lltype.Signed)
+        else:
+            totalsize_incl_hash = totalsize
+        newaddr = self.allocate_external_object(totalsize_incl_hash)
         if not newaddr:
             return llmemory.NULL   # can't raise MemoryError during a collect()
         if self.config.gcconfig.debugprint:
@@ -417,13 +423,22 @@
             self._nonmoving_copy_size += raw_malloc_usage(totalsize)
 
         llmemory.raw_memcopy(obj - self.size_gc_header(), newaddr, totalsize)
-        newobj = newaddr + self.size_gc_header()
-        hdr = self.header(newobj)
-        hdr.tid |= self.GCFLAGS_FOR_NEW_EXTERNAL_OBJECTS
+        # check if we need to write a hash value at the end of the new obj
+        if tid & (GCFLAG_HASHTAKEN|GCFLAG_HASHFIELD):
+            if tid & GCFLAG_HASHFIELD:
+                hash = (obj + objsize).signed[0]
+            else:
+                hash = llmemory.cast_adr_to_int(obj)
+                tid |= GCFLAG_HASHFIELD
+            (newaddr + totalsize).signed[0] = hash
+        #
         # GCFLAG_UNVISITED is not set
         # GCFLAG_NO_HEAP_PTRS is not set either, conservatively.  It may be
         # set by the next collection's collect_last_generation_roots().
         # This old object is immediately put at generation 3.
+        newobj = newaddr + self.size_gc_header()
+        hdr = self.header(newobj)
+        hdr.tid = tid | self.GCFLAGS_FOR_NEW_EXTERNAL_OBJECTS
         ll_assert(self.is_last_generation(newobj),
                   "make_a_nonmoving_copy: object too young")
         self.gen3_rawmalloced_objects.append(newobj)
@@ -503,13 +518,13 @@
             if tid & GCFLAG_UNVISITED:
                 if self.config.gcconfig.debugprint:
                     dead_count+=1
-                    dead_size+=raw_malloc_usage(self.get_size(obj))
+                    dead_size+=raw_malloc_usage(self.get_size_incl_hash(obj))
                 addr = obj - self.gcheaderbuilder.size_gc_header
                 llmemory.raw_free(addr)
             else:
                 if self.config.gcconfig.debugprint:
                     alive_count+=1
-                    alive_size+=raw_malloc_usage(self.get_size(obj))
+                    alive_size+=raw_malloc_usage(self.get_size_incl_hash(obj))
                 if generation == 3:
                     surviving_objects.append(obj)
                 elif generation == 2:
@@ -591,6 +606,8 @@
         tid = self.header(obj).tid
         ll_assert(bool(tid & GCFLAG_EXTERNAL),
                   "gen2: missing GCFLAG_EXTERNAL")
+        ll_assert(bool(tid & GCFLAG_HASHTAKEN),
+                  "gen2: missing GCFLAG_HASHTAKEN")
         ll_assert(bool(tid & GCFLAG_UNVISITED),
                   "gen2: missing GCFLAG_UNVISITED")
         ll_assert((tid & GCFLAG_AGE_MASK) < GCFLAG_AGE_MAX,
@@ -599,6 +616,8 @@
         tid = self.header(obj).tid
         ll_assert(bool(tid & GCFLAG_EXTERNAL),
                   "gen3: missing GCFLAG_EXTERNAL")
+        ll_assert(bool(tid & GCFLAG_HASHTAKEN),
+                  "gen3: missing GCFLAG_HASHTAKEN")
         ll_assert(not (tid & GCFLAG_UNVISITED),
                   "gen3: unexpected GCFLAG_UNVISITED")
         ll_assert((tid & GCFLAG_AGE_MASK) == GCFLAG_AGE_MAX,

Modified: pypy/branch/gc-hash-merge/pypy/rpython/memory/gc/markcompact.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rpython/memory/gc/markcompact.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rpython/memory/gc/markcompact.py	Fri Oct 16 11:47:18 2009
@@ -16,6 +16,8 @@
 
 first_gcflag = 1 << 16
 GCFLAG_MARKBIT = first_gcflag << 0
+GCFLAG_HASHTAKEN = first_gcflag << 1      # someone already asked for the hash
+GCFLAG_HASHFIELD = first_gcflag << 2      # we have an extra hash field
 
 memoryError = MemoryError()
 
@@ -71,6 +73,9 @@
 class MarkCompactGC(MovingGCBase):
     HDR = lltype.Struct('header', ('tid', lltype.Signed))
     typeid_is_in_field = 'tid'
+    withhash_flag_is_in_field = 'tid', GCFLAG_HASHFIELD
+    # ^^^ all prebuilt objects have GCFLAG_HASHTAKEN, but only some have
+    #     GCFLAG_HASHFIELD (and then they are one word longer).
     TID_BACKUP = lltype.Array(TID_TYPE, hints={'nolength':True})
     WEAKREF_OFFSETS = lltype.Array(lltype.Signed)
 
@@ -80,11 +85,12 @@
     malloc_zero_filled = True
     inline_simple_malloc = True
     inline_simple_malloc_varsize = True
-    first_unused_gcflag = first_gcflag << 1
+    first_unused_gcflag = first_gcflag << 3
     total_collection_time = 0.0
     total_collection_count = 0
 
     def __init__(self, config, chunk_size=DEFAULT_CHUNK_SIZE, space_size=4096):
+        import py; py.test.skip("Disabled for now, sorry")
         MovingGCBase.__init__(self, config, chunk_size)
         self.space_size = space_size
         self.next_collect_after = space_size/2 # whatever...
@@ -107,6 +113,7 @@
 
     def init_gc_object_immortal(self, addr, typeid16, flags=0):
         hdr = llmemory.cast_adr_to_ptr(addr, lltype.Ptr(self.HDR))
+        flags |= GCFLAG_HASHTAKEN
         hdr.tid = self.combine(typeid16, flags)
         # XXX we can store forward_ptr to itself, if we fix C backend
         # so that get_forwarding_address(obj) returns
@@ -176,19 +183,28 @@
 
     def compute_alive_objects(self):
         fromaddr = self.space
-        totalsize = 0
+        addraftercollect = self.space
         num = 1
         while fromaddr < self.free:
             size_gc_header = self.gcheaderbuilder.size_gc_header
-            hdr = llmemory.cast_adr_to_ptr(fromaddr, lltype.Ptr(self.HDR))
+            tid = llmemory.cast_adr_to_ptr(fromaddr, lltype.Ptr(self.HDR)).tid
             obj = fromaddr + size_gc_header
             objsize = self.get_size(obj)
             objtotalsize = size_gc_header + objsize
             if self.marked(obj):
-                totalsize += raw_malloc_usage(objtotalsize)
+                copy_has_hash_field = ((tid & GCFLAG_HASHFIELD) != 0 or
+                                       ((tid & GCFLAG_HASHTAKEN) != 0 and
+                                        addraftercollect < fromaddr))
+                addraftercollect += raw_malloc_usage(objtotalsize)
+                if copy_has_hash_field:
+                    addraftercollect += llmemory.sizeof(lltype.Signed)
             num += 1
             fromaddr += objtotalsize
-        self.totalsize_of_objs = totalsize
+            if tid & GCFLAG_HASHFIELD:
+                fromaddr += llmemory.sizeof(lltype.Signed)
+        ll_assert(addraftercollect <= fromaddr,
+                  "markcompactcollect() is trying to increase memory usage")
+        self.totalsize_of_objs = addraftercollect - self.space
         return num
 
     def collect(self, gen=0):
@@ -346,6 +362,8 @@
         self._trace_and_mark()
 
     def _trace_and_mark(self):
+        # XXX depth-first tracing... it can consume a lot of rawmalloced
+        # memory for very long stacks in some cases
         while self.to_see.non_empty():
             obj = self.to_see.pop()
             self.trace(obj, self._mark_obj, None)
@@ -592,3 +610,30 @@
         self.objects_with_weakrefs.delete()
         self.objects_with_weakrefs = new_with_weakref
         lltype.free(weakref_offsets, flavor='raw')
+
+    def get_size_incl_hash(self, obj):
+        size = self.get_size(obj)
+        hdr = self.header(obj)
+        if hdr.tid & GCFLAG_HASHFIELD:
+            size += llmemory.sizeof(lltype.Signed)
+        return size
+
+    def identityhash(self, gcobj):
+        # Unlike SemiSpaceGC.identityhash(), this function does not have
+        # to care about reducing top_of_space.  The reason is as
+        # follows.  When we collect, each object either moves to the
+        # left or stays where it is.  If it moves to the left (and if it
+        # has GCFLAG_HASHTAKEN), we can give it a hash field, and the
+        # end of the new object cannot move to the right of the end of
+        # the old object.  If it stays where it is, then we don't need
+        # to add the hash field.  So collecting can never actually grow
+        # the consumed size.
+        obj = llmemory.cast_ptr_to_adr(gcobj)
+        hdr = self.header(obj)
+        #
+        if hdr.tid & GCFLAG_HASHFIELD:  # the hash is in a field at the end
+            obj += self.get_size(obj)
+            return obj.signed[0]
+        #
+        hdr.tid |= GCFLAG_HASHTAKEN
+        return llmemory.cast_adr_to_int(obj)  # direct case

Modified: pypy/branch/gc-hash-merge/pypy/rpython/memory/gc/marksweep.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rpython/memory/gc/marksweep.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rpython/memory/gc/marksweep.py	Fri Oct 16 11:47:18 2009
@@ -19,6 +19,9 @@
                                        ('pool',        X_POOL_PTR))
 X_CLONE_PTR = lltype.Ptr(X_CLONE)
 
+FL_WITHHASH = 0x01
+FL_CURPOOL  = 0x02
+
 memoryError = MemoryError()
 class MarkSweepGC(GCBase):
     HDR = lltype.ForwardReference()
@@ -27,9 +30,10 @@
     # systems allocator and can't walk the heap
     HDR.become(lltype.Struct('header', ('typeid16', rffi.USHORT),
                                        ('mark', lltype.Bool),
-                                       ('curpool_flag', lltype.Bool),
+                                       ('flags', lltype.Char),
                                        ('next', HDRPTR)))
     typeid_is_in_field = 'typeid16'
+    withhash_flag_is_in_field = 'flags', FL_WITHHASH
 
     POOL = lltype.GcStruct('gc_pool')
     POOLPTR = lltype.Ptr(POOL)
@@ -102,7 +106,7 @@
         hdr = llmemory.cast_adr_to_ptr(result, self.HDRPTR)
         hdr.typeid16 = typeid16
         hdr.mark = False
-        hdr.curpool_flag = False
+        hdr.flags = '\x00'
         if has_finalizer:
             hdr.next = self.malloced_objects_with_finalizer
             self.malloced_objects_with_finalizer = hdr
@@ -139,7 +143,7 @@
         hdr = llmemory.cast_adr_to_ptr(result, self.HDRPTR)
         hdr.typeid16 = typeid16
         hdr.mark = False
-        hdr.curpool_flag = False
+        hdr.flags = '\x00'
         if has_finalizer:
             hdr.next = self.malloced_objects_with_finalizer
             self.malloced_objects_with_finalizer = hdr
@@ -178,7 +182,7 @@
         hdr = llmemory.cast_adr_to_ptr(result, self.HDRPTR)
         hdr.typeid16 = typeid16
         hdr.mark = False
-        hdr.curpool_flag = False
+        hdr.flags = '\x00'
         hdr.next = self.malloced_objects
         self.malloced_objects = hdr
         self.bytes_malloced = bytes_malloced
@@ -213,7 +217,7 @@
         hdr = llmemory.cast_adr_to_ptr(result, self.HDRPTR)
         hdr.typeid16 = typeid16
         hdr.mark = False
-        hdr.curpool_flag = False
+        hdr.flags = '\x00'
         hdr.next = self.malloced_objects
         self.malloced_objects = hdr
         self.bytes_malloced = bytes_malloced
@@ -516,7 +520,7 @@
         hdr = llmemory.cast_adr_to_ptr(addr, self.HDRPTR)
         hdr.typeid16 = typeid
         hdr.mark = False
-        hdr.curpool_flag = False
+        hdr.flags = '\x00'
 
     def init_gc_object_immortal(self, addr, typeid, flags=0):
         # prebuilt gc structures always have the mark bit set
@@ -524,7 +528,7 @@
         hdr = llmemory.cast_adr_to_ptr(addr, self.HDRPTR)
         hdr.typeid16 = typeid
         hdr.mark = True
-        hdr.curpool_flag = False
+        hdr.flags = '\x00'
 
     # experimental support for thread cloning
     def x_swap_pool(self, newpool):
@@ -596,7 +600,8 @@
         hdr = hdr.next   # skip the POOL object itself
         while hdr:
             next = hdr.next
-            hdr.curpool_flag = True   # mark all objects from malloced_list
+            # mark all objects from malloced_list
+            hdr.flags = chr(ord(hdr.flags) | FL_CURPOOL)
             hdr.next = lltype.nullptr(self.HDR)  # abused to point to the copy
             oldobjects.append(llmemory.cast_ptr_to_adr(hdr))
             hdr = next
@@ -613,7 +618,7 @@
                 continue   # pointer is NULL
             oldhdr = llmemory.cast_adr_to_ptr(oldobj_addr - size_gc_header,
                                               self.HDRPTR)
-            if not oldhdr.curpool_flag:
+            if not (ord(oldhdr.flags) & FL_CURPOOL):
                 continue   # ignore objects that were not in the malloced_list
             newhdr = oldhdr.next      # abused to point to the copy
             if not newhdr:
@@ -645,13 +650,13 @@
 
                 saved_id   = newhdr.typeid16  # XXX hack needed for genc
                 saved_flg1 = newhdr.mark
-                saved_flg2 = newhdr.curpool_flag
+                saved_flg2 = newhdr.flags
                 saved_next = newhdr.next      # where size_gc_header == 0
                 raw_memcopy(oldobj_addr, newobj_addr, size)
-                newhdr.typeid16     = saved_id
-                newhdr.mark         = saved_flg1
-                newhdr.curpool_flag = saved_flg2
-                newhdr.next         = saved_next
+                newhdr.typeid16 = saved_id
+                newhdr.mark     = saved_flg1
+                newhdr.flags    = saved_flg2
+                newhdr.next     = saved_next
 
                 offsets = self.offsets_to_gc_pointers(typeid)
                 i = 0
@@ -685,7 +690,7 @@
         next = lltype.nullptr(self.HDR)
         while oldobjects.non_empty():
             hdr = llmemory.cast_adr_to_ptr(oldobjects.pop(), self.HDRPTR)
-            hdr.curpool_flag = False   # reset the flag
+            hdr.flags = chr(ord(hdr.flags) &~ FL_CURPOOL)  # reset the flag
             hdr.next = next
             next = hdr
         oldobjects.delete()
@@ -700,6 +705,15 @@
         # reinstall the pool that was current at the beginning of x_clone()
         clonedata.pool = self.x_swap_pool(curpool)
 
+    def identityhash(self, obj):
+        obj = llmemory.cast_ptr_to_adr(obj)
+        hdr = self.header(obj)
+        if ord(hdr.flags) & FL_WITHHASH:
+            obj += self.get_size(obj)
+            return obj.signed[0]
+        else:
+            return llmemory.cast_adr_to_int(obj)
+
 
 class PrintingMarkSweepGC(MarkSweepGC):
     _alloc_flavor_ = "raw"

Modified: pypy/branch/gc-hash-merge/pypy/rpython/memory/gc/semispace.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rpython/memory/gc/semispace.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rpython/memory/gc/semispace.py	Fri Oct 16 11:47:18 2009
@@ -19,6 +19,8 @@
 # either immortal objects or (for HybridGC) externally raw_malloc'ed
 GCFLAG_EXTERNAL = first_gcflag << 1
 GCFLAG_FINALIZATION_ORDERING = first_gcflag << 2
+GCFLAG_HASHTAKEN = first_gcflag << 3      # someone already asked for the hash
+GCFLAG_HASHFIELD = first_gcflag << 4      # we have an extra hash field
 
 memoryError = MemoryError()
 
@@ -28,12 +30,15 @@
     inline_simple_malloc = True
     inline_simple_malloc_varsize = True
     malloc_zero_filled = True
-    first_unused_gcflag = first_gcflag << 3
+    first_unused_gcflag = first_gcflag << 5
     total_collection_time = 0.0
     total_collection_count = 0
 
     HDR = lltype.Struct('header', ('tid', lltype.Signed))   # XXX or rffi.INT?
     typeid_is_in_field = 'tid'
+    withhash_flag_is_in_field = 'tid', GCFLAG_HASHFIELD
+    # ^^^ all prebuilt objects have GCFLAG_HASHTAKEN, but only some have
+    #     GCFLAG_HASHFIELD (and then they are one word longer).
     FORWARDSTUB = lltype.GcStruct('forwarding_stub',
                                   ('forw', llmemory.Address))
     FORWARDSTUBPTR = lltype.Ptr(FORWARDSTUB)
@@ -297,11 +302,18 @@
             if free_after_collection < self.space_size // 5:
                 self.red_zone += 1
 
+    def get_size_incl_hash(self, obj):
+        size = self.get_size(obj)
+        hdr = self.header(obj)
+        if hdr.tid & GCFLAG_HASHFIELD:
+            size += llmemory.sizeof(lltype.Signed)
+        return size
+
     def scan_copied(self, scan):
         while scan < self.free:
             curr = scan + self.size_gc_header()
             self.trace_and_copy(curr)
-            scan += self.size_gc_header() + self.get_size(curr)
+            scan += self.size_gc_header() + self.get_size_incl_hash(curr)
         return scan
 
     def collect_roots(self):
@@ -328,15 +340,32 @@
             self.set_forwarding_address(obj, newobj, objsize)
             return newobj
 
-    def make_a_copy(self, obj, objsize):
+    def _make_a_copy_with_tid(self, obj, objsize, tid):
         totalsize = self.size_gc_header() + objsize
         newaddr = self.free
-        self.free += totalsize
         llarena.arena_reserve(newaddr, totalsize)
         raw_memcopy(obj - self.size_gc_header(), newaddr, totalsize)
+        #
+        # check if we need to write a hash value at the end of the new obj
+        if tid & (GCFLAG_HASHTAKEN|GCFLAG_HASHFIELD):
+            if tid & GCFLAG_HASHFIELD:
+                hash = (obj + objsize).signed[0]
+            else:
+                hash = llmemory.cast_adr_to_int(obj)
+                tid |= GCFLAG_HASHFIELD
+            (newaddr + totalsize).signed[0] = hash
+            totalsize += llmemory.sizeof(lltype.Signed)
+        #
+        self.free += totalsize
+        newhdr = llmemory.cast_adr_to_ptr(newaddr, lltype.Ptr(self.HDR))
+        newhdr.tid = tid
         newobj = newaddr + self.size_gc_header()
         return newobj
 
+    def make_a_copy(self, obj, objsize):
+        tid = self.header(obj).tid
+        return self._make_a_copy_with_tid(obj, objsize, tid)
+
     def trace_and_copy(self, obj):
         self.trace(obj, self._trace_copy, None)
 
@@ -407,7 +436,7 @@
 
     def init_gc_object_immortal(self, addr, typeid16, flags=0):
         hdr = llmemory.cast_adr_to_ptr(addr, lltype.Ptr(self.HDR))
-        flags |= GCFLAG_EXTERNAL | GCFLAG_FORWARDED
+        flags |= GCFLAG_EXTERNAL | GCFLAG_FORWARDED | GCFLAG_HASHTAKEN
         hdr.tid = self.combine(typeid16, flags)
         # immortal objects always have GCFLAG_FORWARDED set;
         # see get_forwarding_address().
@@ -570,3 +599,33 @@
 
     STATISTICS_NUMBERS = 0
 
+    def identityhash(self, gcobj):
+        # The following code should run at most twice.
+        while 1:
+            obj = llmemory.cast_ptr_to_adr(gcobj)
+            hdr = self.header(obj)
+            #
+            if hdr.tid & GCFLAG_HASHFIELD:  # the hash is in a field at the end
+                obj += self.get_size(obj)
+                return obj.signed[0]
+            #
+            if not (hdr.tid & GCFLAG_HASHTAKEN):
+                # It's the first time we ask for a hash, and it's not an
+                # external object.  Shrink the top of space by the extra
+                # hash word that will be needed after a collect.
+                shrunk_top = self.top_of_space - llmemory.sizeof(lltype.Signed)
+                if shrunk_top < self.free:
+                    # Cannot shrink!  Do a collection, asking for at least
+                    # one word of free space, and try again.  May raise
+                    # MemoryError.  Obscure: not called directly, but
+                    # across an llop, to make sure that there is the
+                    # correct push_roots/pop_roots around the call...
+                    llop.gc_obtain_free_space(llmemory.Address,
+                                              llmemory.sizeof(lltype.Signed))
+                    continue
+                # Now we can have side-effects: set GCFLAG_HASHTAKEN
+                # and lower the top of space.
+                self.top_of_space = shrunk_top
+                hdr.tid |= GCFLAG_HASHTAKEN
+            #
+            return llmemory.cast_adr_to_int(obj)  # direct case

Modified: pypy/branch/gc-hash-merge/pypy/rpython/memory/gctransform/boehm.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rpython/memory/gctransform/boehm.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rpython/memory/gctransform/boehm.py	Fri Oct 16 11:47:18 2009
@@ -10,6 +10,7 @@
 class BoehmGCTransformer(GCTransformer):
     malloc_zero_filled = True
     FINALIZER_PTR = lltype.Ptr(lltype.FuncType([llmemory.Address], lltype.Void))
+    HDR = lltype.Struct("header", ("hash", lltype.Signed))
 
     def __init__(self, translator, inline=False):
         super(BoehmGCTransformer, self).__init__(translator, inline=inline)
@@ -34,6 +35,15 @@
 
         ll_realloc = mh.ll_realloc
 
+        HDRPTR = lltype.Ptr(self.HDR)
+
+        def ll_identityhash(addr):
+            obj = llmemory.cast_adr_to_ptr(addr, HDRPTR)
+            h = obj.hash
+            if h == 0:
+                obj.hash = h = ~llmemory.cast_adr_to_int(addr)
+            return h
+
         if self.translator:
             self.malloc_fixedsize_ptr = self.inittime_helper(
                 ll_malloc_fixedsize, [lltype.Signed], llmemory.Address)
@@ -51,6 +61,9 @@
             self.realloc_ptr = self.inittime_helper(
                 ll_realloc, [llmemory.Address] + [lltype.Signed] * 4,
                 llmemory.Address)
+            self.identityhash_ptr = self.inittime_helper(
+                ll_identityhash, [llmemory.Address], lltype.Signed,
+                inline=False)
             self.mixlevelannotator.finish()   # for now
             self.mixlevelannotator.backend_optimize()
 
@@ -154,6 +167,13 @@
                            resulttype=llmemory.Address)
         hop.cast_result(v_addr)
 
+    def gct_gc_identityhash(self, hop):
+        v_obj = hop.spaceop.args[0]
+        v_adr = hop.genop("cast_ptr_to_adr", [v_obj],
+                          resulttype=llmemory.Address)
+        hop.genop("direct_call", [self.identityhash_ptr, v_adr],
+                  resultvar=hop.spaceop.result)
+
     def gct_gc_id(self, hop):
         # this is the logic from the HIDE_POINTER macro in <gc/gc.h>
         v_int = hop.genop('cast_ptr_to_int', [hop.spaceop.args[0]],

Modified: pypy/branch/gc-hash-merge/pypy/rpython/memory/gctransform/framework.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rpython/memory/gctransform/framework.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rpython/memory/gctransform/framework.py	Fri Oct 16 11:47:18 2009
@@ -337,6 +337,15 @@
                 [annmodel.SomeBool()],
                 s_gcref)
 
+        self.identityhash_ptr = getfn(GCClass.identityhash.im_func,
+                                      [s_gc, s_gcref],
+                                      annmodel.SomeInteger(),
+                                      minimal_transform=False)
+        if getattr(GCClass, 'obtain_free_space', False):
+            self.obtainfreespace_ptr = getfn(GCClass.obtain_free_space.im_func,
+                                             [s_gc, annmodel.SomeInteger()],
+                                             annmodel.SomeAddress())
+
         if GCClass.moving_gc:
             self.id_ptr = getfn(GCClass.id.im_func,
                                 [s_gc, s_gcref], annmodel.SomeInteger(),
@@ -432,10 +441,28 @@
     def gc_fields(self):
         return self._gc_fields
 
-    def gc_field_values_for(self, obj):
+    def gc_field_values_for(self, obj, needs_hash=False):
         hdr = self.gcdata.gc.gcheaderbuilder.header_of_object(obj)
         HDR = self._gc_HDR
-        return [getattr(hdr, fldname) for fldname in HDR._names]
+        withhash, flag = self.gcdata.gc.withhash_flag_is_in_field
+        result = []
+        for fldname in HDR._names:
+            x = getattr(hdr, fldname)
+            if fldname == withhash:
+                TYPE = lltype.typeOf(x)
+                x = lltype.cast_primitive(lltype.Signed, x)
+                if needs_hash:
+                    x |= flag       # set the flag in the header
+                else:
+                    x &= ~flag      # clear the flag in the header
+                x = lltype.cast_primitive(TYPE, x)
+            result.append(x)
+        return result
+
+    def get_hash_offset(self, T):
+        type_id = self.get_type_id(T)
+        assert not self.gcdata.q_is_varsize(type_id)
+        return self.gcdata.q_fixed_size(type_id)
 
     def finish_tables(self):
         group = self.layoutbuilder.close_table()
@@ -735,6 +762,16 @@
                            resulttype=llmemory.Address)
         hop.cast_result(v_addr)
 
+    def gct_gc_identityhash(self, hop):
+        livevars = self.push_roots(hop)
+        [v_ptr] = hop.spaceop.args
+        v_adr = hop.genop("cast_ptr_to_adr", [v_ptr],
+                          resulttype=llmemory.Address)
+        hop.genop("direct_call",
+                  [self.identityhash_ptr, self.c_const_gc, v_adr],
+                  resultvar=hop.spaceop.result)
+        self.pop_roots(hop, livevars)
+
     def gct_gc_id(self, hop):
         if self.id_ptr is not None:
             livevars = self.push_roots(hop)
@@ -747,6 +784,14 @@
         else:
             hop.rename('cast_ptr_to_int')     # works nicely for non-moving GCs
 
+    def gct_gc_obtain_free_space(self, hop):
+        livevars = self.push_roots(hop)
+        [v_number] = hop.spaceop.args
+        hop.genop("direct_call",
+                  [self.obtainfreespace_ptr, self.c_const_gc, v_number],
+                  resultvar=hop.spaceop.result)
+        self.pop_roots(hop, livevars)
+
     def gct_gc_set_max_heap_size(self, hop):
         [v_size] = hop.spaceop.args
         hop.genop("direct_call", [self.set_max_heap_size_ptr,

Modified: pypy/branch/gc-hash-merge/pypy/rpython/memory/gctransform/refcounting.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rpython/memory/gctransform/refcounting.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rpython/memory/gctransform/refcounting.py	Fri Oct 16 11:47:18 2009
@@ -34,7 +34,8 @@
 class RefcountingGCTransformer(GCTransformer):
     malloc_zero_filled = True
 
-    HDR = lltype.Struct("header", ("refcount", lltype.Signed))
+    HDR = lltype.Struct("header", ("refcount", lltype.Signed),
+                                  ("hash", lltype.Signed))
 
     def __init__(self, translator):
         super(RefcountingGCTransformer, self).__init__(translator, inline=True)
@@ -91,6 +92,13 @@
         mh.ll_malloc_varsize_no_length = ll_malloc_varsize_no_length
         ll_malloc_varsize = mh.ll_malloc_varsize
 
+        def ll_identityhash(addr):
+            obj = llmemory.cast_adr_to_ptr(addr, HDRPTR)
+            h = obj.hash
+            if h == 0:
+                obj.hash = h = llmemory.cast_adr_to_int(addr)
+            return h
+
         if self.translator:
             self.increfptr = self.inittime_helper(
                 ll_incref, [llmemory.Address], lltype.Void)
@@ -107,6 +115,9 @@
                 ll_malloc_varsize_no_length, [lltype.Signed]*3, llmemory.Address)
             self.malloc_varsize_ptr = self.inittime_helper(
                 ll_malloc_varsize, [lltype.Signed]*4, llmemory.Address)
+            self.identityhash_ptr = self.inittime_helper(
+                ll_identityhash, [llmemory.Address], lltype.Signed,
+                inline=False)
             self.mixlevelannotator.finish()
             self.mixlevelannotator.backend_optimize()
         # cache graphs:
@@ -183,6 +194,7 @@
             if not self.gcheaderbuilder.get_header(p):
                 hdr = self.gcheaderbuilder.new_header(p)
                 hdr.refcount = sys.maxint // 2
+                hdr.hash = lltype.identityhash(p)
 
     def static_deallocation_funcptr_for_type(self, TYPE):
         if TYPE in self.static_deallocator_funcptrs:
@@ -286,4 +298,9 @@
         self.queryptr2dynamic_deallocator_funcptr[queryptr._obj] = fptr
         return fptr
 
-
+    def gct_gc_identityhash(self, hop):
+        v_obj = hop.spaceop.args[0]
+        v_adr = hop.genop("cast_ptr_to_adr", [v_obj],
+                          resulttype=llmemory.Address)
+        hop.genop("direct_call", [self.identityhash_ptr, v_adr],
+                  resultvar=hop.spaceop.result)

Modified: pypy/branch/gc-hash-merge/pypy/rpython/memory/gctransform/transform.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rpython/memory/gctransform/transform.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rpython/memory/gctransform/transform.py	Fri Oct 16 11:47:18 2009
@@ -380,6 +380,10 @@
     def gct_zero_gc_pointers_inside(self, hop):
         pass
 
+    def gct_gc_identityhash(self, hop):
+        # must be implemented in the various GCs
+        raise NotImplementedError
+
     def gct_gc_id(self, hop):
         # this assumes a non-moving GC.  Moving GCs need to override this
         hop.rename('cast_ptr_to_int')

Modified: pypy/branch/gc-hash-merge/pypy/rpython/ootypesystem/ooregistry.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rpython/ootypesystem/ooregistry.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rpython/ootypesystem/ooregistry.py	Fri Oct 16 11:47:18 2009
@@ -81,21 +81,3 @@
         hop.has_implicit_exception(ValueError)
         hop.exception_is_here()
         return hop.genop('ooparse_float', vlist, resulttype = ootype.Float)
-
-
-class Entry_oohash(ExtRegistryEntry):
-    _about_ = ootype.oohash
-
-    def compute_result_annotation(self, str_s):
-        if not (isinstance(str_s, annmodel.SomeOOInstance)
-                and (str_s.ootype is ootype.String or
-                     str_s.ootype is ootype.Unicode)):
-            return annmodel.s_ImpossibleValue
-        return annmodel.SomeInteger()
-
-    def specialize_call(self, hop):
-        assert isinstance(hop.args_s[0], annmodel.SomeOOInstance)\
-               and (hop.args_s[0].ootype is ootype.String or
-                    hop.args_s[0].ootype is ootype.Unicode)
-        vlist = hop.inputargs(hop.args_r[0])
-        return hop.genop('oohash', vlist, resulttype=ootype.Signed)

Modified: pypy/branch/gc-hash-merge/pypy/rpython/ootypesystem/ootype.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rpython/ootypesystem/ootype.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rpython/ootypesystem/ootype.py	Fri Oct 16 11:47:18 2009
@@ -4,6 +4,7 @@
 from pypy.rpython.lltypesystem.lltype import Bool, Void, UniChar, typeOf, \
         Primitive, isCompatibleType, enforce, saferecursive, SignedLongLong, UnsignedLongLong
 from pypy.rpython.lltypesystem.lltype import frozendict, isCompatibleType
+from pypy.rpython.lltypesystem.lltype import identityhash
 from pypy.rlib.rarithmetic import intmask
 from pypy.rlib import objectmodel
 from pypy.tool.uid import uid
@@ -331,9 +332,14 @@
     # can treat them polymorphically, if they choose to do so.
 
     def __init__(self, fields, _hints={}):
+        if isinstance(fields, dict):
+            fields = fields.items()    # random order in that case
         self._fields = frozendict()
-        for name, ITEMTYPE in fields.items():
+        fields_in_order = []
+        for name, ITEMTYPE in fields:
             self._fields[name] = ITEMTYPE, ITEMTYPE._defl()
+            fields_in_order.append(name)
+        self._fields_in_order = tuple(fields_in_order)
         self._null = _null_record(self)
         self._hints = frozendict(_hints)
 
@@ -361,8 +367,8 @@
             return self, None
 
     def __str__(self):
-        item_str = ["%s: %s" % (str(name), str(ITEMTYPE))
-                    for name, (ITEMTYPE, _) in self._fields.items()]
+        item_str = ["%s: %s" % (str(name), str(self._fields[name][0]))
+                    for name in self._fields_in_order]
         return '%s(%s)' % (self.__class__.__name__, ", ".join(item_str))
 
 class BuiltinADTType(BuiltinType):
@@ -411,6 +417,7 @@
 
         generic_types = { self.SELFTYPE_T: self }
         self._GENERIC_METHODS = frozendict({
+            "ll_hash": Meth([], Signed),
             "ll_stritem_nonneg": Meth([Signed], self.CHAR),
             "ll_strlen": Meth([], Signed),
             "ll_strconcat": Meth([self.SELFTYPE_T], self.SELFTYPE_T),
@@ -881,13 +888,10 @@
         return hash(self.obj)
 
     def _identityhash(self):
-        if self:
-            try:
-                return self.obj._identityhash()
-            except AttributeError:
-                return intmask(id(self.obj))
-        else:
-            return 0 # for all null objects
+        try:
+            return self.obj._identityhash()
+        except AttributeError:
+            return hash(self.obj)
 
     def _cast_to_object(self):
         return self
@@ -986,10 +990,7 @@
         return self
 
     def _identityhash(self):
-        if self:
-            return intmask(id(self))
-        else:
-            return 0   # for all null instances
+        return hash(self)
 
     def _cast_to_object(self):
         return make_object(ooupcast(ROOT, self))
@@ -1386,6 +1387,12 @@
         else:
             assert False, 'Unknown type %s' % self._TYPE
 
+    def ll_hash(self):
+        # NOT_RPYTHON
+        # hopefully, ll_hash() should not be called on NULL
+        assert self._str is not None
+        return objectmodel._hash_string(self._str)
+
     def ll_stritem_nonneg(self, i):
         # NOT_RPYTHON
         s = self._str
@@ -1622,10 +1629,7 @@
         self._array[index] = item
 
     def _identityhash(self):
-        if self:
-            return intmask(id(self))
-        else:
-            return 0   # for all null arrays
+        return hash(self)
 
 class _null_array(_null_mixin(_array), _array):
 
@@ -1772,13 +1776,16 @@
             self.__dict__[name] = value
 
     def _identityhash(self):
-        if self:
-            return intmask(id(self))
-        else:
-            return 0 # for all null tuples
+        return hash(self)
+
+    def _items_in_order(self):
+        return [self._items[name] for name in self._TYPE._fields_in_order]
+
+    def _ll_hash(self):
+        return objectmodel._ll_hash_tuple(self._items_in_order())
 
     def __hash__(self):
-        key = tuple(self._items.keys()), tuple(self._items.values())
+        key = tuple(self._items_in_order())
         return hash(key)
 
     def __eq__(self, other):
@@ -1898,16 +1905,6 @@
     assert typeOf(obj) is Object
     return obj._cast_to(EXPECTED_TYPE)
 
-def ooidentityhash(inst):
-    T = typeOf(inst)
-    assert T is Object or isinstance(T, (Instance, Record, Array))
-    return inst._identityhash()
-
-def oohash(inst):
-    assert typeOf(inst) is String or typeOf(inst) is Unicode
-    # for now only strings and unicode are supported
-    return hash(inst._str)
-
 def oostring(obj, base):
     """
     Convert char, int, float, instances and str to str.

Modified: pypy/branch/gc-hash-merge/pypy/rpython/ootypesystem/rbuiltin.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rpython/ootypesystem/rbuiltin.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rpython/ootypesystem/rbuiltin.py	Fri Oct 16 11:47:18 2009
@@ -48,12 +48,6 @@
     return hop.genop('runtimenew', vlist,
                      resulttype = hop.r_result.lowleveltype)
 
-def rtype_ooidentityhash(hop):
-    assert isinstance(hop.args_s[0], (annmodel.SomeOOInstance, annmodel.SomeOOObject))
-    vlist = hop.inputargs(hop.args_r[0])
-    return hop.genop('ooidentityhash', vlist,
-                     resulttype = ootype.Signed)
-
 def rtype_ooupcast(hop):
     assert isinstance(hop.args_s[0].const, ootype.Instance)
     assert isinstance(hop.args_s[1], annmodel.SomeOOInstance)
@@ -132,7 +126,6 @@
 BUILTIN_TYPER[ootype.subclassof] = rtype_subclassof
 BUILTIN_TYPER[ootype.instanceof] = rtype_instanceof
 BUILTIN_TYPER[ootype.runtimenew] = rtype_runtimenew
-BUILTIN_TYPER[ootype.ooidentityhash] = rtype_ooidentityhash
 BUILTIN_TYPER[ootype.ooupcast] = rtype_ooupcast
 BUILTIN_TYPER[ootype.oodowncast] = rtype_oodowncast
 BUILTIN_TYPER[ootype.cast_from_object] = rtype_cast_from_object

Modified: pypy/branch/gc-hash-merge/pypy/rpython/ootypesystem/rclass.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rpython/ootypesystem/rclass.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rpython/ootypesystem/rclass.py	Fri Oct 16 11:47:18 2009
@@ -152,7 +152,7 @@
     if config.translation.ootype.mangle:
         return 'o' + name
     else:
-        not_allowed = ('_hash_cache_', 'meta', 'class_')
+        not_allowed = ('meta', 'class_')
         assert name not in not_allowed, "%s is a reserved name" % name
         return name
 
@@ -253,13 +253,6 @@
                     allmethods[mangled] = meth_name, s_meth
                 # else: it's the __init__ of a builtin exception
 
-        #
-        # hash() support
-        if self.rtyper.needs_hash_support(self.classdef):
-            from pypy.rpython import rint
-            allfields['_hash_cache_'] = rint.signed_repr
-            fields['_hash_cache_'] = ootype.Signed
-
         ootype.addFields(self.lowleveltype, fields)
 
         self.rbase = getinstancerepr(self.rtyper, self.classdef.basedef)
@@ -413,9 +406,6 @@
                         graph=graph)
         ootype.addMethods(self.lowleveltype, {mangled: m})
 
-    def get_ll_hash_function(self):
-        return ll_inst_hash
-
     def rtype_getattr(self, hop):
         if hop.s_result.is_constant():
             return hop.inputconst(hop.r_result, hop.s_result.const)
@@ -526,8 +516,6 @@
                 llattrvalue = None
             elif mangled == 'meta':
                 llattrvalue = classrepr.get_meta_instance()
-            elif mangled == '_hash_cache_': # hash() support
-                continue   # already done by initialize_prebuilt_hash()
             else:
                 name = unmangle(mangled, self.rtyper.getconfig())
                 try:
@@ -544,8 +532,7 @@
             setattr(result, mangled, llattrvalue)
 
     def initialize_prebuilt_hash(self, value, result):
-        if '_hash_cache_' in self.lowleveltype._allfields():
-            result._hash_cache_ = hash(value)
+        pass
 
 
 class __extend__(pairtype(InstanceRepr, InstanceRepr)):
@@ -581,15 +568,6 @@
         v = rpair.rtype_eq(hop)
         return hop.genop("bool_not", [v], resulttype=ootype.Bool)
 
-
-def ll_inst_hash(ins):
-    if not ins:
-        return 0
-    cached = ins._hash_cache_
-    if cached == 0:
-        cached = ins._hash_cache_ = ootype.ooidentityhash(ins)
-    return cached
-
 def ll_inst_type(obj):
     if obj:
         return ootype.classof(obj)

Modified: pypy/branch/gc-hash-merge/pypy/rpython/ootypesystem/rstr.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rpython/ootypesystem/rstr.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rpython/ootypesystem/rstr.py	Fri Oct 16 11:47:18 2009
@@ -113,10 +113,10 @@
         return ootype.oounicode(ch, -1)
 
     def ll_strhash(s):
-        return ootype.oohash(s)
+        return s.ll_hash()
 
     def ll_strfasthash(s):
-        return ootype.oohash(s)
+        return s.ll_hash()
 
     def ll_char_mul(ch, times):
         if times < 0:

Modified: pypy/branch/gc-hash-merge/pypy/rpython/ootypesystem/rtupletype.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rpython/ootypesystem/rtupletype.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rpython/ootypesystem/rtupletype.py	Fri Oct 16 11:47:18 2009
@@ -12,4 +12,4 @@
         fields = [('item%d' % i, TYPE) for i, TYPE in enumerate(field_lltypes)]
         hints = {'immutable': True,
                  'noidentity': True}
-        return ootype.Record(dict(fields), _hints=hints)
+        return ootype.Record(fields, _hints=hints)

Modified: pypy/branch/gc-hash-merge/pypy/rpython/ootypesystem/test/test_ooclean.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rpython/ootypesystem/test/test_ooclean.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rpython/ootypesystem/test/test_ooclean.py	Fri Oct 16 11:47:18 2009
@@ -438,21 +438,22 @@
 
 def test_hash_preservation():
     from pypy.rlib.objectmodel import current_object_addr_as_int
+    from pypy.rlib.objectmodel import compute_identity_hash
     class C:
         pass
     class D(C):
         pass
     def f1():
         d2 = D()
-        # xxx we assume that the identityhash doesn't change from
-        #     one line to the next
+        # xxx we assume that current_object_addr_as_int is defined as
+        # simply returning the identity hash
         current_identityhash = current_object_addr_as_int(d2)
-        instance_hash = hash(d2)
-        return ((current_identityhash & sys.maxint) ==
-                (instance_hash & sys.maxint))
+        instance_hash = compute_identity_hash(d2)
+        return current_identityhash == instance_hash
     res = interpret(f1, [])
     assert res is True
 
+    py.test.skip("hash is not preserved during an ootype translation")
     c = C()
     d = D()
     def f2(): return hash(c)

Modified: pypy/branch/gc-hash-merge/pypy/rpython/ootypesystem/test/test_oorecord.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rpython/ootypesystem/test/test_oorecord.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rpython/ootypesystem/test/test_oorecord.py	Fri Oct 16 11:47:18 2009
@@ -42,12 +42,12 @@
     n2 = null(T)
     assert n == n2
 
-def test_ooidentityhash():
+def test_identityhash():
     T = Record({"a": Signed, "b": Signed})
     t = new(T)
     t.a = 1
     t.b = 2
     t2 = new(T)
-    t.a = 1
-    t.b = 2
-    assert ooidentityhash(t) != ooidentityhash(t2)
+    t2.a = 1
+    t2.b = 2
+    assert identityhash(t) != identityhash(t2)       # xxx???

Modified: pypy/branch/gc-hash-merge/pypy/rpython/ootypesystem/test/test_oortype.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rpython/ootypesystem/test/test_oortype.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rpython/ootypesystem/test/test_oortype.py	Fri Oct 16 11:47:18 2009
@@ -351,7 +351,7 @@
     res = interpret(fn, [], type_system='ootype')
     assert res == 42
 
-def test_ooidentityhash():
+def test_identityhash():
     L = List(Signed)
 
     def fn():
@@ -359,7 +359,7 @@
         lst2 = new(L)
         obj1 = cast_to_object(lst1)
         obj2 = cast_to_object(lst2)
-        return ooidentityhash(obj1) == ooidentityhash(obj2)
+        return identityhash(obj1) == identityhash(obj2)
 
     res = interpret(fn, [], type_system='ootype')
     assert not res

Modified: pypy/branch/gc-hash-merge/pypy/rpython/ootypesystem/test/test_ootype.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rpython/ootypesystem/test/test_ootype.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rpython/ootypesystem/test/test_ootype.py	Fri Oct 16 11:47:18 2009
@@ -622,27 +622,27 @@
     cls2 = cast_from_object(Class, obj)
     assert cls is cls2
     
-def test_object_ooidentityhash():
+def test_object_identityhash():
     A = Instance("Foo", ROOT)
     a = new(A)
     obj1 = cast_to_object(a)
     obj2 = cast_to_object(a)
-    assert ooidentityhash(obj1) == ooidentityhash(obj2)
+    assert identityhash(obj1) == identityhash(obj2)
 
-def test_object_ooidentityhash_sm():
+def test_object_identityhash_sm():
     M = StaticMethod([Signed], Signed)
     def m_(x):
        return x
     m = static_meth(M, "m", _callable=m_)
     obj1 = cast_to_object(m)
     obj2 = cast_to_object(m)
-    assert ooidentityhash(obj1) == ooidentityhash(obj2)
+    assert identityhash(obj1) == identityhash(obj2)
 
-def test_ooidentityhash_array():
+def test_identityhash_array():
     A = Array(Signed)
     a = oonewarray(A, 10)
     b = oonewarray(A, 10)
-    assert ooidentityhash(a) != ooidentityhash(b)
+    assert identityhash(a) != identityhash(b)     # likely
 
 def test_bool_class():
     A = Instance("Foo", ROOT)

Modified: pypy/branch/gc-hash-merge/pypy/rpython/rbuiltin.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rpython/rbuiltin.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rpython/rbuiltin.py	Fri Oct 16 11:47:18 2009
@@ -494,6 +494,10 @@
     return hop.genop('cast_int_to_ptr', [v_input],
                      resulttype = hop.r_result.lowleveltype)
 
+def rtype_identity_hash(hop):
+    vlist = hop.inputargs(hop.args_r[0])
+    return hop.genop('gc_identityhash', vlist, resulttype=lltype.Signed)
+
 def rtype_runtime_type_info(hop):
     assert isinstance(hop.args_r[0], rptr.PtrRepr)
     vlist = hop.inputargs(hop.args_r[0])
@@ -512,6 +516,7 @@
 BUILTIN_TYPER[lltype.cast_int_to_ptr] = rtype_cast_int_to_ptr
 BUILTIN_TYPER[lltype.typeOf] = rtype_const_result
 BUILTIN_TYPER[lltype.nullptr] = rtype_const_result
+BUILTIN_TYPER[lltype.identityhash] = rtype_identity_hash
 BUILTIN_TYPER[lltype.getRuntimeTypeInfo] = rtype_const_result
 BUILTIN_TYPER[lltype.Ptr] = rtype_const_result
 BUILTIN_TYPER[lltype.runtime_type_info] = rtype_runtime_type_info

Modified: pypy/branch/gc-hash-merge/pypy/rpython/rclass.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rpython/rclass.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rpython/rclass.py	Fri Oct 16 11:47:18 2009
@@ -236,15 +236,18 @@
             self.setup()
             result = self.create_instance()
             self._reusable_prebuilt_instance = result
-            self.initialize_prebuilt_instance(Ellipsis, self.classdef, result)
+            self.initialize_prebuilt_data(Ellipsis, self.classdef, result)
             return result
 
     def initialize_prebuilt_instance(self, value, classdef, result):
-        # must fill in the _hash_cache_ field before the other ones
+        # must fill in the hash cache before the other ones
         # (see test_circular_hash_initialization)
         self.initialize_prebuilt_hash(value, result)
         self.initialize_prebuilt_data(value, classdef, result)
 
+    def get_ll_hash_function(self):
+        return ll_inst_hash
+
     def rtype_type(self, hop):
         raise NotImplementedError
 
@@ -272,6 +275,13 @@
     rinstance = getinstancerepr(rtyper, classdef)
     return rinstance.new_instance(llops, classcallhop)
 
+def ll_inst_hash(ins):
+    if not ins:
+        return 0    # for None
+    else:
+        from pypy.rpython.lltypesystem import lltype
+        return lltype.identityhash(ins)     # also works for ootype
+
 
 _missing = object()
 

Modified: pypy/branch/gc-hash-merge/pypy/rpython/rdict.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rpython/rdict.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rpython/rdict.py	Fri Oct 16 11:47:18 2009
@@ -46,14 +46,8 @@
         else:
             return self._externalvsinternal(self.rtyper, item_repr)
 
-    def pickkeyrepr(self, key_repr):
-        external, internal = self.pickrepr(key_repr)
-        if external != internal:
-            internal = external
-            while not self.rtyper.needs_hash_support(internal.classdef):
-                internal = internal.rbase
-        return external, internal
-        
+    pickkeyrepr = pickrepr
+
     def compact_repr(self):
         return 'DictR %s %s' % (self.key_repr.compact_repr(), self.value_repr.compact_repr())
 

Modified: pypy/branch/gc-hash-merge/pypy/rpython/rfloat.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rpython/rfloat.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rpython/rfloat.py	Fri Oct 16 11:47:18 2009
@@ -10,6 +10,7 @@
 from pypy.rpython.rmodel import log
 
 from pypy.rlib.rarithmetic import base_int
+from pypy.rlib.objectmodel import _hash_float
 
 import math
 
@@ -109,7 +110,7 @@
     get_ll_le_function = get_ll_eq_function
 
     def get_ll_hash_function(self):
-        return ll_hash_float
+        return _hash_float
 
     def rtype_is_true(_, hop):
         vlist = hop.inputargs(Float)
@@ -142,23 +143,6 @@
         pass
     ll_str._annspecialcase_ = "specialize:ts('ll_str.ll_float_str')"
 
-
-TAKE_NEXT = float(2**31)
-
-def ll_hash_float(f):
-    """
-    this implementation is identical to the CPython implementation,
-    despite the fact that the integer case is not treated, specially.
-    This should be special-cased in W_FloatObject.
-    In the low-level case, floats cannot be used with ints in dicts, anyway.
-    """
-    from pypy.rlib.rarithmetic import intmask
-    v, expo = math.frexp(f)
-    v *= TAKE_NEXT
-    hipart = int(v)
-    v = (v - float(hipart)) * TAKE_NEXT
-    x = hipart + int(v) + (expo << 15)
-    return intmask(x)
 #
 # _________________________ Conversions _________________________
 

Modified: pypy/branch/gc-hash-merge/pypy/rpython/rtuple.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rpython/rtuple.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rpython/rtuple.py	Fri Oct 16 11:47:18 2009
@@ -100,14 +100,14 @@
         autounrolling_funclist = unrolling_iterable(enumerate(hash_funcs))
 
         def ll_hash(t):
-            retval = 0x345678
-            mult = 1000003
+            """Must be kept in sync with rlib.objectmodel._hash_tuple()."""
+            x = 0x345678
             for i, hash_func in autounrolling_funclist:
                 attrname = 'item%d' % i
                 item = getattr(t, attrname)
-                retval = intmask((retval ^ hash_func(item)) * intmask(mult))
-                mult = mult + 82520 + 2*len(items_r)
-            return retval
+                y = hash_func(item)
+                x = intmask((1000003 * x) ^ y)
+            return x
 
         _gen_hash_function_cache[key] = ll_hash
         return ll_hash

Modified: pypy/branch/gc-hash-merge/pypy/rpython/rtyper.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rpython/rtyper.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rpython/rtyper.py	Fri Oct 16 11:47:18 2009
@@ -651,9 +651,6 @@
 
     # __________ utilities __________
 
-    def needs_hash_support(self, clsdef):
-        return clsdef in self.annotator.bookkeeper.needs_hash_support
-
     def needs_wrapper(self, cls):
         return cls in self.classes_with_wrapper
 

Modified: pypy/branch/gc-hash-merge/pypy/rpython/test/test_rclass.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rpython/test/test_rclass.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rpython/test/test_rclass.py	Fri Oct 16 11:47:18 2009
@@ -425,27 +425,32 @@
 
     def test_hash_preservation(self):
         from pypy.rlib.objectmodel import current_object_addr_as_int
+        from pypy.rlib.objectmodel import compute_identity_hash
         class C:
             pass
         class D(C):
             pass
         c = C()
         d = D()
+        h_c = compute_identity_hash(c)
+        h_d = compute_identity_hash(d)
+        #
         def f():
             d2 = D()
-            return hash(d2), current_object_addr_as_int(d2), hash(c), hash(d)
+            return (compute_identity_hash(d2),
+                    current_object_addr_as_int(d2),
+                    compute_identity_hash(c),
+                    compute_identity_hash(d))
 
         res = self.interpret(f, [])
         # xxx this is too precise, checking the exact implementation
-        if isinstance(self, OORtypeMixin):
-            assert res.item0 == res.item1
-        else:
-            assert res.item0 == ~res.item1
+        assert res.item0 == res.item1
         # the following property is essential on top of the lltypesystem
-        # otherwise prebuilt dictionaries are broken.  It's not that
-        # relevant on top of the ootypesystem though.
-        assert res.item2 == hash(c)
-        assert res.item3 == hash(d)
+        # otherwise prebuilt dictionaries are broken.  It's wrong on
+        # top of the ootypesystem though.
+        if type(self) is TestLLtype:
+            assert res.item2 == h_c
+            assert res.item3 == h_d
 
     def test_circular_hash_initialization(self):
         class B:
@@ -681,6 +686,7 @@
         assert self.interpret(fn, []) == 3 + 8 + 9
 
     def test_hash_of_none(self):
+        from pypy.rlib.objectmodel import compute_hash
         class A:
             pass
         def fn(x):
@@ -688,14 +694,15 @@
                 obj = A()
             else:
                 obj = None
-            return hash(obj)
+            return compute_hash(obj)
         res = self.interpret(fn, [0])
         assert res == 0
 
     def test_hash_of_only_none(self):
+        from pypy.rlib.objectmodel import compute_hash
         def fn():
             obj = None
-            return hash(obj)
+            return compute_hash(obj)
         res = self.interpret(fn, [])
         assert res == 0
 
@@ -754,7 +761,7 @@
                f_summary == {"oosetfield": 2} # for ootype
 
 
-class TestLltype(BaseTestRclass, LLRtypeMixin):
+class TestLLtype(BaseTestRclass, LLRtypeMixin):
 
     def test__del__(self):
         class A(object):
@@ -843,15 +850,16 @@
         from pypy.annotation import model as annmodel
         from pypy.rpython import extregistry
         from pypy.rpython.annlowlevel import cast_object_to_ptr
-        class X(object): pass
-        class Y(X): pass
-        class Z(Y): pass
+        from pypy.rlib.objectmodel import compute_identity_hash
+
+        class Z(object):
+            pass
 
         def my_gethash(z):
             not_implemented
 
         def ll_my_gethash(ptr):
-            return ptr.gethash()
+            return identityhash(ptr)    # from lltype
 
         class MyGetHashEntry(extregistry.ExtRegistryEntry):
             _about_ = my_gethash
@@ -862,13 +870,9 @@
                 return hop.gendirectcall(ll_my_gethash, v_instance)
 
         def f(n):
-            if n > 10:
-                z = Y()
-                got = -1    # path never used
-            else:
-                z = Z()
-                got = my_gethash(z)
-            expected = hash(z)     # put the _hash_cache_ in the class Y
+            z = Z()
+            got = my_gethash(z)
+            expected = compute_identity_hash(z)
             return got - expected
 
         res = self.interpret(f, [5])

Modified: pypy/branch/gc-hash-merge/pypy/rpython/test/test_rfloat.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rpython/test/test_rfloat.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rpython/test/test_rfloat.py	Fri Oct 16 11:47:18 2009
@@ -4,6 +4,7 @@
 from pypy.rpython.test.tool import BaseRtypingTest, LLRtypeMixin, OORtypeMixin
 from pypy.rlib.rarithmetic import r_int, r_uint, r_longlong, r_singlefloat,\
      isnan, isinf
+from pypy.rlib.objectmodel import compute_hash
 
 class TestSnippet(object):
 
@@ -145,9 +146,9 @@
 
     def test_hash(self):
         def fn(f):
-            return hash(f)
+            return compute_hash(f)
         res = self.interpret(fn, [1.5])
-        assert res == hash(1.5)
+        assert res == compute_hash(1.5)
 
 
 class TestOOtype(BaseTestRfloat, OORtypeMixin):

Modified: pypy/branch/gc-hash-merge/pypy/rpython/test/test_rstr.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rpython/test/test_rstr.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rpython/test/test_rstr.py	Fri Oct 16 11:47:18 2009
@@ -694,15 +694,16 @@
         assert self.ll_to_string(res) == ""
 
     def test_hash(self):
+        from pypy.rlib.objectmodel import compute_hash
         const = self.const
         def fn(i):
             if i == 0:
                 s = const('')
             else:
                 s = const("xxx")
-            return hash(s)
+            return compute_hash(s)
         res = self.interpret(fn, [0])
-        assert res == self.EMPTY_STRING_HASH
+        assert res == -1          # empty string hash
         res = self.interpret(fn, [1])
         assert typeOf(res) == Signed
 
@@ -883,8 +884,6 @@
 
 class TestLLtype(BaseTestRstr, LLRtypeMixin):
 
-    EMPTY_STRING_HASH = -1
-
     def test_ll_find_rfind(self):
         llstr = self.string_to_ll
         
@@ -899,18 +898,18 @@
             assert res == s1.rfind(s2)
 
     def test_hash_via_type(self):
+        from pypy.rlib.objectmodel import compute_hash
+
         def f(n):
             s = malloc(STR, n)
             s.hash = 0
             for i in range(n):
                 s.chars[i] = chr(i)
-            return s.gethash() - hash('\x00\x01\x02\x03\x04')
+            return s.gethash() - compute_hash('\x00\x01\x02\x03\x04')
 
         res = self.interpret(f, [5])
         assert res == 0
 
 
 class TestOOtype(BaseTestRstr, OORtypeMixin):
-
-    EMPTY_STRING_HASH = 0
-        
+    pass

Modified: pypy/branch/gc-hash-merge/pypy/rpython/test/test_rtuple.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rpython/test/test_rtuple.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rpython/test/test_rtuple.py	Fri Oct 16 11:47:18 2009
@@ -172,8 +172,9 @@
         assert r_AB_tup.lowleveltype == r_BA_tup.lowleveltype
 
     def test_tuple_hash(self):
+        from pypy.rlib.objectmodel import compute_hash
         def f(i, j):
-            return hash((i, j))
+            return compute_hash((i, j))
 
         res1 = self.interpret(f, [12, 27])
         res2 = self.interpret(f, [27, 12])
@@ -333,9 +334,10 @@
             yield test_le, a,b,c,d, resu
             yield test_ge, a,b,c,d, resu
 
-    def test_tuple_hash(self):
+    def test_tuple_hash_2(self):
+        from pypy.rlib.objectmodel import compute_hash
         def f(n):
-            return hash((n, 6)) == hash((3, n*2))
+            return compute_hash((n, 6)) == compute_hash((3, n*2))
         res = self.interpret(f, [3])
         assert res is True
 

Modified: pypy/branch/gc-hash-merge/pypy/rpython/test/test_runicode.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/rpython/test/test_runicode.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/rpython/test/test_runicode.py	Fri Oct 16 11:47:18 2009
@@ -214,18 +214,19 @@
     test_hlstr = unsupported
 
 class TestLLtype(BaseTestRUnicode, LLRtypeMixin):
-    EMPTY_STRING_HASH = -1
 
     def test_hash_via_type(self):
+        from pypy.rlib.objectmodel import compute_hash
+
         def f(n):
             s = malloc(UNICODE, n)
             s.hash = 0
             for i in range(n):
                 s.chars[i] = unichr(ord('A') + i)
-            return s.gethash() - hash(u'ABCDE')
+            return s.gethash() - compute_hash(u'ABCDE')
 
         res = self.interpret(f, [5])
         assert res == 0
 
 class TestOOtype(BaseTestRUnicode, OORtypeMixin):
-    EMPTY_STRING_HASH = 0
+    pass

Modified: pypy/branch/gc-hash-merge/pypy/translator/c/gc.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/translator/c/gc.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/translator/c/gc.py	Fri Oct 16 11:47:18 2009
@@ -12,16 +12,24 @@
 
 class BasicGcPolicy(object):
     requires_stackless = False
+    stores_hash_at_the_end = False
 
     def __init__(self, db, thread_enabled=False):
         self.db = db
         self.thread_enabled = thread_enabled
 
     def common_gcheader_definition(self, defnode):
-        return []
+        if defnode.db.gctransformer is not None:
+            HDR = defnode.db.gctransformer.HDR
+            return [(name, HDR._flds[name]) for name in HDR._names]
+        else:
+            return []
 
     def common_gcheader_initdata(self, defnode):
-        return []
+        if defnode.db.gctransformer is not None:
+            raise NotImplementedError
+        else:
+            return []
 
     def struct_gcheader_definition(self, defnode):
         return self.common_gcheader_definition(defnode)
@@ -35,9 +43,6 @@
     def array_gcheader_initdata(self, defnode):
         return self.common_gcheader_initdata(defnode)
 
-    def struct_after_definition(self, defnode):
-        return []
-
     def compilation_info(self):
         if not self.db:
             return ExternalCompilationInfo()
@@ -50,6 +55,9 @@
             post_include_bits=['typedef void *GC_hidden_pointer;']
             )
 
+    def get_prebuilt_hash(self, obj):
+        return None
+
     def need_no_typeptr(self):
         return False
 
@@ -102,13 +110,6 @@
 class RefcountingGcPolicy(BasicGcPolicy):
     transformerclass = refcounting.RefcountingGCTransformer
 
-    def common_gcheader_definition(self, defnode):
-        if defnode.db.gctransformer is not None:
-            HDR = defnode.db.gctransformer.HDR
-            return [(name, HDR._flds[name]) for name in HDR._names]
-        else:
-            return []
-
     def common_gcheader_initdata(self, defnode):
         if defnode.db.gctransformer is not None:
             gct = defnode.db.gctransformer
@@ -193,6 +194,12 @@
 class BoehmGcPolicy(BasicGcPolicy):
     transformerclass = boehm.BoehmGCTransformer
 
+    def common_gcheader_initdata(self, defnode):
+        if defnode.db.gctransformer is not None:
+            return [defnode.obj._as_ptr()._identityhash(cache=False)]
+        else:
+            return []
+
     def array_setup(self, arraydefnode):
         pass
 
@@ -281,6 +288,7 @@
 
 class FrameworkGcPolicy(BasicGcPolicy):
     transformerclass = framework.FrameworkGCTransformer
+    stores_hash_at_the_end = True
 
     def struct_setup(self, structdefnode, rtti):
         if rtti is not None and hasattr(rtti._obj, 'destructor_funcptr'):
@@ -327,7 +335,18 @@
 
     def common_gcheader_initdata(self, defnode):
         o = top_container(defnode.obj)
-        return defnode.db.gctransformer.gc_field_values_for(o)
+        needs_hash = self.get_prebuilt_hash(o) is not None
+        return defnode.db.gctransformer.gc_field_values_for(o, needs_hash)
+
+    def get_prebuilt_hash(self, obj):
+        # for prebuilt objects that need to have their hash stored and
+        # restored.  Note that only structures that are StructNodes all
+        # the way have their hash stored (and not e.g. structs with var-
+        # sized arrays at the end).  'obj' must be the top_container.
+        TYPE = typeOf(obj)
+        if not isinstance(TYPE, lltype.GcStruct):
+            return None
+        return getattr(obj, '_hash_cache_', None)
 
     def need_no_typeptr(self):
         config = self.db.translator.config

Modified: pypy/branch/gc-hash-merge/pypy/translator/c/node.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/translator/c/node.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/translator/c/node.py	Fri Oct 16 11:47:18 2009
@@ -9,7 +9,7 @@
 from pypy.translator.c.support import USESLOTS # set to False if necessary while refactoring
 from pypy.translator.c.support import cdecl, forward_cdecl, somelettersfrom
 from pypy.translator.c.support import c_char_array_constant, barebonearray
-from pypy.translator.c.primitive import PrimitiveType
+from pypy.translator.c.primitive import PrimitiveType, name_signed
 from pypy.rlib.rarithmetic import isinf, isnan
 from pypy.translator.c import extfunc
 from pypy.translator.tool.cbuild import ExternalCompilationInfo
@@ -149,8 +149,6 @@
         if is_empty:
             yield '\t' + 'char _dummy; /* this struct is empty */'
         yield '};'
-        for line in self.db.gcpolicy.struct_after_definition(self):
-            yield line
 
     def visitor_lines(self, prefix, on_field):
         for name in self.fieldnames:
@@ -547,6 +545,49 @@
 
 assert not USESLOTS or '__dict__' not in dir(StructNode)
 
+class GcStructNodeWithHash(StructNode):
+    # for the outermost level of nested structures, if it has a _hash_cache_.
+    nodekind = 'struct'
+    if USESLOTS:
+        __slots__ = ()
+
+    def get_hash_typename(self):
+        return 'struct _hashT_%s @' % self.name
+
+    def forward_declaration(self):
+        hash_typename = self.get_hash_typename()
+        hash_offset = self.db.gctransformer.get_hash_offset(self.T)
+        yield '%s {' % cdecl(hash_typename, '')
+        yield '\tunion {'
+        yield '\t\t%s;' % cdecl(self.implementationtypename, 'head')
+        yield '\t\tchar pad[%s];' % name_signed(hash_offset, self.db)
+        yield '\t} u;'
+        yield '\tlong hash;'
+        yield '};'
+        yield '%s;' % (
+            forward_cdecl(hash_typename, '_hash_' + self.name,
+                          self.db.standalone, self.is_thread_local()),)
+        yield '#define %s _hash_%s.u.head' % (self.name, self.name)
+
+    def implementation(self):
+        hash_typename = self.get_hash_typename()
+        hash = self.db.gcpolicy.get_prebuilt_hash(self.obj)
+        assert hash is not None
+        lines = list(self.initializationexpr())
+        lines.insert(0, '%s = { {' % (
+            cdecl(hash_typename, '_hash_' + self.name,
+                  self.is_thread_local()),))
+        lines.append('}, %s /* hash */ };' % name_signed(hash, self.db))
+        return lines
+
+def gcstructnode_factory(db, T, obj):
+    if db.gcpolicy.get_prebuilt_hash(obj) is not None:
+        cls = GcStructNodeWithHash
+    else:
+        cls = StructNode
+    return cls(db, T, obj)
+
+
 class ArrayNode(ContainerNode):
     nodekind = 'array'
     if USESLOTS:
@@ -970,7 +1011,7 @@
 
 ContainerNodeFactory = {
     Struct:       StructNode,
-    GcStruct:     StructNode,
+    GcStruct:     gcstructnode_factory,
     Array:        ArrayNode,
     GcArray:      ArrayNode,
     FixedSizeArray: FixedSizeArrayNode,

Modified: pypy/branch/gc-hash-merge/pypy/translator/c/src/g_prerequisite.h
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/translator/c/src/g_prerequisite.h	(original)
+++ pypy/branch/gc-hash-merge/pypy/translator/c/src/g_prerequisite.h	Fri Oct 16 11:47:18 2009
@@ -27,3 +27,6 @@
 #else
 typedef unsigned char bool_t;
 #endif
+
+
+#include "src/align.h"

Modified: pypy/branch/gc-hash-merge/pypy/translator/c/src/mem.h
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/translator/c/src/mem.h	(original)
+++ pypy/branch/gc-hash-merge/pypy/translator/c/src/mem.h	Fri Oct 16 11:47:18 2009
@@ -2,22 +2,6 @@
 /************************************************************/
  /***  C header subsection: operations on LowLevelTypes    ***/
 
-/* alignment for arena-based garbage collectors: the following line
-   enforces an alignment that should be enough for any structure
-   containing pointers and 'double' fields. */
-struct rpy_memory_alignment_test1 {
-  double d;
-  void* p;
-};
-struct rpy_memory_alignment_test2 {
-  char c;
-  struct rpy_memory_alignment_test1 s;
-};
-#define MEMORY_ALIGNMENT	offsetof(struct rpy_memory_alignment_test2, s)
-#define ROUND_UP_FOR_ALLOCATION(x, minsize)  \
-  ((((x)>=(minsize)?(x):(minsize))           \
-               + (MEMORY_ALIGNMENT-1)) & ~(MEMORY_ALIGNMENT-1))
-
 extern char __gcmapstart;
 extern char __gcmapend;
 extern char __gccallshapes;

Modified: pypy/branch/gc-hash-merge/pypy/translator/c/test/test_boehm.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/translator/c/test/test_boehm.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/translator/c/test/test_boehm.py	Fri Oct 16 11:47:18 2009
@@ -422,3 +422,31 @@
             return True
         run = self.getcompiled(f)
         assert run() == True        
+
+    def test_hash_preservation(self):
+        from pypy.rlib.objectmodel import compute_hash
+        from pypy.rlib.objectmodel import current_object_addr_as_int
+        class C:
+            pass
+        class D(C):
+            pass
+        c = C()
+        d = D()
+        compute_hash(d)     # force to be cached on 'd', but not on 'c'
+        #
+        def fn():
+            d2 = D()
+            return (compute_hash(d2),
+                    current_object_addr_as_int(d2),
+                    compute_hash(c),
+                    compute_hash(d),
+                    compute_hash(("Hi", None, (7.5, 2, d))))
+        
+        f = self.getcompiled(fn)
+        res = f()
+
+        # xxx the next line is too precise, checking the exact implementation
+        assert res[0] == ~res[1]
+        assert res[2] != compute_hash(c)     # likely
+        assert res[3] == compute_hash(d)
+        assert res[4] == compute_hash(("Hi", None, (7.5, 2, d)))

Modified: pypy/branch/gc-hash-merge/pypy/translator/c/test/test_lltyped.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/translator/c/test/test_lltyped.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/translator/c/test/test_lltyped.py	Fri Oct 16 11:47:18 2009
@@ -249,6 +249,7 @@
         assert res1 == res2
 
     def test_null_padding(self):
+        py.test.skip("we no longer pad our RPython strings with a final NUL")
         from pypy.rpython.lltypesystem import llmemory
         from pypy.rpython.lltypesystem import rstr
         chars_offset = llmemory.FieldOffset(rstr.STR, 'chars') + \

Modified: pypy/branch/gc-hash-merge/pypy/translator/c/test/test_newgc.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/translator/c/test/test_newgc.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/translator/c/test/test_newgc.py	Fri Oct 16 11:47:18 2009
@@ -26,6 +26,7 @@
     def _makefunc2(cls, f):
         t = Translation(f, [int, int], gc=cls.gcpolicy,
                         policy=annpolicy.StrictAnnotatorPolicy())
+        t.config.translation.gcconfig.debugprint = True
         t.config.translation.gcconfig.removetypeptr = cls.removetypeptr
         t.disable(['backendopt'])
         t.set_backend_extra_options(c_isolated=True, c_debug_defines=True)
@@ -88,6 +89,8 @@
         if not args:
             args = (-1, )
         num = self.name_to_func[name]
+        print
+        print 'Running %r (test number %d)' % (name, num)
         res = self.c_allfuncs(num, *args)
         if self.funcsstr[num]:
             return res
@@ -689,6 +692,116 @@
     def test_resizable_buffer(self):
         assert self.run('resizable_buffer')
 
+    def define_hash_preservation(cls):
+        from pypy.rlib.objectmodel import compute_hash
+        from pypy.rlib.objectmodel import compute_identity_hash
+        from pypy.rlib.objectmodel import current_object_addr_as_int
+        class C:
+            pass
+        class D(C):
+            pass
+        c = C()
+        d = D()
+        h_d = compute_hash(d)     # force to be cached on 'd', but not on 'c'
+        h_t = compute_hash(("Hi", None, (7.5, 2, d)))
+        S = lltype.GcStruct('S', ('x', lltype.Signed),
+                                 ('a', lltype.Array(lltype.Signed)))
+        s = lltype.malloc(S, 15, zero=True)
+        h_s = compute_identity_hash(s)   # varsized: hash not saved/restored
+        #
+        def f():
+            if compute_hash(c) != compute_identity_hash(c): return 12
+            if compute_hash(d) != h_d: return 13
+            if compute_hash(("Hi", None, (7.5, 2, d))) != h_t: return 14
+            c2 = C()
+            h_c2 = compute_hash(c2)
+            if compute_hash(c2) != h_c2: return 15
+            if compute_identity_hash(s) == h_s: return 16   # unlikely
+            i = 0
+            while i < 6:
+                rgc.collect()
+                if compute_hash(c2) != h_c2: return i
+                i += 1
+            return 42
+        return f
+
+    def test_hash_preservation(self):
+        res = self.run('hash_preservation')
+        assert res == 42
+
+    def define_hash_overflow(self):
+        from pypy.rlib.objectmodel import compute_identity_hash
+        class X(object):
+            pass
+
+        def g(n):
+            "Make a chain of n objects."
+            x1 = None
+            i = 0
+            while i < n:
+                x2 = X()
+                x2.prev = x1
+                x1 = x2
+                i += 1
+            return x1
+
+        def build(xr, n):
+            "Build the identity hashes of all n objects of the chain."
+            i = 0
+            while i < n:
+                xr.hash = compute_identity_hash(xr)
+                # ^^^ likely to trigger a collection
+                xr = xr.prev
+                i += 1
+            assert xr is None
+
+        def check(xr, n, step):
+            "Check that the identity hashes are still correct."
+            i = 0
+            while i < n:
+                if xr.hash != compute_identity_hash(xr):
+                    os.write(2, "wrong hash! i=%d, n=%d, step=%d\n" % (i, n,
+                                                                       step))
+                    raise ValueError
+                xr = xr.prev
+                i += 1
+            assert xr is None
+
+        def h(n):
+            x3 = g(3)
+            x4 = g(3)
+            x1 = g(n)
+            build(x1, n)       # can collect!
+            check(x1, n, 1)
+            build(x3, 3)
+            x2 = g(n//2)       # allocate more and try again
+            build(x2, n//2)
+            check(x1, n, 11)
+            check(x2, n//2, 12)
+            build(x4, 3)
+            check(x3, 3, 13)   # check these old objects too
+            check(x4, 3, 14)   # check these old objects too
+            rgc.collect()
+            check(x1, n, 21)
+            check(x2, n//2, 22)
+            check(x3, 3, 23)
+            check(x4, 3, 24)
+
+        def f():
+            # numbers optimized for a 8MB space
+            for n in [100000, 225000, 250000, 300000, 380000,
+                      460000, 570000, 800000]:
+                os.write(2, 'case %d\n' % n)
+                rgc.collect()
+                h(n)
+            return -42
+
+        return f
+
+    def test_hash_overflow(self):
+        res = self.run('hash_overflow')
+        assert res == -42
+
 class TestSemiSpaceGC(TestUsingFramework, snippet.SemiSpaceGCTestDefines):
     gcpolicy = "semispace"
     should_be_moving = True
@@ -805,6 +918,9 @@
     gcpolicy = "markcompact"
     should_be_moving = True
 
+    def setup_class(cls):
+        py.test.skip("Disabled for now")
+
     def test_gc_set_max_heap_size(self):
         py.test.skip("not implemented")
 

Modified: pypy/branch/gc-hash-merge/pypy/translator/c/test/test_typed.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/translator/c/test/test_typed.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/translator/c/test/test_typed.py	Fri Oct 16 11:47:18 2009
@@ -563,6 +563,7 @@
         assert f(255) == 255
 
     def test_hash_preservation(self):
+        from pypy.rlib.objectmodel import compute_hash
         from pypy.rlib.objectmodel import current_object_addr_as_int
         class C:
             pass
@@ -570,18 +571,24 @@
             pass
         c = C()
         d = D()
+        compute_hash(d)     # force to be cached on 'd', but not on 'c'
+        #
         def fn():
             d2 = D()
-            return hash(d2), current_object_addr_as_int(d2), hash(c), hash(d)
+            return (compute_hash(d2),
+                    current_object_addr_as_int(d2),
+                    compute_hash(c),
+                    compute_hash(d),
+                    compute_hash(("Hi", None, (7.5, 2, d))))
         
         f = self.getcompiled(fn)
-
         res = f()
 
-        # xxx this is too precise, checking the exact implementation
-        assert res[0] == ~res[1]
-        assert res[2] == hash(c)
-        assert res[3] == hash(d)
+        # xxx the next line is too precise, checking the exact implementation
+        assert res[0] == res[1]
+        assert res[2] != compute_hash(c)     # likely
+        assert res[3] == compute_hash(d)
+        assert res[4] == compute_hash(("Hi", None, (7.5, 2, d)))
 
     def test_list_basic_ops(self):
         def list_basic_ops(i, j):

Modified: pypy/branch/gc-hash-merge/pypy/translator/cli/opcodes.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/translator/cli/opcodes.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/translator/cli/opcodes.py	Fri Oct 16 11:47:18 2009
@@ -58,8 +58,8 @@
     'classof':                  [PushAllArgs, 'callvirt instance class [mscorlib]System.Type object::GetType()'],
     'instanceof':               [CastTo, 'ldnull', 'cgt.un'],
     'subclassof':               [PushAllArgs, 'call bool [pypylib]pypy.runtime.Utils::SubclassOf(class [mscorlib]System.Type, class[mscorlib]System.Type)'],
-    'ooidentityhash':           [PushAllArgs, 'callvirt instance int32 object::GetHashCode()'],
-    'oohash':                   [PushAllArgs, 'callvirt instance int32 object::GetHashCode()'],    
+    'gc_id':                    [PushAllArgs, 'call int32 [mscorlib]System.Runtime.CompilerServices.RuntimeHelpers::GetHashCode(object)'],   # XXX not implemented
+    'gc_identityhash':          [PushAllArgs, 'call int32 [mscorlib]System.Runtime.CompilerServices.RuntimeHelpers::GetHashCode(object)'],
     'oostring':                 [OOString],
     'oounicode':                [OOUnicode],
     'ooparse_int':              [PushAllArgs, 'call int32 [pypylib]pypy.runtime.Utils::OOParseInt(string, int32)'],

Modified: pypy/branch/gc-hash-merge/pypy/translator/jvm/builtin.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/translator/jvm/builtin.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/translator/jvm/builtin.py	Fri Oct 16 11:47:18 2009
@@ -84,6 +84,9 @@
     (ootype.StringBuilder.__class__, "ll_build"):
     jvm.Method.v(jStringBuilder, "toString", (), jString),
 
+    (ootype.String.__class__, "ll_hash"):
+    jvm.Method.v(jString, "hashCode", (), jInt),
+
     (ootype.String.__class__, "ll_streq"):
     jvm.Method.v(jString, "equals", (jObject,), jBool),
 

Modified: pypy/branch/gc-hash-merge/pypy/translator/jvm/opcodes.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/translator/jvm/opcodes.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/translator/jvm/opcodes.py	Fri Oct 16 11:47:18 2009
@@ -82,8 +82,8 @@
     'instanceof':               [CastTo, StoreResult],
     'subclassof':               [PushAllArgs, jvm.SWAP, jvm.CLASSISASSIGNABLEFROM, StoreResult],
     'classof':                  [PushAllArgs, jvm.OBJECTGETCLASS, StoreResult],
-    'ooidentityhash':           [PushAllArgs, jvm.OBJHASHCODE, StoreResult], 
-    'oohash':                   [PushAllArgs, jvm.OBJHASHCODE, StoreResult], 
+    'gc_id':                    [PushAllArgs, jvm.SYSTEMIDENTITYHASH, StoreResult],   # XXX not implemented
+    'gc_identityhash':          [PushAllArgs, jvm.SYSTEMIDENTITYHASH, StoreResult], 
     'oostring':                 [OOString, StoreResult],
     'oounicode':                [OOUnicode, StoreResult],
     'ooparse_float':            jvm.PYPYOOPARSEFLOAT,

Modified: pypy/branch/gc-hash-merge/pypy/translator/jvm/typesystem.py
==============================================================================
--- pypy/branch/gc-hash-merge/pypy/translator/jvm/typesystem.py	(original)
+++ pypy/branch/gc-hash-merge/pypy/translator/jvm/typesystem.py	Fri Oct 16 11:47:18 2009
@@ -888,6 +888,7 @@
 OBJHASHCODE =           Method.v(jObject, 'hashCode', (), jInt)
 OBJTOSTRING =           Method.v(jObject, 'toString', (), jString)
 OBJEQUALS =             Method.v(jObject, 'equals', (jObject,), jBool)
+SYSTEMIDENTITYHASH =    Method.s(jSystem, 'identityHashCode', (jObject,), jInt)
 SYSTEMGC =              Method.s(jSystem, 'gc', (), jVoid)
 INTTOSTRINGI =          Method.s(jIntegerClass, 'toString', (jInt,), jString)
 LONGTOSTRINGL =         Method.s(jLongClass, 'toString', (jLong,), jString)



More information about the Pypy-commit mailing list