[pypy-commit] pypy numpy-ufunc-trig: merge default

Fri Jul 29 01:57:07 CEST 2011

Author: Christoph Gerum <christoph at gerum.de>
Branch: numpy-ufunc-trig
Changeset: r46076:47c7c4f3577f
Date: 2011-07-29 01:48 +0200
http://bitbucket.org/pypy/pypy/changeset/47c7c4f3577f/

Log:	merge default

diff --git a/.hgtags b/.hgtags
--- a/.hgtags
+++ b/.hgtags
@@ -1,1 +1,2 @@
 b590cf6de4190623aad9aa698694c22e614d67b9 release-1.5
+b48df0bf4e75b81d98f19ce89d4a7dc3e1dab5e5 benchmarked
diff --git a/lib-python/modified-2.7/ctypes/__init__.py b/lib-python/modified-2.7/ctypes/__init__.py
--- a/lib-python/modified-2.7/ctypes/__init__.py
+++ b/lib-python/modified-2.7/ctypes/__init__.py
@@ -489,9 +489,12 @@
         _flags_ = _FUNCFLAG_CDECL | _FUNCFLAG_PYTHONAPI
     return CFunctionType
 
-_cast = PYFUNCTYPE(py_object, c_void_p, py_object, py_object)(_cast_addr)
 def cast(obj, typ):
-    return _cast(obj, obj, typ)
+    try:
+        c_void_p.from_param(obj)
+    except TypeError, e:
+        raise ArgumentError(str(e))
+    return _cast_addr(obj, obj, typ)
 
 _string_at = PYFUNCTYPE(py_object, c_void_p, c_int)(_string_at_addr)
 def string_at(ptr, size=-1):
diff --git a/lib_pypy/_ctypes/function.py b/lib_pypy/_ctypes/function.py
--- a/lib_pypy/_ctypes/function.py
+++ b/lib_pypy/_ctypes/function.py
@@ -78,8 +78,6 @@
     _com_iid = None
     _is_fastpath = False
 
-    __restype_set = False
-
     def _getargtypes(self):
         return self._argtypes_
 
@@ -149,7 +147,6 @@
         return self._restype_
 
     def _setrestype(self, restype):
-        self.__restype_set = True
         self._ptr = None
         if restype is int:
             from ctypes import c_int
@@ -296,13 +293,12 @@
                     "This function takes %d argument%s (%s given)"
                     % (len(self._argtypes_), plural, len(args)))
 
-            # check that arguments are convertible
-            ## XXX Not as long as ctypes.cast is a callback function with
-            ## py_object arguments...
-            ## self._convert_args(self._argtypes_, args, {})
-
             try:
-                res = self.callable(*args)
+                newargs = self._convert_args_for_callback(argtypes, args)
+            except (UnicodeError, TypeError, ValueError), e:
+                raise ArgumentError(str(e))
+            try:
+                res = self.callable(*newargs)
             except:
                 exc_info = sys.exc_info()
                 traceback.print_tb(exc_info[2], file=sys.stderr)
@@ -316,10 +312,6 @@
             warnings.warn('C function without declared arguments called',
                           RuntimeWarning, stacklevel=2)
             argtypes = []
-            
-        if not self.__restype_set:
-            warnings.warn('C function without declared return type called',
-                          RuntimeWarning, stacklevel=2)
 
         if self._com_index:
             from ctypes import cast, c_void_p, POINTER
@@ -366,7 +358,10 @@
             if self._flags_ & _rawffi.FUNCFLAG_USE_LASTERROR:
                 set_last_error(_rawffi.get_last_error())
         #
-        return self._build_result(self._restype_, result, newargs)
+        try:
+            return self._build_result(self._restype_, result, newargs)
+        finally:
+            funcptr.free_temp_buffers()
 
     def _do_errcheck(self, result, args):
         # The 'errcheck' protocol
@@ -466,6 +461,18 @@
 
         return cobj, cobj._to_ffi_param(), type(cobj)
 
+    def _convert_args_for_callback(self, argtypes, args):
+        assert len(argtypes) == len(args)
+        newargs = []
+        for argtype, arg in zip(argtypes, args):
+            param = argtype.from_param(arg)
+            if argtype._type_ == 'P': # special-case for c_void_p
+                param = param._get_buffer_value()
+            elif self._is_primitive(argtype):
+                param = param.value
+            newargs.append(param)
+        return newargs
+
     def _convert_args(self, argtypes, args, kwargs, marker=object()):
         newargs = []
         outargs = []
@@ -556,6 +563,9 @@
                 newargtypes.append(newargtype)
         return keepalives, newargs, newargtypes, outargs
 
+    @staticmethod
+    def _is_primitive(argtype):
+        return argtype.__bases__[0] is _SimpleCData
     
     def _wrap_result(self, restype, result):
         """
@@ -564,7 +574,7 @@
         """
         # hack for performance: if restype is a "simple" primitive type, don't
         # allocate the buffer because it's going to be thrown away immediately
-        if restype.__bases__[0] is _SimpleCData and not restype._is_pointer_like():
+        if self._is_primitive(restype) and not restype._is_pointer_like():
             return result
         #
         shape = restype._ffishape
@@ -680,7 +690,7 @@
             try:
                 result = self._call_funcptr(funcptr, *args)
                 result = self._do_errcheck(result, args)
-            except (TypeError, ArgumentError): # XXX, should be FFITypeError
+            except (TypeError, ArgumentError, UnicodeDecodeError):
                 assert self._slowpath_allowed
                 return CFuncPtr.__call__(self, *args)
             return result
diff --git a/lib_pypy/_ctypes/primitive.py b/lib_pypy/_ctypes/primitive.py
--- a/lib_pypy/_ctypes/primitive.py
+++ b/lib_pypy/_ctypes/primitive.py
@@ -10,6 +10,8 @@
 from _ctypes.builtin import ConvMode
 from _ctypes.array import Array
 from _ctypes.pointer import _Pointer, as_ffi_pointer
+#from _ctypes.function import CFuncPtr # this import is moved at the bottom
+                                       # because else it's circular
 
 class NULL(object):
     pass
@@ -86,7 +88,7 @@
         return res
     if isinstance(value, Array):
         return value
-    if isinstance(value, _Pointer):
+    if isinstance(value, (_Pointer, CFuncPtr)):
         return cls.from_address(value._buffer.buffer)
     if isinstance(value, (int, long)):
         return cls(value)
@@ -338,3 +340,5 @@
 
     def __nonzero__(self):
         return self._buffer[0] not in (0, '\x00')
+
+from _ctypes.function import CFuncPtr
diff --git a/lib_pypy/pyrepl/unix_console.py b/lib_pypy/pyrepl/unix_console.py
--- a/lib_pypy/pyrepl/unix_console.py
+++ b/lib_pypy/pyrepl/unix_console.py
@@ -384,15 +384,19 @@
 
         self.__maybe_write_code(self._smkx)
 
-        self.old_sigwinch = signal.signal(
-            signal.SIGWINCH, self.__sigwinch)
+        try:
+            self.old_sigwinch = signal.signal(
+                signal.SIGWINCH, self.__sigwinch)
+        except ValueError:
+            pass
 
     def restore(self):
         self.__maybe_write_code(self._rmkx)
         self.flushoutput()
         tcsetattr(self.input_fd, termios.TCSADRAIN, self.__svtermstate)
 
-        signal.signal(signal.SIGWINCH, self.old_sigwinch)
+        if hasattr(self, 'old_sigwinch'):
+            signal.signal(signal.SIGWINCH, self.old_sigwinch)
 
     def __sigwinch(self, signum, frame):
         self.height, self.width = self.getheightwidth()
diff --git a/pypy/jit/backend/x86/arch.py b/pypy/jit/backend/x86/arch.py
--- a/pypy/jit/backend/x86/arch.py
+++ b/pypy/jit/backend/x86/arch.py
@@ -27,3 +27,6 @@
 # which are used in the malloc itself.  They are:
 #   ecx, ebx, esi, edi               [32 and 64 bits]
 #   r8, r9, r10, r12, r13, r14, r15    [64 bits only]
+#
+# Note that with asmgcc, the locations corresponding to callee-save registers
+# are never used.
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -181,6 +181,7 @@
         # instructions in assembler, with a mark_gc_roots in between.
         # With shadowstack, this is not needed, so we produce a single helper.
         gcrootmap = self.cpu.gc_ll_descr.gcrootmap
+        shadow_stack = (gcrootmap is not None and gcrootmap.is_shadow_stack)
         #
         # ---------- first helper for the slow path of malloc ----------
         mc = codebuf.MachineCodeBlockWrapper()
@@ -190,10 +191,19 @@
         mc.SUB_rr(edx.value, eax.value)       # compute the size we want
         addr = self.cpu.gc_ll_descr.get_malloc_slowpath_addr()
         #
-        if gcrootmap is not None and gcrootmap.is_shadow_stack:
+        # The registers to save in the copy area: with shadowstack, most
+        # registers need to be saved.  With asmgcc, the callee-saved registers
+        # don't need to.
+        save_in_copy_area = gpr_reg_mgr_cls.REGLOC_TO_COPY_AREA_OFS.items()
+        if not shadow_stack:
+            save_in_copy_area = [(reg, ofs) for (reg, ofs) in save_in_copy_area
+                   if reg not in gpr_reg_mgr_cls.REGLOC_TO_GCROOTMAP_REG_INDEX]
+        #
+        for reg, ofs in save_in_copy_area:
+            mc.MOV_br(ofs, reg.value)
+        #
+        if shadow_stack:
             # ---- shadowstack ----
-            for reg, ofs in gpr_reg_mgr_cls.REGLOC_TO_COPY_AREA_OFS.items():
-                mc.MOV_br(ofs, reg.value)
             mc.SUB_ri(esp.value, 16 - WORD)      # stack alignment of 16 bytes
             if IS_X86_32:
                 mc.MOV_sr(0, edx.value)          # push argument
@@ -201,15 +211,13 @@
                 mc.MOV_rr(edi.value, edx.value)
             mc.CALL(imm(addr))
             mc.ADD_ri(esp.value, 16 - WORD)
-            for reg, ofs in gpr_reg_mgr_cls.REGLOC_TO_COPY_AREA_OFS.items():
-                mc.MOV_rb(reg.value, ofs)
         else:
             # ---- asmgcc ----
             if IS_X86_32:
                 mc.MOV_sr(WORD, edx.value)       # save it as the new argument
             elif IS_X86_64:
-                # rdi can be clobbered: its content was forced to the stack
-                # by _fastpath_malloc(), like all other save_around_call_regs.
+                # rdi can be clobbered: its content was saved in the
+                # copy area of the stack
                 mc.MOV_rr(edi.value, edx.value)
             mc.JMP(imm(addr))                    # tail call to the real malloc
             rawstart = mc.materialize(self.cpu.asmmemmgr, [])
@@ -217,6 +225,10 @@
             # ---------- second helper for the slow path of malloc ----------
             mc = codebuf.MachineCodeBlockWrapper()
         #
+        for reg, ofs in save_in_copy_area:
+            mc.MOV_rb(reg.value, ofs)
+            assert reg is not eax and reg is not edx
+        #
         if self.cpu.supports_floats:          # restore the XMM registers
             for i in range(self.cpu.NUM_REGS):# from where they were saved
                 mc.MOVSD_xs(i, (WORD*2)+8*i)
@@ -2424,8 +2436,7 @@
             # there are two helpers to call only with asmgcc
             slowpath_addr1 = self.malloc_slowpath1
             self.mc.CALL(imm(slowpath_addr1))
-        self.mark_gc_roots(self.write_new_force_index(),
-                           use_copy_area=shadow_stack)
+        self.mark_gc_roots(self.write_new_force_index(), use_copy_area=True)
         slowpath_addr2 = self.malloc_slowpath2
         self.mc.CALL(imm(slowpath_addr2))
 
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -921,27 +921,13 @@
     def _do_fastpath_malloc(self, op, size, tid):
         gc_ll_descr = self.assembler.cpu.gc_ll_descr
         self.rm.force_allocate_reg(op.result, selected_reg=eax)
-
-        if gc_ll_descr.gcrootmap and gc_ll_descr.gcrootmap.is_shadow_stack:
-            # ---- shadowstack ----
-            # We need edx as a temporary, but otherwise don't save any more
-            # register.  See comments in _build_malloc_slowpath().
-            tmp_box = TempBox()
-            self.rm.force_allocate_reg(tmp_box, selected_reg=edx)
-            self.rm.possibly_free_var(tmp_box)
-        else:
-            # ---- asmgcc ----
-            # We need to force-allocate each of save_around_call_regs now.
-            # The alternative would be to save and restore them around the
-            # actual call to malloc(), in the rare case where we need to do
-            # it; however, mark_gc_roots() would need to be adapted to know
-            # where the variables end up being saved.  Messy.
-            for reg in self.rm.save_around_call_regs:
-                if reg is not eax:
-                    tmp_box = TempBox()
-                    self.rm.force_allocate_reg(tmp_box, selected_reg=reg)
-                    self.rm.possibly_free_var(tmp_box)
-
+        #
+        # We need edx as a temporary, but otherwise don't save any more
+        # register.  See comments in _build_malloc_slowpath().
+        tmp_box = TempBox()
+        self.rm.force_allocate_reg(tmp_box, selected_reg=edx)
+        self.rm.possibly_free_var(tmp_box)
+        #
         self.assembler.malloc_cond(
             gc_ll_descr.get_nursery_free_addr(),
             gc_ll_descr.get_nursery_top_addr(),
@@ -1337,14 +1323,26 @@
             if reg is eax:
                 continue      # ok to ignore this one
             if (isinstance(v, BoxPtr) and self.rm.stays_alive(v)):
-                if use_copy_area:
-                    assert reg in self.rm.REGLOC_TO_COPY_AREA_OFS
-                    area_offset = self.rm.REGLOC_TO_COPY_AREA_OFS[reg]
-                    gcrootmap.add_frame_offset(shape, area_offset)
-                else:
-                    assert reg in self.rm.REGLOC_TO_GCROOTMAP_REG_INDEX
-                    gcrootmap.add_callee_save_reg(
-                        shape, self.rm.REGLOC_TO_GCROOTMAP_REG_INDEX[reg])
+                #
+                # The register 'reg' is alive across this call.
+                gcrootmap = self.assembler.cpu.gc_ll_descr.gcrootmap
+                if gcrootmap is None or not gcrootmap.is_shadow_stack:
+                    #
+                    # Asmgcc: if reg is a callee-save register, we can
+                    # explicitly mark it as containing a BoxPtr.
+                    if reg in self.rm.REGLOC_TO_GCROOTMAP_REG_INDEX:
+                        gcrootmap.add_callee_save_reg(
+                            shape, self.rm.REGLOC_TO_GCROOTMAP_REG_INDEX[reg])
+                        continue
+                #
+                # Else, 'use_copy_area' must be True (otherwise this BoxPtr
+                # should not be in a register).  The copy area contains the
+                # real value of the register.
+                assert use_copy_area
+                assert reg in self.rm.REGLOC_TO_COPY_AREA_OFS
+                area_offset = self.rm.REGLOC_TO_COPY_AREA_OFS[reg]
+                gcrootmap.add_frame_offset(shape, area_offset)
+        #
         return gcrootmap.compress_callshape(shape,
                                             self.assembler.datablockwrapper)
 
diff --git a/pypy/jit/codewriter/call.py b/pypy/jit/codewriter/call.py
--- a/pypy/jit/codewriter/call.py
+++ b/pypy/jit/codewriter/call.py
@@ -228,8 +228,10 @@
             elif loopinvariant:
                 extraeffect = EffectInfo.EF_LOOPINVARIANT
             elif elidable:
-                # XXX check what to do about exceptions (also MemoryError?)
-                extraeffect = EffectInfo.EF_ELIDABLE
+                if self._canraise(op):
+                    extraeffect = EffectInfo.EF_ELIDABLE_CAN_RAISE
+                else:
+                    extraeffect = EffectInfo.EF_ELIDABLE_CANNOT_RAISE
             elif self._canraise(op):
                 extraeffect = EffectInfo.EF_CAN_RAISE
             else:
@@ -263,7 +265,7 @@
     def calldescr_canraise(self, calldescr):
         effectinfo = calldescr.get_extra_info()
         return (effectinfo is None or
-                effectinfo.extraeffect >= EffectInfo.EF_CAN_RAISE)
+                effectinfo.extraeffect > EffectInfo.EF_CANNOT_RAISE)
 
     def jitdriver_sd_from_portal_graph(self, graph):
         for jd in self.jitdrivers_sd:
diff --git a/pypy/jit/codewriter/effectinfo.py b/pypy/jit/codewriter/effectinfo.py
--- a/pypy/jit/codewriter/effectinfo.py
+++ b/pypy/jit/codewriter/effectinfo.py
@@ -9,10 +9,11 @@
     _cache = {}
 
     # the 'extraeffect' field is one of the following values:
-    EF_ELIDABLE                        = 0 #elidable function (and cannot raise)
+    EF_ELIDABLE_CANNOT_RAISE           = 0 #elidable function (and cannot raise)
     EF_LOOPINVARIANT                   = 1 #special: call it only once per loop
     EF_CANNOT_RAISE                    = 2 #a function which cannot raise
-    EF_CAN_RAISE                       = 3 #normal function (can raise)
+    EF_ELIDABLE_CAN_RAISE              = 3 #elidable function (but can raise)
+    EF_CAN_RAISE                       = 4 #normal function (can raise)
     EF_FORCES_VIRTUAL_OR_VIRTUALIZABLE = 5 #can raise and force virtualizables
 
     # the 'oopspecindex' field is one of the following values:
@@ -94,7 +95,8 @@
         result.readonly_descrs_fields = readonly_descrs_fields
         result.readonly_descrs_arrays = readonly_descrs_arrays
         if extraeffect == EffectInfo.EF_LOOPINVARIANT or \
-           extraeffect == EffectInfo.EF_ELIDABLE:
+           extraeffect == EffectInfo.EF_ELIDABLE_CANNOT_RAISE or \
+           extraeffect == EffectInfo.EF_ELIDABLE_CAN_RAISE:
             result.write_descrs_fields = []
             result.write_descrs_arrays = []
         else:
diff --git a/pypy/jit/codewriter/jitcode.py b/pypy/jit/codewriter/jitcode.py
--- a/pypy/jit/codewriter/jitcode.py
+++ b/pypy/jit/codewriter/jitcode.py
@@ -1,7 +1,6 @@
 from pypy.jit.metainterp.history import AbstractDescr
 from pypy.jit.codewriter import heaptracker
 from pypy.rlib.objectmodel import we_are_translated
-from pypy.rpython.lltypesystem import llmemory
 
 
 class JitCode(AbstractDescr):
@@ -102,7 +101,7 @@
 
     def _clone_if_mutable(self):
         raise NotImplementedError
-    
+
 class MissingLiveness(Exception):
     pass
 
diff --git a/pypy/jit/codewriter/jtransform.py b/pypy/jit/codewriter/jtransform.py
--- a/pypy/jit/codewriter/jtransform.py
+++ b/pypy/jit/codewriter/jtransform.py
@@ -1,18 +1,16 @@
-import py, sys
-from pypy.rpython.lltypesystem import lltype, llmemory, rstr, rclass
-from pypy.rpython import rlist
-from pypy.jit.metainterp.history import getkind
-from pypy.objspace.flow.model import SpaceOperation, Variable, Constant
-from pypy.objspace.flow.model import Block, Link, c_last_exception
-from pypy.jit.codewriter.flatten import ListOfKind, IndirectCallTargets
+import py
 from pypy.jit.codewriter import support, heaptracker, longlong
 from pypy.jit.codewriter.effectinfo import EffectInfo
+from pypy.jit.codewriter.flatten import ListOfKind, IndirectCallTargets
 from pypy.jit.codewriter.policy import log
+from pypy.jit.metainterp import quasiimmut
+from pypy.jit.metainterp.history import getkind
 from pypy.jit.metainterp.typesystem import deref, arrayItem
-from pypy.jit.metainterp import quasiimmut
-from pypy.rpython.rclass import IR_QUASIIMMUTABLE, IR_QUASIIMMUTABLE_ARRAY
+from pypy.objspace.flow.model import SpaceOperation, Variable, Constant, c_last_exception
 from pypy.rlib import objectmodel
 from pypy.rlib.jit import _we_are_jitted
+from pypy.rpython.lltypesystem import lltype, llmemory, rstr, rclass
+from pypy.rpython.rclass import IR_QUASIIMMUTABLE, IR_QUASIIMMUTABLE_ARRAY
 from pypy.translator.simplify import get_funcobj
 from pypy.translator.unsimplify import varoftype
 
@@ -810,7 +808,6 @@
 
     def force_cast_without_longlong(self, v_arg, v_result):
         from pypy.rpython.lltypesystem.rffi import size_and_sign, sizeof, FLOAT
-        from pypy.rlib.rarithmetic import intmask
         #
         if (v_result.concretetype in (FLOAT, lltype.Float) or
             v_arg.concretetype in (FLOAT, lltype.Float)):
@@ -905,7 +902,7 @@
                 op1 = self.prepare_builtin_call(op, "llong_%s", args)
                 op2 = self._handle_oopspec_call(op1, args,
                                                 EffectInfo.OS_LLONG_%s,
-                                                EffectInfo.EF_ELIDABLE)
+                                           EffectInfo.EF_ELIDABLE_CANNOT_RAISE)
                 if %r == "TO_INT":
                     assert op2.result.concretetype == lltype.Signed
                 return op2
@@ -1366,15 +1363,15 @@
                     otherindex += EffectInfo._OS_offset_uni
                 self._register_extra_helper(otherindex, othername,
                                             argtypes, resulttype,
-                                            EffectInfo.EF_ELIDABLE)
+                                           EffectInfo.EF_ELIDABLE_CANNOT_RAISE)
         #
         return self._handle_oopspec_call(op, args, dict[oopspec_name],
-                                         EffectInfo.EF_ELIDABLE)
+                                         EffectInfo.EF_ELIDABLE_CANNOT_RAISE)
 
     def _handle_str2unicode_call(self, op, oopspec_name, args):
-        # ll_str2unicode is not EF_ELIDABLE, because it can raise
-        # UnicodeDecodeError...
-        return self._handle_oopspec_call(op, args, EffectInfo.OS_STR2UNICODE)
+        # ll_str2unicode can raise UnicodeDecodeError
+        return self._handle_oopspec_call(op, args, EffectInfo.OS_STR2UNICODE,
+                                         EffectInfo.EF_ELIDABLE_CAN_RAISE)
 
     # ----------
     # VirtualRefs.
@@ -1412,13 +1409,13 @@
         assert vinfo is not None
         self.vable_flags[op.args[0]] = op.args[2].value
         return []
-        
+
     # ---------
     # ll_math.sqrt_nonneg()
-    
+
     def _handle_math_sqrt_call(self, op, oopspec_name, args):
         return self._handle_oopspec_call(op, args, EffectInfo.OS_MATH_SQRT,
-                                         EffectInfo.EF_ELIDABLE)
+                                         EffectInfo.EF_ELIDABLE_CANNOT_RAISE)
 
     def rewrite_op_jit_force_quasi_immutable(self, op):
         v_inst, c_fieldname = op.args
diff --git a/pypy/jit/codewriter/policy.py b/pypy/jit/codewriter/policy.py
--- a/pypy/jit/codewriter/policy.py
+++ b/pypy/jit/codewriter/policy.py
@@ -1,9 +1,7 @@
-from pypy.translator.simplify import get_funcobj
 from pypy.jit.metainterp import history
-from pypy.rpython.lltypesystem import lltype, rclass
 from pypy.tool.udir import udir
 
-import py, sys
+import py
 from pypy.tool.ansi_print import ansi_log
 log = py.log.Producer('jitcodewriter')
 py.log.setconsumer('jitcodewriter', ansi_log)
diff --git a/pypy/jit/codewriter/regalloc.py b/pypy/jit/codewriter/regalloc.py
--- a/pypy/jit/codewriter/regalloc.py
+++ b/pypy/jit/codewriter/regalloc.py
@@ -1,129 +1,8 @@
-import sys
-from pypy.objspace.flow.model import Variable
-from pypy.tool.algo.color import DependencyGraph
-from pypy.tool.algo.unionfind import UnionFind
+from pypy.tool.algo import regalloc
 from pypy.jit.metainterp.history import getkind
 from pypy.jit.codewriter.flatten import ListOfKind
 
+
 def perform_register_allocation(graph, kind):
-    """Perform register allocation for the Variables of the given 'kind'
-    in the 'graph'."""
-    regalloc = RegAllocator(graph, kind)
-    regalloc.make_dependencies()
-    regalloc.coalesce_variables()
-    regalloc.find_node_coloring()
-    return regalloc
-
-
-class RegAllocator(object):
-    DEBUG_REGALLOC = False
-
-    def __init__(self, graph, kind):
-        self.graph = graph
-        self.kind = kind
-
-    def make_dependencies(self):
-        dg = DependencyGraph()
-        for block in self.graph.iterblocks():
-            # Compute die_at = {Variable: index_of_operation_with_last_usage}
-            die_at = dict.fromkeys(block.inputargs, 0)
-            for i, op in enumerate(block.operations):
-                for v in op.args:
-                    if isinstance(v, Variable):
-                        die_at[v] = i
-                    elif isinstance(v, ListOfKind):
-                        for v1 in v:
-                            if isinstance(v1, Variable):
-                                die_at[v1] = i
-                if op.result is not None:
-                    die_at[op.result] = i + 1
-            if isinstance(block.exitswitch, tuple):
-                for x in block.exitswitch:
-                    die_at.pop(x, None)
-            else:
-                die_at.pop(block.exitswitch, None)
-            for link in block.exits:
-                for v in link.args:
-                    die_at.pop(v, None)
-            die_at = [(value, key) for (key, value) in die_at.items()]
-            die_at.sort()
-            die_at.append((sys.maxint,))
-            # Done.  XXX the code above this line runs 3 times
-            # (for kind in KINDS) to produce the same result...
-            livevars = [v for v in block.inputargs
-                          if getkind(v.concretetype) == self.kind]
-            # Add the variables of this block to the dependency graph
-            for i, v in enumerate(livevars):
-                dg.add_node(v)
-                for j in range(i):
-                    dg.add_edge(livevars[j], v)
-            livevars = set(livevars)
-            die_index = 0
-            for i, op in enumerate(block.operations):
-                while die_at[die_index][0] == i:
-                    try:
-                        livevars.remove(die_at[die_index][1])
-                    except KeyError:
-                        pass
-                    die_index += 1
-                if (op.result is not None and
-                    getkind(op.result.concretetype) == self.kind):
-                    dg.add_node(op.result)
-                    for v in livevars:
-                        if getkind(v.concretetype) == self.kind:
-                            dg.add_edge(v, op.result)
-                    livevars.add(op.result)
-        self._depgraph = dg
-
-    def coalesce_variables(self):
-        self._unionfind = UnionFind()
-        pendingblocks = list(self.graph.iterblocks())
-        while pendingblocks:
-            block = pendingblocks.pop()
-            # Aggressively try to coalesce each source variable with its
-            # target.  We start from the end of the graph instead of
-            # from the beginning.  This is a bit arbitrary, but the idea
-            # is that the end of the graph runs typically more often
-            # than the start, given that we resume execution from the
-            # middle during blackholing.
-            for link in block.exits:
-                if link.last_exception is not None:
-                    self._depgraph.add_node(link.last_exception)
-                if link.last_exc_value is not None:
-                    self._depgraph.add_node(link.last_exc_value)
-                for i, v in enumerate(link.args):
-                    self._try_coalesce(v, link.target.inputargs[i])
-
-    def _try_coalesce(self, v, w):
-        if isinstance(v, Variable) and getkind(v.concretetype) == self.kind:
-            assert getkind(w.concretetype) == self.kind
-            dg = self._depgraph
-            uf = self._unionfind
-            v0 = uf.find_rep(v)
-            w0 = uf.find_rep(w)
-            if v0 is not w0 and v0 not in dg.neighbours[w0]:
-                _, rep, _ = uf.union(v0, w0)
-                assert uf.find_rep(v0) is uf.find_rep(w0) is rep
-                if rep is v0:
-                    dg.coalesce(w0, v0)
-                else:
-                    assert rep is w0
-                    dg.coalesce(v0, w0)
-
-    def find_node_coloring(self):
-        self._coloring = self._depgraph.find_node_coloring()
-        if self.DEBUG_REGALLOC:
-            for block in self.graph.iterblocks():
-                print block
-                for v in block.getvariables():
-                    print '\t', v, '\t', self.getcolor(v)
-
-    def getcolor(self, v):
-        return self._coloring[self._unionfind.find_rep(v)]
-
-    def swapcolors(self, col1, col2):
-        for key, value in self._coloring.items():
-            if value == col1:
-                self._coloring[key] = col2
-            elif value == col2:
-                self._coloring[key] = col1
+    checkkind = lambda v: getkind(v.concretetype) == kind
+    return regalloc.perform_register_allocation(graph, checkkind, ListOfKind)
diff --git a/pypy/jit/codewriter/support.py b/pypy/jit/codewriter/support.py
--- a/pypy/jit/codewriter/support.py
+++ b/pypy/jit/codewriter/support.py
@@ -20,6 +20,7 @@
 from pypy.rpython.annlowlevel import MixLevelHelperAnnotator
 from pypy.jit.metainterp.typesystem import deref
 from pypy.rlib import rgc
+from pypy.rlib.jit import elidable
 from pypy.rlib.rarithmetic import r_longlong, r_ulonglong, r_uint, intmask
 
 def getargtypes(annotator, values):
@@ -167,9 +168,14 @@
 
 _ll_5_list_ll_arraycopy = rgc.ll_arraycopy
 
+@elidable
 def _ll_1_gc_identityhash(x):
     return lltype.identityhash(x)
 
+# the following function should not be "@elidable": I can think of
+# a corner case in which id(const) is constant-folded, and then 'const'
+# disappears and is collected too early (possibly causing another object
+# with the same id() to appear).
 def _ll_1_gc_id(ptr):
     return llop.gc_id(lltype.Signed, ptr)
 
@@ -420,10 +426,6 @@
     _ll_1_dict_values.need_result_type = True
     _ll_1_dict_items .need_result_type = True
 
-    def _ll_1_newdictiter(ITER, d):
-        return ll_rdict.ll_dictiter(lltype.Ptr(ITER), d)
-    _ll_1_newdictiter.need_result_type = True
-
     _dictnext_keys   = staticmethod(ll_rdict.ll_dictnext_group['keys'])
     _dictnext_values = staticmethod(ll_rdict.ll_dictnext_group['values'])
     _dictnext_items  = staticmethod(ll_rdict.ll_dictnext_group['items'])
@@ -574,10 +576,6 @@
     _ll_1_dict_values.need_result_type = True
     _ll_1_dict_items .need_result_type = True
 
-    def _ll_1_newdictiter(ITER, d):
-        return oo_rdict.ll_dictiter(ITER, d)
-    _ll_1_newdictiter.need_result_type = True
-
     _dictnext_keys   = staticmethod(oo_rdict.ll_dictnext_group['keys'])
     _dictnext_values = staticmethod(oo_rdict.ll_dictnext_group['values'])
     _dictnext_items  = staticmethod(oo_rdict.ll_dictnext_group['items'])
diff --git a/pypy/jit/codewriter/test/test_jtransform.py b/pypy/jit/codewriter/test/test_jtransform.py
--- a/pypy/jit/codewriter/test/test_jtransform.py
+++ b/pypy/jit/codewriter/test/test_jtransform.py
@@ -120,9 +120,9 @@
             assert argtypes[0] == [v.concretetype for v in op.args[1:]]
             assert argtypes[1] == op.result.concretetype
             if oopspecindex == EI.OS_STR2UNICODE:
-                assert extraeffect == None    # not pure, can raise!
+                assert extraeffect == EI.EF_ELIDABLE_CAN_RAISE
             else:
-                assert extraeffect == EI.EF_ELIDABLE
+                assert extraeffect == EI.EF_ELIDABLE_CANNOT_RAISE
         return 'calldescr-%d' % oopspecindex
     def calldescr_canraise(self, calldescr):
         return False
@@ -769,7 +769,7 @@
         def get_vinfo(self, v):
             return None
         def could_be_green_field(self, S1, name1):
-            assert S1 is S
+            assert S1 == S
             assert name1 == 'x'
             return True
     S = lltype.GcStruct('S', ('x', lltype.Char),
diff --git a/pypy/jit/metainterp/optimizeopt/__init__.py b/pypy/jit/metainterp/optimizeopt/__init__.py
--- a/pypy/jit/metainterp/optimizeopt/__init__.py
+++ b/pypy/jit/metainterp/optimizeopt/__init__.py
@@ -55,7 +55,7 @@
 
 
 def optimize_loop_1(metainterp_sd, loop, enable_opts,
-                    inline_short_preamble=True, retraced=False):
+                    inline_short_preamble=True, retraced=False, bridge=False):
     """Optimize loop.operations to remove internal overheadish operations.
     """
 
@@ -64,7 +64,7 @@
     if unroll:
         optimize_unroll(metainterp_sd, loop, optimizations)
     else:
-        optimizer = Optimizer(metainterp_sd, loop, optimizations)
+        optimizer = Optimizer(metainterp_sd, loop, optimizations, bridge)
         optimizer.propagate_all_forward()
 
 def optimize_bridge_1(metainterp_sd, bridge, enable_opts,
@@ -76,7 +76,7 @@
     except KeyError:
         pass
     optimize_loop_1(metainterp_sd, bridge, enable_opts,
-                    inline_short_preamble, retraced)
+                    inline_short_preamble, retraced, bridge=True)
 
 if __name__ == '__main__':
     print ALL_OPTS_NAMES
diff --git a/pypy/jit/metainterp/optimizeopt/fficall.py b/pypy/jit/metainterp/optimizeopt/fficall.py
--- a/pypy/jit/metainterp/optimizeopt/fficall.py
+++ b/pypy/jit/metainterp/optimizeopt/fficall.py
@@ -1,7 +1,7 @@
 from pypy.rpython.annlowlevel import cast_base_ptr_to_instance
 from pypy.rlib.objectmodel import we_are_translated
 from pypy.rlib.libffi import Func
-from pypy.rlib.debug import debug_start, debug_stop, debug_print, have_debug_prints
+from pypy.rlib.debug import debug_start, debug_stop, debug_print
 from pypy.jit.codewriter.effectinfo import EffectInfo
 from pypy.jit.metainterp.resoperation import rop, ResOperation
 from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method
@@ -48,7 +48,7 @@
           inst_argtypes is actually a low-level array, but we can use it
           directly since the only thing we do with it is to read its items
         """
-        
+
         llfunc = funcval.box.getref_base()
         if we_are_translated():
             func = cast_base_ptr_to_instance(Func, llfunc)
diff --git a/pypy/jit/metainterp/optimizeopt/heap.py b/pypy/jit/metainterp/optimizeopt/heap.py
--- a/pypy/jit/metainterp/optimizeopt/heap.py
+++ b/pypy/jit/metainterp/optimizeopt/heap.py
@@ -1,9 +1,10 @@
 import os
-from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method
-from pypy.jit.metainterp.resoperation import rop, ResOperation
-from pypy.rlib.objectmodel import we_are_translated
+
 from pypy.jit.metainterp.jitexc import JitException
 from pypy.jit.metainterp.optimizeopt.optimizer import Optimization
+from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method
+from pypy.jit.metainterp.resoperation import rop
+from pypy.rlib.objectmodel import we_are_translated
 
 
 class CachedField(object):
diff --git a/pypy/jit/metainterp/optimizeopt/intbounds.py b/pypy/jit/metainterp/optimizeopt/intbounds.py
--- a/pypy/jit/metainterp/optimizeopt/intbounds.py
+++ b/pypy/jit/metainterp/optimizeopt/intbounds.py
@@ -1,9 +1,10 @@
+from pypy.jit.metainterp.history import ConstInt
+from pypy.jit.metainterp.optimizeopt.intutils import (IntBound, IntLowerBound,
+    IntUpperBound)
 from pypy.jit.metainterp.optimizeopt.optimizer import Optimization, CONST_1, CONST_0
 from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method
-from pypy.jit.metainterp.optimizeopt.intutils import (IntBound, IntUnbounded,
-    IntLowerBound, IntUpperBound)
-from pypy.jit.metainterp.history import Const, ConstInt
-from pypy.jit.metainterp.resoperation import rop, ResOperation
+from pypy.jit.metainterp.resoperation import rop
+
 
 class OptIntBounds(Optimization):
     """Keeps track of the bounds placed on integers by guards and remove
diff --git a/pypy/jit/metainterp/optimizeopt/optimizer.py b/pypy/jit/metainterp/optimizeopt/optimizer.py
--- a/pypy/jit/metainterp/optimizeopt/optimizer.py
+++ b/pypy/jit/metainterp/optimizeopt/optimizer.py
@@ -1,17 +1,11 @@
-from pypy.jit.metainterp.history import Box, BoxInt, LoopToken, BoxFloat,\
-     ConstFloat
-from pypy.jit.metainterp.history import Const, ConstInt, ConstPtr, ConstObj, REF
+from pypy.jit.metainterp import jitprof, resume, compile
+from pypy.jit.metainterp.executor import execute_nonspec
+from pypy.jit.metainterp.history import BoxInt, BoxFloat, Const, ConstInt, REF
+from pypy.jit.metainterp.optimizeopt.intutils import IntBound, IntUnbounded
+from pypy.jit.metainterp.optimizeopt.util import (make_dispatcher_method,
+    args_dict)
 from pypy.jit.metainterp.resoperation import rop, ResOperation
-from pypy.jit.metainterp import jitprof
-from pypy.jit.metainterp.executor import execute_nonspec
-from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method, sort_descrs
-from pypy.jit.metainterp.optimizeopt.util import descrlist_dict, args_dict
-from pypy.jit.metainterp.optimize import InvalidLoop
-from pypy.jit.metainterp import resume, compile
 from pypy.jit.metainterp.typesystem import llhelper, oohelper
-from pypy.rpython.lltypesystem import lltype
-from pypy.jit.metainterp.history import AbstractDescr, make_hashable_int
-from pypy.jit.metainterp.optimizeopt.intutils import IntBound, IntUnbounded
 from pypy.tool.pairtype import extendabletype
 
 LEVEL_UNKNOWN    = '\x00'
@@ -254,10 +248,11 @@
 
 class Optimizer(Optimization):
 
-    def __init__(self, metainterp_sd, loop, optimizations=None):
+    def __init__(self, metainterp_sd, loop, optimizations=None, bridge=False):
         self.metainterp_sd = metainterp_sd
         self.cpu = metainterp_sd.cpu
         self.loop = loop
+        self.bridge = bridge
         self.values = {}
         self.interned_refs = self.cpu.ts.new_ref_dict()
         self.resumedata_memo = resume.ResumeDataLoopMemo(metainterp_sd)
@@ -413,9 +408,7 @@
             return CVAL_ZERO
 
     def propagate_all_forward(self):
-        self.exception_might_have_happened = True
-        # ^^^ at least at the start of bridges.  For loops, we could set
-        # it to False, but we probably don't care
+        self.exception_might_have_happened = self.bridge
         self.newoperations = []
         self.first_optimization.propagate_begin_forward()
         self.i = 0
diff --git a/pypy/jit/metainterp/optimizeopt/rewrite.py b/pypy/jit/metainterp/optimizeopt/rewrite.py
--- a/pypy/jit/metainterp/optimizeopt/rewrite.py
+++ b/pypy/jit/metainterp/optimizeopt/rewrite.py
@@ -1,10 +1,11 @@
+from pypy.jit.codewriter.effectinfo import EffectInfo
+from pypy.jit.metainterp.history import ConstInt, make_hashable_int
+from pypy.jit.metainterp.optimize import InvalidLoop
+from pypy.jit.metainterp.optimizeopt.intutils import IntBound
 from pypy.jit.metainterp.optimizeopt.optimizer import *
-from pypy.jit.metainterp.resoperation import opboolinvers, opboolreflex
-from pypy.jit.metainterp.history import ConstInt
 from pypy.jit.metainterp.optimizeopt.util import _findall, make_dispatcher_method
-from pypy.jit.metainterp.resoperation import rop, ResOperation
-from pypy.jit.codewriter.effectinfo import EffectInfo
-from pypy.jit.metainterp.optimizeopt.intutils import IntBound
+from pypy.jit.metainterp.resoperation import (opboolinvers, opboolreflex, rop,
+    ResOperation)
 from pypy.rlib.rarithmetic import highest_bit
 
 
diff --git a/pypy/jit/metainterp/optimizeopt/simplify.py b/pypy/jit/metainterp/optimizeopt/simplify.py
--- a/pypy/jit/metainterp/optimizeopt/simplify.py
+++ b/pypy/jit/metainterp/optimizeopt/simplify.py
@@ -1,7 +1,7 @@
-
-from pypy.jit.metainterp.resoperation import ResOperation, rop
 from pypy.jit.metainterp.optimizeopt.optimizer import Optimization
 from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method
+from pypy.jit.metainterp.resoperation import ResOperation, rop
+
 
 class OptSimplify(Optimization):
     def optimize_CALL_PURE(self, op):
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py b/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
--- a/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
@@ -693,7 +693,6 @@
         """
         expected = """
         [i]
-        guard_no_exception() []
         i1 = int_add(i, 3)
         i2 = call(i1, descr=nonwritedescr)
         guard_no_exception() [i1, i2]
@@ -4532,7 +4531,7 @@
         escape(i1)
         jump(p0, i0)
         """
-        self.optimize_loop(ops, expected)
+        self.optimize_strunicode_loop(ops, expected)
 
     def test_int_is_true_bounds(self):
         ops = """
@@ -4551,7 +4550,7 @@
         guard_true(i1) []
         jump(p0)
         """
-        self.optimize_loop(ops, expected)
+        self.optimize_strunicode_loop(ops, expected)
 
     def test_strslice_subtraction_folds(self):
         ops = """
@@ -4586,6 +4585,42 @@
         """
         self.optimize_loop(ops, expected)
 
+    def test_null_char_str(self):
+        ops = """
+        [p0]
+        p1 = newstr(4)
+        setfield_gc(p0, p1, descr=valuedescr)
+        jump(p0)
+        """
+        # It used to be the case that this would have a series of
+        # strsetitem(p1, idx, 0), which was silly because memory is 0 filled
+        # when allocated.
+        expected = """
+        [p0]
+        p1 = newstr(4)
+        setfield_gc(p0, p1, descr=valuedescr)
+        jump(p0)
+        """
+        self.optimize_strunicode_loop(ops, expected)
+
+    def test_newstr_strlen(self):
+        ops = """
+        [i0]
+        p0 = newstr(i0)
+        escape(p0)
+        i1 = strlen(p0)
+        i2 = int_add(i1, 1)
+        jump(i2)
+        """
+        expected = """
+        [i0]
+        p0 = newstr(i0)
+        escape(p0)
+        i1 = int_add(i0, 1)
+        jump(i1)
+        """
+        self.optimize_strunicode_loop(ops, expected)
+
 
 class TestLLtype(BaseTestOptimizeBasic, LLtypeMixin):
     pass
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py b/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
--- a/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
@@ -889,12 +889,10 @@
         i3 = call(i2, descr=nonwritedescr)
         jump(i1)       # the exception is considered lost when we loop back
         """
-        # note that 'guard_no_exception' at the very start must be kept
-        # around: bridges may start with one.  (In case of loops we could
-        # remove it, but we probably don't care.)
+        # note that 'guard_no_exception' at the very start is kept around
+        # for bridges, but not for loops
         preamble = """
         [i]
-        guard_no_exception() []
         i1 = int_add(i, 3)
         i2 = call(i1, descr=nonwritedescr)
         guard_no_exception() [i1, i2]
@@ -2993,6 +2991,38 @@
         '''
         self.optimize_loop(ops, expected, preamble, call_pure_results)
 
+    def test_call_pure_constant_folding_exc(self):
+        # CALL_PURE may be followed by GUARD_NO_EXCEPTION
+        arg_consts = [ConstInt(i) for i in (123456, 4, 5, 6)]
+        call_pure_results = {tuple(arg_consts): ConstInt(42)}
+        ops = '''
+        [i0, i1, i2]
+        escape(i1)
+        escape(i2)
+        i3 = call_pure(123456, 4, 5, 6, descr=plaincalldescr)
+        guard_no_exception() []
+        i4 = call_pure(123456, 4, i0, 6, descr=plaincalldescr)
+        guard_no_exception() []
+        jump(i0, i3, i4)
+        '''
+        preamble = '''
+        [i0, i1, i2]
+        escape(i1)
+        escape(i2)
+        i4 = call(123456, 4, i0, 6, descr=plaincalldescr)
+        guard_no_exception() []
+        jump(i0, i4)
+        '''
+        expected = '''
+        [i0, i2]
+        escape(42)
+        escape(i2)
+        i4 = call(123456, 4, i0, 6, descr=plaincalldescr)
+        guard_no_exception() []
+        jump(i0, i4)
+        '''
+        self.optimize_loop(ops, expected, preamble, call_pure_results)
+
     # ----------
 
     def test_vref_nonvirtual_nonescape(self):
diff --git a/pypy/jit/metainterp/optimizeopt/unroll.py b/pypy/jit/metainterp/optimizeopt/unroll.py
--- a/pypy/jit/metainterp/optimizeopt/unroll.py
+++ b/pypy/jit/metainterp/optimizeopt/unroll.py
@@ -1,14 +1,12 @@
+from pypy.jit.codewriter.effectinfo import EffectInfo
+from pypy.jit.metainterp.compile import ResumeGuardDescr
+from pypy.jit.metainterp.history import TreeLoop, LoopToken
+from pypy.jit.metainterp.jitexc import JitException
+from pypy.jit.metainterp.optimize import InvalidLoop, RetraceLoop
 from pypy.jit.metainterp.optimizeopt.optimizer import *
-from pypy.jit.metainterp.optimizeopt.virtualize import AbstractVirtualValue
 from pypy.jit.metainterp.resoperation import rop, ResOperation
-from pypy.jit.metainterp.compile import ResumeGuardDescr
 from pypy.jit.metainterp.resume import Snapshot
-from pypy.jit.metainterp.history import TreeLoop, LoopToken
-from pypy.rlib.debug import debug_start, debug_stop, debug_print
-from pypy.jit.metainterp.optimize import InvalidLoop, RetraceLoop
-from pypy.jit.metainterp.jitexc import JitException
-from pypy.jit.metainterp.history import make_hashable_int
-from pypy.jit.codewriter.effectinfo import EffectInfo
+from pypy.rlib.debug import debug_print
 
 # Assumptions
 # ===========
@@ -22,7 +20,7 @@
 # are also recreated to allow virtuals not supported to be forced.
 #
 # First of all, the optimizations are not allowed to introduce new
-# boxes. It is the unoptimized version of the trace that is inlined to 
+# boxes. It is the unoptimized version of the trace that is inlined to
 # form the second iteration of the loop. Otherwise the
 # state of the virtuals would not be updated correctly. Whenever some
 # box from the first iteration is reused in the second iteration, it
@@ -57,7 +55,7 @@
 # be absorbed into the virtual p2 and never seen by the heap
 # optimizer. At the end of the loop both p2 and p3 are virtuals, but
 # the loop needs p2 to be a pointer to be able to call itself. So it
-# is forced producing the operations 
+# is forced producing the operations
 #
 #         p2 = new_with_vtable(ConstClass(node_vtable))
 #         setfield_gc(p2, i2, descr=nextdescr)
@@ -68,7 +66,7 @@
 # the trace were optimized under the wrong assumption that the
 # setfield_gc was store sinked which could lead to errors. In this
 # case what would happen is that it would be inserted once more in
-# front of the guard. 
+# front of the guard.
 
 
 
@@ -112,7 +110,7 @@
     def inline_descr_inplace(self, descr):
         if isinstance(descr, ResumeGuardDescr):
             descr.rd_snapshot = self.inline_snapshot(descr.rd_snapshot)
-            
+
     def inline_arg(self, arg):
         if arg is None:
             return None
@@ -139,7 +137,7 @@
                 return False
         return True
 
-    def generate_guards(self, other, args, cpu, extra_guards):        
+    def generate_guards(self, other, args, cpu, extra_guards):
         assert len(self.state) == len(other.state) == len(args)
         for i in range(len(self.state)):
             self.state[i].generate_guards(other.state[i], args[i],
@@ -153,7 +151,7 @@
 
     def register_virtual_fields(self, keybox, fieldboxes):
         self.fieldboxes[keybox] = fieldboxes
-        
+
     def already_seen_virtual(self, keybox):
         return keybox in self.fieldboxes
 
@@ -233,20 +231,20 @@
         if self.level == LEVEL_CONSTANT:
             import pdb; pdb.set_trace()
             raise NotImplementedError
-        
+
 
 class UnrollOptimizer(Optimization):
     """Unroll the loop into two iterations. The first one will
     become the preamble or entry bridge (don't think there is a
     distinction anymore)"""
-    
+
     def __init__(self, metainterp_sd, loop, optimizations):
         self.optimizer = Optimizer(metainterp_sd, loop, optimizations)
         self.cloned_operations = []
         for op in self.optimizer.loop.operations:
             newop = op.clone()
             self.cloned_operations.append(newop)
-            
+
     def propagate_all_forward(self):
         loop = self.optimizer.loop
         jumpop = loop.operations[-1]
@@ -284,7 +282,7 @@
             assert isinstance(start_resumedescr, ResumeGuardDescr)
             snapshot = start_resumedescr.rd_snapshot
             while snapshot is not None:
-                snapshot_args = snapshot.boxes 
+                snapshot_args = snapshot.boxes
                 new_snapshot_args = []
                 for a in snapshot_args:
                     if not isinstance(a, Const):
@@ -313,7 +311,7 @@
                 short_loop.inputargs = loop.preamble.inputargs[:]
                 short_loop.operations = short
 
-                # Clone ops and boxes to get private versions and 
+                # Clone ops and boxes to get private versions and
                 newargs = [a.clonebox() for a in short_loop.inputargs]
                 inliner = Inliner(short_loop.inputargs, newargs)
                 short_loop.inputargs = newargs
@@ -336,10 +334,10 @@
                 for op in short_loop.operations:
                     if op.result:
                         op.result.forget_value()
-                
+
     def inline(self, loop_operations, loop_args, jump_args):
         self.inliner = inliner = Inliner(loop_args, jump_args)
-           
+
         for v in self.optimizer.values.values():
             v.last_guard_index = -1 # FIXME: Are there any more indexes stored?
 
@@ -371,12 +369,12 @@
         jumpargs = jmp.getarglist()
 
         # FIXME: Should also loop over operations added by forcing things in this loop
-        for op in newoperations: 
+        for op in newoperations:
             boxes_created_this_iteration[op.result] = True
             args = op.getarglist()
             if op.is_guard():
                 args = args + op.getfailargs()
-            
+
             for a in args:
                 if not isinstance(a, Const) and not a in boxes_created_this_iteration:
                     if a not in inputargs:
@@ -439,7 +437,7 @@
                             "at preamble position: ", preamble_i,
                             "loop position: ", loop_i)
                 return None
-                
+
             if self.sameop(newop, loop_ops[loop_i]) \
                and loop_i < len(loop_ops):
                 try:
@@ -460,7 +458,7 @@
                                 "loop position: ", loop_i)
                     return None
                 short_preamble.append(op)
-                
+
             state.update(op)
             preamble_i += 1
 
@@ -470,7 +468,7 @@
                         "at position", loop_i)
             return None
 
-        
+
         jumpargs = []
         for i in range(len(loop.inputargs)):
             try:
@@ -498,7 +496,7 @@
                     return None
             if op.result:
                 seen[op.result] = True
-        
+
         return short_preamble
 
 class ExeState(object):
@@ -508,7 +506,7 @@
         self.unsafe_getitem = {}
         self.unsafe_getarrayitem = {}
         self.unsafe_getarrayitem_indexes = {}
-        
+
     # Make sure it is safe to move the instrucions in short_preamble
     # to the top making short_preamble followed by loop equvivalent
     # to preamble
@@ -545,15 +543,17 @@
         elif opnum == rop.CALL:
             effectinfo = descr.get_extra_info()
             if effectinfo is not None:
-                if effectinfo.extraeffect == EffectInfo.EF_LOOPINVARIANT or \
-                   effectinfo.extraeffect == EffectInfo.EF_ELIDABLE:
+                ef = effectinfo.extraeffect
+                if ef == EffectInfo.EF_LOOPINVARIANT or \
+                   ef == EffectInfo.EF_ELIDABLE_CANNOT_RAISE or \
+                   ef == EffectInfo.EF_ELIDABLE_CAN_RAISE:
                     return True
         return False
-    
+
     def update(self, op):
         if (op.has_no_side_effect() or
             op.is_ovf() or
-            op.is_guard()): 
+            op.is_guard()):
             return
         opnum = op.getopnum()
         descr = op.getdescr()
@@ -566,7 +566,7 @@
         if (opnum == rop.SETARRAYITEM_GC or
             opnum == rop.SETARRAYITEM_RAW):
             index = op.getarg(1)
-            if isinstance(index, Const):                
+            if isinstance(index, Const):
                 d = self.unsafe_getarrayitem_indexes.get(descr, None)
                 if d is None:
                     d = self.unsafe_getarrayitem_indexes[descr] = {}
@@ -592,7 +592,7 @@
     def __init__(self):
         self.map = {}
 
-    
+
     def link_ops(self, preambleop, loopop):
         pargs = preambleop.getarglist()
         largs = loopop.getarglist()
@@ -606,7 +606,7 @@
             if not loopop.result:
                 raise ImpossibleLink
             self.link_boxes(preambleop.result, loopop.result)
-        
+
 
     def link_boxes(self, pbox, lbox):
         if lbox in self.map:
@@ -627,11 +627,11 @@
     def __init__(self, retraced):
         self.retraced = retraced
         self.inliner = None
-        
-    
+
+
     def reconstruct_for_next_iteration(self, optimizer, valuemap):
         return self
-    
+
     def propagate_forward(self, op):
         if op.getopnum() == rop.JUMP:
             descr = op.getdescr()
@@ -657,7 +657,7 @@
                             sh.virtual_state.generate_guards(virtual_state,
                                                              args, cpu,
                                                              extra_guards)
-                            
+
                             ok = True
                         except InvalidLoop:
                             pass
@@ -697,7 +697,7 @@
                     else:
                         debug_print("Retracing (%d of %d)" % (retraced_count,
                                                               limit))
-                                                              
+
                         raise RetraceLoop
                 else:
                     if not descr.failed_states:
@@ -705,21 +705,21 @@
                     else:
                         descr.failed_states.append(virtual_state)
         self.emit_operation(op)
-                
-        
-        
+
+
+
     def inline(self, loop_operations, loop_args, jump_args, dryrun=False):
         self.inliner = inliner = Inliner(loop_args, jump_args)
 
         for op in loop_operations:
             newop = inliner.inline_op(op)
-            
+
             if not dryrun:
                 self.emit_operation(newop)
             else:
                 if not self.is_emittable(newop):
                     return False
-        
+
         return True
 
     #def inline_arg(self, arg):
diff --git a/pypy/jit/metainterp/optimizeopt/virtualize.py b/pypy/jit/metainterp/optimizeopt/virtualize.py
--- a/pypy/jit/metainterp/optimizeopt/virtualize.py
+++ b/pypy/jit/metainterp/optimizeopt/virtualize.py
@@ -1,11 +1,11 @@
+from pypy.jit.codewriter.heaptracker import vtable2descr
+from pypy.jit.metainterp.executor import execute
 from pypy.jit.metainterp.history import Const, ConstInt, BoxInt
+from pypy.jit.metainterp.optimizeopt import optimizer
+from pypy.jit.metainterp.optimizeopt.util import (make_dispatcher_method,
+    descrlist_dict, sort_descrs)
 from pypy.jit.metainterp.resoperation import rop, ResOperation
-from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method
-from pypy.jit.metainterp.optimizeopt.util import descrlist_dict, sort_descrs
 from pypy.rlib.objectmodel import we_are_translated
-from pypy.jit.metainterp.optimizeopt import optimizer
-from pypy.jit.metainterp.executor import execute
-from pypy.jit.codewriter.heaptracker import vtable2descr
 
 
 class AbstractVirtualValue(optimizer.OptValue):
diff --git a/pypy/jit/metainterp/optimizeopt/vstring.py b/pypy/jit/metainterp/optimizeopt/vstring.py
--- a/pypy/jit/metainterp/optimizeopt/vstring.py
+++ b/pypy/jit/metainterp/optimizeopt/vstring.py
@@ -1,18 +1,14 @@
-from pypy.rpython.lltypesystem import lltype, rstr, llmemory
+from pypy.jit.codewriter.effectinfo import EffectInfo
+from pypy.jit.metainterp.history import (BoxInt, Const, ConstInt, ConstPtr,
+    get_const_ptr_for_string, get_const_ptr_for_unicode)
+from pypy.jit.metainterp.optimizeopt import optimizer, virtualize
+from pypy.jit.metainterp.optimizeopt.optimizer import CONST_0, CONST_1, llhelper
+from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method
+from pypy.jit.metainterp.resoperation import rop, ResOperation
+from pypy.rlib.objectmodel import specialize, we_are_translated
+from pypy.rlib.unroll import unrolling_iterable
 from pypy.rpython import annlowlevel
-from pypy.jit.metainterp.history import Box, BoxInt, BoxPtr
-from pypy.jit.metainterp.history import Const, ConstInt, ConstPtr
-from pypy.jit.metainterp.history import get_const_ptr_for_string
-from pypy.jit.metainterp.history import get_const_ptr_for_unicode
-from pypy.jit.metainterp.resoperation import rop, ResOperation
-from pypy.jit.metainterp.optimizeopt import optimizer, virtualize
-from pypy.jit.metainterp.optimizeopt.optimizer import CONST_0, CONST_1
-from pypy.jit.metainterp.optimizeopt.optimizer import llhelper
-from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method
-from pypy.jit.codewriter.effectinfo import EffectInfo
-from pypy.jit.codewriter import heaptracker
-from pypy.rlib.unroll import unrolling_iterable
-from pypy.rlib.objectmodel import specialize, we_are_translated
+from pypy.rpython.lltypesystem import lltype, rstr
 
 
 class StrOrUnicode(object):
@@ -147,10 +143,11 @@
     def string_copy_parts(self, optimizer, targetbox, offsetbox, mode):
         for i in range(len(self._chars)):
             charbox = self._chars[i].force_box()
-            optimizer.emit_operation(ResOperation(mode.STRSETITEM, [targetbox,
-                                                                offsetbox,
-                                                                charbox],
-                                              None))
+            if not (isinstance(charbox, Const) and charbox.same_constant(CONST_0)):
+                optimizer.emit_operation(ResOperation(mode.STRSETITEM, [targetbox,
+                                                                    offsetbox,
+                                                                    charbox],
+                                                  None))
             offsetbox = _int_add(optimizer, offsetbox, CONST_1)
         return offsetbox
 
@@ -402,6 +399,7 @@
         else:
             self.getvalue(op.result).ensure_nonnull()
             self.emit_operation(op)
+            self.pure(mode.STRLEN, [op.result], op.getarg(0))
 
     def optimize_STRSETITEM(self, op):
         value = self.getvalue(op.getarg(0))
diff --git a/pypy/jit/metainterp/pyjitpl.py b/pypy/jit/metainterp/pyjitpl.py
--- a/pypy/jit/metainterp/pyjitpl.py
+++ b/pypy/jit/metainterp/pyjitpl.py
@@ -56,6 +56,8 @@
         # for resume.py operation
         self.parent_resumedata_snapshot = None
         self.parent_resumedata_frame_info_list = None
+        # counter for unrolling inlined loops
+        self.unroll_iterations = 1
 
     @specialize.arg(3)
     def copy_constants(self, registers, constants, ConstClass):
@@ -931,6 +933,10 @@
             # 'redboxes' back into the registers where it comes from.
             put_back_list_of_boxes3(self, jcposition, redboxes)
         else:
+            if jitdriver_sd.warmstate.should_unroll_one_iteration(greenboxes):
+                if self.unroll_iterations > 0:
+                    self.unroll_iterations -= 1
+                    return
             # warning! careful here.  We have to return from the current
             # frame containing the jit_merge_point, and then use
             # do_recursive_call() to follow the recursive call.  This is
@@ -1193,7 +1199,7 @@
         return self.metainterp.execute_and_record(opnum, descr, *argboxes)
 
     @specialize.arg(1)
-    def execute_varargs(self, opnum, argboxes, descr, exc):
+    def execute_varargs(self, opnum, argboxes, descr, exc, pure):
         self.metainterp.clear_exception()
         resbox = self.metainterp.execute_and_record_varargs(opnum, argboxes,
                                                             descr=descr)
@@ -1201,6 +1207,9 @@
             self.make_result_of_lastop(resbox)
             # ^^^ this is done before handle_possible_exception() because we
             # need the box to show up in get_list_of_active_boxes()
+        if pure and self.metainterp.last_exc_value_box is None:
+            resbox = self.metainterp.record_result_of_call_pure(resbox)
+            exc = exc and not isinstance(resbox, Const)
         if exc:
             self.metainterp.handle_possible_exception()
         else:
@@ -1263,16 +1272,14 @@
             return resbox
         else:
             effect = effectinfo.extraeffect
-            if effect == effectinfo.EF_CANNOT_RAISE:
-                return self.execute_varargs(rop.CALL, allboxes, descr, False)
-            elif effect == effectinfo.EF_ELIDABLE:
-                return self.metainterp.record_result_of_call_pure(
-                    self.execute_varargs(rop.CALL, allboxes, descr, False))
-            elif effect == effectinfo.EF_LOOPINVARIANT:
+            if effect == effectinfo.EF_LOOPINVARIANT:
                 return self.execute_varargs(rop.CALL_LOOPINVARIANT, allboxes,
-                                            descr, False)
-            else:
-                return self.execute_varargs(rop.CALL, allboxes, descr, True)
+                                            descr, False, False)
+            exc = (effect != effectinfo.EF_CANNOT_RAISE and
+                   effect != effectinfo.EF_ELIDABLE_CANNOT_RAISE)
+            pure = (effect == effectinfo.EF_ELIDABLE_CAN_RAISE or
+                    effect == effectinfo.EF_ELIDABLE_CANNOT_RAISE)
+            return self.execute_varargs(rop.CALL, allboxes, descr, exc, pure)
 
     def do_residual_or_indirect_call(self, funcbox, calldescr, argboxes):
         """The 'residual_call' operation is emitted in two cases:
@@ -1680,8 +1687,12 @@
             return
         if opnum == rop.CALL:
             effectinfo = descr.get_extra_info()
-            if effectinfo.extraeffect == effectinfo.EF_ELIDABLE:
-                return
+            if effectinfo is not None:
+                ef = effectinfo.extraeffect
+                if ef == effectinfo.EF_LOOPINVARIANT or \
+                   ef == effectinfo.EF_ELIDABLE_CANNOT_RAISE or \
+                   ef == effectinfo.EF_ELIDABLE_CAN_RAISE:
+                    return
         if self.heap_cache:
             self.heap_cache.clear()
         if self.heap_array_cache:
@@ -2369,6 +2380,7 @@
                 tobox = newbox
             if change:
                 self.heap_cache[descr] = frombox, tobox
+        # XXX what about self.heap_array_cache?
 
     def find_biggest_function(self):
         start_stack = []
diff --git a/pypy/jit/metainterp/test/support.py b/pypy/jit/metainterp/test/support.py
--- a/pypy/jit/metainterp/test/support.py
+++ b/pypy/jit/metainterp/test/support.py
@@ -277,3 +277,15 @@
         NODE._add_fields({'value': ootype.Signed,
                           'next': NODE})
         return NODE
+
+# ____________________________________________________________
+
+class _Foo:
+    pass
+
+def noConst(x):
+    """Helper function for tests, returning 'x' as a BoxInt/BoxPtr
+    even if it is a ConstInt/ConstPtr."""
+    f1 = _Foo(); f2 = _Foo()
+    f1.x = x; f2.x = 0
+    return f1.x
diff --git a/pypy/jit/metainterp/test/test_ajit.py b/pypy/jit/metainterp/test/test_ajit.py
--- a/pypy/jit/metainterp/test/test_ajit.py
+++ b/pypy/jit/metainterp/test/test_ajit.py
@@ -14,7 +14,7 @@
 from pypy.rpython.lltypesystem import lltype, llmemory, rffi
 from pypy.rpython.ootypesystem import ootype
 from pypy.jit.metainterp.optimizeopt import ALL_OPTS_DICT
-from pypy.jit.metainterp.test.support import LLJitMixin, OOJitMixin
+from pypy.jit.metainterp.test.support import LLJitMixin, OOJitMixin, noConst
 
 class BasicTests:
 
@@ -407,6 +407,58 @@
         # the CALL_PURE is constant-folded away by optimizeopt.py
         self.check_loops(int_sub=1, call=0, call_pure=0, getfield_gc=0)
 
+    def test_elidable_raising(self):
+        myjitdriver = JitDriver(greens = ['m'], reds = ['n'])
+        @elidable
+        def externfn(x):
+            if x <= 0:
+                raise ValueError
+            return x - 1
+        def f(n, m):
+            while n > 0:
+                myjitdriver.can_enter_jit(n=n, m=m)
+                myjitdriver.jit_merge_point(n=n, m=m)
+                try:
+                    n -= externfn(m)
+                except ValueError:
+                    n -= 1
+            return n
+        res = self.meta_interp(f, [22, 6])
+        assert res == -3
+        # the CALL_PURE is constant-folded away during tracing
+        self.check_loops(int_sub=1, call=0, call_pure=0)
+        #
+        res = self.meta_interp(f, [22, -5])
+        assert res == 0
+        # raises: becomes CALL and is not constant-folded away
+        self.check_loops(int_sub=1, call=1, call_pure=0)
+
+    def test_elidable_raising_2(self):
+        myjitdriver = JitDriver(greens = ['m'], reds = ['n'])
+        @elidable
+        def externfn(x):
+            if x <= 0:
+                raise ValueError
+            return x - 1
+        def f(n, m):
+            while n > 0:
+                myjitdriver.can_enter_jit(n=n, m=m)
+                myjitdriver.jit_merge_point(n=n, m=m)
+                try:
+                    n -= externfn(noConst(m))
+                except ValueError:
+                    n -= 1
+            return n
+        res = self.meta_interp(f, [22, 6])
+        assert res == -3
+        # the CALL_PURE is constant-folded away by optimizeopt.py
+        self.check_loops(int_sub=1, call=0, call_pure=0)
+        #
+        res = self.meta_interp(f, [22, -5])
+        assert res == 0
+        # raises: becomes CALL and is not constant-folded away
+        self.check_loops(int_sub=1, call=1, call_pure=0)
+
     def test_constant_across_mp(self):
         myjitdriver = JitDriver(greens = [], reds = ['n'])
         class X(object):
@@ -508,6 +560,32 @@
         assert res == 84 - 61 - 62
         self.check_history(call=1)   # because the trace starts immediately
 
+    def test_unroll_one_loop_iteration(self):
+        def unroll(code):
+            return code == 0
+        myjitdriver = JitDriver(greens = ['code'],
+                                reds = ['loops', 'inner_loops', 's'],
+                                should_unroll_one_iteration=unroll)
+
+        def f(code, loops, inner_loops):
+            s = 0
+            while loops > 0:
+                myjitdriver.jit_merge_point(code=code, loops=loops,
+                                            inner_loops=inner_loops, s=s)
+                if code == 1:
+                    s += f(0, inner_loops, 0)
+                loops -= 1
+                s += 1
+            return s
+
+        res = self.meta_interp(f, [1, 4, 1], enable_opts="", inline=True)
+        assert res == f(1, 4, 1)
+        self.check_history(call_assembler=0)
+
+        res = self.meta_interp(f, [1, 4, 2], enable_opts="", inline=True)
+        assert res == f(1, 4, 2)
+        self.check_history(call_assembler=1)
+
     def test_format(self):
         def f(n):
             return len("<%d>" % n)
diff --git a/pypy/jit/metainterp/test/test_dict.py b/pypy/jit/metainterp/test/test_dict.py
--- a/pypy/jit/metainterp/test/test_dict.py
+++ b/pypy/jit/metainterp/test/test_dict.py
@@ -157,7 +157,7 @@
         # the same arguments are not folded, because we have conflicting
         # definitions of pure, once strhash can be appropriately folded
         # this should be decreased to seven.
-        self.check_loops({"call": 8, "guard_false": 1, "guard_no_exception": 5,
+        self.check_loops({"call": 8, "guard_false": 1, "guard_no_exception": 6,
                           "guard_true": 1, "int_and": 1, "int_gt": 1,
                           "int_is_true": 1, "int_sub": 1, "jump": 1,
                           "new_with_vtable": 1, "setfield_gc": 1})
diff --git a/pypy/jit/metainterp/test/test_string.py b/pypy/jit/metainterp/test/test_string.py
--- a/pypy/jit/metainterp/test/test_string.py
+++ b/pypy/jit/metainterp/test/test_string.py
@@ -358,3 +358,22 @@
         self.check_loops(call=3,    # str(), _str(), escape()
                          newunicode=1, unicodegetitem=0,
                          unicodesetitem=1, copyunicodecontent=1)
+
+    def test_str2unicode_fold(self):
+        _str = self._str
+        jitdriver = JitDriver(greens = ['g'], reds = ['m'])
+        @dont_look_inside
+        def escape(x):
+            print str(x)
+        def f(g, m):
+            g = str(g)
+            while m >= 0:
+                jitdriver.can_enter_jit(g=g, m=m)
+                jitdriver.jit_merge_point(g=g, m=m)
+                escape(_str(g))
+                m -= 1
+            return 42
+        self.meta_interp(f, [6, 7])
+        self.check_loops(call_pure=0, call=1,
+                         newunicode=0, unicodegetitem=0,
+                         unicodesetitem=0, copyunicodecontent=0)
diff --git a/pypy/jit/metainterp/test/test_warmstate.py b/pypy/jit/metainterp/test/test_warmstate.py
--- a/pypy/jit/metainterp/test/test_warmstate.py
+++ b/pypy/jit/metainterp/test/test_warmstate.py
@@ -186,6 +186,7 @@
         _get_printable_location_ptr = None
         _confirm_enter_jit_ptr = None
         _can_never_inline_ptr = None
+        _should_unroll_one_iteration_ptr = None
     class FakeCell:
         dont_trace_here = False
     state = WarmEnterState(FakeWarmRunnerDesc(), FakeJitDriverSD())
@@ -214,6 +215,7 @@
         _confirm_enter_jit_ptr = None
         _can_never_inline_ptr = None
         _get_jitcell_at_ptr = None
+        _should_unroll_one_iteration_ptr = None
     state = WarmEnterState(FakeWarmRunnerDesc(), FakeJitDriverSD())
     state.make_jitdriver_callbacks()
     res = state.get_location_str([ConstInt(5), constfloat(42.5)])
@@ -238,6 +240,7 @@
         _confirm_enter_jit_ptr = llhelper(ENTER_JIT, confirm_enter_jit)
         _can_never_inline_ptr = None
         _get_jitcell_at_ptr = None
+        _should_unroll_one_iteration_ptr = None
 
     state = WarmEnterState(FakeWarmRunnerDesc(), FakeJitDriverSD())
     state.make_jitdriver_callbacks()
@@ -262,6 +265,7 @@
         _confirm_enter_jit_ptr = None
         _can_never_inline_ptr = llhelper(CAN_NEVER_INLINE, can_never_inline)
         _get_jitcell_at_ptr = None
+        _should_unroll_one_iteration_ptr = None
 
     state = WarmEnterState(FakeWarmRunnerDesc(), FakeJitDriverSD())
     state.make_jitdriver_callbacks()
diff --git a/pypy/jit/metainterp/warmspot.py b/pypy/jit/metainterp/warmspot.py
--- a/pypy/jit/metainterp/warmspot.py
+++ b/pypy/jit/metainterp/warmspot.py
@@ -409,6 +409,7 @@
         jd.warmstate = state
 
         def crash_in_jit(e):
+            tb = not we_are_translated() and sys.exc_info()[2]
             try:
                 raise e
             except JitException:
@@ -422,8 +423,8 @@
                     print "~~~ Crash in JIT!"
                     print '~~~ %s: %s' % (e.__class__, e)
                     if sys.stdout == sys.__stdout__:
-                        import pdb; pdb.post_mortem(sys.exc_info()[2])
-                    raise
+                        import pdb; pdb.post_mortem(tb)
+                    raise e.__class__, e, tb
                 fatalerror('~~~ Crash in JIT! %s' % (e,), traceback=True)
         crash_in_jit._dont_inline_ = True
 
@@ -468,6 +469,9 @@
                 onlygreens=False)
             jd._can_never_inline_ptr = self._make_hook_graph(jd,
                 annhelper, jd.jitdriver.can_never_inline, annmodel.s_Bool)
+            jd._should_unroll_one_iteration_ptr = self._make_hook_graph(jd,
+                annhelper, jd.jitdriver.should_unroll_one_iteration,
+                annmodel.s_Bool)
         annhelper.finish()
 
     def _make_hook_graph(self, jitdriver_sd, annhelper, func,
diff --git a/pypy/jit/metainterp/warmstate.py b/pypy/jit/metainterp/warmstate.py
--- a/pypy/jit/metainterp/warmstate.py
+++ b/pypy/jit/metainterp/warmstate.py
@@ -572,6 +572,19 @@
             return can_inline_greenargs(*greenargs)
         self.can_inline_greenargs = can_inline_greenargs
         self.can_inline_callable = can_inline_callable
+
+        if jd._should_unroll_one_iteration_ptr is None:
+            def should_unroll_one_iteration(greenkey):
+                return False
+        else:
+            rtyper = self.warmrunnerdesc.rtyper
+            inline_ptr = jd._should_unroll_one_iteration_ptr
+            def should_unroll_one_iteration(greenkey):
+                greenargs = unwrap_greenkey(greenkey)
+                fn = support.maybe_on_top_of_llinterp(rtyper, inline_ptr)
+                return fn(*greenargs)
+        self.should_unroll_one_iteration = should_unroll_one_iteration
+        
         if hasattr(jd.jitdriver, 'on_compile'):
             def on_compile(logger, token, operations, type, greenkey):
                 greenargs = unwrap_greenkey(greenkey)
diff --git a/pypy/module/_ffi/interp_ffi.py b/pypy/module/_ffi/interp_ffi.py
--- a/pypy/module/_ffi/interp_ffi.py
+++ b/pypy/module/_ffi/interp_ffi.py
@@ -11,6 +11,7 @@
 from pypy.rlib import libffi
 from pypy.rlib.rdynload import DLOpenError
 from pypy.rlib.rarithmetic import intmask, r_uint
+from pypy.rlib.objectmodel import we_are_translated
 
 class W_FFIType(Wrappable):
 
@@ -74,6 +75,13 @@
     def is_struct(self):
         return libffi.types.is_struct(self.ffitype)
 
+    def is_char_p(self):
+        return self is app_types.char_p
+
+    def is_unichar_p(self):
+        return self is app_types.unichar_p
+
+
 W_FFIType.typedef = TypeDef(
     'FFIType',
     __repr__ = interp2app(W_FFIType.repr),
@@ -115,7 +123,12 @@
         ## 'Z' : ffi_type_pointer,
 
         ]
-    return dict([(t.name, t) for t in types])
+    d = dict([(t.name, t) for t in types])
+    w_char = d['char']
+    w_unichar = d['unichar']
+    d['char_p'] = W_FFIType('char_p', libffi.types.pointer, w_pointer_to = w_char)
+    d['unichar_p'] = W_FFIType('unichar_p', libffi.types.pointer, w_pointer_to = w_unichar)
+    return d
 
 class app_types:
     pass
@@ -125,9 +138,14 @@
     try:
         return descr_new_pointer.cache[w_pointer_to]
     except KeyError:
-        w_pointer_to = space.interp_w(W_FFIType, w_pointer_to)
-        name = '(pointer to %s)' % w_pointer_to.name
-        w_result = W_FFIType(name, libffi.types.pointer, w_pointer_to = w_pointer_to)
+        if w_pointer_to is app_types.char:
+            w_result = app_types.char_p
+        elif w_pointer_to is app_types.unichar:
+            w_result = app_types.unichar_p
+        else:
+            w_pointer_to = space.interp_w(W_FFIType, w_pointer_to)
+            name = '(pointer to %s)' % w_pointer_to.name
+            w_result = W_FFIType(name, libffi.types.pointer, w_pointer_to = w_pointer_to)
         descr_new_pointer.cache[w_pointer_to] = w_result
         return w_result
 descr_new_pointer.cache = {}
@@ -164,6 +182,7 @@
         self.func = func
         self.argtypes_w = argtypes_w
         self.w_restype = w_restype
+        self.to_free = []
 
     @jit.unroll_safe
     def build_argchain(self, space, args_w):
@@ -188,6 +207,9 @@
                 self.arg_longlong(space, argchain, w_arg)
             elif w_argtype.is_signed():
                 argchain.arg(unwrap_truncate_int(rffi.LONG, space, w_arg))
+            elif self.add_char_p_maybe(space, argchain, w_arg, w_argtype):
+                # the argument is added to the argchain directly by the method above
+                pass
             elif w_argtype.is_pointer():
                 w_arg = self.convert_pointer_arg_maybe(space, w_arg, w_argtype)
                 argchain.arg(intmask(space.uint_w(w_arg)))
@@ -212,6 +234,29 @@
                 assert False, "Argument shape '%s' not supported" % w_argtype
         return argchain
 
+    def add_char_p_maybe(self, space, argchain, w_arg, w_argtype):
+        """
+        Automatic conversion from string to char_p. The allocated buffer will
+        be automatically freed after the call.
+        """
+        w_type = jit.promote(space.type(w_arg))
+        if w_argtype.is_char_p() and w_type is space.w_str:
+            strval = space.str_w(w_arg)
+            buf = rffi.str2charp(strval)
+            self.to_free.append(rffi.cast(rffi.VOIDP, buf))
+            addr = rffi.cast(rffi.ULONG, buf)
+            argchain.arg(addr)
+            return True
+        elif w_argtype.is_unichar_p() and (w_type is space.w_str or
+                                           w_type is space.w_unicode):
+            unicodeval = space.unicode_w(w_arg)
+            buf = rffi.unicode2wcharp(unicodeval)
+            self.to_free.append(rffi.cast(rffi.VOIDP, buf))
+            addr = rffi.cast(rffi.ULONG, buf)
+            argchain.arg(addr)
+            return True
+        return False
+
     def convert_pointer_arg_maybe(self, space, w_arg, w_argtype):
         """
         Try to convert the argument by calling _as_ffi_pointer_()
@@ -235,6 +280,17 @@
     def call(self, space, args_w):
         self = jit.promote(self)
         argchain = self.build_argchain(space, args_w)
+        return self._do_call(space, argchain)
+
+    def free_temp_buffers(self, space):
+        for buf in self.to_free:
+            if not we_are_translated():
+                buf[0] = '\00' # invalidate the buffer, so that
+                               # test_keepalive_temp_buffer can fail
+            lltype.free(buf, flavor='raw')
+        self.to_free = []
+
+    def _do_call(self, space, argchain):
         w_restype = self.w_restype
         if w_restype.is_longlong():
             # note that we must check for longlong first, because either
@@ -372,6 +428,7 @@
     '_ffi.FuncPtr',
     __call__ = interp2app(W_FuncPtr.call),
     getaddr = interp2app(W_FuncPtr.getaddr),
+    free_temp_buffers = interp2app(W_FuncPtr.free_temp_buffers),
     fromaddr = interp2app(descr_fromaddr, as_classmethod=True)
     )
 
diff --git a/pypy/module/_ffi/test/test__ffi.py b/pypy/module/_ffi/test/test__ffi.py
--- a/pypy/module/_ffi/test/test__ffi.py
+++ b/pypy/module/_ffi/test/test__ffi.py
@@ -188,6 +188,75 @@
         assert get_dummy() == 123
         set_val_to_ptr(ptr2, 0)
 
+    def test_convert_strings_to_char_p(self):
+        """
+            long mystrlen(char* s)
+            {
+                long len = 0;
+                while(*s++)
+                    len++;
+                return len;
+            }
+        """
+        from _ffi import CDLL, types
+        import _rawffi
+        libfoo = CDLL(self.libfoo_name)
+        mystrlen = libfoo.getfunc('mystrlen', [types.char_p], types.slong)
+        #
+        # first, try automatic conversion from a string
+        assert mystrlen('foobar') == 6
+        # then, try to pass an explicit pointer
+        CharArray = _rawffi.Array('c')
+        mystr = CharArray(7, 'foobar')
+        assert mystrlen(mystr.buffer) == 6
+        mystr.free()
+        mystrlen.free_temp_buffers()
+
+    def test_convert_unicode_to_unichar_p(self):
+        """
+            #include <wchar.h>
+            long mystrlen_u(wchar_t* s)
+            {
+                long len = 0;
+                while(*s++)
+                    len++;
+                return len;
+            }
+        """
+        from _ffi import CDLL, types
+        import _rawffi
+        libfoo = CDLL(self.libfoo_name)
+        mystrlen = libfoo.getfunc('mystrlen_u', [types.unichar_p], types.slong)
+        #
+        # first, try automatic conversion from strings and unicode
+        assert mystrlen('foobar') == 6
+        assert mystrlen(u'foobar') == 6
+        assert mystrlen(u'ab\u2070') == 3
+        # then, try to pass an explicit pointer
+        UniCharArray = _rawffi.Array('u')
+        mystr = UniCharArray(7, u'foobar')
+        assert mystrlen(mystr.buffer) == 6
+        mystr.free()
+        mystrlen.free_temp_buffers()
+
+    def test_keepalive_temp_buffer(self):
+        """
+            char* do_nothing(char* s)
+            {
+                return s;
+            }
+        """
+        from _ffi import CDLL, types
+        import _rawffi
+        libfoo = CDLL(self.libfoo_name)
+        do_nothing = libfoo.getfunc('do_nothing', [types.char_p], types.char_p)
+        CharArray = _rawffi.Array('c')
+        #
+        ptr = do_nothing('foobar')
+        array = CharArray.fromaddress(ptr, 7)
+        assert list(array) == list('foobar\00')
+        do_nothing.free_temp_buffers()
+
     def test_typed_pointer(self):
         from _ffi import types
         intptr = types.Pointer(types.sint) # create a typed pointer to sint
@@ -204,6 +273,13 @@
         assert x is y
         assert x is not z
 
+    def test_char_p_cached(self):
+        from _ffi import types
+        x = types.Pointer(types.char)
+        assert x is types.char_p
+        x = types.Pointer(types.unichar)
+        assert x is types.unichar_p
+
     def test_typed_pointer_args(self):
         """
             extern int dummy; // defined in test_void_result 
diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py
--- a/pypy/module/array/interp_array.py
+++ b/pypy/module/array/interp_array.py
@@ -14,8 +14,6 @@
 from pypy.rpython.lltypesystem import lltype, rffi
 
 
-memcpy = rffi.llexternal("memcpy", [rffi.VOIDP, rffi.VOIDP, rffi.SIZE_T], lltype.Void)
-
 @unwrap_spec(typecode=str)
 def w_array(space, w_cls, typecode, __args__):
     if len(__args__.arguments_w) > 1:
@@ -617,7 +615,7 @@
     def array_copy__Array(space, self):
         w_a = mytype.w_class(self.space)
         w_a.setlen(self.len)
-        memcpy(
+        rffi.c_memcpy(
             rffi.cast(rffi.VOIDP, w_a.buffer),
             rffi.cast(rffi.VOIDP, self.buffer),
             self.len * mytype.bytes
diff --git a/pypy/module/pypyjit/interp_jit.py b/pypy/module/pypyjit/interp_jit.py
--- a/pypy/module/pypyjit/interp_jit.py
+++ b/pypy/module/pypyjit/interp_jit.py
@@ -44,9 +44,11 @@
             ec.w_tracefunc is None)
 
 def can_never_inline(next_instr, is_being_profiled, bytecode):
+    return False
+
+def should_unroll_one_iteration(next_instr, is_being_profiled, bytecode):
     return (bytecode.co_flags & CO_GENERATOR) != 0
 
-
 def wrap_oplist(space, logops, operations):
     list_w = []
     for op in operations:
@@ -110,7 +112,9 @@
                               get_jitcell_at = get_jitcell_at,
                               set_jitcell_at = set_jitcell_at,
                               confirm_enter_jit = confirm_enter_jit,
-                              can_never_inline = can_never_inline)
+                              can_never_inline = can_never_inline,
+                              should_unroll_one_iteration =
+                              should_unroll_one_iteration)
 
 class __extend__(PyFrame):
 
diff --git a/pypy/module/pypyjit/test_pypy_c/test_generators.py b/pypy/module/pypyjit/test_pypy_c/test_generators.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_generators.py
@@ -0,0 +1,25 @@
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+
+class TestGenerators(BaseTestPyPyC):
+    def test_simple_generator(self):
+        def main(n):
+            def f():
+                for i in range(10000):
+                    yield i
+
+            def g():
+                for i in f():  # ID: generator
+                    pass
+
+            g()
+
+        log = self.run(main, [500])
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match_by_id("generator", """
+            i16 = force_token()
+            p45 = new_with_vtable(ConstClass(W_IntObject))
+            setfield_gc(p45, i29, descr=<SignedFieldDescr .*>)
+            setarrayitem_gc(p8, 0, p45, descr=<GcPtrArrayDescr>)
+            jump(..., descr=...)
+            """)
diff --git a/pypy/module/pypyjit/test_pypy_c/test_math.py b/pypy/module/pypyjit/test_pypy_c/test_math.py
--- a/pypy/module/pypyjit/test_pypy_c/test_math.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_math.py
@@ -30,3 +30,34 @@
             --TICK--
             jump(..., descr=<Loop0>)
         """)
+
+    def test_sin_cos(self):
+        def main(n):
+            import math
+
+            i = 1
+            s = 0.0
+            while i < n:
+                s += math.sin(i) - math.cos(i)
+                i += 1
+            return s
+        log = self.run(main, [500])
+        assert round(log.result, 6) == round(main(500), 6)
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i2 = int_lt(i0, i1)
+            guard_true(i2, descr=...)
+            f1 = cast_int_to_float(i0)
+            i3 = float_eq(f1, inf)
+            i4 = float_eq(f1, -inf)
+            i5 = int_or(i3, i4)
+            i6 = int_is_true(i5)
+            guard_false(i6, descr=...)
+            f2 = call(ConstClass(sin), f1, descr=<FloatCallDescr>)
+            f3 = call(ConstClass(cos), f1, descr=<FloatCallDescr>)
+            f4 = float_sub(f2, f3)
+            f5 = float_add(f0, f4)
+            i7 = int_add(i0, f1)
+            --TICK--
+            jump(..., descr=)
+        """)
\ No newline at end of file
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_callbacks.py b/pypy/module/test_lib_pypy/ctypes_tests/test_callbacks.py
--- a/pypy/module/test_lib_pypy/ctypes_tests/test_callbacks.py
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_callbacks.py
@@ -14,14 +14,27 @@
         return args[-1]
 
     def check_type(self, typ, arg):
+        unwrapped_types = {
+            c_float: (float,),
+            c_double: (float,),
+            c_char: (str,),
+            c_char_p: (str,),
+            c_uint: (int, long),
+            c_ulong: (int, long),
+            }
+        
         PROTO = self.functype.im_func(typ, typ)
-        result = PROTO(self.callback)(arg)
+        cfunc = PROTO(self.callback)
+        result = cfunc(arg)
         if typ == c_float:
             assert abs(result - arg) < 0.000001
         else:
             assert self.got_args == (arg,)
             assert result == arg
 
+        result2 = cfunc(typ(arg))
+        assert type(result2) in unwrapped_types.get(typ, (int, long))
+
         PROTO = self.functype.im_func(typ, c_byte, typ)
         result = PROTO(self.callback)(-3, arg)
         if typ == c_float:
@@ -222,3 +235,20 @@
         out, err = capsys.readouterr()
         assert (out, err) == ("", "")
 
+
+    def test_callback_pyobject(self):
+        def callback(obj):
+            return obj
+
+        FUNC = CFUNCTYPE(py_object, py_object)
+        cfunc = FUNC(callback)
+        param = c_int(42)
+        assert cfunc(param) is param
+
+    def test_raise_argumenterror(self):
+        def callback(x):
+            pass
+        FUNC = CFUNCTYPE(None, c_void_p)
+        cfunc = FUNC(callback)
+        param = c_uint(42)
+        py.test.raises(ArgumentError, "cfunc(param)")
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_cast.py b/pypy/module/test_lib_pypy/ctypes_tests/test_cast.py
--- a/pypy/module/test_lib_pypy/ctypes_tests/test_cast.py
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_cast.py
@@ -90,3 +90,8 @@
         assert sqrt._objects is my_sqrt._objects   # on CPython too
         my_sqrt._objects.clear()
         my_sqrt._objects.update(saved_objects)
+
+    def test_cast_argumenterror(self):
+        param = c_uint(42)
+        py.test.raises(ArgumentError, "cast(param, c_void_p)")
+        
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_fastpath.py b/pypy/module/test_lib_pypy/ctypes_tests/test_fastpath.py
--- a/pypy/module/test_lib_pypy/ctypes_tests/test_fastpath.py
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_fastpath.py
@@ -46,6 +46,18 @@
         tf_b.argtypes = (c_byte,)
         assert tf_b(-126) == -42
 
+    def test_undeclared_restype(self):
+        # make sure we get a fresh function
+        try:
+            del dll.tf_i
+        except AttributeError:
+            pass
+        tf_i = dll.tf_i
+        assert not tf_i._is_fastpath
+        tf_i.argtypes = (c_int,)
+        assert tf_i._is_fastpath
+        assert tf_i(12) == 4
+
     def test_pointer_args(self):
         f = dll._testfunc_p_p
         f.restype = POINTER(c_int)
@@ -63,13 +75,10 @@
         result = f(mystr, ord("b"))
         assert result == "bcd"
 
-    @py.test.mark.xfail
     def test_strings(self):
         f = dll.my_strchr
         f.argtypes = [c_char_p, c_int]
         f.restype = c_char_p
-        # python strings need to be converted to c_char_p, but this is
-        # supported only in the slow path so far
         result = f("abcd", ord("b"))
         assert result == "bcd"
 
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_functions.py b/pypy/module/test_lib_pypy/ctypes_tests/test_functions.py
--- a/pypy/module/test_lib_pypy/ctypes_tests/test_functions.py
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_functions.py
@@ -488,11 +488,9 @@
         warnings.simplefilter("always")
         with warnings.catch_warnings(record=True) as w:
             dll.get_an_integer()
-            assert len(w) == 2
+            assert len(w) == 1
             assert issubclass(w[0].category, RuntimeWarning)
-            assert issubclass(w[1].category, RuntimeWarning)
             assert "C function without declared arguments called" in str(w[0].message)
-            assert "C function without declared return type called" in str(w[1].message)
 
     def test_errcheck(self):
         py.test.skip('fixme')
diff --git a/pypy/objspace/descroperation.py b/pypy/objspace/descroperation.py
--- a/pypy/objspace/descroperation.py
+++ b/pypy/objspace/descroperation.py
@@ -35,6 +35,13 @@
     return w_hash
 object_hash._annspecialcase_ = 'specialize:memo'
 
+def type_eq(space):
+    "Utility that returns the app-level descriptor type.__eq__."
+    w_src, w_eq = space.lookup_in_type_where(space.w_type,
+                                             '__eq__')
+    return w_eq
+type_eq._annspecialcase_ = 'specialize:memo'
+
 def raiseattrerror(space, w_obj, name, w_descr=None):
     w_type = space.type(w_obj)
     typename = w_type.getname(space)
diff --git a/pypy/objspace/std/bytearrayobject.py b/pypy/objspace/std/bytearrayobject.py
--- a/pypy/objspace/std/bytearrayobject.py
+++ b/pypy/objspace/std/bytearrayobject.py
@@ -282,8 +282,8 @@
     return space.wrap(''.join(w_bytearray.data))
 
 def _convert_idx_params(space, w_self, w_start, w_stop):
-    start = slicetype._Eval_SliceIndex(space, w_start)
-    stop = slicetype._Eval_SliceIndex(space, w_stop)
+    start = slicetype.eval_slice_index(space, w_start)
+    stop = slicetype.eval_slice_index(space, w_stop)
     length = len(w_self.data)
     if start < 0:
         start += length
diff --git a/pypy/objspace/std/identitydict.py b/pypy/objspace/std/identitydict.py
--- a/pypy/objspace/std/identitydict.py
+++ b/pypy/objspace/std/identitydict.py
@@ -2,6 +2,7 @@
 ## dict strategy (see dict_multiobject.py)
 
 from pypy.rlib import rerased
+from pypy.rlib.debug import mark_dict_non_null
 from pypy.objspace.std.dictmultiobject import (AbstractTypedStrategy,
                                                DictStrategy,
                                                IteratorImplementation,
@@ -50,7 +51,7 @@
     The second case is completely non-deterministic, even in CPython.
     Depending on the phase of the moon, you might call the __eq__ or not, so
     it is perfectly fine to *never* call it.  Morever, in practice with the
-    minimar GC we never have two live objects with the same hash, so it would
+    minimark GC we never have two live objects with the same hash, so it would
     never happen anyway.
     """
 
@@ -65,7 +66,9 @@
         return wrapped
 
     def get_empty_storage(self):
-        return self.erase({})
+        d = {}
+        mark_dict_non_null(d)
+        return self.erase(d)
 
     def is_correct_type(self, w_obj):
         w_type = self.space.type(w_obj)
diff --git a/pypy/objspace/std/sliceobject.py b/pypy/objspace/std/sliceobject.py
--- a/pypy/objspace/std/sliceobject.py
+++ b/pypy/objspace/std/sliceobject.py
@@ -4,7 +4,7 @@
 from pypy.interpreter import gateway
 from pypy.objspace.std.model import registerimplementation, W_Object
 from pypy.objspace.std.register_all import register_all
-from pypy.objspace.std.slicetype import _Eval_SliceIndex
+from pypy.objspace.std.slicetype import eval_slice_index
 
 class W_SliceObject(W_Object):
     from pypy.objspace.std.slicetype import slice_typedef as typedef
@@ -25,7 +25,7 @@
         if space.is_w(w_slice.w_step, space.w_None):
             step = 1
         else:
-            step = _Eval_SliceIndex(space, w_slice.w_step)
+            step = eval_slice_index(space, w_slice.w_step)
             if step == 0:
                 raise OperationError(space.w_ValueError,
                                      space.wrap("slice step cannot be zero"))
@@ -35,7 +35,7 @@
             else:
                 start = 0
         else:
-            start = _Eval_SliceIndex(space, w_slice.w_start)
+            start = eval_slice_index(space, w_slice.w_start)
             if start < 0:
                 start += length
                 if start < 0:
@@ -54,7 +54,7 @@
             else:
                 stop = length
         else:
-            stop = _Eval_SliceIndex(space, w_slice.w_stop)
+            stop = eval_slice_index(space, w_slice.w_stop)
             if stop < 0:
                 stop += length
                 if stop < 0:
diff --git a/pypy/objspace/std/slicetype.py b/pypy/objspace/std/slicetype.py
--- a/pypy/objspace/std/slicetype.py
+++ b/pypy/objspace/std/slicetype.py
@@ -14,7 +14,7 @@
                         ' normal slices.')
 
 # utility functions
-def _Eval_SliceIndex(space, w_int):
+def eval_slice_index(space, w_int):
     try:
         return space.getindex_w(w_int, None) # clamp if long integer too large
     except OperationError, err:
@@ -25,7 +25,7 @@
                                         "None or have an __index__ method"))
 
 def adapt_lower_bound(space, size, w_index):
-    index = _Eval_SliceIndex(space, w_index)
+    index = eval_slice_index(space, w_index)
     if index < 0:
         index = index + size
         if index < 0:
@@ -34,7 +34,7 @@
     return index
 
 def adapt_bound(space, size, w_index):
-    index = _Eval_SliceIndex(space, w_index)
+    index = eval_slice_index(space, w_index)
     if index < 0:
         index = index + size
         if index < 0:
diff --git a/pypy/objspace/std/stringobject.py b/pypy/objspace/std/stringobject.py
--- a/pypy/objspace/std/stringobject.py
+++ b/pypy/objspace/std/stringobject.py
@@ -913,7 +913,7 @@
 def repr__String(space, w_str):
     s = w_str._value
 
-    buf = StringBuilder(50)
+    buf = StringBuilder(len(s) + 2)
 
     quote = "'"
     if quote in s and '"' not in s:
diff --git a/pypy/objspace/std/test/test_identitydict.py b/pypy/objspace/std/test/test_identitydict.py
--- a/pypy/objspace/std/test/test_identitydict.py
+++ b/pypy/objspace/std/test/test_identitydict.py
@@ -32,10 +32,20 @@
             def __hash__(self):
                 return 0
 
+        class TypeSubclass(type):
+            pass
+
+        class TypeSubclassCustomCmp(type):
+            def __cmp__(self, other):
+                return 0
+
         assert self.compares_by_identity(Plain)
         assert not self.compares_by_identity(CustomEq)
         assert not self.compares_by_identity(CustomCmp)
         assert not self.compares_by_identity(CustomHash)
+        assert self.compares_by_identity(type)
+        assert self.compares_by_identity(TypeSubclass)
+        assert not self.compares_by_identity(TypeSubclassCustomCmp)
 
     def test_modify_class(self):
         class X(object):
diff --git a/pypy/objspace/std/tupleobject.py b/pypy/objspace/std/tupleobject.py
--- a/pypy/objspace/std/tupleobject.py
+++ b/pypy/objspace/std/tupleobject.py
@@ -172,8 +172,8 @@
     return space.wrap(count)
 
 def tuple_index__Tuple_ANY_ANY_ANY(space, w_tuple, w_obj, w_start, w_stop):
-    start = slicetype._Eval_SliceIndex(space, w_start)
-    stop = slicetype._Eval_SliceIndex(space, w_stop)
+    start = slicetype.eval_slice_index(space, w_start)
+    stop = slicetype.eval_slice_index(space, w_stop)
     length = len(w_tuple.wrappeditems)
     if start < 0:
         start += length
diff --git a/pypy/objspace/std/typeobject.py b/pypy/objspace/std/typeobject.py
--- a/pypy/objspace/std/typeobject.py
+++ b/pypy/objspace/std/typeobject.py
@@ -173,8 +173,6 @@
             # ^^^ conservative default, fixed during real usage
 
         if space.config.objspace.std.withidentitydict:
-            did_compare_by_identity = (
-                w_self.compares_by_identity_status == COMPARES_BY_IDENTITY)
             if (key is None or key == '__eq__' or
                 key == '__cmp__' or key == '__hash__'):
                 w_self.compares_by_identity_status = UNKNOWN
@@ -229,7 +227,7 @@
         return w_self.getattribute_if_not_from_object() is None
 
     def compares_by_identity(w_self):
-        from pypy.objspace.descroperation import object_hash
+        from pypy.objspace.descroperation import object_hash, type_eq
         if not w_self.space.config.objspace.std.withidentitydict:
             return False # conservative
         #
@@ -238,7 +236,9 @@
             return w_self.compares_by_identity_status == COMPARES_BY_IDENTITY
         #
         default_hash = object_hash(w_self.space)
-        overrides_eq_cmp_or_hash = (w_self.lookup('__eq__') or
+        my_eq = w_self.lookup('__eq__')
+        overrides_eq = (my_eq and my_eq is not type_eq(w_self.space))
+        overrides_eq_cmp_or_hash = (overrides_eq or
                                     w_self.lookup('__cmp__') or
                                     w_self.lookup('__hash__') is not default_hash)
         if overrides_eq_cmp_or_hash:
diff --git a/pypy/rlib/jit.py b/pypy/rlib/jit.py
--- a/pypy/rlib/jit.py
+++ b/pypy/rlib/jit.py
@@ -16,7 +16,8 @@
 
     Most importantly it doesn't mean that an elidable function has no observable
     side effect, but those side effects are idempotent (ie caching).
-    For now, such a function should never raise an exception.
+    If a particular call to this function ends up raising an exception, then it
+    is handled like a normal function call (this decorator is ignored).
     """
     func._elidable_function_ = True
     return func
@@ -315,7 +316,7 @@
     def __init__(self, greens=None, reds=None, virtualizables=None,
                  get_jitcell_at=None, set_jitcell_at=None,
                  get_printable_location=None, confirm_enter_jit=None,
-                 can_never_inline=None):
+                 can_never_inline=None, should_unroll_one_iteration=None):
         if greens is not None:
             self.greens = greens
         if reds is not None:
@@ -334,6 +335,7 @@
         self.get_printable_location = get_printable_location
         self.confirm_enter_jit = confirm_enter_jit
         self.can_never_inline = can_never_inline
+        self.should_unroll_one_iteration = should_unroll_one_iteration
 
     def _freeze_(self):
         return True
diff --git a/pypy/rlib/rbigint.py b/pypy/rlib/rbigint.py
--- a/pypy/rlib/rbigint.py
+++ b/pypy/rlib/rbigint.py
@@ -40,7 +40,7 @@
 # In that case, do 5 bits at a time.  The potential drawback is that
 # a table of 2**5 intermediate results is computed.
 
-FIVEARY_CUTOFF = 8
+## FIVEARY_CUTOFF = 8   disabled for now
 
 
 def _mask_digit(x):
@@ -456,7 +456,7 @@
 
         # python adaptation: moved macros REDUCE(X) and MULT(X, Y, result)
         # into helper function result = _help_mult(x, y, c)
-        if b.numdigits() <= FIVEARY_CUTOFF:
+        if 1:   ## b.numdigits() <= FIVEARY_CUTOFF:
             # Left-to-right binary exponentiation (HAC Algorithm 14.79)
             # http://www.cacr.math.uwaterloo.ca/hac/about/chap14.pdf
             i = b.numdigits() - 1
@@ -469,26 +469,30 @@
                         z = _help_mult(z, a, c)
                     j >>= 1
                 i -= 1
-        else:
-            # Left-to-right 5-ary exponentiation (HAC Algorithm 14.82)
-            # This is only useful in the case where c != None.
-            # z still holds 1L
-            table = [z] * 32
-            table[0] = z
-            for i in range(1, 32):
-                table[i] = _help_mult(table[i-1], a, c)
-            i = b.numdigits() - 1
-            while i >= 0:
-                bi = b.digit(i)
-                j = SHIFT - 5
-                while j >= 0:
-                    index = (bi >> j) & 0x1f
-                    for k in range(5):
-                        z = _help_mult(z, z, c)
-                    if index:
-                        z = _help_mult(z, table[index], c)
-                    j -= 5
-                i -= 1
+##        else:
+##            This code is disabled for now, because it assumes that
+##            SHIFT is a multiple of 5.  It could be fixed but it looks
+##            like it's more trouble than it's worth...
+##
+##            # Left-to-right 5-ary exponentiation (HAC Algorithm 14.82)
+##            # This is only useful in the case where c != None.
+##            # z still holds 1L
+##            table = [z] * 32
+##            table[0] = z
+##            for i in range(1, 32):
+##                table[i] = _help_mult(table[i-1], a, c)
+##            i = b.numdigits() - 1
+##            while i >= 0:
+##                bi = b.digit(i)
+##                j = SHIFT - 5
+##                while j >= 0:
+##                    index = (bi >> j) & 0x1f
+##                    for k in range(5):
+##                        z = _help_mult(z, z, c)
+##                    if index:
+##                        z = _help_mult(z, table[index], c)
+##                    j -= 5
+##                i -= 1
 
         if negativeOutput and z.sign != 0:
             z = z.sub(c)
diff --git a/pypy/rlib/rerased.py b/pypy/rlib/rerased.py
--- a/pypy/rlib/rerased.py
+++ b/pypy/rlib/rerased.py
@@ -113,7 +113,7 @@
             if hop.r_result.lowleveltype is lltype.Void:
                 return hop.inputconst(lltype.Void, None)
             [v] = hop.inputargs(hop.args_r[0])
-            return hop.genop('cast_opaque_ptr', [v], resulttype = hop.r_result)
+            return hop.args_r[0].rtype_unerase(hop, v)
 
     return erase, unerase
 
@@ -147,7 +147,7 @@
     def specialize_call(self, hop):
         [v] = hop.inputargs(hop.args_r[0])
         assert isinstance(hop.s_result, annmodel.SomeInteger)
-        return hop.gendirectcall(ll_unerase_int, v)
+        return hop.args_r[0].rtype_unerase_int(hop, v)
 
 def ll_unerase_int(gcref):
     from pypy.rpython.lltypesystem.lloperation import llop
@@ -174,7 +174,10 @@
         return False # cannot be None, but can contain a None
 
     def rtyper_makerepr(self, rtyper):
-        return ErasedRepr(rtyper)
+        if rtyper.type_system.name == 'lltypesystem':
+            return ErasedRepr(rtyper)
+        elif rtyper.type_system.name == 'ootypesystem':
+            return OOErasedRepr(rtyper)
 
     def rtyper_makekey(self):
         return self.__class__,
@@ -200,6 +203,13 @@
         return hop.genop('cast_opaque_ptr', [v_obj],
                          resulttype=self.lowleveltype)
 
+    def rtype_unerase(self, hop, s_obj):
+        [v] = hop.inputargs(hop.args_r[0])
+        return hop.genop('cast_opaque_ptr', [v], resulttype=hop.r_result)
+
+    def rtype_unerase_int(self, hop, v):
+        return hop.gendirectcall(ll_unerase_int, v)
+
     def rtype_erase_int(self, hop):
         [v_value] = hop.inputargs(lltype.Signed)
         c_one = hop.inputconst(lltype.Signed, 1)
@@ -224,3 +234,50 @@
             return lltype.nullptr(self.lowleveltype.TO)
         v = r_obj.convert_const(value._x)
         return lltype.cast_opaque_ptr(self.lowleveltype, v)
+
+from pypy.rpython.ootypesystem import ootype
+
+class OOErasedRepr(Repr):
+    lowleveltype = ootype.Object
+    def __init__(self, rtyper):
+        self.rtyper = rtyper
+
+    def rtype_erase(self, hop, s_obj):
+        hop.exception_cannot_occur()
+        r_obj = self.rtyper.getrepr(s_obj)
+        if r_obj.lowleveltype is lltype.Void:
+            return hop.inputconst(self.lowleveltype,
+                                  ootype.NULL)
+        [v_obj] = hop.inputargs(r_obj)
+        return hop.genop('cast_to_object', [v_obj],
+                         resulttype=self.lowleveltype)
+
+    def rtype_unerase(self, hop, s_obj):
+        [v] = hop.inputargs(hop.args_r[0])
+        return hop.genop('cast_from_object', [v], resulttype=hop.r_result)
+
+    def rtype_unerase_int(self, hop, v):
+        c_one = hop.inputconst(lltype.Signed, 1)
+        v2 = hop.genop('oounbox_int', [v], resulttype=hop.r_result)
+        return hop.genop('int_rshift', [v2, c_one], resulttype=lltype.Signed)
+
+    def rtype_erase_int(self, hop):
+        hop.exception_is_here()
+        [v_value] = hop.inputargs(lltype.Signed)
+        c_one = hop.inputconst(lltype.Signed, 1)
+        v2 = hop.genop('int_lshift_ovf', [v_value, c_one],
+                       resulttype = lltype.Signed)
+        v2p1 = hop.genop('int_add', [v2, c_one],
+                         resulttype = lltype.Signed)
+        return hop.genop('oobox_int', [v2p1], resulttype=hop.r_result)
+
+    def convert_const(self, value):
+        if value._identity is _identity_for_ints:
+            return value._x # FIXME: what should we do here?
+        bk = self.rtyper.annotator.bookkeeper
+        s_obj = value._identity.get_input_annotation(bk)
+        r_obj = self.rtyper.getrepr(s_obj)
+        if r_obj.lowleveltype is lltype.Void:
+            return ootype.NULL
+        v = r_obj.convert_const(value._x)
+        return ootype.cast_to_object(v)
diff --git a/pypy/rlib/rstring.py b/pypy/rlib/rstring.py
--- a/pypy/rlib/rstring.py
+++ b/pypy/rlib/rstring.py
@@ -1,8 +1,8 @@
 """ String builder interface and string functions
 """
 
-from pypy.annotation.model import SomeObject, SomeString, s_None,\
-     SomeChar, SomeInteger, SomeUnicodeCodePoint, SomeUnicodeString
+from pypy.annotation.model import (SomeObject, SomeString, s_None, SomeChar,
+    SomeInteger, SomeUnicodeCodePoint, SomeUnicodeString, SomePtr)
 from pypy.rpython.extregistry import ExtRegistryEntry
 
 
@@ -65,6 +65,12 @@
         assert isinstance(c, self.tp)
         self.l.append(c * times)
 
+    def append_charpsize(self, s, size):
+        l = []
+        for i in xrange(size):
+            l.append(s[i])
+        self.l.append(self.tp("").join(l))
+
     def build(self):
         return self.tp("").join(self.l)
 
@@ -100,6 +106,11 @@
         assert isinstance(s_times, SomeInteger)
         return s_None
 
+    def method_append_charpsize(self, s_ptr, s_size):
+        assert isinstance(s_ptr, SomePtr)
+        assert isinstance(s_size, SomeInteger)
+        return s_None
+
     def method_getlength(self):
         return SomeInteger(nonneg=True)
 
@@ -127,6 +138,11 @@
         assert isinstance(s_times, SomeInteger)
         return s_None
 
+    def method_append_charpsize(self, s_ptr, s_size):
+        assert isinstance(s_ptr, SomePtr)
+        assert isinstance(s_size, SomeInteger)
+        return s_None
+
     def method_getlength(self):
         return SomeInteger(nonneg=True)
 
diff --git a/pypy/rlib/rzlib.py b/pypy/rlib/rzlib.py
--- a/pypy/rlib/rzlib.py
+++ b/pypy/rlib/rzlib.py
@@ -1,8 +1,11 @@
 import sys
+
+from pypy.rlib.rstring import StringBuilder
 from pypy.rpython.lltypesystem import rffi, lltype
 from pypy.rpython.tool import rffi_platform
+from pypy.translator.platform import platform as compiler, CompilationError
 from pypy.translator.tool.cbuild import ExternalCompilationInfo
-from pypy.translator.platform import platform as compiler, CompilationError
+
 
 if compiler.name == "msvc":
     libname = 'zlib'
@@ -337,23 +340,18 @@
     """Common code for compress() and decompress().
     """
     # Prepare the input buffer for the stream
-    inbuf = lltype.malloc(rffi.CCHARP.TO, len(data), flavor='raw')
-    try:
+    with lltype.scoped_alloc(rffi.CCHARP.TO, len(data)) as inbuf:
         for i in xrange(len(data)):
             inbuf[i] = data[i]
         stream.c_next_in = rffi.cast(Bytefp, inbuf)
         rffi.setintfield(stream, 'c_avail_in', len(data))
 
         # Prepare the output buffer
-        outbuf = lltype.malloc(rffi.CCHARP.TO, OUTPUT_BUFFER_SIZE,
-                               flavor='raw')
-        try:
-            # Strategy: we call deflate() to get as much output data as
-            # fits in the buffer, then accumulate all output into a list
-            # of characters 'result'.  We don't need to gradually
-            # increase the output buffer size because there is no
-            # quadratic factor.
-            result = []
+        with lltype.scoped_alloc(rffi.CCHARP.TO, OUTPUT_BUFFER_SIZE) as outbuf:
+            # Strategy: we call deflate() to get as much output data as fits in
+            # the buffer, then accumulate all output into a StringBuilder
+            # 'result'.
+            result = StringBuilder()
 
             while True:
                 stream.c_next_out = rffi.cast(Bytefp, outbuf)
@@ -369,8 +367,7 @@
                 if err == Z_OK or err == Z_STREAM_END:
                     # accumulate data into 'result'
                     avail_out = rffi.cast(lltype.Signed, stream.c_avail_out)
-                    for i in xrange(bufsize - avail_out):
-                        result.append(outbuf[i])
+                    result.append_charpsize(outbuf, bufsize - avail_out)
                     # if the output buffer is full, there might be more data
                     # so we need to try again.  Otherwise, we're done.
                     if avail_out > 0:
@@ -393,14 +390,9 @@
                 # fallback case: report this error
                 raise RZlibError.fromstream(stream, err, while_doing)
 
-        finally:
-            lltype.free(outbuf, flavor='raw')
-    finally:
-        lltype.free(inbuf, flavor='raw')
-
     # When decompressing, if the compressed stream of data was truncated,
     # then the zlib simply returns Z_OK and waits for more.  If it is
     # complete it returns Z_STREAM_END.
-    return (''.join(result),
+    return (result.build(),
             err,
             rffi.cast(lltype.Signed, stream.c_avail_in))
diff --git a/pypy/rlib/streamio.py b/pypy/rlib/streamio.py
--- a/pypy/rlib/streamio.py
+++ b/pypy/rlib/streamio.py
@@ -894,13 +894,10 @@
             self.buf.append(data)
             self.buflen += datalen
         elif buflen:
-            i = self.bufsize - buflen
-            assert i >= 0
-            self.buf.append(data[:i])
+            self.buf.append(data)
             self.do_write(''.join(self.buf))
             self.buf = []
             self.buflen = 0
-            self.write(data[i:])
         else:
             self.do_write(data)
 
diff --git a/pypy/rlib/test/test_rbigint.py b/pypy/rlib/test/test_rbigint.py
--- a/pypy/rlib/test/test_rbigint.py
+++ b/pypy/rlib/test/test_rbigint.py
@@ -373,6 +373,13 @@
         print '--->', v
         assert v.tolong() == pow(x, y, z)
 
+    def test_pow_lll_bug(self):
+        two = rbigint.fromint(2)
+        t = rbigint.fromlong(2655689964083835493447941032762343136647965588635159615997220691002017799304)
+        for n, expected in [(37, 9), (1291, 931), (67889, 39464)]:
+            v = two.pow(t, rbigint.fromint(n))
+            assert v.toint() == expected
+
     def test_pow_lln(self):
         x = 10L
         y = 2L
diff --git a/pypy/rlib/test/test_rerased.py b/pypy/rlib/test/test_rerased.py
--- a/pypy/rlib/test/test_rerased.py
+++ b/pypy/rlib/test/test_rerased.py
@@ -5,8 +5,10 @@
 from pypy.annotation.annrpython import RPythonAnnotator
 from pypy.rpython.test.test_llinterp import interpret
 from pypy.rpython.lltypesystem.rclass import OBJECTPTR
+from pypy.rpython.ootypesystem.rclass import OBJECT
 from pypy.rpython.lltypesystem import lltype, llmemory
 
+from pypy.rpython.test.tool import BaseRtypingTest, LLRtypeMixin, OORtypeMixin
 
 class X(object):
     pass
@@ -79,136 +81,6 @@
     s = a.build_types(f, [])
     assert isinstance(s, annmodel.SomeInteger)
 
-def test_rtype_1():
-    def f():
-        return eraseX(X())
-    x = interpret(f, [])
-    assert lltype.typeOf(x) == llmemory.GCREF
-
-def test_rtype_2():
-    def f():
-        x1 = X()
-        e = eraseX(x1)
-        #assert not is_integer(e)
-        x2 = uneraseX(e)
-        return x2
-    x = interpret(f, [])
-    assert lltype.castable(OBJECTPTR, lltype.typeOf(x)) > 0
-
-def test_rtype_3():
-    def f():
-        e = erase_int(16)
-        #assert is_integer(e)
-        x2 = unerase_int(e)
-        return x2
-    x = interpret(f, [])
-    assert x == 16
-
-
-def test_prebuilt_erased():
-    e1 = erase_int(16)
-    x1 = X()
-    x1.foobar = 42
-    e2 = eraseX(x1)
-
-    def f():
-        #assert is_integer(e1)
-        #assert not is_integer(e2)
-        x1.foobar += 1
-        x2 = unerase_int(e1) + uneraseX(e2).foobar
-        return x2
-    x = interpret(f, [])
-    assert x == 16 + 42 + 1
-
-def test_prebuilt_erased_in_instance():
-    erase_empty, unerase_empty = new_erasing_pair("empty")
-    class FakeList(object):
-        pass
-
-    x1 = X()
-    x1.foobar = 42
-    l1 = FakeList()
-    l1.storage = eraseX(x1)
-    l2 = FakeList()
-    l2.storage = erase_empty(None)
-
-    def f():
-        #assert is_integer(e1)
-        #assert not is_integer(e2)
-        x1.foobar += 1
-        x2 = uneraseX(l1.storage).foobar + (unerase_empty(l2.storage) is None)
-        return x2
-    x = interpret(f, [])
-    assert x == 43 + True
-
-
-def test_overflow():
-    def f(i):
-        try:
-            e = erase_int(i)
-        except OverflowError:
-            return -1
-        #assert is_integer(e)
-        return unerase_int(e)
-    x = interpret(f, [16])
-    assert x == 16
-    x = interpret(f, [sys.maxint])
-    assert x == -1
-
-def test_none():
-    def foo():
-        return uneraseX(eraseX(None))
-    assert foo() is None
-    res = interpret(foo, [])
-    assert not res
-    #
-    def foo():
-        eraseX(X())
-        return uneraseX(eraseX(None))
-    assert foo() is None
-    res = interpret(foo, [])
-    assert not res
-
-def test_union():
-    s_e1 = SomeErased()
-    s_e1.const = 1
-    s_e2 = SomeErased()
-    s_e2.const = 3
-    assert not annmodel.pair(s_e1, s_e2).union().is_constant()
-
-
-def test_rtype_list():
-    prebuilt_l = [X()]
-    prebuilt_e = erase_list_X(prebuilt_l)
-    def l(flag):
-        if flag == 1:
-            l = [X()]
-            e = erase_list_X(l)
-        elif flag == 2:
-            l = prebuilt_l
-            e = erase_list_X(l)
-        else:
-            l = prebuilt_l
-            e = prebuilt_e
-        #assert is_integer(e) is False
-        assert unerase_list_X(e) is l
-    interpret(l, [0])
-    interpret(l, [1])
-    interpret(l, [2])
-
-# ____________________________________________________________
-
-def test_erasing_pair():
-    erase, unerase = new_erasing_pair("test1")
-    class X:
-        pass
-    x = X()
-    erased = erase(x)
-    assert unerase(erased) is x
-    #
-    erase2, unerase2 = new_erasing_pair("test2")
-    py.test.raises(AssertionError, unerase2, erased)
-
 def test_annotate_erasing_pair():
     erase, unerase = new_erasing_pair("test1")
     erase2, unerase2 = new_erasing_pair("test2")
@@ -296,3 +168,148 @@
     a = RPythonAnnotator()
     s = a.build_types(f, [int])
     assert isinstance(s, annmodel.SomeInteger)
+
+class BaseTestRErased(BaseRtypingTest):
+    def test_rtype_1(self):
+        def f():
+            return eraseX(X())
+        x = self.interpret(f, [])
+        assert lltype.typeOf(x) == self.ERASED_TYPE
+
+    def test_rtype_2(self):
+        def f():
+            x1 = X()
+            e = eraseX(x1)
+            #assert not is_integer(e)
+            x2 = uneraseX(e)
+            return x2
+        x = self.interpret(f, [])
+        assert self.castable(self.UNERASED_TYPE, x)
+
+    def test_rtype_3(self):
+        def f():
+            e = erase_int(16)
+            #assert is_integer(e)
+            x2 = unerase_int(e)
+            return x2
+        x = self.interpret(f, [])
+        assert x == 16
+
+    def test_prebuilt_erased(self):
+        e1 = erase_int(16)
+        x1 = X()
+        x1.foobar = 42
+        e2 = eraseX(x1)
+
+        def f():
+            #assert is_integer(e1)
+            #assert not is_integer(e2)
+            x1.foobar += 1
+            x2 = unerase_int(e1) + uneraseX(e2).foobar
+            return x2
+        x = self.interpret(f, [])
+        assert x == 16 + 42 + 1
+
+    def test_prebuilt_erased_in_instance(self):
+        erase_empty, unerase_empty = new_erasing_pair("empty")
+        class FakeList(object):
+            pass
+
+        x1 = X()
+        x1.foobar = 42
+        l1 = FakeList()
+        l1.storage = eraseX(x1)
+        l2 = FakeList()
+        l2.storage = erase_empty(None)
+
+        def f():
+            #assert is_integer(e1)
+            #assert not is_integer(e2)
+            x1.foobar += 1
+            x2 = uneraseX(l1.storage).foobar + (unerase_empty(l2.storage) is None)
+            return x2
+        x = self.interpret(f, [])
+        assert x == 43 + True
+
+    def test_overflow(self):
+        def f(i):
+            try:
+                e = erase_int(i)
+            except OverflowError:
+                return -1
+            #assert is_integer(e)
+            return unerase_int(e)
+        x = self.interpret(f, [16])
+        assert x == 16
+        x = self.interpret(f, [sys.maxint])
+        assert x == -1
+
+    def test_none(self):
+        def foo():
+            return uneraseX(eraseX(None))
+        assert foo() is None
+        res = self.interpret(foo, [])
+        assert not res
+        #
+        def foo():
+            eraseX(X())
+            return uneraseX(eraseX(None))
+        assert foo() is None
+        res = self.interpret(foo, [])
+        assert not res
+
+    def test_rtype_list(self):
+        prebuilt_l = [X()]
+        prebuilt_e = erase_list_X(prebuilt_l)
+        def l(flag):
+            if flag == 1:
+                l = [X()]
+                e = erase_list_X(l)
+            elif flag == 2:
+                l = prebuilt_l
+                e = erase_list_X(l)
+            else:
+                l = prebuilt_l
+                e = prebuilt_e
+            #assert is_integer(e) is False
+            assert unerase_list_X(e) is l
+        self.interpret(l, [0])
+        self.interpret(l, [1])
+        self.interpret(l, [2])
+
+class TestLLtype(BaseTestRErased, LLRtypeMixin):
+    ERASED_TYPE = llmemory.GCREF
+    UNERASED_TYPE = OBJECTPTR
+    def castable(self, TO, var):
+        return lltype.castable(TO, lltype.typeOf(var)) > 0
+
+from pypy.rpython.ootypesystem.ootype import Object
+
+class TestOOtype(BaseTestRErased, OORtypeMixin):
+    ERASED_TYPE = Object
+    UNERASED_TYPE = OBJECT
+    def castable(self, TO, var):
+        return ootype.isSubclass(lltype.typeOf(var), TO)
+    @py.test.mark.xfail
+    def test_prebuilt_erased(self):
+        super(TestOOtype, self).test_prebuilt_erased()
+
+def test_union():
+    s_e1 = SomeErased()
+    s_e1.const = 1
+    s_e2 = SomeErased()
+    s_e2.const = 3
+    assert not annmodel.pair(s_e1, s_e2).union().is_constant()
+
+# ____________________________________________________________
+
+def test_erasing_pair():
+    erase, unerase = new_erasing_pair("test1")
+    class X:
+        pass
+    x = X()
+    erased = erase(x)
+    assert unerase(erased) is x
+    #
+    erase2, unerase2 = new_erasing_pair("test2")
+    py.test.raises(AssertionError, unerase2, erased)
diff --git a/pypy/rpython/extfuncregistry.py b/pypy/rpython/extfuncregistry.py
--- a/pypy/rpython/extfuncregistry.py
+++ b/pypy/rpython/extfuncregistry.py
@@ -42,6 +42,8 @@
        ('sqrt', [float], float),
        ('log', [float], float),
        ('log10', [float], float),
+       ('sin', [float], float),
+       ('cos', [float], float),
     ]),
 ]
 for module, methods in _register:
diff --git a/pypy/rpython/llinterp.py b/pypy/rpython/llinterp.py
--- a/pypy/rpython/llinterp.py
+++ b/pypy/rpython/llinterp.py
@@ -1225,6 +1225,12 @@
         except ValueError:
             self.make_llexception()
 
+    def op_oobox_int(self, i):
+        return ootype.oobox_int(i)
+
+    def op_oounbox_int(self, x):
+        return ootype.oounbox_int(x)
+
 class Tracer(object):
     Counter = 0
     file = None
diff --git a/pypy/rpython/lltypesystem/lloperation.py b/pypy/rpython/lltypesystem/lloperation.py
--- a/pypy/rpython/lltypesystem/lloperation.py
+++ b/pypy/rpython/lltypesystem/lloperation.py
@@ -585,6 +585,8 @@
     'classof':              LLOp(oo=True, canfold=True),
     'subclassof':           LLOp(oo=True, canfold=True),
     'oostring':             LLOp(oo=True, sideeffects=False),
+    'oobox_int':            LLOp(oo=True, sideeffects=False),
+    'oounbox_int':          LLOp(oo=True, sideeffects=False),
     'ooparse_int':          LLOp(oo=True, canraise=(ValueError,)),
     'ooparse_float':        LLOp(oo=True, canraise=(ValueError,)),
     'oounicode':            LLOp(oo=True, canraise=(UnicodeDecodeError,)),
diff --git a/pypy/rpython/lltypesystem/module/ll_math.py b/pypy/rpython/lltypesystem/module/ll_math.py
--- a/pypy/rpython/lltypesystem/module/ll_math.py
+++ b/pypy/rpython/lltypesystem/module/ll_math.py
@@ -69,6 +69,8 @@
                         [rffi.DOUBLE, rffi.DOUBLE], rffi.DOUBLE)
 math_floor = llexternal('floor', [rffi.DOUBLE], rffi.DOUBLE, elidable_function=True)
 math_sqrt = llexternal('sqrt', [rffi.DOUBLE], rffi.DOUBLE)
+math_sin = llexternal('sin', [rffi.DOUBLE], rffi.DOUBLE)
+math_cos = llexternal('cos', [rffi.DOUBLE], rffi.DOUBLE)
 
 @jit.elidable
 def sqrt_nonneg(x):
@@ -340,6 +342,16 @@
         raise ValueError("math domain error")
     return math_log10(x)
 
+def ll_math_sin(x):
+    if isinf(x):
+        raise ValueError("math domain error")
+    return math_sin(x)
+
+def ll_math_cos(x):
+    if isinf(x):
+        raise ValueError("math domain error")
+    return math_cos(x)
+
 # ____________________________________________________________
 #
 # Default implementations
@@ -377,8 +389,8 @@
 
 unary_math_functions = [
     'acos', 'asin', 'atan',
-    'ceil', 'cos', 'cosh', 'exp', 'fabs',
-    'sin', 'sinh', 'tan', 'tanh',
+    'ceil', 'cosh', 'exp', 'fabs',
+    'sinh', 'tan', 'tanh',
     'acosh', 'asinh', 'atanh', 'log1p', 'expm1',
     ]
 unary_math_functions_can_overflow = [
diff --git a/pypy/rpython/lltypesystem/module/test/test_llinterp_math.py b/pypy/rpython/lltypesystem/module/test/test_llinterp_math.py
--- a/pypy/rpython/lltypesystem/module/test/test_llinterp_math.py
+++ b/pypy/rpython/lltypesystem/module/test/test_llinterp_math.py
@@ -37,7 +37,7 @@
             assert self.interpret(f, [0.3, 0.4]) == f(0.3, 0.4)
         return next_test
 
-    for name in ll_math.unary_math_functions + ['log', 'log10', 'sqrt']:
+    for name in ll_math.unary_math_functions + ['log', 'log10', 'sin', 'cos', 'sqrt']:
         func_name = 'test_%s' % (name,)
         next_test = new_unary_test(name)
         next_test.func_name = func_name
diff --git a/pypy/rpython/lltypesystem/rbuilder.py b/pypy/rpython/lltypesystem/rbuilder.py
--- a/pypy/rpython/lltypesystem/rbuilder.py
+++ b/pypy/rpython/lltypesystem/rbuilder.py
@@ -1,13 +1,13 @@
-
+from pypy.rlib import rgc
+from pypy.rlib.objectmodel import enforceargs
+from pypy.rlib.rarithmetic import ovfcheck
+from pypy.rpython.annlowlevel import llstr
+from pypy.rpython.rptr import PtrRepr
+from pypy.rpython.lltypesystem import lltype, rstr
+from pypy.rpython.lltypesystem.lltype import staticAdtMethod
+from pypy.rpython.lltypesystem.rstr import (STR, UNICODE, char_repr,
+    string_repr, unichar_repr, unicode_repr)
 from pypy.rpython.rbuilder import AbstractStringBuilderRepr
-from pypy.rpython.lltypesystem import lltype, rstr
-from pypy.rpython.lltypesystem.rstr import STR, UNICODE, char_repr,\
-     string_repr, unichar_repr, unicode_repr
-from pypy.rpython.annlowlevel import llstr
-from pypy.rlib import rgc
-from pypy.rlib.rarithmetic import ovfcheck
-from pypy.rlib.objectmodel import enforceargs
-from pypy.rpython.lltypesystem.lltype import staticAdtMethod
 from pypy.tool.sourcetools import func_with_new_name
 
 # Think about heuristics below, maybe we can come up with something
@@ -73,7 +73,7 @@
             ll_builder.grow(ll_builder, lgt)
         ll_str.copy_contents(ll_str, ll_builder.buf, 0, used, lgt)
         ll_builder.used = needed
-    
+
     @staticmethod
     def ll_append_char(ll_builder, char):
         if ll_builder.used == ll_builder.allocated:
@@ -102,6 +102,16 @@
         ll_builder.used = used
 
     @staticmethod
+    def ll_append_charpsize(ll_builder, charp, size):
+        used = ll_builder.used
+        if used + size > ll_builder.allocated:
+            ll_builder.grow(ll_builder, size)
+        for i in xrange(size):
+            ll_builder.buf.chars[used] = charp[i]
+            used += 1
+        ll_builder.used = used
+
+    @staticmethod
     def ll_getlength(ll_builder):
         return ll_builder.used
 
@@ -119,6 +129,9 @@
     mallocfn = staticmethod(rstr.mallocstr)
     string_repr = string_repr
     char_repr = char_repr
+    raw_ptr_repr = PtrRepr(
+        lltype.Ptr(lltype.Array(lltype.Char, hints={'nolength': True}))
+    )
 
 class UnicodeBuilderRepr(BaseStringBuilderRepr):
     lowleveltype = lltype.Ptr(UNICODEBUILDER)
@@ -126,6 +139,9 @@
     mallocfn = staticmethod(rstr.mallocunicode)
     string_repr = unicode_repr
     char_repr = unichar_repr
+    raw_ptr_repr = PtrRepr(
+        lltype.Ptr(lltype.Array(lltype.UniChar, hints={'nolength': True}))
+    )
 
 unicodebuilder_repr = UnicodeBuilderRepr()
 stringbuilder_repr = StringBuilderRepr()
diff --git a/pypy/rpython/lltypesystem/rdict.py b/pypy/rpython/lltypesystem/rdict.py
--- a/pypy/rpython/lltypesystem/rdict.py
+++ b/pypy/rpython/lltypesystem/rdict.py
@@ -29,7 +29,7 @@
 #        DICTVALUE value;
 #        int f_hash;        # (optional) key hash, if hard to recompute
 #    }
-#    
+#
 #    struct dicttable {
 #        int num_items;
 #        int num_pristine_entries;  # never used entries
@@ -50,12 +50,12 @@
         self.custom_eq_hash = custom_eq_hash is not None
         if not isinstance(key_repr, rmodel.Repr):  # not computed yet, done by setup()
             assert callable(key_repr)
-            self._key_repr_computer = key_repr 
+            self._key_repr_computer = key_repr
         else:
             self.external_key_repr, self.key_repr = self.pickkeyrepr(key_repr)
         if not isinstance(value_repr, rmodel.Repr):  # not computed yet, done by setup()
             assert callable(value_repr)
-            self._value_repr_computer = value_repr 
+            self._value_repr_computer = value_repr
         else:
             self.external_value_repr, self.value_repr = self.pickrepr(value_repr)
         self.dictkey = dictkey
@@ -176,7 +176,7 @@
             self.DICTENTRYARRAY = lltype.GcArray(self.DICTENTRY,
                                                  adtmeths=entrymeths)
             fields =          [ ("num_items", lltype.Signed),
-                                ("num_pristine_entries", lltype.Signed), 
+                                ("num_pristine_entries", lltype.Signed),
                                 ("entries", lltype.Ptr(self.DICTENTRYARRAY)) ]
             if self.custom_eq_hash:
                 self.r_rdict_eqfn, self.r_rdict_hashfn = self._custom_eq_hash_repr()
@@ -211,7 +211,7 @@
     def convert_const(self, dictobj):
         from pypy.rpython.lltypesystem import llmemory
         # get object from bound dict methods
-        #dictobj = getattr(dictobj, '__self__', dictobj) 
+        #dictobj = getattr(dictobj, '__self__', dictobj)
         if dictobj is None:
             return lltype.nullptr(self.DICT)
         if not isinstance(dictobj, (dict, objectmodel.r_dict)):
@@ -222,7 +222,7 @@
         except KeyError:
             self.setup()
             l_dict = ll_newdict_size(self.DICT, len(dictobj))
-            self.dict_cache[key] = l_dict 
+            self.dict_cache[key] = l_dict
             r_key = self.key_repr
             if r_key.lowleveltype == llmemory.Address:
                 raise TypeError("No prebuilt dicts of address keys")
@@ -274,7 +274,7 @@
         hop.exception_cannot_occur()
         v_res = hop.gendirectcall(ll_setdefault, v_dict, v_key, v_default)
         return self.recast_value(hop.llops, v_res)
-    
+
     def rtype_method_copy(self, hop):
         v_dict, = hop.inputargs(self)
         hop.exception_cannot_occur()
@@ -325,7 +325,7 @@
         hop.exception_is_here()
         return hop.gendirectcall(ll_popitem, cTUPLE, v_dict)
 
-class __extend__(pairtype(DictRepr, rmodel.Repr)): 
+class __extend__(pairtype(DictRepr, rmodel.Repr)):
 
     def rtype_getitem((r_dict, r_key), hop):
         v_dict, v_key = hop.inputargs(r_dict, r_dict.key_repr)
@@ -338,7 +338,7 @@
     def rtype_delitem((r_dict, r_key), hop):
         v_dict, v_key = hop.inputargs(r_dict, r_dict.key_repr)
         if not r_dict.custom_eq_hash:
-            hop.has_implicit_exception(KeyError)   # record that we know about it        
+            hop.has_implicit_exception(KeyError)   # record that we know about it
         hop.exception_is_here()
         return hop.gendirectcall(ll_dict_delitem, v_dict, v_key)
 
@@ -354,11 +354,11 @@
         v_dict, v_key = hop.inputargs(r_dict, r_dict.key_repr)
         hop.exception_is_here()
         return hop.gendirectcall(ll_contains, v_dict, v_key)
-        
+
 class __extend__(pairtype(DictRepr, DictRepr)):
     def convert_from_to((r_dict1, r_dict2), v, llops):
         # check that we don't convert from Dicts with
-        # different key/value types 
+        # different key/value types
         if r_dict1.dictkey is None or r_dict2.dictkey is None:
             return NotImplemented
         if r_dict1.dictkey is not r_dict2.dictkey:
@@ -430,7 +430,7 @@
     return hlinvoke(DICT.r_rdict_eqfn, d.fnkeyeq, key1, key2)
 
 def ll_dict_len(d):
-    return d.num_items 
+    return d.num_items
 
 def ll_dict_is_true(d):
     # check if a dict is True, allowing for None
@@ -491,8 +491,8 @@
     if i & HIGHEST_BIT:
         raise KeyError
     _ll_dict_del(d, i)
-ll_dict_delitem.oopspec = 'dict.delitem(d, key)'
 
+@jit.dont_look_inside
 def _ll_dict_del(d, i):
     d.entries.mark_deleted(i)
     d.num_items -= 1
@@ -501,9 +501,6 @@
     ENTRY = ENTRIES.OF
     entry = d.entries[i]
     if ENTRIES.must_clear_key:
-        key = entry.key   # careful about destructor side effects:
-                          # keep key alive until entry.value has also
-                          # been zeroed (if it must be)
         entry.key = lltype.nullptr(ENTRY.key.TO)
     if ENTRIES.must_clear_value:
         entry.value = lltype.nullptr(ENTRY.value.TO)
@@ -513,7 +510,7 @@
 
 def ll_dict_resize(d):
     old_entries = d.entries
-    old_size = len(old_entries) 
+    old_size = len(old_entries)
     # make a 'new_size' estimate and shrink it if there are many
     # deleted entry markers
     new_size = old_size * 2
@@ -541,7 +538,7 @@
     direct_compare = not hasattr(ENTRIES, 'no_direct_compare')
     mask = len(entries) - 1
     i = hash & mask
-    # do the first try before any looping 
+    # do the first try before any looping
     if entries.valid(i):
         checkingkey = entries[i].key
         if direct_compare and checkingkey == key:
@@ -565,8 +562,8 @@
 
     # In the loop, a deleted entry (everused and not valid) is by far
     # (factor of 100s) the least likely outcome, so test for that last.
-    perturb = r_uint(hash) 
-    while 1: 
+    perturb = r_uint(hash)
+    while 1:
         # compute the next index using unsigned arithmetic
         i = r_uint(i)
         i = (i << 2) + i + perturb + 1
@@ -594,7 +591,7 @@
                 if found:
                     return i   # found the entry
         elif freeslot == -1:
-            freeslot = i 
+            freeslot = i
         perturb >>= PERTURB_SHIFT
 
 def ll_dict_lookup_clean(d, hash):
@@ -604,7 +601,7 @@
     entries = d.entries
     mask = len(entries) - 1
     i = hash & mask
-    perturb = r_uint(hash) 
+    perturb = r_uint(hash)
     while entries.everused(i):
         i = r_uint(i)
         i = (i << 2) + i + perturb + 1
@@ -690,7 +687,6 @@
     iter.dict = d
     iter.index = 0
     return iter
-ll_dictiter.oopspec = 'newdictiter(d)'
 
 def _make_ll_dictnext(kind):
     # make three versions of the following function: keys, values, items
diff --git a/pypy/rpython/lltypesystem/rffi.py b/pypy/rpython/lltypesystem/rffi.py
--- a/pypy/rpython/lltypesystem/rffi.py
+++ b/pypy/rpython/lltypesystem/rffi.py
@@ -789,8 +789,7 @@
     # char* and size -> str (which can contain null bytes)
     def charpsize2str(cp, size):
         b = builder_class(size)
-        for i in xrange(size):
-            b.append(cp[i])
+        b.append_charpsize(cp, size)
         return b.build()
     charpsize2str._annenforceargs_ = [None, int]
 
@@ -1062,3 +1061,11 @@
         keep_unicodebuffer_alive_until_here(self.raw, self.gc_buf)
     def str(self, length):
         return unicode_from_buffer(self.raw, self.gc_buf, self.size, length)
+
+# You would have to have a *huge* amount of data for this to block long enough
+# to be worth it to release the GIL.
+c_memcpy = llexternal("memcpy",
+    [VOIDP, VOIDP, SIZE_T],
+    lltype.Void,
+    threadsafe=False
+)
\ No newline at end of file
diff --git a/pypy/rpython/memory/gc/minimark.py b/pypy/rpython/memory/gc/minimark.py
--- a/pypy/rpython/memory/gc/minimark.py
+++ b/pypy/rpython/memory/gc/minimark.py
@@ -49,6 +49,7 @@
 from pypy.rpython.lltypesystem.llmemory import raw_malloc_usage
 from pypy.rpython.memory.gc.base import GCBase, MovingGCBase
 from pypy.rpython.memory.gc import minimarkpage, env
+from pypy.rpython.memory.support import mangle_hash
 from pypy.rlib.rarithmetic import ovfcheck, LONG_BIT, intmask, r_uint
 from pypy.rlib.rarithmetic import LONG_BIT_SHIFT
 from pypy.rlib.debug import ll_assert, debug_print, debug_start, debug_stop
@@ -1732,7 +1733,7 @@
     # ----------
     # id() and identityhash() support
 
-    def id_or_identityhash(self, gcobj, special_case_prebuilt):
+    def id_or_identityhash(self, gcobj, is_hash):
         """Implement the common logic of id() and identityhash()
         of an object, given as a GCREF.
         """
@@ -1775,7 +1776,7 @@
                 # The answer is the address of the shadow.
                 obj = shadow
                 #
-            elif special_case_prebuilt:
+            elif is_hash:
                 if self.header(obj).tid & GCFLAG_HAS_SHADOW:
                     #
                     # For identityhash(), we need a special case for some
@@ -1784,10 +1785,14 @@
                     # after the object.  But we cannot use it for id()
                     # because the stored value might clash with a real one.
                     size = self.get_size(obj)
-                    return (obj + size).signed[0]
+                    i = (obj + size).signed[0]
+                    # Important: the returned value is not mangle_hash()ed!
+                    return i
         #
-        return llmemory.cast_adr_to_int(obj)
-
+        i = llmemory.cast_adr_to_int(obj)
+        if is_hash:
+            i = mangle_hash(i)
+        return i
 
     def id(self, gcobj):
         return self.id_or_identityhash(gcobj, False)
diff --git a/pypy/rpython/memory/gctransform/framework.py b/pypy/rpython/memory/gctransform/framework.py
--- a/pypy/rpython/memory/gctransform/framework.py
+++ b/pypy/rpython/memory/gctransform/framework.py
@@ -525,7 +525,8 @@
         self.c_vtinfo_skip_offset = rmodel.inputconst(lltype.typeOf(sko), sko)
 
     def build_root_walker(self):
-        return ShadowStackRootWalker(self)
+        from pypy.rpython.memory.gctransform import shadowstack
+        return shadowstack.ShadowStackRootWalker(self)
 
     def consider_constant(self, TYPE, value):
         self.layoutbuilder.consider_constant(TYPE, value, self.gcdata.gc)
@@ -932,10 +933,10 @@
     def gct_gc_identityhash(self, hop):
         livevars = self.push_roots(hop)
         [v_ptr] = hop.spaceop.args
-        v_adr = hop.genop("cast_ptr_to_adr", [v_ptr],
-                          resulttype=llmemory.Address)
+        v_ptr = hop.genop("cast_opaque_ptr", [v_ptr],
+                          resulttype=llmemory.GCREF)
         hop.genop("direct_call",
-                  [self.identityhash_ptr, self.c_const_gc, v_adr],
+                  [self.identityhash_ptr, self.c_const_gc, v_ptr],
                   resultvar=hop.spaceop.result)
         self.pop_roots(hop, livevars)
 
@@ -1323,217 +1324,3 @@
     def need_thread_support(self, gctransformer, getfn):
         raise Exception("%s does not support threads" % (
             self.__class__.__name__,))
-
-
-class ShadowStackRootWalker(BaseRootWalker):
-    need_root_stack = True
-    collect_stacks_from_other_threads = None
-
-    def __init__(self, gctransformer):
-        BaseRootWalker.__init__(self, gctransformer)
-        self.rootstacksize = sizeofaddr * gctransformer.root_stack_depth
-        # NB. 'self' is frozen, but we can use self.gcdata to store state
-        gcdata = self.gcdata
-
-        def incr_stack(n):
-            top = gcdata.root_stack_top
-            gcdata.root_stack_top = top + n*sizeofaddr
-            return top
-        self.incr_stack = incr_stack
-
-        def decr_stack(n):
-            top = gcdata.root_stack_top - n*sizeofaddr
-            gcdata.root_stack_top = top
-            return top
-        self.decr_stack = decr_stack
-
-        self.rootstackhook = gctransformer.root_stack_jit_hook
-        if self.rootstackhook is None:
-            def collect_stack_root(callback, gc, addr):
-                if gc.points_to_valid_gc_object(addr):
-                    callback(gc, addr)
-                return sizeofaddr
-            self.rootstackhook = collect_stack_root
-
-    def push_stack(self, addr):
-        top = self.incr_stack(1)
-        top.address[0] = addr
-
-    def pop_stack(self):
-        top = self.decr_stack(1)
-        return top.address[0]
-
-    def allocate_stack(self):
-        return llmemory.raw_malloc(self.rootstacksize)
-
-    def setup_root_walker(self):
-        stackbase = self.allocate_stack()
-        ll_assert(bool(stackbase), "could not allocate root stack")
-        self.gcdata.root_stack_top  = stackbase
-        self.gcdata.root_stack_base = stackbase
-        BaseRootWalker.setup_root_walker(self)
-
-    def walk_stack_roots(self, collect_stack_root):
-        gcdata = self.gcdata
-        gc = self.gc
-        rootstackhook = self.rootstackhook
-        addr = gcdata.root_stack_base
-        end = gcdata.root_stack_top
-        while addr != end:
-            addr += rootstackhook(collect_stack_root, gc, addr)
-        if self.collect_stacks_from_other_threads is not None:
-            self.collect_stacks_from_other_threads(collect_stack_root)
-
-    def need_thread_support(self, gctransformer, getfn):
-        from pypy.module.thread import ll_thread    # xxx fish
-        from pypy.rpython.memory.support import AddressDict
-        from pypy.rpython.memory.support import copy_without_null_values
-        gcdata = self.gcdata
-        # the interfacing between the threads and the GC is done via
-        # three completely ad-hoc operations at the moment:
-        # gc_thread_prepare, gc_thread_run, gc_thread_die.
-        # See docstrings below.
-
-        def get_aid():
-            """Return the thread identifier, cast to an (opaque) address."""
-            return llmemory.cast_int_to_adr(ll_thread.get_ident())
-
-        def thread_setup():
-            """Called once when the program starts."""
-            aid = get_aid()
-            gcdata.main_thread = aid
-            gcdata.active_thread = aid
-            gcdata.thread_stacks = AddressDict()     # {aid: root_stack_top}
-            gcdata._fresh_rootstack = llmemory.NULL
-            gcdata.dead_threads_count = 0
-
-        def thread_prepare():
-            """Called just before thread.start_new_thread().  This
-            allocates a new shadow stack to be used by the future
-            thread.  If memory runs out, this raises a MemoryError
-            (which can be handled by the caller instead of just getting
-            ignored if it was raised in the newly starting thread).
-            """
-            if not gcdata._fresh_rootstack:
-                gcdata._fresh_rootstack = self.allocate_stack()
-                if not gcdata._fresh_rootstack:
-                    raise MemoryError
-
-        def thread_run():
-            """Called whenever the current thread (re-)acquired the GIL.
-            This should ensure that the shadow stack installed in
-            gcdata.root_stack_top/root_stack_base is the one corresponding
-            to the current thread.
-            """
-            aid = get_aid()
-            if gcdata.active_thread != aid:
-                switch_shadow_stacks(aid)
-
-        def thread_die():
-            """Called just before the final GIL release done by a dying
-            thread.  After a thread_die(), no more gc operation should
-            occur in this thread.
-            """
-            aid = get_aid()
-            if aid == gcdata.main_thread:
-                return   # ignore calls to thread_die() in the main thread
-                         # (which can occur after a fork()).
-            gcdata.thread_stacks.setitem(aid, llmemory.NULL)
-            old = gcdata.root_stack_base
-            if gcdata._fresh_rootstack == llmemory.NULL:
-                gcdata._fresh_rootstack = old
-            else:
-                llmemory.raw_free(old)
-            install_new_stack(gcdata.main_thread)
-            # from time to time, rehash the dictionary to remove
-            # old NULL entries
-            gcdata.dead_threads_count += 1
-            if (gcdata.dead_threads_count & 511) == 0:
-                copy = copy_without_null_values(gcdata.thread_stacks)
-                gcdata.thread_stacks.delete()
-                gcdata.thread_stacks = copy
-
-        def switch_shadow_stacks(new_aid):
-            save_away_current_stack()
-            install_new_stack(new_aid)
-        switch_shadow_stacks._dont_inline_ = True
-
-        def save_away_current_stack():
-            old_aid = gcdata.active_thread
-            # save root_stack_base on the top of the stack
-            self.push_stack(gcdata.root_stack_base)
-            # store root_stack_top into the dictionary
-            gcdata.thread_stacks.setitem(old_aid, gcdata.root_stack_top)
-
-        def install_new_stack(new_aid):
-            # look for the new stack top
-            top = gcdata.thread_stacks.get(new_aid, llmemory.NULL)
-            if top == llmemory.NULL:
-                # first time we see this thread.  It is an error if no
-                # fresh new stack is waiting.
-                base = gcdata._fresh_rootstack
-                gcdata._fresh_rootstack = llmemory.NULL
-                ll_assert(base != llmemory.NULL, "missing gc_thread_prepare")
-                gcdata.root_stack_top = base
-                gcdata.root_stack_base = base
-            else:
-                # restore the root_stack_base from the top of the stack
-                gcdata.root_stack_top = top
-                gcdata.root_stack_base = self.pop_stack()
-            # done
-            gcdata.active_thread = new_aid
-
-        def collect_stack(aid, stacktop, callback):
-            if stacktop != llmemory.NULL and aid != gcdata.active_thread:
-                # collect all valid stacks from the dict (the entry
-                # corresponding to the current thread is not valid)
-                gc = self.gc
-                rootstackhook = self.rootstackhook
-                end = stacktop - sizeofaddr
-                addr = end.address[0]
-                while addr != end:
-                    addr += rootstackhook(callback, gc, addr)
-
-        def collect_more_stacks(callback):
-            ll_assert(get_aid() == gcdata.active_thread,
-                      "collect_more_stacks(): invalid active_thread")
-            gcdata.thread_stacks.foreach(collect_stack, callback)
-
-        def _free_if_not_current(aid, stacktop, _):
-            if stacktop != llmemory.NULL and aid != gcdata.active_thread:
-                end = stacktop - sizeofaddr
-                base = end.address[0]
-                llmemory.raw_free(base)
-
-        def thread_after_fork(result_of_fork, opaqueaddr):
-            # we don't need a thread_before_fork in this case, so
-            # opaqueaddr == NULL.  This is called after fork().
-            if result_of_fork == 0:
-                # We are in the child process.  Assumes that only the
-                # current thread survived, so frees the shadow stacks
-                # of all the other ones.
-                gcdata.thread_stacks.foreach(_free_if_not_current, None)
-                # Clears the dict (including the current thread, which
-                # was an invalid entry anyway and will be recreated by
-                # the next call to save_away_current_stack()).
-                gcdata.thread_stacks.clear()
-                # Finally, reset the stored thread IDs, in case it
-                # changed because of fork().  Also change the main
-                # thread to the current one (because there is not any
-                # other left).
-                aid = get_aid()
-                gcdata.main_thread = aid
-                gcdata.active_thread = aid
-
-        self.thread_setup = thread_setup
-        self.thread_prepare_ptr = getfn(thread_prepare, [], annmodel.s_None)
-        self.thread_run_ptr = getfn(thread_run, [], annmodel.s_None,
-                                    inline=True)
-        # no thread_start_ptr here
-        self.thread_die_ptr = getfn(thread_die, [], annmodel.s_None)
-        # no thread_before_fork_ptr here
-        self.thread_after_fork_ptr = getfn(thread_after_fork,
-                                           [annmodel.SomeInteger(),
-                                            annmodel.SomeAddress()],
-                                           annmodel.s_None)
-        self.collect_stacks_from_other_threads = collect_more_stacks
diff --git a/pypy/rpython/memory/gctransform/shadowstack.py b/pypy/rpython/memory/gctransform/shadowstack.py
new file mode 100644
--- /dev/null
+++ b/pypy/rpython/memory/gctransform/shadowstack.py
@@ -0,0 +1,219 @@
+from pypy.rpython.memory.gctransform.framework import BaseRootWalker
+from pypy.rpython.memory.gctransform.framework import sizeofaddr
+from pypy.rlib.debug import ll_assert
+from pypy.rpython.lltypesystem import llmemory
+from pypy.annotation import model as annmodel
+
+
+class ShadowStackRootWalker(BaseRootWalker):
+    need_root_stack = True
+    collect_stacks_from_other_threads = None
+
+    def __init__(self, gctransformer):
+        BaseRootWalker.__init__(self, gctransformer)
+        self.rootstacksize = sizeofaddr * gctransformer.root_stack_depth
+        # NB. 'self' is frozen, but we can use self.gcdata to store state
+        gcdata = self.gcdata
+
+        def incr_stack(n):
+            top = gcdata.root_stack_top
+            gcdata.root_stack_top = top + n*sizeofaddr
+            return top
+        self.incr_stack = incr_stack
+
+        def decr_stack(n):
+            top = gcdata.root_stack_top - n*sizeofaddr
+            gcdata.root_stack_top = top
+            return top
+        self.decr_stack = decr_stack
+
+        self.rootstackhook = gctransformer.root_stack_jit_hook
+        if self.rootstackhook is None:
+            def collect_stack_root(callback, gc, addr):
+                if gc.points_to_valid_gc_object(addr):
+                    callback(gc, addr)
+                return sizeofaddr
+            self.rootstackhook = collect_stack_root
+
+    def push_stack(self, addr):
+        top = self.incr_stack(1)
+        top.address[0] = addr
+
+    def pop_stack(self):
+        top = self.decr_stack(1)
+        return top.address[0]
+
+    def allocate_stack(self):
+        return llmemory.raw_malloc(self.rootstacksize)
+
+    def setup_root_walker(self):
+        stackbase = self.allocate_stack()
+        ll_assert(bool(stackbase), "could not allocate root stack")
+        self.gcdata.root_stack_top  = stackbase
+        self.gcdata.root_stack_base = stackbase
+        BaseRootWalker.setup_root_walker(self)
+
+    def walk_stack_roots(self, collect_stack_root):
+        gcdata = self.gcdata
+        gc = self.gc
+        rootstackhook = self.rootstackhook
+        addr = gcdata.root_stack_base
+        end = gcdata.root_stack_top
+        while addr != end:
+            addr += rootstackhook(collect_stack_root, gc, addr)
+        if self.collect_stacks_from_other_threads is not None:
+            self.collect_stacks_from_other_threads(collect_stack_root)
+
+    def need_thread_support(self, gctransformer, getfn):
+        from pypy.module.thread import ll_thread    # xxx fish
+        from pypy.rpython.memory.support import AddressDict
+        from pypy.rpython.memory.support import copy_without_null_values
+        gcdata = self.gcdata
+        # the interfacing between the threads and the GC is done via
+        # three completely ad-hoc operations at the moment:
+        # gc_thread_prepare, gc_thread_run, gc_thread_die.
+        # See docstrings below.
+
+        def get_aid():
+            """Return the thread identifier, cast to an (opaque) address."""
+            return llmemory.cast_int_to_adr(ll_thread.get_ident())
+
+        def thread_setup():
+            """Called once when the program starts."""
+            aid = get_aid()
+            gcdata.main_thread = aid
+            gcdata.active_thread = aid
+            gcdata.thread_stacks = AddressDict()     # {aid: root_stack_top}
+            gcdata._fresh_rootstack = llmemory.NULL
+            gcdata.dead_threads_count = 0
+
+        def thread_prepare():
+            """Called just before thread.start_new_thread().  This
+            allocates a new shadow stack to be used by the future
+            thread.  If memory runs out, this raises a MemoryError
+            (which can be handled by the caller instead of just getting
+            ignored if it was raised in the newly starting thread).
+            """
+            if not gcdata._fresh_rootstack:
+                gcdata._fresh_rootstack = self.allocate_stack()
+                if not gcdata._fresh_rootstack:
+                    raise MemoryError
+
+        def thread_run():
+            """Called whenever the current thread (re-)acquired the GIL.
+            This should ensure that the shadow stack installed in
+            gcdata.root_stack_top/root_stack_base is the one corresponding
+            to the current thread.
+            """
+            aid = get_aid()
+            if gcdata.active_thread != aid:
+                switch_shadow_stacks(aid)
+
+        def thread_die():
+            """Called just before the final GIL release done by a dying
+            thread.  After a thread_die(), no more gc operation should
+            occur in this thread.
+            """
+            aid = get_aid()
+            if aid == gcdata.main_thread:
+                return   # ignore calls to thread_die() in the main thread
+                         # (which can occur after a fork()).
+            gcdata.thread_stacks.setitem(aid, llmemory.NULL)
+            old = gcdata.root_stack_base
+            if gcdata._fresh_rootstack == llmemory.NULL:
+                gcdata._fresh_rootstack = old
+            else:
+                llmemory.raw_free(old)
+            install_new_stack(gcdata.main_thread)
+            # from time to time, rehash the dictionary to remove
+            # old NULL entries
+            gcdata.dead_threads_count += 1
+            if (gcdata.dead_threads_count & 511) == 0:
+                copy = copy_without_null_values(gcdata.thread_stacks)
+                gcdata.thread_stacks.delete()
+                gcdata.thread_stacks = copy
+
+        def switch_shadow_stacks(new_aid):
+            save_away_current_stack()
+            install_new_stack(new_aid)
+        switch_shadow_stacks._dont_inline_ = True
+
+        def save_away_current_stack():
+            old_aid = gcdata.active_thread
+            # save root_stack_base on the top of the stack
+            self.push_stack(gcdata.root_stack_base)
+            # store root_stack_top into the dictionary
+            gcdata.thread_stacks.setitem(old_aid, gcdata.root_stack_top)
+
+        def install_new_stack(new_aid):
+            # look for the new stack top
+            top = gcdata.thread_stacks.get(new_aid, llmemory.NULL)
+            if top == llmemory.NULL:
+                # first time we see this thread.  It is an error if no
+                # fresh new stack is waiting.
+                base = gcdata._fresh_rootstack
+                gcdata._fresh_rootstack = llmemory.NULL
+                ll_assert(base != llmemory.NULL, "missing gc_thread_prepare")
+                gcdata.root_stack_top = base
+                gcdata.root_stack_base = base
+            else:
+                # restore the root_stack_base from the top of the stack
+                gcdata.root_stack_top = top
+                gcdata.root_stack_base = self.pop_stack()
+            # done
+            gcdata.active_thread = new_aid
+
+        def collect_stack(aid, stacktop, callback):
+            if stacktop != llmemory.NULL and aid != gcdata.active_thread:
+                # collect all valid stacks from the dict (the entry
+                # corresponding to the current thread is not valid)
+                gc = self.gc
+                rootstackhook = self.rootstackhook
+                end = stacktop - sizeofaddr
+                addr = end.address[0]
+                while addr != end:
+                    addr += rootstackhook(callback, gc, addr)
+
+        def collect_more_stacks(callback):
+            ll_assert(get_aid() == gcdata.active_thread,
+                      "collect_more_stacks(): invalid active_thread")
+            gcdata.thread_stacks.foreach(collect_stack, callback)
+
+        def _free_if_not_current(aid, stacktop, _):
+            if stacktop != llmemory.NULL and aid != gcdata.active_thread:
+                end = stacktop - sizeofaddr
+                base = end.address[0]
+                llmemory.raw_free(base)
+
+        def thread_after_fork(result_of_fork, opaqueaddr):
+            # we don't need a thread_before_fork in this case, so
+            # opaqueaddr == NULL.  This is called after fork().
+            if result_of_fork == 0:
+                # We are in the child process.  Assumes that only the
+                # current thread survived, so frees the shadow stacks
+                # of all the other ones.
+                gcdata.thread_stacks.foreach(_free_if_not_current, None)
+                # Clears the dict (including the current thread, which
+                # was an invalid entry anyway and will be recreated by
+                # the next call to save_away_current_stack()).
+                gcdata.thread_stacks.clear()
+                # Finally, reset the stored thread IDs, in case it
+                # changed because of fork().  Also change the main
+                # thread to the current one (because there is not any
+                # other left).
+                aid = get_aid()
+                gcdata.main_thread = aid
+                gcdata.active_thread = aid
+
+        self.thread_setup = thread_setup
+        self.thread_prepare_ptr = getfn(thread_prepare, [], annmodel.s_None)
+        self.thread_run_ptr = getfn(thread_run, [], annmodel.s_None,
+                                    inline=True)
+        # no thread_start_ptr here
+        self.thread_die_ptr = getfn(thread_die, [], annmodel.s_None)
+        # no thread_before_fork_ptr here
+        self.thread_after_fork_ptr = getfn(thread_after_fork,
+                                           [annmodel.SomeInteger(),
+                                            annmodel.SomeAddress()],
+                                           annmodel.s_None)
+        self.collect_stacks_from_other_threads = collect_more_stacks
diff --git a/pypy/rpython/memory/lldict.py b/pypy/rpython/memory/lldict.py
--- a/pypy/rpython/memory/lldict.py
+++ b/pypy/rpython/memory/lldict.py
@@ -1,6 +1,7 @@
 from pypy.rpython.lltypesystem import lltype, llmemory
 from pypy.rpython.lltypesystem import rdict
 from pypy.rlib.objectmodel import we_are_translated
+from pypy.rpython.memory.support import mangle_hash
 
 # This is a low-level AddressDict, reusing a lot of the logic from rdict.py.
 # xxx this is very dependent on the details of rdict.py
@@ -40,7 +41,8 @@
     lltype.free(entries, flavor="raw")
     if not we_are_translated(): count_alloc(-1)
 
-_hash = llmemory.cast_adr_to_int
+def _hash(adr):
+    return mangle_hash(llmemory.cast_adr_to_int(adr))
 
 def dict_keyhash(d, key):
     return _hash(key)
diff --git a/pypy/rpython/memory/support.py b/pypy/rpython/memory/support.py
--- a/pypy/rpython/memory/support.py
+++ b/pypy/rpython/memory/support.py
@@ -4,6 +4,15 @@
 from pypy.rlib.debug import ll_assert
 from pypy.tool.identity_dict import identity_dict
 
+
+def mangle_hash(i):
+    # To hash pointers in dictionaries.  Assumes that i shows some
+    # alignment (to 4, 8, maybe 16 bytes), so we use the following
+    # formula to avoid the trailing bits being always 0.
+    return i ^ (i >> 4)
+
+# ____________________________________________________________
+
 DEFAULT_CHUNK_SIZE = 1019
 
 
diff --git a/pypy/rpython/ootypesystem/ooopimpl.py b/pypy/rpython/ootypesystem/ooopimpl.py
--- a/pypy/rpython/ootypesystem/ooopimpl.py
+++ b/pypy/rpython/ootypesystem/ooopimpl.py
@@ -3,7 +3,6 @@
 # ____________________________________________________________
 # Implementation of the 'canfold' oo operations
 
-
 def op_ooupcast(INST, inst):
     return ootype.ooupcast(INST, inst)
 op_ooupcast.need_result_type = True
diff --git a/pypy/rpython/ootypesystem/ootype.py b/pypy/rpython/ootypesystem/ootype.py
--- a/pypy/rpython/ootypesystem/ootype.py
+++ b/pypy/rpython/ootypesystem/ootype.py
@@ -1938,6 +1938,17 @@
     assert typeOf(obj) is Object
     return obj._cast_to(EXPECTED_TYPE)
 
+class Box(_object):
+    def __init__(self, i):
+        self._TYPE = Object
+        self.i = i
+
+def oobox_int(i):
+    return Box(i)
+
+def oounbox_int(x):
+    return x.i
+
 def oostring(obj, base):
     """
     Convert char, int, float, instances and str to str.
diff --git a/pypy/rpython/ootypesystem/rdict.py b/pypy/rpython/ootypesystem/rdict.py
--- a/pypy/rpython/ootypesystem/rdict.py
+++ b/pypy/rpython/ootypesystem/rdict.py
@@ -255,7 +255,7 @@
         methodname = None
     return fn, v_obj, methodname
 
-def rtype_r_dict(hop):
+def rtype_r_dict(hop, i_force_non_null=None):
     from pypy.rlib import jit
 
     r_dict = hop.r_result
diff --git a/pypy/rpython/rbuilder.py b/pypy/rpython/rbuilder.py
--- a/pypy/rpython/rbuilder.py
+++ b/pypy/rpython/rbuilder.py
@@ -36,6 +36,11 @@
         hop.exception_cannot_occur()
         return hop.gendirectcall(self.ll_append_multiple_char, *vlist)
 
+    def rtype_method_append_charpsize(self, hop):
+        vlist = hop.inputargs(self, self.raw_ptr_repr, lltype.Signed)
+        hop.exception_cannot_occur()
+        return hop.gendirectcall(self.ll_append_charpsize, *vlist)
+
     def rtype_method_getlength(self, hop):
         vlist = hop.inputargs(self)
         hop.exception_cannot_occur()
diff --git a/pypy/rpython/rptr.py b/pypy/rpython/rptr.py
--- a/pypy/rpython/rptr.py
+++ b/pypy/rpython/rptr.py
@@ -22,7 +22,7 @@
 class __extend__(annmodel.SomeInteriorPtr):
     def rtyper_makerepr(self, rtyper):
         return InteriorPtrRepr(self.ll_ptrtype)
- 
+
 
 class PtrRepr(Repr):
 
@@ -91,7 +91,7 @@
         vlist = hop.inputargs(*hop.args_r)
         nexpected = len(self.lowleveltype.TO.ARGS)
         nactual = len(vlist)-1
-        if nactual != nexpected: 
+        if nactual != nexpected:
             raise TyperError("argcount mismatch:  expected %d got %d" %
                             (nexpected, nactual))
         if isinstance(vlist[0], flowmodel.Constant):
@@ -111,7 +111,12 @@
         hop.swap_fst_snd_args()
         hop.r_s_popfirstarg()
         return self.rtype_simple_call(hop)
-        
+
+class __extend__(pairtype(PtrRepr, PtrRepr)):
+    def convert_from_to((r_ptr1, r_ptr2), v, llop):
+        assert r_ptr1.lowleveltype == r_ptr2.lowleveltype
+        return v
+
 
 class __extend__(pairtype(PtrRepr, IntegerRepr)):
 
@@ -205,7 +210,7 @@
         self.lowleveltype = adtmeth.ll_ptrtype
         self.ll_ptrtype = adtmeth.ll_ptrtype
         self.lowleveltype = rtyper.getrepr(annmodel.lltype_to_annotation(adtmeth.ll_ptrtype)).lowleveltype
- 
+
     def rtype_simple_call(self, hop):
         hop2 = hop.copy()
         func = self.func
@@ -242,7 +247,7 @@
         if numitemoffsets > 0:
             self.lowleveltype = lltype.Ptr(self.parentptrtype._interior_ptr_type_with_index(self.resulttype.TO))
         else:
-            self.lowleveltype = self.parentptrtype            
+            self.lowleveltype = self.parentptrtype
 
     def getinteriorfieldargs(self, hop, v_self):
         vlist = []
@@ -305,7 +310,7 @@
 
 
 class __extend__(pairtype(InteriorPtrRepr, IntegerRepr)):
-    def rtype_getitem((r_ptr, r_item), hop): 
+    def rtype_getitem((r_ptr, r_item), hop):
         ARRAY = r_ptr.resulttype.TO
         ITEM_TYPE = ARRAY.OF
         if isinstance(ITEM_TYPE, lltype.ContainerType):
@@ -325,7 +330,7 @@
             vlist = r_ptr.getinteriorfieldargs(hop, v_self) + [v_index]
             return hop.genop('getinteriorfield', vlist,
                              resulttype=ITEM_TYPE)
-        
+
     def rtype_setitem((r_ptr, r_index), hop):
         ARRAY = r_ptr.resulttype.TO
         ITEM_TYPE = ARRAY.OF
@@ -333,11 +338,11 @@
         v_self, v_index, v_value = hop.inputargs(r_ptr, lltype.Signed, hop.args_r[2])
         vlist = r_ptr.getinteriorfieldargs(hop, v_self) + [v_index, v_value]
         hop.genop('setinteriorfield', vlist)
-            
+
 class __extend__(pairtype(InteriorPtrRepr, LLADTMethRepr)):
 
     def convert_from_to((r_from, r_to), v, llops):
         if r_from.lowleveltype == r_to.lowleveltype:
             return v
         return NotImplemented
-   
+
diff --git a/pypy/rpython/test/test_rbuilder.py b/pypy/rpython/test/test_rbuilder.py
--- a/pypy/rpython/test/test_rbuilder.py
+++ b/pypy/rpython/test/test_rbuilder.py
@@ -1,8 +1,10 @@
 import py
+
+from pypy.rlib.rstring import StringBuilder, UnicodeBuilder
+from pypy.rpython.annlowlevel import llstr, hlstr
+from pypy.rpython.lltypesystem import rffi
+from pypy.rpython.lltypesystem.rbuilder import *
 from pypy.rpython.test.tool import BaseRtypingTest, LLRtypeMixin, OORtypeMixin
-from pypy.rpython.lltypesystem.rbuilder import *
-from pypy.rpython.annlowlevel import llstr, hlstr
-from pypy.rlib.rstring import StringBuilder, UnicodeBuilder
 
 
 class TestStringBuilderDirect(object):
@@ -73,6 +75,15 @@
         res = self.interpret(func, [])
         assert res == 4
 
+    def test_append_charpsize(self):
+        def func(l):
+            s = StringBuilder()
+            with rffi.scoped_str2charp("hello world") as x:
+                s.append_charpsize(x, l)
+            return s.build()
+        res = self.ll_to_string(self.interpret(func, [5]))
+        assert res == "hello"
+
 class TestLLtype(BaseTestStringBuilder, LLRtypeMixin):
     pass
 
@@ -81,3 +92,5 @@
         py.test.skip("getlength(): not implemented on ootype")
     def test_unicode_getlength(self):
         py.test.skip("getlength(): not implemented on ootype")
+    def test_append_charpsize(self):
+        py.test.skip("append_charpsize(): not implemented on ootype")
\ No newline at end of file
diff --git a/pypy/rpython/test/test_rint.py b/pypy/rpython/test/test_rint.py
--- a/pypy/rpython/test/test_rint.py
+++ b/pypy/rpython/test/test_rint.py
@@ -8,6 +8,9 @@
 from pypy.rlib import objectmodel
 from pypy.rpython.test.tool import BaseRtypingTest, LLRtypeMixin, OORtypeMixin
 
+from pypy.rpython.lltypesystem import lltype
+from pypy.rpython.ootypesystem import ootype
+from pypy.rpython.lltypesystem.lloperation import llop
 
 class TestSnippet(object):
 
@@ -412,4 +415,8 @@
     pass
 
 class TestOOtype(BaseTestRint, OORtypeMixin):
-    pass
+    def test_oobox_int(self):
+        def f():
+            x = llop.oobox_int(ootype.Object, 42)
+            return llop.oounbox_int(lltype.Signed, x)
+        assert self.interpret(f, []) == 42
diff --git a/pypy/jit/codewriter/regalloc.py b/pypy/tool/algo/regalloc.py
copy from pypy/jit/codewriter/regalloc.py
copy to pypy/tool/algo/regalloc.py
--- a/pypy/jit/codewriter/regalloc.py
+++ b/pypy/tool/algo/regalloc.py
@@ -2,13 +2,11 @@
 from pypy.objspace.flow.model import Variable
 from pypy.tool.algo.color import DependencyGraph
 from pypy.tool.algo.unionfind import UnionFind
-from pypy.jit.metainterp.history import getkind
-from pypy.jit.codewriter.flatten import ListOfKind
 
-def perform_register_allocation(graph, kind):
+def perform_register_allocation(graph, consider_var, ListOfKind=()):
     """Perform register allocation for the Variables of the given 'kind'
     in the 'graph'."""
-    regalloc = RegAllocator(graph, kind)
+    regalloc = RegAllocator(graph, consider_var, ListOfKind)
     regalloc.make_dependencies()
     regalloc.coalesce_variables()
     regalloc.find_node_coloring()
@@ -18,9 +16,10 @@
 class RegAllocator(object):
     DEBUG_REGALLOC = False
 
-    def __init__(self, graph, kind):
+    def __init__(self, graph, consider_var, ListOfKind):
         self.graph = graph
-        self.kind = kind
+        self.consider_var = consider_var
+        self.ListOfKind = ListOfKind
 
     def make_dependencies(self):
         dg = DependencyGraph()
@@ -31,7 +30,7 @@
                 for v in op.args:
                     if isinstance(v, Variable):
                         die_at[v] = i
-                    elif isinstance(v, ListOfKind):
+                    elif isinstance(v, self.ListOfKind):
                         for v1 in v:
                             if isinstance(v1, Variable):
                                 die_at[v1] = i
@@ -51,7 +50,7 @@
             # Done.  XXX the code above this line runs 3 times
             # (for kind in KINDS) to produce the same result...
             livevars = [v for v in block.inputargs
-                          if getkind(v.concretetype) == self.kind]
+                          if self.consider_var(v)]
             # Add the variables of this block to the dependency graph
             for i, v in enumerate(livevars):
                 dg.add_node(v)
@@ -67,10 +66,10 @@
                         pass
                     die_index += 1
                 if (op.result is not None and
-                    getkind(op.result.concretetype) == self.kind):
+                        self.consider_var(op.result)):
                     dg.add_node(op.result)
                     for v in livevars:
-                        if getkind(v.concretetype) == self.kind:
+                        if self.consider_var(v):
                             dg.add_edge(v, op.result)
                     livevars.add(op.result)
         self._depgraph = dg
@@ -95,8 +94,8 @@
                     self._try_coalesce(v, link.target.inputargs[i])
 
     def _try_coalesce(self, v, w):
-        if isinstance(v, Variable) and getkind(v.concretetype) == self.kind:
-            assert getkind(w.concretetype) == self.kind
+        if isinstance(v, Variable) and self.consider_var(v):
+            assert self.consider_var(w)
             dg = self._depgraph
             uf = self._unionfind
             v0 = uf.find_rep(v)
diff --git a/pypy/translator/cli/metavm.py b/pypy/translator/cli/metavm.py
--- a/pypy/translator/cli/metavm.py
+++ b/pypy/translator/cli/metavm.py
@@ -1,6 +1,6 @@
 from pypy.translator.cli import oopspec
 from pypy.rpython.ootypesystem import ootype
-from pypy.rpython.lltypesystem import rffi
+from pypy.rpython.lltypesystem import lltype, rffi
 from pypy.translator.oosupport.metavm import Generator, InstructionList, MicroInstruction,\
      PushAllArgs, StoreResult, GetField, SetField, DownCast
 from pypy.translator.oosupport.metavm import _Call as _OOCall
@@ -173,6 +173,16 @@
         generator.load(v_obj)
         generator.ilasm.opcode('unbox.any', boxtype)
 
+class _UnboxType(MicroInstruction):
+    def __init__(self, TO):
+        self.TO = TO
+
+    def render(self, generator, op):
+        v_obj, = op.args
+        boxtype = generator.cts.lltype_to_cts(self.TO)
+        generator.load(v_obj)
+        generator.ilasm.opcode('unbox.any', boxtype)
+
 class _NewArray(MicroInstruction):
     def render(self, generator, op):
         v_type, v_length = op.args
@@ -312,6 +322,7 @@
 #CastWeakAdrToPtr = _CastWeakAdrToPtr()
 Box = _Box()
 Unbox = _Unbox()
+UnboxInt = _UnboxType(lltype.Signed)
 NewArray = _NewArray()
 GetArrayElem = _GetArrayElem()
 SetArrayElem = _SetArrayElem()
diff --git a/pypy/translator/cli/opcodes.py b/pypy/translator/cli/opcodes.py
--- a/pypy/translator/cli/opcodes.py
+++ b/pypy/translator/cli/opcodes.py
@@ -2,7 +2,7 @@
      IndirectCall, GetField, SetField, DownCast, NewCustomDict,\
      MapException, Box, Unbox, NewArray, GetArrayElem, SetArrayElem,\
      TypeOf, CastPrimitive, EventHandler, GetStaticField, SetStaticField, \
-     DebugPrint
+     DebugPrint, UnboxInt
 from pypy.translator.oosupport.metavm import PushArg, PushAllArgs, StoreResult, InstructionList,\
     New, RuntimeNew, CastTo, PushPrimitive, OOString, OOUnicode, OONewArray
 from pypy.translator.cli.cts import WEAKREF
@@ -48,6 +48,8 @@
     'cast_from_object':         [DownCast],
     'clibox':                   [Box],
     'cliunbox':                 [Unbox],
+    'oobox_int':                [Box],
+    'oounbox_int':              [UnboxInt],
     'cli_newarray':             [NewArray],
     'cli_getelem':              [GetArrayElem],
     'cli_setelem':              [SetArrayElem],
@@ -92,6 +94,7 @@
     'debug_fatalerror':         [PushAllArgs, 'call void [pypylib]pypy.runtime.Debug::DEBUG_FATALERROR(string)'],
     'keepalive':                Ignore,
     'jit_marker':               Ignore,
+    'jit_force_quasi_immutable':Ignore,
     'jit_force_virtualizable':  Ignore,
     'jit_force_virtual':        DoNothing,
     }
diff --git a/pypy/translator/cli/test/test_int.py b/pypy/translator/cli/test/test_int.py
--- a/pypy/translator/cli/test/test_int.py
+++ b/pypy/translator/cli/test/test_int.py
@@ -1,8 +1,8 @@
 import py
 from pypy.translator.cli.test.runtest import CliTest
-from pypy.rpython.test.test_rint import BaseTestRint
+from pypy.rpython.test.test_rint import TestOOtype as _TestOOtype # so py.test won't run the base test
 
-class TestCliInt(CliTest, BaseTestRint):
+class TestCliInt(CliTest, _TestOOtype):
     def test_char_constant(self):
         def dummyfn(i):
             return chr(i)
diff --git a/pypy/translator/jvm/opcodes.py b/pypy/translator/jvm/opcodes.py
--- a/pypy/translator/jvm/opcodes.py
+++ b/pypy/translator/jvm/opcodes.py
@@ -77,6 +77,8 @@
     'oosend':                   [JvmCallMethod, StoreResult],
     'ooupcast':                 DoNothing,
     'oodowncast':               [DownCast, StoreResult],
+    'oobox_int':                jvm.PYPYBOXINT,
+    'oounbox_int':              jvm.PYPYUNBOXINT,
     'cast_to_object':           DoNothing,
     'cast_from_object':         [DownCast, StoreResult],
     'instanceof':               [CastTo, StoreResult],
diff --git a/pypy/translator/jvm/src/pypy/PyPy.java b/pypy/translator/jvm/src/pypy/PyPy.java
--- a/pypy/translator/jvm/src/pypy/PyPy.java
+++ b/pypy/translator/jvm/src/pypy/PyPy.java
@@ -307,6 +307,14 @@
         return result;
     }
 
+    public static Object box_integer(int x) {
+        return new Integer(x);
+    }
+
+    public static int unbox_integer(Object o) {
+        Integer x = (Integer)o;
+        return x.intValue();
+    }
     // Used in testing the JVM backend:
     //
     //    A series of methods which serve a similar purpose to repr() in Python:
diff --git a/pypy/translator/jvm/test/test_int.py b/pypy/translator/jvm/test/test_int.py
--- a/pypy/translator/jvm/test/test_int.py
+++ b/pypy/translator/jvm/test/test_int.py
@@ -1,10 +1,11 @@
 import py
 from pypy.translator.jvm.test.runtest import JvmTest
 from pypy.rpython.test.test_rint import BaseTestRint
+from pypy.rpython.test.test_rint import TestOOtype as _TestOOtype # so py.test won't run the base test
 
 # ====> ../../../rpython/test/test_rint.py
 
-class TestJvmInt(JvmTest, BaseTestRint):
+class TestJvmInt(JvmTest, _TestOOtype):
     def test_char_constant(self):
         def dummyfn(i):
             return chr(i)
diff --git a/pypy/translator/jvm/typesystem.py b/pypy/translator/jvm/typesystem.py
--- a/pypy/translator/jvm/typesystem.py
+++ b/pypy/translator/jvm/typesystem.py
@@ -963,6 +963,8 @@
 PYPYRUNTIMENEW =        Method.s(jPyPy, 'RuntimeNew', (jClass,), jObject)
 PYPYSTRING2BYTES =      Method.s(jPyPy, 'string2bytes', (jString,), jByteArray)
 PYPYARRAYTOLIST =       Method.s(jPyPy, 'array_to_list', (jObjectArray,), jArrayList)
+PYPYBOXINT =            Method.s(jPyPy, 'box_integer', (jInt,), jObject)
+PYPYUNBOXINT =          Method.s(jPyPy, 'unbox_integer', (jObject,), jInt)
 PYPYOOPARSEFLOAT =      Method.v(jPyPy, 'ooparse_float', (jString,), jDouble)
 OBJECTGETCLASS =        Method.v(jObject, 'getClass', (), jClass)
 CLASSGETNAME =          Method.v(jClass, 'getName', (), jString)
diff --git a/pypy/translator/oosupport/test_template/operations.py b/pypy/translator/oosupport/test_template/operations.py
--- a/pypy/translator/oosupport/test_template/operations.py
+++ b/pypy/translator/oosupport/test_template/operations.py
@@ -1,3 +1,6 @@
+from pypy.rpython.lltypesystem.lloperation import llop
+from pypy.rpython.lltypesystem import lltype
+from pypy.rpython.ootypesystem import ootype
 from pypy.rlib.rarithmetic import r_uint, r_ulonglong, r_longlong, ovfcheck
 from pypy.rlib import rstack
 from pypy.annotation import model as annmodel