[pypy-commit] pypy set-strategies: merge default

l.diekmann noreply at buildbot.pypy.org
Tue Dec 20 13:44:07 CET 2011


Author: Lukas Diekmann <lukas.diekmann at uni-duesseldorf.de>
Branch: set-strategies
Changeset: r50750:a715f89d7712
Date: 2011-12-15 10:28 +0100
http://bitbucket.org/pypy/pypy/changeset/a715f89d7712/

Log:	merge default

diff too long, truncating to 10000 out of 27525 lines

diff --git a/lib-python/modified-2.7/ctypes/__init__.py b/lib-python/modified-2.7/ctypes/__init__.py
--- a/lib-python/modified-2.7/ctypes/__init__.py
+++ b/lib-python/modified-2.7/ctypes/__init__.py
@@ -351,7 +351,7 @@
         self._FuncPtr = _FuncPtr
 
         if handle is None:
-            self._handle = _ffi.CDLL(name)
+            self._handle = _ffi.CDLL(name, mode)
         else:
             self._handle = handle
 
diff --git a/lib-python/modified-2.7/ctypes/test/test_callbacks.py b/lib-python/modified-2.7/ctypes/test/test_callbacks.py
--- a/lib-python/modified-2.7/ctypes/test/test_callbacks.py
+++ b/lib-python/modified-2.7/ctypes/test/test_callbacks.py
@@ -1,5 +1,6 @@
 import unittest
 from ctypes import *
+from ctypes.test import xfail
 import _ctypes_test
 
 class Callbacks(unittest.TestCase):
@@ -98,6 +99,7 @@
 ##        self.check_type(c_char_p, "abc")
 ##        self.check_type(c_char_p, "def")
 
+    @xfail
     def test_pyobject(self):
         o = ()
         from sys import getrefcount as grc
diff --git a/lib-python/modified-2.7/ctypes/test/test_libc.py b/lib-python/modified-2.7/ctypes/test/test_libc.py
--- a/lib-python/modified-2.7/ctypes/test/test_libc.py
+++ b/lib-python/modified-2.7/ctypes/test/test_libc.py
@@ -25,7 +25,10 @@
         lib.my_qsort(chars, len(chars)-1, sizeof(c_char), comparefunc(sort))
         self.assertEqual(chars.raw, "   ,,aaaadmmmnpppsss\x00")
 
-    def test_no_more_xfail(self):
+    def SKIPPED_test_no_more_xfail(self):
+        # We decided to not explicitly support the whole ctypes-2.7
+        # and instead go for a case-by-case, demand-driven approach.
+        # So this test is skipped instead of failing.
         import socket
         import ctypes.test
         self.assertTrue(not hasattr(ctypes.test, 'xfail'),
diff --git a/lib_pypy/_collections.py b/lib_pypy/_collections.py
--- a/lib_pypy/_collections.py
+++ b/lib_pypy/_collections.py
@@ -379,12 +379,14 @@
 class defaultdict(dict):
     
     def __init__(self, *args, **kwds):
-        self.default_factory = None
-        if 'default_factory' in kwds:
-            self.default_factory = kwds.pop('default_factory')
-        elif len(args) > 0 and (callable(args[0]) or args[0] is None):
-            self.default_factory = args[0]
+        if len(args) > 0:
+            default_factory = args[0]
             args = args[1:]
+            if not callable(default_factory) and default_factory is not None:
+                raise TypeError("first argument must be callable")
+        else:
+            default_factory = None
+        self.default_factory = default_factory
         super(defaultdict, self).__init__(*args, **kwds)
  
     def __missing__(self, key):
@@ -404,7 +406,7 @@
             recurse.remove(id(self))
 
     def copy(self):
-        return type(self)(self, default_factory=self.default_factory)
+        return type(self)(self.default_factory, self)
     
     def __copy__(self):
         return self.copy()
diff --git a/lib_pypy/_sha.py b/lib_pypy/_sha.py
--- a/lib_pypy/_sha.py
+++ b/lib_pypy/_sha.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python
-# -*- coding: iso-8859-1
+# -*- coding: iso-8859-1 -*-
 
 # Note that PyPy contains also a built-in module 'sha' which will hide
 # this one if compiled in.
diff --git a/lib_pypy/distributed/socklayer.py b/lib_pypy/distributed/socklayer.py
--- a/lib_pypy/distributed/socklayer.py
+++ b/lib_pypy/distributed/socklayer.py
@@ -2,7 +2,7 @@
 import py
 from socket import socket
 
-XXX needs import adaptation as 'green' is removed from py lib for years 
+raise ImportError("XXX needs import adaptation as 'green' is removed from py lib for years")
 from py.impl.green.msgstruct import decodemessage, message
 from socket import socket, AF_INET, SOCK_STREAM
 import marshal
diff --git a/lib_pypy/itertools.py b/lib_pypy/itertools.py
--- a/lib_pypy/itertools.py
+++ b/lib_pypy/itertools.py
@@ -25,7 +25,7 @@
 
 __all__ = ['chain', 'count', 'cycle', 'dropwhile', 'groupby', 'ifilter',
            'ifilterfalse', 'imap', 'islice', 'izip', 'repeat', 'starmap',
-           'takewhile', 'tee']
+           'takewhile', 'tee', 'compress', 'product']
 
 try: from __pypy__ import builtinify
 except ImportError: builtinify = lambda f: f
diff --git a/py/_code/code.py b/py/_code/code.py
--- a/py/_code/code.py
+++ b/py/_code/code.py
@@ -164,6 +164,7 @@
         #   if something:  # assume this causes a NameError
         #      # _this_ lines and the one
                #        below we don't want from entry.getsource()
+        end = min(end, len(source))
         for i in range(self.lineno, end):
             if source[i].rstrip().endswith(':'):
                 end = i + 1
@@ -307,7 +308,7 @@
                     self._striptext = 'AssertionError: '
         self._excinfo = tup
         self.type, self.value, tb = self._excinfo
-        self.typename = self.type.__name__
+        self.typename = getattr(self.type, "__name__", "???")
         self.traceback = py.code.Traceback(tb)
 
     def __repr__(self):
diff --git a/pypy/annotation/binaryop.py b/pypy/annotation/binaryop.py
--- a/pypy/annotation/binaryop.py
+++ b/pypy/annotation/binaryop.py
@@ -252,7 +252,26 @@
     # unsignedness is considered a rare and contagious disease
 
     def union((int1, int2)):
-        knowntype = rarithmetic.compute_restype(int1.knowntype, int2.knowntype)
+        if int1.unsigned == int2.unsigned:
+            knowntype = rarithmetic.compute_restype(int1.knowntype, int2.knowntype)
+        else:
+            t1 = int1.knowntype
+            if t1 is bool:
+                t1 = int
+            t2 = int2.knowntype
+            if t2 is bool:
+                t2 = int
+
+            if t2 is int:
+                if int2.nonneg == False:
+                    raise UnionError, "Merging %s and a possibly negative int is not allowed" % t1
+                knowntype = t1
+            elif t1 is int:
+                if int1.nonneg == False:
+                    raise UnionError, "Merging %s and a possibly negative int is not allowed" % t2
+                knowntype = t2
+            else:
+                raise UnionError, "Merging these types (%s, %s) is not supported" % (t1, t2)
         return SomeInteger(nonneg=int1.nonneg and int2.nonneg,
                            knowntype=knowntype)
 
diff --git a/pypy/annotation/model.py b/pypy/annotation/model.py
--- a/pypy/annotation/model.py
+++ b/pypy/annotation/model.py
@@ -591,13 +591,11 @@
     immutable = True
     def __init__(self, method):
         self.method = method
-        
-NUMBER = object()
+
 annotation_to_ll_map = [
     (SomeSingleFloat(), lltype.SingleFloat),
     (s_None, lltype.Void),   # also matches SomeImpossibleValue()
     (s_Bool, lltype.Bool),
-    (SomeInteger(knowntype=r_ulonglong), NUMBER),    
     (SomeFloat(), lltype.Float),
     (SomeLongFloat(), lltype.LongFloat),
     (SomeChar(), lltype.Char),
@@ -623,10 +621,11 @@
             return lltype.Ptr(p.PARENTTYPE)
     if isinstance(s_val, SomePtr):
         return s_val.ll_ptrtype
+    if type(s_val) is SomeInteger:
+        return lltype.build_number(None, s_val.knowntype)
+
     for witness, T in annotation_to_ll_map:
         if witness.contains(s_val):
-            if T is NUMBER:
-                return lltype.build_number(None, s_val.knowntype)
             return T
     if info is None:
         info = ''
@@ -635,7 +634,7 @@
     raise ValueError("%sshould return a low-level type,\ngot instead %r" % (
         info, s_val))
 
-ll_to_annotation_map = dict([(ll, ann) for ann, ll in annotation_to_ll_map if ll is not NUMBER])
+ll_to_annotation_map = dict([(ll, ann) for ann, ll in annotation_to_ll_map])
 
 def lltype_to_annotation(T):
     try:
diff --git a/pypy/annotation/specialize.py b/pypy/annotation/specialize.py
--- a/pypy/annotation/specialize.py
+++ b/pypy/annotation/specialize.py
@@ -36,9 +36,7 @@
             newtup = SpaceOperation('newtuple', starargs, argscopy[-1])
             newstartblock.operations.append(newtup)
             newstartblock.closeblock(Link(argscopy, graph.startblock))
-            graph.startblock.isstartblock = False
             graph.startblock = newstartblock
-            newstartblock.isstartblock = True
             argnames = argnames + ['.star%d' % i for i in range(nb_extra_args)]
             graph.signature = Signature(argnames)
             # note that we can mostly ignore defaults: if nb_extra_args > 0, 
diff --git a/pypy/annotation/test/test_annrpython.py b/pypy/annotation/test/test_annrpython.py
--- a/pypy/annotation/test/test_annrpython.py
+++ b/pypy/annotation/test/test_annrpython.py
@@ -856,6 +856,46 @@
         py.test.raises(Exception, a.build_types, f, [])
         # if you want to get a r_uint, you have to be explicit about it
 
+    def test_add_different_ints(self):
+        def f(a, b):
+            return a + b
+        a = self.RPythonAnnotator()
+        py.test.raises(Exception, a.build_types, f, [r_uint, int])
+
+    def test_merge_different_ints(self):
+        def f(a, b):
+            if a:
+                c = a
+            else:
+                c = b
+            return c
+        a = self.RPythonAnnotator()
+        py.test.raises(Exception, a.build_types, f, [r_uint, int])
+
+    def test_merge_ruint_zero(self):
+        def f(a):
+            if a:
+                c = a
+            else:
+                c = 0
+            return c
+        a = self.RPythonAnnotator()
+        s = a.build_types(f, [r_uint])
+        assert s == annmodel.SomeInteger(nonneg = True, unsigned = True)
+
+    def test_merge_ruint_nonneg_signed(self):
+        def f(a, b):
+            if a:
+                c = a
+            else:
+                assert b >= 0
+                c = b
+            return c
+        a = self.RPythonAnnotator()
+        s = a.build_types(f, [r_uint, int])
+        assert s == annmodel.SomeInteger(nonneg = True, unsigned = True)
+
+
     def test_prebuilt_long_that_is_not_too_long(self):
         small_constant = 12L
         def f():
@@ -3029,7 +3069,7 @@
             if g(x, y):
                 g(x, r_uint(y))
         a = self.RPythonAnnotator()
-        a.build_types(f, [int, int])
+        py.test.raises(Exception, a.build_types, f, [int, int])
 
     def test_compare_with_zero(self):
         def g():
diff --git a/pypy/bin/checkmodule.py b/pypy/bin/checkmodule.py
--- a/pypy/bin/checkmodule.py
+++ b/pypy/bin/checkmodule.py
@@ -1,43 +1,45 @@
 #! /usr/bin/env python
 """
-Usage:  checkmodule.py [-b backend] <module-name>
+Usage:  checkmodule.py <module-name>
 
-Compiles the PyPy extension module from pypy/module/<module-name>/
-into a fake program which does nothing. Useful for testing whether a
-modules compiles without doing a full translation. Default backend is cli.
-
-WARNING: this is still incomplete: there are chances that the
-compilation fails with strange errors not due to the module. If a
-module is known to compile during a translation but don't pass
-checkmodule.py, please report the bug (or, better, correct it :-).
+Check annotation and rtyping of the PyPy extension module from
+pypy/module/<module-name>/.  Useful for testing whether a
+modules compiles without doing a full translation.
 """
 import autopath
-import sys
+import sys, os
 
 from pypy.objspace.fake.checkmodule import checkmodule
 
 def main(argv):
-    try:
-        assert len(argv) in (2, 4)
-        if len(argv) == 2:
-            backend = 'cli'
-            modname = argv[1]
-            if modname in ('-h', '--help'):
-                print >> sys.stderr, __doc__
-                sys.exit(0)
-            if modname.startswith('-'):
-                print >> sys.stderr, "Bad command line"
-                print >> sys.stderr, __doc__
-                sys.exit(1)
-        else:
-            _, b, backend, modname = argv
-            assert b == '-b'
-    except AssertionError:
+    if len(argv) != 2:
         print >> sys.stderr, __doc__
         sys.exit(2)
+    modname = argv[1]
+    if modname in ('-h', '--help'):
+        print >> sys.stderr, __doc__
+        sys.exit(0)
+    if modname.startswith('-'):
+        print >> sys.stderr, "Bad command line"
+        print >> sys.stderr, __doc__
+        sys.exit(1)
+    if os.path.sep in modname:
+        if os.path.basename(modname) == '':
+            modname = os.path.dirname(modname)
+        if os.path.basename(os.path.dirname(modname)) != 'module':
+            print >> sys.stderr, "Must give '../module/xxx', or just 'xxx'."
+            sys.exit(1)
+        modname = os.path.basename(modname)
+    try:
+        checkmodule(modname)
+    except Exception, e:
+        import traceback, pdb
+        traceback.print_exc()
+        pdb.post_mortem(sys.exc_info()[2])
+        return 1
     else:
-        checkmodule(modname, backend, interactive=True)
-        print 'Module compiled succesfully'
+        print 'Passed.'
+        return 0
 
 if __name__ == '__main__':
-    main(sys.argv)
+    sys.exit(main(sys.argv))
diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py
--- a/pypy/config/pypyoption.py
+++ b/pypy/config/pypyoption.py
@@ -252,6 +252,10 @@
                    "use small tuples",
                    default=False),
 
+        BoolOption("withspecialisedtuple",
+                   "use specialised tuples",
+                   default=False),
+
         BoolOption("withrope", "use ropes as the string implementation",
                    default=False,
                    requires=[("objspace.std.withstrslice", False),
@@ -365,6 +369,7 @@
         config.objspace.std.suggest(optimized_list_getitem=True)
         config.objspace.std.suggest(getattributeshortcut=True)
         config.objspace.std.suggest(newshortcut=True)
+        config.objspace.std.suggest(withspecialisedtuple=True)
         #if not IS_64_BITS:
         #    config.objspace.std.suggest(withsmalllong=True)
 
diff --git a/pypy/config/translationoption.py b/pypy/config/translationoption.py
--- a/pypy/config/translationoption.py
+++ b/pypy/config/translationoption.py
@@ -69,8 +69,8 @@
                      "statistics": [("translation.gctransformer", "framework")],
                      "generation": [("translation.gctransformer", "framework")],
                      "hybrid": [("translation.gctransformer", "framework")],
-                     "boehm": [("translation.gctransformer", "boehm"),
-                               ("translation.continuation", False)],  # breaks
+                     "boehm": [("translation.continuation", False),  # breaks
+                               ("translation.gctransformer", "boehm")],
                      "markcompact": [("translation.gctransformer", "framework")],
                      "minimark": [("translation.gctransformer", "framework")],
                      },
diff --git a/pypy/conftest.py b/pypy/conftest.py
--- a/pypy/conftest.py
+++ b/pypy/conftest.py
@@ -496,6 +496,17 @@
     def setup(self):
         super(AppClassCollector, self).setup()
         cls = self.obj
+        #
+        # <hack>
+        for name in dir(cls):
+            if name.startswith('test_'):
+                func = getattr(cls, name, None)
+                code = getattr(func, 'func_code', None)
+                if code and code.co_flags & 32:
+                    raise AssertionError("unsupported: %r is a generator "
+                                         "app-level test method" % (name,))
+        # </hack>
+        #
         space = cls.space
         clsname = cls.__name__
         if self.config.option.runappdirect:
diff --git a/pypy/doc/conf.py b/pypy/doc/conf.py
--- a/pypy/doc/conf.py
+++ b/pypy/doc/conf.py
@@ -45,9 +45,9 @@
 # built documents.
 #
 # The short X.Y version.
-version = '1.6'
+version = '1.7'
 # The full version, including alpha/beta/rc tags.
-release = '1.6'
+release = '1.7'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
diff --git a/pypy/doc/config/objspace.std.withspecialisedtuple.txt b/pypy/doc/config/objspace.std.withspecialisedtuple.txt
new file mode 100644
--- /dev/null
+++ b/pypy/doc/config/objspace.std.withspecialisedtuple.txt
@@ -0,0 +1,3 @@
+Use "specialized tuples", a custom implementation for some common kinds
+of tuples.  Currently limited to tuples of length 2, in three variants:
+(int, int), (float, float), and a generic (object, object).
diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst
--- a/pypy/doc/cpython_differences.rst
+++ b/pypy/doc/cpython_differences.rst
@@ -304,5 +304,14 @@
   never a dictionary as it sometimes is in CPython. Assigning to
   ``__builtins__`` has no effect.
 
+* directly calling the internal magic methods of a few built-in types
+  with invalid arguments may have a slightly different result.  For
+  example, ``[].__add__(None)`` and ``(2).__add__(None)`` both return
+  ``NotImplemented`` on PyPy; on CPython, only the latter does, and the
+  former raises ``TypeError``.  (Of course, ``[]+None`` and ``2+None``
+  both raise ``TypeError`` everywhere.)  This difference is an
+  implementation detail that shows up because of internal C-level slots
+  that PyPy does not have.
+
 
 .. include:: _ref.txt
diff --git a/pypy/doc/faq.rst b/pypy/doc/faq.rst
--- a/pypy/doc/faq.rst
+++ b/pypy/doc/faq.rst
@@ -112,10 +112,32 @@
 You might be interested in our `benchmarking site`_ and our 
 `jit documentation`_.
 
+Note that the JIT has a very high warm-up cost, meaning that the
+programs are slow at the beginning.  If you want to compare the timings
+with CPython, even relatively simple programs need to run *at least* one
+second, preferably at least a few seconds.  Large, complicated programs
+need even more time to warm up the JIT.
+
 .. _`benchmarking site`: http://speed.pypy.org
 
 .. _`jit documentation`: jit/index.html
 
+---------------------------------------------------------------
+Couldn't the JIT dump and reload already-compiled machine code?
+---------------------------------------------------------------
+
+No, we found no way of doing that.  The JIT generates machine code
+containing a large number of constant addresses --- constant at the time
+the machine code is written.  The vast majority of them are probably not
+constants that you find in the executable, with a nice link name.  E.g.
+the addresses of Python classes are used all the time, but Python
+classes don't come statically from the executable; they are created anew
+every time you restart your program.  This makes saving and reloading
+machine code completely impossible without some very advanced way of
+mapping addresses in the old (now-dead) process to addresses in the new
+process, including checking that all the previous assumptions about the
+(now-dead) object are still true about the new object.
+
 
 .. _`prolog and javascript`:
 
diff --git a/pypy/doc/index.rst b/pypy/doc/index.rst
--- a/pypy/doc/index.rst
+++ b/pypy/doc/index.rst
@@ -15,7 +15,7 @@
 
 * `FAQ`_: some frequently asked questions.
 
-* `Release 1.6`_: the latest official release
+* `Release 1.7`_: the latest official release
 
 * `PyPy Blog`_: news and status info about PyPy 
 
@@ -75,7 +75,7 @@
 .. _`Getting Started`: getting-started.html
 .. _`Papers`: extradoc.html
 .. _`Videos`: video-index.html
-.. _`Release 1.6`: http://pypy.org/download.html
+.. _`Release 1.7`: http://pypy.org/download.html
 .. _`speed.pypy.org`: http://speed.pypy.org
 .. _`RPython toolchain`: translation.html
 .. _`potential project ideas`: project-ideas.html
@@ -120,9 +120,9 @@
 Windows, on top of .NET, and on top of Java.
 To dig into PyPy it is recommended to try out the current
 Mercurial default branch, which is always working or mostly working,
-instead of the latest release, which is `1.6`__.
+instead of the latest release, which is `1.7`__.
 
-.. __: release-1.6.0.html
+.. __: release-1.7.0.html
 
 PyPy is mainly developed on Linux and Mac OS X.  Windows is supported,
 but platform-specific bugs tend to take longer before we notice and fix
diff --git a/pypy/interpreter/astcompiler/optimize.py b/pypy/interpreter/astcompiler/optimize.py
--- a/pypy/interpreter/astcompiler/optimize.py
+++ b/pypy/interpreter/astcompiler/optimize.py
@@ -1,6 +1,5 @@
 """codegen helpers and AST constant folding."""
 import sys
-import itertools
 
 from pypy.interpreter.astcompiler import ast, consts, misc
 from pypy.tool import stdlib_opcode as ops
@@ -146,8 +145,7 @@
 }
 unrolling_unary_folders = unrolling_iterable(unary_folders.items())
 
-for folder in itertools.chain(binary_folders.itervalues(),
-                              unary_folders.itervalues()):
+for folder in binary_folders.values() + unary_folders.values():
     folder._always_inline_ = True
 del folder
 
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -1,4 +1,3 @@
-import itertools
 import pypy
 from pypy.interpreter.executioncontext import ExecutionContext, ActionFlag
 from pypy.interpreter.executioncontext import UserDelAction, FrameTraceAction
@@ -188,6 +187,12 @@
 
     # -------------------------------------------------------------------
 
+    def is_w(self, space, w_other):
+        return self is w_other
+
+    def immutable_unique_id(self, space):
+        return None
+
     def str_w(self, space):
         w_msg = typed_unwrap_error_msg(space, "string", self)
         raise OperationError(space.w_TypeError, w_msg)
@@ -482,6 +487,16 @@
         'parser', 'fcntl', '_codecs', 'binascii'
     ]
 
+    # These modules are treated like CPython treats built-in modules,
+    # i.e. they always shadow any xx.py.  The other modules are treated
+    # like CPython treats extension modules, and are loaded in sys.path
+    # order by the fake entry '.../lib_pypy/__extensions__'.
+    MODULES_THAT_ALWAYS_SHADOW = dict.fromkeys([
+        '__builtin__', '__pypy__', '_ast', '_codecs', '_sre', '_warnings',
+        '_weakref', 'errno', 'exceptions', 'gc', 'imp', 'marshal',
+        'posix', 'nt', 'pwd', 'signal', 'sys', 'thread', 'zipimport',
+    ], None)
+
     def make_builtins(self):
         "NOT_RPYTHON: only for initializing the space."
 
@@ -513,8 +528,8 @@
         exception_types_w = self.export_builtin_exceptions()
 
         # initialize with "bootstrap types" from objspace  (e.g. w_None)
-        types_w = itertools.chain(self.get_builtin_types().iteritems(),
-                                  exception_types_w.iteritems())
+        types_w = (self.get_builtin_types().items() +
+                   exception_types_w.items())
         for name, w_type in types_w:
             self.setitem(self.builtin.w_dict, self.wrap(name), w_type)
 
@@ -681,9 +696,20 @@
         """shortcut for space.is_true(space.eq(w_obj1, w_obj2))"""
         return self.is_w(w_obj1, w_obj2) or self.is_true(self.eq(w_obj1, w_obj2))
 
-    def is_w(self, w_obj1, w_obj2):
-        """shortcut for space.is_true(space.is_(w_obj1, w_obj2))"""
-        return self.is_true(self.is_(w_obj1, w_obj2))
+    def is_(self, w_one, w_two):
+        return self.newbool(self.is_w(w_one, w_two))
+
+    def is_w(self, w_one, w_two):
+        # done by a method call on w_two (and not on w_one, because of the
+        # expected programming style where we say "if x is None" or
+        # "if x is object").
+        return w_two.is_w(self, w_one)
+
+    def id(self, w_obj):
+        w_result = w_obj.immutable_unique_id(self)
+        if w_result is None:
+            w_result = self.wrap(compute_unique_id(w_obj))
+        return w_result
 
     def hash_w(self, w_obj):
         """shortcut for space.int_w(space.hash(w_obj))"""
@@ -1023,9 +1049,6 @@
     def isinstance_w(self, w_obj, w_type):
         return self.is_true(self.isinstance(w_obj, w_type))
 
-    def id(self, w_obj):
-        return self.wrap(compute_unique_id(w_obj))
-
     # The code below only works
     # for the simple case (new-style instance).
     # These methods are patched with the full logic by the __builtin__
@@ -1597,6 +1620,8 @@
     'UnicodeError',
     'ValueError',
     'ZeroDivisionError',
+    'UnicodeEncodeError',
+    'UnicodeDecodeError',
     ]
 
 ## Irregular part of the interface:
diff --git a/pypy/interpreter/pyframe.py b/pypy/interpreter/pyframe.py
--- a/pypy/interpreter/pyframe.py
+++ b/pypy/interpreter/pyframe.py
@@ -10,7 +10,7 @@
 from pypy.rlib.objectmodel import we_are_translated, instantiate
 from pypy.rlib.jit import hint
 from pypy.rlib.debug import make_sure_not_resized, check_nonneg
-from pypy.rlib.rarithmetic import intmask
+from pypy.rlib.rarithmetic import intmask, r_uint
 from pypy.rlib import jit
 from pypy.tool import stdlib_opcode
 from pypy.tool.stdlib_opcode import host_bytecode_spec
@@ -167,7 +167,7 @@
                 # Execution starts just after the last_instr.  Initially,
                 # last_instr is -1.  After a generator suspends it points to
                 # the YIELD_VALUE instruction.
-                next_instr = self.last_instr + 1
+                next_instr = r_uint(self.last_instr + 1)
                 if next_instr != 0:
                     self.pushvalue(w_inputvalue)
             #
@@ -691,6 +691,7 @@
     handlerposition = space.int_w(w_handlerposition)
     valuestackdepth = space.int_w(w_valuestackdepth)
     assert valuestackdepth >= 0
+    assert handlerposition >= 0
     blk = instantiate(get_block_class(opname))
     blk.handlerposition = handlerposition
     blk.valuestackdepth = valuestackdepth
diff --git a/pypy/interpreter/pyopcode.py b/pypy/interpreter/pyopcode.py
--- a/pypy/interpreter/pyopcode.py
+++ b/pypy/interpreter/pyopcode.py
@@ -837,6 +837,7 @@
         raise Yield
 
     def jump_absolute(self, jumpto, next_instr, ec):
+        check_nonneg(jumpto)
         return jumpto
 
     def JUMP_FORWARD(self, jumpby, next_instr):
@@ -1278,7 +1279,7 @@
 
     def handle(self, frame, unroller):
         next_instr = self.really_handle(frame, unroller)   # JIT hack
-        return next_instr
+        return r_uint(next_instr)
 
     def really_handle(self, frame, unroller):
         """ Purely abstract method
diff --git a/pypy/interpreter/typedef.py b/pypy/interpreter/typedef.py
--- a/pypy/interpreter/typedef.py
+++ b/pypy/interpreter/typedef.py
@@ -54,7 +54,11 @@
 #  Hash support
 
 def default_identity_hash(space, w_obj):
-    return space.wrap(compute_identity_hash(w_obj))
+    w_unique_id = w_obj.immutable_unique_id(space)
+    if w_unique_id is None:     # common case
+        return space.wrap(compute_identity_hash(w_obj))
+    else:
+        return space.hash(w_unique_id)
 
 # ____________________________________________________________
 #
diff --git a/pypy/jit/backend/llgraph/llimpl.py b/pypy/jit/backend/llgraph/llimpl.py
--- a/pypy/jit/backend/llgraph/llimpl.py
+++ b/pypy/jit/backend/llgraph/llimpl.py
@@ -8,6 +8,7 @@
 from pypy.objspace.flow.model import Variable, Constant
 from pypy.annotation import model as annmodel
 from pypy.jit.metainterp.history import REF, INT, FLOAT
+from pypy.jit.metainterp import history
 from pypy.jit.codewriter import heaptracker
 from pypy.rpython.lltypesystem import lltype, llmemory, rclass, rstr, rffi
 from pypy.rpython.ootypesystem import ootype
@@ -20,7 +21,7 @@
 from pypy.jit.backend.llgraph import symbolic
 from pypy.jit.codewriter import longlong
 
-from pypy.rlib import libffi
+from pypy.rlib import libffi, clibffi
 from pypy.rlib.objectmodel import ComputedIntSymbolic, we_are_translated
 from pypy.rlib.rarithmetic import ovfcheck
 from pypy.rlib.rarithmetic import r_longlong, r_ulonglong, r_uint
@@ -48,6 +49,11 @@
         value._the_opaque_pointer = op
         return op
 
+def _normalize(value):
+    if isinstance(value, lltype._ptr):
+        value = lltype.top_container(value._obj)
+    return value
+
 def from_opaque_string(s):
     if isinstance(s, str):
         return s
@@ -322,6 +328,14 @@
     _variables.append(v)
     return r
 
+def compile_started_vars(clt):
+    if not hasattr(clt, '_debug_argtypes'):    # only when compiling the loop
+        argtypes = [v.concretetype for v in _variables]
+        try:
+            clt._debug_argtypes = argtypes
+        except AttributeError:    # when 'clt' is actually a translated
+            pass                  # GcStruct
+
 def compile_add(loop, opnum):
     loop = _from_opaque(loop)
     loop.operations.append(Operation(opnum))
@@ -347,6 +361,16 @@
     op = loop.operations[-1]
     op.descr = weakref.ref(descr)
 
+TARGET_TOKENS = weakref.WeakKeyDictionary()
+
+def compile_add_target_token(loop, descr, clt):
+    # here, 'clt' is the compiled_loop_token of the original loop that
+    # we are compiling
+    loop = _from_opaque(loop)
+    op = loop.operations[-1]
+    descrobj = _normalize(descr)
+    TARGET_TOKENS[descrobj] = loop, len(loop.operations), op.args, clt
+
 def compile_add_var(loop, intvar):
     loop = _from_opaque(loop)
     op = loop.operations[-1]
@@ -381,13 +405,25 @@
     _variables.append(v)
     return r
 
-def compile_add_jump_target(loop, loop_target):
+def compile_add_jump_target(loop, targettoken, source_clt):
     loop = _from_opaque(loop)
-    loop_target = _from_opaque(loop_target)
+    descrobj = _normalize(targettoken)
+    (loop_target, target_opindex, target_inputargs, target_clt
+        ) = TARGET_TOKENS[descrobj]
+    #
+    try:
+        assert source_clt._debug_argtypes == target_clt._debug_argtypes
+    except AttributeError:   # when translated
+        pass
+    #
     op = loop.operations[-1]
     op.jump_target = loop_target
+    op.jump_target_opindex = target_opindex
+    op.jump_target_inputargs = target_inputargs
     assert op.opnum == rop.JUMP
-    assert len(op.args) == len(loop_target.inputargs)
+    assert [v.concretetype for v in op.args] == (
+           [v.concretetype for v in target_inputargs])
+    #
     if loop_target == loop:
         log.info("compiling new loop")
     else:
@@ -521,10 +557,11 @@
                 self.opindex += 1
                 continue
             if op.opnum == rop.JUMP:
-                assert len(op.jump_target.inputargs) == len(args)
-                self.env = dict(zip(op.jump_target.inputargs, args))
+                inputargs = op.jump_target_inputargs
+                assert len(inputargs) == len(args)
+                self.env = dict(zip(inputargs, args))
                 self.loop = op.jump_target
-                self.opindex = 0
+                self.opindex = op.jump_target_opindex
                 _stats.exec_jumps += 1
             elif op.opnum == rop.FINISH:
                 if self.verbose:
@@ -617,6 +654,15 @@
         #
         return _op_default_implementation
 
+    def op_label(self, _, *args):
+        op = self.loop.operations[self.opindex]
+        assert op.opnum == rop.LABEL
+        assert len(op.args) == len(args)
+        newenv = {}
+        for v, value in zip(op.args, args):
+            newenv[v] = value
+        self.env = newenv
+
     def op_debug_merge_point(self, _, *args):
         from pypy.jit.metainterp.warmspot import get_stats
         try:
@@ -959,6 +1005,7 @@
         self._may_force = self.opindex
         try:
             inpargs = _from_opaque(ctl.compiled_version).inputargs
+            assert len(inpargs) == len(args)
             for i, inparg in enumerate(inpargs):
                 TYPE = inparg.concretetype
                 if TYPE is lltype.Signed:
@@ -1432,6 +1479,10 @@
     res = _getinteriorfield_raw(libffi.types.slong, array, index, width, ofs)
     return res
 
+def do_getinteriorfield_raw_float(array, index, width, ofs):
+    res = _getinteriorfield_raw(libffi.types.double, array, index, width, ofs)
+    return res
+
 def _getfield_raw(struct, fieldnum):
     STRUCT, fieldname = symbolic.TokenToField[fieldnum]
     ptr = cast_from_int(lltype.Ptr(STRUCT), struct)
@@ -1510,12 +1561,17 @@
 do_setinteriorfield_gc_float = new_setinteriorfield_gc(cast_from_floatstorage)
 do_setinteriorfield_gc_ptr = new_setinteriorfield_gc(cast_from_ptr)
 
-def new_setinteriorfield_raw(ffitype):
+def new_setinteriorfield_raw(cast_func, ffitype):
     def do_setinteriorfield_raw(array, index, newvalue, width, ofs):
         addr = rffi.cast(rffi.VOIDP, array)
+        for TYPE, ffitype2 in clibffi.ffitype_map:
+            if ffitype2 is ffitype:
+                newvalue = cast_func(TYPE, newvalue)
+                break
         return libffi.array_setitem(ffitype, width, addr, index, ofs, newvalue)
     return do_setinteriorfield_raw
-do_setinteriorfield_raw_int = new_setinteriorfield_raw(libffi.types.slong)
+do_setinteriorfield_raw_int = new_setinteriorfield_raw(cast_from_int, libffi.types.slong)
+do_setinteriorfield_raw_float = new_setinteriorfield_raw(cast_from_floatstorage, libffi.types.double)
 
 def do_setfield_raw_int(struct, fieldnum, newvalue):
     STRUCT, fieldname = symbolic.TokenToField[fieldnum]
@@ -1779,9 +1835,11 @@
 setannotation(compile_start_int_var, annmodel.SomeInteger())
 setannotation(compile_start_ref_var, annmodel.SomeInteger())
 setannotation(compile_start_float_var, annmodel.SomeInteger())
+setannotation(compile_started_vars, annmodel.s_None)
 setannotation(compile_add, annmodel.s_None)
 setannotation(compile_add_descr, annmodel.s_None)
 setannotation(compile_add_descr_arg, annmodel.s_None)
+setannotation(compile_add_target_token, annmodel.s_None)
 setannotation(compile_add_var, annmodel.s_None)
 setannotation(compile_add_int_const, annmodel.s_None)
 setannotation(compile_add_ref_const, annmodel.s_None)
diff --git a/pypy/jit/backend/llgraph/runner.py b/pypy/jit/backend/llgraph/runner.py
--- a/pypy/jit/backend/llgraph/runner.py
+++ b/pypy/jit/backend/llgraph/runner.py
@@ -138,29 +138,30 @@
         clt = original_loop_token.compiled_loop_token
         clt.loop_and_bridges.append(c)
         clt.compiling_a_bridge()
-        self._compile_loop_or_bridge(c, inputargs, operations)
+        self._compile_loop_or_bridge(c, inputargs, operations, clt)
         old, oldindex = faildescr._compiled_fail
         llimpl.compile_redirect_fail(old, oldindex, c)
 
-    def compile_loop(self, inputargs, operations, looptoken, log=True, name=''):
+    def compile_loop(self, inputargs, operations, jitcell_token,
+                     log=True, name=''):
         """In a real assembler backend, this should assemble the given
         list of operations.  Here we just generate a similar CompiledLoop
         instance.  The code here is RPython, whereas the code in llimpl
         is not.
         """
         c = llimpl.compile_start()
-        clt = model.CompiledLoopToken(self, looptoken.number)
+        clt = model.CompiledLoopToken(self, jitcell_token.number)
         clt.loop_and_bridges = [c]
         clt.compiled_version = c
-        looptoken.compiled_loop_token = clt
-        self._compile_loop_or_bridge(c, inputargs, operations)
+        jitcell_token.compiled_loop_token = clt
+        self._compile_loop_or_bridge(c, inputargs, operations, clt)
 
     def free_loop_and_bridges(self, compiled_loop_token):
         for c in compiled_loop_token.loop_and_bridges:
             llimpl.mark_as_free(c)
         model.AbstractCPU.free_loop_and_bridges(self, compiled_loop_token)
 
-    def _compile_loop_or_bridge(self, c, inputargs, operations):
+    def _compile_loop_or_bridge(self, c, inputargs, operations, clt):
         var2index = {}
         for box in inputargs:
             if isinstance(box, history.BoxInt):
@@ -172,10 +173,11 @@
                 var2index[box] = llimpl.compile_start_float_var(c)
             else:
                 raise Exception("box is: %r" % (box,))
-        self._compile_operations(c, operations, var2index)
+        llimpl.compile_started_vars(clt)
+        self._compile_operations(c, operations, var2index, clt)
         return c
 
-    def _compile_operations(self, c, operations, var2index):
+    def _compile_operations(self, c, operations, var2index, clt):
         for op in operations:
             llimpl.compile_add(c, op.getopnum())
             descr = op.getdescr()
@@ -183,9 +185,11 @@
                 llimpl.compile_add_descr(c, descr.ofs, descr.typeinfo,
                                          descr.arg_types, descr.extrainfo,
                                          descr.width)
-            if (isinstance(descr, history.LoopToken) and
-                op.getopnum() != rop.JUMP):
+            if isinstance(descr, history.JitCellToken):
+                assert op.getopnum() != rop.JUMP
                 llimpl.compile_add_loop_token(c, descr)
+            if isinstance(descr, history.TargetToken) and op.getopnum() == rop.LABEL:
+                llimpl.compile_add_target_token(c, descr, clt)
             if self.is_oo and isinstance(descr, (OODescr, MethDescr)):
                 # hack hack, not rpython
                 c._obj.externalobj.operations[-1].setdescr(descr)
@@ -239,9 +243,7 @@
         assert op.is_final()
         if op.getopnum() == rop.JUMP:
             targettoken = op.getdescr()
-            assert isinstance(targettoken, history.LoopToken)
-            compiled_version = targettoken.compiled_loop_token.compiled_version
-            llimpl.compile_add_jump_target(c, compiled_version)
+            llimpl.compile_add_jump_target(c, targettoken, clt)
         elif op.getopnum() == rop.FINISH:
             faildescr = op.getdescr()
             index = self.get_fail_descr_number(faildescr)
@@ -260,21 +262,28 @@
         self.latest_frame = frame
         return fail_index
 
-    def execute_token(self, loop_token):
-        """Calls the assembler generated for the given loop.
-        Returns the ResOperation that failed, of type rop.FAIL.
-        """
-        fail_index = self._execute_token(loop_token)
-        return self.get_fail_descr_from_number(fail_index)
-
-    def set_future_value_int(self, index, intvalue):
-        llimpl.set_future_value_int(index, intvalue)
-
-    def set_future_value_ref(self, index, objvalue):
-        llimpl.set_future_value_ref(index, objvalue)
-
-    def set_future_value_float(self, index, floatvalue):
-        llimpl.set_future_value_float(index, floatvalue)
+    def make_execute_token(self, *argtypes):
+        nb_args = len(argtypes)
+        unroll_argtypes = unrolling_iterable(list(enumerate(argtypes)))
+        #
+        def execute_token(loop_token, *args):
+            assert len(args) == nb_args
+            for index, TYPE in unroll_argtypes:
+                x = args[index]
+                assert TYPE == lltype.typeOf(x)
+                if TYPE == lltype.Signed:
+                    llimpl.set_future_value_int(index, x)
+                elif TYPE == llmemory.GCREF:
+                    llimpl.set_future_value_ref(index, x)
+                elif TYPE == longlong.FLOATSTORAGE:
+                    llimpl.set_future_value_float(index, x)
+                else:
+                    assert 0
+            #
+            fail_index = self._execute_token(loop_token)
+            return self.get_fail_descr_from_number(fail_index)
+        #
+        return execute_token
 
     def get_latest_value_int(self, index):
         return llimpl.frame_int_getvalue(self.latest_frame, index)
diff --git a/pypy/jit/backend/llsupport/asmmemmgr.py b/pypy/jit/backend/llsupport/asmmemmgr.py
--- a/pypy/jit/backend/llsupport/asmmemmgr.py
+++ b/pypy/jit/backend/llsupport/asmmemmgr.py
@@ -37,25 +37,25 @@
             self._add_free_block(smaller_stop, stop)
             stop = smaller_stop
             result = (start, stop)
-        self.total_mallocs += stop - start
+        self.total_mallocs += r_uint(stop - start)
         return result   # pair (start, stop)
 
     def free(self, start, stop):
         """Free a block (start, stop) returned by a previous malloc()."""
-        self.total_mallocs -= (stop - start)
+        self.total_mallocs -= r_uint(stop - start)
         self._add_free_block(start, stop)
 
     def open_malloc(self, minsize):
         """Allocate at least minsize bytes.  Returns (start, stop)."""
         result = self._allocate_block(minsize)
         (start, stop) = result
-        self.total_mallocs += stop - start
+        self.total_mallocs += r_uint(stop - start)
         return result
 
     def open_free(self, middle, stop):
         """Used for freeing the end of an open-allocated block of memory."""
         if stop - middle >= self.min_fragment:
-            self.total_mallocs -= (stop - middle)
+            self.total_mallocs -= r_uint(stop - middle)
             self._add_free_block(middle, stop)
             return True
         else:
@@ -77,7 +77,7 @@
                 # Hack to make sure that mcs are not within 32-bits of one
                 # another for testing purposes
                 rmmap.hint.pos += 0x80000000 - size
-        self.total_memory_allocated += size
+        self.total_memory_allocated += r_uint(size)
         data = rffi.cast(lltype.Signed, data)
         return self._add_free_block(data, data + size)
 
diff --git a/pypy/jit/backend/llsupport/gc.py b/pypy/jit/backend/llsupport/gc.py
--- a/pypy/jit/backend/llsupport/gc.py
+++ b/pypy/jit/backend/llsupport/gc.py
@@ -648,14 +648,10 @@
         # make a malloc function, with two arguments
         def malloc_basic(size, tid):
             type_id = llop.extract_ushort(llgroup.HALFWORD, tid)
-            has_finalizer = bool(tid & (1<<llgroup.HALFSHIFT))
-            has_light_finalizer = bool(tid & (1<<(llgroup.HALFSHIFT + 1)))
             check_typeid(type_id)
             res = llop1.do_malloc_fixedsize_clear(llmemory.GCREF,
                                                   type_id, size,
-                                                  has_finalizer,
-                                                  has_light_finalizer,
-                                                  False)
+                                                  False, False, False)
             # In case the operation above failed, we are returning NULL
             # from this function to assembler.  There is also an RPython
             # exception set, typically MemoryError; but it's easier and
@@ -749,11 +745,8 @@
     def init_size_descr(self, S, descr):
         type_id = self.layoutbuilder.get_type_id(S)
         assert not self.layoutbuilder.is_weakref_type(S)
-        has_finalizer = bool(self.layoutbuilder.has_finalizer(S))
-        has_light_finalizer = bool(self.layoutbuilder.has_light_finalizer(S))
-        flags = (int(has_finalizer) << llgroup.HALFSHIFT |
-                 int(has_light_finalizer) << (llgroup.HALFSHIFT + 1))
-        descr.tid = llop.combine_ushort(lltype.Signed, type_id, flags)
+        assert not self.layoutbuilder.has_finalizer(S)
+        descr.tid = llop.combine_ushort(lltype.Signed, type_id, 0)
 
     def init_array_descr(self, A, descr):
         type_id = self.layoutbuilder.get_type_id(A)
@@ -830,6 +823,15 @@
                                             bool(v.value)): # store a non-NULL
                         self._gen_write_barrier(newops, op.getarg(0), v)
                         op = op.copy_and_change(rop.SETFIELD_RAW)
+            # ---------- write barrier for SETINTERIORFIELD_GC ------
+            if op.getopnum() == rop.SETINTERIORFIELD_GC:
+                val = op.getarg(0)
+                if val is not last_malloc:
+                    v = op.getarg(2)
+                    if isinstance(v, BoxPtr) or (isinstance(v, ConstPtr) and
+                                            bool(v.value)): # store a non-NULL
+                        self._gen_write_barrier(newops, op.getarg(0), v)
+                        op = op.copy_and_change(rop.SETINTERIORFIELD_RAW)
             # ---------- write barrier for SETARRAYITEM_GC ----------
             if op.getopnum() == rop.SETARRAYITEM_GC:
                 val = op.getarg(0)
diff --git a/pypy/jit/backend/llsupport/regalloc.py b/pypy/jit/backend/llsupport/regalloc.py
--- a/pypy/jit/backend/llsupport/regalloc.py
+++ b/pypy/jit/backend/llsupport/regalloc.py
@@ -16,32 +16,106 @@
     """ Manage frame positions
     """
     def __init__(self):
-        self.frame_bindings = {}
-        self.frame_depth    = 0
+        self.bindings = {}
+        self.used = []      # list of bools
+        self.hint_frame_locations = {}
+
+    frame_depth = property(lambda:xxx, lambda:xxx)   # XXX kill me
+
+    def get_frame_depth(self):
+        return len(self.used)
 
     def get(self, box):
-        return self.frame_bindings.get(box, None)
+        return self.bindings.get(box, None)
 
     def loc(self, box):
-        res = self.get(box)
-        if res is not None:
-            return res
+        """Return or create the frame location associated with 'box'."""
+        # first check if it's already in the frame_manager
+        try:
+            return self.bindings[box]
+        except KeyError:
+            pass
+        # check if we have a hint for this box
+        if box in self.hint_frame_locations:
+            # if we do, try to reuse the location for this box
+            loc = self.hint_frame_locations[box]
+            if self.try_to_reuse_location(box, loc):
+                return loc
+        # no valid hint.  make up a new free location
+        return self.get_new_loc(box)
+
+    def get_new_loc(self, box):
         size = self.frame_size(box.type)
-        self.frame_depth += ((-self.frame_depth) & (size-1))
-        # ^^^ frame_depth is rounded up to a multiple of 'size', assuming
+        # frame_depth is rounded up to a multiple of 'size', assuming
         # that 'size' is a power of two.  The reason for doing so is to
         # avoid obscure issues in jump.py with stack locations that try
         # to move from position (6,7) to position (7,8).
-        newloc = self.frame_pos(self.frame_depth, box.type)
-        self.frame_bindings[box] = newloc
-        self.frame_depth += size
+        while self.get_frame_depth() & (size - 1):
+            self.used.append(False)
+        #
+        index = self.get_frame_depth()
+        newloc = self.frame_pos(index, box.type)
+        for i in range(size):
+            self.used.append(True)
+        #
+        if not we_are_translated():    # extra testing
+            testindex = self.get_loc_index(newloc)
+            assert testindex == index
+        #
+        self.bindings[box] = newloc
         return newloc
 
+    def set_binding(self, box, loc):
+        self.bindings[box] = loc
+        #
+        index = self.get_loc_index(loc)
+        if index < 0:
+            return
+        endindex = index + self.frame_size(box.type)
+        while len(self.used) < endindex:
+            self.used.append(False)
+        while index < endindex:
+            self.used[index] = True
+            index += 1
+
     def reserve_location_in_frame(self, size):
-        frame_depth = self.frame_depth
-        self.frame_depth += size
+        frame_depth = self.get_frame_depth()
+        for i in range(size):
+            self.used.append(True)
         return frame_depth
 
+    def mark_as_free(self, box):
+        try:
+            loc = self.bindings[box]
+        except KeyError:
+            return    # already gone
+        del self.bindings[box]
+        #
+        size = self.frame_size(box.type)
+        baseindex = self.get_loc_index(loc)
+        if baseindex < 0:
+            return
+        for i in range(size):
+            index = baseindex + i
+            assert 0 <= index < len(self.used)
+            self.used[index] = False
+
+    def try_to_reuse_location(self, box, loc):
+        index = self.get_loc_index(loc)
+        if index < 0:
+            return False
+        size = self.frame_size(box.type)
+        for i in range(size):
+            while (index + i) >= len(self.used):
+                self.used.append(False)
+            if self.used[index + i]:
+                return False    # already in use
+        # good, we can reuse the location
+        for i in range(size):
+            self.used[index + i] = True
+        self.bindings[box] = loc
+        return True
+
     # abstract methods that need to be overwritten for specific assemblers
     @staticmethod
     def frame_pos(loc, type):
@@ -49,6 +123,10 @@
     @staticmethod
     def frame_size(type):
         return 1
+    @staticmethod
+    def get_loc_index(loc):
+        raise NotImplementedError("Purely abstract")
+
 
 class RegisterManager(object):
     """ Class that keeps track of register allocations
@@ -68,7 +146,14 @@
         self.frame_manager = frame_manager
         self.assembler = assembler
 
+    def is_still_alive(self, v):
+        # Check if 'v' is alive at the current position.
+        # Return False if the last usage is strictly before.
+        return self.longevity[v][1] >= self.position
+
     def stays_alive(self, v):
+        # Check if 'v' stays alive after the current position.
+        # Return False if the last usage is before or at position.
         return self.longevity[v][1] > self.position
 
     def next_instruction(self, incr=1):
@@ -84,11 +169,14 @@
             point for all variables that might be in registers.
         """
         self._check_type(v)
-        if isinstance(v, Const) or v not in self.reg_bindings:
+        if isinstance(v, Const):
             return
         if v not in self.longevity or self.longevity[v][1] <= self.position:
-            self.free_regs.append(self.reg_bindings[v])
-            del self.reg_bindings[v]
+            if v in self.reg_bindings:
+                self.free_regs.append(self.reg_bindings[v])
+                del self.reg_bindings[v]
+            if self.frame_manager is not None:
+                self.frame_manager.mark_as_free(v)
 
     def possibly_free_vars(self, vars):
         """ Same as 'possibly_free_var', but for all v in vars.
diff --git a/pypy/jit/backend/llsupport/test/test_gc.py b/pypy/jit/backend/llsupport/test/test_gc.py
--- a/pypy/jit/backend/llsupport/test/test_gc.py
+++ b/pypy/jit/backend/llsupport/test/test_gc.py
@@ -570,6 +570,28 @@
             assert operations[1].getarg(2) == v_value
             assert operations[1].getdescr() == array_descr
 
+    def test_rewrite_assembler_5(self):
+        S = lltype.GcStruct('S')
+        A = lltype.GcArray(lltype.Struct('A', ('x', lltype.Ptr(S))))
+        interiordescr = get_interiorfield_descr(self.gc_ll_descr, A,
+                                                A.OF, 'x')
+        wbdescr = self.gc_ll_descr.write_barrier_descr
+        ops = parse("""
+        [p1, p2]
+        setinteriorfield_gc(p1, 0, p2, descr=interiordescr)
+        jump(p1, p2)
+        """, namespace=locals())
+        expected = parse(""" 
+        [p1, p2]
+        cond_call_gc_wb(p1, p2, descr=wbdescr)
+        setinteriorfield_raw(p1, 0, p2, descr=interiordescr)
+        jump(p1, p2)
+        """, namespace=locals())
+        operations = get_deep_immutable_oplist(ops.operations)
+        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu,
+                                                        operations, [])
+        equaloplists(operations, expected.operations)
+
     def test_rewrite_assembler_initialization_store(self):
         S = lltype.GcStruct('S', ('parent', OBJECT),
                             ('x', lltype.Signed))
diff --git a/pypy/jit/backend/llsupport/test/test_regalloc.py b/pypy/jit/backend/llsupport/test/test_regalloc.py
--- a/pypy/jit/backend/llsupport/test/test_regalloc.py
+++ b/pypy/jit/backend/llsupport/test/test_regalloc.py
@@ -2,6 +2,8 @@
 from pypy.jit.metainterp.history import BoxInt, ConstInt, BoxFloat, INT, FLOAT
 from pypy.jit.backend.llsupport.regalloc import FrameManager
 from pypy.jit.backend.llsupport.regalloc import RegisterManager as BaseRegMan
+from pypy.jit.tool.oparser import parse
+from pypy.jit.backend.detect_cpu import getcpuclass
 
 def newboxes(*values):
     return [BoxInt(v) for v in values]
@@ -40,8 +42,13 @@
     def frame_size(self, box_type):
         if box_type == FLOAT:
             return 2
+        elif box_type == INT:
+            return 1
         else:
-            return 1
+            raise ValueError(box_type)
+    def get_loc_index(self, loc):
+        assert isinstance(loc, FakeFramePos)
+        return loc.pos
 
 class MockAsm(object):
     def __init__(self):
@@ -280,7 +287,7 @@
             rm.force_allocate_reg(b)
         rm.before_call()
         assert len(rm.reg_bindings) == 2
-        assert fm.frame_depth == 2
+        assert fm.get_frame_depth() == 2
         assert len(asm.moves) == 2
         rm._check_invariants()
         rm.after_call(boxes[-1])
@@ -303,7 +310,7 @@
             rm.force_allocate_reg(b)
         rm.before_call(save_all_regs=True)
         assert len(rm.reg_bindings) == 0
-        assert fm.frame_depth == 4
+        assert fm.get_frame_depth() == 4
         assert len(asm.moves) == 4
         rm._check_invariants()
         rm.after_call(boxes[-1])
@@ -325,7 +332,7 @@
         xrm = XRegisterManager(longevity, frame_manager=fm, assembler=asm)
         xrm.loc(f0)
         rm.loc(b0)
-        assert fm.frame_depth == 3
+        assert fm.get_frame_depth() == 3
         
         
 
@@ -346,3 +353,123 @@
         spilled2 = rm.force_allocate_reg(b5)
         assert spilled2 is loc
         rm._check_invariants()
+
+
+    def test_hint_frame_locations_1(self):
+        b0, = newboxes(0)
+        fm = TFrameManager()
+        loc123 = FakeFramePos(123, INT)
+        fm.hint_frame_locations[b0] = loc123
+        assert fm.get_frame_depth() == 0
+        loc = fm.loc(b0)
+        assert loc == loc123
+        assert fm.get_frame_depth() == 124
+
+    def test_hint_frame_locations_2(self):
+        b0, b1, b2 = newboxes(0, 1, 2)
+        longevity = {b0: (0, 1), b1: (0, 2), b2: (0, 2)}
+        fm = TFrameManager()
+        asm = MockAsm()
+        rm = RegisterManager(longevity, frame_manager=fm, assembler=asm)
+        rm.force_allocate_reg(b0)
+        rm.force_allocate_reg(b1)
+        rm.force_allocate_reg(b2)
+        rm.force_spill_var(b0)
+        loc = rm.loc(b0)
+        assert isinstance(loc, FakeFramePos)
+        assert fm.get_loc_index(loc) == 0
+        rm.position = 1
+        assert fm.used == [True]
+        rm.possibly_free_var(b0)
+        assert fm.used == [False]
+        #
+        fm.hint_frame_locations[b1] = loc
+        rm.force_spill_var(b1)
+        loc1 = rm.loc(b1)
+        assert loc1 == loc
+        assert fm.used == [True]
+        #
+        fm.hint_frame_locations[b2] = loc
+        rm.force_spill_var(b2)
+        loc2 = rm.loc(b2)
+        assert loc2 != loc1     # because it was not free
+        assert fm.used == [True, True]
+        #
+        rm._check_invariants()
+
+    def test_frame_manager_basic(self):
+        b0, b1 = newboxes(0, 1)
+        fm = TFrameManager()
+        loc0 = fm.loc(b0)
+        assert fm.get_loc_index(loc0) == 0
+        #
+        assert fm.get(b1) is None
+        loc1 = fm.loc(b1)
+        assert fm.get_loc_index(loc1) == 1
+        assert fm.get(b1) == loc1
+        #
+        loc0b = fm.loc(b0)
+        assert loc0b == loc0
+        #
+        fm.loc(BoxInt())
+        assert fm.get_frame_depth() == 3
+        #
+        f0 = BoxFloat()
+        locf0 = fm.loc(f0)
+        assert fm.get_loc_index(locf0) == 4
+        assert fm.get_frame_depth() == 6
+        #
+        f1 = BoxFloat()
+        locf1 = fm.loc(f1)
+        assert fm.get_loc_index(locf1) == 6
+        assert fm.get_frame_depth() == 8
+        assert fm.used == [True, True, True, False, True, True, True, True]
+        #
+        fm.mark_as_free(b0)
+        assert fm.used == [False, True, True, False, True, True, True, True]
+        fm.mark_as_free(b0)
+        assert fm.used == [False, True, True, False, True, True, True, True]
+        fm.mark_as_free(f1)
+        assert fm.used == [False, True, True, False, True, True, False, False]
+        #
+        fm.reserve_location_in_frame(1)
+        assert fm.get_frame_depth() == 9
+        assert fm.used == [False, True, True, False, True, True, False, False, True]
+        #
+        assert b0 not in fm.bindings
+        fm.set_binding(b0, loc0)
+        assert b0 in fm.bindings
+        assert fm.used == [True, True, True, False, True, True, False, False, True]
+        #
+        b3 = BoxInt()
+        assert not fm.try_to_reuse_location(b3, loc0)
+        assert fm.used == [True, True, True, False, True, True, False, False, True]
+        #
+        fm.mark_as_free(b0)
+        assert fm.used == [False, True, True, False, True, True, False, False, True]
+        assert fm.try_to_reuse_location(b3, loc0)
+        assert fm.used == [True, True, True, False, True, True, False, False, True]
+        #
+        fm.mark_as_free(b0)   # already free
+        assert fm.used == [True, True, True, False, True, True, False, False, True]
+        #
+        fm.mark_as_free(b3)
+        assert fm.used == [False, True, True, False, True, True, False, False, True]
+        f3 = BoxFloat()
+        assert not fm.try_to_reuse_location(f3, fm.frame_pos(0, FLOAT))
+        assert not fm.try_to_reuse_location(f3, fm.frame_pos(1, FLOAT))
+        assert not fm.try_to_reuse_location(f3, fm.frame_pos(2, FLOAT))
+        assert not fm.try_to_reuse_location(f3, fm.frame_pos(3, FLOAT))
+        assert not fm.try_to_reuse_location(f3, fm.frame_pos(4, FLOAT))
+        assert not fm.try_to_reuse_location(f3, fm.frame_pos(5, FLOAT))
+        assert fm.used == [False, True, True, False, True, True, False, False, True]
+        assert fm.try_to_reuse_location(f3, fm.frame_pos(6, FLOAT))
+        assert fm.used == [False, True, True, False, True, True, True, True, True]
+        #
+        fm.used = [False]
+        assert fm.try_to_reuse_location(BoxFloat(), fm.frame_pos(0, FLOAT))
+        assert fm.used == [True, True]
+        #
+        fm.used = [True]
+        assert not fm.try_to_reuse_location(BoxFloat(), fm.frame_pos(0, FLOAT))
+        assert fm.used == [True]
diff --git a/pypy/jit/backend/model.py b/pypy/jit/backend/model.py
--- a/pypy/jit/backend/model.py
+++ b/pypy/jit/backend/model.py
@@ -1,5 +1,6 @@
 from pypy.rlib.debug import debug_start, debug_print, debug_stop
 from pypy.jit.metainterp import history
+from pypy.rpython.lltypesystem import lltype
 
 
 class AbstractCPU(object):
@@ -84,24 +85,21 @@
         """Print a disassembled version of looptoken to stdout"""
         raise NotImplementedError
 
-    def execute_token(self, looptoken):
-        """Execute the generated code referenced by the looptoken.
+    def execute_token(self, looptoken, *args):
+        """NOT_RPYTHON (for tests only)
+        Execute the generated code referenced by the looptoken.
         Returns the descr of the last executed operation: either the one
         attached to the failing guard, or the one attached to the FINISH.
-        Use set_future_value_xxx() before, and get_latest_value_xxx() after.
+        Use get_latest_value_xxx() afterwards to read the result(s).
         """
-        raise NotImplementedError
+        argtypes = [lltype.typeOf(x) for x in args]
+        execute = self.make_execute_token(*argtypes)
+        return execute(looptoken, *args)
 
-    def set_future_value_int(self, index, intvalue):
-        """Set the value for the index'th argument for the loop to run."""
-        raise NotImplementedError
-
-    def set_future_value_float(self, index, floatvalue):
-        """Set the value for the index'th argument for the loop to run."""
-        raise NotImplementedError
-
-    def set_future_value_ref(self, index, objvalue):
-        """Set the value for the index'th argument for the loop to run."""
+    def make_execute_token(self, *argtypes):
+        """Must make and return an execute_token() function that will be
+        called with the given argtypes.
+        """
         raise NotImplementedError
 
     def get_latest_value_int(self, index):
diff --git a/pypy/jit/backend/test/calling_convention_test.py b/pypy/jit/backend/test/calling_convention_test.py
--- a/pypy/jit/backend/test/calling_convention_test.py
+++ b/pypy/jit/backend/test/calling_convention_test.py
@@ -2,7 +2,7 @@
                                          AbstractDescr,
                                          BasicFailDescr,
                                          BoxInt, Box, BoxPtr,
-                                         LoopToken,
+                                         JitCellToken,
                                          ConstInt, ConstPtr,
                                          BoxObj, Const,
                                          ConstObj, BoxFloat, ConstFloat)
@@ -40,17 +40,18 @@
         local_floats = list(floats)
         local_ints = list(ints)
         expected_result = 0.0
+        arguments = []
         for i in range(len(args)):
             x = args[i]
             if x[0] == 'f':
                 x = local_floats.pop()
                 t = longlong.getfloatstorage(x)
-                self.cpu.set_future_value_float(i, t)
+                arguments.append(t)
             else:
                 x = local_ints.pop()
-                self.cpu.set_future_value_int(i, x)
+                arguments.append(x)
             expected_result += x
-        return expected_result
+        return arguments, expected_result
 
     @classmethod
     def get_funcbox(cls, cpu, func_ptr):
@@ -107,12 +108,12 @@
             ops += 'finish(f99, %s)\n' % arguments
 
             loop = parse(ops, namespace=locals())
-            looptoken = LoopToken()
+            looptoken = JitCellToken()
             done_number = self.cpu.get_fail_descr_number(loop.operations[-1].getdescr())
             self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
-            expected_result = self._prepare_args(args, floats, ints)
+            argvals, expected_result = self._prepare_args(args, floats, ints)
 
-            res = self.cpu.execute_token(looptoken)
+            res = self.cpu.execute_token(looptoken, *argvals)
             x = longlong.getrealfloat(cpu.get_latest_value_float(0))
             assert abs(x - expected_result) < 0.0001
 
@@ -253,13 +254,13 @@
             called_ops += 'finish(f%d, descr=fdescr3)\n' % total_index
             # compile called loop
             called_loop = parse(called_ops, namespace=locals())
-            called_looptoken = LoopToken()
+            called_looptoken = JitCellToken()
             called_looptoken.outermost_jitdriver_sd = FakeJitDriverSD()
             done_number = self.cpu.get_fail_descr_number(called_loop.operations[-1].getdescr())
             self.cpu.compile_loop(called_loop.inputargs, called_loop.operations, called_looptoken)
 
-            expected_result = self._prepare_args(args, floats, ints)
-            res = cpu.execute_token(called_looptoken)
+            argvals, expected_result = self._prepare_args(args, floats, ints)
+            res = cpu.execute_token(called_looptoken, *argvals)
             assert res.identifier == 3
             t = longlong.getrealfloat(cpu.get_latest_value_float(0))
             assert abs(t - expected_result) < 0.0001
@@ -284,12 +285,12 @@
             # we want to take the fast path
             self.cpu.done_with_this_frame_float_v = done_number
             try:
-                othertoken = LoopToken()
+                othertoken = JitCellToken()
                 self.cpu.compile_loop(loop.inputargs, loop.operations, othertoken)
 
                 # prepare call to called_loop
-                self._prepare_args(args, floats, ints)
-                res = cpu.execute_token(othertoken)
+                argvals, _ = self._prepare_args(args, floats, ints)
+                res = cpu.execute_token(othertoken, *argvals)
                 x = longlong.getrealfloat(cpu.get_latest_value_float(0))
                 assert res.identifier == 4
                 assert abs(x - expected_result) < 0.0001
diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py
--- a/pypy/jit/backend/test/runner_test.py
+++ b/pypy/jit/backend/test/runner_test.py
@@ -3,7 +3,7 @@
                                          AbstractDescr,
                                          BasicFailDescr,
                                          BoxInt, Box, BoxPtr,
-                                         LoopToken,
+                                         JitCellToken, TargetToken,
                                          ConstInt, ConstPtr,
                                          BoxObj,
                                          ConstObj, BoxFloat, ConstFloat)
@@ -32,22 +32,19 @@
                                                                 result_type,
                                                                 valueboxes,
                                                                 descr)
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
         self.cpu.compile_loop(inputargs, operations, looptoken)
-        j = 0
+        args = []
         for box in inputargs:
             if isinstance(box, BoxInt):
-                self.cpu.set_future_value_int(j, box.getint())
-                j += 1
+                args.append(box.getint())
             elif isinstance(box, (BoxPtr, BoxObj)):
-                self.cpu.set_future_value_ref(j, box.getref_base())
-                j += 1
+                args.append(box.getref_base())
             elif isinstance(box, BoxFloat):
-                self.cpu.set_future_value_float(j, box.getfloatstorage())
-                j += 1
+                args.append(box.getfloatstorage())
             else:
                 raise NotImplementedError(box)
-        res = self.cpu.execute_token(looptoken)
+        res = self.cpu.execute_token(looptoken, *args)
         if res is operations[-1].getdescr():
             self.guard_failed = False
         else:
@@ -106,10 +103,9 @@
             ResOperation(rop.FINISH, [i1], None, descr=BasicFailDescr(1))
             ]
         inputargs = [i0]
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
         self.cpu.compile_loop(inputargs, operations, looptoken)
-        self.cpu.set_future_value_int(0, 2)
-        fail = self.cpu.execute_token(looptoken)
+        fail = self.cpu.execute_token(looptoken, 2)
         res = self.cpu.get_latest_value_int(0)
         assert res == 3
         assert fail.identifier == 1
@@ -118,19 +114,20 @@
         i0 = BoxInt()
         i1 = BoxInt()
         i2 = BoxInt()
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
+        targettoken = TargetToken()
         operations = [
+            ResOperation(rop.LABEL, [i0], None, descr=targettoken),
             ResOperation(rop.INT_ADD, [i0, ConstInt(1)], i1),
             ResOperation(rop.INT_LE, [i1, ConstInt(9)], i2),
             ResOperation(rop.GUARD_TRUE, [i2], None, descr=BasicFailDescr(2)),
-            ResOperation(rop.JUMP, [i1], None, descr=looptoken),
+            ResOperation(rop.JUMP, [i1], None, descr=targettoken),
             ]
         inputargs = [i0]
-        operations[2].setfailargs([i1])
+        operations[3].setfailargs([i1])
 
         self.cpu.compile_loop(inputargs, operations, looptoken)
-        self.cpu.set_future_value_int(0, 2)
-        fail = self.cpu.execute_token(looptoken)
+        fail = self.cpu.execute_token(looptoken, 2)
         assert fail.identifier == 2
         res = self.cpu.get_latest_value_int(0)
         assert res == 10
@@ -139,19 +136,22 @@
         i0 = BoxInt()
         i1 = BoxInt()
         i2 = BoxInt()
-        looptoken = LoopToken()
+        i3 = BoxInt()
+        looptoken = JitCellToken()
+        targettoken = TargetToken()
         operations = [
+            ResOperation(rop.INT_SUB, [i3, ConstInt(42)], i0),
+            ResOperation(rop.LABEL, [i0], None, descr=targettoken),
             ResOperation(rop.INT_ADD, [i0, ConstInt(1)], i1),
             ResOperation(rop.INT_LE, [i1, ConstInt(9)], i2),
             ResOperation(rop.GUARD_TRUE, [i2], None, descr=BasicFailDescr(2)),
-            ResOperation(rop.JUMP, [i1], None, descr=looptoken),
+            ResOperation(rop.JUMP, [i1], None, descr=targettoken),
             ]
-        inputargs = [i0]
-        operations[2].setfailargs([None, None, i1, None])
+        inputargs = [i3]
+        operations[4].setfailargs([None, None, i1, None])
 
         self.cpu.compile_loop(inputargs, operations, looptoken)
-        self.cpu.set_future_value_int(0, 2)
-        fail = self.cpu.execute_token(looptoken)
+        fail = self.cpu.execute_token(looptoken, 44)
         assert fail.identifier == 2
         res = self.cpu.get_latest_value_int(2)
         assert res == 10
@@ -162,15 +162,17 @@
         i0 = BoxInt()
         i1 = BoxInt()
         i2 = BoxInt()
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
+        targettoken = TargetToken()
         operations = [
+            ResOperation(rop.LABEL, [i0], None, descr=targettoken),
             ResOperation(rop.INT_ADD, [i0, ConstInt(1)], i1),
             ResOperation(rop.INT_LE, [i1, ConstInt(9)], i2),
             ResOperation(rop.GUARD_TRUE, [i2], None, descr=BasicFailDescr()),
-            ResOperation(rop.JUMP, [i1], None, descr=looptoken),
+            ResOperation(rop.JUMP, [i1], None, descr=targettoken),
             ]
         inputargs = [i0]
-        operations[2].setfailargs([i1])
+        operations[3].setfailargs([i1])
         wr_i1 = weakref.ref(i1)
         wr_guard = weakref.ref(operations[2])
         self.cpu.compile_loop(inputargs, operations, looptoken)
@@ -190,15 +192,17 @@
         i2 = BoxInt()
         faildescr1 = BasicFailDescr(1)
         faildescr2 = BasicFailDescr(2)
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
+        targettoken = TargetToken()
         operations = [
+            ResOperation(rop.LABEL, [i0], None, descr=targettoken),
             ResOperation(rop.INT_ADD, [i0, ConstInt(1)], i1),
             ResOperation(rop.INT_LE, [i1, ConstInt(9)], i2),
             ResOperation(rop.GUARD_TRUE, [i2], None, descr=faildescr1),
-            ResOperation(rop.JUMP, [i1], None, descr=looptoken),
+            ResOperation(rop.JUMP, [i1], None, descr=targettoken),
             ]
         inputargs = [i0]
-        operations[2].setfailargs([i1])
+        operations[3].setfailargs([i1])
         self.cpu.compile_loop(inputargs, operations, looptoken)
 
         i1b = BoxInt()
@@ -206,14 +210,13 @@
         bridge = [
             ResOperation(rop.INT_LE, [i1b, ConstInt(19)], i3),
             ResOperation(rop.GUARD_TRUE, [i3], None, descr=faildescr2),
-            ResOperation(rop.JUMP, [i1b], None, descr=looptoken),
+            ResOperation(rop.JUMP, [i1b], None, descr=targettoken),
         ]
         bridge[1].setfailargs([i1b])
 
         self.cpu.compile_bridge(faildescr1, [i1b], bridge, looptoken)
 
-        self.cpu.set_future_value_int(0, 2)
-        fail = self.cpu.execute_token(looptoken)
+        fail = self.cpu.execute_token(looptoken, 2)
         assert fail.identifier == 2
         res = self.cpu.get_latest_value_int(0)
         assert res == 20
@@ -226,17 +229,21 @@
         i0 = BoxInt()
         i1 = BoxInt()
         i2 = BoxInt()
+        i3 = BoxInt()
         faildescr1 = BasicFailDescr(1)
         faildescr2 = BasicFailDescr(2)
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
+        targettoken = TargetToken()
         operations = [
+            ResOperation(rop.INT_SUB, [i3, ConstInt(42)], i0),
+            ResOperation(rop.LABEL, [i0], None, descr=targettoken),
             ResOperation(rop.INT_ADD, [i0, ConstInt(1)], i1),
             ResOperation(rop.INT_LE, [i1, ConstInt(9)], i2),
             ResOperation(rop.GUARD_TRUE, [i2], None, descr=faildescr1),
-            ResOperation(rop.JUMP, [i1], None, descr=looptoken),
+            ResOperation(rop.JUMP, [i1], None, descr=targettoken),
             ]
-        inputargs = [i0]
-        operations[2].setfailargs([None, i1, None])
+        inputargs = [i3]
+        operations[4].setfailargs([None, i1, None])
         self.cpu.compile_loop(inputargs, operations, looptoken)
 
         i1b = BoxInt()
@@ -244,14 +251,13 @@
         bridge = [
             ResOperation(rop.INT_LE, [i1b, ConstInt(19)], i3),
             ResOperation(rop.GUARD_TRUE, [i3], None, descr=faildescr2),
-            ResOperation(rop.JUMP, [i1b], None, descr=looptoken),
+            ResOperation(rop.JUMP, [i1b], None, descr=targettoken),
         ]
         bridge[1].setfailargs([i1b])
 
         self.cpu.compile_bridge(faildescr1, [i1b], bridge, looptoken)
 
-        self.cpu.set_future_value_int(0, 2)
-        fail = self.cpu.execute_token(looptoken)
+        fail = self.cpu.execute_token(looptoken, 2)
         assert fail.identifier == 2
         res = self.cpu.get_latest_value_int(0)
         assert res == 20
@@ -261,19 +267,20 @@
         i1 = BoxInt()
         i2 = BoxInt()
         faildescr1 = BasicFailDescr(1)
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
+        targettoken = TargetToken()
         operations = [
+            ResOperation(rop.LABEL, [i0], None, descr=targettoken),
             ResOperation(rop.INT_ADD, [i0, ConstInt(1)], i1),
             ResOperation(rop.INT_LE, [i1, ConstInt(9)], i2),
             ResOperation(rop.GUARD_TRUE, [i2], None, descr=faildescr1),
-            ResOperation(rop.JUMP, [i1], None, descr=looptoken),
+            ResOperation(rop.JUMP, [i1], None, descr=targettoken),
             ]
         inputargs = [i0]
-        operations[2].setfailargs([None, i1, None])
+        operations[3].setfailargs([None, i1, None])
         self.cpu.compile_loop(inputargs, operations, looptoken)
 
-        self.cpu.set_future_value_int(0, 2)
-        fail = self.cpu.execute_token(looptoken)
+        fail = self.cpu.execute_token(looptoken, 2)
         assert fail is faildescr1
 
         count = self.cpu.get_latest_value_count()
@@ -290,18 +297,17 @@
                     return AbstractFailDescr.__setattr__(self, name, value)
                 py.test.fail("finish descrs should not be touched")
         faildescr = UntouchableFailDescr() # to check that is not touched
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
         operations = [
             ResOperation(rop.FINISH, [i0], None, descr=faildescr)
             ]
         self.cpu.compile_loop([i0], operations, looptoken)
-        self.cpu.set_future_value_int(0, 99)
-        fail = self.cpu.execute_token(looptoken)
+        fail = self.cpu.execute_token(looptoken, 99)
         assert fail is faildescr
         res = self.cpu.get_latest_value_int(0)
         assert res == 99
 
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
         operations = [
             ResOperation(rop.FINISH, [ConstInt(42)], None, descr=faildescr)
             ]
@@ -311,7 +317,7 @@
         res = self.cpu.get_latest_value_int(0)
         assert res == 42
 
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
         operations = [
             ResOperation(rop.FINISH, [], None, descr=faildescr)
             ]
@@ -320,20 +326,19 @@
         assert fail is faildescr
 
         if self.cpu.supports_floats:
-            looptoken = LoopToken()
+            looptoken = JitCellToken()
             f0 = BoxFloat()
             operations = [
                 ResOperation(rop.FINISH, [f0], None, descr=faildescr)
                 ]
             self.cpu.compile_loop([f0], operations, looptoken)
             value = longlong.getfloatstorage(-61.25)
-            self.cpu.set_future_value_float(0, value)
-            fail = self.cpu.execute_token(looptoken)
+            fail = self.cpu.execute_token(looptoken, value)
             assert fail is faildescr
             res = self.cpu.get_latest_value_float(0)
             assert longlong.getrealfloat(res) == -61.25
 
-            looptoken = LoopToken()
+            looptoken = JitCellToken()
             operations = [
                 ResOperation(rop.FINISH, [constfloat(42.5)], None, descr=faildescr)
                 ]
@@ -350,20 +355,20 @@
         z = BoxInt(579)
         t = BoxInt(455)
         u = BoxInt(0)    # False
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
+        targettoken = TargetToken()
         operations = [
+            ResOperation(rop.LABEL, [y, x], None, descr=targettoken),
             ResOperation(rop.INT_ADD, [x, y], z),
             ResOperation(rop.INT_SUB, [y, ConstInt(1)], t),
             ResOperation(rop.INT_EQ, [t, ConstInt(0)], u),
             ResOperation(rop.GUARD_FALSE, [u], None,
                          descr=BasicFailDescr()),
-            ResOperation(rop.JUMP, [z, t], None, descr=looptoken),
+            ResOperation(rop.JUMP, [t, z], None, descr=targettoken),
             ]
         operations[-2].setfailargs([t, z])
         cpu.compile_loop([x, y], operations, looptoken)
-        self.cpu.set_future_value_int(0, 0)
-        self.cpu.set_future_value_int(1, 10)
-        res = self.cpu.execute_token(looptoken)
+        res = self.cpu.execute_token(looptoken, 0, 10)
         assert self.cpu.get_latest_value_int(0) == 0
         assert self.cpu.get_latest_value_int(1) == 55
 
@@ -419,14 +424,12 @@
                     ]
                 ops[1].setfailargs([v_res])
             #
-            looptoken = LoopToken()
+            looptoken = JitCellToken()
             self.cpu.compile_loop([v1, v2], ops, looptoken)
             for x, y, z in testcases:
                 excvalue = self.cpu.grab_exc_value()
                 assert not excvalue
-                self.cpu.set_future_value_int(0, x)
-                self.cpu.set_future_value_int(1, y)
-                fail = self.cpu.execute_token(looptoken)
+                fail = self.cpu.execute_token(looptoken, x, y)
                 if (z == boom) ^ reversed:
                     assert fail.identifier == 1
                 else:
@@ -1082,16 +1085,18 @@
             inputargs.insert(index_counter, i0)
             jumpargs.insert(index_counter, i1)
             #
-            looptoken = LoopToken()
+            looptoken = JitCellToken()
+            targettoken = TargetToken()
             faildescr = BasicFailDescr(15)
             operations = [
+                ResOperation(rop.LABEL, inputargs, None, descr=targettoken),
                 ResOperation(rop.INT_SUB, [i0, ConstInt(1)], i1),
                 ResOperation(rop.INT_GE, [i1, ConstInt(0)], i2),
                 ResOperation(rop.GUARD_TRUE, [i2], None),
-                ResOperation(rop.JUMP, jumpargs, None, descr=looptoken),
+                ResOperation(rop.JUMP, jumpargs, None, descr=targettoken),
                 ]
-            operations[2].setfailargs(inputargs[:])
-            operations[2].setdescr(faildescr)
+            operations[3].setfailargs(inputargs[:])
+            operations[3].setdescr(faildescr)
             #
             self.cpu.compile_loop(inputargs, operations, looptoken)
             #
@@ -1109,17 +1114,7 @@
                     assert 0
             values[index_counter] = 11
             #
-            for i, (box, val) in enumerate(zip(inputargs, values)):
-                if isinstance(box, BoxInt):
-                    self.cpu.set_future_value_int(i, val)
-                elif isinstance(box, BoxPtr):
-                    self.cpu.set_future_value_ref(i, val)
-                elif isinstance(box, BoxFloat):
-                    self.cpu.set_future_value_float(i, val)
-                else:
-                    assert 0
-            #
-            fail = self.cpu.execute_token(looptoken)
+            fail = self.cpu.execute_token(looptoken, *values)
             assert fail.identifier == 15
             #
             dstvalues = values[:]
@@ -1149,30 +1144,33 @@
             py.test.skip("requires floats")
         fboxes = [BoxFloat() for i in range(12)]
         i2 = BoxInt()
+        targettoken = TargetToken()
         faildescr1 = BasicFailDescr(1)
         faildescr2 = BasicFailDescr(2)
         operations = [
+            ResOperation(rop.LABEL, fboxes, None, descr=targettoken),
             ResOperation(rop.FLOAT_LE, [fboxes[0], constfloat(9.2)], i2),
             ResOperation(rop.GUARD_TRUE, [i2], None, descr=faildescr1),
             ResOperation(rop.FINISH, fboxes, None, descr=faildescr2),
             ]
         operations[-2].setfailargs(fboxes)
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
         self.cpu.compile_loop(fboxes, operations, looptoken)
 
         fboxes2 = [BoxFloat() for i in range(12)]
         f3 = BoxFloat()
         bridge = [
             ResOperation(rop.FLOAT_SUB, [fboxes2[0], constfloat(1.0)], f3),
-            ResOperation(rop.JUMP, [f3] + fboxes2[1:], None, descr=looptoken),
+            ResOperation(rop.JUMP, [f3]+fboxes2[1:], None, descr=targettoken),
         ]
 
         self.cpu.compile_bridge(faildescr1, fboxes2, bridge, looptoken)
 
+        args = []
         for i in range(len(fboxes)):
             x = 13.5 + 6.73 * i
-            self.cpu.set_future_value_float(i, longlong.getfloatstorage(x))
-        fail = self.cpu.execute_token(looptoken)
+            args.append(longlong.getfloatstorage(x))
+        fail = self.cpu.execute_token(looptoken, *args)
         assert fail.identifier == 2
         res = self.cpu.get_latest_value_float(0)
         assert longlong.getrealfloat(res) == 8.5
@@ -1214,7 +1212,7 @@
                         ResOperation(rop.FINISH, [], None, descr=faildescr2),
                         ]
                     operations[-2].setfailargs([])
-                    looptoken = LoopToken()
+                    looptoken = JitCellToken()
                     self.cpu.compile_loop(inputargs, operations, looptoken)
                     #
                     cpu = self.cpu
@@ -1222,14 +1220,12 @@
                         if test1 == -42 or combinaison[0] == 'b':
                             for test2 in [-65, -42, -11]:
                                 if test2 == -42 or combinaison[1] == 'b':
-                                    n = 0
+                                    args = []
                                     if combinaison[0] == 'b':
-                                        cpu.set_future_value_int(n, test1)
-                                        n += 1
+                                        args.append(test1)
                                     if combinaison[1] == 'b':
-                                        cpu.set_future_value_int(n, test2)
-                                        n += 1
-                                    fail = cpu.execute_token(looptoken)
+                                        args.append(test2)
+                                    fail = cpu.execute_token(looptoken, *args)
                                     #
                                     expected = compare(test1, test2)
                                     expected ^= guard_case
@@ -1271,7 +1267,7 @@
                         ResOperation(rop.FINISH, [], None, descr=faildescr2),
                         ]
                     operations[-2].setfailargs([])
-                    looptoken = LoopToken()
+                    looptoken = JitCellToken()
                     self.cpu.compile_loop(inputargs, operations, looptoken)
                     #
                     cpu = self.cpu
@@ -1281,16 +1277,14 @@
                         if test1 == -4.5 or combinaison[0] == 'b':
                             for test2 in [-6.5, -4.5, -2.5, nan]:
                                 if test2 == -4.5 or combinaison[1] == 'b':
-                                    n = 0
+                                    args = []
                                     if combinaison[0] == 'b':
-                                        cpu.set_future_value_float(
-                                            n, longlong.getfloatstorage(test1))
-                                        n += 1
+                                        args.append(
+                                            longlong.getfloatstorage(test1))
                                     if combinaison[1] == 'b':
-                                        cpu.set_future_value_float(
-                                            n, longlong.getfloatstorage(test2))
-                                        n += 1
-                                    fail = cpu.execute_token(looptoken)
+                                        args.append(
+                                            longlong.getfloatstorage(test2))
+                                    fail = cpu.execute_token(looptoken, *args)
                                     #
                                     expected = compare(test1, test2)
                                     expected ^= guard_case
@@ -1330,19 +1324,20 @@
         faildescr = BasicFailDescr(1)
         operations.append(ResOperation(rop.FINISH, [], None,
                                        descr=faildescr))
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
         #
         self.cpu.compile_loop(inputargs, operations, looptoken)
         #
-        for i, box in enumerate(inputargs):
+        args = []
+        for box in inputargs:
             if isinstance(box, BoxInt):
-                self.cpu.set_future_value_int(i, box.getint())
+                args.append(box.getint())
             elif isinstance(box, BoxFloat):
-                self.cpu.set_future_value_float(i, box.getfloatstorage())
+                args.append(box.getfloatstorage())
             else:
                 assert 0
         #
-        fail = self.cpu.execute_token(looptoken)
+        fail = self.cpu.execute_token(looptoken, *args)
         assert fail.identifier == 1
 
     def test_nan_and_infinity(self):
@@ -1400,15 +1395,14 @@
                             ResOperation(rop.FINISH, [], None,
                                          descr=BasicFailDescr(5))]
                         operations[1].setfailargs([])
-                        looptoken = LoopToken()
+                        looptoken = JitCellToken()
                         # Use "set" to unique-ify inputargs
                         unique_testcase_list = list(set(testcase))
                         self.cpu.compile_loop(unique_testcase_list, operations,
                                               looptoken)
-                        for i, box in enumerate(unique_testcase_list):
-                            self.cpu.set_future_value_float(
-                                i, box.getfloatstorage())
-                        fail = self.cpu.execute_token(looptoken)
+                        args = [box.getfloatstorage()
+                                for box in unique_testcase_list]
+                        fail = self.cpu.execute_token(looptoken, *args)
                         if fail.identifier != 5 - (expected_id^expected):
                             if fail.identifier == 4:
                                 msg = "was taken"
@@ -1675,15 +1669,14 @@
         exc_tp = xtp
         exc_ptr = xptr
         loop = parse(ops, self.cpu, namespace=locals())
-        self.cpu.compile_loop(loop.inputargs, loop.operations, loop.token)
-        self.cpu.set_future_value_int(0, 1)
-        self.cpu.execute_token(loop.token)
+        looptoken = JitCellToken()
+        self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
+        self.cpu.execute_token(looptoken, 1)
         assert self.cpu.get_latest_value_int(0) == 0
         assert self.cpu.get_latest_value_ref(1) == xptr
         excvalue = self.cpu.grab_exc_value()
         assert not excvalue
-        self.cpu.set_future_value_int(0, 0)
-        self.cpu.execute_token(loop.token)
+        self.cpu.execute_token(looptoken, 0)
         assert self.cpu.get_latest_value_int(0) == 1
         excvalue = self.cpu.grab_exc_value()
         assert not excvalue
@@ -1700,9 +1693,9 @@
         exc_tp = ytp
         exc_ptr = yptr
         loop = parse(ops, self.cpu, namespace=locals())
-        self.cpu.compile_loop(loop.inputargs, loop.operations, loop.token)
-        self.cpu.set_future_value_int(0, 1)
-        self.cpu.execute_token(loop.token)
+        looptoken = JitCellToken()
+        self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
+        self.cpu.execute_token(looptoken, 1)
         assert self.cpu.get_latest_value_int(0) == 1
         excvalue = self.cpu.grab_exc_value()
         assert excvalue == yptr
@@ -1718,14 +1711,13 @@
         finish(0)
         '''
         loop = parse(ops, self.cpu, namespace=locals())
-        self.cpu.compile_loop(loop.inputargs, loop.operations, loop.token)
-        self.cpu.set_future_value_int(0, 1)
-        self.cpu.execute_token(loop.token)
+        looptoken = JitCellToken()
+        self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
+        self.cpu.execute_token(looptoken, 1)
         assert self.cpu.get_latest_value_int(0) == 1
         excvalue = self.cpu.grab_exc_value()
         assert excvalue == xptr
-        self.cpu.set_future_value_int(0, 0)
-        self.cpu.execute_token(loop.token)
+        self.cpu.execute_token(looptoken, 0)
         assert self.cpu.get_latest_value_int(0) == 0
         excvalue = self.cpu.grab_exc_value()
         assert not excvalue
@@ -1895,18 +1887,14 @@
         ResOperation(rop.FINISH, [i0], None, descr=BasicFailDescr(0))
         ]
         ops[2].setfailargs([i1, i0])
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
         self.cpu.compile_loop([i0, i1], ops, looptoken)
-        self.cpu.set_future_value_int(0, 20)
-        self.cpu.set_future_value_int(1, 0)
-        fail = self.cpu.execute_token(looptoken)
+        fail = self.cpu.execute_token(looptoken, 20, 0)
         assert fail.identifier == 0
         assert self.cpu.get_latest_value_int(0) == 20
         assert values == []
 
-        self.cpu.set_future_value_int(0, 10)
-        self.cpu.set_future_value_int(1, 1)
-        fail = self.cpu.execute_token(looptoken)
+        fail = self.cpu.execute_token(looptoken, 10, 1)
         assert fail.identifier == 1
         assert self.cpu.get_latest_value_int(0) == 1
         assert self.cpu.get_latest_value_int(1) == 10
@@ -1940,18 +1928,14 @@
         ResOperation(rop.FINISH, [i2], None, descr=BasicFailDescr(0))
         ]
         ops[2].setfailargs([i1, i2, i0])
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
         self.cpu.compile_loop([i0, i1], ops, looptoken)
-        self.cpu.set_future_value_int(0, 20)
-        self.cpu.set_future_value_int(1, 0)
-        fail = self.cpu.execute_token(looptoken)
+        fail = self.cpu.execute_token(looptoken, 20, 0)
         assert fail.identifier == 0
         assert self.cpu.get_latest_value_int(0) == 42
         assert values == []
 
-        self.cpu.set_future_value_int(0, 10)
-        self.cpu.set_future_value_int(1, 1)
-        fail = self.cpu.execute_token(looptoken)
+        fail = self.cpu.execute_token(looptoken, 10, 1)
         assert fail.identifier == 1
         assert self.cpu.get_latest_value_int(0) == 1
         assert self.cpu.get_latest_value_int(1) == 42
@@ -1986,19 +1970,15 @@
         ResOperation(rop.FINISH, [f2], None, descr=BasicFailDescr(0))
         ]
         ops[2].setfailargs([i1, f2, i0])
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
         self.cpu.compile_loop([i0, i1], ops, looptoken)
-        self.cpu.set_future_value_int(0, 20)
-        self.cpu.set_future_value_int(1, 0)
-        fail = self.cpu.execute_token(looptoken)
+        fail = self.cpu.execute_token(looptoken, 20, 0)
         assert fail.identifier == 0
         x = self.cpu.get_latest_value_float(0)
         assert longlong.getrealfloat(x) == 42.5
         assert values == []
 
-        self.cpu.set_future_value_int(0, 10)
-        self.cpu.set_future_value_int(1, 1)
-        fail = self.cpu.execute_token(looptoken)
+        fail = self.cpu.execute_token(looptoken, 10, 1)
         assert fail.identifier == 1
         assert self.cpu.get_latest_value_int(0) == 1
         x = self.cpu.get_latest_value_float(1)
@@ -2031,10 +2011,9 @@
         ResOperation(rop.FINISH, [i2], None, descr=BasicFailDescr(0))
         ]
         ops[1].setfailargs([i1, i2])
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
         self.cpu.compile_loop([i1], ops, looptoken)
-        self.cpu.set_future_value_int(0, ord('G'))
-        fail = self.cpu.execute_token(looptoken)
+        fail = self.cpu.execute_token(looptoken, ord('G'))
         assert fail.identifier == 0
         assert self.cpu.get_latest_value_int(0) == ord('g')
 
@@ -2091,14 +2070,14 @@
         ResOperation(rop.FINISH, [], None, descr=BasicFailDescr(0))
         ]
         ops[1].setfailargs([])
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
         self.cpu.compile_loop([i0, i1, i2, i3], ops, looptoken)
-        self.cpu.set_future_value_int(0, rffi.cast(lltype.Signed, raw))
-        self.cpu.set_future_value_int(1, 2)
-        self.cpu.set_future_value_int(2, 4)
-        self.cpu.set_future_value_int(3, rffi.cast(lltype.Signed, fn))
+        args = [rffi.cast(lltype.Signed, raw),
+                2,
+                4,
+                rffi.cast(lltype.Signed, fn)]
         assert glob.lst == []
-        fail = self.cpu.execute_token(looptoken)
+        fail = self.cpu.execute_token(looptoken, *args)
         assert fail.identifier == 0
         assert len(glob.lst) > 0
         lltype.free(raw, flavor='raw')
@@ -2147,13 +2126,12 @@
         ops += [
             ResOperation(rop.FINISH, [i3], None, descr=BasicFailDescr(0))
         ]
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
         self.cpu.compile_loop([i1, i2], ops, looptoken)
 
         buffer = lltype.malloc(rffi.CCHARP.TO, buflen, flavor='raw')
-        self.cpu.set_future_value_int(0, buflen)
-        self.cpu.set_future_value_int(1, rffi.cast(lltype.Signed, buffer))
-        fail = self.cpu.execute_token(looptoken)
+        args = [buflen, rffi.cast(lltype.Signed, buffer)]
+        fail = self.cpu.execute_token(looptoken, *args)
         assert fail.identifier == 0
         assert self.cpu.get_latest_value_int(0) == len(cwd)
         assert rffi.charp2strn(buffer, buflen) == cwd
@@ -2169,12 +2147,10 @@
             ResOperation(rop.FINISH, [i0], None, descr=BasicFailDescr(0))
         ]
         ops[0].setfailargs([i1])
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
         self.cpu.compile_loop([i0, i1], ops, looptoken)
 
-        self.cpu.set_future_value_int(0, -42)
-        self.cpu.set_future_value_int(1, 9)
-        fail = self.cpu.execute_token(looptoken)
+        fail = self.cpu.execute_token(looptoken, -42, 9)
         assert fail.identifier == 0
         assert self.cpu.get_latest_value_int(0) == -42
         print 'step 1 ok'
@@ -2183,9 +2159,7 @@
         # mark as failing
         self.cpu.invalidate_loop(looptoken)
 
-        self.cpu.set_future_value_int(0, -42)
-        self.cpu.set_future_value_int(1, 9)
-        fail = self.cpu.execute_token(looptoken)
+        fail = self.cpu.execute_token(looptoken, -42, 9)
         assert fail is faildescr
         assert self.cpu.get_latest_value_int(0) == 9
         print 'step 2 ok'
@@ -2201,9 +2175,7 @@
         ops[0].setfailargs([])
         self.cpu.compile_bridge(faildescr, [i2], ops, looptoken)
 
-        self.cpu.set_future_value_int(0, -42)
-        self.cpu.set_future_value_int(1, 9)
-        fail = self.cpu.execute_token(looptoken)
+        fail = self.cpu.execute_token(looptoken, -42, 9)
         assert fail.identifier == 3
         assert self.cpu.get_latest_value_int(0) == 9
         print 'step 3 ok'
@@ -2212,9 +2184,7 @@
         # mark as failing again
         self.cpu.invalidate_loop(looptoken)
 
-        self.cpu.set_future_value_int(0, -42)
-        self.cpu.set_future_value_int(1, 9)
-        fail = self.cpu.execute_token(looptoken)
+        fail = self.cpu.execute_token(looptoken, -42, 9)
         assert fail is faildescr2
         print 'step 4 ok'
         print '-'*79
@@ -2415,7 +2385,7 @@
         i18 = int_add(i17, i9)
         finish(i18)'''
         loop = parse(ops)
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
         looptoken.outermost_jitdriver_sd = FakeJitDriverSD()
         self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
         ARGS = [lltype.Signed] * 10
@@ -2423,9 +2393,8 @@
         FakeJitDriverSD.portal_calldescr = self.cpu.calldescrof(
             lltype.Ptr(lltype.FuncType(ARGS, RES)), ARGS, RES,
             EffectInfo.MOST_GENERAL)
-        for i in range(10):
-            self.cpu.set_future_value_int(i, i+1)
-        res = self.cpu.execute_token(looptoken)
+        args = [i+1 for i in range(10)]
+        res = self.cpu.execute_token(looptoken, *args)
         assert self.cpu.get_latest_value_int(0) == 55
         ops = '''
         [i0, i1, i2, i3, i4, i5, i6, i7, i8, i9]
@@ -2435,11 +2404,10 @@
         finish(i11)
         '''
         loop = parse(ops, namespace=locals())
-        othertoken = LoopToken()
+        othertoken = JitCellToken()
         self.cpu.compile_loop(loop.inputargs, loop.operations, othertoken)
-        for i in range(10):
-            self.cpu.set_future_value_int(i, i+1)
-        res = self.cpu.execute_token(othertoken)
+        args = [i+1 for i in range(10)]
+        res = self.cpu.execute_token(othertoken, *args)
         assert self.cpu.get_latest_value_int(0) == 13
         assert called
 
@@ -2471,12 +2439,12 @@
         finish(f2)'''
         loop = parse(ops)
         done_number = self.cpu.get_fail_descr_number(loop.operations[-1].getdescr())
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
         looptoken.outermost_jitdriver_sd = FakeJitDriverSD()
         self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
-        self.cpu.set_future_value_float(0, longlong.getfloatstorage(1.2))
-        self.cpu.set_future_value_float(1, longlong.getfloatstorage(2.3))
-        res = self.cpu.execute_token(looptoken)
+        args = [longlong.getfloatstorage(1.2),
+                longlong.getfloatstorage(2.3)]
+        res = self.cpu.execute_token(looptoken, *args)
         x = self.cpu.get_latest_value_float(0)
         assert longlong.getrealfloat(x) == 1.2 + 2.3
         ops = '''
@@ -2486,11 +2454,11 @@
         finish(f3)
         '''
         loop = parse(ops, namespace=locals())
-        othertoken = LoopToken()
+        othertoken = JitCellToken()
         self.cpu.compile_loop(loop.inputargs, loop.operations, othertoken)
-        self.cpu.set_future_value_float(0, longlong.getfloatstorage(1.2))
-        self.cpu.set_future_value_float(1, longlong.getfloatstorage(3.2))
-        res = self.cpu.execute_token(othertoken)
+        args = [longlong.getfloatstorage(1.2),
+                longlong.getfloatstorage(3.2)]
+        res = self.cpu.execute_token(othertoken, *args)
         x = self.cpu.get_latest_value_float(0)
         assert longlong.getrealfloat(x) == 13.5
         assert called
@@ -2499,11 +2467,11 @@
         del called[:]
         self.cpu.done_with_this_frame_float_v = done_number
         try:
-            othertoken = LoopToken()
+            othertoken = JitCellToken()
             self.cpu.compile_loop(loop.inputargs, loop.operations, othertoken)
-            self.cpu.set_future_value_float(0, longlong.getfloatstorage(1.2))
-            self.cpu.set_future_value_float(1, longlong.getfloatstorage(3.2))
-            res = self.cpu.execute_token(othertoken)
+            args = [longlong.getfloatstorage(1.2),
+                    longlong.getfloatstorage(3.2)]
+            res = self.cpu.execute_token(othertoken, *args)
             x = self.cpu.get_latest_value_float(0)
             assert longlong.getrealfloat(x) == 1.2 + 3.2
             assert not called
@@ -2561,12 +2529,12 @@
         f2 = float_add(f0, f1)
         finish(f2)'''
         loop = parse(ops)
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
         looptoken.outermost_jitdriver_sd = FakeJitDriverSD()
         self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
-        self.cpu.set_future_value_float(0, longlong.getfloatstorage(1.25))
-        self.cpu.set_future_value_float(1, longlong.getfloatstorage(2.35))
-        res = self.cpu.execute_token(looptoken)
+        args = [longlong.getfloatstorage(1.25),
+                longlong.getfloatstorage(2.35)]
+        res = self.cpu.execute_token(looptoken, *args)
         x = self.cpu.get_latest_value_float(0)
         assert longlong.getrealfloat(x) == 1.25 + 2.35
         assert not called
@@ -2578,13 +2546,13 @@
         finish(f3)
         '''
         loop = parse(ops, namespace=locals())
-        othertoken = LoopToken()
+        othertoken = JitCellToken()
         self.cpu.compile_loop(loop.inputargs, loop.operations, othertoken)
 
         # normal call_assembler: goes to looptoken
-        self.cpu.set_future_value_float(0, longlong.getfloatstorage(1.25))
-        self.cpu.set_future_value_float(1, longlong.getfloatstorage(3.25))
-        res = self.cpu.execute_token(othertoken)
+        args = [longlong.getfloatstorage(1.25),
+                longlong.getfloatstorage(3.25)]
+        res = self.cpu.execute_token(othertoken, *args)
         x = self.cpu.get_latest_value_float(0)
         assert longlong.getrealfloat(x) == 13.5
         assert called
@@ -2596,7 +2564,7 @@
         f2 = float_sub(f0, f1)
         finish(f2)'''
         loop = parse(ops)
-        looptoken2 = LoopToken()
+        looptoken2 = JitCellToken()
         looptoken2.outermost_jitdriver_sd = FakeJitDriverSD()
         self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken2)
 
@@ -2604,10 +2572,9 @@
         self.cpu.redirect_call_assembler(looptoken, looptoken2)
 
         # now, our call_assembler should go to looptoken2
-        self.cpu.set_future_value_float(0, longlong.getfloatstorage(6.0))
-        self.cpu.set_future_value_float(1, longlong.getfloatstorage(1.5))
-                                                       # 6.0-1.5 == 1.25+3.25
-        res = self.cpu.execute_token(othertoken)
+        args = [longlong.getfloatstorage(6.0),
+                longlong.getfloatstorage(1.5)]         # 6.0-1.5 == 1.25+3.25
+        res = self.cpu.execute_token(othertoken, *args)
         x = self.cpu.get_latest_value_float(0)
         assert longlong.getrealfloat(x) == 13.5
         assert called
@@ -2958,13 +2925,137 @@
             ResOperation(rop.FINISH, [p0], None, descr=BasicFailDescr(1))
             ]
         inputargs = [i0]
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
         self.cpu.compile_loop(inputargs, operations, looptoken)
         # overflowing value:
-        self.cpu.set_future_value_int(0, sys.maxint // 4 + 1)
-        fail = self.cpu.execute_token(looptoken)
+        fail = self.cpu.execute_token(looptoken, sys.maxint // 4 + 1)
         assert fail.identifier == excdescr.identifier
 
+    def test_compile_loop_with_target(self):
+        i0 = BoxInt()
+        i1 = BoxInt()
+        i2 = BoxInt()
+        i3 = BoxInt()
+        looptoken = JitCellToken()
+        targettoken1 = TargetToken()
+        targettoken2 = TargetToken()
+        faildescr = BasicFailDescr(2)
+        operations = [
+            ResOperation(rop.LABEL, [i0], None, descr=targettoken1),
+            ResOperation(rop.INT_ADD, [i0, ConstInt(1)], i1),
+            ResOperation(rop.INT_LE, [i1, ConstInt(9)], i2),
+            ResOperation(rop.GUARD_TRUE, [i2], None, descr=faildescr),
+            ResOperation(rop.LABEL, [i1], None, descr=targettoken2),
+            ResOperation(rop.INT_GE, [i1, ConstInt(0)], i3),
+            ResOperation(rop.GUARD_TRUE, [i3], None, descr=BasicFailDescr(3)),
+            ResOperation(rop.JUMP, [i1], None, descr=targettoken1),
+            ]
+        inputargs = [i0]
+        operations[3].setfailargs([i1])
+        operations[6].setfailargs([i1])
+
+        self.cpu.compile_loop(inputargs, operations, looptoken)
+        fail = self.cpu.execute_token(looptoken, 2)
+        assert fail.identifier == 2
+        res = self.cpu.get_latest_value_int(0)
+        assert res == 10
+
+        inputargs = [i0]
+        operations = [
+            ResOperation(rop.INT_SUB, [i0, ConstInt(20)], i2),
+            ResOperation(rop.JUMP, [i2], None, descr=targettoken2),
+            ]
+        self.cpu.compile_bridge(faildescr, inputargs, operations, looptoken)
+        
+        fail = self.cpu.execute_token(looptoken, 2)
+        assert fail.identifier == 3
+        res = self.cpu.get_latest_value_int(0)
+        assert res == -10
+
+    def test_compile_bridge_with_target(self):
+        # This test creates a loopy piece of code in a bridge, and builds another
+        # unrelated loop that ends in a jump directly to this loopy bit of code.
+        # It catches a case in which we underestimate the needed frame_depth across
+        # the cross-loop JUMP, because we estimate it based on the frame_depth stored
+        # in the original loop.
+        i0 = BoxInt()
+        i1 = BoxInt()
+        looptoken1 = JitCellToken()
+        targettoken1 = TargetToken()
+        faildescr1 = BasicFailDescr(2)
+        inputargs = [i0]
+        operations = [
+            ResOperation(rop.INT_LE, [i0, ConstInt(1)], i1),
+            ResOperation(rop.GUARD_TRUE, [i1], None, descr=faildescr1),
+            ResOperation(rop.FINISH, [i0], None, descr=BasicFailDescr(1234)),
+            ]
+        operations[1].setfailargs([i0])
+        self.cpu.compile_loop(inputargs, operations, looptoken1)
+
+        def func(a, b, c, d, e, f, g, h, i):
+            assert a + 2 == b
+            assert a + 4 == c
+            assert a + 6 == d
+            assert a + 8 == e
+            assert a + 10 == f
+            assert a + 12 == g
+            assert a + 14 == h
+            assert a + 16 == i
+        FPTR = self.Ptr(self.FuncType([lltype.Signed]*9, lltype.Void))
+        func_ptr = llhelper(FPTR, func)
+        cpu = self.cpu
+        calldescr = cpu.calldescrof(deref(FPTR), (lltype.Signed,)*9, lltype.Void,
+                                    EffectInfo.MOST_GENERAL)
+        funcbox = self.get_funcbox(cpu, func_ptr)
+
+        i0 = BoxInt(); i1 = BoxInt(); i2 = BoxInt(); i3 = BoxInt(); i4 = BoxInt()
+        i5 = BoxInt(); i6 = BoxInt(); i7 = BoxInt(); i8 = BoxInt(); i9 = BoxInt()
+        i10 = BoxInt(); i11 = BoxInt(); i12 = BoxInt(); i13 = BoxInt(); i14 = BoxInt()
+        i15 = BoxInt(); i16 = BoxInt(); i17 = BoxInt(); i18 = BoxInt(); i19 = BoxInt()
+        i20 = BoxInt()
+        inputargs = [i0]
+        operations = [
+            ResOperation(rop.LABEL, [i0], None, descr=targettoken1),
+            ResOperation(rop.INT_ADD, [i0, ConstInt(1)], i1),
+            ResOperation(rop.INT_ADD, [i1, ConstInt(1)], i2),
+            ResOperation(rop.INT_ADD, [i2, ConstInt(1)], i3),
+            ResOperation(rop.INT_ADD, [i3, ConstInt(1)], i4),
+            ResOperation(rop.INT_ADD, [i4, ConstInt(1)], i5),
+            ResOperation(rop.INT_ADD, [i5, ConstInt(1)], i6),
+            ResOperation(rop.INT_ADD, [i6, ConstInt(1)], i7),
+            ResOperation(rop.INT_ADD, [i7, ConstInt(1)], i8),
+            ResOperation(rop.INT_ADD, [i8, ConstInt(1)], i9),
+            ResOperation(rop.INT_ADD, [i9, ConstInt(1)], i10),
+            ResOperation(rop.INT_ADD, [i10, ConstInt(1)], i11),
+            ResOperation(rop.INT_ADD, [i11, ConstInt(1)], i12),
+            ResOperation(rop.INT_ADD, [i12, ConstInt(1)], i13),
+            ResOperation(rop.INT_ADD, [i13, ConstInt(1)], i14),
+            ResOperation(rop.INT_ADD, [i14, ConstInt(1)], i15),
+            ResOperation(rop.INT_ADD, [i15, ConstInt(1)], i16),
+            ResOperation(rop.INT_ADD, [i16, ConstInt(1)], i17),
+            ResOperation(rop.INT_ADD, [i17, ConstInt(1)], i18),
+            ResOperation(rop.INT_ADD, [i18, ConstInt(1)], i19),
+            ResOperation(rop.CALL, [funcbox, i2, i4, i6, i8, i10, i12, i14, i16, i18],
+                         None, descr=calldescr),
+            ResOperation(rop.CALL, [funcbox, i2, i4, i6, i8, i10, i12, i14, i16, i18],
+                         None, descr=calldescr),
+            ResOperation(rop.INT_LT, [i19, ConstInt(100)], i20),
+            ResOperation(rop.GUARD_TRUE, [i20], None, descr=BasicFailDescr(42)),
+            ResOperation(rop.JUMP, [i19], None, descr=targettoken1),
+            ]
+        operations[-2].setfailargs([])
+        self.cpu.compile_bridge(faildescr1, inputargs, operations, looptoken1)
+
+        looptoken2 = JitCellToken()
+        inputargs = [BoxInt()]
+        operations = [
+            ResOperation(rop.JUMP, [ConstInt(0)], None, descr=targettoken1),
+            ]
+        self.cpu.compile_loop(inputargs, operations, looptoken2)
+
+        fail = self.cpu.execute_token(looptoken2, -9)
+        assert fail.identifier == 42
+
 
 class OOtypeBackendTest(BaseBackendTest):
 
diff --git a/pypy/jit/backend/test/test_random.py b/pypy/jit/backend/test/test_random.py
--- a/pypy/jit/backend/test/test_random.py
+++ b/pypy/jit/backend/test/test_random.py
@@ -3,9 +3,10 @@
 from pypy.rlib.rarithmetic import intmask, LONG_BIT
 from pypy.rpython.lltypesystem import llmemory
 from pypy.jit.metainterp.history import BasicFailDescr, TreeLoop
-from pypy.jit.metainterp.history import BoxInt, ConstInt, LoopToken
-from pypy.jit.metainterp.history import BoxPtr, ConstPtr
+from pypy.jit.metainterp.history import BoxInt, ConstInt, JitCellToken
+from pypy.jit.metainterp.history import BoxPtr, ConstPtr, TargetToken
 from pypy.jit.metainterp.history import BoxFloat, ConstFloat, Const
+from pypy.jit.metainterp.history import INT, FLOAT
 from pypy.jit.metainterp.resoperation import ResOperation, rop
 from pypy.jit.metainterp.executor import execute_nonspec
 from pypy.jit.metainterp.resoperation import opname
@@ -179,7 +180,7 @@
                 #print >>s, '    operations[%d].suboperations = [' % i
                 #print >>s, '        ResOperation(rop.FAIL, [%s], None)]' % (
                 #    ', '.join([names[v] for v in op.args]))
-        print >>s, '    looptoken = LoopToken()'
+        print >>s, '    looptoken = JitCellToken()'
         print >>s, '    cpu.compile_loop(inputargs, operations, looptoken)'
         if hasattr(self.loop, 'inputargs'):
             for i, v in enumerate(self.loop.inputargs):
@@ -525,29 +526,53 @@
                     startvars.append(BoxFloat(r.random_float_storage()))
                 else:
                     startvars.append(BoxInt(r.random_integer()))
+            allow_delay = True
+        else:
+            allow_delay = False
         assert len(dict.fromkeys(startvars)) == len(startvars)
         self.startvars = startvars
         self.prebuilt_ptr_consts = []
         self.r = r
-        self.build_random_loop(cpu, builder_factory, r, startvars)
+        self.build_random_loop(cpu, builder_factory, r, startvars, allow_delay)
 
-    def build_random_loop(self, cpu, builder_factory, r, startvars):
+    def build_random_loop(self, cpu, builder_factory, r, startvars, allow_delay):
 
         loop = TreeLoop('test_random_function')
         loop.inputargs = startvars[:]
         loop.operations = []
-        loop.token = LoopToken()
-
+        loop._jitcelltoken = JitCellToken()
         builder = builder_factory(cpu, loop, startvars[:])
-        self.generate_ops(builder, r, loop, startvars)
+        if allow_delay:
+            needs_a_label = True
+        else:
+            self.insert_label(loop, 0, r)
+            needs_a_label = False
+        self.generate_ops(builder, r, loop, startvars, needs_a_label=needs_a_label)
         self.builder = builder
         self.loop = loop
-        cpu.compile_loop(loop.inputargs, loop.operations, loop.token)
+        dump(loop)
+        cpu.compile_loop(loop.inputargs, loop.operations, loop._jitcelltoken)
 
-    def generate_ops(self, builder, r, loop, startvars):
+    def insert_label(self, loop, position, r):
+        assert not hasattr(loop, '_targettoken')
+        for i in range(position):
+            op = loop.operations[i]
+            if (not op.has_no_side_effect()
+                    or not isinstance(op.result, (BoxInt, BoxFloat))):
+                position = i
+                break       # cannot move the LABEL later
+            randompos = r.randrange(0, len(self.startvars)+1)
+            self.startvars.insert(randompos, op.result)
+        loop._targettoken = TargetToken()
+        loop.operations.insert(position, ResOperation(rop.LABEL, self.startvars, None,
+                                                      loop._targettoken))
+
+    def generate_ops(self, builder, r, loop, startvars, needs_a_label=False):
         block_length = pytest.config.option.block_length
+        istart = 0
 
         for i in range(block_length):
+            istart = len(loop.operations)
             try:
                 op = r.choice(builder.OPERATIONS)
                 op.filter(builder)
@@ -556,6 +581,12 @@
                 pass
             if builder.should_fail_by is not None:
                 break
+            if needs_a_label and r.random() < 0.2:
+                self.insert_label(loop, istart, r)
+                needs_a_label = False
+        if needs_a_label:
+            self.insert_label(loop, istart, r)
+
         endvars = []
         used_later = {}
         for op in loop.operations:
@@ -581,6 +612,22 @@
         if pytest.config.option.output:
             builder.print_loop()
 
+    def runjitcelltoken(self):
+        if self.startvars == self.loop.inputargs:
+            return self.loop._jitcelltoken
+        if not hasattr(self, '_initialjumploop_celltoken'):
+            self._initialjumploop_celltoken = JitCellToken()
+            args = []
+            for box in self.startvars:
+                if box not in self.loop.inputargs:
+                    box = box.constbox()
+                args.append(box)
+            self.cpu.compile_loop(self.loop.inputargs,
+                                  [ResOperation(rop.JUMP, args, None,
+                                                descr=self.loop._targettoken)],
+                                  self._initialjumploop_celltoken)
+        return self._initialjumploop_celltoken
+
     def get_fail_args(self):
         if self.should_fail_by.is_guard():
             assert self.should_fail_by.getfailargs() is not None
@@ -608,14 +655,8 @@
         exc = cpu.grab_exc_value()
         assert not exc
 
-        for i, box in enumerate(self.startvars):
-            if isinstance(box, BoxInt):
-                cpu.set_future_value_int(i, box.value)
-            elif isinstance(box, BoxFloat):
-                cpu.set_future_value_float(i, box.value)
-            else:
-                raise NotImplementedError(box)
-        fail = cpu.execute_token(self.loop.token)
+        arguments = [box.value for box in self.loop.inputargs]
+        fail = cpu.execute_token(self.runjitcelltoken(), *arguments)
         assert fail is self.should_fail_by.getdescr()
         for i, v in enumerate(self.get_fail_args()):
             if isinstance(v, (BoxFloat, ConstFloat)):
@@ -676,33 +717,55 @@
             # to build_bridge().)
 
             # First make up the other loop...
-            subset = bridge_builder.subset_of_intvars(r)
-            subset = [i for i in subset if i in fail_args]
-            if len(subset) == 0:
-                return False
+            #
+            # New restriction: must have the same argument count and types
+            # as the original loop
+            subset = []
+            for box in self.loop.inputargs:
+                srcbox = r.choice(fail_args)
+                if srcbox.type != box.type:
+                    if box.type == INT:
+                        srcbox = ConstInt(r.random_integer())
+                    elif box.type == FLOAT:
+                        srcbox = ConstFloat(r.random_float_storage())
+                    else:
+                        raise AssertionError(box.type)
+                subset.append(srcbox)
+            #
             args = [x.clonebox() for x in subset]
             rl = RandomLoop(self.builder.cpu, self.builder.fork,
                                      r, args)
+            dump(rl.loop)
             self.cpu.compile_loop(rl.loop.inputargs, rl.loop.operations,
-                                  rl.loop.token)
+                                  rl.loop._jitcelltoken)
             # done
             self.should_fail_by = rl.should_fail_by
             self.expected = rl.expected
             assert len(rl.loop.inputargs) == len(args)
             # The new bridge's execution will end normally at its FINISH.
             # Just replace the FINISH with the JUMP to the new loop.
-            jump_op = ResOperation(rop.JUMP, subset, None, descr=rl.loop.token)
+            jump_op = ResOperation(rop.JUMP, subset, None,
+                                   descr=rl.loop._targettoken)
             subloop.operations[-1] = jump_op
             self.guard_op = rl.guard_op
             self.prebuilt_ptr_consts += rl.prebuilt_ptr_consts
-            self.loop.token.record_jump_to(rl.loop.token)
+            self.loop._jitcelltoken.record_jump_to(rl.loop._jitcelltoken)
             self.dont_generate_more = True
         if r.random() < .05:
             return False
+        dump(subloop)
         self.builder.cpu.compile_bridge(fail_descr, fail_args,
-                                        subloop.operations, self.loop.token)
+                                        subloop.operations,
+                                        self.loop._jitcelltoken)
         return True
 
+def dump(loop):
+    print >> sys.stderr, loop
+    if hasattr(loop, 'inputargs'):
+        print >> sys.stderr, '\t', loop.inputargs
+    for op in loop.operations:
+        print >> sys.stderr, '\t', op
+
 def check_random_function(cpu, BuilderClass, r, num=None, max=None):
     loop = RandomLoop(cpu, BuilderClass, r)
     while True:
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -2,8 +2,8 @@
 from pypy.jit.backend.llsupport import symbolic
 from pypy.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
 from pypy.jit.metainterp.history import Const, Box, BoxInt, ConstInt
-from pypy.jit.metainterp.history import (AbstractFailDescr, INT, REF, FLOAT,
-                                         LoopToken)
+from pypy.jit.metainterp.history import AbstractFailDescr, INT, REF, FLOAT
+from pypy.jit.metainterp.history import JitCellToken
 from pypy.rpython.lltypesystem import lltype, rffi, rstr, llmemory
 from pypy.rpython.lltypesystem.lloperation import llop
 from pypy.rpython.annlowlevel import llhelper
@@ -38,6 +38,7 @@
 from pypy.jit.backend.x86.jump import remap_frame_layout
 from pypy.jit.codewriter.effectinfo import EffectInfo
 from pypy.jit.codewriter import longlong
+from pypy.rlib.rarithmetic import intmask
 
 # darwin requires the stack to be 16 bytes aligned on calls. Same for gcc 4.5.0,
 # better safe than sorry
@@ -152,14 +153,13 @@
         allblocks = self.get_asmmemmgr_blocks(looptoken)
         self.datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr,
                                                         allblocks)
+        self.target_tokens_currently_compiling = {}
 
     def teardown(self):
         self.pending_guard_tokens = None
         if WORD == 8:
             self.pending_memoryerror_trampoline_from = None
         self.mc = None
-        self.looppos = -1
-        self.currently_compiling_loop = None
         self.current_clt = None
 
     def finish_once(self):
@@ -310,12 +310,11 @@
                 mc.MOVSD_sx(8*i, i)     # xmm0 to xmm7
         #
         if IS_X86_32:
-            mc.LEA_rb(eax.value, +8)
             stack_size += 2*WORD
             mc.PUSH_r(eax.value)        # alignment
-            mc.PUSH_r(eax.value)
+            mc.PUSH_r(esp.value)
         elif IS_X86_64:
-            mc.LEA_rb(edi.value, +16)
+            mc.MOV_rr(edi.value, esp.value)
         #
         # esp is now aligned to a multiple of 16 again
         mc.CALL(imm(slowpathaddr))
@@ -326,7 +325,7 @@
         jnz_location = mc.get_relative_pos()
         #
         if IS_X86_32:
-            mc.ADD_ri(esp.value, 2*WORD)
+            mc.ADD_ri(esp.value, 2*WORD)    # cancel the two PUSHes above
         elif IS_X86_64:
             # restore the registers
             for i in range(7, -1, -1):
@@ -422,12 +421,8 @@
 
     def assemble_loop(self, loopname, inputargs, operations, looptoken, log):
         '''adds the following attributes to looptoken:
-               _x86_loop_code       (an integer giving an address)
-               _x86_bootstrap_code  (an integer giving an address)
-               _x86_direct_bootstrap_code  ( "    "     "    "   )
-               _x86_frame_depth
-               _x86_param_depth
-               _x86_arglocs
+               _x86_function_addr   (address of the generated func, as an int)
+               _x86_loop_code       (debug: addr of the start of the ResOps)
                _x86_debug_checksum
         '''
         # XXX this function is too longish and contains some code
@@ -443,37 +438,35 @@
             assert len(set(inputargs)) == len(inputargs)
 
         self.setup(looptoken)
-        self.currently_compiling_loop = looptoken
         if log:
             self._register_counter(False, looptoken.number)
             operations = self._inject_debugging_code(looptoken, operations)
 
         regalloc = RegAlloc(self, self.cpu.translate_support_code)
-        arglocs, operations = regalloc.prepare_loop(inputargs, operations,
-                                                    looptoken, clt.allgcrefs)
-        looptoken._x86_arglocs = arglocs
-
-        bootstrappos = self.mc.get_relative_pos()
-        stackadjustpos = self._assemble_bootstrap_code(inputargs, arglocs)
-        self.looppos = self.mc.get_relative_pos()
-        looptoken._x86_frame_depth = -1     # temporarily
-        looptoken._x86_param_depth = -1     # temporarily
+        #
+        self._call_header_with_stack_check()
+        stackadjustpos = self._patchable_stackadjust()
+        clt._debug_nbargs = len(inputargs)
+        operations = regalloc.prepare_loop(inputargs, operations,
+                                           looptoken, clt.allgcrefs)
+        looppos = self.mc.get_relative_pos()
+        looptoken._x86_loop_code = looppos
+        clt.frame_depth = -1     # temporarily
+        clt.param_depth = -1     # temporarily
         frame_depth, param_depth = self._assemble(regalloc, operations)
-        looptoken._x86_frame_depth = frame_depth
-        looptoken._x86_param_depth = param_depth
-
-        directbootstrappos = self.mc.get_relative_pos()
-        self._assemble_bootstrap_direct_call(arglocs, self.looppos,
-                                             frame_depth+param_depth)
+        clt.frame_depth = frame_depth
+        clt.param_depth = param_depth
+        #
+        size_excluding_failure_stuff = self.mc.get_relative_pos()
         self.write_pending_failure_recoveries()
-        fullsize = self.mc.get_relative_pos()
+        full_size = self.mc.get_relative_pos()
         #
         rawstart = self.materialize_loop(looptoken)
         debug_start("jit-backend-addr")
         debug_print("Loop %d (%s) has address %x to %x (bootstrap %x)" % (
             looptoken.number, loopname,
-            rawstart + self.looppos,
-            rawstart + directbootstrappos,
+            rawstart + looppos,
+            rawstart + size_excluding_failure_stuff,
             rawstart))
         debug_stop("jit-backend-addr")
         self._patch_stackadjust(rawstart + stackadjustpos,
@@ -484,18 +477,17 @@
         if not we_are_translated():
             # used only by looptoken.dump() -- useful in tests
             looptoken._x86_rawstart = rawstart
-            looptoken._x86_fullsize = fullsize
+            looptoken._x86_fullsize = full_size
             looptoken._x86_ops_offset = ops_offset
+        looptoken._x86_function_addr = rawstart
 
-        looptoken._x86_bootstrap_code = rawstart + bootstrappos
-        looptoken._x86_loop_code = rawstart + self.looppos
-        looptoken._x86_direct_bootstrap_code = rawstart + directbootstrappos
+        self.fixup_target_tokens(rawstart)
         self.teardown()
         # oprofile support
         if self.cpu.profile_agent is not None:
             name = "Loop # %s: %s" % (looptoken.number, loopname)
             self.cpu.profile_agent.native_code_written(name,
-                                                       rawstart, fullsize)
+                                                       rawstart, full_size)
         return ops_offset
 
     def assemble_bridge(self, faildescr, inputargs, operations,
@@ -548,6 +540,9 @@
         # patch the jump from original guard
         self.patch_jump_for_descr(faildescr, rawstart)
         ops_offset = self.mc.ops_offset
+        self.fixup_target_tokens(rawstart)
+        self.current_clt.frame_depth = max(self.current_clt.frame_depth, frame_depth)
+        self.current_clt.param_depth = max(self.current_clt.param_depth, param_depth)
         self.teardown()
         # oprofile support
         if self.cpu.profile_agent is not None:
@@ -668,6 +663,11 @@
             mc.copy_to_raw_memory(adr_target)
         faildescr._x86_adr_jump_offset = 0    # means "patched"
 
+    def fixup_target_tokens(self, rawstart):
+        for targettoken in self.target_tokens_currently_compiling:
+            targettoken._x86_loop_code += rawstart
+        self.target_tokens_currently_compiling = None
+
     @specialize.argtype(1)
     def _inject_debugging_code(self, looptoken, operations):
         if self._debug:
@@ -685,20 +685,24 @@
                    ResOperation(rop.INT_ADD, [box, ConstInt(1)], box2),
                    ResOperation(rop.SETFIELD_RAW, [c_adr, box2],
                                 None, descr=self.debug_counter_descr)]
-            operations = ops + operations
+            if operations[0].getopnum() == rop.LABEL:
+                operations = [operations[0]] + ops + operations[1:]
+            else:
+                operations =  ops + operations
         return operations
 
     def _assemble(self, regalloc, operations):
         self._regalloc = regalloc
+        regalloc.compute_hint_frame_locations(operations)
         regalloc.walk_operations(operations)
         if we_are_translated() or self.cpu.dont_keepalive_stuff:
             self._regalloc = None   # else keep it around for debugging
-        frame_depth = regalloc.fm.frame_depth
+        frame_depth = regalloc.fm.get_frame_depth()
         param_depth = regalloc.param_depth
         jump_target_descr = regalloc.jump_target_descr
         if jump_target_descr is not None:
-            target_frame_depth = jump_target_descr._x86_frame_depth
-            target_param_depth = jump_target_descr._x86_param_depth
+            target_frame_depth = jump_target_descr._x86_clt.frame_depth
+            target_param_depth = jump_target_descr._x86_clt.param_depth
             frame_depth = max(frame_depth, target_frame_depth)
             param_depth = max(param_depth, target_param_depth)
         return frame_depth, param_depth
@@ -793,152 +797,21 @@
             self.mc.MOV_ri(ebx.value, rst)           # MOV ebx, rootstacktop
             self.mc.SUB_mi8((ebx.value, 0), 2*WORD)  # SUB [ebx], 2*WORD
 
-    def _assemble_bootstrap_direct_call(self, arglocs, jmppos, stackdepth):
-        if IS_X86_64:
-            return self._assemble_bootstrap_direct_call_64(arglocs, jmppos, stackdepth)
-        # XXX pushing ebx esi and edi is a bit pointless, since we store
-        #     all regsiters anyway, for the case of guard_not_forced
-        # XXX this can be improved greatly. Right now it'll behave like
-        #     a normal call
-        nonfloatlocs, floatlocs = arglocs
-        self._call_header_with_stack_check()
-        self.mc.LEA_rb(esp.value, self._get_offset_of_ebp_from_esp(stackdepth))
-        offset = 2 * WORD
-        tmp = eax
-        xmmtmp = xmm0
-        for i in range(len(nonfloatlocs)):
-            loc = nonfloatlocs[i]
-            if loc is not None:
-                if isinstance(loc, RegLoc):
-                    assert not loc.is_xmm
-                    self.mc.MOV_rb(loc.value, offset)
-                else:
-                    self.mc.MOV_rb(tmp.value, offset)
-                    self.mc.MOV(loc, tmp)
-                offset += WORD
-            loc = floatlocs[i]
-            if loc is not None:
-                if isinstance(loc, RegLoc):
-                    assert loc.is_xmm
-                    self.mc.MOVSD_xb(loc.value, offset)
-                else:
-                    self.mc.MOVSD_xb(xmmtmp.value, offset)
-                    assert isinstance(loc, StackLoc)
-                    self.mc.MOVSD_bx(loc.value, xmmtmp.value)
-                offset += 2 * WORD
-        endpos = self.mc.get_relative_pos() + 5
-        self.mc.JMP_l(jmppos - endpos)
-        assert endpos == self.mc.get_relative_pos()
-
-    def _assemble_bootstrap_direct_call_64(self, arglocs, jmppos, stackdepth):
-        # XXX: Very similar to _emit_call_64
-
-        src_locs = []
-        dst_locs = []
-        xmm_src_locs = []
-        xmm_dst_locs = []
-        get_from_stack = []
-
-        # In reverse order for use with pop()
-        unused_gpr = [r9, r8, ecx, edx, esi, edi]
-        unused_xmm = [xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0]
-
-        nonfloatlocs, floatlocs = arglocs
-        self._call_header_with_stack_check()
-        self.mc.LEA_rb(esp.value, self._get_offset_of_ebp_from_esp(stackdepth))
-
-        # The lists are padded with Nones
-        assert len(nonfloatlocs) == len(floatlocs)
-
-        for i in range(len(nonfloatlocs)):
-            loc = nonfloatlocs[i]
-            if loc is not None:
-                if len(unused_gpr) > 0:
-                    src_locs.append(unused_gpr.pop())
-                    dst_locs.append(loc)
-                else:
-                    get_from_stack.append((loc, False))
-
-            floc = floatlocs[i]
-            if floc is not None:
-                if len(unused_xmm) > 0:
-                    xmm_src_locs.append(unused_xmm.pop())
-                    xmm_dst_locs.append(floc)
-                else:
-                    get_from_stack.append((floc, True))
-
-        remap_frame_layout(self, src_locs, dst_locs, X86_64_SCRATCH_REG)
-        remap_frame_layout(self, xmm_src_locs, xmm_dst_locs, X86_64_XMM_SCRATCH_REG)
-
-        for i in range(len(get_from_stack)):
-            loc, is_xmm = get_from_stack[i]
-            if is_xmm:
-                self.mc.MOVSD_xb(X86_64_XMM_SCRATCH_REG.value, (2 + i) * WORD)
-                self.mc.MOVSD(loc, X86_64_XMM_SCRATCH_REG)
-            else:
-                self.mc.MOV_rb(X86_64_SCRATCH_REG.value, (2 + i) * WORD)
-                # XXX: We're assuming that "loc" won't require regloc to
-                # clobber the scratch register
-                self.mc.MOV(loc, X86_64_SCRATCH_REG)
-
-        endpos = self.mc.get_relative_pos() + 5
-        self.mc.JMP_l(jmppos - endpos)
-        assert endpos == self.mc.get_relative_pos()
-
     def redirect_call_assembler(self, oldlooptoken, newlooptoken):
         # some minimal sanity checking
-        oldnonfloatlocs, oldfloatlocs = oldlooptoken._x86_arglocs
-        newnonfloatlocs, newfloatlocs = newlooptoken._x86_arglocs
-        assert len(oldnonfloatlocs) == len(newnonfloatlocs)
-        assert len(oldfloatlocs) == len(newfloatlocs)
+        old_nbargs = oldlooptoken.compiled_loop_token._debug_nbargs
+        new_nbargs = newlooptoken.compiled_loop_token._debug_nbargs
+        assert old_nbargs == new_nbargs
         # we overwrite the instructions at the old _x86_direct_bootstrap_code
         # to start with a JMP to the new _x86_direct_bootstrap_code.
         # Ideally we should rather patch all existing CALLs, but well.
-        oldadr = oldlooptoken._x86_direct_bootstrap_code
-        target = newlooptoken._x86_direct_bootstrap_code
+        oldadr = oldlooptoken._x86_function_addr
+        target = newlooptoken._x86_function_addr
         mc = codebuf.MachineCodeBlockWrapper()
         mc.JMP(imm(target))
+        assert mc.get_relative_pos() <= 13  # keep in sync with prepare_loop()
         mc.copy_to_raw_memory(oldadr)
 
-    def _assemble_bootstrap_code(self, inputargs, arglocs):
-        nonfloatlocs, floatlocs = arglocs
-        self._call_header()
-        stackadjustpos = self._patchable_stackadjust()
-        tmp = eax
-        xmmtmp = xmm0
-        self.mc.begin_reuse_scratch_register()
-        for i in range(len(nonfloatlocs)):
-            loc = nonfloatlocs[i]
-            if loc is None:
-                continue
-            if isinstance(loc, RegLoc):
-                target = loc
-            else:
-                target = tmp
-            if inputargs[i].type == REF:
-                adr = self.fail_boxes_ptr.get_addr_for_num(i)
-                self.mc.MOV(target, heap(adr))
-                self.mc.MOV(heap(adr), imm0)
-            else:
-                adr = self.fail_boxes_int.get_addr_for_num(i)
-                self.mc.MOV(target, heap(adr))
-            if target is not loc:
-                assert isinstance(loc, StackLoc)
-                self.mc.MOV_br(loc.value, target.value)
-        for i in range(len(floatlocs)):
-            loc = floatlocs[i]
-            if loc is None:
-                continue
-            adr = self.fail_boxes_float.get_addr_for_num(i)
-            if isinstance(loc, RegLoc):
-                self.mc.MOVSD(loc, heap(adr))
-            else:
-                self.mc.MOVSD(xmmtmp, heap(adr))
-                assert isinstance(loc, StackLoc)
-                self.mc.MOVSD_bx(loc.value, xmmtmp.value)
-        self.mc.end_reuse_scratch_register()
-        return stackadjustpos
-
     def dump(self, text):
         if not self.verbose:
             return
@@ -965,7 +838,7 @@
         if isinstance(loc, RegLoc) and loc.is_xmm:
             self.mc.SUB_ri(esp.value, 8)   # = size of doubles
             self.mc.MOVSD_sx(0, loc.value)
-        elif WORD == 4 and isinstance(loc, StackLoc) and loc.width == 8:
+        elif WORD == 4 and isinstance(loc, StackLoc) and loc.get_width() == 8:
             # XXX evil trick
             self.mc.PUSH_b(get_ebp_ofs(loc.position))
             self.mc.PUSH_b(get_ebp_ofs(loc.position + 1))
@@ -976,13 +849,25 @@
         if isinstance(loc, RegLoc) and loc.is_xmm:
             self.mc.MOVSD_xs(loc.value, 0)
             self.mc.ADD_ri(esp.value, 8)   # = size of doubles
-        elif WORD == 4 and isinstance(loc, StackLoc) and loc.width == 8:
+        elif WORD == 4 and isinstance(loc, StackLoc) and loc.get_width() == 8:
             # XXX evil trick
             self.mc.POP_b(get_ebp_ofs(loc.position + 1))
             self.mc.POP_b(get_ebp_ofs(loc.position))
         else:
             self.mc.POP(loc)
 
+    def regalloc_immedmem2mem(self, from_loc, to_loc):
+        # move a ConstFloatLoc directly to a StackLoc, as two MOVs
+        # (even on x86-64, because the immediates are encoded as 32 bits)
+        assert isinstance(from_loc, ConstFloatLoc)
+        assert isinstance(to_loc,   StackLoc)
+        low_part  = rffi.cast(rffi.CArrayPtr(rffi.INT), from_loc.value)[0]
+        high_part = rffi.cast(rffi.CArrayPtr(rffi.INT), from_loc.value)[1]
+        low_part  = intmask(low_part)
+        high_part = intmask(high_part)
+        self.mc.MOV_bi(to_loc.value,     low_part)
+        self.mc.MOV_bi(to_loc.value + 4, high_part)
+
     def regalloc_perform(self, op, arglocs, resloc):
         genop_list[op.getopnum()](self, op, arglocs, resloc)
 
@@ -1134,18 +1019,18 @@
                     self.mc.MOVSD_sx(p, loc.value)
                 else:
                     self.mc.MOV_sr(p, loc.value)
-            p += round_up_to_4(loc.width)
+            p += loc.get_width()
         p = 0
         for i in range(start, n):
             loc = arglocs[i]
             if not isinstance(loc, RegLoc):
-                if loc.width == 8:
+                if loc.get_width() == 8:
                     self.mc.MOVSD(xmm0, loc)
                     self.mc.MOVSD_sx(p, xmm0.value)
                 else:
                     self.mc.MOV(tmp, loc)
                     self.mc.MOV_sr(p, tmp.value)
-            p += round_up_to_4(loc.width)
+            p += loc.get_width()
         self._regalloc.reserve_param(p//WORD)
         # x is a location
         self.mc.CALL(x)
@@ -1882,10 +1767,10 @@
     DESCR_INT       = 0x01
     DESCR_FLOAT     = 0x02
     DESCR_SPECIAL   = 0x03
-    # XXX: 4*8 works on i386, should we optimize for that case?
-    CODE_FROMSTACK  = 4*16
+    CODE_FROMSTACK  = 4 * (8 + 8*IS_X86_64)
     CODE_STOP       = 0 | DESCR_SPECIAL
     CODE_HOLE       = 4 | DESCR_SPECIAL
+    CODE_INPUTARG   = 8 | DESCR_SPECIAL
 
     def write_failure_recovery_description(self, mc, failargs, locs):
         for i in range(len(failargs)):
@@ -1901,7 +1786,11 @@
                     raise AssertionError("bogus kind")
                 loc = locs[i]
                 if isinstance(loc, StackLoc):
-                    n = self.CODE_FROMSTACK//4 + loc.position
+                    pos = loc.position
+                    if pos < 0:
+                        mc.writechar(chr(self.CODE_INPUTARG))
+                        pos = ~pos
+                    n = self.CODE_FROMSTACK//4 + pos
                 else:
                     assert isinstance(loc, RegLoc)
                     n = loc.value
@@ -1921,6 +1810,7 @@
         descr_to_box_type = [REF, INT, FLOAT]
         bytecode = rffi.cast(rffi.UCHARP, bytecode)
         arglocs = []
+        code_inputarg = False
         while 1:
             # decode the next instruction from the bytecode
             code = rffi.cast(lltype.Signed, bytecode[0])
@@ -1939,11 +1829,17 @@
                             break
                 kind = code & 3
                 code = (code - self.CODE_FROMSTACK) >> 2
+                if code_inputarg:
+                    code = ~code
+                    code_inputarg = False
                 loc = X86FrameManager.frame_pos(code, descr_to_box_type[kind])
             elif code == self.CODE_STOP:
                 break
             elif code == self.CODE_HOLE:
                 continue
+            elif code == self.CODE_INPUTARG:
+                code_inputarg = True
+                continue
             else:
                 # 'code' identifies a register
                 kind = code & 3
@@ -1959,6 +1855,7 @@
     def grab_frame_values(self, bytecode, frame_addr, allregisters):
         # no malloc allowed here!!
         self.fail_ebp = allregisters[16 + ebp.value]
+        code_inputarg = False
         num = 0
         value_hi = 0
         while 1:
@@ -1979,6 +1876,9 @@
                 # load the value from the stack
                 kind = code & 3
                 code = (code - self.CODE_FROMSTACK) >> 2
+                if code_inputarg:
+                    code = ~code
+                    code_inputarg = False
                 stackloc = frame_addr + get_ebp_ofs(code)
                 value = rffi.cast(rffi.LONGP, stackloc)[0]
                 if kind == self.DESCR_FLOAT and WORD == 4:
@@ -1991,6 +1891,9 @@
                     if code == self.CODE_HOLE:
                         num += 1
                         continue
+                    if code == self.CODE_INPUTARG:
+                        code_inputarg = True
+                        continue
                     assert code == self.CODE_STOP
                     break
                 code >>= 2
@@ -2095,9 +1998,9 @@
         # returns in eax the fail_index
 
         # now we return from the complete frame, which starts from
-        # _assemble_bootstrap_code().  The LEA in _call_footer below throws
-        # away most of the frame, including all the PUSHes that we did just
-        # above.
+        # _call_header_with_stack_check().  The LEA in _call_footer below
+        # throws away most of the frame, including all the PUSHes that we
+        # did just above.
 
         self._call_footer()
         rawstart = mc.materialize(self.cpu.asmmemmgr, [])
@@ -2180,7 +2083,7 @@
                         argtypes=op.getdescr().get_arg_types(),
                         callconv=op.getdescr().get_call_conv())
 
-        if IS_X86_32 and isinstance(resloc, StackLoc) and resloc.width == 8:
+        if IS_X86_32 and isinstance(resloc, StackLoc) and resloc.type == FLOAT:
             # a float or a long long return
             if op.getdescr().get_return_type() == 'L':
                 self.mc.MOV_br(resloc.value, eax.value)      # long long
@@ -2344,11 +2247,11 @@
         fail_index = self.cpu.get_fail_descr_number(faildescr)
         self.mc.MOV_bi(FORCE_INDEX_OFS, fail_index)
         descr = op.getdescr()
-        assert isinstance(descr, LoopToken)
-        assert len(arglocs) - 2 == len(descr._x86_arglocs[0])
+        assert isinstance(descr, JitCellToken)
+        assert len(arglocs) - 2 == descr.compiled_loop_token._debug_nbargs
         #
-        # Write a call to the direct_bootstrap_code of the target assembler
-        self._emit_call(fail_index, imm(descr._x86_direct_bootstrap_code),
+        # Write a call to the target assembler
+        self._emit_call(fail_index, imm(descr._x86_function_addr),
                         arglocs, 2, tmp=eax)
         if op.result is None:
             assert result_loc is None
@@ -2578,15 +2481,21 @@
                     gcrootmap.put(self.gcrootmap_retaddr_forced, mark)
                     self.gcrootmap_retaddr_forced = -1
 
-    def target_arglocs(self, loop_token):
-        return loop_token._x86_arglocs
-
-    def closing_jump(self, loop_token):
-        if loop_token is self.currently_compiling_loop:
+    def closing_jump(self, target_token):
+        # The backend's logic assumes that the target code is in a piece of
+        # assembler that was also called with the same number of arguments,
+        # so that the locations [ebp+8..] of the input arguments are valid
+        # stack locations both before and after the jump.
+        my_nbargs = self.current_clt._debug_nbargs
+        target_nbargs = target_token._x86_clt._debug_nbargs
+        assert my_nbargs == target_nbargs
+        #
+        target = target_token._x86_loop_code
+        if target_token in self.target_tokens_currently_compiling:
             curpos = self.mc.get_relative_pos() + 5
-            self.mc.JMP_l(self.looppos - curpos)
+            self.mc.JMP_l(target - curpos)
         else:
-            self.mc.JMP(imm(loop_token._x86_loop_code))
+            self.mc.JMP(imm(target))
 
     def malloc_cond(self, nursery_free_adr, nursery_top_adr, size, tid):
         size = max(size, self.cpu.gc_ll_descr.minimal_size_in_nursery)
@@ -2659,11 +2568,6 @@
         num = getattr(rop, opname.upper())
         genop_list[num] = value
 
-def round_up_to_4(size):
-    if size < 4:
-        return 4
-    return size
-
 # XXX: ri386 migration shims:
 def addr_add(reg_or_imm1, reg_or_imm2, offset=0, scale=0):
     return AddressLoc(reg_or_imm1, reg_or_imm2, scale, offset)
diff --git a/pypy/jit/backend/x86/jump.py b/pypy/jit/backend/x86/jump.py
--- a/pypy/jit/backend/x86/jump.py
+++ b/pypy/jit/backend/x86/jump.py
@@ -1,6 +1,6 @@
 import sys
 from pypy.tool.pairtype import extendabletype
-from pypy.jit.backend.x86.regloc import ImmedLoc, StackLoc
+from pypy.jit.backend.x86.regloc import ImmediateAssemblerLocation, StackLoc
 
 def remap_frame_layout(assembler, src_locations, dst_locations, tmpreg):
     pending_dests = len(dst_locations)
@@ -12,7 +12,7 @@
         srccount[key] = 0
     for i in range(len(dst_locations)):
         src = src_locations[i]
-        if isinstance(src, ImmedLoc):
+        if isinstance(src, ImmediateAssemblerLocation):
             continue
         key = src._getregkey()
         if key in srccount:
@@ -31,7 +31,7 @@
                 srccount[key] = -1       # means "it's done"
                 pending_dests -= 1
                 src = src_locations[i]
-                if not isinstance(src, ImmedLoc):
+                if not isinstance(src, ImmediateAssemblerLocation):
                     key = src._getregkey()
                     if key in srccount:
                         srccount[key] -= 1
@@ -66,6 +66,13 @@
 
 def _move(assembler, src, dst, tmpreg):
     if dst.is_memory_reference() and src.is_memory_reference():
+        if isinstance(src, ImmediateAssemblerLocation):
+            assembler.regalloc_immedmem2mem(src, dst)
+            return
+        if tmpreg is None:
+            assembler.regalloc_push(src)
+            assembler.regalloc_pop(dst)
+            return
         assembler.regalloc_mov(src, tmpreg)
         src = tmpreg
     assembler.regalloc_mov(src, dst)
@@ -87,7 +94,7 @@
         dstloc = dst_locations2[i]
         if isinstance(loc, StackLoc):
             key = loc._getregkey()
-            if (key in dst_keys or (loc.width > WORD and
+            if (key in dst_keys or (loc.get_width() > WORD and
                                     (key + WORD) in dst_keys)):
                 assembler.regalloc_push(loc)
                 extrapushes.append(dstloc)
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -5,7 +5,8 @@
 import os
 from pypy.jit.metainterp.history import (Box, Const, ConstInt, ConstPtr,
                                          ResOperation, BoxPtr, ConstFloat,
-                                         BoxFloat, LoopToken, INT, REF, FLOAT)
+                                         BoxFloat, INT, REF, FLOAT,
+                                         TargetToken, JitCellToken)
 from pypy.jit.backend.x86.regloc import *
 from pypy.rpython.lltypesystem import lltype, rffi, rstr
 from pypy.rlib.objectmodel import we_are_translated
@@ -27,7 +28,7 @@
 class X86RegisterManager(RegisterManager):
 
     box_types = [INT, REF]
-    all_regs = [eax, ecx, edx, ebx, esi, edi]
+    all_regs = [ecx, eax, edx, ebx, esi, edi]
     no_lower_byte_regs = [esi, edi]
     save_around_call_regs = [eax, edx, ecx]
     frame_reg = ebp
@@ -59,7 +60,7 @@
 
 class X86_64_RegisterManager(X86RegisterManager):
     # r11 omitted because it's used as scratch
-    all_regs = [eax, ecx, edx, ebx, esi, edi, r8, r9, r10, r12, r13, r14, r15]
+    all_regs = [ecx, eax, edx, ebx, esi, edi, r8, r9, r10, r12, r13, r14, r15]
     no_lower_byte_regs = []
     save_around_call_regs = [eax, ecx, edx, esi, edi, r8, r9, r10]
 
@@ -129,15 +130,19 @@
     @staticmethod
     def frame_pos(i, box_type):
         if IS_X86_32 and box_type == FLOAT:
-            return StackLoc(i, get_ebp_ofs(i+1), 2, box_type)
+            return StackLoc(i, get_ebp_ofs(i+1), box_type)
         else:
-            return StackLoc(i, get_ebp_ofs(i), 1, box_type)
+            return StackLoc(i, get_ebp_ofs(i), box_type)
     @staticmethod
     def frame_size(box_type):
         if IS_X86_32 and box_type == FLOAT:
             return 2
         else:
             return 1
+    @staticmethod
+    def get_loc_index(loc):
+        assert isinstance(loc, StackLoc)
+        return loc.position
 
 if WORD == 4:
     gpr_reg_mgr_cls = X86RegisterManager
@@ -159,6 +164,8 @@
         # to be read/used by the assembler too
         self.jump_target_descr = None
         self.close_stack_struct = 0
+        self.final_jump_op = None
+        self.min_bytes_before_label = 0
 
     def _prepare(self, inputargs, operations, allgcrefs):
         self.fm = X86FrameManager()
@@ -167,74 +174,83 @@
         operations = cpu.gc_ll_descr.rewrite_assembler(cpu, operations,
                                                        allgcrefs)
         # compute longevity of variables
-        longevity = self._compute_vars_longevity(inputargs, operations)
-        self.longevity = longevity
-        self.rm = gpr_reg_mgr_cls(longevity,
+        self._compute_vars_longevity(inputargs, operations)
+        self.rm = gpr_reg_mgr_cls(self.longevity,
                                   frame_manager = self.fm,
                                   assembler = self.assembler)
-        self.xrm = xmm_reg_mgr_cls(longevity, frame_manager = self.fm,
+        self.xrm = xmm_reg_mgr_cls(self.longevity, frame_manager = self.fm,
                                    assembler = self.assembler)
         return operations
 
     def prepare_loop(self, inputargs, operations, looptoken, allgcrefs):
         operations = self._prepare(inputargs, operations, allgcrefs)
-        jump = operations[-1]
-        loop_consts = self._compute_loop_consts(inputargs, jump, looptoken)
-        self.loop_consts = loop_consts
-        return self._process_inputargs(inputargs), operations
+        self._set_initial_bindings(inputargs)
+        # note: we need to make a copy of inputargs because possibly_free_vars
+        # is also used on op args, which is a non-resizable list
+        self.possibly_free_vars(list(inputargs))
+        self.min_bytes_before_label = 13
+        return operations
 
     def prepare_bridge(self, prev_depths, inputargs, arglocs, operations,
                        allgcrefs):
         operations = self._prepare(inputargs, operations, allgcrefs)
-        self.loop_consts = {}
         self._update_bindings(arglocs, inputargs)
-        self.fm.frame_depth = prev_depths[0]
         self.param_depth = prev_depths[1]
         return operations
 
     def reserve_param(self, n):
         self.param_depth = max(self.param_depth, n)
 
-    def _process_inputargs(self, inputargs):
-        # XXX we can sort out here by longevity if we need something
-        # more optimal
-        floatlocs = [None] * len(inputargs)
-        nonfloatlocs = [None] * len(inputargs)
-        # Don't use all_regs[0] for passing arguments around a loop.
-        # Must be kept in sync with consider_jump().
-        # XXX this should probably go to llsupport/regalloc.py
-        xmmtmp = self.xrm.free_regs.pop(0)
-        tmpreg = self.rm.free_regs.pop(0)
-        assert tmpreg == X86RegisterManager.all_regs[0]
-        assert xmmtmp == X86XMMRegisterManager.all_regs[0]
-        for i in range(len(inputargs)):
-            arg = inputargs[i]
-            assert not isinstance(arg, Const)
-            reg = None
-            if arg not in self.loop_consts and self.longevity[arg][1] > -1:
-                if arg.type == FLOAT:
-                    # xxx is it really a good idea?  at the first CALL they
-                    # will all be flushed anyway
-                    reg = self.xrm.try_allocate_reg(arg)
+    def _set_initial_bindings(self, inputargs):
+        if IS_X86_64:
+            inputargs = self._set_initial_bindings_regs_64(inputargs)
+        #                   ...
+        # stack layout:     arg2
+        #                   arg1
+        #                   arg0
+        #                   return address
+        #                   saved ebp        <-- ebp points here
+        #                   ...
+        cur_frame_pos = - 1 - FRAME_FIXED_SIZE
+        assert get_ebp_ofs(cur_frame_pos-1) == 2*WORD
+        assert get_ebp_ofs(cur_frame_pos-2) == 3*WORD
+        #
+        for box in inputargs:
+            assert isinstance(box, Box)
+            #
+            if IS_X86_32 and box.type == FLOAT:
+                cur_frame_pos -= 2
+            else:
+                cur_frame_pos -= 1
+            loc = self.fm.frame_pos(cur_frame_pos, box.type)
+            self.fm.set_binding(box, loc)
+
+    def _set_initial_bindings_regs_64(self, inputargs):
+        # In reverse order for use with pop()
+        unused_gpr = [r9, r8, ecx, edx, esi, edi]
+        unused_xmm = [xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0]
+        #
+        pass_on_stack = []
+        #
+        for box in inputargs:
+            assert isinstance(box, Box)
+            #
+            if box.type == FLOAT:
+                if len(unused_xmm) > 0:
+                    ask = unused_xmm.pop()
+                    got = self.xrm.try_allocate_reg(box, selected_reg=ask)
+                    assert ask == got
                 else:
-                    reg = self.rm.try_allocate_reg(arg)
-            if reg:
-                loc = reg
+                    pass_on_stack.append(box)
             else:
-                loc = self.fm.loc(arg)
-            if arg.type == FLOAT:
-                floatlocs[i] = loc
-            else:
-                nonfloatlocs[i] = loc
-            # otherwise we have it saved on stack, so no worry
-        self.rm.free_regs.insert(0, tmpreg)
-        self.xrm.free_regs.insert(0, xmmtmp)
-        assert tmpreg not in nonfloatlocs
-        assert xmmtmp not in floatlocs
-        # note: we need to make a copy of inputargs because possibly_free_vars
-        # is also used on op args, which is a non-resizable list
-        self.possibly_free_vars(list(inputargs))
-        return nonfloatlocs, floatlocs
+                if len(unused_gpr) > 0:
+                    ask = unused_gpr.pop()
+                    got = self.rm.try_allocate_reg(box, selected_reg=ask)
+                    assert ask == got
+                else:
+                    pass_on_stack.append(box)
+        #
+        return pass_on_stack
 
     def possibly_free_var(self, var):
         if var.type == FLOAT:
@@ -287,15 +303,15 @@
         else:
             return self.xrm.make_sure_var_in_reg(var, forbidden_vars)
 
-    def _compute_loop_consts(self, inputargs, jump, looptoken):
-        if jump.getopnum() != rop.JUMP or jump.getdescr() is not looptoken:
-            loop_consts = {}
-        else:
-            loop_consts = {}
-            for i in range(len(inputargs)):
-                if inputargs[i] is jump.getarg(i):
-                    loop_consts[inputargs[i]] = i
-        return loop_consts
+    #def _compute_loop_consts(self, inputargs, jump, looptoken):
+    #    if jump.getopnum() != rop.JUMP or jump.getdescr() is not looptoken:
+    #        loop_consts = {}
+    #    else:
+    #        loop_consts = {}
+    #        for i in range(len(inputargs)):
+    #            if inputargs[i] is jump.getarg(i):
+    #                loop_consts[inputargs[i]] = i
+    #    return loop_consts
 
     def _update_bindings(self, locs, inputargs):
         # XXX this should probably go to llsupport/regalloc.py
@@ -311,7 +327,7 @@
                     self.xrm.reg_bindings[arg] = loc
                     used[loc] = None
                 else:
-                    self.fm.frame_bindings[arg] = loc
+                    self.fm.set_binding(arg, loc)
             else:
                 if isinstance(loc, RegLoc):
                     if loc is ebp:
@@ -320,7 +336,7 @@
                         self.rm.reg_bindings[arg] = loc
                         used[loc] = None
                 else:
-                    self.fm.frame_bindings[arg] = loc
+                    self.fm.set_binding(arg, loc)
         self.rm.free_regs = []
         for reg in self.rm.all_regs:
             if reg not in used:
@@ -356,7 +372,7 @@
     def get_current_depth(self):
         # return (self.fm.frame_depth, self.param_depth), but trying to share
         # the resulting tuple among several calls
-        arg0 = self.fm.frame_depth
+        arg0 = self.fm.get_frame_depth()
         arg1 = self.param_depth
         result = self.assembler._current_depths_cache
         if result[0] != arg0 or result[1] != arg1:
@@ -445,13 +461,26 @@
             i += 1
         assert not self.rm.reg_bindings
         assert not self.xrm.reg_bindings
+        self.flush_loop()
         self.assembler.mc.mark_op(None) # end of the loop
 
+    def flush_loop(self):
+        # rare case: if the loop is too short, pad with NOPs
+        mc = self.assembler.mc
+        while mc.get_relative_pos() < self.min_bytes_before_label:
+            mc.NOP()
+
     def _compute_vars_longevity(self, inputargs, operations):
         # compute a dictionary that maps variables to index in
         # operations that is a "last-time-seen"
+
+        # Stores the result in self.longevity and self.last_real_usage
+        # (nothing is returned).  Uses of a variable as an argument of JUMP
+        # or LABEL, or only as a guard failarg, are not "real" usages: for
+        # those it does not matter if it lives on the stack or in a register.
         produced = {}
         last_used = {}
+        last_real_usage = {}
         for i in range(len(operations)-1, -1, -1):
             op = operations[i]
             if op.result:
@@ -459,10 +488,16 @@
                     continue
                 assert op.result not in produced
                 produced[op.result] = i
+            opnum = op.getopnum()
             for j in range(op.numargs()):
                 arg = op.getarg(j)
-                if isinstance(arg, Box) and arg not in last_used:
+                if not isinstance(arg, Box):
+                    continue
+                if arg not in last_used:
                     last_used[arg] = i
+                if opnum != rop.JUMP and opnum != rop.LABEL:
+                    if arg not in last_real_usage:
+                        last_real_usage[arg] = i
             if op.is_guard():
                 for arg in op.getfailargs():
                     if arg is None: # hole
@@ -470,7 +505,8 @@
                     assert isinstance(arg, Box)
                     if arg not in last_used:
                         last_used[arg] = i
-
+        self.last_real_usage = last_real_usage
+        #
         longevity = {}
         for arg in produced:
             if arg in last_used:
@@ -486,7 +522,7 @@
                 longevity[arg] = (0, last_used[arg])
                 del last_used[arg]
         assert len(last_used) == 0
-        return longevity
+        self.longevity = longevity
 
     def loc(self, v):
         if v is None: # xxx kludgy
@@ -883,7 +919,7 @@
 
     def consider_call_assembler(self, op, guard_op):
         descr = op.getdescr()
-        assert isinstance(descr, LoopToken)
+        assert isinstance(descr, JitCellToken)
         jd = descr.outermost_jitdriver_sd
         assert jd is not None
         size = jd.portal_calldescr.get_result_size(self.translate_support_code)
@@ -1313,35 +1349,72 @@
             self.rm.possibly_free_var(tmpbox_low)
         self.rm.possibly_free_var(tmpbox_high)
 
+    def compute_hint_frame_locations(self, operations):
+        # optimization only: fill in the 'hint_frame_locations' dictionary
+        # of 'fm' based on the JUMP at the end of the loop, by looking
+        # at where we would like the boxes to be after the jump.
+        op = operations[-1]
+        if op.getopnum() != rop.JUMP:
+            return
+        self.final_jump_op = op
+        descr = op.getdescr()
+        assert isinstance(descr, TargetToken)
+        if descr._x86_loop_code != 0:
+            # if the target LABEL was already compiled, i.e. if it belongs
+            # to some already-compiled piece of code
+            self._compute_hint_frame_locations_from_descr(descr)
+        #else:
+        #   The loop ends in a JUMP going back to a LABEL in the same loop.
+        #   We cannot fill 'hint_frame_locations' immediately, but we can
+        #   wait until the corresponding consider_label() to know where
+        #   we would like the boxes to be after the jump.
+
+    def _compute_hint_frame_locations_from_descr(self, descr):
+        arglocs = descr._x86_arglocs
+        jump_op = self.final_jump_op
+        assert len(arglocs) == jump_op.numargs()
+        for i in range(jump_op.numargs()):
+            box = jump_op.getarg(i)
+            if isinstance(box, Box):
+                loc = arglocs[i]
+                if isinstance(loc, StackLoc):
+                    self.fm.hint_frame_locations[box] = loc
+
     def consider_jump(self, op):
         assembler = self.assembler
         assert self.jump_target_descr is None
         descr = op.getdescr()
-        assert isinstance(descr, LoopToken)
+        assert isinstance(descr, TargetToken)
+        arglocs = descr._x86_arglocs
         self.jump_target_descr = descr
-        nonfloatlocs, floatlocs = assembler.target_arglocs(self.jump_target_descr)
-        # compute 'tmploc' to be all_regs[0] by spilling what is there
-        box = TempBox()
-        box1 = TempBox()
-        tmpreg = X86RegisterManager.all_regs[0]
-        tmploc = self.rm.force_allocate_reg(box, selected_reg=tmpreg)
-        xmmtmp = X86XMMRegisterManager.all_regs[0]
-        self.xrm.force_allocate_reg(box1, selected_reg=xmmtmp)
         # Part about non-floats
-        # XXX we don't need a copy, we only just the original list
-        src_locations1 = [self.loc(op.getarg(i)) for i in range(op.numargs())
-                         if op.getarg(i).type != FLOAT]
-        assert tmploc not in nonfloatlocs
-        dst_locations1 = [loc for loc in nonfloatlocs if loc is not None]
+        src_locations1 = []
+        dst_locations1 = []
         # Part about floats
-        src_locations2 = [self.loc(op.getarg(i)) for i in range(op.numargs())
-                         if op.getarg(i).type == FLOAT]
-        dst_locations2 = [loc for loc in floatlocs if loc is not None]
+        src_locations2 = []
+        dst_locations2 = []
+        # Build the four lists
+        for i in range(op.numargs()):
+            box = op.getarg(i)
+            src_loc = self.loc(box)
+            dst_loc = arglocs[i]
+            if box.type != FLOAT:
+                src_locations1.append(src_loc)
+                dst_locations1.append(dst_loc)
+            else:
+                src_locations2.append(src_loc)
+                dst_locations2.append(dst_loc)
+        # Do we have a temp var?
+        if IS_X86_64:
+            tmpreg = X86_64_SCRATCH_REG
+            xmmtmp = X86_64_XMM_SCRATCH_REG
+        else:
+            tmpreg = None
+            xmmtmp = None
+        # Do the remapping
         remap_frame_layout_mixed(assembler,
-                                 src_locations1, dst_locations1, tmploc,
+                                 src_locations1, dst_locations1, tmpreg,
                                  src_locations2, dst_locations2, xmmtmp)
-        self.rm.possibly_free_var(box)
-        self.xrm.possibly_free_var(box1)
         self.possibly_free_vars_for_op(op)
         assembler.closing_jump(self.jump_target_descr)
 
@@ -1357,7 +1430,7 @@
 
     def get_mark_gc_roots(self, gcrootmap, use_copy_area=False):
         shape = gcrootmap.get_basic_shape(IS_X86_64)
-        for v, val in self.fm.frame_bindings.items():
+        for v, val in self.fm.bindings.items():
             if (isinstance(v, BoxPtr) and self.rm.stays_alive(v)):
                 assert isinstance(val, StackLoc)
                 gcrootmap.add_frame_offset(shape, get_ebp_ofs(val.position))
@@ -1392,6 +1465,56 @@
         # the FORCE_TOKEN operation returns directly 'ebp'
         self.rm.force_allocate_frame_reg(op.result)
 
+    def consider_label(self, op):
+        descr = op.getdescr()
+        assert isinstance(descr, TargetToken)
+        inputargs = op.getarglist()
+        arglocs = [None] * len(inputargs)
+        #
+        # we use force_spill() on the boxes that are not going to be really
+        # used any more in the loop, but that are kept alive anyway
+        # by being in a next LABEL's or a JUMP's argument or fail_args
+        # of some guard
+        position = self.rm.position
+        for arg in inputargs:
+            assert isinstance(arg, Box)
+            if self.last_real_usage.get(arg, -1) <= position:
+                self.force_spill_var(arg)
+        #
+        # we need to make sure that no variable is stored in ebp
+        for arg in inputargs:
+            if self.loc(arg) is ebp:
+                loc2 = self.fm.loc(arg)
+                self.assembler.mc.MOV(loc2, ebp)
+        self.rm.bindings_to_frame_reg.clear()
+        #
+        for i in range(len(inputargs)):
+            arg = inputargs[i]
+            assert isinstance(arg, Box)
+            loc = self.loc(arg)
+            assert loc is not ebp
+            arglocs[i] = loc
+            if isinstance(loc, RegLoc):
+                self.fm.mark_as_free(arg)
+        #
+        # if we are too close to the start of the loop, the label's target may
+        # get overridden by redirect_call_assembler().  (rare case)
+        self.flush_loop()
+        #
+        descr._x86_arglocs = arglocs
+        descr._x86_loop_code = self.assembler.mc.get_relative_pos()
+        descr._x86_clt = self.assembler.current_clt
+        self.assembler.target_tokens_currently_compiling[descr] = None
+        self.possibly_free_vars_for_op(op)
+        #
+        # if the LABEL's descr is precisely the target of the JUMP at the
+        # end of the same loop, i.e. if what we are compiling is a single
+        # loop that ends up jumping to this LABEL, then we can now provide
+        # the hints about the expected position of the spilled variables.
+        jump_op = self.final_jump_op
+        if jump_op is not None and jump_op.getdescr() is descr:
+            self._compute_hint_frame_locations_from_descr(descr)
+
     def not_implemented_op(self, op):
         not_implemented("not implemented operation: %s" % op.getopname())
 
@@ -1447,3 +1570,7 @@
 def not_implemented(msg):
     os.write(2, '[x86/regalloc] %s\n' % msg)
     raise NotImplementedError(msg)
+
+# xxx hack: set a default value for TargetToken._x86_loop_code.
+# If 0, we know that it is a LABEL that was not compiled yet.
+TargetToken._x86_loop_code = 0
diff --git a/pypy/jit/backend/x86/regloc.py b/pypy/jit/backend/x86/regloc.py
--- a/pypy/jit/backend/x86/regloc.py
+++ b/pypy/jit/backend/x86/regloc.py
@@ -16,8 +16,7 @@
 #
 
 class AssemblerLocation(object):
-    # XXX: Is adding "width" here correct?
-    _attrs_ = ('value', 'width', '_location_code')
+    _attrs_ = ('value', '_location_code')
     _immutable_ = True
     def _getregkey(self):
         return self.value
@@ -28,6 +27,9 @@
     def location_code(self):
         return self._location_code
 
+    def get_width(self):
+        raise NotImplementedError
+
     def value_r(self): return self.value
     def value_b(self): return self.value
     def value_s(self): return self.value
@@ -43,14 +45,21 @@
     _immutable_ = True
     _location_code = 'b'
 
-    def __init__(self, position, ebp_offset, num_words, type):
-        assert ebp_offset < 0   # so no confusion with RegLoc.value
+    def __init__(self, position, ebp_offset, type):
+        # _getregkey() returns self.value; the value returned must not
+        # conflict with RegLoc._getregkey().  It doesn't, a bit by chance,
+        # so let the following assert fail if it ever starts to conflict.
+        assert not (0 <= ebp_offset < 8 + 8 * IS_X86_64)
         self.position = position
         self.value = ebp_offset
-        self.width = num_words * WORD
         # One of INT, REF, FLOAT
         self.type = type
 
+    def get_width(self):
+        if self.type == FLOAT:
+            return 8
+        return WORD
+
     def __repr__(self):
         return '%d(%%ebp)' % (self.value,)
 
@@ -64,10 +73,8 @@
         self.value = regnum
         self.is_xmm = is_xmm
         if self.is_xmm:
-            self.width = 8
             self._location_code = 'x'
         else:
-            self.width = WORD
             self._location_code = 'r'
     def __repr__(self):
         if self.is_xmm:
@@ -75,6 +82,11 @@
         else:
             return rx86.R.names[self.value]
 
+    def get_width(self):
+        if self.is_xmm:
+            return 8
+        return WORD
+
     def lowest8bits(self):
         assert not self.is_xmm
         return RegLoc(rx86.low_byte(self.value), False)
@@ -92,9 +104,11 @@
         else:
             return eax
 
-class ImmedLoc(AssemblerLocation):
+class ImmediateAssemblerLocation(AssemblerLocation):
     _immutable_ = True
-    width = WORD
+
+class ImmedLoc(ImmediateAssemblerLocation):
+    _immutable_ = True
     _location_code = 'i'
 
     def __init__(self, value):
@@ -105,6 +119,9 @@
     def getint(self):
         return self.value
 
+    def get_width(self):
+        return WORD
+
     def __repr__(self):
         return "ImmedLoc(%d)" % (self.value)
 
@@ -117,7 +134,6 @@
 class AddressLoc(AssemblerLocation):
     _immutable_ = True
 
-    width = WORD
     # The address is base_loc + (scaled_loc << scale) + static_offset
     def __init__(self, base_loc, scaled_loc, scale=0, static_offset=0):
         assert 0 <= scale < 4
@@ -146,6 +162,9 @@
         info = getattr(self, attr, '?')
         return '<AddressLoc %r: %s>' % (self._location_code, info)
 
+    def get_width(self):
+        return WORD
+
     def value_a(self):
         return self.loc_a
 
@@ -180,32 +199,34 @@
             raise AssertionError(self._location_code)
         return result
 
-class ConstFloatLoc(AssemblerLocation):
-    # XXX: We have to use this class instead of just AddressLoc because
-    # we want a width of 8  (... I think.  Check this!)
+class ConstFloatLoc(ImmediateAssemblerLocation):
     _immutable_ = True
-    width = 8
     _location_code = 'j'
 
     def __init__(self, address):
         self.value = address
 
+    def get_width(self):
+        return 8
+
     def __repr__(self):
         return '<ConstFloatLoc @%s>' % (self.value,)
 
 if IS_X86_32:
-    class FloatImmedLoc(AssemblerLocation):
+    class FloatImmedLoc(ImmediateAssemblerLocation):
         # This stands for an immediate float.  It cannot be directly used in
         # any assembler instruction.  Instead, it is meant to be decomposed
         # in two 32-bit halves.  On 64-bit, FloatImmedLoc() is a function
         # instead; see below.
         _immutable_ = True
-        width = 8
         _location_code = '#'     # don't use me
 
         def __init__(self, floatstorage):
             self.aslonglong = floatstorage
 
+        def get_width(self):
+            return 8
+
         def low_part(self):
             return intmask(self.aslonglong)
 
diff --git a/pypy/jit/backend/x86/runner.py b/pypy/jit/backend/x86/runner.py
--- a/pypy/jit/backend/x86/runner.py
+++ b/pypy/jit/backend/x86/runner.py
@@ -3,6 +3,7 @@
 from pypy.rpython.lltypesystem.lloperation import llop
 from pypy.rpython.llinterp import LLInterpreter
 from pypy.rlib.objectmodel import we_are_translated
+from pypy.jit.codewriter import longlong
 from pypy.jit.metainterp import history, compile
 from pypy.jit.backend.x86.assembler import Assembler386
 from pypy.jit.backend.x86.arch import FORCE_INDEX_OFS
@@ -21,7 +22,6 @@
     supports_floats = True
     supports_singlefloats = True
 
-    BOOTSTRAP_TP = lltype.FuncType([], lltype.Signed)
     dont_keepalive_stuff = False # for tests
     with_threads = False
 
@@ -91,15 +91,6 @@
         return self.assembler.assemble_bridge(faildescr, inputargs, operations,
                                               original_loop_token, log=log)
 
-    def set_future_value_int(self, index, intvalue):
-        self.assembler.fail_boxes_int.setitem(index, intvalue)
-
-    def set_future_value_float(self, index, floatvalue):
-        self.assembler.fail_boxes_float.setitem(index, floatvalue)
-
-    def set_future_value_ref(self, index, ptrvalue):
-        self.assembler.fail_boxes_ptr.setitem(index, ptrvalue)
-
     def get_latest_value_int(self, index):
         return self.assembler.fail_boxes_int.getitem(index)
 
@@ -122,27 +113,28 @@
         # the FORCE_TOKEN operation and this helper both return 'ebp'.
         return self.assembler.fail_ebp
 
-    def execute_token(self, executable_token):
-        addr = executable_token._x86_bootstrap_code
-        #llop.debug_print(lltype.Void, ">>>> Entering", addr)
-        func = rffi.cast(lltype.Ptr(self.BOOTSTRAP_TP), addr)
-        fail_index = self._execute_call(func)
-        #llop.debug_print(lltype.Void, "<<<< Back")
-        return self.get_fail_descr_from_number(fail_index)
-
-    def _execute_call(self, func):
-        # help flow objspace
-        prev_interpreter = None
-        if not self.translate_support_code:
-            prev_interpreter = LLInterpreter.current_interpreter
-            LLInterpreter.current_interpreter = self.debug_ll_interpreter
-        res = 0
-        try:
-            res = func()
-        finally:
+    def make_execute_token(self, *ARGS):
+        FUNCPTR = lltype.Ptr(lltype.FuncType(ARGS, lltype.Signed))
+        #
+        def execute_token(executable_token, *args):
+            clt = executable_token.compiled_loop_token
+            assert len(args) == clt._debug_nbargs
+            #
+            addr = executable_token._x86_function_addr
+            func = rffi.cast(FUNCPTR, addr)
+            #llop.debug_print(lltype.Void, ">>>> Entering", addr)
+            prev_interpreter = None   # help flow space
             if not self.translate_support_code:
-                LLInterpreter.current_interpreter = prev_interpreter
-        return res
+                prev_interpreter = LLInterpreter.current_interpreter
+                LLInterpreter.current_interpreter = self.debug_ll_interpreter
+            try:
+                fail_index = func(*args)
+            finally:
+                if not self.translate_support_code:
+                    LLInterpreter.current_interpreter = prev_interpreter
+            #llop.debug_print(lltype.Void, "<<<< Back")
+            return self.get_fail_descr_from_number(fail_index)
+        return execute_token
 
     def cast_ptr_to_int(x):
         adr = llmemory.cast_ptr_to_adr(x)
@@ -215,14 +207,3 @@
         super(CPU_X86_64, self).__init__(*args, **kwargs)
 
 CPU = CPU386
-
-# silence warnings
-##history.LoopToken._x86_param_depth = 0
-##history.LoopToken._x86_arglocs = (None, None)
-##history.LoopToken._x86_frame_depth = 0
-##history.LoopToken._x86_bootstrap_code = 0
-##history.LoopToken._x86_direct_bootstrap_code = 0
-##history.LoopToken._x86_loop_code = 0
-##history.LoopToken._x86_debug_checksum = 0
-##compile.AbstractFailDescr._x86_current_depths = (0, 0)
-##compile.AbstractFailDescr._x86_adr_jump_offset = 0
diff --git a/pypy/jit/backend/x86/test/test_assembler.py b/pypy/jit/backend/x86/test/test_assembler.py
--- a/pypy/jit/backend/x86/test/test_assembler.py
+++ b/pypy/jit/backend/x86/test/test_assembler.py
@@ -46,12 +46,13 @@
             xmm2]
     assert len(failargs) == len(locs)
     assembler.write_failure_recovery_description(mc, failargs, locs)
-    nums = [Assembler386.DESCR_INT   + 4*(16+0),
-            Assembler386.DESCR_REF   + 4*(16+1),
-            Assembler386.DESCR_FLOAT + 4*(16+10),
-            Assembler386.DESCR_INT   + 4*(16+100),
-            Assembler386.DESCR_REF   + 4*(16+101),
-            Assembler386.DESCR_FLOAT + 4*(16+110),
+    base = 8 + 8*IS_X86_64
+    nums = [Assembler386.DESCR_INT   + 4*(base+0),
+            Assembler386.DESCR_REF   + 4*(base+1),
+            Assembler386.DESCR_FLOAT + 4*(base+10),
+            Assembler386.DESCR_INT   + 4*(base+100),
+            Assembler386.DESCR_REF   + 4*(base+101),
+            Assembler386.DESCR_FLOAT + 4*(base+110),
             Assembler386.CODE_HOLE,
             Assembler386.CODE_HOLE,
             Assembler386.DESCR_INT   + 4*ebx.value,
diff --git a/pypy/jit/backend/x86/test/test_gc_integration.py b/pypy/jit/backend/x86/test/test_gc_integration.py
--- a/pypy/jit/backend/x86/test/test_gc_integration.py
+++ b/pypy/jit/backend/x86/test/test_gc_integration.py
@@ -4,7 +4,7 @@
 
 import py
 from pypy.jit.metainterp.history import BoxInt, ConstInt,\
-     BoxPtr, ConstPtr, TreeLoop
+     BoxPtr, ConstPtr, TreeLoop, TargetToken
 from pypy.jit.metainterp.resoperation import rop, ResOperation
 from pypy.jit.codewriter import heaptracker
 from pypy.jit.codewriter.effectinfo import EffectInfo
@@ -113,6 +113,8 @@
     descr0 = cpu.fielddescrof(S, 'int')
     ptr0 = struct_ref
 
+    targettoken = TargetToken()
+
     namespace = locals().copy()
 
     def test_basic(self):
@@ -136,6 +138,7 @@
     def test_bug_0(self):
         ops = '''
         [i0, i1, i2, i3, i4, i5, i6, i7, i8]
+        label(i0, i1, i2, i3, i4, i5, i6, i7, i8, descr=targettoken)
         guard_value(i2, 1) [i2, i3, i4, i5, i6, i7, i0, i1, i8]
         guard_class(i4, 138998336) [i4, i5, i6, i7, i0, i1, i8]
         i11 = getfield_gc(i4, descr=descr0)
@@ -163,7 +166,7 @@
         guard_false(i32) [i4, i6, i7, i0, i1, i24]
         i33 = getfield_gc(i0, descr=descr0)
         guard_value(i33, ConstPtr(ptr0)) [i4, i6, i7, i0, i1, i33, i24]
-        jump(i0, i1, 1, 17, i4, ConstPtr(ptr0), i6, i7, i24)
+        jump(i0, i1, 1, 17, i4, ConstPtr(ptr0), i6, i7, i24, descr=targettoken)
         '''
         self.interpret(ops, [0, 0, 0, 0, 0, 0, 0, 0, 0], run=False)
 
diff --git a/pypy/jit/backend/x86/test/test_jump.py b/pypy/jit/backend/x86/test/test_jump.py
--- a/pypy/jit/backend/x86/test/test_jump.py
+++ b/pypy/jit/backend/x86/test/test_jump.py
@@ -71,6 +71,18 @@
                              ('mov', eax, s24),
                              ('mov', s12, edi)]
 
+def test_no_tmp_reg():
+    assembler = MockAssembler()
+    s8 = frame_pos(0, INT)
+    s12 = frame_pos(13, INT)
+    s20 = frame_pos(20, INT)
+    s24 = frame_pos(221, INT)
+    remap_frame_layout(assembler, [s8, eax, s12], [s20, s24, edi], None)
+    assert assembler.ops == [('push', s8),
+                             ('pop', s20),
+                             ('mov', eax, s24),
+                             ('mov', s12, edi)]
+
 def test_reordering():
     assembler = MockAssembler()
     s8 = frame_pos(8, INT)
diff --git a/pypy/jit/backend/x86/test/test_recompilation.py b/pypy/jit/backend/x86/test/test_recompilation.py
--- a/pypy/jit/backend/x86/test/test_recompilation.py
+++ b/pypy/jit/backend/x86/test/test_recompilation.py
@@ -5,10 +5,11 @@
     def test_compile_bridge_not_deeper(self):
         ops = '''
         [i0]
+        label(i0, descr=targettoken)
         i1 = int_add(i0, 1)
         i2 = int_lt(i1, 20)
         guard_true(i2, descr=fdescr1) [i1]
-        jump(i1)
+        jump(i1, descr=targettoken)
         '''
         loop = self.interpret(ops, [0])
         assert self.getint(0) == 20
@@ -18,22 +19,22 @@
         finish(i3, descr=fdescr2)
         '''
         bridge = self.attach_bridge(ops, loop, -2)
-        self.cpu.set_future_value_int(0, 0)
-        fail = self.run(loop)
+        fail = self.run(loop, 0)
         assert fail.identifier == 2
         assert self.getint(0) == 21
     
     def test_compile_bridge_deeper(self):
         ops = '''
         [i0]
+        label(i0, descr=targettoken)
         i1 = int_add(i0, 1)
         i2 = int_lt(i1, 20)
         guard_true(i2, descr=fdescr1) [i1]
-        jump(i1)
+        jump(i1, descr=targettoken)
         '''
         loop = self.interpret(ops, [0])
-        previous = loop.token._x86_frame_depth
-        assert loop.token._x86_param_depth == 0
+        previous = loop._jitcelltoken.compiled_loop_token.frame_depth
+        assert loop._jitcelltoken.compiled_loop_token.param_depth == 0
         assert self.getint(0) == 20
         ops = '''
         [i1]
@@ -42,19 +43,18 @@
         i5 = int_add(i4, 1)
         i6 = int_add(i5, 1)
         i7 = int_add(i5, i4)
+        force_spill(i5)
         i8 = int_add(i7, 1)
         i9 = int_add(i8, 1)
         finish(i3, i4, i5, i6, i7, i8, i9, descr=fdescr2)
         '''
         bridge = self.attach_bridge(ops, loop, -2)
-        descr = loop.operations[2].getdescr()
+        descr = loop.operations[3].getdescr()
         new = descr._x86_bridge_frame_depth
-        assert descr._x86_bridge_param_depth == 0        
-        # XXX: Maybe add enough ops to force stack on 64-bit as well?
-        if IS_X86_32:
-            assert new > previous
-        self.cpu.set_future_value_int(0, 0)
-        fail = self.run(loop)
+        assert descr._x86_bridge_param_depth == 0
+        # the force_spill() forces the stack to grow
+        assert new > previous
+        fail = self.run(loop, 0)
         assert fail.identifier == 2
         assert self.getint(0) == 21
         assert self.getint(1) == 22
@@ -64,28 +64,30 @@
     def test_bridge_jump_to_other_loop(self):
         loop = self.interpret('''
         [i0, i10, i11, i12, i13, i14, i15, i16]
+        label(i0, i10, i11, i12, i13, i14, i15, i16, descr=targettoken)
         i1 = int_add(i0, 1)
         i2 = int_lt(i1, 20)
         guard_true(i2, descr=fdescr1) [i1]
-        jump(i1, i10, i11, i12, i13, i14, i15, i16)
-        ''', [0])
+        jump(i1, i10, i11, i12, i13, i14, i15, i16, descr=targettoken)
+        ''', [0, 0, 0, 0, 0, 0, 0, 0])
         other_loop = self.interpret('''
-        [i3]
+        [i3, i10, i11, i12, i13, i14, i15, i16]
+        label(i3, descr=targettoken2)
         guard_false(i3, descr=fdescr2) [i3]
-        jump(i3)
-        ''', [1])
+        jump(i3, descr=targettoken2)
+        ''', [1, 0, 0, 0, 0, 0, 0, 0])
         ops = '''
         [i3]
-        jump(i3, 1, 2, 3, 4, 5, 6, 7, descr=looptoken)
+        jump(i3, 1, 2, 3, 4, 5, 6, 7, descr=targettoken)
         '''
-        bridge = self.attach_bridge(ops, other_loop, 0, looptoken=loop.token)
-        self.cpu.set_future_value_int(0, 1)
-        fail = self.run(other_loop)
+        bridge = self.attach_bridge(ops, other_loop, 1)
+        fail = self.run(other_loop, 1, 0, 0, 0, 0, 0, 0, 0)
         assert fail.identifier == 1
 
     def test_bridge_jumps_to_self_deeper(self):
         loop = self.interpret('''
         [i0, i1, i2, i31, i32, i33]
+        label(i0, i1, i2, i31, i32, i33, descr=targettoken)
         i98 = same_as(0)
         i99 = same_as(1)
         i30 = int_add(i1, i2)
@@ -94,8 +96,8 @@
         guard_false(i4) [i98, i3]
         i5 = int_lt(i3, 20)
         guard_true(i5) [i99, i3]
-        jump(i3, i30, 1, i30, i30, i30)
-        ''', [0])
+        jump(i3, i30, 1, i30, i30, i30, descr=targettoken)
+        ''', [0, 0, 0, 0, 0, 0])
         assert self.getint(0) == 0
         assert self.getint(1) == 1
         ops = '''
@@ -104,28 +106,28 @@
         i8 = int_add(i3, 1)
         i6 = int_add(i8, i10)
         i7 = int_add(i3, i6)
+        force_spill(i6)
+        force_spill(i7)
+        force_spill(i8)
         i12 = int_add(i7, i8)
         i11 = int_add(i12, i6)
-        jump(i3, i12, i11, i10, i6, i7, descr=looptoken)
+        jump(i3, i12, i11, i10, i6, i7, descr=targettoken)
         '''
-        bridge = self.attach_bridge(ops, loop, 5, looptoken=loop.token)
-        guard_op = loop.operations[5]
-        loop_frame_depth = loop.token._x86_frame_depth
-        assert loop.token._x86_param_depth == 0
-        # XXX: Maybe add enough ops to force stack on 64-bit as well?
-        if IS_X86_32:
-            assert guard_op.getdescr()._x86_bridge_frame_depth > loop_frame_depth
+        loop_frame_depth = loop._jitcelltoken.compiled_loop_token.frame_depth
+        bridge = self.attach_bridge(ops, loop, 6)
+        guard_op = loop.operations[6]
+        assert loop._jitcelltoken.compiled_loop_token.param_depth == 0
+        # the force_spill() forces the stack to grow
+        assert guard_op.getdescr()._x86_bridge_frame_depth > loop_frame_depth
         assert guard_op.getdescr()._x86_bridge_param_depth == 0
-        self.cpu.set_future_value_int(0, 0)
-        self.cpu.set_future_value_int(1, 0)
-        self.cpu.set_future_value_int(2, 0)
-        self.run(loop)
+        self.run(loop, 0, 0, 0, 0, 0, 0)
         assert self.getint(0) == 1
         assert self.getint(1) == 20
 
     def test_bridge_jumps_to_self_shallower(self):
         loop = self.interpret('''
         [i0, i1, i2]
+        label(i0, i1, i2, descr=targettoken)
         i98 = same_as(0)
         i99 = same_as(1)
         i3 = int_add(i0, 1)
@@ -133,19 +135,16 @@
         guard_false(i4) [i98, i3]
         i5 = int_lt(i3, 20)
         guard_true(i5) [i99, i3]
-        jump(i3, i1, i2)
-        ''', [0])
+        jump(i3, i1, i2, descr=targettoken)
+        ''', [0, 0, 0])
         assert self.getint(0) == 0
         assert self.getint(1) == 1
         ops = '''
         [i97, i3]
-        jump(i3, 0, 1, descr=looptoken)
+        jump(i3, 0, 1, descr=targettoken)
         '''
-        bridge = self.attach_bridge(ops, loop, 4, looptoken=loop.token)
-        self.cpu.set_future_value_int(0, 0)
-        self.cpu.set_future_value_int(1, 0)
-        self.cpu.set_future_value_int(2, 0)
-        self.run(loop)
+        bridge = self.attach_bridge(ops, loop, 5)
+        self.run(loop, 0, 0, 0)
         assert self.getint(0) == 1
         assert self.getint(1) == 20
         
diff --git a/pypy/jit/backend/x86/test/test_regalloc.py b/pypy/jit/backend/x86/test/test_regalloc.py
--- a/pypy/jit/backend/x86/test/test_regalloc.py
+++ b/pypy/jit/backend/x86/test/test_regalloc.py
@@ -4,7 +4,7 @@
 
 import py
 from pypy.jit.metainterp.history import BoxInt, ConstInt,\
-     BoxPtr, ConstPtr, LoopToken, BasicFailDescr
+     BoxPtr, ConstPtr, BasicFailDescr, JitCellToken, TargetToken
 from pypy.jit.metainterp.resoperation import rop, ResOperation
 from pypy.jit.backend.llsupport.descr import GcCache
 from pypy.jit.backend.detect_cpu import getcpuclass
@@ -96,10 +96,16 @@
     raising_calldescr = cpu.calldescrof(FPTR.TO, FPTR.TO.ARGS, FPTR.TO.RESULT,
                                         EffectInfo.MOST_GENERAL)
 
+    targettoken = TargetToken()
+    targettoken2 = TargetToken()
     fdescr1 = BasicFailDescr(1)
     fdescr2 = BasicFailDescr(2)
     fdescr3 = BasicFailDescr(3)
 
+    def setup_method(self, meth):
+        self.targettoken._x86_loop_code = 0
+        self.targettoken2._x86_loop_code = 0
+
     def f1(x):
         return x+1
 
@@ -134,21 +140,31 @@
 
     def interpret(self, ops, args, run=True):
         loop = self.parse(ops)
-        self.cpu.compile_loop(loop.inputargs, loop.operations, loop.token)
-        for i, arg in enumerate(args):
+        looptoken = JitCellToken()
+        self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
+        arguments = []
+        for arg in args:
             if isinstance(arg, int):
-                self.cpu.set_future_value_int(i, arg)
+                arguments.append(arg)
             elif isinstance(arg, float):
                 arg = longlong.getfloatstorage(arg)
-                self.cpu.set_future_value_float(i, arg)
+                arguments.append(arg)
             else:
                 assert isinstance(lltype.typeOf(arg), lltype.Ptr)
                 llgcref = lltype.cast_opaque_ptr(llmemory.GCREF, arg)
-                self.cpu.set_future_value_ref(i, llgcref)
+                arguments.append(llgcref)
+        loop._jitcelltoken = looptoken
         if run:
-            self.cpu.execute_token(loop.token)
+            self.cpu.execute_token(looptoken, *arguments)
         return loop
 
+    def prepare_loop(self, ops):
+        loop = self.parse(ops)
+        regalloc = RegAlloc(self.cpu.assembler, False)
+        regalloc.prepare_loop(loop.inputargs, loop.operations,
+                              loop.original_jitcell_token, [])
+        return regalloc
+
     def getint(self, index):
         return self.cpu.get_latest_value_int(index)
 
@@ -167,10 +183,7 @@
         gcref = self.cpu.get_latest_value_ref(index)
         return lltype.cast_opaque_ptr(T, gcref)
 
-    def attach_bridge(self, ops, loop, guard_op_index, looptoken=None, **kwds):
-        if looptoken is not None:
-            self.namespace = self.namespace.copy()
-            self.namespace['looptoken'] = looptoken
+    def attach_bridge(self, ops, loop, guard_op_index, **kwds):
         guard_op = loop.operations[guard_op_index]
         assert guard_op.is_guard()
         bridge = self.parse(ops, **kwds)
@@ -178,20 +191,21 @@
                 [box.type for box in guard_op.getfailargs()])
         faildescr = guard_op.getdescr()
         self.cpu.compile_bridge(faildescr, bridge.inputargs, bridge.operations,
-                                loop.token)
+                                loop._jitcelltoken)
         return bridge
 
-    def run(self, loop):
-        return self.cpu.execute_token(loop.token)
+    def run(self, loop, *arguments):
+        return self.cpu.execute_token(loop._jitcelltoken, *arguments)
 
 class TestRegallocSimple(BaseTestRegalloc):
     def test_simple_loop(self):
         ops = '''
         [i0]
+        label(i0, descr=targettoken)
         i1 = int_add(i0, 1)
         i2 = int_lt(i1, 20)
         guard_true(i2) [i1]
-        jump(i1)
+        jump(i1, descr=targettoken)
         '''
         self.interpret(ops, [0])
         assert self.getint(0) == 20
@@ -199,29 +213,30 @@
     def test_two_loops_and_a_bridge(self):
         ops = '''
         [i0, i1, i2, i3]
+        label(i0, i1, i2, i3, descr=targettoken)
         i4 = int_add(i0, 1)
         i5 = int_lt(i4, 20)
         guard_true(i5) [i4, i1, i2, i3]
-        jump(i4, i1, i2, i3)
+        jump(i4, i1, i2, i3, descr=targettoken)
         '''
         loop = self.interpret(ops, [0, 0, 0, 0])
         ops2 = '''
-        [i5]
+        [i5, i6, i7, i8]
+        label(i5, descr=targettoken2)
         i1 = int_add(i5, 1)
         i3 = int_add(i1, 1)
         i4 = int_add(i3, 1)
         i2 = int_lt(i4, 30)
         guard_true(i2) [i4]
-        jump(i4)
+        jump(i4, descr=targettoken2)
         '''
-        loop2 = self.interpret(ops2, [0])
+        loop2 = self.interpret(ops2, [0, 0, 0, 0])
         bridge_ops = '''
         [i4]
-        jump(i4, i4, i4, i4, descr=looptoken)
+        jump(i4, i4, i4, i4, descr=targettoken)
         '''
-        bridge = self.attach_bridge(bridge_ops, loop2, 4, looptoken=loop.token)
-        self.cpu.set_future_value_int(0, 0)
-        self.run(loop2)
+        bridge = self.attach_bridge(bridge_ops, loop2, 5)
+        self.run(loop2, 0, 0, 0, 0)
         assert self.getint(0) == 31
         assert self.getint(1) == 30
         assert self.getint(2) == 30
@@ -230,10 +245,11 @@
     def test_pointer_arg(self):
         ops = '''
         [i0, p0]
+        label(i0, p0, descr=targettoken)
         i1 = int_add(i0, 1)
         i2 = int_lt(i1, 10)
         guard_true(i2) [p0]
-        jump(i1, p0)
+        jump(i1, p0, descr=targettoken)
         '''
         S = lltype.GcStruct('S')
         ptr = lltype.malloc(S)
@@ -258,8 +274,7 @@
         loop = self.interpret(ops, [0])
         assert self.getint(0) == 1
         bridge = self.attach_bridge(bridge_ops, loop, 2)
-        self.cpu.set_future_value_int(0, 0)
-        self.run(loop)
+        self.run(loop, 0)
         assert self.getint(0) == 1
 
     def test_inputarg_unused(self):
@@ -285,9 +300,7 @@
         assert self.getint(0) == 0
         assert self.getint(1) == 10
         bridge = self.attach_bridge(bridge_ops, loop, 0)
-        self.cpu.set_future_value_int(0, 0)
-        self.cpu.set_future_value_int(1, 10)
-        self.run(loop)
+        self.run(loop, 0, 10)
         assert self.getint(0) == 0
         assert self.getint(1) == 10
 
@@ -304,17 +317,16 @@
         finish(1, 2)
         '''
         self.attach_bridge(bridge_ops, loop, 0)
-        self.cpu.set_future_value_int(0, 0)
-        self.cpu.set_future_value_int(1, 1)
-        self.run(loop)
+        self.run(loop, 0, 1)
 
     def test_spill_for_constant(self):
         ops = '''
         [i0, i1, i2, i3]
+        label(i0, i1, i2, i3, descr=targettoken)
         i4 = int_add(3, i1)
         i5 = int_lt(i4, 30)
         guard_true(i5) [i0, i4, i2, i3]
-        jump(1, i4, 3, 4)
+        jump(1, i4, 3, 4, descr=targettoken)
         '''
         self.interpret(ops, [0, 0, 0, 0])
         assert self.getints(4) == [1, 30, 3, 4]
@@ -322,31 +334,34 @@
     def test_spill_for_constant_lshift(self):
         ops = '''
         [i0, i2, i1, i3]
+        label(i0, i2, i1, i3, descr=targettoken)
         i4 = int_lshift(1, i1)
         i5 = int_add(1, i1)
         i6 = int_lt(i5, 30)
         guard_true(i6) [i4, i5, i2, i3]
-        jump(i4, 3, i5, 4)
+        jump(i4, 3, i5, 4, descr=targettoken)
         '''
         self.interpret(ops, [0, 0, 0, 0])
         assert self.getints(4) == [1<<29, 30, 3, 4]
         ops = '''
         [i0, i1, i2, i3]
+        label(i0, i1, i2, i3, descr=targettoken)
         i4 = int_lshift(1, i1)
         i5 = int_add(1, i1)
         i6 = int_lt(i5, 30)
         guard_true(i6) [i4, i5, i2, i3]
-        jump(i4, i5, 3, 4)
+        jump(i4, i5, 3, 4, descr=targettoken)
         '''
         self.interpret(ops, [0, 0, 0, 0])
         assert self.getints(4) == [1<<29, 30, 3, 4]
         ops = '''
         [i0, i3, i1, i2]
+        label(i0, i3, i1, i2, descr=targettoken)
         i4 = int_lshift(1, i1)
         i5 = int_add(1, i1)
         i6 = int_lt(i5, 30)
         guard_true(i6) [i4, i5, i2, i3]
-        jump(i4, 4, i5, 3)
+        jump(i4, 4, i5, 3, descr=targettoken)
         '''
         self.interpret(ops, [0, 0, 0, 0])
         assert self.getints(4) == [1<<29, 30, 3, 4]
@@ -354,11 +369,12 @@
     def test_result_selected_reg_via_neg(self):
         ops = '''
         [i0, i1, i2, i3]
+        label(i0, i1, i2, i3, descr=targettoken)
         i6 = int_neg(i2)
         i7 = int_add(1, i1)
         i4 = int_lt(i7, 10)
         guard_true(i4) [i0, i6, i7]
-        jump(1, i7, i2, i6)
+        jump(1, i7, i2, i6, descr=targettoken)
         '''
         self.interpret(ops, [0, 0, 3, 0])
         assert self.getints(3) == [1, -3, 10]
@@ -366,11 +382,12 @@
     def test_compare_memory_result_survives(self):
         ops = '''
         [i0, i1, i2, i3]
+        label(i0, i1, i2, i3, descr=targettoken)
         i4 = int_lt(i0, i1)
         i5 = int_add(i3, 1)
         i6 = int_lt(i5, 30)
         guard_true(i6) [i4]
-        jump(i0, i1, i4, i5)
+        jump(i0, i1, i4, i5, descr=targettoken)
         '''
         self.interpret(ops, [0, 10, 0, 0])
         assert self.getint(0) == 1
@@ -378,12 +395,13 @@
     def test_jump_different_args(self):
         ops = '''
         [i0, i15, i16, i18, i1, i2, i3]
+        label(i0, i15, i16, i18, i1, i2, i3, descr=targettoken)
         i4 = int_add(i3, 1)
         i5 = int_lt(i4, 20)
         guard_true(i5) [i2, i1]
-        jump(i0, i18, i15, i16, i2, i1, i4)
+        jump(i0, i18, i15, i16, i2, i1, i4, descr=targettoken)
         '''
-        self.interpret(ops, [0, 1, 2, 3])
+        self.interpret(ops, [0, 1, 2, 3, 0, 0, 0])
 
     def test_op_result_unused(self):
         ops = '''
@@ -417,11 +435,24 @@
         finish(i0, i1, i2, i3, i4, i5, i6, i7, i8)
         '''
         self.attach_bridge(bridge_ops, loop, 1)
-        for i in range(9):
-            self.cpu.set_future_value_int(i, i)
-        self.run(loop)
+        self.run(loop, 0, 1, 2, 3, 4, 5, 6, 7, 8)
         assert self.getints(9) == range(9)
 
+    def test_loopargs(self):
+        ops = """
+        [i0, i1, i2, i3]
+        i4 = int_add(i0, i1)
+        jump(i4, i1, i2, i3)
+        """
+        regalloc = self.prepare_loop(ops)
+        if IS_X86_64:
+            assert len(regalloc.rm.reg_bindings) == 4
+            assert len(regalloc.fm.bindings) == 0
+        else:
+            assert len(regalloc.rm.reg_bindings) == 0
+            assert len(regalloc.fm.bindings) == 4
+
+
 class TestRegallocCompOps(BaseTestRegalloc):
     
     def test_cmp_op_0(self):
@@ -438,6 +469,7 @@
 class TestRegallocMoreRegisters(BaseTestRegalloc):
 
     cpu = BaseTestRegalloc.cpu
+    targettoken = TargetToken()
 
     S = lltype.GcStruct('S', ('field', lltype.Char))
     fielddescr = cpu.fielddescrof(S, 'field')
@@ -510,6 +542,7 @@
     def test_division_optimized(self):
         ops = '''
         [i7, i6]
+        label(i7, i6, descr=targettoken)
         i18 = int_floordiv(i7, i6)
         i19 = int_xor(i7, i6)
         i21 = int_lt(i19, 0)
@@ -517,7 +550,7 @@
         i23 = int_is_true(i22)
         i24 = int_eq(i6, 4)
         guard_false(i24) [i18]
-        jump(i18, i6)
+        jump(i18, i6, descr=targettoken)
         '''
         self.interpret(ops, [10, 4])
         assert self.getint(0) == 2
@@ -586,9 +619,10 @@
         i10 = call(ConstClass(f1ptr), i0, descr=f1_calldescr)
         finish(i10, i1, i2, i3, i4, i5, i6, i7, i8, i9)
         '''
-        loop = self.interpret(ops, [4, 7, 9, 9 ,9, 9, 9, 9, 9, 9, 9])
-        assert self.getints(11) == [5, 7, 9, 9, 9, 9, 9, 9, 9, 9, 9]
-        assert loop.token._x86_param_depth == self.expected_param_depth(1)
+        loop = self.interpret(ops, [4, 7, 9, 9 ,9, 9, 9, 9, 9, 9])
+        assert self.getints(10) == [5, 7, 9, 9, 9, 9, 9, 9, 9, 9]
+        clt = loop._jitcelltoken.compiled_loop_token
+        assert clt.param_depth == self.expected_param_depth(1)
 
     def test_two_calls(self):
         ops = '''
@@ -597,9 +631,10 @@
         i11 = call(ConstClass(f2ptr), i10, i1, descr=f2_calldescr)        
         finish(i11, i1,  i2, i3, i4, i5, i6, i7, i8, i9)
         '''
-        loop = self.interpret(ops, [4, 7, 9, 9 ,9, 9, 9, 9, 9, 9, 9])
-        assert self.getints(11) == [5*7, 7, 9, 9, 9, 9, 9, 9, 9, 9, 9]
-        assert loop.token._x86_param_depth == self.expected_param_depth(2)
+        loop = self.interpret(ops, [4, 7, 9, 9 ,9, 9, 9, 9, 9, 9])
+        assert self.getints(10) == [5*7, 7, 9, 9, 9, 9, 9, 9, 9, 9]
+        clt = loop._jitcelltoken.compiled_loop_token
+        assert clt.param_depth == self.expected_param_depth(2)
 
     def test_call_many_arguments(self):
         # NB: The first and last arguments in the call are constants. This
@@ -612,7 +647,8 @@
         '''
         loop = self.interpret(ops, [2, 3, 4, 5, 6, 7, 8, 9])
         assert self.getint(0) == 55
-        assert loop.token._x86_param_depth == self.expected_param_depth(10)
+        clt = loop._jitcelltoken.compiled_loop_token
+        assert clt.param_depth == self.expected_param_depth(10)
 
     def test_bridge_calls_1(self):
         ops = '''
@@ -632,9 +668,7 @@
 
         assert loop.operations[-2].getdescr()._x86_bridge_param_depth == self.expected_param_depth(2)
 
-        self.cpu.set_future_value_int(0, 4)
-        self.cpu.set_future_value_int(1, 7)        
-        self.run(loop)
+        self.run(loop, 4, 7)
         assert self.getint(0) == 5*7
 
     def test_bridge_calls_2(self):
@@ -655,8 +689,6 @@
 
         assert loop.operations[-2].getdescr()._x86_bridge_param_depth == self.expected_param_depth(2)
 
-        self.cpu.set_future_value_int(0, 4)
-        self.cpu.set_future_value_int(1, 7)        
-        self.run(loop)
+        self.run(loop, 4, 7)
         assert self.getint(0) == 29
 
diff --git a/pypy/jit/backend/x86/test/test_regalloc2.py b/pypy/jit/backend/x86/test/test_regalloc2.py
--- a/pypy/jit/backend/x86/test/test_regalloc2.py
+++ b/pypy/jit/backend/x86/test/test_regalloc2.py
@@ -1,6 +1,6 @@
 import py
 from pypy.jit.metainterp.history import ResOperation, BoxInt, ConstInt,\
-     BoxPtr, ConstPtr, BasicFailDescr, LoopToken
+     BoxPtr, ConstPtr, BasicFailDescr, JitCellToken
 from pypy.jit.metainterp.resoperation import rop
 from pypy.jit.backend.detect_cpu import getcpuclass
 from pypy.jit.backend.x86.arch import WORD
@@ -20,10 +20,9 @@
         ]
     cpu = CPU(None, None)
     cpu.setup_once()
-    looptoken = LoopToken()
+    looptoken = JitCellToken()
     cpu.compile_loop(inputargs, operations, looptoken)
-    cpu.set_future_value_int(0, 9)
-    cpu.execute_token(looptoken)
+    cpu.execute_token(looptoken, 9)
     assert cpu.get_latest_value_int(0) == (9 >> 3)
     assert cpu.get_latest_value_int(1) == (~18)
 
@@ -43,10 +42,9 @@
             ]
     cpu = CPU(None, None)
     cpu.setup_once()
-    looptoken = LoopToken()
+    looptoken = JitCellToken()
     cpu.compile_loop(inputargs, operations, looptoken)
-    cpu.set_future_value_int(0, -10)
-    cpu.execute_token(looptoken)
+    cpu.execute_token(looptoken, -10)
     assert cpu.get_latest_value_int(0) == 0
     assert cpu.get_latest_value_int(1) == -1000
     assert cpu.get_latest_value_int(2) == 1
@@ -140,19 +138,9 @@
             ]
     cpu = CPU(None, None)
     cpu.setup_once()
-    looptoken = LoopToken()
+    looptoken = JitCellToken()
     cpu.compile_loop(inputargs, operations, looptoken)
-    cpu.set_future_value_int(0, -13)
-    cpu.set_future_value_int(1, 10)
-    cpu.set_future_value_int(2, 10)
-    cpu.set_future_value_int(3, 8)
-    cpu.set_future_value_int(4, -8)
-    cpu.set_future_value_int(5, -16)
-    cpu.set_future_value_int(6, -18)
-    cpu.set_future_value_int(7, 46)
-    cpu.set_future_value_int(8, -12)
-    cpu.set_future_value_int(9, 26)
-    cpu.execute_token(looptoken)
+    cpu.execute_token(looptoken, -13, 10, 10, 8, -8, -16, -18, 46, -12, 26)
     assert cpu.get_latest_value_int(0) == 0
     assert cpu.get_latest_value_int(1) == 0
     assert cpu.get_latest_value_int(2) == 0
@@ -255,19 +243,9 @@
             ]
     cpu = CPU(None, None)
     cpu.setup_once()
-    looptoken = LoopToken()
+    looptoken = JitCellToken()
     cpu.compile_loop(inputargs, operations, looptoken)
-    cpu.set_future_value_int(0, 17)
-    cpu.set_future_value_int(1, -20)
-    cpu.set_future_value_int(2, -6)
-    cpu.set_future_value_int(3, 6)
-    cpu.set_future_value_int(4, 1)
-    cpu.set_future_value_int(5, 13)
-    cpu.set_future_value_int(6, 13)
-    cpu.set_future_value_int(7, 9)
-    cpu.set_future_value_int(8, 49)
-    cpu.set_future_value_int(9, 8)
-    cpu.execute_token(looptoken)
+    cpu.execute_token(looptoken, 17, -20, -6, 6, 1, 13, 13, 9, 49, 8)
     assert cpu.get_latest_value_int(0) == 0
     assert cpu.get_latest_value_int(1) == 8
     assert cpu.get_latest_value_int(2) == 1
diff --git a/pypy/jit/backend/x86/test/test_runner.py b/pypy/jit/backend/x86/test/test_runner.py
--- a/pypy/jit/backend/x86/test/test_runner.py
+++ b/pypy/jit/backend/x86/test/test_runner.py
@@ -1,9 +1,10 @@
 import py
 from pypy.rpython.lltypesystem import lltype, llmemory, rffi, rstr, rclass
 from pypy.rpython.annlowlevel import llhelper
-from pypy.jit.metainterp.history import ResOperation, LoopToken
+from pypy.jit.metainterp.history import ResOperation, TargetToken, JitCellToken
 from pypy.jit.metainterp.history import (BoxInt, BoxPtr, ConstInt, ConstFloat,
-                                         ConstPtr, Box, BoxFloat, BasicFailDescr)
+                                         ConstPtr, Box, BoxFloat,
+                                         BasicFailDescr)
 from pypy.jit.backend.detect_cpu import getcpuclass
 from pypy.jit.backend.x86.arch import WORD
 from pypy.jit.backend.x86.rx86 import fits_in_32bits
@@ -279,13 +280,9 @@
                                      descr=BasicFailDescr()),
                         ]
                     ops[-2].setfailargs([i1])
-                    looptoken = LoopToken()
+                    looptoken = JitCellToken()
                     self.cpu.compile_loop([b], ops, looptoken)
-                    if op == rop.INT_IS_TRUE:
-                        self.cpu.set_future_value_int(0, b.value)
-                    else:
-                        self.cpu.set_future_value_ref(0, b.value)
-                    self.cpu.execute_token(looptoken)
+                    self.cpu.execute_token(looptoken, b.value)
                     result = self.cpu.get_latest_value_int(0)
                     if guard == rop.GUARD_FALSE:
                         assert result == execute(self.cpu, None,
@@ -329,11 +326,10 @@
                         ]
                     ops[-2].setfailargs([i1])
                     inputargs = [i for i in (a, b) if isinstance(i, Box)]
-                    looptoken = LoopToken()
+                    looptoken = JitCellToken()
                     self.cpu.compile_loop(inputargs, ops, looptoken)
-                    for i, box in enumerate(inputargs):
-                        self.cpu.set_future_value_int(i, box.value)
-                    self.cpu.execute_token(looptoken)
+                    inputvalues = [box.value for box in inputargs]
+                    self.cpu.execute_token(looptoken, *inputvalues)
                     result = self.cpu.get_latest_value_int(0)
                     expected = execute(self.cpu, None, op, None, a, b).value
                     if guard == rop.GUARD_FALSE:
@@ -353,9 +349,10 @@
         i0 = BoxInt()
         i1 = BoxInt()
         i2 = BoxInt()
+        targettoken = TargetToken()
         faildescr1 = BasicFailDescr(1)
         faildescr2 = BasicFailDescr(2)
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
         looptoken.number = 17
         class FakeString(object):
             def __init__(self, val):
@@ -365,14 +362,15 @@
                 return self.val
 
         operations = [
+            ResOperation(rop.LABEL, [i0], None, descr=targettoken),
             ResOperation(rop.DEBUG_MERGE_POINT, [FakeString("hello"), 0], None),
             ResOperation(rop.INT_ADD, [i0, ConstInt(1)], i1),
             ResOperation(rop.INT_LE, [i1, ConstInt(9)], i2),
             ResOperation(rop.GUARD_TRUE, [i2], None, descr=faildescr1),
-            ResOperation(rop.JUMP, [i1], None, descr=looptoken),
+            ResOperation(rop.JUMP, [i1], None, descr=targettoken),
             ]
         inputargs = [i0]
-        operations[3].setfailargs([i1])
+        operations[-2].setfailargs([i1])
         self.cpu.compile_loop(inputargs, operations, looptoken)
         name, loopaddress, loopsize = agent.functions[0]
         assert name == "Loop # 17: hello (loop counter 0)"
@@ -385,7 +383,7 @@
             ResOperation(rop.INT_LE, [i1b, ConstInt(19)], i3),
             ResOperation(rop.GUARD_TRUE, [i3], None, descr=faildescr2),
             ResOperation(rop.DEBUG_MERGE_POINT, [FakeString("bye"), 0], None),
-            ResOperation(rop.JUMP, [i1b], None, descr=looptoken),
+            ResOperation(rop.JUMP, [i1b], None, descr=targettoken),
         ]
         bridge[1].setfailargs([i1b])
 
@@ -397,8 +395,7 @@
         assert address >= loopaddress + loopsize
         assert size >= 10 # randomish number
 
-        self.cpu.set_future_value_int(0, 2)
-        fail = self.cpu.execute_token(looptoken)
+        fail = self.cpu.execute_token(looptoken, 2)
         assert fail.identifier == 2
         res = self.cpu.get_latest_value_int(0)
         assert res == 20
@@ -408,11 +405,13 @@
         i0 = BoxInt()
         i1 = BoxInt()
         i2 = BoxInt()
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
+        targettoken = TargetToken()
         operations = [
+            ResOperation(rop.LABEL, [i0], None, descr=targettoken),
             ResOperation(rop.INT_ADD, [i0, ConstInt(1)], i1),
             ResOperation(rop.INT_LE, [i1, ConstInt(9)], i2),
-            ResOperation(rop.JUMP, [i1], None, descr=looptoken),
+            ResOperation(rop.JUMP, [i1], None, descr=targettoken),
             ]
         inputargs = [i0]
         debug._log = dlog = debug.DebugLog()
@@ -499,12 +498,10 @@
             ops[3].setfailargs([])
             ops[5].setfailargs([])
             ops[7].setfailargs([])
-            looptoken = LoopToken()
+            looptoken = JitCellToken()
             self.cpu.compile_loop([i1, i2], ops, looptoken)
 
-            self.cpu.set_future_value_int(0, 123450)
-            self.cpu.set_future_value_int(1, 123408)
-            fail = self.cpu.execute_token(looptoken)
+            fail = self.cpu.execute_token(looptoken, 123450, 123408)
             assert fail.identifier == 0
             assert self.cpu.get_latest_value_int(0) == 42
             assert self.cpu.get_latest_value_int(1) == 42
@@ -523,19 +520,20 @@
 
         loop = """
         [i0]
+        label(i0, descr=targettoken)
         debug_merge_point('xyz', 0)
         i1 = int_add(i0, 1)
         i2 = int_ge(i1, 10)
         guard_false(i2) []
-        jump(i1)
+        jump(i1, descr=targettoken)
         """
-        ops = parse(loop)
+        ops = parse(loop, namespace={'targettoken': TargetToken()})
         debug._log = dlog = debug.DebugLog()
         try:
             self.cpu.assembler.set_debug(True)
-            self.cpu.compile_loop(ops.inputargs, ops.operations, ops.token)
-            self.cpu.set_future_value_int(0, 0)
-            self.cpu.execute_token(ops.token)
+            looptoken = JitCellToken()
+            self.cpu.compile_loop(ops.inputargs, ops.operations, looptoken)
+            self.cpu.execute_token(looptoken, 0)
             # check debugging info
             struct = self.cpu.assembler.loop_run_counters[0]
             assert struct.i == 10
@@ -547,16 +545,17 @@
     def test_debugger_checksum(self):
         loop = """
         [i0]
+        label(i0, descr=targettoken)
         debug_merge_point('xyz', 0)
         i1 = int_add(i0, 1)
         i2 = int_ge(i1, 10)
         guard_false(i2) []
-        jump(i1)
+        jump(i1, descr=targettoken)
         """
-        ops = parse(loop)
+        ops = parse(loop, namespace={'targettoken': TargetToken()})
         self.cpu.assembler.set_debug(True)
-        self.cpu.compile_loop(ops.inputargs, ops.operations, ops.token)
-        self.cpu.set_future_value_int(0, 0)
-        self.cpu.execute_token(ops.token)
-        assert ops.token._x86_debug_checksum == sum([op.getopnum()
+        looptoken = JitCellToken()
+        self.cpu.compile_loop(ops.inputargs, ops.operations, looptoken)
+        self.cpu.execute_token(looptoken, 0)
+        assert looptoken._x86_debug_checksum == sum([op.getopnum()
                                                      for op in ops.operations])
diff --git a/pypy/jit/backend/x86/test/test_zrpy_gc.py b/pypy/jit/backend/x86/test/test_zrpy_gc.py
--- a/pypy/jit/backend/x86/test/test_zrpy_gc.py
+++ b/pypy/jit/backend/x86/test/test_zrpy_gc.py
@@ -457,6 +457,46 @@
     def test_compile_framework_7(self):
         self.run('compile_framework_7')
 
+    def define_compile_framework_7_interior(cls):
+        # Array of structs containing pointers (test the write barrier
+        # for setinteriorfield_gc)
+        S = lltype.GcStruct('S', ('i', lltype.Signed))
+        A = lltype.GcArray(lltype.Struct('entry', ('x', lltype.Ptr(S)),
+                                                  ('y', lltype.Ptr(S)),
+                                                  ('z', lltype.Ptr(S))))
+        class Glob:
+            a = lltype.nullptr(A)
+        glob = Glob()
+        #
+        def make_s(i):
+            s = lltype.malloc(S)
+            s.i = i
+            return s
+        #
+        @unroll_safe
+        def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
+            a = glob.a
+            if not a:
+                a = glob.a = lltype.malloc(A, 10)
+            i = 0
+            while i < 10:
+                a[i].x = make_s(n + i * 100 + 1)
+                a[i].y = make_s(n + i * 100 + 2)
+                a[i].z = make_s(n + i * 100 + 3)
+                i += 1
+            i = 0
+            while i < 10:
+                check(a[i].x.i == n + i * 100 + 1)
+                check(a[i].y.i == n + i * 100 + 2)
+                check(a[i].z.i == n + i * 100 + 3)
+                i += 1
+            n -= x.foo
+            return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
+        return None, f, None
+
+    def test_compile_framework_7_interior(self):
+        self.run('compile_framework_7_interior')
+
     def define_compile_framework_8(cls):
         # Array of pointers, of unknown length (test write_barrier_from_array)
         def before(n, x):
diff --git a/pypy/jit/codewriter/effectinfo.py b/pypy/jit/codewriter/effectinfo.py
--- a/pypy/jit/codewriter/effectinfo.py
+++ b/pypy/jit/codewriter/effectinfo.py
@@ -241,12 +241,15 @@
         return op.opname == 'jit_force_quasi_immutable'
 
 class RandomEffectsAnalyzer(BoolGraphAnalyzer):
-    def analyze_direct_call(self, graph, seen=None):
-        if hasattr(graph, "func") and hasattr(graph.func, "_ptr"):
-            if graph.func._ptr._obj.random_effects_on_gcobjs:
+    def analyze_external_call(self, op, seen=None):
+        try:
+            funcobj = op.args[0].value._obj
+            if funcobj.random_effects_on_gcobjs:
                 return True
-        return super(RandomEffectsAnalyzer, self).analyze_direct_call(graph,
-                                                                      seen)
+        except (AttributeError, lltype.DelayedPointer):
+            return True   # better safe than sorry
+        return super(RandomEffectsAnalyzer, self).analyze_external_call(
+            op, seen)
 
     def analyze_simple_operation(self, op, graphinfo):
         return False
diff --git a/pypy/jit/codewriter/jtransform.py b/pypy/jit/codewriter/jtransform.py
--- a/pypy/jit/codewriter/jtransform.py
+++ b/pypy/jit/codewriter/jtransform.py
@@ -15,6 +15,8 @@
 from pypy.translator.simplify import get_funcobj
 from pypy.translator.unsimplify import varoftype
 
+class UnsupportedMallocFlags(Exception):
+    pass
 
 def transform_graph(graph, cpu=None, callcontrol=None, portal_jd=None):
     """Transform a control flow graph to make it suitable for
@@ -205,7 +207,24 @@
         if op.args[0] in self.vable_array_vars:
             self.vable_array_vars[op.result]= self.vable_array_vars[op.args[0]]
 
-    rewrite_op_cast_pointer = rewrite_op_same_as
+    def rewrite_op_cast_pointer(self, op):
+        newop = self.rewrite_op_same_as(op)
+        assert newop is None
+        return
+        # disabled for now
+        if (self._is_rclass_instance(op.args[0]) and
+                self._is_rclass_instance(op.result)):
+            FROM = op.args[0].concretetype.TO
+            TO = op.result.concretetype.TO
+            if lltype._castdepth(TO, FROM) > 0:
+                vtable = heaptracker.get_vtable_for_gcstruct(self.cpu, TO)
+                const_vtable = Constant(vtable, lltype.typeOf(vtable))
+                return [None, # hack, do the right renaming from op.args[0] to op.result
+                        SpaceOperation("record_known_class", [op.args[0], const_vtable], None)]
+
+    def rewrite_op_jit_record_known_class(self, op):
+        return SpaceOperation("record_known_class", [op.args[0], op.args[1]], None)
+
     def rewrite_op_cast_bool_to_int(self, op): pass
     def rewrite_op_cast_bool_to_uint(self, op): pass
     def rewrite_op_cast_char_to_int(self, op): pass
@@ -479,13 +498,29 @@
         else:
             log.WARNING('ignoring hint %r at %r' % (hints, self.graph))
 
+    def _rewrite_raw_malloc(self, op, name, args):
+        d = op.args[1].value.copy()
+        d.pop('flavor')
+        add_memory_pressure = d.pop('add_memory_pressure', False)
+        zero = d.pop('zero', False)
+        track_allocation = d.pop('track_allocation', True)
+        if d:
+            raise UnsupportedMallocFlags(d)
+        TYPE = op.args[0].value
+        if zero:
+            name += '_zero'
+        if add_memory_pressure:
+            name += '_add_memory_pressure'
+        if not track_allocation:
+            name += '_no_track_allocation'
+        return self._do_builtin_call(op, name, args,
+                                     extra = (TYPE,),
+                                     extrakey = TYPE)
+
     def rewrite_op_malloc_varsize(self, op):
         if op.args[1].value['flavor'] == 'raw':
-            ARRAY = op.args[0].value
-            return self._do_builtin_call(op, 'raw_malloc',
-                                         [op.args[2]],
-                                         extra = (ARRAY,),
-                                         extrakey = ARRAY)
+            return self._rewrite_raw_malloc(op, 'raw_malloc_varsize',
+                                            [op.args[2]])
         if op.args[0].value == rstr.STR:
             return SpaceOperation('newstr', [op.args[2]], op.result)
         elif op.args[0].value == rstr.UNICODE:
@@ -498,11 +533,18 @@
                                   op.result)
 
     def rewrite_op_free(self, op):
-        flags = op.args[1].value
-        assert flags['flavor'] == 'raw'
-        ARRAY = op.args[0].concretetype.TO
-        return self._do_builtin_call(op, 'raw_free', [op.args[0]],
-                                     extra = (ARRAY,), extrakey = ARRAY)
+        d = op.args[1].value.copy()
+        assert d['flavor'] == 'raw'
+        d.pop('flavor')
+        track_allocation = d.pop('track_allocation', True)
+        if d:
+            raise UnsupportedMallocFlags(d)
+        STRUCT = op.args[0].concretetype.TO
+        name = 'raw_free'
+        if not track_allocation:
+            name += '_no_track_allocation'
+        return self._do_builtin_call(op, name, [op.args[0]],
+                                     extra = (STRUCT,), extrakey = STRUCT)
 
     def rewrite_op_getarrayitem(self, op):
         ARRAY = op.args[0].concretetype.TO
@@ -703,6 +745,9 @@
         return [op0, op1]
 
     def rewrite_op_malloc(self, op):
+        if op.args[1].value['flavor'] == 'raw':
+            return self._rewrite_raw_malloc(op, 'raw_malloc_fixedsize', [])
+        #
         assert op.args[1].value == {'flavor': 'gc'}
         STRUCT = op.args[0].value
         vtable = heaptracker.get_vtable_for_gcstruct(self.cpu, STRUCT)
@@ -1053,35 +1098,20 @@
     # jit.codewriter.support.
 
     for _op, _oopspec in [('llong_invert',  'INVERT'),
-                          ('ullong_invert', 'INVERT'),
                           ('llong_lt',      'LT'),
                           ('llong_le',      'LE'),
                           ('llong_eq',      'EQ'),
                           ('llong_ne',      'NE'),
                           ('llong_gt',      'GT'),
                           ('llong_ge',      'GE'),
-                          ('ullong_lt',     'ULT'),
-                          ('ullong_le',     'ULE'),
-                          ('ullong_eq',     'EQ'),
-                          ('ullong_ne',     'NE'),
-                          ('ullong_gt',     'UGT'),
-                          ('ullong_ge',     'UGE'),
                           ('llong_add',     'ADD'),
                           ('llong_sub',     'SUB'),
                           ('llong_mul',     'MUL'),
                           ('llong_and',     'AND'),
                           ('llong_or',      'OR'),
                           ('llong_xor',     'XOR'),
-                          ('ullong_add',    'ADD'),
-                          ('ullong_sub',    'SUB'),
-                          ('ullong_mul',    'MUL'),
-                          ('ullong_and',    'AND'),
-                          ('ullong_or',     'OR'),
-                          ('ullong_xor',    'XOR'),
                           ('llong_lshift',  'LSHIFT'),
                           ('llong_rshift',  'RSHIFT'),
-                          ('ullong_lshift', 'LSHIFT'),
-                          ('ullong_rshift', 'URSHIFT'),
                           ('cast_int_to_longlong',     'FROM_INT'),
                           ('truncate_longlong_to_int', 'TO_INT'),
                           ('cast_float_to_longlong',   'FROM_FLOAT'),
@@ -1104,6 +1134,21 @@
                           ('cast_uint_to_ulonglong',    'FROM_UINT'),
                           ('cast_float_to_ulonglong',   'FROM_FLOAT'),
                           ('cast_ulonglong_to_float',   'U_TO_FLOAT'),
+                          ('ullong_invert', 'INVERT'),
+                          ('ullong_lt',     'ULT'),
+                          ('ullong_le',     'ULE'),
+                          ('ullong_eq',     'EQ'),
+                          ('ullong_ne',     'NE'),
+                          ('ullong_gt',     'UGT'),
+                          ('ullong_ge',     'UGE'),
+                          ('ullong_add',    'ADD'),
+                          ('ullong_sub',    'SUB'),
+                          ('ullong_mul',    'MUL'),
+                          ('ullong_and',    'AND'),
+                          ('ullong_or',     'OR'),
+                          ('ullong_xor',    'XOR'),
+                          ('ullong_lshift', 'LSHIFT'),
+                          ('ullong_rshift', 'URSHIFT'),
                          ]:
         exec py.code.Source('''
             def rewrite_op_%s(self, op):
@@ -1134,7 +1179,7 @@
 
     def rewrite_op_llong_is_true(self, op):
         v = varoftype(op.args[0].concretetype)
-        op0 = SpaceOperation('cast_int_to_longlong',
+        op0 = SpaceOperation('cast_primitive',
                              [Constant(0, lltype.Signed)],
                              v)
         args = [op.args[0], v]
diff --git a/pypy/jit/codewriter/support.py b/pypy/jit/codewriter/support.py
--- a/pypy/jit/codewriter/support.py
+++ b/pypy/jit/codewriter/support.py
@@ -258,6 +258,9 @@
     y = ~r_ulonglong(xll)
     return u_to_longlong(y)
 
+def _ll_1_ullong_invert(xull):
+    return ~xull
+
 def _ll_2_llong_lt(xll, yll):
     return xll < yll
 
@@ -276,16 +279,22 @@
 def _ll_2_llong_ge(xll, yll):
     return xll >= yll
 
-def _ll_2_llong_ult(xull, yull):
+def _ll_2_ullong_eq(xull, yull):
+    return xull == yull
+
+def _ll_2_ullong_ne(xull, yull):
+    return xull != yull
+
+def _ll_2_ullong_ult(xull, yull):
     return xull < yull
 
-def _ll_2_llong_ule(xull, yull):
+def _ll_2_ullong_ule(xull, yull):
     return xull <= yull
 
-def _ll_2_llong_ugt(xull, yull):
+def _ll_2_ullong_ugt(xull, yull):
     return xull > yull
 
-def _ll_2_llong_uge(xull, yull):
+def _ll_2_ullong_uge(xull, yull):
     return xull >= yull
 
 def _ll_2_llong_add(xll, yll):
@@ -312,14 +321,41 @@
     z = r_ulonglong(xll) ^ r_ulonglong(yll)
     return u_to_longlong(z)
 
+def _ll_2_ullong_add(xull, yull):
+    z = (xull) + (yull)
+    return (z)
+
+def _ll_2_ullong_sub(xull, yull):
+    z = (xull) - (yull)
+    return (z)
+
+def _ll_2_ullong_mul(xull, yull):
+    z = (xull) * (yull)
+    return (z)
+
+def _ll_2_ullong_and(xull, yull):
+    z = (xull) & (yull)
+    return (z)
+
+def _ll_2_ullong_or(xull, yull):
+    z = (xull) | (yull)
+    return (z)
+
+def _ll_2_ullong_xor(xull, yull):
+    z = (xull) ^ (yull)
+    return (z)
+
 def _ll_2_llong_lshift(xll, y):
     z = r_ulonglong(xll) << y
     return u_to_longlong(z)
 
+def _ll_2_ullong_lshift(xull, y):
+    return xull << y
+
 def _ll_2_llong_rshift(xll, y):
     return xll >> y
 
-def _ll_2_llong_urshift(xull, y):
+def _ll_2_ullong_urshift(xull, y):
     return xull >> y
 
 def _ll_1_llong_from_int(x):
@@ -563,15 +599,75 @@
             return p
         return _ll_0_alloc_with_del
 
-    def build_ll_1_raw_malloc(ARRAY):
-        def _ll_1_raw_malloc(n):
-            return lltype.malloc(ARRAY, n, flavor='raw')
-        return _ll_1_raw_malloc
+    def build_raw_malloc_varsize_builder(zero=False,
+                                         add_memory_pressure=False,
+                                         track_allocation=True):
+        def build_ll_1_raw_malloc_varsize(ARRAY):
+            def _ll_1_raw_malloc_varsize(n):
+                return lltype.malloc(ARRAY, n, flavor='raw', zero=zero,
+                                     add_memory_pressure=add_memory_pressure,
+                                     track_allocation=track_allocation)
+            return _ll_1_raw_malloc_varsize
+        return build_ll_1_raw_malloc_varsize
 
-    def build_ll_1_raw_free(ARRAY):
-        def _ll_1_raw_free(p):
-            lltype.free(p, flavor='raw')
-        return _ll_1_raw_free
+    build_ll_1_raw_malloc_varsize = (
+        build_raw_malloc_varsize_builder())
+    build_ll_1_raw_malloc_varsize_zero = (
+        build_raw_malloc_varsize_builder(zero=True))
+    build_ll_1_raw_malloc_varsize_zero_add_memory_pressure = (
+        build_raw_malloc_varsize_builder(zero=True, add_memory_pressure=True))
+    build_ll_1_raw_malloc_varsize_add_memory_pressure = (
+        build_raw_malloc_varsize_builder(add_memory_pressure=True))
+    build_ll_1_raw_malloc_varsize_no_track_allocation = (
+        build_raw_malloc_varsize_builder(track_allocation=False))
+    build_ll_1_raw_malloc_varsize_zero_no_track_allocation = (
+        build_raw_malloc_varsize_builder(zero=True, track_allocation=False))
+    build_ll_1_raw_malloc_varsize_zero_add_memory_pressure_no_track_allocation = (
+        build_raw_malloc_varsize_builder(zero=True, add_memory_pressure=True, track_allocation=False))
+    build_ll_1_raw_malloc_varsize_add_memory_pressure_no_track_allocation = (
+        build_raw_malloc_varsize_builder(add_memory_pressure=True, track_allocation=False))
+
+    def build_raw_malloc_fixedsize_builder(zero=False,
+                                           add_memory_pressure=False,
+                                           track_allocation=True):
+        def build_ll_0_raw_malloc_fixedsize(STRUCT):
+            def _ll_0_raw_malloc_fixedsize():
+                return lltype.malloc(STRUCT, flavor='raw', zero=zero,
+                                     add_memory_pressure=add_memory_pressure,
+                                     track_allocation=track_allocation)
+            return _ll_0_raw_malloc_fixedsize
+        return build_ll_0_raw_malloc_fixedsize
+
+    build_ll_0_raw_malloc_fixedsize = (
+        build_raw_malloc_fixedsize_builder())
+    build_ll_0_raw_malloc_fixedsize_zero = (
+        build_raw_malloc_fixedsize_builder(zero=True))
+    build_ll_0_raw_malloc_fixedsize_zero_add_memory_pressure = (
+        build_raw_malloc_fixedsize_builder(zero=True, add_memory_pressure=True))
+    build_ll_0_raw_malloc_fixedsize_add_memory_pressure = (
+        build_raw_malloc_fixedsize_builder(add_memory_pressure=True))
+    build_ll_0_raw_malloc_fixedsize_no_track_allocation = (
+        build_raw_malloc_fixedsize_builder(track_allocation=False))
+    build_ll_0_raw_malloc_fixedsize_zero_no_track_allocation = (
+        build_raw_malloc_fixedsize_builder(zero=True, track_allocation=False))
+    build_ll_0_raw_malloc_fixedsize_zero_add_memory_pressure_no_track_allocation = (
+        build_raw_malloc_fixedsize_builder(zero=True, add_memory_pressure=True, track_allocation=False))
+    build_ll_0_raw_malloc_fixedsize_add_memory_pressure_no_track_allocation = (
+        build_raw_malloc_fixedsize_builder(add_memory_pressure=True, track_allocation=False))
+
+    def build_raw_free_builder(track_allocation=True):
+        def build_ll_1_raw_free(ARRAY):
+            def _ll_1_raw_free(p):
+                lltype.free(p, flavor='raw',
+                            track_allocation=track_allocation)
+            return _ll_1_raw_free
+        return build_ll_1_raw_free
+
+    build_ll_1_raw_free = (
+        build_raw_free_builder())
+    build_ll_1_raw_free_no_track_allocation = (
+        build_raw_free_builder(track_allocation=False))
+
 
 class OOtypeHelpers:
 
diff --git a/pypy/jit/codewriter/test/test_call.py b/pypy/jit/codewriter/test/test_call.py
--- a/pypy/jit/codewriter/test/test_call.py
+++ b/pypy/jit/codewriter/test/test_call.py
@@ -192,3 +192,21 @@
     [op] = block.operations
     call_descr = cc.getcalldescr(op)
     assert call_descr.extrainfo.has_random_effects()
+
+def test_random_effects_on_stacklet_switch():
+    from pypy.jit.backend.llgraph.runner import LLtypeCPU
+    from pypy.rlib._rffi_stacklet import switch, thread_handle, handle
+    @jit.dont_look_inside
+    def f():
+        switch(rffi.cast(thread_handle, 0), rffi.cast(handle, 0))
+
+    rtyper = support.annotate(f, [])
+    jitdriver_sd = FakeJitDriverSD(rtyper.annotator.translator.graphs[0])
+    cc = CallControl(LLtypeCPU(rtyper), jitdrivers_sd=[jitdriver_sd])
+    res = cc.find_all_graphs(FakePolicy())
+
+    [f_graph] = [x for x in res if x.func is f]
+    [block, _] = list(f_graph.iterblocks())
+    op = block.operations[-1]
+    call_descr = cc.getcalldescr(op)
+    assert call_descr.extrainfo.has_random_effects()
diff --git a/pypy/jit/codewriter/test/test_codewriter.py b/pypy/jit/codewriter/test/test_codewriter.py
--- a/pypy/jit/codewriter/test/test_codewriter.py
+++ b/pypy/jit/codewriter/test/test_codewriter.py
@@ -217,7 +217,7 @@
     cw.make_jitcodes(verbose=True)
     #
     s = jitdriver_sd.mainjitcode.dump()
-    assert 'residual_call_ir_i $<* fn _ll_1_raw_malloc__Signed>' in s
+    assert 'residual_call_ir_i $<* fn _ll_1_raw_malloc_varsize__Signed>' in s
     assert 'setarrayitem_raw_i' in s
     assert 'getarrayitem_raw_i' in s
     assert 'residual_call_ir_v $<* fn _ll_1_raw_free__arrayPtr>' in s
diff --git a/pypy/jit/codewriter/test/test_jtransform.py b/pypy/jit/codewriter/test/test_jtransform.py
--- a/pypy/jit/codewriter/test/test_jtransform.py
+++ b/pypy/jit/codewriter/test/test_jtransform.py
@@ -1,3 +1,5 @@
+
+import py
 import random
 try:
     from itertools import product
@@ -15,12 +17,12 @@
 
 from pypy.objspace.flow.model import FunctionGraph, Block, Link
 from pypy.objspace.flow.model import SpaceOperation, Variable, Constant
-from pypy.rpython.lltypesystem import lltype, llmemory, rclass, rstr
+from pypy.rpython.lltypesystem import lltype, llmemory, rclass, rstr, rffi
 from pypy.rpython.lltypesystem.module import ll_math
 from pypy.translator.unsimplify import varoftype
 from pypy.jit.codewriter import heaptracker, effectinfo
 from pypy.jit.codewriter.flatten import ListOfKind
-from pypy.jit.codewriter.jtransform import Transformer
+from pypy.jit.codewriter.jtransform import Transformer, UnsupportedMallocFlags
 from pypy.jit.metainterp.history import getkind
 
 def const(x):
@@ -538,6 +540,73 @@
     assert op1.opname == '-live-'
     assert op1.args == []
 
+def test_raw_malloc():
+    S = rffi.CArray(lltype.Signed)
+    v1 = varoftype(lltype.Signed)
+    v = varoftype(lltype.Ptr(S))
+    flags = Constant({'flavor': 'raw'}, lltype.Void)
+    op = SpaceOperation('malloc_varsize', [Constant(S, lltype.Void), flags,
+                                           v1], v)
+    tr = Transformer(FakeCPU(), FakeResidualCallControl())
+    op0, op1 = tr.rewrite_operation(op)
+    assert op0.opname == 'residual_call_ir_i'
+    assert op0.args[0].value == 'raw_malloc_varsize' # pseudo-function as a str
+    assert op1.opname == '-live-'
+    assert op1.args == []
+
+def test_raw_malloc_zero():
+    S = rffi.CArray(lltype.Signed)
+    v1 = varoftype(lltype.Signed)
+    v = varoftype(lltype.Ptr(S))
+    flags = Constant({'flavor': 'raw', 'zero': True}, lltype.Void)
+    op = SpaceOperation('malloc_varsize', [Constant(S, lltype.Void), flags,
+                                           v1], v)
+    tr = Transformer(FakeCPU(), FakeResidualCallControl())
+    op0, op1 = tr.rewrite_operation(op)
+    assert op0.opname == 'residual_call_ir_i'
+    assert op0.args[0].value == 'raw_malloc_varsize_zero'  # pseudo-fn as a str
+    assert op1.opname == '-live-'
+    assert op1.args == []
+
+def test_raw_malloc_unsupported_flag():
+    S = rffi.CArray(lltype.Signed)
+    v1 = varoftype(lltype.Signed)
+    v = varoftype(lltype.Ptr(S))
+    flags = Constant({'flavor': 'raw', 'unsupported_flag': True}, lltype.Void)
+    op = SpaceOperation('malloc_varsize', [Constant(S, lltype.Void), flags,
+                                           v1], v)
+    tr = Transformer(FakeCPU(), FakeResidualCallControl())
+    py.test.raises(UnsupportedMallocFlags, tr.rewrite_operation, op)
+
+def test_raw_malloc_fixedsize():
+    S = lltype.Struct('dummy', ('x', lltype.Signed))
+    v = varoftype(lltype.Ptr(S))
+    flags = Constant({'flavor': 'raw', 'zero': True}, lltype.Void)
+    op = SpaceOperation('malloc', [Constant(S, lltype.Void), flags], v)
+    tr = Transformer(FakeCPU(), FakeResidualCallControl())
+    op0, op1 = tr.rewrite_operation(op)
+    assert op0.opname == 'residual_call_r_i'
+    assert op0.args[0].value == 'raw_malloc_fixedsize_zero' #pseudo-fn as a str
+    assert op1.opname == '-live-'
+    assert op1.args == []
+
+def test_raw_free():
+    S = lltype.Struct('dummy', ('x', lltype.Signed))
+    for flag in [True, False]:
+        flags = Constant({'flavor': 'raw', 'track_allocation': flag},
+                         lltype.Void)
+        op = SpaceOperation('free', [varoftype(lltype.Ptr(S)), flags],
+                            varoftype(lltype.Void))
+        tr = Transformer(FakeCPU(), FakeResidualCallControl())
+        op0, op1 = tr.rewrite_operation(op)
+        assert op0.opname == 'residual_call_ir_v'
+        if flag:
+            pseudo_op_name = 'raw_free'
+        else:
+            pseudo_op_name = 'raw_free_no_track_allocation'
+        assert op0.args[0].value == pseudo_op_name   # pseudo-function as a str
+        assert op1.opname == '-live-'
+
 def test_rename_on_links():
     v1 = Variable()
     v2 = Variable(); v2.concretetype = llmemory.Address
@@ -1140,4 +1209,4 @@
     assert op1.opname == 'mark_opaque_ptr'
     assert op1.args == [v1]
     assert op1.result is None
-    assert op2 is None
\ No newline at end of file
+    assert op2 is None
diff --git a/pypy/jit/codewriter/test/test_longlong.py b/pypy/jit/codewriter/test/test_longlong.py
--- a/pypy/jit/codewriter/test/test_longlong.py
+++ b/pypy/jit/codewriter/test/test_longlong.py
@@ -78,7 +78,7 @@
             oplist = tr.rewrite_operation(op)
             assert len(oplist) == 2
             assert oplist[0].opname == 'residual_call_irf_f'
-            assert oplist[0].args[0].value == 'llong_from_int'
+            assert oplist[0].args[0].value == opname.split('_')[0]+'_from_int'
             assert oplist[0].args[1] == 'calldescr-84'
             assert list(oplist[0].args[2]) == [const(0)]
             assert list(oplist[0].args[3]) == []
diff --git a/pypy/jit/metainterp/blackhole.py b/pypy/jit/metainterp/blackhole.py
--- a/pypy/jit/metainterp/blackhole.py
+++ b/pypy/jit/metainterp/blackhole.py
@@ -518,6 +518,9 @@
     @arguments("r")
     def bhimpl_mark_opaque_ptr(a):
         pass
+    @arguments("r", "i")
+    def bhimpl_record_known_class(a, b):
+        pass
 
     @arguments("i", returns="i")
     def bhimpl_int_copy(a):
@@ -1501,7 +1504,6 @@
                         all_virtuals=None):
     from pypy.jit.metainterp.resume import blackhole_from_resumedata
     #debug_start('jit-blackhole')
-    metainterp_sd.profiler.start_blackhole()
     blackholeinterp = blackhole_from_resumedata(
         metainterp_sd.blackholeinterpbuilder,
         jitdriver_sd,
@@ -1515,10 +1517,9 @@
     current_exc = blackholeinterp._prepare_resume_from_failure(
         resumedescr.guard_opnum, dont_change_position)
 
-    try:
-        _run_forever(blackholeinterp, current_exc)
-    finally:
-        metainterp_sd.profiler.end_blackhole()
+    #try:
+    _run_forever(blackholeinterp, current_exc)
+    #finally:
         #debug_stop('jit-blackhole')
 
 def convert_and_run_from_pyjitpl(metainterp, raising_exception=False):
@@ -1526,7 +1527,6 @@
     # 'metainterp.framestack'.
     #debug_start('jit-blackhole')
     metainterp_sd = metainterp.staticdata
-    metainterp_sd.profiler.start_blackhole()
     nextbh = None
     for frame in metainterp.framestack:
         curbh = metainterp_sd.blackholeinterpbuilder.acquire_interp()
@@ -1543,8 +1543,7 @@
         firstbh.exception_last_value = current_exc
         current_exc = lltype.nullptr(rclass.OBJECTPTR.TO)
     #
-    try:
-        _run_forever(firstbh, current_exc)
-    finally:
-        metainterp_sd.profiler.end_blackhole()
+    #try:
+    _run_forever(firstbh, current_exc)
+    #finally:
         #debug_stop('jit-blackhole')
diff --git a/pypy/jit/metainterp/compile.py b/pypy/jit/metainterp/compile.py
--- a/pypy/jit/metainterp/compile.py
+++ b/pypy/jit/metainterp/compile.py
@@ -9,12 +9,13 @@
 from pypy.tool.sourcetools import func_with_new_name
 
 from pypy.jit.metainterp.resoperation import ResOperation, rop, get_deep_immutable_oplist
-from pypy.jit.metainterp.history import TreeLoop, Box, History, LoopToken
+from pypy.jit.metainterp.history import TreeLoop, Box, History, JitCellToken, TargetToken
 from pypy.jit.metainterp.history import AbstractFailDescr, BoxInt
-from pypy.jit.metainterp.history import BoxPtr, BoxObj, BoxFloat, Const
+from pypy.jit.metainterp.history import BoxPtr, BoxObj, BoxFloat, Const, ConstInt
 from pypy.jit.metainterp import history
 from pypy.jit.metainterp.typesystem import llhelper, oohelper
 from pypy.jit.metainterp.optimize import InvalidLoop
+from pypy.jit.metainterp.inliner import Inliner
 from pypy.jit.metainterp.resume import NUMBERING, PENDINGFIELDSP
 from pypy.jit.codewriter import heaptracker, longlong
 
@@ -23,7 +24,7 @@
     from pypy.jit.metainterp.jitprof import ABORT_BRIDGE
     raise SwitchToBlackhole(ABORT_BRIDGE)
 
-def show_loop(metainterp_sd, loop=None, error=None):
+def show_procedures(metainterp_sd, procedure=None, error=None):
     # debugging
     if option.view or option.viewloops:
         if error:
@@ -32,11 +33,12 @@
                 errmsg += ': ' + str(error)
         else:
             errmsg = None
-        if loop is None: # or type(loop) is TerminatingLoop:
-            extraloops = []
+        if procedure is None:
+            extraprocedures = []
         else:
-            extraloops = [loop]
-        metainterp_sd.stats.view(errmsg=errmsg, extraloops=extraloops)
+            extraprocedures = [procedure]
+        metainterp_sd.stats.view(errmsg=errmsg,
+                                 extraprocedures=extraprocedures)
 
 def create_empty_loop(metainterp, name_prefix=''):
     name = metainterp.staticdata.stats.name_for_new_loop()
@@ -45,131 +47,261 @@
     return loop
 
 
-def make_loop_token(nb_args, jitdriver_sd):
-    loop_token = LoopToken()
-    loop_token.outermost_jitdriver_sd = jitdriver_sd
-    return loop_token
+def make_jitcell_token(jitdriver_sd):
+    jitcell_token = JitCellToken()
+    jitcell_token.outermost_jitdriver_sd = jitdriver_sd
+    return jitcell_token
 
 def record_loop_or_bridge(metainterp_sd, loop):
     """Do post-backend recordings and cleanups on 'loop'.
     """
-    # get the original loop token (corresponding to 'loop', or if that is
-    # a bridge, to the loop that this bridge belongs to)
-    looptoken = loop.token
-    assert looptoken is not None
+    # get the original jitcell token corresponding to the jitcell from which
+    # this trace starts
+    original_jitcell_token = loop.original_jitcell_token
+    assert original_jitcell_token is not None
     if metainterp_sd.warmrunnerdesc is not None:    # for tests
-        assert looptoken.generation > 0     # has been registered with memmgr
-    wref = weakref.ref(looptoken)
+        assert original_jitcell_token.generation > 0     # has been registered with memmgr
+    wref = weakref.ref(original_jitcell_token)
     for op in loop.operations:
         descr = op.getdescr()
         if isinstance(descr, ResumeDescr):
             descr.wref_original_loop_token = wref   # stick it there
             n = descr.index
             if n >= 0:       # we also record the resumedescr number
-                looptoken.compiled_loop_token.record_faildescr_index(n)
-        elif isinstance(descr, LoopToken):
-            # for a JUMP or a CALL_ASSEMBLER: record it as a potential jump.
+                original_jitcell_token.compiled_loop_token.record_faildescr_index(n)
+        elif isinstance(descr, JitCellToken):
+            # for a CALL_ASSEMBLER: record it as a potential jump.
+            if descr is not original_jitcell_token:
+                original_jitcell_token.record_jump_to(descr)
+            descr.exported_state = None
+            op._descr = None    # clear reference, mostly for tests
+        elif isinstance(descr, TargetToken):
+            # for a JUMP: record it as a potential jump.
             # (the following test is not enough to prevent more complicated
             # cases of cycles, but at least it helps in simple tests of
             # test_memgr.py)
-            if descr is not looptoken:
-                looptoken.record_jump_to(descr)
-            op._descr = None    # clear reference, mostly for tests
+            if descr.original_jitcell_token is not original_jitcell_token:
+                assert descr.original_jitcell_token is not None
+                original_jitcell_token.record_jump_to(descr.original_jitcell_token)
+            # exported_state is cleared by optimizeopt when the short preamble
+            # is constructed. If that did not happen, the label should not
+            # show up in a trace that will be used
+            assert descr.exported_state is None 
             if not we_are_translated():
-                op._jumptarget_number = descr.number
+                op._descr_wref = weakref.ref(op._descr)
+            op._descr = None    # clear reference to prevent the history.Stats
+                                # from keeping the loop alive during tests
     # record this looptoken on the QuasiImmut used in the code
     if loop.quasi_immutable_deps is not None:
         for qmut in loop.quasi_immutable_deps:
             qmut.register_loop_token(wref)
         # XXX maybe we should clear the dictionary here
     # mostly for tests: make sure we don't keep a reference to the LoopToken
-    loop.token = None
+    loop.original_jitcell_token = None
     if not we_are_translated():
-        loop._looptoken_number = looptoken.number
+        loop._looptoken_number = original_jitcell_token.number
 
 # ____________________________________________________________
 
-def compile_new_loop(metainterp, old_loop_tokens, greenkey, start,
-                     start_resumedescr, full_preamble_needed=True):
-    """Try to compile a new loop by closing the current history back
+def compile_loop(metainterp, greenkey, start,
+                 inputargs, jumpargs,
+                 start_resumedescr, full_preamble_needed=True):
+    """Try to compile a new procedure by closing the current history back
     to the first operation.
     """
-    from pypy.jit.metainterp.optimize import optimize_loop
+    from pypy.jit.metainterp.optimizeopt import optimize_trace
 
     history = metainterp.history
-    loop = create_empty_loop(metainterp)
-    loop.inputargs = history.inputargs[:]
+    metainterp_sd = metainterp.staticdata
+    jitdriver_sd = metainterp.jitdriver_sd
+
+    if False:
+        part = partial_trace
+        assert False
+        procedur_token = metainterp.get_procedure_token(greenkey)
+        assert procedure_token
+        all_target_tokens = []
+    else:
+        jitcell_token = make_jitcell_token(jitdriver_sd)
+        part = create_empty_loop(metainterp)
+        part.inputargs = inputargs[:]
+        h_ops = history.operations
+        part.start_resumedescr = start_resumedescr
+        part.operations = [ResOperation(rop.LABEL, inputargs, None, descr=TargetToken(jitcell_token))] + \
+                          [h_ops[i].clone() for i in range(start, len(h_ops))] + \
+                          [ResOperation(rop.JUMP, jumpargs, None, descr=jitcell_token)]
+        try:
+            optimize_trace(metainterp_sd, part, jitdriver_sd.warmstate.enable_opts)
+        except InvalidLoop:
+            return None
+        target_token = part.operations[0].getdescr()
+        assert isinstance(target_token, TargetToken)
+        all_target_tokens = [target_token]
+
+    loop = create_empty_loop(metainterp)        
+    loop.inputargs = part.inputargs
+    loop.operations = part.operations
+    loop.quasi_immutable_deps = {}
+    if part.quasi_immutable_deps:
+        loop.quasi_immutable_deps.update(part.quasi_immutable_deps)
+    while part.operations[-1].getopnum() == rop.LABEL:
+        inliner = Inliner(inputargs, jumpargs)
+        part.quasi_immutable_deps = None
+        part.operations = [part.operations[-1]] + \
+                          [inliner.inline_op(h_ops[i]) for i in range(start, len(h_ops))] + \
+                          [ResOperation(rop.JUMP, [inliner.inline_arg(a) for a in jumpargs],
+                                        None, descr=jitcell_token)]
+        target_token = part.operations[0].getdescr()
+        assert isinstance(target_token, TargetToken)
+        all_target_tokens.append(target_token)
+        inputargs = jumpargs
+        jumpargs = part.operations[-1].getarglist()
+
+        try:
+            optimize_trace(metainterp_sd, part, jitdriver_sd.warmstate.enable_opts)
+        except InvalidLoop:
+            return None
+            
+        loop.operations = loop.operations[:-1] + part.operations
+        if part.quasi_immutable_deps:
+            loop.quasi_immutable_deps.update(part.quasi_immutable_deps)
+
+    if not loop.quasi_immutable_deps:
+        loop.quasi_immutable_deps = None
     for box in loop.inputargs:
         assert isinstance(box, Box)
-    # make a copy, because optimize_loop can mutate the ops and descrs
-    h_ops = history.operations
-    loop.operations = [h_ops[i].clone() for i in range(start, len(h_ops))]
+
+    loop.original_jitcell_token = jitcell_token
+    for label in all_target_tokens:
+        assert isinstance(label, TargetToken)
+        label.original_jitcell_token = jitcell_token
+        if label.virtual_state and label.short_preamble:
+            metainterp_sd.logger_ops.log_short_preamble([], label.short_preamble)
+    jitcell_token.target_tokens = all_target_tokens
+    send_loop_to_backend(greenkey, jitdriver_sd, metainterp_sd, loop, "loop")
+    record_loop_or_bridge(metainterp_sd, loop)
+    return all_target_tokens[0]
+
+def compile_retrace(metainterp, greenkey, start,
+                    inputargs, jumpargs,
+                    start_resumedescr, partial_trace, resumekey):
+    """Try to compile a retrace: extend 'partial_trace' (which ends in a
+    LABEL) with the current history plus a JUMP to greenkey's procedure token.
+    """
+    from pypy.jit.metainterp.optimizeopt import optimize_trace
+
+    history = metainterp.history
     metainterp_sd = metainterp.staticdata
     jitdriver_sd = metainterp.jitdriver_sd
-    loop_token = make_loop_token(len(loop.inputargs), jitdriver_sd)
-    loop.token = loop_token
-    loop.operations[-1].setdescr(loop_token)     # patch the target of the JUMP
 
-    loop.preamble = create_empty_loop(metainterp, 'Preamble ')
-    loop.preamble.inputargs = loop.inputargs
-    loop.preamble.token = make_loop_token(len(loop.inputargs), jitdriver_sd)
-    loop.preamble.start_resumedescr = start_resumedescr
+    loop_jitcell_token = metainterp.get_procedure_token(greenkey)
+    assert loop_jitcell_token
+    assert partial_trace.operations[-1].getopnum() == rop.LABEL
 
+    part = create_empty_loop(metainterp)
+    part.inputargs = inputargs[:]
+    part.start_resumedescr = start_resumedescr
+    h_ops = history.operations
+
+    part.operations = [partial_trace.operations[-1]] + \
+                      [h_ops[i].clone() for i in range(start, len(h_ops))] + \
+                      [ResOperation(rop.JUMP, jumpargs, None, descr=loop_jitcell_token)]
+    label = part.operations[0]
+    orignial_label = label.clone()
+    assert label.getopnum() == rop.LABEL
     try:
-        old_loop_token = optimize_loop(metainterp_sd, old_loop_tokens, loop,
-                                       jitdriver_sd.warmstate.enable_opts)
+        optimize_trace(metainterp_sd, part, jitdriver_sd.warmstate.enable_opts)
     except InvalidLoop:
-        debug_print("compile_new_loop: got an InvalidLoop")
-        return None
-    if old_loop_token is not None:
-        metainterp.staticdata.log("reusing old loop")
-        return old_loop_token
+        #return None # XXX: Disabled for now
+        # Fall back on jumping to preamble
+        target_token = label.getdescr()
+        assert isinstance(target_token, TargetToken)
+        assert target_token.exported_state
+        part.operations = [orignial_label] + \
+                          [ResOperation(rop.JUMP, target_token.exported_state.jump_args,
+                                        None, descr=loop_jitcell_token)]
+        try:
+            optimize_trace(metainterp_sd, part, jitdriver_sd.warmstate.enable_opts,
+                           inline_short_preamble=False)
+            
+        except InvalidLoop:
+            return None
+    assert part.operations[-1].getopnum() != rop.LABEL
+    target_token = label.getdescr()
+    assert isinstance(target_token, TargetToken)
+    assert loop_jitcell_token.target_tokens
+    loop_jitcell_token.target_tokens.append(target_token)
 
-    if loop.preamble.operations is not None:
-        send_loop_to_backend(greenkey, jitdriver_sd, metainterp_sd, loop,
-                             "loop")
-        record_loop_or_bridge(metainterp_sd, loop)
-        token = loop.preamble.token
-        if full_preamble_needed:
-            send_loop_to_backend(greenkey, jitdriver_sd, metainterp_sd,
-                                 loop.preamble, "entry bridge")
-            insert_loop_token(old_loop_tokens, loop.preamble.token)
-            jitdriver_sd.warmstate.attach_unoptimized_bridge_from_interp(
-                greenkey, loop.preamble.token)
-            record_loop_or_bridge(metainterp_sd, loop.preamble)
-        elif token.short_preamble:
-            short = token.short_preamble[-1]
-            metainterp_sd.logger_ops.log_short_preamble(short.inputargs,
-                                                        short.operations)
-        return token
-    else:
-        send_loop_to_backend(greenkey, jitdriver_sd, metainterp_sd, loop,
-                             "loop")
-        insert_loop_token(old_loop_tokens, loop_token)
-        jitdriver_sd.warmstate.attach_unoptimized_bridge_from_interp(
-            greenkey, loop.token)
-        record_loop_or_bridge(metainterp_sd, loop)
-        return loop_token
+    loop = partial_trace
+    loop.operations = loop.operations[:-1] + part.operations
 
-def insert_loop_token(old_loop_tokens, loop_token):
-    # Find where in old_loop_tokens we should insert this new loop_token.
-    # The following algo means "as late as possible, but before another
-    # loop token that would be more general and so completely mask off
-    # the new loop_token".
-    # XXX do we still need a list?
-    old_loop_tokens.append(loop_token)
+    quasi_immutable_deps = {}
+    if loop.quasi_immutable_deps:
+        quasi_immutable_deps.update(loop.quasi_immutable_deps)
+    if part.quasi_immutable_deps:
+        quasi_immutable_deps.update(part.quasi_immutable_deps)
+    if quasi_immutable_deps:
+        loop.quasi_immutable_deps = quasi_immutable_deps
+
+    for box in loop.inputargs:
+        assert isinstance(box, Box)
+
+    target_token = loop.operations[-1].getdescr()
+    resumekey.compile_and_attach(metainterp, loop)
+    target_token = label.getdescr()
+    assert isinstance(target_token, TargetToken)
+    target_token.original_jitcell_token = loop.original_jitcell_token
+    record_loop_or_bridge(metainterp_sd, loop)
+    return target_token
+
+def patch_new_loop_to_load_virtualizable_fields(loop, jitdriver_sd):
+    vinfo = jitdriver_sd.virtualizable_info
+    extra_ops = []
+    inputargs = loop.inputargs
+    vable_box = inputargs[jitdriver_sd.index_of_virtualizable]
+    i = jitdriver_sd.num_red_args
+    loop.inputargs = inputargs[:i]
+    for descr in vinfo.static_field_descrs:
+        assert i < len(inputargs)
+        box = inputargs[i]
+        extra_ops.append(
+            ResOperation(rop.GETFIELD_GC, [vable_box], box, descr))
+        i += 1
+    arrayindex = 0
+    for descr in vinfo.array_field_descrs:
+        vable = vable_box.getref_base()
+        arraylen = vinfo.get_array_length(vable, arrayindex)
+        arraybox = BoxPtr()
+        extra_ops.append(
+            ResOperation(rop.GETFIELD_GC, [vable_box], arraybox, descr))
+        arraydescr = vinfo.array_descrs[arrayindex]
+        assert i + arraylen <= len(inputargs)
+        for index in range(arraylen):
+            box = inputargs[i]
+            extra_ops.append(
+                ResOperation(rop.GETARRAYITEM_GC,
+                             [arraybox, ConstInt(index)],
+                             box, descr=arraydescr))
+            i += 1
+        arrayindex += 1
+    assert i == len(inputargs)
+    loop.operations = extra_ops + loop.operations
 
 def send_loop_to_backend(greenkey, jitdriver_sd, metainterp_sd, loop, type):
-    jitdriver_sd.on_compile(metainterp_sd.logger_ops, loop.token,
+    vinfo = jitdriver_sd.virtualizable_info
+    if vinfo is not None:
+        patch_new_loop_to_load_virtualizable_fields(loop, jitdriver_sd)
+
+    original_jitcell_token = loop.original_jitcell_token
+    jitdriver_sd.on_compile(metainterp_sd.logger_ops, original_jitcell_token,
                             loop.operations, type, greenkey)
     loopname = jitdriver_sd.warmstate.get_location_str(greenkey)
     globaldata = metainterp_sd.globaldata
-    loop_token = loop.token
-    loop_token.number = n = globaldata.loopnumbering
+    original_jitcell_token.number = n = globaldata.loopnumbering
     globaldata.loopnumbering += 1
 
     if not we_are_translated():
-        show_loop(metainterp_sd, loop)
+        show_procedures(metainterp_sd, loop)
         loop.check_consistency()
 
     operations = get_deep_immutable_oplist(loop.operations)
@@ -177,26 +309,19 @@
     debug_start("jit-backend")
     try:
         ops_offset = metainterp_sd.cpu.compile_loop(loop.inputargs, operations,
-                                                    loop.token, name=loopname)
+                                                    original_jitcell_token, name=loopname)
     finally:
         debug_stop("jit-backend")
     metainterp_sd.profiler.end_backend()
     metainterp_sd.stats.add_new_loop(loop)
     if not we_are_translated():
-        if type != "entry bridge":
-            metainterp_sd.stats.compiled()
-        else:
-            loop._ignore_during_counting = True
+        metainterp_sd.stats.compiled()
     metainterp_sd.log("compiled new " + type)
     #
     metainterp_sd.logger_ops.log_loop(loop.inputargs, loop.operations, n, type, ops_offset)
-    short = loop.token.short_preamble
-    if short:
-        metainterp_sd.logger_ops.log_short_preamble(short[-1].inputargs,
-                                                    short[-1].operations)
     #
     if metainterp_sd.warmrunnerdesc is not None:    # for tests
-        metainterp_sd.warmrunnerdesc.memory_manager.keep_loop_alive(loop.token)
+        metainterp_sd.warmrunnerdesc.memory_manager.keep_loop_alive(original_jitcell_token)
 
 def send_bridge_to_backend(jitdriver_sd, metainterp_sd, faildescr, inputargs,
                            operations, original_loop_token):
@@ -204,8 +329,9 @@
     jitdriver_sd.on_compile_bridge(metainterp_sd.logger_ops,
                                    original_loop_token, operations, n)
     if not we_are_translated():
-        show_loop(metainterp_sd)
-        TreeLoop.check_consistency_of(inputargs, operations)
+        show_procedures(metainterp_sd)
+        seen = dict.fromkeys(inputargs)
+        TreeLoop.check_consistency_of_branch(operations, seen)
     metainterp_sd.profiler.start_backend()
     operations = get_deep_immutable_oplist(operations)
     debug_start("jit-backend")
@@ -221,9 +347,9 @@
     #
     metainterp_sd.logger_ops.log_bridge(inputargs, operations, n, ops_offset)
     #
-    if metainterp_sd.warmrunnerdesc is not None:    # for tests
-        metainterp_sd.warmrunnerdesc.memory_manager.keep_loop_alive(
-            original_loop_token)
+    #if metainterp_sd.warmrunnerdesc is not None:    # for tests
+    #    metainterp_sd.warmrunnerdesc.memory_manager.keep_loop_alive(
+    #        original_loop_token)
 
 # ____________________________________________________________
 
@@ -263,7 +389,7 @@
         raise metainterp_sd.ExitFrameWithExceptionRef(cpu, value)
 
 
-class TerminatingLoopToken(LoopToken):
+class TerminatingLoopToken(JitCellToken): # FIXME: kill?
     terminating = True
 
     def __init__(self, nargs, finishdescr):
@@ -298,7 +424,7 @@
     pass
 
 class ResumeGuardDescr(ResumeDescr):
-    _counter = 0        # if < 0, there is one counter per value;
+    _counter = 0        # on a GUARD_VALUE, there is one counter per value;
     _counters = None    # they get stored in _counters then.
 
     # this class also gets the following attributes stored by resume.py code
@@ -309,10 +435,13 @@
     rd_virtuals = None
     rd_pendingfields = lltype.nullptr(PENDINGFIELDSP.TO)
 
-    CNT_INT   = -0x20000000
-    CNT_REF   = -0x40000000
-    CNT_FLOAT = -0x60000000
-    CNT_MASK  =  0x1FFFFFFF
+    CNT_BASE_MASK  =  0x0FFFFFFF     # the base counter value
+    CNT_BUSY_FLAG  =  0x10000000     # if set, busy tracing from the guard
+    CNT_TYPE_MASK  =  0x60000000     # mask for the type
+
+    CNT_INT        =  0x20000000
+    CNT_REF        =  0x40000000
+    CNT_FLOAT      =  0x60000000
 
     def store_final_boxes(self, guard_op, boxes):
         guard_op.setfailargs(boxes)
@@ -326,6 +455,8 @@
         except ValueError:
             return     # xxx probably very rare
         else:
+            if i > self.CNT_BASE_MASK:
+                return    # probably never, but better safe than sorry
             if box.type == history.INT:
                 cnt = self.CNT_INT
             elif box.type == history.REF:
@@ -334,18 +465,21 @@
                 cnt = self.CNT_FLOAT
             else:
                 assert 0, box.type
-            # we build the following value for _counter, which is always
-            # a negative value
+            assert cnt > self.CNT_BASE_MASK
             self._counter = cnt | i
 
     def handle_fail(self, metainterp_sd, jitdriver_sd):
         if self.must_compile(metainterp_sd, jitdriver_sd):
-            return self._trace_and_compile_from_bridge(metainterp_sd,
-                                                       jitdriver_sd)
+            self.start_compiling()
+            try:
+                self._trace_and_compile_from_bridge(metainterp_sd,
+                                                    jitdriver_sd)
+            finally:
+                self.done_compiling()
         else:
             from pypy.jit.metainterp.blackhole import resume_in_blackhole
             resume_in_blackhole(metainterp_sd, jitdriver_sd, self)
-            assert 0, "unreachable"
+        assert 0, "unreachable"
 
     def _trace_and_compile_from_bridge(self, metainterp_sd, jitdriver_sd):
         # 'jitdriver_sd' corresponds to the outermost one, i.e. the one
@@ -354,17 +488,27 @@
         # jitdrivers.
         from pypy.jit.metainterp.pyjitpl import MetaInterp
         metainterp = MetaInterp(metainterp_sd, jitdriver_sd)
-        return metainterp.handle_guard_failure(self)
+        metainterp.handle_guard_failure(self)
     _trace_and_compile_from_bridge._dont_inline_ = True
 
     def must_compile(self, metainterp_sd, jitdriver_sd):
         trace_eagerness = jitdriver_sd.warmstate.trace_eagerness
-        if self._counter >= 0:
+        #
+        if self._counter <= self.CNT_BASE_MASK:
+            # simple case: just counting from 0 to trace_eagerness
             self._counter += 1
             return self._counter >= trace_eagerness
-        else:
-            index = self._counter & self.CNT_MASK
-            typetag = self._counter & ~ self.CNT_MASK
+        #
+        # do we have the BUSY flag?  If so, we're tracing right now, e.g. in an
+        # outer invocation of the same function, so don't trace again for now.
+        elif self._counter & self.CNT_BUSY_FLAG:
+            return False
+        #
+        else: # we have a GUARD_VALUE that fails.  Make a _counters instance
+            # (only now, when the guard is actually failing at least once),
+            # and use it to record some statistics about the failing values.
+            index = self._counter & self.CNT_BASE_MASK
+            typetag = self._counter & self.CNT_TYPE_MASK
             counters = self._counters
             if typetag == self.CNT_INT:
                 intvalue = metainterp_sd.cpu.get_latest_value_int(index)
@@ -391,7 +535,16 @@
                 assert 0, typetag
             return counter >= trace_eagerness
 
-    def reset_counter_from_failure(self):
+    def start_compiling(self):
+        # start tracing and compiling from this guard.
+        self._counter |= self.CNT_BUSY_FLAG
+
+    def done_compiling(self):
+        # done tracing and compiling from this guard.  Either the bridge has
+        # been successfully compiled, in which case whatever value we store
+        # in self._counter will not be seen any more, or not, in which case
+        # we should reset the counter to 0, in order to wait a bit until the
+        # next attempt.
         if self._counter >= 0:
             self._counter = 0
         self._counters = None
@@ -400,13 +553,13 @@
         # We managed to create a bridge.  Attach the new operations
         # to the corresponding guard_op and compile from there
         assert metainterp.resumekey_original_loop_token is not None
-        new_loop.token = metainterp.resumekey_original_loop_token
+        new_loop.original_jitcell_token = metainterp.resumekey_original_loop_token
         inputargs = metainterp.history.inputargs
         if not we_are_translated():
             self._debug_suboperations = new_loop.operations
         send_bridge_to_backend(metainterp.jitdriver_sd, metainterp.staticdata,
                                self, inputargs, new_loop.operations,
-                               new_loop.token)
+                               new_loop.original_jitcell_token)
 
     def copy_all_attributes_into(self, res):
         # XXX a bit ugly to have to list them all here
@@ -589,44 +742,32 @@
         metainterp_sd = metainterp.staticdata
         jitdriver_sd = metainterp.jitdriver_sd
         redargs = new_loop.inputargs
-        # We make a new LoopToken for this entry bridge, and stick it
-        # to every guard in the loop.
-        new_loop_token = make_loop_token(len(redargs), jitdriver_sd)
-        new_loop.token = new_loop_token
+        new_loop.original_jitcell_token = jitcell_token = make_jitcell_token(jitdriver_sd)
         send_loop_to_backend(self.original_greenkey, metainterp.jitdriver_sd,
                              metainterp_sd, new_loop, "entry bridge")
         # send the new_loop to warmspot.py, to be called directly the next time
-        jitdriver_sd.warmstate.attach_unoptimized_bridge_from_interp(
-            self.original_greenkey,
-            new_loop_token)
-        # store the new loop in compiled_merge_points_wref too
-        old_loop_tokens = metainterp.get_compiled_merge_points(
-            self.original_greenkey)
-        # it always goes at the end of the list, as it is the most
-        # general loop token
-        old_loop_tokens.append(new_loop_token)
-        metainterp.set_compiled_merge_points(self.original_greenkey,
-                                             old_loop_tokens)
+        jitdriver_sd.warmstate.attach_procedure_to_interp(
+            self.original_greenkey, jitcell_token)
+        metainterp_sd.stats.add_jitcell_token(jitcell_token)
 
-    def reset_counter_from_failure(self):
-        pass
 
-
-def compile_new_bridge(metainterp, old_loop_tokens, resumekey, retraced=False):
+def compile_trace(metainterp, resumekey, start_resumedescr=None):
     """Try to compile a new bridge leading from the beginning of the history
     to some existing place.
     """
-    from pypy.jit.metainterp.optimize import optimize_bridge
+    from pypy.jit.metainterp.optimizeopt import optimize_trace
     
     # The history contains new operations to attach as the code for the
     # failure of 'resumekey.guard_op'.
-    #
+    # 
     # Attempt to use optimize_bridge().  This may return None in case
     # it does not work -- i.e. none of the existing old_loop_tokens match.
-    new_loop = create_empty_loop(metainterp)
-    new_loop.inputargs = metainterp.history.inputargs[:]
+    new_trace = create_empty_loop(metainterp)
+    new_trace.inputargs = inputargs = metainterp.history.inputargs[:]
     # clone ops, as optimize_bridge can mutate the ops
-    new_loop.operations = [op.clone() for op in metainterp.history.operations]
+
+    new_trace.operations = [op.clone() for op in metainterp.history.operations]
+    new_trace.start_resumedescr = start_resumedescr
     metainterp_sd = metainterp.staticdata
     state = metainterp.jitdriver_sd.warmstate
     if isinstance(resumekey, ResumeAtPositionDescr):
@@ -634,38 +775,25 @@
     else:
         inline_short_preamble = True
     try:
-        target_loop_token = optimize_bridge(metainterp_sd, old_loop_tokens,
-                                            new_loop, state.enable_opts,
-                                            inline_short_preamble, retraced)
+        optimize_trace(metainterp_sd, new_trace, state.enable_opts, inline_short_preamble)
     except InvalidLoop:
         debug_print("compile_new_bridge: got an InvalidLoop")
         # XXX I am fairly convinced that optimize_bridge cannot actually raise
         # InvalidLoop
         debug_print('InvalidLoop in compile_new_bridge')
         return None
-    # Did it work?
-    if target_loop_token is not None:
-        # Yes, we managed to create a bridge.  Dispatch to resumekey to
+
+    if new_trace.operations[-1].getopnum() != rop.LABEL:
+        # We managed to create a bridge.  Dispatch to resumekey to
         # know exactly what we must do (ResumeGuardDescr/ResumeFromInterpDescr)
-        prepare_last_operation(new_loop, target_loop_token)
-        resumekey.compile_and_attach(metainterp, new_loop)
-        record_loop_or_bridge(metainterp_sd, new_loop)
-    return target_loop_token
-
-def prepare_last_operation(new_loop, target_loop_token):
-    op = new_loop.operations[-1]
-    if not isinstance(target_loop_token, TerminatingLoopToken):
-        # normal case
-        #op.setdescr(target_loop_token)     # patch the jump target
-        pass
+        target_token = new_trace.operations[-1].getdescr()
+        resumekey.compile_and_attach(metainterp, new_trace)
+        record_loop_or_bridge(metainterp_sd, new_trace)
+        return target_token
     else:
-        # The target_loop_token is a pseudo loop token,
-        # e.g. loop_tokens_done_with_this_frame_void[0]
-        # Replace the operation with the real operation we want, i.e. a FINISH
-        descr = target_loop_token.finishdescr
-        args = op.getarglist()
-        new_op = ResOperation(rop.FINISH, args, None, descr=descr)
-        new_loop.operations[-1] = new_op
+        metainterp.retrace_needed(new_trace)
+        return None
+        
 
 # ____________________________________________________________
 
@@ -676,21 +804,25 @@
         assert exception, "PropagateExceptionDescr: no exception??"
         raise metainterp_sd.ExitFrameWithExceptionRef(cpu, exception)
 
-def compile_tmp_callback(cpu, jitdriver_sd, greenboxes, redboxes,
+def compile_tmp_callback(cpu, jitdriver_sd, greenboxes, redargtypes,
                          memory_manager=None):
     """Make a LoopToken that corresponds to assembler code that just
     calls back the interpreter.  Used temporarily: a fully compiled
     version of the code may end up replacing it.
     """
-    # 'redboxes' is only used to know the types of red arguments.
-    inputargs = [box.clonebox() for box in redboxes]
-    loop_token = make_loop_token(len(inputargs), jitdriver_sd)
-    # 'nb_red_args' might be smaller than len(redboxes),
-    # because it doesn't include the virtualizable boxes.
+    jitcell_token = make_jitcell_token(jitdriver_sd)
     nb_red_args = jitdriver_sd.num_red_args
+    assert len(redargtypes) == nb_red_args
+    inputargs = []
+    for kind in redargtypes:
+        if   kind == history.INT:   box = BoxInt()
+        elif kind == history.REF:   box = BoxPtr()
+        elif kind == history.FLOAT: box = BoxFloat()
+        else: raise AssertionError
+        inputargs.append(box)
     k = jitdriver_sd.portal_runner_adr
     funcbox = history.ConstInt(heaptracker.adr2int(k))
-    callargs = [funcbox] + greenboxes + inputargs[:nb_red_args]
+    callargs = [funcbox] + greenboxes + inputargs
     #
     result_type = jitdriver_sd.result_type
     if result_type == history.INT:
@@ -717,7 +849,7 @@
         ]
     operations[1].setfailargs([])
     operations = get_deep_immutable_oplist(operations)
-    cpu.compile_loop(inputargs, operations, loop_token, log=False)
+    cpu.compile_loop(inputargs, operations, jitcell_token, log=False)
     if memory_manager is not None:    # for tests
-        memory_manager.keep_loop_alive(loop_token)
-    return loop_token
+        memory_manager.keep_loop_alive(jitcell_token)
+    return jitcell_token
diff --git a/pypy/jit/metainterp/executor.py b/pypy/jit/metainterp/executor.py
--- a/pypy/jit/metainterp/executor.py
+++ b/pypy/jit/metainterp/executor.py
@@ -344,6 +344,7 @@
                          rop.SETINTERIORFIELD_RAW,
                          rop.CALL_RELEASE_GIL,
                          rop.QUASIIMMUT_FIELD,
+                         rop.LABEL,
                          ):      # list of opcodes never executed by pyjitpl
                 continue
             raise AssertionError("missing %r" % (key,))
diff --git a/pypy/jit/metainterp/gc.py b/pypy/jit/metainterp/gc.py
--- a/pypy/jit/metainterp/gc.py
+++ b/pypy/jit/metainterp/gc.py
@@ -7,6 +7,9 @@
         self.config = config
 
 
+class GC_none(GcDescription):
+    malloc_zero_filled = True
+
 class GC_boehm(GcDescription):
     malloc_zero_filled = True
 
diff --git a/pypy/jit/metainterp/graphpage.py b/pypy/jit/metainterp/graphpage.py
--- a/pypy/jit/metainterp/graphpage.py
+++ b/pypy/jit/metainterp/graphpage.py
@@ -12,8 +12,9 @@
     def get_display_text(self):
         return None
 
-def display_loops(loops, errmsg=None, highlight_loops={}):
-    graphs = [(loop, highlight_loops.get(loop, 0)) for loop in loops]    
+def display_procedures(procedures, errmsg=None, highlight_procedures={}):
+    graphs = [(procedure, highlight_procedures.get(procedure, 0))
+              for procedure in procedures]
     for graph, highlight in graphs:
         for op in graph.get_operations():
             if is_interesting_guard(op):
@@ -25,18 +26,19 @@
 def is_interesting_guard(op):
     return hasattr(op.getdescr(), '_debug_suboperations')
 
+def getdescr(op):
+    if op._descr is not None:
+        return op._descr
+    if hasattr(op, '_descr_wref'):
+        return op._descr_wref()
+    return None
+
 
 class ResOpGraphPage(GraphPage):
 
     def compute(self, graphs, errmsg=None):
         resopgen = ResOpGen()
         for graph, highlight in graphs:
-            if getattr(graph, 'token', None) is not None:
-                resopgen.jumps_to_graphs[graph.token] = graph
-            if getattr(graph, '_looptoken_number', None) is not None:
-                resopgen.jumps_to_graphs[graph._looptoken_number] = graph
-        
-        for graph, highlight in graphs:
             resopgen.add_graph(graph, highlight)
         if errmsg:
             resopgen.set_errmsg(errmsg)
@@ -54,7 +56,7 @@
         self.block_starters = {}    # {graphindex: {set-of-operation-indices}}
         self.all_operations = {}
         self.errmsg = None
-        self.jumps_to_graphs = {}
+        self.target_tokens = {}
 
     def op_name(self, graphindex, opindex):
         return 'g%dop%d' % (graphindex, opindex)
@@ -73,16 +75,21 @@
         for graphindex in range(len(self.graphs)):
             self.block_starters[graphindex] = {0: True}
         for graphindex, graph in enumerate(self.graphs):
-            last_was_mergepoint = False
+            mergepointblock = None
             for i, op in enumerate(graph.get_operations()):
                 if is_interesting_guard(op):
                     self.mark_starter(graphindex, i+1)
                 if op.getopnum() == rop.DEBUG_MERGE_POINT:
-                    if not last_was_mergepoint:
-                        last_was_mergepoint = True
-                        self.mark_starter(graphindex, i)
+                    if mergepointblock is None:
+                        mergepointblock = i
+                elif op.getopnum() == rop.LABEL:
+                    self.mark_starter(graphindex, i)
+                    self.target_tokens[getdescr(op)] = (graphindex, i)
+                    mergepointblock = i
                 else:
-                    last_was_mergepoint = False
+                    if mergepointblock is not None:
+                        self.mark_starter(graphindex, mergepointblock)
+                        mergepointblock = None
 
     def set_errmsg(self, errmsg):
         self.errmsg = errmsg
@@ -172,24 +179,10 @@
                              (graphindex, opindex))
                 break
         if op.getopnum() == rop.JUMP:
-            tgt_g = -1
-            tgt = None
-            tgt_number = getattr(op, '_jumptarget_number', None)
-            if tgt_number is not None:
-                tgt = self.jumps_to_graphs.get(tgt_number)
-            else:
-                tgt_descr = op.getdescr()
-                if tgt_descr is None:
-                    tgt_g = graphindex
-                else:
-                    tgt = self.jumps_to_graphs.get(tgt_descr.number)
-                    if tgt is None:
-                        tgt = self.jumps_to_graphs.get(tgt_descr)
-            if tgt is not None:
-                tgt_g = self.graphs.index(tgt)
-            if tgt_g != -1:
+            tgt_descr = getdescr(op)
+            if tgt_descr is not None and tgt_descr in self.target_tokens:
                 self.genedge((graphindex, opstartindex),
-                             (tgt_g, 0),
+                             self.target_tokens[tgt_descr],
                              weight="0")
         lines.append("")
         label = "\\l".join(lines)
diff --git a/pypy/jit/metainterp/history.py b/pypy/jit/metainterp/history.py
--- a/pypy/jit/metainterp/history.py
+++ b/pypy/jit/metainterp/history.py
@@ -10,6 +10,7 @@
 from pypy.jit.metainterp.resoperation import ResOperation, rop
 from pypy.jit.codewriter import heaptracker, longlong
 from pypy.rlib.objectmodel import compute_identity_hash
+import weakref
 
 # ____________________________________________________________
 
@@ -123,9 +124,6 @@
     def sort_key(self):
         raise NotImplementedError
 
-    def set_future_value(self, cpu, j):
-        raise NotImplementedError
-
     def nonnull(self):
         raise NotImplementedError
 
@@ -288,9 +286,6 @@
     def _get_hash_(self):
         return make_hashable_int(self.value)
 
-    def set_future_value(self, cpu, j):
-        cpu.set_future_value_int(j, self.value)
-
     def same_constant(self, other):
         if isinstance(other, ConstInt):
             return self.value == other.value
@@ -328,9 +323,6 @@
     def _get_hash_(self):
         return longlong.gethash(self.value)
 
-    def set_future_value(self, cpu, j):
-        cpu.set_future_value_float(j, self.value)
-
     def same_constant(self, other):
         if isinstance(other, ConstFloat):
             return self.value == other.value
@@ -377,9 +369,6 @@
     def getaddr(self):
         return llmemory.cast_ptr_to_adr(self.value)
 
-    def set_future_value(self, cpu, j):
-        cpu.set_future_value_ref(j, self.value)
-
     def same_constant(self, other):
         if isinstance(other, ConstPtr):
             return self.value == other.value
@@ -431,9 +420,6 @@
         else:
             return 0
 
-    def set_future_value(self, cpu, j):
-        cpu.set_future_value_ref(j, self.value)
-
 ##    def getaddr(self):
 ##        # so far this is used only when calling
 ##        # CodeWriter.IndirectCallset.bytecode_for_address.  We don't need a
@@ -539,9 +525,6 @@
     def _get_hash_(self):
         return make_hashable_int(self.value)
 
-    def set_future_value(self, cpu, j):
-        cpu.set_future_value_int(j, self.value)
-
     def nonnull(self):
         return self.value != 0
 
@@ -574,9 +557,6 @@
     def _get_hash_(self):
         return longlong.gethash(self.value)
 
-    def set_future_value(self, cpu, j):
-        cpu.set_future_value_float(j, self.value)
-
     def nonnull(self):
         return self.value != longlong.ZEROF
 
@@ -619,9 +599,6 @@
         else:
             return 0
 
-    def set_future_value(self, cpu, j):
-        cpu.set_future_value_ref(j, self.value)
-
     def nonnull(self):
         return bool(self.value)
 
@@ -666,19 +643,12 @@
     def nonnull(self):
         return bool(self.value)
 
-    def set_future_value(self, cpu, j):
-        cpu.set_future_value_ref(j, self.value)
-
     def repr_rpython(self):
         return repr_rpython(self, 'bo')
 
     _getrepr_ = repr_object
 
 
-def set_future_values(cpu, boxes):
-    for j in range(len(boxes)):
-        boxes[j].set_future_value(cpu, j)
-
 # ____________________________________________________________
 
 
@@ -723,18 +693,17 @@
 
 # ____________________________________________________________
 
-# The TreeLoop class contains a loop or a generalized loop, i.e. a tree
-# of operations.  Each branch ends in a jump which can go either to
-# the top of the same loop, or to another TreeLoop; or it ends in a FINISH.
+# The JitCellToken class is the root of a tree of traces.  Each branch ends
+# in a jump which goes to a LABEL operation; or it ends in a FINISH.
 
-class LoopToken(AbstractDescr):
+class JitCellToken(AbstractDescr):
     """Used for rop.JUMP, giving the target of the jump.
     This is different from TreeLoop: the TreeLoop class contains the
     whole loop, including 'operations', and goes away after the loop
     was compiled; but the LoopDescr remains alive and points to the
     generated assembler.
     """
-    short_preamble = None
+    target_tokens = None
     failed_states = None
     retraced_count = 0
     terminating = False # see TerminatingLoopToken in compile.py
@@ -751,10 +720,11 @@
 
     def __init__(self):
         # For memory management of assembled loops
-        self._keepalive_target_looktokens = {}      # set of other LoopTokens
+        self._keepalive_jitcell_tokens = {}      # set of other JitCellToken
 
-    def record_jump_to(self, target_loop_token):
-        self._keepalive_target_looktokens[target_loop_token] = None
+    def record_jump_to(self, jitcell_token):
+        assert isinstance(jitcell_token, JitCellToken)
+        self._keepalive_jitcell_tokens[jitcell_token] = None
 
     def __repr__(self):
         return '<Loop %d, gen=%d>' % (self.number, self.generation)
@@ -765,17 +735,49 @@
     def dump(self):
         self.compiled_loop_token.cpu.dump_loop_token(self)
 
+class TargetToken(AbstractDescr):
+    def __init__(self, targeting_jitcell_token=None):
+        # Warning, two different jitcell_tokens here!
+        #
+        # * 'targeting_jitcell_token' is only useful for the front-end,
+        #   and it means: consider the LABEL that uses this TargetToken.
+        #   At this position, the state is logically the one given
+        #   by targeting_jitcell_token.  So e.g. if we want to enter the
+        #   JIT with some given green args, if the jitcell matches, then
+        #   we can jump to this LABEL.
+        #
+        # * 'original_jitcell_token' is information from the backend's
+        #   point of view: it means that this TargetToken is used in
+        #   a LABEL that belongs to either:
+        #   - a loop; then 'original_jitcell_token' is this loop
+        #   - or a bridge; then 'original_jitcell_token' is the loop
+        #     out of which we made this bridge
+        #
+        self.targeting_jitcell_token = targeting_jitcell_token
+        self.original_jitcell_token = None
+
+        self.virtual_state = None
+        self.exported_state = None
+        
 class TreeLoop(object):
     inputargs = None
     operations = None
-    token = None
     call_pure_results = None
     logops = None
     quasi_immutable_deps = None
+    start_resumedescr = None
+
+    def _token(*args):
+        raise Exception("TreeLoop.token is killed")
+    token = property(_token, _token)
+
+    # This is the jitcell where the trace starts. Labels within the trace might
+    # belong to some other jitcells in the sense that jumping to this other
+    # jitcell will result in a jump to the label.
+    original_jitcell_token = None
 
     def __init__(self, name):
         self.name = name
-        # self.inputargs = list of distinct Boxes
         # self.operations = list of ResOperations
         #   ops of the kind 'guard_xxx' contain a further list of operations,
         #   which may itself contain 'guard_xxx' and so on, making a tree.
@@ -808,6 +810,10 @@
     def check_consistency(self):     # for testing
         "NOT_RPYTHON"
         self.check_consistency_of(self.inputargs, self.operations)
+        for op in self.operations:
+            descr = op.getdescr()
+            if op.getopnum() == rop.LABEL and isinstance(descr, TargetToken):
+                assert descr.original_jitcell_token is self.original_jitcell_token
 
     @staticmethod
     def check_consistency_of(inputargs, operations):
@@ -842,15 +848,23 @@
                 assert isinstance(box, Box)
                 assert box not in seen
                 seen[box] = True
+            if op.getopnum() == rop.LABEL:
+                inputargs = op.getarglist()
+                for box in inputargs:
+                    assert isinstance(box, Box), "LABEL contains %r" % (box,)
+                seen = dict.fromkeys(inputargs)
+                assert len(seen) == len(inputargs), (
+                    "duplicate Box in the LABEL arguments")
+                
         assert operations[-1].is_final()
         if operations[-1].getopnum() == rop.JUMP:
             target = operations[-1].getdescr()
             if target is not None:
-                assert isinstance(target, LoopToken)
+                assert isinstance(target, TargetToken)
 
     def dump(self):
         # RPython-friendly
-        print '%r: inputargs =' % self, self._dump_args(self.inputargs)
+        print '%r: inputargs =' % self, self._dump_args(self.inputargs)        
         for op in self.operations:
             args = op.getarglist()
             print '\t', op.getopname(), self._dump_args(args), \
@@ -932,6 +946,9 @@
     def clear(self):
         pass
 
+    def add_jitcell_token(self, token):
+        pass
+
 class Stats(object):
     """For tests."""
 
@@ -944,17 +961,26 @@
         self.loops = []
         self.locations = []
         self.aborted_keys = []
-        self.invalidated_token_numbers = set()
+        self.invalidated_token_numbers = set()    # <- not RPython
+        self.jitcell_token_wrefs = []
+        self.jitcell_dicts = []                   # <- not RPython
 
     def clear(self):
         del self.loops[:]
         del self.locations[:]
         del self.aborted_keys[:]
+        del self.jitcell_token_wrefs[:]
         self.invalidated_token_numbers.clear()
         self.compiled_count = 0
         self.enter_count = 0
         self.aborted_count = 0
+        for dict in self.jitcell_dicts:
+            dict.clear()
 
+    def add_jitcell_token(self, token):
+        assert isinstance(token, JitCellToken)
+        self.jitcell_token_wrefs.append(weakref.ref(token))
+        
     def set_history(self, history):
         self.operations = history.operations
 
@@ -984,6 +1010,15 @@
     def get_all_loops(self):
         return self.loops
 
+    def get_all_jitcell_tokens(self):
+        tokens = [t() for t in self.jitcell_token_wrefs]
+        if None in tokens:
+            assert False, "get_all_jitcell_tokens will not work as "+\
+                          "loops have been freed"
+        return tokens
+            
+        
+
     def check_history(self, expected=None, **check):
         insns = {}
         for op in self.operations:
@@ -999,16 +1034,90 @@
                 "found %d %r, expected %d" % (found, insn, expected_count))
         return insns
 
+    def check_resops(self, expected=None, **check):
+        insns = {}
+        for loop in self.get_all_loops():
+            insns = loop.summary(adding_insns=insns)
+        return self._check_insns(insns, expected, check)
+
+    def _check_insns(self, insns, expected, check):
+        if expected is not None:
+            insns.pop('debug_merge_point', None)
+            insns.pop('label', None)
+            assert insns == expected
+        for insn, expected_count in check.items():
+            getattr(rop, insn.upper())  # fails if 'rop.INSN' does not exist
+            found = insns.get(insn, 0)
+            assert found == expected_count, (
+                "found %d %r, expected %d" % (found, insn, expected_count))
+        return insns
+
+    def check_simple_loop(self, expected=None, **check):
+        # Useful in the simplest case when we have only one trace ending with
+        # a jump back to itself and possibly a few bridges ending with finish.
+        # Only the operations within the loop formed by that single jump will
+        # be counted.
+
+        # XXX hacked version, ignore and remove me when jit-targets is merged.
+        loops = self.get_all_loops()
+        loops = [loop for loop in loops if 'Preamble' not in repr(loop)] #XXX
+        assert len(loops) == 1
+        loop, = loops
+        jumpop = loop.operations[-1]
+        assert jumpop.getopnum() == rop.JUMP
+        insns = {}
+        for op in loop.operations:
+            opname = op.getopname()
+            insns[opname] = insns.get(opname, 0) + 1
+        return self._check_insns(insns, expected, check)
+
+    def check_simple_loop(self, expected=None, **check):
+        # Useful in the simplest case when we have only one trace ending with
+        # a jump back to itself and possibly a few bridges ending with finish.
+        # Only the operations within the loop formed by that single jump will
+        # be counted.
+        loops = self.get_all_loops()
+        assert len(loops) == 1
+        loop = loops[0]
+        jumpop = loop.operations[-1]
+        assert jumpop.getopnum() == rop.JUMP
+        assert self.check_resops(jump=1)
+        labels = [op for op in loop.operations if op.getopnum() == rop.LABEL]
+        targets = [op._descr_wref() for op in labels]
+        assert None not in targets # TargetToken was freed, give up
+        target = jumpop._descr_wref()
+        assert target
+        assert targets.count(target) == 1
+        i = loop.operations.index(labels[targets.index(target)])
+        insns = {}
+        for op in loop.operations[i:]:
+            opname = op.getopname()
+            insns[opname] = insns.get(opname, 0) + 1
+        return self._check_insns(insns, expected, check)
+        
     def check_loops(self, expected=None, everywhere=False, **check):
         insns = {}
-        for loop in self.loops:
-            if not everywhere:
-                if getattr(loop, '_ignore_during_counting', False):
-                    continue
+        for loop in self.get_all_loops():
+            #if not everywhere:
+            #    if getattr(loop, '_ignore_during_counting', False):
+            #        continue
             insns = loop.summary(adding_insns=insns)
         if expected is not None:
             insns.pop('debug_merge_point', None)
-            assert insns == expected
+            print
+            print
+            print "        self.check_resops(%s)" % str(insns)
+            print
+            import pdb; pdb.set_trace()
+        else:
+            chk = ['%s=%d' % (i, insns.get(i, 0)) for i in check]
+            print
+            print
+            print "        self.check_resops(%s)" % ', '.join(chk)
+            print
+            import pdb; pdb.set_trace()
+        return
+        
         for insn, expected_count in check.items():
             getattr(rop, insn.upper())  # fails if 'rop.INSN' does not exist
             found = insns.get(insn, 0)
@@ -1018,26 +1127,26 @@
 
     def check_consistency(self):
         "NOT_RPYTHON"
-        for loop in self.loops:
+        for loop in self.get_all_loops():
             loop.check_consistency()
 
     def maybe_view(self):
         if option.view:
             self.view()
 
-    def view(self, errmsg=None, extraloops=[]):
-        from pypy.jit.metainterp.graphpage import display_loops
-        loops = self.get_all_loops()[:]
-        for loop in extraloops:
-            if loop in loops:
-                loops.remove(loop)
-            loops.append(loop)
-        highlight_loops = dict.fromkeys(extraloops, 1)
-        for loop in loops:
-            if hasattr(loop, '_looptoken_number') and (
-                    loop._looptoken_number in self.invalidated_token_numbers):
-                highlight_loops.setdefault(loop, 2)
-        display_loops(loops, errmsg, highlight_loops)
+    def view(self, errmsg=None, extraprocedures=[]):  # show get_all_loops() plus the extras
+        from pypy.jit.metainterp.graphpage import display_procedures
+        procedures = self.get_all_loops()[:]  # copy: the list is reordered below
+        for procedure in extraprocedures:
+            if procedure in procedures:
+                procedures.remove(procedure)
+            procedures.append(procedure)  # each extra is moved (or added) to the end
+        highlight_procedures = dict.fromkeys(extraprocedures, 1)  # extras are marked with 1
+        for procedure in procedures:
+            if hasattr(procedure, '_looptoken_number') and (
+               procedure._looptoken_number in self.invalidated_token_numbers):
+                highlight_procedures.setdefault(procedure, 2)  # 2 unless already marked 1
+        display_procedures(procedures, errmsg, highlight_procedures)
 
 # ----------------------------------------------------------------
 
diff --git a/pypy/jit/metainterp/inliner.py b/pypy/jit/metainterp/inliner.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/metainterp/inliner.py
@@ -0,0 +1,57 @@
+from pypy.jit.metainterp.history import Const
+from pypy.jit.metainterp.resume import Snapshot
+
+class Inliner(object):  # rewrites operations, replacing boxes according to 'argmap'
+    def __init__(self, inputargs, jump_args):  # pairs inputargs[i] with jump_args[i]
+        assert len(inputargs) == len(jump_args)
+        self.argmap = {}  # original box -> replacement box
+        for i in range(len(inputargs)):
+            if inputargs[i] in self.argmap:
+                assert self.argmap[inputargs[i]] == jump_args[i]  # repeated inputargs must agree
+            else:
+                self.argmap[inputargs[i]] = jump_args[i]
+        self.snapshot_map = {None: None}  # memo for inline_snapshot(); None maps to None
+
+    def inline_op(self, newop, ignore_result=False, clone=True,  # returns the rewritten op
+                  ignore_failargs=False):
+        if clone:  # with clone=False, 'newop' itself is modified
+            newop = newop.clone()
+        args = newop.getarglist()
+        newop.initarglist([self.inline_arg(a) for a in args])
+        # guards: remap the failargs too, or reset them to [] (none, or ignore_failargs)
+        if newop.is_guard():
+            args = newop.getfailargs()
+            if args and not ignore_failargs:
+                newop.setfailargs([self.inline_arg(a) for a in args])
+            else:
+                newop.setfailargs([])
+        # the result gets a box from clonebox(); later uses of the old one map to it
+        if newop.result and not ignore_result:
+            old_result = newop.result
+            newop.result = newop.result.clonebox()
+            self.argmap[old_result] = newop.result
+        # a ResumeGuardDescr on the op has its snapshot replaced in place
+        self.inline_descr_inplace(newop.getdescr())
+
+        return newop
+
+    def inline_descr_inplace(self, descr):  # mutates 'descr'; other descr kinds are untouched
+        from pypy.jit.metainterp.compile import ResumeGuardDescr
+        if isinstance(descr, ResumeGuardDescr):
+            descr.rd_snapshot = self.inline_snapshot(descr.rd_snapshot)
+
+    def inline_arg(self, arg):  # None and Const are returned unchanged
+        if arg is None:
+            return None
+        if isinstance(arg, Const):
+            return arg
+        return self.argmap[arg]  # KeyError if the box has no mapping
+
+    def inline_snapshot(self, snapshot):  # memoized copy of the snapshot and its 'prev' chain
+        if snapshot in self.snapshot_map:
+            return self.snapshot_map[snapshot]
+        boxes = [self.inline_arg(a) for a in snapshot.boxes]
+        new_snapshot = Snapshot(self.inline_snapshot(snapshot.prev), boxes)
+        self.snapshot_map[snapshot] = new_snapshot
+        return new_snapshot
+
diff --git a/pypy/jit/metainterp/jitdriver.py b/pypy/jit/metainterp/jitdriver.py
--- a/pypy/jit/metainterp/jitdriver.py
+++ b/pypy/jit/metainterp/jitdriver.py
@@ -11,6 +11,7 @@
     #    self.portal_calldescr  ... pypy.jit.metainterp.warmspot
     #    self.num_green_args    ... pypy.jit.metainterp.warmspot
     #    self.num_red_args      ... pypy.jit.metainterp.warmspot
+    #    self.red_args_types    ... pypy.jit.metainterp.warmspot
     #    self.result_type       ... pypy.jit.metainterp.warmspot
     #    self.virtualizable_info... pypy.jit.metainterp.warmspot
     #    self.greenfield_info   ... pypy.jit.metainterp.warmspot
diff --git a/pypy/jit/metainterp/jitprof.py b/pypy/jit/metainterp/jitprof.py
--- a/pypy/jit/metainterp/jitprof.py
+++ b/pypy/jit/metainterp/jitprof.py
@@ -10,8 +10,6 @@
 counters="""
 TRACING
 BACKEND
-RUNNING
-BLACKHOLE
 OPS
 RECORDED_OPS
 GUARDS
@@ -67,18 +65,6 @@
     def end_backend(self):
         pass
 
-    def start_running(self):
-        pass
-
-    def end_running(self):
-        pass
-
-    def start_blackhole(self):
-        pass
-
-    def end_blackhole(self):
-        pass
-
     def count(self, kind, inc=1):
         pass
 
@@ -134,16 +120,6 @@
     def start_backend(self):   self._start(BACKEND)
     def end_backend(self):     self._end  (BACKEND)
 
-    # Don't record times for 'running' and 'blackhole' because there are
-    # too many of them: calling time.time() is a major blocker.
-    # If you are interested in these numbers, use 'PYPYLOG=file' and
-    # look at the resulting file with pypy/tool/logparser.py.
-    def start_running(self): self.count(RUNNING)
-    def end_running(self):   pass
-
-    def start_blackhole(self): self.count(BLACKHOLE)
-    def end_blackhole(self):   pass
-
     def count(self, kind, inc=1):
         self.counters[kind] += inc        
     
@@ -165,8 +141,6 @@
         calls = self.calls
         self._print_line_time("Tracing", cnt[TRACING],   tim[TRACING])
         self._print_line_time("Backend", cnt[BACKEND],   tim[BACKEND])
-        self._print_intline("Running asm", cnt[RUNNING])
-        self._print_intline("Blackhole", cnt[BLACKHOLE])
         line = "TOTAL:      \t\t%f" % (self.tk - self.starttime, )
         debug_print(line)
         self._print_intline("ops", cnt[OPS])
diff --git a/pypy/jit/metainterp/optimizeopt/__init__.py b/pypy/jit/metainterp/optimizeopt/__init__.py
--- a/pypy/jit/metainterp/optimizeopt/__init__.py
+++ b/pypy/jit/metainterp/optimizeopt/__init__.py
@@ -4,13 +4,15 @@
 from pypy.jit.metainterp.optimizeopt.virtualize import OptVirtualize
 from pypy.jit.metainterp.optimizeopt.heap import OptHeap
 from pypy.jit.metainterp.optimizeopt.vstring import OptString
-from pypy.jit.metainterp.optimizeopt.unroll import optimize_unroll, OptInlineShortPreamble
+from pypy.jit.metainterp.optimizeopt.unroll import optimize_unroll
 from pypy.jit.metainterp.optimizeopt.fficall import OptFfiCall
 from pypy.jit.metainterp.optimizeopt.simplify import OptSimplify
 from pypy.jit.metainterp.optimizeopt.pure import OptPure
 from pypy.jit.metainterp.optimizeopt.earlyforce import OptEarlyForce
 from pypy.rlib.jit import PARAMETERS
 from pypy.rlib.unroll import unrolling_iterable
+from pypy.rlib.debug import debug_start, debug_stop, debug_print
+
 
 ALL_OPTS = [('intbounds', OptIntBounds),
             ('rewrite', OptRewrite),
@@ -28,8 +30,7 @@
 ALL_OPTS_LIST = [name for name, _ in ALL_OPTS]
 ALL_OPTS_NAMES = ':'.join([name for name, _ in ALL_OPTS])
 
-def build_opt_chain(metainterp_sd, enable_opts,
-                    inline_short_preamble=True, retraced=False):
+def build_opt_chain(metainterp_sd, enable_opts):
     config = metainterp_sd.config
     optimizations = []
     unroll = 'unroll' in enable_opts    # 'enable_opts' is normally a dict
@@ -45,12 +46,9 @@
                 optimizations.append(OptFfiCall())
 
     if ('rewrite' not in enable_opts or 'virtualize' not in enable_opts
-        or 'heap' not in enable_opts):
+        or 'heap' not in enable_opts or 'unroll' not in enable_opts):
         optimizations.append(OptSimplify())
 
-    if inline_short_preamble:
-        optimizations = [OptInlineShortPreamble(retraced)] + optimizations
-
     return optimizations, unroll
 
 
@@ -80,3 +78,21 @@
 
 if __name__ == '__main__':
     print ALL_OPTS_NAMES
+
+def optimize_trace(metainterp_sd, loop, enable_opts, inline_short_preamble=True):
+    """Optimize loop.operations; the unoptimized trace is logged first (loop.logops).
+    Runs optimize_unroll() when 'unroll' is enabled, else a plain Optimizer pass.
+    """
+    debug_start("jit-optimize")
+    try:
+        loop.logops = metainterp_sd.logger_noopt.log_loop(loop.inputargs,
+                                                          loop.operations)
+        optimizations, unroll = build_opt_chain(metainterp_sd, enable_opts)
+        if unroll:  # only this path receives inline_short_preamble
+            optimize_unroll(metainterp_sd, loop, optimizations, inline_short_preamble)
+        else:
+            optimizer = Optimizer(metainterp_sd, loop, optimizations)
+            optimizer.propagate_all_forward()
+    finally:  # close the "jit-optimize" debug section even if optimization raises
+        debug_stop("jit-optimize")
+        
diff --git a/pypy/jit/metainterp/optimizeopt/fficall.py b/pypy/jit/metainterp/optimizeopt/fficall.py
--- a/pypy/jit/metainterp/optimizeopt/fficall.py
+++ b/pypy/jit/metainterp/optimizeopt/fficall.py
@@ -7,7 +7,7 @@
 from pypy.rlib.libffi import Func
 from pypy.rlib.objectmodel import we_are_translated
 from pypy.rpython.annlowlevel import cast_base_ptr_to_instance
-from pypy.rpython.lltypesystem import llmemory
+from pypy.rpython.lltypesystem import llmemory, rffi
 
 
 class FuncInfo(object):
@@ -234,10 +234,13 @@
             # longlongs are treated as floats, see
             # e.g. llsupport/descr.py:getDescrClass
             is_float = True
+        elif kind == 'u':
+            # they're all False
+            pass
         else:
             assert False, "unsupported ffitype or kind"
         #
-        fieldsize = ffitype.c_size
+        fieldsize = rffi.getintfield(ffitype, 'c_size')
         return self.optimizer.cpu.interiorfielddescrof_dynamic(
             offset, width, fieldsize, is_pointer, is_float, is_signed
         )
diff --git a/pypy/jit/metainterp/optimizeopt/optimizer.py b/pypy/jit/metainterp/optimizeopt/optimizer.py
--- a/pypy/jit/metainterp/optimizeopt/optimizer.py
+++ b/pypy/jit/metainterp/optimizeopt/optimizer.py
@@ -500,8 +500,9 @@
         else:
             return CVAL_ZERO
 
-    def propagate_all_forward(self):
-        self.clear_newoperations()
+    def propagate_all_forward(self, clear=True):
+        if clear:
+            self.clear_newoperations()
         for op in self.loop.operations:
             self.first_optimization.propagate_forward(op)
         self.loop.operations = self.get_newoperations()
@@ -564,9 +565,12 @@
         descr = op.getdescr()
         assert isinstance(descr, compile.ResumeGuardDescr)
         modifier = resume.ResumeDataVirtualAdder(descr, self.resumedata_memo)
-        newboxes = modifier.finish(self.values, self.pendingfields)
-        if len(newboxes) > self.metainterp_sd.options.failargs_limit: # XXX be careful here
-            compile.giveup()
+        try:
+            newboxes = modifier.finish(self.values, self.pendingfields)
+            if len(newboxes) > self.metainterp_sd.options.failargs_limit:
+                raise resume.TagOverflow
+        except resume.TagOverflow:
+            raise compile.giveup()
         descr.store_final_boxes(op, newboxes)
         #
         if op.getopnum() == rop.GUARD_VALUE:
diff --git a/pypy/jit/metainterp/optimizeopt/rewrite.py b/pypy/jit/metainterp/optimizeopt/rewrite.py
--- a/pypy/jit/metainterp/optimizeopt/rewrite.py
+++ b/pypy/jit/metainterp/optimizeopt/rewrite.py
@@ -260,6 +260,16 @@
     def optimize_GUARD_FALSE(self, op):
         self.optimize_guard(op, CONST_0)
 
+    def optimize_RECORD_KNOWN_CLASS(self, op):  # records arg 0's class; emits no operation
+        value = self.getvalue(op.getarg(0))
+        expectedclassbox = op.getarg(1)
+        assert isinstance(expectedclassbox, Const)
+        realclassbox = value.get_constant_class(self.optimizer.cpu)
+        if realclassbox is not None:
+            assert realclassbox.same_constant(expectedclassbox)  # must match what is known
+            return  # class already known: nothing to record
+        value.make_constant_class(expectedclassbox, None)  # None: presumably no guard -- confirm
+
     def optimize_GUARD_CLASS(self, op):
         value = self.getvalue(op.getarg(0))
         expectedclassbox = op.getarg(1)
@@ -481,6 +491,9 @@
         self.pure(rop.CAST_PTR_TO_INT, [op.result], op.getarg(0))
         self.emit_operation(op)
 
+    def optimize_SAME_AS(self, op):  # the same_as operation itself is not emitted
+        self.make_equal_to(op.result, self.getvalue(op.getarg(0)))  # result aliases arg 0's value
+
 dispatch_opt = make_dispatcher_method(OptRewrite, 'optimize_',
         default=OptRewrite.emit_operation)
 optimize_guards = _findall(OptRewrite, 'optimize_', 'GUARD')
diff --git a/pypy/jit/metainterp/optimizeopt/simplify.py b/pypy/jit/metainterp/optimizeopt/simplify.py
--- a/pypy/jit/metainterp/optimizeopt/simplify.py
+++ b/pypy/jit/metainterp/optimizeopt/simplify.py
@@ -1,9 +1,12 @@
 from pypy.jit.metainterp.optimizeopt.optimizer import Optimization
 from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method
 from pypy.jit.metainterp.resoperation import ResOperation, rop
-
+from pypy.jit.metainterp.history import TargetToken, JitCellToken
 
 class OptSimplify(Optimization):
+    def __init__(self):
+        self.last_label_descr = None  # descr of the latest LABEL seen; read by optimize_JUMP
+        
     def optimize_CALL_PURE(self, op):
         args = op.getarglist()
         self.emit_operation(ResOperation(rop.CALL, args, op.result,
@@ -28,6 +31,26 @@
     def optimize_MARK_OPAQUE_PTR(self, op):
         pass
 
+    def optimize_RECORD_KNOWN_CLASS(self, op):
+        pass  # nothing is emitted, so the operation is dropped from the output
+
+    def optimize_LABEL(self, op):  # the label is emitted unchanged
+        self.last_label_descr = op.getdescr()  # remembered so a later JUMP can target it
+        self.emit_operation(op)
+        
+    def optimize_JUMP(self, op):  # swaps the jump's JitCellToken descr for a TargetToken
+        descr = op.getdescr()
+        assert isinstance(descr, JitCellToken)
+        if not descr.target_tokens:  # nothing registered on the cell: use the last LABEL seen
+            assert self.last_label_descr is not None
+            target_token = self.last_label_descr
+            assert isinstance(target_token, TargetToken)
+            assert target_token.targeting_jitcell_token is descr  # label must belong to this cell
+            op.setdescr(self.last_label_descr)
+        else:
+            assert len(descr.target_tokens) == 1  # only a single registered target is handled
+            op.setdescr(descr.target_tokens[0])
+        self.emit_operation(op)
 
 dispatch_opt = make_dispatcher_method(OptSimplify, 'optimize_',
         default=OptSimplify.emit_operation)
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_multilabel.py b/pypy/jit/metainterp/optimizeopt/test/test_multilabel.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/metainterp/optimizeopt/test/test_multilabel.py
@@ -0,0 +1,200 @@
+from pypy.jit.metainterp.optimizeopt.test.test_util import (
+    LLtypeMixin, BaseTest, Storage, _sortboxes, FakeDescrWithSnapshot)
+from pypy.jit.metainterp.history import TreeLoop, JitCellToken, TargetToken
+from pypy.jit.metainterp.resoperation import rop, opname, ResOperation
+from pypy.jit.metainterp.optimize import InvalidLoop
+from py.test import raises
+
+class BaseTestMultiLabel(BaseTest):  # optimizer tests for traces containing LABEL operations
+    enable_opts = "intbounds:rewrite:virtualize:string:earlyforce:pure:heap:unroll"
+
+    def optimize_loop(self, ops, expected):  # optimizes 'ops' one label-delimited section at a time
+        loop = self.parse(ops)
+        if expected != "crash!":
+            expected = self.parse(expected)
+        # 'part' is refilled for each section; 'optimized' accumulates the output
+        part = TreeLoop('part')
+        part.inputargs = loop.inputargs
+        part.start_resumedescr = FakeDescrWithSnapshot()
+        token = loop.original_jitcell_token
+
+        optimized = TreeLoop('optimized')
+        optimized.inputargs = loop.inputargs
+        optimized.operations = []
+        # indices of all LABEL operations in the parsed trace
+        labels = [i for i, op in enumerate(loop.operations) \
+                  if op.getopnum()==rop.LABEL]
+        prv = 0  # start index of the section being processed
+        last_label = []  # trailing LABEL popped from the previous section, if any
+        for nxt in labels + [len(loop.operations)]:
+            assert prv != nxt  # every section must contain at least one operation
+            operations = last_label + loop.operations[prv:nxt]
+            if nxt < len(loop.operations):  # not the final section: end it with a jump
+                label = loop.operations[nxt]
+                assert label.getopnum() == rop.LABEL
+                jumpop = ResOperation(rop.JUMP, label.getarglist(),
+                                      None, descr=token)
+                operations.append(jumpop)
+            part.operations = operations
+            self._do_optimize_loop(part, None)
+            if part.operations[-1].getopnum() == rop.LABEL:  # keep it to head the next section
+                last_label = [part.operations.pop()]
+            else:
+                last_label = []
+            optimized.operations.extend(part.operations)
+            prv = nxt + 1  # continue after the LABEL itself
+        
+        #
+        print
+        print "Optimized:"
+        if optimized.operations:
+            print '\n'.join([str(o) for o in optimized.operations])
+        else:
+            print 'Failed!'
+        print
+
+        assert expected != "crash!", "should have raised an exception"
+        self.assert_equal(optimized, expected)
+
+        return optimized
+
+    def test_simple(self):  # the int_add after the label is removed; i2 is passed via the label
+        ops = """
+        [i1]
+        i2 = int_add(i1, 1)
+        escape(i2)
+        label(i1)
+        i3 = int_add(i1, 1)
+        escape(i3)
+        jump(i1)
+        """
+        expected = """
+        [i1]
+        i2 = int_add(i1, 1)
+        escape(i2)
+        label(i1, i2)
+        escape(i2)
+        jump(i1, i2)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_forced_virtual(self):  # the object passed to the label is escaped afterwards
+        ops = """
+        [p1]
+        p3 = new_with_vtable(ConstClass(node_vtable))
+        label(p3)
+        escape(p3)
+        jump(p3)
+        """
+        with raises(InvalidLoop):
+            self.optimize_loop(ops, ops)
+
+    def test_virtuals_with_nonmatching_fields(self):  # valuedescr before the label, nextdescr after
+        ops = """
+        [p1]
+        p3 = new_with_vtable(ConstClass(node_vtable))
+        setfield_gc(p3, 1, descr=valuedescr)
+        label(p3)
+        p4 = new_with_vtable(ConstClass(node_vtable))
+        setfield_gc(p4, 1, descr=nextdescr)
+        jump(p4)
+        """
+        with raises(InvalidLoop):
+            self.optimize_loop(ops, ops)
+
+    def test_virtual_arrays_with_nonmatching_lens(self):  # array of length 3 vs length 2
+        ops = """
+        [p1]
+        p2 = new_array(3, descr=arraydescr)
+        label(p2)
+        p4 = new_array(2, descr=arraydescr)        
+        jump(p4)
+        """
+        with raises(InvalidLoop):
+            self.optimize_loop(ops, ops)
+        
+    def test_nonmatching_arraystruct_1(self):  # same length, different interior field descrs
+        ops = """
+        [p1, f0]
+        p2 = new_array(3, descr=complexarraydescr)
+        setinteriorfield_gc(p2, 2, f0, descr=complexrealdescr)
+        label(p2, f0)
+        p4 = new_array(3, descr=complexarraydescr)
+        setinteriorfield_gc(p4, 2, f0, descr=compleximagdescr)
+        jump(p4, f0)
+        """
+        with raises(InvalidLoop):
+            self.optimize_loop(ops, ops)
+        
+    def test_nonmatching_arraystruct_2(self):  # same field descr, different array lengths
+        ops = """
+        [p1, f0]
+        p2 = new_array(3, descr=complexarraydescr)
+        setinteriorfield_gc(p2, 2, f0, descr=complexrealdescr)
+        label(p2, f0)
+        p4 = new_array(2, descr=complexarraydescr)
+        setinteriorfield_gc(p4, 0, f0, descr=complexrealdescr)        
+        jump(p4, f0)
+        """
+        with raises(InvalidLoop):
+            self.optimize_loop(ops, ops)
+
+    def test_not_virtual(self):  # the jump passes an escape() result instead of a new object
+        ops = """
+        [p1]
+        p3 = new_with_vtable(ConstClass(node_vtable))
+        label(p3)
+        p4 = escape()
+        jump(p4)
+        """
+        with raises(InvalidLoop):
+            self.optimize_loop(ops, ops)
+
+    def test_not_virtual_array(self):  # same as test_not_virtual, with a new_array
+        ops = """
+        [p1]
+        p3 = new_array(3, descr=arraydescr)
+        label(p3)
+        p4 = escape()
+        jump(p4)
+        """
+        with raises(InvalidLoop):
+            self.optimize_loop(ops, ops)
+
+    def test_not_virtual_arraystruct(self):  # same again, with a complexarraydescr array
+        ops = """
+        [p1]
+        p3 = new_array(3, descr=complexarraydescr)
+        label(p3)
+        p4 = escape()
+        jump(p4)
+        """
+        with raises(InvalidLoop):
+            self.optimize_loop(ops, ops)
+
+    def test_virtual_turns_constant(self):  # guard_value on the new object after the label
+        ops = """
+        [p1]
+        p3 = new_with_vtable(ConstClass(node_vtable))
+        label(p3)
+        guard_value(p3, ConstPtr(myptr)) []
+        jump(p3)
+        """
+        with raises(InvalidLoop):
+            self.optimize_loop(ops, ops)
+        
+    def test_virtuals_turns_not_equal(self):  # label(p3, p3) but the jump passes p3 and p4
+        ops = """
+        [p1, p2]
+        p3 = new_with_vtable(ConstClass(node_vtable))
+        label(p3, p3)
+        p4 = new_with_vtable(ConstClass(node_vtable))
+        jump(p3, p4)
+        """
+        with raises(InvalidLoop):
+            self.optimize_loop(ops, ops)
+        
+    
+class TestLLtype(BaseTestMultiLabel, LLtypeMixin):  # concrete test class: adds LLtypeMixin
+    pass
+
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py b/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
--- a/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
@@ -1,7 +1,8 @@
 import py
 from pypy.rlib.objectmodel import instantiate
 from pypy.jit.metainterp.optimizeopt.test.test_util import (
-    LLtypeMixin, BaseTest, FakeMetaInterpStaticData)
+    LLtypeMixin, BaseTest, FakeMetaInterpStaticData, convert_old_style_to_targets)
+from pypy.jit.metainterp.history import TargetToken, JitCellToken
 from pypy.jit.metainterp.test.test_compile import FakeLogger
 import pypy.jit.metainterp.optimizeopt.optimizer as optimizeopt
 import pypy.jit.metainterp.optimizeopt.virtualize as virtualize
@@ -11,7 +12,6 @@
 from pypy.jit.metainterp.resoperation import rop, opname, ResOperation
 from pypy.rlib.rarithmetic import LONG_BIT
 
-
 def test_store_final_boxes_in_guard():
     from pypy.jit.metainterp.compile import ResumeGuardDescr
     from pypy.jit.metainterp.resume import tag, TAGBOX
@@ -116,9 +116,13 @@
     enable_opts = "intbounds:rewrite:virtualize:string:earlyforce:pure:heap"
 
     def optimize_loop(self, ops, optops, call_pure_results=None):
-
         loop = self.parse(ops)
-        expected = self.parse(optops)
+        token = JitCellToken() 
+        loop.operations = [ResOperation(rop.LABEL, loop.inputargs, None, descr=TargetToken(token))] + \
+                          loop.operations
+        if loop.operations[-1].getopnum() == rop.JUMP:
+            loop.operations[-1].setdescr(token)
+        expected = convert_old_style_to_targets(self.parse(optops), jump=True)
         self._do_optimize_loop(loop, call_pure_results)
         print '\n'.join([str(o) for o in loop.operations])
         self.assert_equal(loop, expected)
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py b/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
--- a/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
@@ -1,13 +1,13 @@
 import py
 from pypy.rlib.objectmodel import instantiate
 from pypy.jit.metainterp.optimizeopt.test.test_util import (
-    LLtypeMixin, BaseTest, Storage, _sortboxes)
+    LLtypeMixin, BaseTest, Storage, _sortboxes, convert_old_style_to_targets)
 import pypy.jit.metainterp.optimizeopt.optimizer as optimizeopt
 import pypy.jit.metainterp.optimizeopt.virtualize as virtualize
 from pypy.jit.metainterp.optimizeopt import optimize_loop_1, ALL_OPTS_DICT, build_opt_chain
 from pypy.jit.metainterp.optimize import InvalidLoop
 from pypy.jit.metainterp.history import AbstractDescr, ConstInt, BoxInt
-from pypy.jit.metainterp.history import TreeLoop, LoopToken
+from pypy.jit.metainterp.history import TreeLoop, JitCellToken, TargetToken
 from pypy.jit.metainterp.jitprof import EmptyProfiler
 from pypy.jit.metainterp import executor, compile, resume, history
 from pypy.jit.metainterp.resoperation import rop, opname, ResOperation
@@ -15,7 +15,7 @@
 from pypy.jit.metainterp.optimizeopt.util import args_dict
 from pypy.jit.metainterp.optimizeopt.test.test_optimizebasic import FakeMetaInterpStaticData
 from pypy.config.pypyoption import get_pypy_config
-
+from pypy.jit.metainterp.optimizeopt.unroll import Inliner
 
 def test_build_opt_chain():
     def check(chain, expected_names):
@@ -23,49 +23,37 @@
         assert names == expected_names
     #
     metainterp_sd = FakeMetaInterpStaticData(None)
-    chain, _ = build_opt_chain(metainterp_sd, "", inline_short_preamble=False)
+    chain, _ = build_opt_chain(metainterp_sd, "")
     check(chain, ["OptSimplify"])
     #
     chain, _ = build_opt_chain(metainterp_sd, "")
-    check(chain, ["OptInlineShortPreamble", "OptSimplify"])
+    check(chain, ["OptSimplify"])
     #
     chain, _ = build_opt_chain(metainterp_sd, "")
-    check(chain, ["OptInlineShortPreamble", "OptSimplify"])
+    check(chain, ["OptSimplify"])
     #
     chain, _ = build_opt_chain(metainterp_sd, "heap:intbounds")
-    check(chain, ["OptInlineShortPreamble", "OptIntBounds", "OptHeap", "OptSimplify"])
+    check(chain, ["OptIntBounds", "OptHeap", "OptSimplify"])
     #
     chain, unroll = build_opt_chain(metainterp_sd, "unroll")
-    check(chain, ["OptInlineShortPreamble", "OptSimplify"])
+    check(chain, ["OptSimplify"])
     assert unroll
     #
-    chain, _ = build_opt_chain(metainterp_sd, "aaa:bbb", inline_short_preamble=False)
+    chain, _ = build_opt_chain(metainterp_sd, "aaa:bbb")
     check(chain, ["OptSimplify"])
     #
-    chain, _ = build_opt_chain(metainterp_sd, "ffi", inline_short_preamble=False)
+    chain, _ = build_opt_chain(metainterp_sd, "ffi")
     check(chain, ["OptFfiCall", "OptSimplify"])
     #
     metainterp_sd.config = get_pypy_config(translating=True)
     assert not metainterp_sd.config.translation.jit_ffi
-    chain, _ = build_opt_chain(metainterp_sd, "ffi", inline_short_preamble=False)
+    chain, _ = build_opt_chain(metainterp_sd, "ffi")
     check(chain, ["OptSimplify"])
 
 
 # ____________________________________________________________
 
 
-class FakeDescr(compile.ResumeGuardDescr):
-    class rd_snapshot:
-        class prev:
-            prev = None
-            boxes = []
-        boxes = []
-    def clone_if_mutable(self):
-        return FakeDescr()
-    def __eq__(self, other):
-        return isinstance(other, Storage) or isinstance(other, FakeDescr)
-
-
 class BaseTestWithUnroll(BaseTest):
 
     enable_opts = "intbounds:rewrite:virtualize:string:earlyforce:pure:heap:unroll"
@@ -79,40 +67,41 @@
             expected_preamble = self.parse(expected_preamble)
         if expected_short:
             expected_short = self.parse(expected_short)
-        loop.preamble = TreeLoop('preamble')
-        loop.preamble.inputargs = loop.inputargs
-        loop.preamble.token = LoopToken()
-        loop.preamble.start_resumedescr = FakeDescr()
-        #
-        self._do_optimize_loop(loop, call_pure_results)
+
+        preamble = self.unroll_and_optimize(loop, call_pure_results)
+        
         #
         print
         print "Preamble:"
-        print loop.preamble.inputargs
-        if loop.preamble.operations:
-            print '\n'.join([str(o) for o in loop.preamble.operations])
+        if preamble.operations:
+            print '\n'.join([str(o) for o in preamble.operations])
         else:
             print 'Failed!'
         print
         print "Loop:"
-        print loop.inputargs
         print '\n'.join([str(o) for o in loop.operations])
         print
         if expected_short:
             print "Short Preamble:"
-            short = loop.preamble.token.short_preamble[0]
-            print short.inputargs
-            print '\n'.join([str(o) for o in short.operations])
+            short = loop.operations[0].getdescr().short_preamble
+            print '\n'.join([str(o) for o in short])
             print
 
         assert expected != "crash!", "should have raised an exception"
-        self.assert_equal(loop, expected)
+        self.assert_equal(loop, convert_old_style_to_targets(expected, jump=True))
+        assert loop.operations[0].getdescr() == loop.operations[-1].getdescr()
         if expected_preamble:
-            self.assert_equal(loop.preamble, expected_preamble,
+            self.assert_equal(preamble, convert_old_style_to_targets(expected_preamble, jump=False),
                               text_right='expected preamble')
+            assert preamble.operations[-1].getdescr() == loop.operations[0].getdescr()
         if expected_short:
-            self.assert_equal(short, expected_short,
+            short_preamble = TreeLoop('short preamble')
+            assert short[0].getopnum() == rop.LABEL
+            short_preamble.inputargs = short[0].getarglist()
+            short_preamble.operations = short
+            self.assert_equal(short_preamble, convert_old_style_to_targets(expected_short, jump=True),
                               text_right='expected short preamble')
+            assert short[-1].getdescr() == loop.operations[0].getdescr()
 
         return loop
 
@@ -234,7 +223,7 @@
             """ % expected_value
             self.optimize_loop(ops, expected)
 
-    def test_reverse_of_cast(self):
+    def test_reverse_of_cast_1(self):
         ops = """
         [i0]
         p0 = cast_int_to_ptr(i0)
@@ -246,6 +235,8 @@
         jump(i0)
         """
         self.optimize_loop(ops, expected)
+
+    def test_reverse_of_cast_2(self):        
         ops = """
         [p0]
         i1 = cast_ptr_to_int(p0)
@@ -1181,6 +1172,7 @@
         i1 = getfield_gc(p0, descr=valuedescr)
         i2 = int_sub(i1, 1)
         i3 = int_add(i0, i1)
+        i4 = same_as(i2) # This same_as should be killed by backend
         jump(i3, i2, i1)
         """
         expected = """
@@ -1252,10 +1244,10 @@
         i1 = int_add(i0, 1)
         p1 = new_with_vtable(ConstClass(node_vtable2))
         p2 = new_with_vtable(ConstClass(node_vtable2))
-        setfield_gc(p0, p1, descr=nextdescr)
+        setfield_gc(p2, i1, descr=valuedescr)
         setfield_gc(p2, p1, descr=nextdescr)
         setfield_gc(p1, p2, descr=nextdescr)
-        setfield_gc(p2, i1, descr=valuedescr)
+        setfield_gc(p0, p1, descr=nextdescr)
         jump(p1)
         """
         self.optimize_loop(ops, loop, preamble)
@@ -1317,6 +1309,7 @@
         p30 = new_with_vtable(ConstClass(node_vtable))
         setfield_gc(p30, i28, descr=nextdescr)
         setfield_gc(p3, p30, descr=valuedescr)
+        p46 = same_as(p30) # This same_as should be killed by backend        
         jump(i29, p30, p3)
         """
         expected = """
@@ -1324,8 +1317,8 @@
         i28 = int_add(i0, 1)
         i29 = int_add(i28, 1)
         p30 = new_with_vtable(ConstClass(node_vtable))
+        setfield_gc(p30, i28, descr=nextdescr)
         setfield_gc(p3, p30, descr=valuedescr)
-        setfield_gc(p30, i28, descr=nextdescr)
         jump(i29, p30, p3)
         """
         self.optimize_loop(ops, expected, preamble)
@@ -2118,7 +2111,9 @@
         guard_true(i3) []
         i4 = int_neg(i2)
         setfield_gc(p1, i2, descr=valuedescr)
-        jump(p1, i1, i2, i4, i4)
+        i7 = same_as(i2) # This same_as should be killed by backend
+        i6 = same_as(i4)
+        jump(p1, i1, i2, i4, i6)
         """
         expected = """
         [p1, i1, i2, i4, i5]
@@ -2148,7 +2143,8 @@
         i4 = int_neg(i2)
         setfield_gc(p1, NULL, descr=nextdescr)
         escape()
-        jump(p1, i2, i4, i4)
+        i5 = same_as(i4)
+        jump(p1, i2, i4, i5)
         """
         expected = """
         [p1, i2, i4, i5]
@@ -2177,7 +2173,8 @@
         i4 = int_neg(i2)
         setfield_gc(p1, NULL, descr=nextdescr)
         escape()
-        jump(p1, i2, i4, i4)
+        i5 = same_as(i4)
+        jump(p1, i2, i4, i5)
         """
         expected = """
         [p1, i2, i4, i5]
@@ -2207,7 +2204,9 @@
         guard_true(i5) []
         i4 = int_neg(i2)
         setfield_gc(p1, i2, descr=valuedescr)
-        jump(p1, i1, i2, i4, i4)
+        i8 = same_as(i2) # This same_as should be killed by backend
+        i7 = same_as(i4)
+        jump(p1, i1, i2, i4, i7)
         """
         expected = """
         [p1, i1, i2, i4, i7]
@@ -2433,7 +2432,8 @@
         p2 = new_with_vtable(ConstClass(node_vtable))
         setfield_gc(p2, p4, descr=nextdescr)
         setfield_gc(p1, p2, descr=nextdescr)
-        jump(p1, i2, i4, p4, i4)
+        i101 = same_as(i4) 
+        jump(p1, i2, i4, p4, i101)
         """
         expected = """
         [p1, i2, i4, p4, i5]
@@ -3276,7 +3276,15 @@
         setfield_gc(p1, i3, descr=valuedescr)
         jump(p1, i4, i3)
         '''
-        self.optimize_loop(ops, ops, ops)
+        preamble = '''
+        [p1, i1, i4]
+        setfield_gc(p1, i1, descr=valuedescr)
+        i3 = call_assembler(i1, descr=asmdescr)
+        setfield_gc(p1, i3, descr=valuedescr)
+        i143 = same_as(i3) # Should be killed by backend        
+        jump(p1, i4, i3)
+        '''
+        self.optimize_loop(ops, ops, preamble)
 
     def test_call_assembler_invalidates_heap_knowledge(self):
         ops = '''
@@ -3307,7 +3315,9 @@
         setfield_gc(p1, i1, descr=valuedescr)
         i3 = call(p1, descr=plaincalldescr)
         setfield_gc(p1, i3, descr=valuedescr)
-        jump(p1, i4, i3, i3)
+        i148 = same_as(i3)
+        i147 = same_as(i3)
+        jump(p1, i4, i3, i148)
         '''
         self.optimize_loop(ops, expected, preamble)
 
@@ -3330,7 +3340,8 @@
         setfield_gc(p1, i1, descr=valuedescr)
         i3 = call(p1, descr=plaincalldescr)
         setfield_gc(p1, i1, descr=valuedescr)
-        jump(p1, i4, i3, i3)
+        i151 = same_as(i3)
+        jump(p1, i4, i3, i151)
         '''
         self.optimize_loop(ops, expected, preamble)
 
@@ -3350,7 +3361,8 @@
         escape(i1)
         escape(i2)
         i4 = call(123456, 4, i0, 6, descr=plaincalldescr)
-        jump(i0, i4, i4)
+        i153 = same_as(i4)
+        jump(i0, i4, i153)
         '''
         expected = '''
         [i0, i4, i5]
@@ -3380,7 +3392,8 @@
         escape(i2)
         i4 = call(123456, 4, i0, 6, descr=plaincalldescr)
         guard_no_exception() []
-        jump(i0, i4, i4)
+        i155 = same_as(i4)        
+        jump(i0, i4, i155)
         '''
         expected = '''
         [i0, i2, i3]
@@ -4198,6 +4211,7 @@
         preamble = """
         [p0]
         i0 = strlen(p0)
+        i3 = same_as(i0) # Should be killed by backend        
         jump(p0)
         """
         expected = """
@@ -5418,6 +5432,7 @@
         [p0]
         p1 = getfield_gc(p0, descr=valuedescr)
         setfield_gc(p0, p0, descr=valuedescr)
+        p4450 = same_as(p0) # Should be killed by backend
         jump(p0)
         """
         expected = """
@@ -5653,7 +5668,8 @@
         p3 = newstr(i3)
         copystrcontent(p1, p3, 0, 0, i1)
         copystrcontent(p2, p3, 0, i1, i2)
-        jump(p2, p3, i2)
+        i7 = same_as(i2)        
+        jump(p2, p3, i7)
         """
         expected = """
         [p1, p2, i1]
@@ -5728,7 +5744,9 @@
         copystrcontent(p1, p5, 0, 0, i1)
         copystrcontent(p2, p5, 0, i1, i2)
         copystrcontent(p3, p5, 0, i12, i3)
-        jump(p2, p3, p5, i2, i3)
+        i129 = same_as(i2)
+        i130 = same_as(i3)
+        jump(p2, p3, p5, i129, i130)
         """
         expected = """
         [p1, p2, p3, i1, i2]
@@ -5788,7 +5806,8 @@
         [p1, i1, i2, i3]
         escape(i3)
         i4 = int_sub(i2, i1)
-        jump(p1, i1, i2, i4, i4)
+        i5 = same_as(i4)        
+        jump(p1, i1, i2, i4, i5)
         """
         expected = """
         [p1, i1, i2, i3, i4]
@@ -5813,7 +5832,8 @@
         escape(i5)
         i4 = int_sub(i2, i1)
         setfield_gc(p2, i4, descr=valuedescr)
-        jump(p1, i1, i2, p2, i4, i4)
+        i8 = same_as(i4)
+        jump(p1, i1, i2, p2, i8, i4)
         """
         expected = """
         [p1, i1, i2, p2, i5, i6]
@@ -5939,7 +5959,8 @@
         p4 = newstr(i5)
         copystrcontent(p1, p4, i1, 0, i3)
         copystrcontent(p2, p4, 0, i3, i4)
-        jump(p4, i1, i2, p2, i5, i3, i4)
+        i9 = same_as(i4)
+        jump(p4, i1, i2, p2, i5, i3, i9)
         """
         expected = """
         [p1, i1, i2, p2, i5, i3, i4]
@@ -6061,7 +6082,9 @@
         copystrcontent(p2, p4, 0, i1, i2)
         i0 = call(0, p3, p4, descr=strequaldescr)
         escape(i0)
-        jump(p1, p2, p3, i3, i1, i2)
+        i11 = same_as(i1)
+        i12 = same_as(i2)
+        jump(p1, p2, p3, i3, i11, i12)
         """
         expected = """
         [p1, p2, p3, i3, i1, i2]
@@ -6281,6 +6304,7 @@
         i1 = strlen(p1)
         i0 = int_eq(i1, 0)
         escape(i0)
+        i3 = same_as(i1)        
         jump(p1, i0)
         """
         self.optimize_strunicode_loop_extradescrs(ops, expected, preamble)
@@ -6326,7 +6350,9 @@
         copystrcontent(p2, p4, 0, i1, i2)
         i0 = call(0, s"hello world", p4, descr=streq_nonnull_descr)
         escape(i0)
-        jump(p1, p2, i3, i1, i2)
+        i11 = same_as(i1)
+        i12 = same_as(i2)
+        jump(p1, p2, i3, i11, i12)
         """
         expected = """
         [p1, p2, i3, i1, i2]
@@ -6482,6 +6508,21 @@
         # not obvious, because of the exception UnicodeDecodeError that
         # can be raised by ll_str2unicode()
 
+    def test_record_known_class(self):
+        ops = """
+        [p0]
+        p1 = getfield_gc(p0, descr=nextdescr)
+        record_known_class(p1, ConstClass(node_vtable))
+        guard_class(p1, ConstClass(node_vtable)) []
+        jump(p1)
+        """
+        expected = """
+        [p0]
+        p1 = getfield_gc(p0, descr=nextdescr)
+        jump(p1)
+        """
+        self.optimize_loop(ops, expected)
+
     def test_quasi_immut(self):
         ops = """
         [p0, p1, i0]
@@ -6614,7 +6655,8 @@
         p188 = getarrayitem_gc(p187, 42, descr=<GcPtrArrayDescr>)
         guard_value(p188, ConstPtr(myptr)) []
         p25 = getfield_gc(ConstPtr(myptr), descr=otherdescr)
-        jump(p25, p187, i184, p25)
+        p26 = same_as(p25)
+        jump(p25, p187, i184, p26)
         """
         short = """
         [p1, p187, i184]
@@ -6883,7 +6925,8 @@
         [p9]
         i843 = strlen(p9)
         call(i843, descr=nonwritedescr)
-        jump(p9, i843)
+        i0 = same_as(i843)
+        jump(p9, i0)
         """
         short = """
         [p9]
@@ -6999,6 +7042,40 @@
         """
         self.optimize_loop(ops, expected)
 
+    def test_duplicated_aliased_virtual(self):
+        ops = """
+        [p1, p2]
+        p3 = new_with_vtable(ConstClass(node_vtable))
+        setfield_gc(p3, p3, descr=nextdescr)
+        p4 = getfield_gc(p3, descr=nextdescr)
+        jump(p3, p4)
+        """
+        expected = """
+        []
+        jump()
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_imported_aliased_virtual_in_failargs(self):
+        ops = """
+        [p1, p2, i0]
+        i2 = int_lt(i0, 10)
+        guard_true(i2) [p1, p2]
+        p3 = new_with_vtable(ConstClass(node_vtable))
+        setfield_gc(p3, p3, descr=nextdescr)
+        p4 = getfield_gc(p3, descr=nextdescr)
+        i1 = int_add(i0, 1)
+        jump(p3, p4, i1)
+        """
+        expected = """
+        [i0]
+        i2 = int_lt(i0, 10)
+        guard_true(i2) []
+        i1 = int_add(i0, 1)        
+        jump(i1)
+        """
+        self.optimize_loop(ops, expected)
+
     def test_chained_virtuals(self):
         ops = """
         [p0, p1]
@@ -7575,7 +7652,8 @@
         call(i2, descr=nonwritedescr)
         setfield_gc(p22, i1, descr=valuedescr)
         guard_nonnull_class(p18, ConstClass(node_vtable)) []
-        jump(p22, p18, i1, i1)
+        i10 = same_as(i1)
+        jump(p22, p18, i1, i10)
         """
         short = """
         [p22, p18, i1]
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_util.py b/pypy/jit/metainterp/optimizeopt/test/test_util.py
--- a/pypy/jit/metainterp/optimizeopt/test/test_util.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_util.py
@@ -8,7 +8,8 @@
 from pypy.jit.backend.llgraph import runner
 from pypy.jit.metainterp.history import (BoxInt, BoxPtr, ConstInt, ConstPtr,
                                          Const, TreeLoop, BoxObj,
-                                         ConstObj, AbstractDescr)
+                                         ConstObj, AbstractDescr,
+                                         JitCellToken, TargetToken)
 from pypy.jit.metainterp.optimizeopt.util import sort_descrs, equaloplists
 from pypy.jit.metainterp.optimize import InvalidLoop
 from pypy.jit.codewriter.effectinfo import EffectInfo
@@ -18,6 +19,8 @@
 from pypy.jit.metainterp import compile, resume, history
 from pypy.jit.metainterp.jitprof import EmptyProfiler
 from pypy.config.pypyoption import get_pypy_config
+from pypy.jit.metainterp.resoperation import rop, opname, ResOperation
+from pypy.jit.metainterp.optimizeopt.unroll import Inliner
 
 def test_sort_descrs():
     class PseudoDescr(AbstractDescr):
@@ -344,6 +347,11 @@
         self.config = get_pypy_config(translating=True)
         self.config.translation.jit_ffi = True
 
+    class logger_noopt:
+        @classmethod
+        def log_loop(*args):
+            pass
+
     class warmrunnerdesc:
         class memory_manager:
             retrace_limit = 5
@@ -394,7 +402,7 @@
                             expected.operations, False, remap, text_right)
 
     def _do_optimize_loop(self, loop, call_pure_results):
-        from pypy.jit.metainterp.optimizeopt import optimize_loop_1
+        from pypy.jit.metainterp.optimizeopt import optimize_trace
         from pypy.jit.metainterp.optimizeopt.util import args_dict
 
         self.loop = loop
@@ -408,7 +416,83 @@
         if hasattr(self, 'callinfocollection'):
             metainterp_sd.callinfocollection = self.callinfocollection
         #
-        optimize_loop_1(metainterp_sd, loop, self.enable_opts)
+        optimize_trace(metainterp_sd, loop, self.enable_opts)
+
+    def unroll_and_optimize(self, loop, call_pure_results=None):
+        operations =  loop.operations
+        jumpop = operations[-1]
+        assert jumpop.getopnum() == rop.JUMP
+        inputargs = loop.inputargs
+
+        jump_args = jumpop.getarglist()[:]
+        operations = operations[:-1]
+        cloned_operations = [op.clone() for op in operations]
+
+        preamble = TreeLoop('preamble')
+        preamble.inputargs = inputargs
+        preamble.start_resumedescr = FakeDescrWithSnapshot()
+
+        token = JitCellToken() 
+        preamble.operations = [ResOperation(rop.LABEL, inputargs, None, descr=TargetToken(token))] + \
+                              operations +  \
+                              [ResOperation(rop.JUMP, jump_args, None, descr=token)]
+        self._do_optimize_loop(preamble, call_pure_results)
+
+        assert preamble.operations[-1].getopnum() == rop.LABEL
+
+        inliner = Inliner(inputargs, jump_args)
+        loop.start_resumedescr = preamble.start_resumedescr
+        loop.operations = [preamble.operations[-1]] + \
+                          [inliner.inline_op(op, clone=False) for op in cloned_operations] + \
+                          [ResOperation(rop.JUMP, [inliner.inline_arg(a) for a in jump_args],
+                                        None, descr=token)] 
+                          #[inliner.inline_op(jumpop)]
+        assert loop.operations[-1].getopnum() == rop.JUMP
+        assert loop.operations[0].getopnum() == rop.LABEL
+        loop.inputargs = loop.operations[0].getarglist()
+
+        self._do_optimize_loop(loop, call_pure_results)
+        extra_same_as = []
+        while loop.operations[0].getopnum() != rop.LABEL:
+            extra_same_as.append(loop.operations[0])
+            del loop.operations[0]
+
+        # Hack to prevent random order of same_as ops
+        extra_same_as.sort(key=lambda op: str(preamble.operations).find(str(op.getarg(0))))
+
+        for op in extra_same_as:
+            preamble.operations.insert(-1, op)
+
+        return preamble
+        
+
+class FakeDescr(compile.ResumeGuardDescr):
+    def clone_if_mutable(self):
+        return FakeDescr()
+    def __eq__(self, other):
+        return isinstance(other, FakeDescr)
+
+class FakeDescrWithSnapshot(compile.ResumeGuardDescr):
+    class rd_snapshot:
+        class prev:
+            prev = None
+            boxes = []
+        boxes = []
+    def clone_if_mutable(self):
+        return FakeDescrWithSnapshot()
+    def __eq__(self, other):
+        return isinstance(other, Storage) or isinstance(other, FakeDescrWithSnapshot)
+
+
+def convert_old_style_to_targets(loop, jump):
+    newloop = TreeLoop(loop.name)
+    newloop.inputargs = loop.inputargs
+    newloop.operations = [ResOperation(rop.LABEL, loop.inputargs, None, descr=FakeDescr())] + \
+                      loop.operations
+    if not jump:
+        assert newloop.operations[-1].getopnum() == rop.JUMP
+        newloop.operations[-1] = ResOperation(rop.LABEL, newloop.operations[-1].getarglist(), None, descr=FakeDescr())
+    return newloop
 
 # ____________________________________________________________
 
diff --git a/pypy/jit/metainterp/optimizeopt/unroll.py b/pypy/jit/metainterp/optimizeopt/unroll.py
--- a/pypy/jit/metainterp/optimizeopt/unroll.py
+++ b/pypy/jit/metainterp/optimizeopt/unroll.py
@@ -1,11 +1,12 @@
 from pypy.jit.codewriter.effectinfo import EffectInfo
-from pypy.jit.metainterp.optimizeopt.virtualstate import VirtualStateAdder, ShortBoxes
+from pypy.jit.metainterp.optimizeopt.virtualstate import VirtualStateAdder, ShortBoxes, BadVirtualState
 from pypy.jit.metainterp.compile import ResumeGuardDescr
-from pypy.jit.metainterp.history import TreeLoop, LoopToken
+from pypy.jit.metainterp.history import TreeLoop, TargetToken, JitCellToken
 from pypy.jit.metainterp.jitexc import JitException
 from pypy.jit.metainterp.optimize import InvalidLoop, RetraceLoop
 from pypy.jit.metainterp.optimizeopt.optimizer import *
 from pypy.jit.metainterp.optimizeopt.generalize import KillHugeIntBounds
+from pypy.jit.metainterp.inliner import Inliner
 from pypy.jit.metainterp.resoperation import rop, ResOperation
 from pypy.jit.metainterp.resume import Snapshot
 from pypy.rlib.debug import debug_print
@@ -13,63 +14,11 @@
 
 # FIXME: Introduce some VirtualOptimizer super class instead
 
-def optimize_unroll(metainterp_sd, loop, optimizations):
+def optimize_unroll(metainterp_sd, loop, optimizations, inline_short_preamble=True):
     opt = UnrollOptimizer(metainterp_sd, loop, optimizations)
+    opt.inline_short_preamble = inline_short_preamble
     opt.propagate_all_forward()
 
-class Inliner(object):
-    def __init__(self, inputargs, jump_args):
-        assert len(inputargs) == len(jump_args)
-        self.argmap = {}
-        for i in range(len(inputargs)):
-            if inputargs[i] in self.argmap:
-                assert self.argmap[inputargs[i]] == jump_args[i]
-            else:
-                self.argmap[inputargs[i]] = jump_args[i]
-        self.snapshot_map = {None: None}
-
-    def inline_op(self, newop, ignore_result=False, clone=True,
-                  ignore_failargs=False):
-        if clone:
-            newop = newop.clone()
-        args = newop.getarglist()
-        newop.initarglist([self.inline_arg(a) for a in args])
-
-        if newop.is_guard():
-            args = newop.getfailargs()
-            if args and not ignore_failargs:
-                newop.setfailargs([self.inline_arg(a) for a in args])
-            else:
-                newop.setfailargs([])
-
-        if newop.result and not ignore_result:
-            old_result = newop.result
-            newop.result = newop.result.clonebox()
-            self.argmap[old_result] = newop.result
-
-        self.inline_descr_inplace(newop.getdescr())
-
-        return newop
-
-    def inline_descr_inplace(self, descr):
-        if isinstance(descr, ResumeGuardDescr):
-            descr.rd_snapshot = self.inline_snapshot(descr.rd_snapshot)
-
-    def inline_arg(self, arg):
-        if arg is None:
-            return None
-        if isinstance(arg, Const):
-            return arg
-        return self.argmap[arg]
-
-    def inline_snapshot(self, snapshot):
-        if snapshot in self.snapshot_map:
-            return self.snapshot_map[snapshot]
-        boxes = [self.inline_arg(a) for a in snapshot.boxes]
-        new_snapshot = Snapshot(self.inline_snapshot(snapshot.prev), boxes)
-        self.snapshot_map[snapshot] = new_snapshot
-        return new_snapshot
-
 class UnrollableOptimizer(Optimizer):
     def setup(self):
         self.importable_values = {}
@@ -101,14 +50,13 @@
     become the preamble or entry bridge (don't think there is a
     distinction anymore)"""
 
+    inline_short_preamble = True
+    did_import = False
+    
     def __init__(self, metainterp_sd, loop, optimizations):
         self.optimizer = UnrollableOptimizer(metainterp_sd, loop, optimizations)
-        self.cloned_operations = []
-        for op in self.optimizer.loop.operations:
-            newop = op.clone()
-            self.cloned_operations.append(newop)
 
-    def fix_snapshot(self, loop, jump_args, snapshot):
+    def fix_snapshot(self, jump_args, snapshot):
         if snapshot is None:
             return None
         snapshot_args = snapshot.boxes 
@@ -116,116 +64,348 @@
         for a in snapshot_args:
             a = self.getvalue(a).get_key_box()
             new_snapshot_args.append(a)
-        prev = self.fix_snapshot(loop, jump_args, snapshot.prev)
+        prev = self.fix_snapshot(jump_args, snapshot.prev)
         return Snapshot(prev, new_snapshot_args)
             
     def propagate_all_forward(self):
         loop = self.optimizer.loop
+        self.optimizer.clear_newoperations()
+
+
+        start_label = loop.operations[0]
+        if start_label.getopnum() == rop.LABEL:
+            loop.operations = loop.operations[1:]
+            # We need to emit the label op before import_state() as emitting it
+            # will clear heap caches
+            self.optimizer.send_extra_operation(start_label)
+        else:
+            start_label = None            
+
         jumpop = loop.operations[-1]
         if jumpop.getopnum() == rop.JUMP:
             loop.operations = loop.operations[:-1]
         else:
-            loopop = None
+            jumpop = None
 
-        self.optimizer.propagate_all_forward()
+        self.import_state(start_label)
+        self.optimizer.propagate_all_forward(clear=False)
 
+        if not jumpop:
+            return 
+        if self.jump_to_already_compiled_trace(jumpop):
+            # Found a compiled trace to jump to
+            if self.did_import:
 
-        if jumpop:
-            assert jumpop.getdescr() is loop.token
-            jump_args = jumpop.getarglist()
-            jumpop.initarglist([])
+                self.close_bridge(start_label)
+                self.finilize_short_preamble(start_label)
+            return
+
+        cell_token = jumpop.getdescr()
+        assert isinstance(cell_token, JitCellToken)
+        stop_label = ResOperation(rop.LABEL, jumpop.getarglist(), None, TargetToken(cell_token))
+
+        if not self.did_import: # Enforce the previous behaviour of always peeling  exactly one iteration (for now)
             self.optimizer.flush()
+            KillHugeIntBounds(self.optimizer).apply()
 
-            KillHugeIntBounds(self.optimizer).apply()
+            loop.operations = self.optimizer.get_newoperations()
+            self.export_state(stop_label)
+            loop.operations.append(stop_label)            
+        else:
+            assert stop_label
+            assert start_label
+            stop_target = stop_label.getdescr()
+            start_target = start_label.getdescr()
+            assert isinstance(stop_target, TargetToken)
+            assert isinstance(start_target, TargetToken)
+            assert stop_target.targeting_jitcell_token is start_target.targeting_jitcell_token
+            jumpop = ResOperation(rop.JUMP, stop_label.getarglist(), None, descr=start_label.getdescr())
+
+            self.close_loop(jumpop)
+            self.finilize_short_preamble(start_label)
+
+    def export_state(self, targetop):
+        original_jump_args = targetop.getarglist()
+        jump_args = [self.getvalue(a).get_key_box() for a in original_jump_args]
+
+        assert self.optimizer.loop.start_resumedescr
+        start_resumedescr = self.optimizer.loop.start_resumedescr.clone_if_mutable()
+        assert isinstance(start_resumedescr, ResumeGuardDescr)
+        start_resumedescr.rd_snapshot = self.fix_snapshot(jump_args, start_resumedescr.rd_snapshot)
+        # FIXME: I don't think we need fix_snapshot anymore
+
+        modifier = VirtualStateAdder(self.optimizer)
+        virtual_state = modifier.get_virtual_state(jump_args)
             
-            loop.preamble.operations = self.optimizer.get_newoperations()
-            jump_args = [self.getvalue(a).get_key_box() for a in jump_args]
+        values = [self.getvalue(arg) for arg in jump_args]
+        inputargs = virtual_state.make_inputargs(values, self.optimizer)
+        short_inputargs = virtual_state.make_inputargs(values, self.optimizer, keyboxes=True)
 
-            start_resumedescr = loop.preamble.start_resumedescr.clone_if_mutable()
-            self.start_resumedescr = start_resumedescr
-            assert isinstance(start_resumedescr, ResumeGuardDescr)
-            start_resumedescr.rd_snapshot = self.fix_snapshot(loop, jump_args,
-                                                              start_resumedescr.rd_snapshot)
+        constant_inputargs = {}
+        for box in jump_args: 
+            const = self.get_constant_box(box)
+            if const:
+                constant_inputargs[box] = const
 
-            modifier = VirtualStateAdder(self.optimizer)
-            virtual_state = modifier.get_virtual_state(jump_args)
+        short_boxes = ShortBoxes(self.optimizer, inputargs + constant_inputargs.keys())
+        aliased_vrituals = {}
+        for i in range(len(original_jump_args)):
+            if original_jump_args[i] is not jump_args[i]:
+                if values[i].is_virtual():
+                    aliased_vrituals[original_jump_args[i]] = jump_args[i] 
+                else:
+                    short_boxes.alias(original_jump_args[i], jump_args[i])
+
+        self.optimizer.clear_newoperations()
+        for box in short_inputargs:
+            value = self.getvalue(box)
+            if value.is_virtual():
+                value.force_box(self.optimizer)
+        inputarg_setup_ops = self.optimizer.get_newoperations()
+
+        target_token = targetop.getdescr()
+        assert isinstance(target_token, TargetToken)
+        targetop.initarglist(inputargs)
+        target_token.virtual_state = virtual_state
+        target_token.short_preamble = [ResOperation(rop.LABEL, short_inputargs, None)]
+        target_token.start_resumedescr = start_resumedescr
+        target_token.exported_state = ExportedState(constant_inputargs, short_boxes,
+                                                    inputarg_setup_ops, self.optimizer,
+                                                    aliased_vrituals, jump_args)
+
+    def import_state(self, targetop):
+        self.did_import = False
+        if not targetop:
+            # FIXME: Set up some sort of empty state with no virtuals?
+            return
+        target_token = targetop.getdescr()
+        if not target_token:
+            return
+        assert isinstance(target_token, TargetToken)
+        exported_state = target_token.exported_state
+        if not exported_state:
+            # FIXME: Set up some sort of empty state with no virtuals
+            return
+        self.did_import = True
+        
+        self.short = target_token.short_preamble[:]
+        self.short_seen = {}
+        self.short_boxes = exported_state.short_boxes.clone()
+        for box, const in exported_state.constant_inputargs.items():
+            self.short_seen[box] = True
+        self.imported_state = exported_state
+        self.inputargs = targetop.getarglist()
+        self.initial_virtual_state = target_token.virtual_state
+        self.start_resumedescr = target_token.start_resumedescr
+
+        seen = {}
+        for box in self.inputargs:
+            if box in seen:
+                continue
+            seen[box] = True
+            preamble_value = exported_state.optimizer.getvalue(box)
+            value = self.optimizer.getvalue(box)
+            value.import_from(preamble_value, self.optimizer)
+
+        for newbox, oldbox in self.short_boxes.aliases.items():
+            self.optimizer.make_equal_to(newbox, self.optimizer.getvalue(oldbox))
+        
+        # Set up the state of the new optimizer by emitting the
+        # short operations and discarding the result
+        self.optimizer.emitting_dissabled = True
+        for op in exported_state.inputarg_setup_ops:
+            self.optimizer.send_extra_operation(op)
+        seen = {}
+        
+        for op in self.short_boxes.operations():
+            self.ensure_short_op_emitted(op, self.optimizer, seen)
+            if op and op.result:
+                preamble_value = exported_state.optimizer.getvalue(op.result)
+                value = self.optimizer.getvalue(op.result)
+                if not value.is_virtual():
+                    imp = ValueImporter(self, preamble_value, op)
+                    self.optimizer.importable_values[value] = imp
+                newvalue = self.optimizer.getvalue(op.result)
+                newresult = newvalue.get_key_box()
+                if newresult is not op.result and not newvalue.is_constant():
+                    self.short_boxes.alias(newresult, op.result)
+                    op = ResOperation(rop.SAME_AS, [op.result], newresult)
+                    self.optimizer._newoperations = [op] + self.optimizer._newoperations # XXX
+                    #self.optimizer.getvalue(op.result).box = op.result # FIXME: HACK!!!
+        self.optimizer.flush()
+        self.optimizer.emitting_dissabled = False
+
+        for box, key_box in exported_state.aliased_vrituals.items():
+            self.optimizer.make_equal_to(box, self.getvalue(key_box))
+
+    def close_bridge(self, start_label):
+        inputargs = self.inputargs        
+        short_jumpargs = inputargs[:]
+
+        # We don't need to inline the short preamble we are creating as we are connecting
+        # the bridge to a different trace with a different short preamble
+        self.short_inliner = None
+        
+        newoperations = self.optimizer.get_newoperations()
+        self.boxes_created_this_iteration = {}
+        i = 0
+        while newoperations[i].getopnum() != rop.LABEL:
+            i += 1
+        while i < len(newoperations):
+            op = newoperations[i]
+            self.boxes_created_this_iteration[op.result] = True
+            args = op.getarglist()
+            if op.is_guard():
+                args = args + op.getfailargs()
+            for a in args:
+                self.import_box(a, inputargs, short_jumpargs, [])
+            i += 1
+            newoperations = self.optimizer.get_newoperations()
+        self.short.append(ResOperation(rop.JUMP, short_jumpargs, None, descr=start_label.getdescr()))
+        
+    def close_loop(self, jumpop):
+        virtual_state = self.initial_virtual_state
+        short_inputargs = self.short[0].getarglist()
+        constant_inputargs = self.imported_state.constant_inputargs
+        inputargs = self.inputargs
+        short_jumpargs = inputargs[:]
+
+        # Construct jumpargs from the virtual state
+        original_jumpargs = jumpop.getarglist()[:]
+        values = [self.getvalue(arg) for arg in jumpop.getarglist()]
+        try:
+            jumpargs = virtual_state.make_inputargs(values, self.optimizer)
+        except BadVirtualState:
+            raise InvalidLoop
+        jumpop.initarglist(jumpargs)
+
+        # Inline the short preamble at the end of the loop
+        jmp_to_short_args = virtual_state.make_inputargs(values, self.optimizer, keyboxes=True)
+        assert len(short_inputargs) == len(jmp_to_short_args)
+        args = {}
+        for i in range(len(short_inputargs)):
+            if short_inputargs[i] in args:
+                if args[short_inputargs[i]] != jmp_to_short_args[i]:
+                    raise InvalidLoop
+            args[short_inputargs[i]] = jmp_to_short_args[i]
+        self.short_inliner = Inliner(short_inputargs, jmp_to_short_args)
+        for box, const in constant_inputargs.items():
+            self.short_inliner.argmap[box] = const
+        for op in self.short[1:]:
+            newop = self.short_inliner.inline_op(op)
+            self.optimizer.send_extra_operation(newop)
+
+        # Import boxes produced in the preamble but used in the loop
+        newoperations = self.optimizer.get_newoperations()
+        self.boxes_created_this_iteration = {}
+        i = j = 0
+        while newoperations[i].getopnum() != rop.LABEL:
+            i += 1
+        while i < len(newoperations) or j < len(jumpargs):
+            if i == len(newoperations):
+                while j < len(jumpargs):
+                    a = jumpargs[j]
+                    if self.optimizer.loop.logops:
+                        debug_print('J:  ' + self.optimizer.loop.logops.repr_of_arg(a))
+                    self.import_box(a, inputargs, short_jumpargs, jumpargs)
+                    j += 1
+            else:
+                op = newoperations[i]
+
+                self.boxes_created_this_iteration[op.result] = True
+                args = op.getarglist()
+                if op.is_guard():
+                    args = args + op.getfailargs()
+
+                if self.optimizer.loop.logops:
+                    debug_print('OP: ' + self.optimizer.loop.logops.repr_of_resop(op))
+                for a in args:
+                    if self.optimizer.loop.logops:
+                        debug_print('A:  ' + self.optimizer.loop.logops.repr_of_arg(a))
+                    self.import_box(a, inputargs, short_jumpargs, jumpargs)
+                i += 1
+            newoperations = self.optimizer.get_newoperations()
+
+        jumpop.initarglist(jumpargs)
+        self.optimizer.send_extra_operation(jumpop)
+        self.short.append(ResOperation(rop.JUMP, short_jumpargs, None, descr=jumpop.getdescr()))
+
+        # Verify that the virtual state at the end of the loop is one
+        # that is compatible with the virtual state at the start of the loop
+        modifier = VirtualStateAdder(self.optimizer)
+        final_virtual_state = modifier.get_virtual_state(original_jumpargs)
+        debug_start('jit-log-virtualstate')
+        virtual_state.debug_print('Closed loop with ')
+        bad = {}
+        if not virtual_state.generalization_of(final_virtual_state, bad):
+            # We ended up with a virtual state that is not compatible
+            # and we are thus unable to jump to the start of the loop
+            final_virtual_state.debug_print("Bad virtual state at end of loop, ",
+                                            bad)
+            debug_stop('jit-log-virtualstate')
+            raise InvalidLoop
             
-            values = [self.getvalue(arg) for arg in jump_args]
-            inputargs = virtual_state.make_inputargs(values, self.optimizer)
-            short_inputargs = virtual_state.make_inputargs(values, self.optimizer,
-                                                           keyboxes=True)
+        debug_stop('jit-log-virtualstate')
 
-            self.constant_inputargs = {}
-            for box in jump_args: 
-                const = self.get_constant_box(box)
-                if const:
-                    self.constant_inputargs[box] = const
+        maxguards = self.optimizer.metainterp_sd.warmrunnerdesc.memory_manager.max_retrace_guards
+        if self.optimizer.emitted_guards > maxguards:
+            target_token = jumpop.getdescr()
+            assert isinstance(target_token, TargetToken)
+            target_token.targeting_jitcell_token.retraced_count = sys.maxint
+            
+    def finilize_short_preamble(self, start_label):
+        short = self.short
+        assert short[-1].getopnum() == rop.JUMP
+        target_token = start_label.getdescr()
+        assert isinstance(target_token, TargetToken)
 
-            sb = ShortBoxes(self.optimizer, inputargs + self.constant_inputargs.keys())
-            self.short_boxes = sb
+        # Turn guards into conditional jumps to the preamble
+        for i in range(len(short)):
+            op = short[i]
+            if op.is_guard():
+                op = op.clone()
+                op.setfailargs(None)
+                descr = target_token.start_resumedescr.clone_if_mutable()
+                op.setdescr(descr)
+                short[i] = op
+
+        # Clone ops and boxes to get private versions
+        short_inputargs = short[0].getarglist()
+        boxmap = {}
+        newargs = [None] * len(short_inputargs)
+        for i in range(len(short_inputargs)):
+            a = short_inputargs[i]
+            if a in boxmap:
+                newargs[i] = boxmap[a]
+            else:
+                newargs[i] = a.clonebox()
+                boxmap[a] = newargs[i]
+        inliner = Inliner(short_inputargs, newargs)
+        for box, const in self.imported_state.constant_inputargs.items():
+            inliner.argmap[box] = const
+        for i in range(len(short)):
+            short[i] = inliner.inline_op(short[i])
+
+        target_token.start_resumedescr = self.start_resumedescr.clone_if_mutable()            
+        inliner.inline_descr_inplace(target_token.start_resumedescr)
+
+        # Forget the values to allow them to be freed
+        for box in short[0].getarglist():
+            box.forget_value()
+        for op in short:
+            if op.result:
+                op.result.forget_value()
+        target_token.short_preamble = self.short
+        target_token.exported_state = None
+
+        
+    def FIXME_old_stuff():
             preamble_optimizer = self.optimizer
             loop.preamble.quasi_immutable_deps = (
                 self.optimizer.quasi_immutable_deps)
             self.optimizer = self.optimizer.new()
             loop.quasi_immutable_deps = self.optimizer.quasi_immutable_deps
 
-            logops = self.optimizer.loop.logops
-            if logops:
-                args = ", ".join([logops.repr_of_arg(arg) for arg in inputargs])
-                debug_print('inputargs:       ' + args)
-                args = ", ".join([logops.repr_of_arg(arg) for arg in short_inputargs])
-                debug_print('short inputargs: ' + args)
-                self.short_boxes.debug_print(logops)
-                
-
-            # Force virtuals amoung the jump_args of the preamble to get the
-            # operations needed to setup the proper state of those virtuals
-            # in the peeled loop
-            inputarg_setup_ops = []
-            preamble_optimizer.clear_newoperations()
-            seen = {}
-            for box in inputargs:
-                if box in seen:
-                    continue
-                seen[box] = True
-                preamble_value = preamble_optimizer.getvalue(box)
-                value = self.optimizer.getvalue(box)
-                value.import_from(preamble_value, self.optimizer)
-            for box in short_inputargs:
-                if box in seen:
-                    continue
-                seen[box] = True
-                value = preamble_optimizer.getvalue(box)
-                value.force_box(preamble_optimizer)
-            inputarg_setup_ops += preamble_optimizer.get_newoperations()
-
-            # Setup the state of the new optimizer by emiting the
-            # short preamble operations and discarding the result
-            self.optimizer.emitting_dissabled = True
-            for op in inputarg_setup_ops:
-                self.optimizer.send_extra_operation(op)
-            seen = {}
-            for op in self.short_boxes.operations():
-                self.ensure_short_op_emitted(op, self.optimizer, seen)
-                if op and op.result:
-                    preamble_value = preamble_optimizer.getvalue(op.result)
-                    value = self.optimizer.getvalue(op.result)
-                    if not value.is_virtual():
-                        imp = ValueImporter(self, preamble_value, op)
-                        self.optimizer.importable_values[value] = imp
-                    newresult = self.optimizer.getvalue(op.result).get_key_box()
-                    if newresult is not op.result:
-                        self.short_boxes.alias(newresult, op.result)
-            self.optimizer.flush()
-            self.optimizer.emitting_dissabled = False
-
-            initial_inputargs_len = len(inputargs)
-            self.inliner = Inliner(loop.inputargs, jump_args)
-
-
-            short = self.inline(inputargs, self.cloned_operations,
-                                loop.inputargs, short_inputargs,
-                                virtual_state)
             
             loop.inputargs = inputargs
             args = [preamble_optimizer.getvalue(self.short_boxes.original(a)).force_box(preamble_optimizer)\
@@ -241,149 +421,7 @@
                 loop.preamble.token.retraced_count = sys.maxint
 
             if short:
-                assert short[-1].getopnum() == rop.JUMP
-                short[-1].setdescr(loop.token)
-
-                # Turn guards into conditional jumps to the preamble
-                for i in range(len(short)):
-                    op = short[i]
-                    if op.is_guard():
-                        op = op.clone()
-                        op.setfailargs(None)
-                        descr = self.start_resumedescr.clone_if_mutable()
-                        op.setdescr(descr)
-                        short[i] = op
-
-                short_loop = TreeLoop('short preamble')
-                short_loop.inputargs = short_inputargs
-                short_loop.operations = short
-
-                # Clone ops and boxes to get private versions and
-                boxmap = {}
-                newargs = [None] * len(short_loop.inputargs)
-                for i in range(len(short_loop.inputargs)):
-                    a = short_loop.inputargs[i]
-                    if a in boxmap:
-                        newargs[i] = boxmap[a]
-                    else:
-                        newargs[i] = a.clonebox()
-                        boxmap[a] = newargs[i]
-                inliner = Inliner(short_loop.inputargs, newargs)
-                for box, const in self.constant_inputargs.items():
-                    inliner.argmap[box] = const
-                short_loop.inputargs = newargs
-                ops = [inliner.inline_op(op) for op in short_loop.operations]
-                short_loop.operations = ops
-                descr = self.start_resumedescr.clone_if_mutable()
-                inliner.inline_descr_inplace(descr)
-                short_loop.start_resumedescr = descr
-
-                assert isinstance(loop.preamble.token, LoopToken)
-                if loop.preamble.token.short_preamble:
-                    loop.preamble.token.short_preamble.append(short_loop)
-                else:
-                    loop.preamble.token.short_preamble = [short_loop]
-                short_loop.virtual_state = virtual_state
-
-                # Forget the values to allow them to be freed
-                for box in short_loop.inputargs:
-                    box.forget_value()
-                for op in short_loop.operations:
-                    if op.result:
-                        op.result.forget_value()
-
-    def inline(self, inputargs, loop_operations, loop_args, short_inputargs, virtual_state):
-        inliner = self.inliner
-
-        short_jumpargs = inputargs[:]
-
-        short = self.short = []
-        short_seen = self.short_seen = {}
-        for box, const in self.constant_inputargs.items():
-            short_seen[box] = True
-
-        # This loop is equivalent to the main optimization loop in
-        # Optimizer.propagate_all_forward
-        jumpop = None
-        for newop in loop_operations:
-            newop = inliner.inline_op(newop, clone=False)
-            if newop.getopnum() == rop.JUMP:
-                jumpop = newop
-                break
-
-            #self.optimizer.first_optimization.propagate_forward(newop)
-            self.optimizer.send_extra_operation(newop)
-
-        self.boxes_created_this_iteration = {}
-
-        assert jumpop
-        original_jumpargs = jumpop.getarglist()[:]
-        values = [self.getvalue(arg) for arg in jumpop.getarglist()]
-        jumpargs = virtual_state.make_inputargs(values, self.optimizer)
-        jumpop.initarglist(jumpargs)
-        jmp_to_short_args = virtual_state.make_inputargs(values, self.optimizer,
-                                                         keyboxes=True)
-        self.short_inliner = Inliner(short_inputargs, jmp_to_short_args)
-        
-        for box, const in self.constant_inputargs.items():
-            self.short_inliner.argmap[box] = const
-
-        for op in short:
-            newop = self.short_inliner.inline_op(op)
-            self.optimizer.send_extra_operation(newop)
-        
-        newoperations = self.optimizer.get_newoperations()
-
-        i = j = 0
-        while i < len(newoperations) or j < len(jumpargs):
-            if i == len(newoperations):
-                while j < len(jumpargs):
-                    a = jumpargs[j]
-                    if self.optimizer.loop.logops:
-                        debug_print('J:  ' + self.optimizer.loop.logops.repr_of_arg(a))
-                    self.import_box(a, inputargs, short, short_jumpargs,
-                                    jumpargs, short_seen)
-                    j += 1
-            else:
-                op = newoperations[i]
-
-                self.boxes_created_this_iteration[op.result] = True
-                args = op.getarglist()
-                if op.is_guard():
-                    args = args + op.getfailargs()
-
-                if self.optimizer.loop.logops:
-                    debug_print('OP: ' + self.optimizer.loop.logops.repr_of_resop(op))
-                for a in args:
-                    if self.optimizer.loop.logops:
-                        debug_print('A:  ' + self.optimizer.loop.logops.repr_of_arg(a))
-                    self.import_box(a, inputargs, short, short_jumpargs,
-                                    jumpargs, short_seen)
-                i += 1
-            newoperations = self.optimizer.get_newoperations()
-
-        jumpop.initarglist(jumpargs)
-        self.optimizer.send_extra_operation(jumpop)
-        short.append(ResOperation(rop.JUMP, short_jumpargs, None))
-
-        modifier = VirtualStateAdder(self.optimizer)
-        final_virtual_state = modifier.get_virtual_state(original_jumpargs)
-        debug_start('jit-log-virtualstate')
-        virtual_state.debug_print('Closed loop with ')
-        bad = {}
-        if not virtual_state.generalization_of(final_virtual_state, bad):
-            # We ended up with a virtual state that is not compatible
-            # and we are thus unable to jump to the start of the loop
-            # XXX Is it possible to end up here? If so, consider:
-            #    - Fallback on having the preamble jump to itself?
-            #    - Would virtual_state.generate_guards make sense here?
-            final_virtual_state.debug_print("Bad virtual state at end of loop, ",
-                                            bad)
-            debug_stop('jit-log-virtualstate')
-            raise InvalidLoop
-        debug_stop('jit-log-virtualstate')
-        
-        return short
+                pass
 
     def ensure_short_op_emitted(self, op, optimizer, seen):
         if op is None:
@@ -399,19 +437,18 @@
             guard = ResOperation(rop.GUARD_NO_OVERFLOW, [], None)
             optimizer.send_extra_operation(guard)
 
-    def add_op_to_short(self, op, short, short_seen, emit=True, guards_needed=False):
+    def add_op_to_short(self, op, emit=True, guards_needed=False):
         if op is None:
             return None
-        if op.result is not None and op.result in short_seen:
-            if emit:
+        if op.result is not None and op.result in self.short_seen:
+            if emit and self.short_inliner:                
                 return self.short_inliner.inline_arg(op.result)
             else:
                 return None
         
         for a in op.getarglist():
-            if not isinstance(a, Const) and a not in short_seen:
-                self.add_op_to_short(self.short_boxes.producer(a), short, short_seen,
-                                     emit, guards_needed)
+            if not isinstance(a, Const) and a not in self.short_seen:
+                self.add_op_to_short(self.short_boxes.producer(a), emit, guards_needed)
         if op.is_guard():
             descr = self.start_resumedescr.clone_if_mutable()
             op.setdescr(descr)
@@ -421,9 +458,9 @@
         else:
             value_guards = []            
 
-        short.append(op)
-        short_seen[op.result] = True
-        if emit:
+        self.short.append(op)
+        self.short_seen[op.result] = True
+        if emit and self.short_inliner:
             newop = self.short_inliner.inline_op(op)
             self.optimizer.send_extra_operation(newop)
         else:
@@ -432,23 +469,22 @@
         if op.is_ovf():
             # FIXME: ensure that GUARD_OVERFLOW:ed ops not end up here
             guard = ResOperation(rop.GUARD_NO_OVERFLOW, [], None)
-            self.add_op_to_short(guard, short, short_seen, emit, guards_needed)
+            self.add_op_to_short(guard, emit, guards_needed)
         for guard in value_guards:
-            self.add_op_to_short(guard, short, short_seen, emit, guards_needed)
+            self.add_op_to_short(guard, emit, guards_needed)
 
         if newop:
             return newop.result
         return None
         
-    def import_box(self, box, inputargs, short, short_jumpargs,
-                   jumpargs, short_seen):
+    def import_box(self, box, inputargs, short_jumpargs, jumpargs):
         if isinstance(box, Const) or box in inputargs:
             return
         if box in self.boxes_created_this_iteration:
             return
 
         short_op = self.short_boxes.producer(box)
-        newresult = self.add_op_to_short(short_op, short, short_seen)
+        newresult = self.add_op_to_short(short_op)
 
         short_jumpargs.append(short_op.result)
         inputargs.append(box)
@@ -456,98 +492,94 @@
         if box in self.optimizer.values:
             box = self.optimizer.values[box].force_box(self.optimizer)
         jumpargs.append(box)
-        
 
-class OptInlineShortPreamble(Optimization):
-    def __init__(self, retraced):
-        self.retraced = retraced
+    def jump_to_already_compiled_trace(self, jumpop):
+        assert jumpop.getopnum() == rop.JUMP
+        cell_token = jumpop.getdescr()
 
-    def new(self):
-        return OptInlineShortPreamble(self.retraced)
+        assert isinstance(cell_token, JitCellToken)
+        if not cell_token.target_tokens:
+            return False
 
-    def propagate_forward(self, op):
-        if op.getopnum() == rop.JUMP:
-            loop_token = op.getdescr()
-            assert isinstance(loop_token, LoopToken)
-            short = loop_token.short_preamble
-            if short:
-                args = op.getarglist()
-                modifier = VirtualStateAdder(self.optimizer)
-                virtual_state = modifier.get_virtual_state(args)
-                debug_start('jit-log-virtualstate')
-                virtual_state.debug_print("Looking for ")
+        if not self.inline_short_preamble:
+            assert cell_token.target_tokens[0].virtual_state is None
+            jumpop.setdescr(cell_token.target_tokens[0])
+            self.optimizer.send_extra_operation(jumpop)
+            return True
 
-                for sh in short:
-                    ok = False
-                    extra_guards = []
+        args = jumpop.getarglist()
+        modifier = VirtualStateAdder(self.optimizer)
+        virtual_state = modifier.get_virtual_state(args)
+        debug_start('jit-log-virtualstate')
+        virtual_state.debug_print("Looking for ")
 
-                    bad = {}
-                    debugmsg = 'Did not match '
-                    if sh.virtual_state.generalization_of(virtual_state, bad):
-                        ok = True
-                        debugmsg = 'Matched '
-                    else:
-                        try:
-                            cpu = self.optimizer.cpu
-                            sh.virtual_state.generate_guards(virtual_state,
-                                                             args, cpu,
-                                                             extra_guards)
+        for target in cell_token.target_tokens:
+            if not target.virtual_state:
+                continue
+            ok = False
+            extra_guards = []
 
-                            ok = True
-                            debugmsg = 'Guarded to match '
-                        except InvalidLoop:
-                            pass
-                    sh.virtual_state.debug_print(debugmsg, bad)
-                    
-                    if ok:
-                        debug_stop('jit-log-virtualstate')
+            bad = {}
+            debugmsg = 'Did not match '
+            if target.virtual_state.generalization_of(virtual_state, bad):
+                ok = True
+                debugmsg = 'Matched '
+            else:
+                try:
+                    cpu = self.optimizer.cpu
+                    target.virtual_state.generate_guards(virtual_state,
+                                                         args, cpu,
+                                                         extra_guards)
 
-                        values = [self.getvalue(arg)
-                                  for arg in op.getarglist()]
-                        args = sh.virtual_state.make_inputargs(values, self.optimizer,
-                                                               keyboxes=True)
-                        inliner = Inliner(sh.inputargs, args)
-                        
-                        for guard in extra_guards:
-                            if guard.is_guard():
-                                descr = sh.start_resumedescr.clone_if_mutable()
-                                inliner.inline_descr_inplace(descr)
-                                guard.setdescr(descr)
-                            self.emit_operation(guard)
-                        
-                        try:
-                            for shop in sh.operations:
-                                newop = inliner.inline_op(shop)
-                                self.emit_operation(newop)
-                        except InvalidLoop:
-                            debug_print("Inlining failed unexpectedly",
-                                        "jumping to preamble instead")
-                            self.emit_operation(op)
-                        return
+                    ok = True
+                    debugmsg = 'Guarded to match '
+                except InvalidLoop:
+                    pass
+            target.virtual_state.debug_print(debugmsg, bad)
+
+            if ok:
                 debug_stop('jit-log-virtualstate')
-                retraced_count = loop_token.retraced_count
-                limit = self.optimizer.metainterp_sd.warmrunnerdesc.memory_manager.retrace_limit
-                if not self.retraced and retraced_count<limit:
-                    loop_token.retraced_count += 1
-                    if not loop_token.failed_states:
-                        debug_print("Retracing (%d of %d)" % (retraced_count,
-                                                              limit))
-                        raise RetraceLoop
-                    for failed in loop_token.failed_states:
-                        if failed.generalization_of(virtual_state):
-                            # Retracing once more will most likely fail again
-                            break
-                    else:
-                        debug_print("Retracing (%d of %d)" % (retraced_count,
-                                                              limit))
 
-                        raise RetraceLoop
-                else:
-                    if not loop_token.failed_states:
-                        loop_token.failed_states=[virtual_state]
-                    else:
-                        loop_token.failed_states.append(virtual_state)
-        self.emit_operation(op)
+                values = [self.getvalue(arg)
+                          for arg in jumpop.getarglist()]
+                args = target.virtual_state.make_inputargs(values, self.optimizer,
+                                                           keyboxes=True)
+                short_inputargs = target.short_preamble[0].getarglist()
+                inliner = Inliner(short_inputargs, args)
+
+                for guard in extra_guards:
+                    if guard.is_guard():
+                        descr = target.start_resumedescr.clone_if_mutable()
+                        inliner.inline_descr_inplace(descr)
+                        guard.setdescr(descr)
+                    self.optimizer.send_extra_operation(guard)
+
+                try:
+                    for shop in target.short_preamble[1:]:
+                        newop = inliner.inline_op(shop)
+                        self.optimizer.send_extra_operation(newop)
+                except InvalidLoop:
+                    debug_print("Inlining failed unexpectedly",
+                                "jumping to preamble instead")
+                    assert cell_token.target_tokens[0].virtual_state is None
+                    jumpop.setdescr(cell_token.target_tokens[0])
+                    self.optimizer.send_extra_operation(jumpop)
+                return True
+        debug_stop('jit-log-virtualstate')
+
+        if self.did_import:
+            return False
+        limit = self.optimizer.metainterp_sd.warmrunnerdesc.memory_manager.retrace_limit
+        if cell_token.retraced_count<limit:
+            cell_token.retraced_count += 1
+            debug_print('Retracing (%d/%d)' % (cell_token.retraced_count, limit))
+            return False
+        else:
+            debug_print("Retrace count reached, jumping to preamble")
+            assert cell_token.target_tokens[0].virtual_state is None
+            jumpop.setdescr(cell_token.target_tokens[0])
+            self.optimizer.send_extra_operation(jumpop)
+            return True
 
 class ValueImporter(object):
     def __init__(self, unroll, value, op):
@@ -557,5 +589,15 @@
 
     def import_value(self, value):
         value.import_from(self.preamble_value, self.unroll.optimizer)
-        self.unroll.add_op_to_short(self.op, self.unroll.short, self.unroll.short_seen, False, True)        
-        
+        self.unroll.add_op_to_short(self.op, False, True)        
+
+class ExportedState(object):
+    def __init__(self, constant_inputargs,
+                 short_boxes, inputarg_setup_ops, optimizer, aliased_vrituals,
+                 jump_args):
+        self.constant_inputargs = constant_inputargs
+        self.short_boxes = short_boxes
+        self.inputarg_setup_ops = inputarg_setup_ops
+        self.optimizer = optimizer
+        self.aliased_vrituals = aliased_vrituals
+        self.jump_args = jump_args
diff --git a/pypy/jit/metainterp/optimizeopt/util.py b/pypy/jit/metainterp/optimizeopt/util.py
--- a/pypy/jit/metainterp/optimizeopt/util.py
+++ b/pypy/jit/metainterp/optimizeopt/util.py
@@ -148,7 +148,7 @@
                 assert op1.result.same_box(remap[op2.result])
         else:
             remap[op2.result] = op1.result
-        if op1.getopnum() != rop.JUMP:      # xxx obscure
+        if op1.getopnum() not in (rop.JUMP, rop.LABEL):      # xxx obscure
             assert op1.getdescr() == op2.getdescr()
         if op1.getfailargs() or op2.getfailargs():
             assert len(op1.getfailargs()) == len(op2.getfailargs())
@@ -171,3 +171,4 @@
     assert len(oplist1) == len(oplist2)
     print '-'*totwidth
     return True
+
diff --git a/pypy/jit/metainterp/optimizeopt/virtualstate.py b/pypy/jit/metainterp/optimizeopt/virtualstate.py
--- a/pypy/jit/metainterp/optimizeopt/virtualstate.py
+++ b/pypy/jit/metainterp/optimizeopt/virtualstate.py
@@ -14,6 +14,9 @@
 from pypy.rlib.objectmodel import we_are_translated
 import os
 
+class BadVirtualState(Exception):
+    pass
+
 class AbstractVirtualStateInfo(resume.AbstractVirtualInfo):
     position = -1
 
@@ -103,10 +106,15 @@
         raise NotImplementedError
 
     def enum_forced_boxes(self, boxes, value, optimizer):
-        assert isinstance(value, virtualize.AbstractVirtualStructValue)
-        assert value.is_virtual()
+        if not isinstance(value, virtualize.AbstractVirtualStructValue):
+            raise BadVirtualState
+        if not value.is_virtual():
+            raise BadVirtualState
         for i in range(len(self.fielddescrs)):
-            v = value._fields[self.fielddescrs[i]]
+            try:
+                v = value._fields[self.fielddescrs[i]]
+            except KeyError:
+                raise BadVirtualState
             s = self.fieldstate[i]
             if s.position > self.position:
                 s.enum_forced_boxes(boxes, v, optimizer)
@@ -180,10 +188,15 @@
             self.arraydescr is other.arraydescr)
 
     def enum_forced_boxes(self, boxes, value, optimizer):
-        assert isinstance(value, virtualize.VArrayValue)
-        assert value.is_virtual()
+        if not isinstance(value, virtualize.VArrayValue):
+            raise BadVirtualState
+        if not value.is_virtual():
+            raise BadVirtualState
         for i in range(len(self.fieldstate)):
-            v = value._items[i]
+            try:
+                v = value._items[i]
+            except IndexError:
+                raise BadVirtualState
             s = self.fieldstate[i]
             if s.position > self.position:
                 s.enum_forced_boxes(boxes, v, optimizer)
@@ -248,12 +261,19 @@
             s.enum(virtual_state)
 
     def enum_forced_boxes(self, boxes, value, optimizer):
-        assert isinstance(value, virtualize.VArrayStructValue)
-        assert value.is_virtual()
+        if not isinstance(value, virtualize.VArrayStructValue):
+            raise BadVirtualState
+        if not value.is_virtual():
+            raise BadVirtualState
         p = 0
         for i in range(len(self.fielddescrs)):
             for j in range(len(self.fielddescrs[i])):
-                v = value._items[i][self.fielddescrs[i][j]]
+                try:
+                    v = value._items[i][self.fielddescrs[i][j]]
+                except IndexError:
+                    raise BadVirtualState
+                except KeyError:
+                    raise BadVirtualState
                 s = self.fieldstate[p]
                 if s.position > self.position:
                     s.enum_forced_boxes(boxes, v, optimizer)
@@ -546,18 +566,27 @@
         self.aliases = {}
         self.rename = {}
         self.optimizer = optimizer
-        for box in surviving_boxes:
-            self.potential_ops[box] = None
-        optimizer.produce_potential_short_preamble_ops(self)
 
-        self.short_boxes = {}
-        self.short_boxes_in_production = {}
+        if surviving_boxes is not None:
+            for box in surviving_boxes:
+                self.potential_ops[box] = None
+            optimizer.produce_potential_short_preamble_ops(self)
 
-        for box in self.potential_ops.keys():
-            try:
-                self.produce_short_preamble_box(box)
-            except BoxNotProducable:
-                pass
+            self.short_boxes = {}
+            self.short_boxes_in_production = {}
+
+            for box in self.potential_ops.keys():
+                try:
+                    self.produce_short_preamble_box(box)
+                except BoxNotProducable:
+                    pass
+
+    def clone(self):
+        sb = ShortBoxes(self.optimizer, None)
+        sb.aliases.update(self.aliases)
+        sb.short_boxes = {}
+        sb.short_boxes.update(self.short_boxes)
+        return sb
 
     def prioritized_alternatives(self, box):
         if box not in self.alternatives:
@@ -598,6 +627,7 @@
                 newbox = newop.result = op.result.clonebox()
                 self.short_boxes[newop.result] = newop
             value = self.optimizer.getvalue(box)
+            self.optimizer.emit_operation(ResOperation(rop.SAME_AS, [box], newbox))
             self.optimizer.make_equal_to(newbox, value)
         else:
             self.short_boxes[box] = op
diff --git a/pypy/jit/metainterp/pyjitpl.py b/pypy/jit/metainterp/pyjitpl.py
--- a/pypy/jit/metainterp/pyjitpl.py
+++ b/pypy/jit/metainterp/pyjitpl.py
@@ -8,7 +8,7 @@
 
 from pypy.jit.metainterp import history, compile, resume
 from pypy.jit.metainterp.history import Const, ConstInt, ConstPtr, ConstFloat
-from pypy.jit.metainterp.history import Box
+from pypy.jit.metainterp.history import Box, TargetToken
 from pypy.jit.metainterp.resoperation import rop
 from pypy.jit.metainterp import executor
 from pypy.jit.metainterp.logger import Logger
@@ -22,7 +22,6 @@
 from pypy.jit.codewriter.jitcode import JitCode, SwitchDictDescr
 from pypy.jit.codewriter import heaptracker
 from pypy.jit.metainterp.optimizeopt.util import args_dict_box
-from pypy.jit.metainterp.optimize import RetraceLoop
 
 # ____________________________________________________________
 
@@ -243,6 +242,18 @@
     def opimpl_mark_opaque_ptr(self, box):
         return self.execute(rop.MARK_OPAQUE_PTR, box)
 
+    @arguments("box", "box")
+    def opimpl_record_known_class(self, box, clsbox):
+        from pypy.rpython.lltypesystem import llmemory
+        if self.metainterp.heapcache.is_class_known(box):
+            return
+        adr = clsbox.getaddr()
+        bounding_class = llmemory.cast_adr_to_ptr(adr, rclass.CLASSTYPE)
+        if bounding_class.subclassrange_max - bounding_class.subclassrange_min == 1:
+            # precise class knowledge, this can be used
+            self.execute(rop.RECORD_KNOWN_CLASS, box, clsbox)
+            self.metainterp.heapcache.class_now_known(box)
+
     @arguments("box")
     def _opimpl_any_return(self, box):
         self.metainterp.finishframe(box)
@@ -1555,10 +1566,17 @@
         self.portal_trace_positions = []
         self.free_frames_list = []
         self.last_exc_value_box = None
-        self.retracing_loop_from = None
+        self.partial_trace = None
+        self.retracing_from = -1
         self.call_pure_results = args_dict_box()
         self.heapcache = HeapCache()
 
+    def retrace_needed(self, trace):
+        self.partial_trace = trace
+        self.retracing_from = len(self.history.operations) - 1
+        self.heapcache.reset()
+        
+
     def perform_call(self, jitcode, boxes, greenkey=None):
         # causes the metainterp to enter the given subfunction
         f = self.newframe(jitcode, greenkey)
@@ -1778,7 +1796,6 @@
         self.staticdata.profiler.count(reason)
         debug_print('~~~ ABORTING TRACING')
         self.staticdata.stats.aborted()
-        self.resumekey.reset_counter_from_failure()
 
     def blackhole_if_trace_too_long(self):
         warmrunnerstate = self.jitdriver_sd.warmstate
@@ -1793,7 +1810,7 @@
 
     def _interpret(self):
         # Execute the frames forward until we raise a DoneWithThisFrame,
-        # a ExitFrameWithException, or a GenerateMergePoint exception.
+        # an ExitFrameWithException, or a ContinueRunningNormally exception.
         self.staticdata.stats.entered()
         while True:
             self.framestack[-1].run_one_step()
@@ -1841,8 +1858,6 @@
         self.seen_loop_header_for_jdindex = -1
         try:
             self.interpret()
-        except GenerateMergePoint, gmp:
-            return self.designate_target_loop(gmp)
         except SwitchToBlackhole, stb:
             self.run_blackhole_interp_to_cancel_tracing(stb)
         assert False, "should always raise"
@@ -1877,8 +1892,6 @@
             if self.resumekey_original_loop_token is None:   # very rare case
                 raise SwitchToBlackhole(ABORT_BRIDGE)
             self.interpret()
-        except GenerateMergePoint, gmp:
-            return self.designate_target_loop(gmp)
         except SwitchToBlackhole, stb:
             self.run_blackhole_interp_to_cancel_tracing(stb)
         assert False, "should always raise"
@@ -1926,14 +1939,9 @@
         #   that failed;
         # - if self.resumekey is a ResumeFromInterpDescr, it starts directly
         #   from the interpreter.
-        if not self.retracing_loop_from:
-            try:
-                self.compile_bridge(live_arg_boxes)
-            except RetraceLoop:
-                start = len(self.history.operations)
-                self.current_merge_points.append((live_arg_boxes, start))
-                self.retracing_loop_from = RetraceState(self, live_arg_boxes)
-                return
+        if not self.partial_trace:
+            # FIXME: Support a retrace to be a bridge as well as a loop
+            self.compile_trace(live_arg_boxes, resumedescr)
 
         # raises in case it works -- which is the common case, hopefully,
         # at least for bridges starting from a guard.
@@ -1955,14 +1963,10 @@
             else:
                 # Found!  Compile it as a loop.
                 # raises in case it works -- which is the common case
-                if self.retracing_loop_from and \
-                   self.retracing_loop_from.merge_point == j:
-                    bridge_arg_boxes = self.retracing_loop_from.live_arg_boxes
-                    self.compile_bridge_and_loop(original_boxes, \
-                                                 live_arg_boxes, start,
-                                                 bridge_arg_boxes, resumedescr)
-                else:
-                    self.compile(original_boxes, live_arg_boxes, start, resumedescr)
+                if self.partial_trace:
+                    if  start != self.retracing_from: 
+                        raise SwitchToBlackhole(ABORT_BAD_LOOP) # For now
+                self.compile_loop(original_boxes, live_arg_boxes, start, resumedescr)
                 # creation of the loop was cancelled!
                 self.staticdata.log('cancelled, tracing more...')
                 #self.staticdata.log('cancelled, stopping tracing')
@@ -1972,12 +1976,48 @@
         start = len(self.history.operations)
         self.current_merge_points.append((live_arg_boxes, start))
 
-    def designate_target_loop(self, gmp):
-        loop_token = gmp.target_loop_token
+    def _unpack_boxes(self, boxes, start, stop):
+        ints = []; refs = []; floats = []
+        for i in range(start, stop):
+            box = boxes[i]
+            if   box.type == history.INT: ints.append(box.getint())
+            elif box.type == history.REF: refs.append(box.getref_base())
+            elif box.type == history.FLOAT:floats.append(box.getfloatstorage())
+            else: assert 0
+        return ints[:], refs[:], floats[:]
+
+    def raise_continue_running_normally(self, live_arg_boxes, loop_token):
+        self.history.inputargs = None
+        self.history.operations = None
+        # For simplicity, we just raise ContinueRunningNormally here and
+        # ignore the loop_token passed in.  It means that we go back to
+        # interpreted mode, but it should come back very quickly to the
+        # JIT, probably find the same 'loop_token', and execute it.
+        if we_are_translated():
+            num_green_args = self.jitdriver_sd.num_green_args
+            gi, gr, gf = self._unpack_boxes(live_arg_boxes, 0, num_green_args)
+            ri, rr, rf = self._unpack_boxes(live_arg_boxes, num_green_args,
+                                            len(live_arg_boxes))
+            CRN = self.staticdata.ContinueRunningNormally
+            raise CRN(gi, gr, gf, ri, rr, rf)
+        else:
+            # However, in order to keep the existing tests working
+            # (which are based on the assumption that 'loop_token' is
+            # directly used here), a bit of custom non-translatable code...
+            self._nontranslated_run_directly(live_arg_boxes, loop_token)
+            assert 0, "unreachable"
+
+    def _nontranslated_run_directly(self, live_arg_boxes, loop_token):
+        "NOT_RPYTHON"
+        args = []
         num_green_args = self.jitdriver_sd.num_green_args
-        residual_args = gmp.argboxes[num_green_args:]
-        history.set_future_values(self.cpu, residual_args)
-        return loop_token
+        num_red_args = self.jitdriver_sd.num_red_args
+        for box in live_arg_boxes[num_green_args:num_green_args+num_red_args]:
+            if   box.type == history.INT: args.append(box.getint())
+            elif box.type == history.REF: args.append(box.getref_base())
+            elif box.type == history.FLOAT: args.append(box.getfloatstorage())
+            else: assert 0
+        self.jitdriver_sd.warmstate.execute_assembler(loop_token, *args)
 
     def prepare_resume_from_failure(self, opnum, dont_change_position=False):
         frame = self.framestack[-1]
@@ -2018,54 +2058,57 @@
             from pypy.jit.metainterp.resoperation import opname
             raise NotImplementedError(opname[opnum])
 
-    def get_compiled_merge_points(self, greenkey):
-        """Get the list of looptokens corresponding to the greenkey.
-        Turns the (internal) list of weakrefs into regular refs.
-        """
+    def get_procedure_token(self, greenkey):
         cell = self.jitdriver_sd.warmstate.jit_cell_at_key(greenkey)
-        return cell.get_compiled_merge_points()
+        return cell.get_procedure_token()
+        
+    def compile_loop(self, original_boxes, live_arg_boxes, start, start_resumedescr):
+        num_green_args = self.jitdriver_sd.num_green_args
+        greenkey = original_boxes[:num_green_args]
+        if not self.partial_trace:
+            assert self.get_procedure_token(greenkey) is None or \
+                   self.get_procedure_token(greenkey).target_tokens is None
+        if self.partial_trace:
+            target_token = compile.compile_retrace(self, greenkey, start,
+                                                   original_boxes[num_green_args:],
+                                                   live_arg_boxes[num_green_args:],
+                                                   start_resumedescr, self.partial_trace,
+                                                   self.resumekey)
+        else:
+            target_token = compile.compile_loop(self, greenkey, start,
+                                                original_boxes[num_green_args:],
+                                                live_arg_boxes[num_green_args:],
+                                                start_resumedescr)
+            if target_token is not None:
+                assert isinstance(target_token, TargetToken)
+                self.jitdriver_sd.warmstate.attach_procedure_to_interp(greenkey, target_token.targeting_jitcell_token)
+                self.staticdata.stats.add_jitcell_token(target_token.targeting_jitcell_token)
 
-    def set_compiled_merge_points(self, greenkey, looptokens):
-        cell = self.jitdriver_sd.warmstate.jit_cell_at_key(greenkey)
-        cell.set_compiled_merge_points(looptokens)
 
-    def compile(self, original_boxes, live_arg_boxes, start, start_resumedescr):
-        num_green_args = self.jitdriver_sd.num_green_args
-        original_inputargs = self.history.inputargs
-        self.history.inputargs = original_boxes[num_green_args:]
-        greenkey = original_boxes[:num_green_args]
-        old_loop_tokens = self.get_compiled_merge_points(greenkey)
-        self.history.record(rop.JUMP, live_arg_boxes[num_green_args:], None)
-        loop_token = compile.compile_new_loop(self, old_loop_tokens,
-                                              greenkey, start, start_resumedescr)
-        if loop_token is not None: # raise if it *worked* correctly
-            self.set_compiled_merge_points(greenkey, old_loop_tokens)
-            self.history.inputargs = None
-            self.history.operations = None
-            raise GenerateMergePoint(live_arg_boxes, loop_token)
+        if target_token is not None: # raise if it *worked* correctly
+            assert isinstance(target_token, TargetToken)
+            jitcell_token = target_token.targeting_jitcell_token
+            self.raise_continue_running_normally(live_arg_boxes, jitcell_token)
 
-        self.history.inputargs = original_inputargs
-        self.history.operations.pop()     # remove the JUMP
-
-    def compile_bridge(self, live_arg_boxes):
+    def compile_trace(self, live_arg_boxes, start_resumedescr):
         num_green_args = self.jitdriver_sd.num_green_args
         greenkey = live_arg_boxes[:num_green_args]
-        old_loop_tokens = self.get_compiled_merge_points(greenkey)
-        if len(old_loop_tokens) == 0:
+        target_jitcell_token = self.get_procedure_token(greenkey)
+        if not target_jitcell_token:
             return
-        #if self.resumekey.guard_opnum == rop.GUARD_CLASS:
-        #    return # Kepp tracing for another iteration
-        self.history.record(rop.JUMP, live_arg_boxes[num_green_args:], None)
+        if not target_jitcell_token.target_tokens:
+            return
+
+        self.history.record(rop.JUMP, live_arg_boxes[num_green_args:], None,
+                            descr=target_jitcell_token)
         try:
-            target_loop_token = compile.compile_new_bridge(self,
-                                                           old_loop_tokens,
-                                                           self.resumekey)
+            target_token = compile.compile_trace(self, self.resumekey, start_resumedescr)
         finally:
             self.history.operations.pop()     # remove the JUMP
-        if target_loop_token is not None: # raise if it *worked* correctly
-            self.history.inputargs = None
-            self.history.operations = None
-            raise GenerateMergePoint(live_arg_boxes, target_loop_token)
+        if target_token is not None: # raise if it *worked* correctly
+            assert isinstance(target_token, TargetToken)
+            jitcell_token = target_token.targeting_jitcell_token
+            self.raise_continue_running_normally(live_arg_boxes, jitcell_token)
 
     def compile_bridge_and_loop(self, original_boxes, live_arg_boxes, start,
                                 bridge_arg_boxes, start_resumedescr):
@@ -2101,10 +2144,8 @@
         except RetraceLoop:
             assert False
         assert target_loop_token is not None
-
-        self.history.inputargs = None
-        self.history.operations = None
-        raise GenerateMergePoint(live_arg_boxes, old_loop_tokens[0])
+        self.raise_continue_running_normally(live_arg_boxes,
+                                             old_loop_tokens[0])
 
     def compile_done_with_this_frame(self, exitbox):
         self.gen_store_back_in_virtualizable()
@@ -2126,21 +2167,21 @@
             loop_tokens = sd.loop_tokens_done_with_this_frame_float
         else:
             assert False
-        self.history.record(rop.JUMP, exits, None)
-        target_loop_token = compile.compile_new_bridge(self, loop_tokens,
-                                                       self.resumekey)
-        if target_loop_token is not loop_tokens[0]:
+        # FIXME: kill TerminatingLoopToken?
+        # FIXME: can we call compile_trace?
+        token = loop_tokens[0].finishdescr
+        self.history.record(rop.FINISH, exits, None, descr=token)
+        target_token = compile.compile_trace(self, self.resumekey)
+        if target_token is not token:
             compile.giveup()
 
     def compile_exit_frame_with_exception(self, valuebox):
         self.gen_store_back_in_virtualizable()
-        # temporarily put a JUMP to a pseudo-loop
-        self.history.record(rop.JUMP, [valuebox], None)
         sd = self.staticdata
-        loop_tokens = sd.loop_tokens_exit_frame_with_exception_ref
-        target_loop_token = compile.compile_new_bridge(self, loop_tokens,
-                                                       self.resumekey)
-        if target_loop_token is not loop_tokens[0]:
+        token = sd.loop_tokens_exit_frame_with_exception_ref[0].finishdescr
+        self.history.record(rop.FINISH, [valuebox], None, descr=token)
+        target_token = compile.compile_trace(self, self.resumekey)
+        if target_token is not token:
             compile.giveup()
 
     @specialize.arg(1)
@@ -2382,22 +2423,6 @@
                                             abox, ConstInt(j), itembox)
             assert i + 1 == len(self.virtualizable_boxes)
 
-    def gen_load_from_other_virtualizable(self, vinfo, vbox):
-        boxes = []
-        assert vinfo is not None
-        for i in range(vinfo.num_static_extra_boxes):
-            descr = vinfo.static_field_descrs[i]
-            boxes.append(self.execute_and_record(rop.GETFIELD_GC, descr, vbox))
-        virtualizable = vinfo.unwrap_virtualizable_box(vbox)
-        for k in range(vinfo.num_arrays):
-            descr = vinfo.array_field_descrs[k]
-            abox = self.execute_and_record(rop.GETFIELD_GC, descr, vbox)
-            descr = vinfo.array_descrs[k]
-            for j in range(vinfo.get_array_length(virtualizable, k)):
-                boxes.append(self.execute_and_record(rop.GETARRAYITEM_GC, descr,
-                                                     abox, ConstInt(j)))
-        return boxes
-
     def replace_box(self, oldbox, newbox):
         assert isinstance(oldbox, Box)
         for frame in self.framestack:
@@ -2469,25 +2494,13 @@
         greenargs = arglist[1:num_green_args+1]
         args = arglist[num_green_args+1:]
         assert len(args) == targetjitdriver_sd.num_red_args
-        vinfo = targetjitdriver_sd.virtualizable_info
-        if vinfo is not None:
-            index = targetjitdriver_sd.index_of_virtualizable
-            vbox = args[index]
-            args = args + self.gen_load_from_other_virtualizable(vinfo, vbox)
-            # ^^^ and not "+=", which makes 'args' a resizable list
         warmrunnerstate = targetjitdriver_sd.warmstate
-        token = warmrunnerstate.get_assembler_token(greenargs, args)
+        token = warmrunnerstate.get_assembler_token(greenargs)
         op = op.copy_and_change(rop.CALL_ASSEMBLER, args=args, descr=token)
         self.history.operations.append(op)
 
 # ____________________________________________________________
 
-class GenerateMergePoint(JitException):
-    def __init__(self, args, target_loop_token):
-        assert target_loop_token is not None
-        self.argboxes = args
-        self.target_loop_token = target_loop_token
-
 class ChangeFrame(JitException):
     """Raised after we mutated metainterp.framestack, in order to force
     it to reload the current top-of-stack frame that gets interpreted."""
diff --git a/pypy/jit/metainterp/resoperation.py b/pypy/jit/metainterp/resoperation.py
--- a/pypy/jit/metainterp/resoperation.py
+++ b/pypy/jit/metainterp/resoperation.py
@@ -369,6 +369,8 @@
     'FINISH/*d',
     '_FINAL_LAST',
 
+    'LABEL/*d',
+
     '_GUARD_FIRST',
     '_GUARD_FOLDABLE_FIRST',
     'GUARD_TRUE/1d',
@@ -379,11 +381,11 @@
     'GUARD_ISNULL/1d',
     'GUARD_NONNULL_CLASS/2d',
     '_GUARD_FOLDABLE_LAST',
-    'GUARD_NO_EXCEPTION/0d',
-    'GUARD_EXCEPTION/1d',
+    'GUARD_NO_EXCEPTION/0d',    # may be called with an exception currently set
+    'GUARD_EXCEPTION/1d',       # may be called with an exception currently set
     'GUARD_NO_OVERFLOW/0d',
     'GUARD_OVERFLOW/0d',
-    'GUARD_NOT_FORCED/0d',
+    'GUARD_NOT_FORCED/0d',      # may be called with an exception currently set
     'GUARD_NOT_INVALIDATED/0d',
     '_GUARD_LAST', # ----- end of guard operations -----
 
@@ -494,6 +496,7 @@
     'COPYSTRCONTENT/5',       # src, dst, srcstart, dststart, length
     'COPYUNICODECONTENT/5',
     'QUASIIMMUT_FIELD/1d',    # [objptr], descr=SlowMutateDescr
+    'RECORD_KNOWN_CLASS/2',   # [objptr, clsptr]
 
     '_CANRAISE_FIRST', # ----- start of can_raise operations -----
     '_CALL_FIRST',
diff --git a/pypy/jit/metainterp/resume.py b/pypy/jit/metainterp/resume.py
--- a/pypy/jit/metainterp/resume.py
+++ b/pypy/jit/metainterp/resume.py
@@ -93,12 +93,14 @@
 
 TAGMASK = 3
 
+class TagOverflow(Exception):
+    pass
+
 def tag(value, tagbits):
-    if tagbits >> 2:
-        raise ValueError
+    assert 0 <= tagbits <= 3
     sx = value >> 13
     if sx != 0 and sx != -1:
-        raise ValueError
+        raise TagOverflow
     return rffi.r_short(value<<2|tagbits)
 
 def untag(value):
@@ -153,7 +155,7 @@
                 return self._newconst(const)
             try:
                 return tag(val, TAGINT)
-            except ValueError:
+            except TagOverflow:
                 pass
             tagged = self.large_ints.get(val, UNASSIGNED)
             if not tagged_eq(tagged, UNASSIGNED):
@@ -429,8 +431,7 @@
                 fieldnum = self._gettagged(fieldbox)
                 # the index is limited to 2147483647 (64-bit machines only)
                 if itemindex > 2147483647:
-                    from pypy.jit.metainterp import compile
-                    compile.giveup()
+                    raise TagOverflow
                 itemindex = rffi.cast(rffi.INT, itemindex)
                 #
                 rd_pendingfields[i].lldescr  = lldescr
diff --git a/pypy/jit/metainterp/test/support.py b/pypy/jit/metainterp/test/support.py
--- a/pypy/jit/metainterp/test/support.py
+++ b/pypy/jit/metainterp/test/support.py
@@ -4,9 +4,9 @@
 from pypy.rpython.ootypesystem import ootype
 from pypy.jit.backend.llgraph import runner
 from pypy.jit.metainterp.warmspot import ll_meta_interp, get_stats
+from pypy.jit.metainterp.warmstate import unspecialize_value
 from pypy.jit.metainterp.optimizeopt import ALL_OPTS_DICT
 from pypy.jit.metainterp import pyjitpl, history
-from pypy.jit.metainterp.warmstate import set_future_value
 from pypy.jit.codewriter.policy import JitPolicy
 from pypy.jit.codewriter import codewriter, longlong
 from pypy.rlib.rfloat import isnan
@@ -16,15 +16,16 @@
     from pypy.jit.codewriter import support
 
     class FakeJitCell(object):
-        __compiled_merge_points = []
-        def get_compiled_merge_points(self):
-            return self.__compiled_merge_points[:]
-        def set_compiled_merge_points(self, lst):
-            self.__compiled_merge_points = lst
+        __product_token = None
+        def get_procedure_token(self):
+            return self.__product_token
+        def set_procedure_token(self, token):
+            self.__product_token = token
 
     class FakeWarmRunnerState(object):
-        def attach_unoptimized_bridge_from_interp(self, greenkey, newloop):
-            pass
+        def attach_procedure_to_interp(self, greenkey, procedure_token):
+            cell = self.jit_cell_at_key(greenkey)
+            cell.set_procedure_token(procedure_token)
 
         def helper_func(self, FUNCPTR, func):
             from pypy.rpython.annlowlevel import llhelper
@@ -132,16 +133,14 @@
 def _run_with_machine_code(testself, args):
     metainterp = testself.metainterp
     num_green_args = metainterp.jitdriver_sd.num_green_args
-    loop_tokens = metainterp.get_compiled_merge_points(args[:num_green_args])
-    if len(loop_tokens) != 1:
-        return NotImplemented
+    procedure_token = metainterp.get_procedure_token(args[:num_green_args])
     # a loop was successfully created by _run_with_pyjitpl(); call it
     cpu = metainterp.cpu
+    args1 = []
     for i in range(len(args) - num_green_args):
         x = args[num_green_args + i]
-        typecode = history.getkind(lltype.typeOf(x))
-        set_future_value(cpu, i, x, typecode)
-    faildescr = cpu.execute_token(loop_tokens[0])
+        args1.append(unspecialize_value(x))
+    faildescr = cpu.execute_token(procedure_token, *args1)
     assert faildescr.__class__.__name__.startswith('DoneWithThisFrameDescr')
     if metainterp.jitdriver_sd.result_type == history.INT:
         return cpu.get_latest_value_int(0)
@@ -155,26 +154,36 @@
 
 class JitMixin:
     basic = True
-    def check_loops(self, expected=None, everywhere=False, **check):
-        get_stats().check_loops(expected=expected, everywhere=everywhere,
-                                **check)
-    def check_loop_count(self, count):
-        """NB. This is a hack; use check_tree_loop_count() or
-        check_enter_count() for the real thing.
-        This counts as 1 every bridge in addition to every loop; and it does
-        not count at all the entry bridges from interpreter, although they
-        are TreeLoops as well."""
+    def check_resops(self, expected=None, **check):
+        get_stats().check_resops(expected=expected, **check)
+    def check_simple_loop(self, expected=None, **check):
+        get_stats().check_simple_loop(expected=expected, **check)
+
+    
+
+    def check_trace_count(self, count): # was check_loop_count
+        # The number of traces compiled
         assert get_stats().compiled_count == count
-    def check_tree_loop_count(self, count):
-        assert len(get_stats().loops) == count
-    def check_loop_count_at_most(self, count):
+    def check_trace_count_at_most(self, count):
         assert get_stats().compiled_count <= count
+
+    def check_jitcell_token_count(self, count): # was check_tree_loop_count
+        assert len(get_stats().jitcell_token_wrefs) == count
+
+    def check_target_token_count(self, count):
+        tokens = get_stats().get_all_jitcell_tokens()
+        n = sum ([len(t.target_tokens) for t in tokens])
+        assert n == count
+
     def check_enter_count(self, count):
         assert get_stats().enter_count == count
     def check_enter_count_at_most(self, count):
         assert get_stats().enter_count <= count
+
     def check_jumps(self, maxcount):
+        return # FIXME
         assert get_stats().exec_jumps <= maxcount
+
     def check_aborted_count(self, count):
         assert get_stats().aborted_count == count
     def check_aborted_count_at_least(self, count):
@@ -217,7 +226,7 @@
         # this can be used after interp_operations
         if expected is not None:
             expected = dict(expected)
-            expected['jump'] = 1
+            expected['finish'] = 1
         self.metainterp.staticdata.stats.check_history(expected, **isns)
 
 
diff --git a/pypy/jit/metainterp/test/test_ajit.py b/pypy/jit/metainterp/test/test_ajit.py
--- a/pypy/jit/metainterp/test/test_ajit.py
+++ b/pypy/jit/metainterp/test/test_ajit.py
@@ -9,12 +9,11 @@
 from pypy.jit.metainterp.test.support import LLJitMixin, OOJitMixin, noConst
 from pypy.jit.metainterp.typesystem import LLTypeHelper, OOTypeHelper
 from pypy.jit.metainterp.warmspot import get_stats
-from pypy.jit.metainterp.warmstate import set_future_value
 from pypy.rlib import rerased
 from pypy.rlib.jit import (JitDriver, we_are_jitted, hint, dont_look_inside,
     loop_invariant, elidable, promote, jit_debug, assert_green,
     AssertGreenFailed, unroll_safe, current_trace_length, look_inside_iff,
-    isconstant, isvirtual, promote_string, set_param)
+    isconstant, isvirtual, promote_string, set_param, record_known_class)
 from pypy.rlib.rarithmetic import ovfcheck
 from pypy.rpython.lltypesystem import lltype, llmemory, rffi
 from pypy.rpython.ootypesystem import ootype
@@ -66,7 +65,7 @@
         res = self.interp_operations(f, [8, 98])
         assert res == 110
 
-    def test_loop(self):
+    def test_loop_1(self):
         myjitdriver = JitDriver(greens = [], reds = ['x', 'y', 'res'])
         def f(x, y):
             res = 0
@@ -78,20 +77,20 @@
             return res
         res = self.meta_interp(f, [6, 7])
         assert res == 42
-        self.check_loop_count(1)
-        self.check_loops({'guard_true': 1,
-                          'int_add': 1, 'int_sub': 1, 'int_gt': 1,
-                          'jump': 1})
+        self.check_trace_count(1)
+        self.check_resops({'jump': 1, 'int_gt': 2, 'int_add': 2,
+                           'guard_true': 2, 'int_sub': 2})
+
         if self.basic:
             found = 0
-            for op in get_stats().loops[0]._all_operations():
+            for op in get_stats().get_all_loops()[0]._all_operations():
                 if op.getopname() == 'guard_true':
                     liveboxes = op.getfailargs()
                     assert len(liveboxes) == 3
                     for box in liveboxes:
                         assert isinstance(box, history.BoxInt)
                     found += 1
-            assert found == 1
+            assert found == 2
 
     def test_loop_variant_mul1(self):
         myjitdriver = JitDriver(greens = [], reds = ['y', 'res', 'x'])
@@ -107,8 +106,8 @@
             return res
         res = self.meta_interp(f, [6, 7])
         assert res == 1323
-        self.check_loop_count(1)
-        self.check_loops(int_mul=1)
+        self.check_trace_count(1)
+        self.check_simple_loop(int_mul=1)
 
     def test_loop_variant_mul_ovf(self):
         myjitdriver = JitDriver(greens = [], reds = ['y', 'res', 'x'])
@@ -124,8 +123,8 @@
             return res
         res = self.meta_interp(f, [6, 7])
         assert res == 1323
-        self.check_loop_count(1)
-        self.check_loops(int_mul_ovf=1)
+        self.check_trace_count(1)
+        self.check_simple_loop(int_mul_ovf=1)
 
     def test_loop_invariant_mul1(self):
         myjitdriver = JitDriver(greens = [], reds = ['y', 'res', 'x'])
@@ -139,10 +138,11 @@
             return res
         res = self.meta_interp(f, [6, 7])
         assert res == 252
-        self.check_loop_count(1)
-        self.check_loops({'guard_true': 1,
-                          'int_add': 1, 'int_sub': 1, 'int_gt': 1,
-                          'jump': 1})
+        self.check_trace_count(1)
+        self.check_simple_loop(int_mul=0)
+        self.check_resops({'jump': 1, 'int_gt': 2, 'int_add': 2,
+                           'int_mul': 1, 'guard_true': 2, 'int_sub': 2})
+
 
     def test_loop_invariant_mul_ovf(self):
         myjitdriver = JitDriver(greens = [], reds = ['y', 'res', 'x'])
@@ -157,69 +157,63 @@
             return res
         res = self.meta_interp(f, [6, 7])
         assert res == 308
-        self.check_loop_count(1)
-        self.check_loops({'guard_true': 1,
-                          'int_add': 2, 'int_sub': 1, 'int_gt': 1,
-                          'int_lshift': 1,
-                          'jump': 1})
+        self.check_trace_count(1)
+        self.check_simple_loop(int_mul_ovf=0)
+        self.check_resops({'jump': 1, 'int_lshift': 2, 'int_gt': 2,
+                           'int_mul_ovf': 1, 'int_add': 4,
+                           'guard_true': 2, 'guard_no_overflow': 1,
+                           'int_sub': 2})
 
     def test_loop_invariant_mul_bridge1(self):
-        myjitdriver = JitDriver(greens = [], reds = ['y', 'res', 'x'])
-        def f(x, y):
+        myjitdriver = JitDriver(greens = [], reds = ['y', 'res', 'x', 'n'])
+        def f(x, y, n):
             res = 0
             while y > 0:
-                myjitdriver.can_enter_jit(x=x, y=y, res=res)
-                myjitdriver.jit_merge_point(x=x, y=y, res=res)
+                myjitdriver.can_enter_jit(x=x, y=y, n=n, res=res)
+                myjitdriver.jit_merge_point(x=x, y=y, n=n, res=res)
                 res += x * x
-                if y<16:
+                if y<n:
                     x += 1
                 y -= 1


More information about the pypy-commit mailing list