[pypy-commit] pypy streamio-bufinput: merge with default

justinpeel noreply at buildbot.pypy.org
Fri Aug 19 22:27:44 CEST 2011


Author: Justin Peel <notmuchtotell at gmail.com>
Branch: streamio-bufinput
Changeset: r46650:abce39b7256e
Date: 2011-08-19 14:30 -0600
http://bitbucket.org/pypy/pypy/changeset/abce39b7256e/

Log:	merge with default

diff too long, truncating to 10000 out of 15972 lines

diff --git a/LICENSE b/LICENSE
--- a/LICENSE
+++ b/LICENSE
@@ -37,22 +37,22 @@
     Armin Rigo
     Maciej Fijalkowski
     Carl Friedrich Bolz
+    Antonio Cuni
     Amaury Forgeot d'Arc
-    Antonio Cuni
     Samuele Pedroni
     Michael Hudson
     Holger Krekel
+    Benjamin Peterson
     Christian Tismer
-    Benjamin Peterson
+    Hakan Ardo
+    Alex Gaynor
     Eric van Riet Paap
-    Anders Chrigström
-    Håkan Ardö
+    Anders Chrigstrom
+    David Schneider
     Richard Emslie
     Dan Villiom Podlaski Christiansen
     Alexander Schremmer
-    Alex Gaynor
-    David Schneider
-    Aurelién Campeas
+    Aurelien Campeas
     Anders Lehmann
     Camillo Bruni
     Niklaus Haldimann
@@ -63,16 +63,17 @@
     Bartosz Skowron
     Jakub Gustak
     Guido Wesdorp
+    Daniel Roberts
     Adrien Di Mascio
     Laura Creighton
     Ludovic Aubry
     Niko Matsakis
-    Daniel Roberts
     Jason Creighton
-    Jacob Hallén
+    Jacob Hallen
     Alex Martelli
     Anders Hammarquist
     Jan de Mooij
+    Wim Lavrijsen
     Stephan Diehl
     Michael Foord
     Stefan Schwarzer
@@ -83,9 +84,13 @@
     Alexandre Fayolle
     Marius Gedminas
     Simon Burton
+    Justin Peel
     Jean-Paul Calderone
     John Witulski
+    Lukas Diekmann
+    holger krekel
     Wim Lavrijsen
+    Dario Bertini
     Andreas Stührk
     Jean-Philippe St. Pierre
     Guido van Rossum
@@ -97,15 +102,16 @@
     Georg Brandl
     Gerald Klix
     Wanja Saatkamp
+    Ronny Pfannschmidt
     Boris Feigin
     Oscar Nierstrasz
-    Dario Bertini
     David Malcolm
     Eugene Oden
     Henry Mason
+    Sven Hager
     Lukas Renggli
+    Ilya Osadchiy
     Guenter Jantzen
-    Ronny Pfannschmidt
     Bert Freudenberg
     Amit Regmi
     Ben Young
@@ -122,8 +128,8 @@
     Jared Grubb
     Karl Bartel
     Gabriel Lavoie
+    Victor Stinner
     Brian Dorsey
-    Victor Stinner
     Stuart Williams
     Toby Watson
     Antoine Pitrou
@@ -134,19 +140,23 @@
     Jonathan David Riehl
     Elmo Mäntynen
     Anders Qvist
-    Beatrice Düring
+    Beatrice During
     Alexander Sedov
+    Timo Paulssen
+    Corbin Simpson
     Vincent Legoll
+    Romain Guillebert
     Alan McIntyre
-    Romain Guillebert
     Alex Perry
     Jens-Uwe Mager
+    Simon Cross
     Dan Stromberg
-    Lukas Diekmann
+    Guillebert Romain
     Carl Meyer
     Pieter Zieschang
     Alejandro J. Cura
     Sylvain Thenault
+    Christoph Gerum
     Travis Francis Athougies
     Henrik Vendelbo
     Lutz Paelike
@@ -157,6 +167,7 @@
     Miguel de Val Borro
     Ignas Mikalajunas
     Artur Lisiecki
+    Philip Jenvey
     Joshua Gilbert
     Godefroid Chappelle
     Yusei Tahara
@@ -165,27 +176,31 @@
     Gustavo Niemeyer
     William Leslie
     Akira Li
-    Kristján Valur Jónsson
+    Kristjan Valur Jonsson
     Bobby Impollonia
+    Michael Hudson-Doyle
     Andrew Thompson
     Anders Sigfridsson
+    Floris Bruynooghe
     Jacek Generowicz
     Dan Colish
-    Sven Hager
     Zooko Wilcox-O Hearn
+    Dan Villiom Podlaski Christiansen
     Anders Hammarquist
+    Chris Lambacher
     Dinu Gherman
     Dan Colish
+    Brett Cannon
     Daniel Neuhäuser
     Michael Chermside
     Konrad Delong
     Anna Ravencroft
     Greg Price
     Armin Ronacher
+    Christian Muirhead
     Jim Baker
-    Philip Jenvey
     Rodrigo Araújo
-    Brett Cannon
+    Romain Guillebert
 
     Heinrich-Heine University, Germany 
     Open End AB (formerly AB Strakt), Sweden
diff --git a/ctypes_configure/stdoutcapture.py b/ctypes_configure/stdoutcapture.py
--- a/ctypes_configure/stdoutcapture.py
+++ b/ctypes_configure/stdoutcapture.py
@@ -15,6 +15,15 @@
             not hasattr(os, 'fdopen')):
             self.dummy = 1
         else:
+            try:
+                self.tmpout = os.tmpfile()
+                if mixed_out_err:
+                    self.tmperr = self.tmpout
+                else:
+                    self.tmperr = os.tmpfile()
+            except OSError:     # bah?  on at least one Windows box
+                self.dummy = 1
+                return
             self.dummy = 0
             # make new stdout/stderr files if needed
             self.localoutfd = os.dup(1)
@@ -29,11 +38,6 @@
                 sys.stderr = os.fdopen(self.localerrfd, 'w', 0)
             else:
                 self.saved_stderr = None
-            self.tmpout = os.tmpfile()
-            if mixed_out_err:
-                self.tmperr = self.tmpout
-            else:
-                self.tmperr = os.tmpfile()
             os.dup2(self.tmpout.fileno(), 1)
             os.dup2(self.tmperr.fileno(), 2)
 
diff --git a/lib-python/conftest.py b/lib-python/conftest.py
--- a/lib-python/conftest.py
+++ b/lib-python/conftest.py
@@ -154,18 +154,18 @@
     RegrTest('test_cmd.py'),
     RegrTest('test_cmd_line_script.py'),
     RegrTest('test_codeccallbacks.py', core=True),
-    RegrTest('test_codecencodings_cn.py'),
-    RegrTest('test_codecencodings_hk.py'),
-    RegrTest('test_codecencodings_jp.py'),
-    RegrTest('test_codecencodings_kr.py'),
-    RegrTest('test_codecencodings_tw.py'),
+    RegrTest('test_codecencodings_cn.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecencodings_hk.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecencodings_jp.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecencodings_kr.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecencodings_tw.py', usemodules='_multibytecodec'),
 
-    RegrTest('test_codecmaps_cn.py'),
-    RegrTest('test_codecmaps_hk.py'),
-    RegrTest('test_codecmaps_jp.py'),
-    RegrTest('test_codecmaps_kr.py'),
-    RegrTest('test_codecmaps_tw.py'),
-    RegrTest('test_codecs.py', core=True),
+    RegrTest('test_codecmaps_cn.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecmaps_hk.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecmaps_jp.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecmaps_kr.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecmaps_tw.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecs.py', core=True, usemodules='_multibytecodec'),
     RegrTest('test_codeop.py', core=True),
     RegrTest('test_coercion.py', core=True),
     RegrTest('test_collections.py'),
@@ -314,7 +314,7 @@
     RegrTest('test_mmap.py'),
     RegrTest('test_module.py', core=True),
     RegrTest('test_modulefinder.py'),
-    RegrTest('test_multibytecodec.py'),
+    RegrTest('test_multibytecodec.py', usemodules='_multibytecodec'),
     RegrTest('test_multibytecodec_support.py', skip="not a test"),
     RegrTest('test_multifile.py'),
     RegrTest('test_multiprocessing.py', skip='FIXME leaves subprocesses'),
diff --git a/lib-python/modified-2.7/distutils/unixccompiler.py b/lib-python/modified-2.7/distutils/unixccompiler.py
--- a/lib-python/modified-2.7/distutils/unixccompiler.py
+++ b/lib-python/modified-2.7/distutils/unixccompiler.py
@@ -324,7 +324,7 @@
             # On OSX users can specify an alternate SDK using
             # '-isysroot', calculate the SDK root if it is specified
             # (and use it further on)
-            cflags = sysconfig.get_config_var('CFLAGS')
+            cflags = sysconfig.get_config_var('CFLAGS') or ''
             m = re.search(r'-isysroot\s+(\S+)', cflags)
             if m is None:
                 sysroot = '/'
diff --git a/lib-python/modified-2.7/test/regrtest.py b/lib-python/modified-2.7/test/regrtest.py
--- a/lib-python/modified-2.7/test/regrtest.py
+++ b/lib-python/modified-2.7/test/regrtest.py
@@ -1403,7 +1403,26 @@
         test_zipimport
         test_zlib
         """,
-    'openbsd3':
+    'openbsd4':
+        """
+        test_ascii_formatd
+        test_bsddb
+        test_bsddb3
+        test_ctypes
+        test_dl
+        test_epoll
+        test_gdbm
+        test_locale
+        test_normalization
+        test_ossaudiodev
+        test_pep277
+        test_tcl
+        test_tk
+        test_ttk_guionly
+        test_ttk_textonly
+        test_multiprocessing
+        """,
+    'openbsd5':
         """
         test_ascii_formatd
         test_bsddb
diff --git a/lib-python/modified-2.7/test/test_bz2.py b/lib-python/modified-2.7/test/test_bz2.py
--- a/lib-python/modified-2.7/test/test_bz2.py
+++ b/lib-python/modified-2.7/test/test_bz2.py
@@ -50,6 +50,7 @@
         self.filename = TESTFN
 
     def tearDown(self):
+        test_support.gc_collect()
         if os.path.isfile(self.filename):
             os.unlink(self.filename)
 
diff --git a/lib-python/modified-2.7/test/test_fcntl.py b/lib-python/modified-2.7/test/test_fcntl.py
new file mode 100644
--- /dev/null
+++ b/lib-python/modified-2.7/test/test_fcntl.py
@@ -0,0 +1,108 @@
+"""Test program for the fcntl C module.
+
+OS/2+EMX doesn't support the file locking operations.
+
+"""
+import os
+import struct
+import sys
+import unittest
+from test.test_support import (verbose, TESTFN, unlink, run_unittest,
+    import_module)
+
+# Skip test if no fcntl module.
+fcntl = import_module('fcntl')
+
+
+# TODO - Write tests for flock() and lockf().
+
+def get_lockdata():
+    if sys.platform.startswith('atheos'):
+        start_len = "qq"
+    else:
+        try:
+            os.O_LARGEFILE
+        except AttributeError:
+            start_len = "ll"
+        else:
+            start_len = "qq"
+
+    if sys.platform in ('netbsd1', 'netbsd2', 'netbsd3',
+                        'Darwin1.2', 'darwin',
+                        'freebsd2', 'freebsd3', 'freebsd4', 'freebsd5',
+                        'freebsd6', 'freebsd7', 'freebsd8',
+                        'bsdos2', 'bsdos3', 'bsdos4',
+                        'openbsd', 'openbsd2', 'openbsd3', 'openbsd4', 'openbsd5'):
+        if struct.calcsize('l') == 8:
+            off_t = 'l'
+            pid_t = 'i'
+        else:
+            off_t = 'lxxxx'
+            pid_t = 'l'
+        lockdata = struct.pack(off_t + off_t + pid_t + 'hh', 0, 0, 0,
+                               fcntl.F_WRLCK, 0)
+    elif sys.platform in ['aix3', 'aix4', 'hp-uxB', 'unixware7']:
+        lockdata = struct.pack('hhlllii', fcntl.F_WRLCK, 0, 0, 0, 0, 0, 0)
+    elif sys.platform in ['os2emx']:
+        lockdata = None
+    else:
+        lockdata = struct.pack('hh'+start_len+'hh', fcntl.F_WRLCK, 0, 0, 0, 0, 0)
+    if lockdata:
+        if verbose:
+            print 'struct.pack: ', repr(lockdata)
+    return lockdata
+
+lockdata = get_lockdata()
+
+
+class TestFcntl(unittest.TestCase):
+
+    def setUp(self):
+        self.f = None
+
+    def tearDown(self):
+        if self.f and not self.f.closed:
+            self.f.close()
+        unlink(TESTFN)
+
+    def test_fcntl_fileno(self):
+        # the example from the library docs
+        self.f = open(TESTFN, 'w')
+        rv = fcntl.fcntl(self.f.fileno(), fcntl.F_SETFL, os.O_NONBLOCK)
+        if verbose:
+            print 'Status from fcntl with O_NONBLOCK: ', rv
+        if sys.platform not in ['os2emx']:
+            rv = fcntl.fcntl(self.f.fileno(), fcntl.F_SETLKW, lockdata)
+            if verbose:
+                print 'String from fcntl with F_SETLKW: ', repr(rv)
+        self.f.close()
+
+    def test_fcntl_file_descriptor(self):
+        # again, but pass the file rather than numeric descriptor
+        self.f = open(TESTFN, 'w')
+        rv = fcntl.fcntl(self.f, fcntl.F_SETFL, os.O_NONBLOCK)
+        if sys.platform not in ['os2emx']:
+            rv = fcntl.fcntl(self.f, fcntl.F_SETLKW, lockdata)
+        self.f.close()
+
+    def test_fcntl_64_bit(self):
+        # Issue #1309352: fcntl shouldn't fail when the third arg fits in a
+        # C 'long' but not in a C 'int'.
+        try:
+            cmd = fcntl.F_NOTIFY
+            # This flag is larger than 2**31 in 64-bit builds
+            flags = fcntl.DN_MULTISHOT
+        except AttributeError:
+            self.skipTest("F_NOTIFY or DN_MULTISHOT unavailable")
+        fd = os.open(os.path.dirname(os.path.abspath(TESTFN)), os.O_RDONLY)
+        try:
+            fcntl.fcntl(fd, cmd, flags)
+        finally:
+            os.close(fd)
+
+
+def test_main():
+    run_unittest(TestFcntl)
+
+if __name__ == '__main__':
+    test_main()
diff --git a/lib-python/modified-2.7/test/test_multibytecodec.py b/lib-python/modified-2.7/test/test_multibytecodec.py
--- a/lib-python/modified-2.7/test/test_multibytecodec.py
+++ b/lib-python/modified-2.7/test/test_multibytecodec.py
@@ -148,7 +148,8 @@
 class Test_StreamReader(unittest.TestCase):
     def test_bug1728403(self):
         try:
-            open(TESTFN, 'w').write('\xa1')
+            with open(TESTFN, 'w') as f:
+                f.write('\xa1')
             f = codecs.open(TESTFN, encoding='cp949')
             self.assertRaises(UnicodeDecodeError, f.read, 2)
         finally:
diff --git a/lib-python/modified-2.7/test/test_tempfile.py b/lib-python/modified-2.7/test/test_tempfile.py
--- a/lib-python/modified-2.7/test/test_tempfile.py
+++ b/lib-python/modified-2.7/test/test_tempfile.py
@@ -23,8 +23,8 @@
 
 # TEST_FILES may need to be tweaked for systems depending on the maximum
 # number of files that can be opened at one time (see ulimit -n)
-if sys.platform in ('openbsd3', 'openbsd4'):
-    TEST_FILES = 48
+if sys.platform.startswith("openbsd"):
+    TEST_FILES = 64 # ulimit -n defaults to 128 for normal users
 else:
     TEST_FILES = 100
 
diff --git a/lib_pypy/_ctypes/function.py b/lib_pypy/_ctypes/function.py
--- a/lib_pypy/_ctypes/function.py
+++ b/lib_pypy/_ctypes/function.py
@@ -91,13 +91,15 @@
                     raise TypeError(
                         "item %d in _argtypes_ has no from_param method" % (
                             i + 1,))
-            #
-            if all([hasattr(argtype, '_ffiargshape') for argtype in argtypes]):
-                fastpath_cls = make_fastpath_subclass(self.__class__)
-                fastpath_cls.enable_fastpath_maybe(self)
             self._argtypes_ = list(argtypes)
+            self._check_argtypes_for_fastpath()
     argtypes = property(_getargtypes, _setargtypes)
 
+    def _check_argtypes_for_fastpath(self):
+        if all([hasattr(argtype, '_ffiargshape') for argtype in self._argtypes_]):
+            fastpath_cls = make_fastpath_subclass(self.__class__)
+            fastpath_cls.enable_fastpath_maybe(self)
+
     def _getparamflags(self):
         return self._paramflags
 
@@ -216,6 +218,7 @@
                 import ctypes
                 restype = ctypes.c_int
             self._ptr = self._getfuncptr_fromaddress(self._argtypes_, restype)
+            self._check_argtypes_for_fastpath()
             return
 
         
diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py
--- a/lib_pypy/_ctypes/structure.py
+++ b/lib_pypy/_ctypes/structure.py
@@ -14,6 +14,15 @@
             raise TypeError("Expected CData subclass, got %s" % (tp,))
         if isinstance(tp, StructOrUnionMeta):
             tp._make_final()
+        if len(f) == 3:
+            if (not hasattr(tp, '_type_')
+                or not isinstance(tp._type_, str)
+                or tp._type_ not in "iIhHbBlL"):
+                #XXX: are those all types?
+                #     we just don't get the type name
+                #     in the interp-level TypeError
+                #     raised by rawffi if there are more
+                raise TypeError('bit fields not allowed for type ' + tp.__name__)
 
     all_fields = []
     for cls in reversed(inspect.getmro(superclass)):
@@ -34,34 +43,37 @@
     for i, field in enumerate(all_fields):
         name = field[0]
         value = field[1]
+        is_bitfield = (len(field) == 3)
         fields[name] = Field(name,
                              self._ffistruct.fieldoffset(name),
                              self._ffistruct.fieldsize(name),
-                             value, i)
+                             value, i, is_bitfield)
 
     if anonymous_fields:
         resnames = []
         for i, field in enumerate(all_fields):
             name = field[0]
             value = field[1]
+            is_bitfield = (len(field) == 3)
             startpos = self._ffistruct.fieldoffset(name)
             if name in anonymous_fields:
                 for subname in value._names:
                     resnames.append(subname)
-                    relpos = startpos + value._fieldtypes[subname].offset
-                    subvalue = value._fieldtypes[subname].ctype
+                    subfield = getattr(value, subname)
+                    relpos = startpos + subfield.offset
+                    subvalue = subfield.ctype
                     fields[subname] = Field(subname,
                                             relpos, subvalue._sizeofinstances(),
-                                            subvalue, i)
+                                            subvalue, i, is_bitfield)
             else:
                 resnames.append(name)
         names = resnames
     self._names = names
-    self._fieldtypes = fields
+    self.__dict__.update(fields)
 
 class Field(object):
-    def __init__(self, name, offset, size, ctype, num):
-        for k in ('name', 'offset', 'size', 'ctype', 'num'):
+    def __init__(self, name, offset, size, ctype, num, is_bitfield):
+        for k in ('name', 'offset', 'size', 'ctype', 'num', 'is_bitfield'):
             self.__dict__[k] = locals()[k]
 
     def __setattr__(self, name, value):
@@ -71,6 +83,35 @@
         return "<Field '%s' offset=%d size=%d>" % (self.name, self.offset,
                                                    self.size)
 
+    def __get__(self, obj, cls=None):
+        if obj is None:
+            return self
+        if self.is_bitfield:
+            # bitfield member, use direct access
+            return obj._buffer.__getattr__(self.name)
+        else:
+            fieldtype = self.ctype
+            offset = self.num
+            suba = obj._subarray(fieldtype, self.name)
+            return fieldtype._CData_output(suba, obj, offset)
+
+
+    def __set__(self, obj, value):
+        fieldtype = self.ctype
+        cobj = fieldtype.from_param(value)
+        if ensure_objects(cobj) is not None:
+            key = keepalive_key(self.num)
+            store_reference(obj, key, cobj._objects)
+        arg = cobj._get_buffer_value()
+        if fieldtype._fficompositesize is not None:
+            from ctypes import memmove
+            dest = obj._buffer.fieldaddress(self.name)
+            memmove(dest, arg, fieldtype._fficompositesize)
+        else:
+            obj._buffer.__setattr__(self.name, arg)
+
+
+
 # ________________________________________________________________
 
 def _set_shape(tp, rawfields, is_union=False):
@@ -79,17 +120,12 @@
     tp._ffiargshape = tp._ffishape = (tp._ffistruct, 1)
     tp._fficompositesize = tp._ffistruct.size
 
-def struct_getattr(self, name):
-    if name not in ('_fields_', '_fieldtypes'):
-        if hasattr(self, '_fieldtypes') and name in self._fieldtypes:
-            return self._fieldtypes[name]
-    return _CDataMeta.__getattribute__(self, name)
 
 def struct_setattr(self, name, value):
     if name == '_fields_':
         if self.__dict__.get('_fields_', None) is not None:
             raise AttributeError("_fields_ is final")
-        if self in [v for k, v in value]:
+        if self in [f[1] for f in value]:
             raise AttributeError("Structure or union cannot contain itself")
         names_and_fields(
             self,
@@ -127,10 +163,8 @@
         if '_fields_' not in self.__dict__:
             self._fields_ = []
             self._names = []
-            self._fieldtypes = {}
             _set_shape(self, [], self._is_union)
 
-    __getattr__ = struct_getattr
     __setattr__ = struct_setattr
 
     def from_address(self, address):
@@ -200,40 +234,6 @@
         A = _rawffi.Array(fieldtype._ffishape)
         return A.fromaddress(address, 1)
 
-    def __setattr__(self, name, value):
-        try:
-            field = self._fieldtypes[name]
-        except KeyError:
-            return _CData.__setattr__(self, name, value)
-        fieldtype = field.ctype
-        cobj = fieldtype.from_param(value)
-        if ensure_objects(cobj) is not None:
-            key = keepalive_key(field.num)
-            store_reference(self, key, cobj._objects)
-        arg = cobj._get_buffer_value()
-        if fieldtype._fficompositesize is not None:
-            from ctypes import memmove
-            dest = self._buffer.fieldaddress(name)
-            memmove(dest, arg, fieldtype._fficompositesize)
-        else:
-            self._buffer.__setattr__(name, arg)
-
-    def __getattribute__(self, name):
-        if name == '_fieldtypes':
-            return _CData.__getattribute__(self, '_fieldtypes')
-        try:
-            field = self._fieldtypes[name]
-        except KeyError:
-            return _CData.__getattribute__(self, name)
-        if field.size >> 16:
-            # bitfield member, use direct access
-            return self._buffer.__getattr__(name)
-        else:
-            fieldtype = field.ctype
-            offset = field.num
-            suba = self._subarray(fieldtype, name)
-            return fieldtype._CData_output(suba, self, offset)
-
     def _get_buffer_for_param(self):
         return self
 
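The structure.py change above drops the per-class `_fieldtypes` dict and the custom
`__getattribute__`/`__setattr__` methods in favour of `Field` objects that live in the
class `__dict__` and act as data descriptors.  A minimal standalone sketch of that
descriptor pattern, with hypothetical names (not code from this changeset):

    class RawField(object):
        """Data descriptor: instance attribute access is routed through
        __get__/__set__, so no custom __getattribute__ is needed."""
        def __init__(self, name, index):
            self.name = name
            self.index = index
        def __get__(self, obj, cls=None):
            if obj is None:              # accessed on the class itself
                return self
            return obj._buffer[self.index]
        def __set__(self, obj, value):
            obj._buffer[self.index] = value

    class Point(object):
        x = RawField('x', 0)
        y = RawField('y', 1)
        def __init__(self):
            self._buffer = [0, 0]

    p = Point()
    p.x = 7                              # goes through RawField.__set__
    assert (p.x, p.y) == (7, 0)          # goes through RawField.__get__
    assert isinstance(Point.x, RawField) # class access returns the descriptor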
diff --git a/lib_pypy/_sqlite3.py b/lib_pypy/_sqlite3.py
--- a/lib_pypy/_sqlite3.py
+++ b/lib_pypy/_sqlite3.py
@@ -724,13 +724,12 @@
             self.statement.reset()
             raise self.connection._get_exception(ret)
 
-        if self.statement.kind == "DQL":
-            if ret == SQLITE_ROW:
-                self.statement._build_row_cast_map()
-                self.statement._readahead()
-            else:
-                self.statement.item = None
-                self.statement.exhausted = True
+        if self.statement.kind == "DQL" and ret == SQLITE_ROW:
+            self.statement._build_row_cast_map()
+            self.statement._readahead()
+        else:
+            self.statement.item = None
+            self.statement.exhausted = True
 
         if self.statement.kind in ("DML", "DDL"):
             self.statement.reset()
@@ -891,7 +890,8 @@
 
         self.statement = c_void_p()
         next_char = c_char_p()
-        ret = sqlite.sqlite3_prepare_v2(self.con.db, sql, -1, byref(self.statement), byref(next_char))
+        sql_char = c_char_p(sql)
+        ret = sqlite.sqlite3_prepare_v2(self.con.db, sql_char, -1, byref(self.statement), byref(next_char))
         if ret == SQLITE_OK and self.statement.value is None:
             # an empty statement, we work around that, as it's the least trouble
             ret = sqlite.sqlite3_prepare_v2(self.con.db, "select 42", -1, byref(self.statement), byref(next_char))
@@ -901,7 +901,9 @@
             raise self.con._get_exception(ret)
         self.con._remember_statement(self)
         if _check_remaining_sql(next_char.value):
-            raise Warning, "One and only one statement required"
+            raise Warning, "One and only one statement required: %r" % (
+                next_char.value,)
+        # sql_char should remain alive until here
 
         self._build_row_cast_map()
 
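The explicit `sql_char = c_char_p(sql)` above (and the "should remain alive until
here" comment) is about buffer lifetime: `next_char` ends up pointing into the buffer
owned by that ctypes object, so the object must stay referenced until the pointer has
been read.  A rough illustration of the same pattern using libc's strchr, assumed here
for illustration only (not part of the changeset, and it presumes a Linux/glibc box):

    import ctypes
    import ctypes.util

    libc = ctypes.CDLL(ctypes.util.find_library("c"))
    libc.strchr.argtypes = [ctypes.c_char_p, ctypes.c_int]
    # return a raw pointer into the argument buffer instead of a copied string
    libc.strchr.restype = ctypes.POINTER(ctypes.c_char)

    sql_char = ctypes.c_char_p("select 42; select 43")
    rest = libc.strchr(sql_char, ord(";"))
    assert rest[0] == ";"   # reads through a pointer into sql_char's buffer
    # sql_char must not become garbage before this point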
diff --git a/lib_pypy/pyrepl/readline.py b/lib_pypy/pyrepl/readline.py
--- a/lib_pypy/pyrepl/readline.py
+++ b/lib_pypy/pyrepl/readline.py
@@ -33,7 +33,7 @@
 from pyrepl.unix_console import UnixConsole, _error
 
 
-ENCODING = 'latin1'     # XXX hard-coded
+ENCODING = sys.getfilesystemencoding() or 'latin1'     # XXX review
 
 __all__ = ['add_history',
            'clear_history',
diff --git a/pypy/annotation/builtin.py b/pypy/annotation/builtin.py
--- a/pypy/annotation/builtin.py
+++ b/pypy/annotation/builtin.py
@@ -308,9 +308,6 @@
             clsdef = clsdef.commonbase(cdef)
     return SomeInstance(clsdef)
 
-def robjmodel_we_are_translated():
-    return immutablevalue(True)
-
 def robjmodel_r_dict(s_eqfn, s_hashfn, s_force_non_null=None):
     if s_force_non_null is None:
         force_non_null = False
@@ -376,8 +373,6 @@
 
 BUILTIN_ANALYZERS[pypy.rlib.rarithmetic.intmask] = rarith_intmask
 BUILTIN_ANALYZERS[pypy.rlib.objectmodel.instantiate] = robjmodel_instantiate
-BUILTIN_ANALYZERS[pypy.rlib.objectmodel.we_are_translated] = (
-    robjmodel_we_are_translated)
 BUILTIN_ANALYZERS[pypy.rlib.objectmodel.r_dict] = robjmodel_r_dict
 BUILTIN_ANALYZERS[pypy.rlib.objectmodel.hlinvoke] = robjmodel_hlinvoke
 BUILTIN_ANALYZERS[pypy.rlib.objectmodel.keepalive_until_here] = robjmodel_keepalive_until_here
@@ -416,7 +411,8 @@
 from pypy.annotation.model import SomePtr
 from pypy.rpython.lltypesystem import lltype
 
-def malloc(s_T, s_n=None, s_flavor=None, s_zero=None, s_track_allocation=None):
+def malloc(s_T, s_n=None, s_flavor=None, s_zero=None, s_track_allocation=None,
+           s_add_memory_pressure=None):
     assert (s_n is None or s_n.knowntype == int
             or issubclass(s_n.knowntype, pypy.rlib.rarithmetic.base_int))
     assert s_T.is_constant()
@@ -432,6 +428,8 @@
     else:
         assert s_flavor.is_constant()
         assert s_track_allocation is None or s_track_allocation.is_constant()
+        assert (s_add_memory_pressure is None or
+                s_add_memory_pressure.is_constant())
         # not sure how to call malloc() for the example 'p' in the
         # presence of s_extraargs
         r = SomePtr(lltype.Ptr(s_T.const))
diff --git a/pypy/config/translationoption.py b/pypy/config/translationoption.py
--- a/pypy/config/translationoption.py
+++ b/pypy/config/translationoption.py
@@ -13,6 +13,10 @@
 DEFL_LOW_INLINE_THRESHOLD = DEFL_INLINE_THRESHOLD / 2.0
 
 DEFL_GC = "minimark"
+if sys.platform.startswith("linux"):
+    DEFL_ROOTFINDER_WITHJIT = "asmgcc"
+else:
+    DEFL_ROOTFINDER_WITHJIT = "shadowstack"
 
 IS_64_BITS = sys.maxint > 2147483647
 
@@ -109,7 +113,7 @@
     BoolOption("jit", "generate a JIT",
                default=False,
                suggests=[("translation.gc", DEFL_GC),
-                         ("translation.gcrootfinder", "asmgcc"),
+                         ("translation.gcrootfinder", DEFL_ROOTFINDER_WITHJIT),
                          ("translation.list_comprehension_operations", True)]),
     ChoiceOption("jit_backend", "choose the backend for the JIT",
                  ["auto", "x86", "x86-without-sse2", "llvm"],
diff --git a/pypy/doc/conf.py b/pypy/doc/conf.py
--- a/pypy/doc/conf.py
+++ b/pypy/doc/conf.py
@@ -45,9 +45,9 @@
 # built documents.
 #
 # The short X.Y version.
-version = '1.5'
+version = '1.6'
 # The full version, including alpha/beta/rc tags.
-release = '1.5'
+release = '1.6'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
diff --git a/pypy/doc/contributor.rst b/pypy/doc/contributor.rst
--- a/pypy/doc/contributor.rst
+++ b/pypy/doc/contributor.rst
@@ -9,22 +9,22 @@
     Armin Rigo
     Maciej Fijalkowski
     Carl Friedrich Bolz
+    Antonio Cuni
     Amaury Forgeot d'Arc
-    Antonio Cuni
     Samuele Pedroni
     Michael Hudson
     Holger Krekel
+    Benjamin Peterson
     Christian Tismer
-    Benjamin Peterson
+    Hakan Ardo
+    Alex Gaynor
     Eric van Riet Paap
-    Anders Chrigström
-    Håkan Ardö
+    Anders Chrigstrom
+    David Schneider
     Richard Emslie
     Dan Villiom Podlaski Christiansen
     Alexander Schremmer
-    Alex Gaynor
-    David Schneider
-    Aurelién Campeas
+    Aurelien Campeas
     Anders Lehmann
     Camillo Bruni
     Niklaus Haldimann
@@ -35,16 +35,17 @@
     Bartosz Skowron
     Jakub Gustak
     Guido Wesdorp
+    Daniel Roberts
     Adrien Di Mascio
     Laura Creighton
     Ludovic Aubry
     Niko Matsakis
-    Daniel Roberts
     Jason Creighton
-    Jacob Hallén
+    Jacob Hallen
     Alex Martelli
     Anders Hammarquist
     Jan de Mooij
+    Wim Lavrijsen
     Stephan Diehl
     Michael Foord
     Stefan Schwarzer
@@ -55,9 +56,13 @@
     Alexandre Fayolle
     Marius Gedminas
     Simon Burton
+    Justin Peel
     Jean-Paul Calderone
     John Witulski
+    Lukas Diekmann
+    holger krekel
     Wim Lavrijsen
+    Dario Bertini
     Andreas Stührk
     Jean-Philippe St. Pierre
     Guido van Rossum
@@ -69,15 +74,16 @@
     Georg Brandl
     Gerald Klix
     Wanja Saatkamp
+    Ronny Pfannschmidt
     Boris Feigin
     Oscar Nierstrasz
-    Dario Bertini
     David Malcolm
     Eugene Oden
     Henry Mason
+    Sven Hager
     Lukas Renggli
+    Ilya Osadchiy
     Guenter Jantzen
-    Ronny Pfannschmidt
     Bert Freudenberg
     Amit Regmi
     Ben Young
@@ -94,8 +100,8 @@
     Jared Grubb
     Karl Bartel
     Gabriel Lavoie
+    Victor Stinner
     Brian Dorsey
-    Victor Stinner
     Stuart Williams
     Toby Watson
     Antoine Pitrou
@@ -106,19 +112,23 @@
     Jonathan David Riehl
     Elmo Mäntynen
     Anders Qvist
-    Beatrice Düring
+    Beatrice During
     Alexander Sedov
+    Timo Paulssen
+    Corbin Simpson
     Vincent Legoll
+    Romain Guillebert
     Alan McIntyre
-    Romain Guillebert
     Alex Perry
     Jens-Uwe Mager
+    Simon Cross
     Dan Stromberg
-    Lukas Diekmann
+    Guillebert Romain
     Carl Meyer
     Pieter Zieschang
     Alejandro J. Cura
     Sylvain Thenault
+    Christoph Gerum
     Travis Francis Athougies
     Henrik Vendelbo
     Lutz Paelike
@@ -129,6 +139,7 @@
     Miguel de Val Borro
     Ignas Mikalajunas
     Artur Lisiecki
+    Philip Jenvey
     Joshua Gilbert
     Godefroid Chappelle
     Yusei Tahara
@@ -137,24 +148,29 @@
     Gustavo Niemeyer
     William Leslie
     Akira Li
-    Kristján Valur Jónsson
+    Kristjan Valur Jonsson
     Bobby Impollonia
+    Michael Hudson-Doyle
     Andrew Thompson
     Anders Sigfridsson
+    Floris Bruynooghe
     Jacek Generowicz
     Dan Colish
-    Sven Hager
     Zooko Wilcox-O Hearn
+    Dan Villiom Podlaski Christiansen
     Anders Hammarquist
+    Chris Lambacher
     Dinu Gherman
     Dan Colish
+    Brett Cannon
     Daniel Neuhäuser
     Michael Chermside
     Konrad Delong
     Anna Ravencroft
     Greg Price
     Armin Ronacher
+    Christian Muirhead
     Jim Baker
-    Philip Jenvey
     Rodrigo Araújo
+    Romain Guillebert
 
diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst
--- a/pypy/doc/cpython_differences.rst
+++ b/pypy/doc/cpython_differences.rst
@@ -88,6 +88,13 @@
 
     _stackless
 
+  Note that only some of these modules are built-in in a typical
+  CPython installation, and the rest are from non-built-in extension
+  modules.  This means that e.g. ``import parser`` will, on CPython,
+  find a local file ``parser.py``, while ``import sys`` will not find a
+  local file ``sys.py``.  In PyPy the difference does not exist: all
+  these modules are built-in.
+
 * Supported by being rewritten in pure Python (possibly using ``ctypes``):
   see the `lib_pypy/`_ directory.  Examples of modules that we
   support this way: ``ctypes``, ``cPickle``, ``cmath``, ``dbm``, ``datetime``...
@@ -280,7 +287,14 @@
   never a dictionary as it sometimes is in CPython. Assigning to
   ``__builtins__`` has no effect.
 
-* object identity of immutable keys in dictionaries is not necessarily preserved.
-  Never compare immutable objects with ``is``.
+* Do not compare immutable objects with ``is``.  For example on CPython
+  it is true that ``x is 0`` works, i.e. does the same as ``type(x) is
+  int and x == 0``, but it is so by accident.  If you do instead
+  ``x is 1000``, then it stops working, because 1000 is too large and
+  doesn't come from the internal cache.  In PyPy it fails to work in
+  both cases, because we have no need for a cache at all.
+
+* Also, object identity of immutable keys in dictionaries is not necessarily
+  preserved.
 
 .. include:: _ref.txt
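As a concrete illustration of the identity-caching paragraph added above (standard
behaviour at the time, not part of the diff itself):

    x = int("0")          # a 0 computed at run time, not a literal
    y = int("1000")
    print x is 0          # True on CPython (small ints are cached), False on PyPy
    print y is 1000       # False on both: 1000 is outside CPython's small-int cache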
diff --git a/pypy/doc/garbage_collection.rst b/pypy/doc/garbage_collection.rst
--- a/pypy/doc/garbage_collection.rst
+++ b/pypy/doc/garbage_collection.rst
@@ -147,7 +147,7 @@
 You can read more about them at the start of
 `pypy/rpython/memory/gc/minimark.py`_.
 
-In more details:
+In more detail:
 
 - The small newly malloced objects are allocated in the nursery (case 1).
   All objects living in the nursery are "young".
diff --git a/pypy/doc/getting-started-python.rst b/pypy/doc/getting-started-python.rst
--- a/pypy/doc/getting-started-python.rst
+++ b/pypy/doc/getting-started-python.rst
@@ -32,7 +32,10 @@
 .. _`windows document`: windows.html
 
 You can translate the whole of PyPy's Python interpreter to low level C code,
-or `CLI code`_.
+or `CLI code`_.  If you intend to build using gcc, check to make sure that
+the version you have is not 4.2 or you will run into `this bug`_.
+
+.. _`this bug`: https://bugs.launchpad.net/ubuntu/+source/gcc-4.2/+bug/187391
 
 1. First `download a pre-built PyPy`_ for your architecture which you will
    use to translate your Python interpreter.  It is, of course, possible to
@@ -102,7 +105,7 @@
 
     $ ./pypy-c
     Python 2.7.0 (61ef2a11b56a, Mar 02 2011, 03:00:11)
-    [PyPy 1.5.0-alpha0 with GCC 4.4.3] on linux2
+    [PyPy 1.6.0 with GCC 4.4.3] on linux2
     Type "help", "copyright", "credits" or "license" for more information.
     And now for something completely different: ``this sentence is false''
     >>>> 46 - 4
@@ -162,7 +165,7 @@
 
     $ ./pypy-cli
     Python 2.7.0 (61ef2a11b56a, Mar 02 2011, 03:00:11)
-    [PyPy 1.5.0-alpha0] on linux2
+    [PyPy 1.6.0] on linux2
     Type "help", "copyright", "credits" or "license" for more information.
     And now for something completely different: ``distopian and utopian chairs''
     >>>> 
@@ -199,7 +202,7 @@
 
         $ ./pypy-jvm 
         Python 2.7.0 (61ef2a11b56a, Mar 02 2011, 03:00:11)
-        [PyPy 1.5.0-alpha0] on linux2
+        [PyPy 1.6.0] on linux2
         Type "help", "copyright", "credits" or "license" for more information.
         And now for something completely different: ``# assert did not crash''
         >>>> 
@@ -238,7 +241,7 @@
 the ``bin/pypy`` executable.
 
 To install PyPy system wide on unix-like systems, it is recommended to put the
-whole hierarchy alone (e.g. in ``/opt/pypy1.5``) and put a symlink to the
+whole hierarchy alone (e.g. in ``/opt/pypy1.6``) and put a symlink to the
 ``pypy`` executable into ``/usr/bin`` or ``/usr/local/bin``
 
 If the executable fails to find suitable libraries, it will report
diff --git a/pypy/doc/getting-started.rst b/pypy/doc/getting-started.rst
--- a/pypy/doc/getting-started.rst
+++ b/pypy/doc/getting-started.rst
@@ -53,11 +53,11 @@
 PyPy is ready to be executed as soon as you unpack the tarball or the zip
 file, with no need to install it in any specific location::
 
-    $ tar xf pypy-1.5-linux.tar.bz2
+    $ tar xf pypy-1.6-linux.tar.bz2
 
-    $ ./pypy-1.5-linux/bin/pypy
+    $ ./pypy-1.6/bin/pypy
     Python 2.7.1 (?, Apr 27 2011, 12:44:21)
-    [PyPy 1.5.0-alpha0 with GCC 4.4.3] on linux2
+    [PyPy 1.6.0 with GCC 4.4.3] on linux2
     Type "help", "copyright", "credits" or "license" for more information.
     And now for something completely different: ``implementing LOGO in LOGO:
     "turtles all the way down"''
@@ -73,16 +73,16 @@
 
     $ curl -O http://python-distribute.org/distribute_setup.py
 
-    $ curl -O https://github.com/pypa/pip/raw/master/contrib/get-pip.py
+    $ curl -O https://raw.github.com/pypa/pip/master/contrib/get-pip.py
 
-    $ ./pypy-1.5-linux/bin/pypy distribute_setup.py
+    $ ./pypy-1.6/bin/pypy distribute_setup.py
 
-    $ ./pypy-1.5-linux/bin/pypy get-pip.py
+    $ ./pypy-1.6/bin/pypy get-pip.py
 
-    $ ./pypy-1.5-linux/bin/pip install pygments  # for example
+    $ ./pypy-1.6/bin/pip install pygments  # for example
 
-3rd party libraries will be installed in ``pypy-1.5-linux/site-packages``, and
-the scripts in ``pypy-1.5-linux/bin``.
+3rd party libraries will be installed in ``pypy-1.6/site-packages``, and
+the scripts in ``pypy-1.6/bin``.
 
 Installing using virtualenv
 ---------------------------
diff --git a/pypy/doc/how-to-release.rst b/pypy/doc/how-to-release.rst
--- a/pypy/doc/how-to-release.rst
+++ b/pypy/doc/how-to-release.rst
@@ -21,8 +21,8 @@
 Release Steps
 ----------------
 
-* at code freeze make a release branch under
-  http://codepeak.net/svn/pypy/release/x.y(.z). IMPORTANT: bump the
+* at code freeze make a release branch using release-x.x.x in mercurial.
+  IMPORTANT: bump the
   pypy version number in module/sys/version.py and in
   module/cpyext/include/patchlevel.h, notice that the branch
   will capture the revision number of this change for the release;
@@ -48,12 +48,6 @@
   the release announcement should contain a direct link to the download page
 * update pypy.org (under extradoc/pypy.org), rebuild and commit
 
-* update http://codespeak.net/pypy/trunk:
-   code0> + chmod -R yourname:users /www/codespeak.net/htdocs/pypy/trunk
-   local> cd ..../pypy/doc && py.test
-   local> cd ..../pypy
-   local> rsync -az doc codespeak.net:/www/codespeak.net/htdocs/pypy/trunk/pypy/
-
 * post announcement on morepypy.blogspot.com
 * send announcements to pypy-dev, python-list,
   python-announce, python-dev ...
diff --git a/pypy/doc/index-of-release-notes.rst b/pypy/doc/index-of-release-notes.rst
--- a/pypy/doc/index-of-release-notes.rst
+++ b/pypy/doc/index-of-release-notes.rst
@@ -16,3 +16,4 @@
    release-1.4.0beta.rst
    release-1.4.1.rst
    release-1.5.0.rst
+   release-1.6.0.rst
diff --git a/pypy/doc/index.rst b/pypy/doc/index.rst
--- a/pypy/doc/index.rst
+++ b/pypy/doc/index.rst
@@ -15,7 +15,7 @@
 
 * `FAQ`_: some frequently asked questions.
 
-* `Release 1.5`_: the latest official release
+* `Release 1.6`_: the latest official release
 
 * `PyPy Blog`_: news and status info about PyPy 
 
@@ -77,7 +77,7 @@
 .. _`Getting Started`: getting-started.html
 .. _`Papers`: extradoc.html
 .. _`Videos`: video-index.html
-.. _`Release 1.5`: http://pypy.org/download.html
+.. _`Release 1.6`: http://pypy.org/download.html
 .. _`speed.pypy.org`: http://speed.pypy.org
 .. _`RPython toolchain`: translation.html
 .. _`potential project ideas`: project-ideas.html
@@ -122,9 +122,9 @@
 Windows, on top of .NET, and on top of Java.
 To dig into PyPy it is recommended to try out the current
 Mercurial default branch, which is always working or mostly working,
-instead of the latest release, which is `1.5`__.
+instead of the latest release, which is `1.6`__.
 
-.. __: release-1.5.0.html
+.. __: release-1.6.0.html
 
 PyPy is mainly developed on Linux and Mac OS X.  Windows is supported,
 but platform-specific bugs tend to take longer before we notice and fix
diff --git a/pypy/doc/project-ideas.rst b/pypy/doc/project-ideas.rst
--- a/pypy/doc/project-ideas.rst
+++ b/pypy/doc/project-ideas.rst
@@ -48,12 +48,6 @@
 
 .. image:: image/jitviewer.png
 
-We would like to add one level to this hierarchy, by showing the generated
-machine code for each jit operation.  The necessary information is already in
-the log file produced by the JIT, so it is "only" a matter of teaching the
-jitviewer to display it.  Ideally, the machine code should be hidden by
-default and viewable on request.
-
 The jitviewer is a web application based on flask and jinja2 (and jQuery on
 the client): if you have great web developing skills and want to help PyPy,
 this is an ideal task to get started, because it does not require any deep
diff --git a/pypy/doc/release-1.6.0.rst b/pypy/doc/release-1.6.0.rst
new file mode 100644
--- /dev/null
+++ b/pypy/doc/release-1.6.0.rst
@@ -0,0 +1,95 @@
+========================
+PyPy 1.6 - kickass panda
+========================
+
+We're pleased to announce the 1.6 release of PyPy. This release brings a lot
+of bugfixes and performance improvements over 1.5, and improves support for
+Windows 32bit and OS X 64bit. This version fully implements Python 2.7.1 and
+has beta level support for loading CPython C extensions.  You can download it
+here:
+
+    http://pypy.org/download.html
+
+What is PyPy?
+=============
+
+PyPy is a very compliant Python interpreter, almost a drop-in replacement for
+CPython 2.7.1. It's fast (`pypy 1.5 and cpython 2.6.2`_ performance comparison)
+due to its integrated tracing JIT compiler.
+
+This release supports x86 machines running Linux 32/64 or Mac OS X.  Windows 32
+is beta (it roughly works but a lot of small issues have not been fixed so
+far).  Windows 64 is not yet supported.
+
+The main topics of this release are speed and stability: on average on
+our benchmark suite, PyPy 1.6 is between **20% and 30%** faster than PyPy 1.5,
+which was already much faster than CPython on our set of benchmarks.
+
+The speed improvements have been made possible by optimizing many of the
+layers which compose PyPy.  In particular, we improved: the Garbage Collector,
+the JIT warmup time, the optimizations performed by the JIT, the quality of
+the generated machine code and the implementation of our Python interpreter.
+
+.. _`pypy 1.5 and cpython 2.6.2`: http://speed.pypy.org
+
+
+Highlights
+==========
+
+* Numerous performance improvements, overall giving considerable speedups:
+
+  - better GC behavior when dealing with very large objects and arrays
+
+  - **fast ctypes:** now calls to ctypes functions are seen and optimized
+    by the JIT, and they are up to 60 times faster than PyPy 1.5 and 10 times
+    faster than CPython
+
+  - improved generators(1): simple generators are now inlined into the caller
+    loop, making them up to 3.5 times faster than PyPy 1.5.
+
+  - improved generators(2): thanks to other optimizations, even generators
+    that are not inlined are between 10% and 20% faster than PyPy 1.5.
+
+  - faster warmup time for the JIT
+
+  - JIT support for single floats (e.g., for ``array('f')``)
+
+  - optimized dictionaries: the internal representation of dictionaries is now
+    dynamically selected depending on the type of stored objects, resulting in
+    faster code and smaller memory footprint.  For example, dictionaries whose
+    keys are all strings, or all integers. Other dictionaries are also smaller
+    due to bugfixes.
+
+* JitViewer: this is the first official release which includes the JitViewer,
+  a web-based tool which helps you to see which parts of your Python code have
+  been compiled by the JIT, down to the assembler. The `jitviewer`_ 0.1 has
+  already been released and works well with PyPy 1.6.
+
+* The CPython extension module API has been improved and now supports many
+  more extensions. For information on which ones are supported, please refer to
+  our `compatibility wiki`_.
+
+* Multibyte encoding support: this was one of the last areas in which we were
+  still behind CPython, but now we fully support them.
+
+* Preliminary support for NumPy: this release includes a preview of a very
+  fast NumPy module integrated with the PyPy JIT.  Unfortunately, this does
+  not mean that you can expect to take an existing NumPy program and run it on
+  PyPy, because the module is still unfinished and supports only some of the
+  numpy API. However, barring some details, what works should be
+  blazingly fast :-)
+
+* Bugfixes: since the 1.5 release we fixed 53 bugs in our `bug tracker`_, not
+  counting the numerous bugs that were found and reported through other
+  channels than the bug tracker.
+
+Cheers,
+
+Hakan Ardo, Carl Friedrich Bolz, Laura Creighton, Antonio Cuni,
+Maciej Fijalkowski, Amaury Forgeot d'Arc, Alex Gaynor,
+Armin Rigo and the PyPy team
+
+.. _`jitviewer`: http://morepypy.blogspot.com/2011/08/visualization-of-jitted-code.html
+.. _`bug tracker`: https://bugs.pypy.org
+.. _`compatibility wiki`: https://bitbucket.org/pypy/compatibility/wiki/Home
+
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -111,6 +111,9 @@
     def setslotvalue(self, index, w_val):
         raise NotImplementedError
 
+    def delslotvalue(self, index):
+        raise NotImplementedError
+
     def descr_call_mismatch(self, space, opname, RequiredClass, args):
         if RequiredClass is None:
             classname = '?'
diff --git a/pypy/interpreter/gateway.py b/pypy/interpreter/gateway.py
--- a/pypy/interpreter/gateway.py
+++ b/pypy/interpreter/gateway.py
@@ -64,7 +64,7 @@
                 self.visit_self(el[1], *args)
             else:
                 self.visit_function(el, *args)
-        else:
+        elif isinstance(el, type):
             for typ in self.bases_order:
                 if issubclass(el, typ):
                     visit = getattr(self, "visit__%s" % (typ.__name__,))
@@ -73,6 +73,8 @@
             else:
                 raise Exception("%s: no match for unwrap_spec element %s" % (
                     self.__class__.__name__, el))
+        else:
+            raise Exception("unable to dispatch, %s, perhaps your parameter should have started with w_?" % el)
 
     def apply_over(self, unwrap_spec, *extra):
         dispatch = self.dispatch
diff --git a/pypy/interpreter/pyframe.py b/pypy/interpreter/pyframe.py
--- a/pypy/interpreter/pyframe.py
+++ b/pypy/interpreter/pyframe.py
@@ -52,6 +52,9 @@
     escaped                  = False  # see mark_as_escaped()
 
     def __init__(self, space, code, w_globals, closure):
+        if not we_are_translated():
+            assert type(self) in (space.FrameClass, CPythonFrame), (
+                "use space.FrameClass(), not directly PyFrame()")
         self = hint(self, access_directly=True, fresh_virtualizable=True)
         assert isinstance(code, pycode.PyCode)
         self.pycode = code
@@ -80,7 +83,7 @@
         self.escaped = True
 
     def append_block(self, block):
-        block.previous = self.lastblock
+        assert block.previous is self.lastblock
         self.lastblock = block
 
     def pop_block(self):
@@ -106,7 +109,8 @@
         while i >= 0:
             block = lst[i]
             i -= 1
-            self.append_block(block)
+            block.previous = self.lastblock
+            self.lastblock = block
 
     def get_builtin(self):
         if self.space.config.objspace.honor__builtins__:
diff --git a/pypy/interpreter/pyopcode.py b/pypy/interpreter/pyopcode.py
--- a/pypy/interpreter/pyopcode.py
+++ b/pypy/interpreter/pyopcode.py
@@ -892,16 +892,16 @@
         raise BytecodeCorruption, "old opcode, no longer in use"
 
     def SETUP_LOOP(self, offsettoend, next_instr):
-        block = LoopBlock(self, next_instr + offsettoend)
-        self.append_block(block)
+        block = LoopBlock(self, next_instr + offsettoend, self.lastblock)
+        self.lastblock = block
 
     def SETUP_EXCEPT(self, offsettoend, next_instr):
-        block = ExceptBlock(self, next_instr + offsettoend)
-        self.append_block(block)
+        block = ExceptBlock(self, next_instr + offsettoend, self.lastblock)
+        self.lastblock = block
 
     def SETUP_FINALLY(self, offsettoend, next_instr):
-        block = FinallyBlock(self, next_instr + offsettoend)
-        self.append_block(block)
+        block = FinallyBlock(self, next_instr + offsettoend, self.lastblock)
+        self.lastblock = block
 
     def SETUP_WITH(self, offsettoend, next_instr):
         w_manager = self.peekvalue()
@@ -915,8 +915,8 @@
         w_exit = self.space.get(w_descr, w_manager)
         self.settopvalue(w_exit)
         w_result = self.space.get_and_call_function(w_enter, w_manager)
-        block = WithBlock(self, next_instr + offsettoend)
-        self.append_block(block)
+        block = WithBlock(self, next_instr + offsettoend, self.lastblock)
+        self.lastblock = block
         self.pushvalue(w_result)
 
     def WITH_CLEANUP(self, oparg, next_instr):
@@ -1247,10 +1247,10 @@
 
     _immutable_ = True
 
-    def __init__(self, frame, handlerposition):
+    def __init__(self, frame, handlerposition, previous):
         self.handlerposition = handlerposition
         self.valuestackdepth = frame.valuestackdepth
-        self.previous = None # this makes a linked list of blocks
+        self.previous = previous   # this makes a linked list of blocks
 
     def __eq__(self, other):
         return (self.__class__ is other.__class__ and
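The pyframe.py/pyopcode.py changes above thread the `previous` link through the block
constructor instead of patching it afterwards in `append_block`, so the frame's block
stack is a singly linked list built at construction time.  A minimal sketch of that
push/pop pattern, with hypothetical names (not code from this changeset):

    class Block(object):
        def __init__(self, handlerposition, previous):
            self.handlerposition = handlerposition
            self.previous = previous        # block below us, or None

    class Frame(object):
        def __init__(self):
            self.lastblock = None           # top of the block stack
        def push_block(self, handlerposition):
            self.lastblock = Block(handlerposition, self.lastblock)
        def pop_block(self):
            block = self.lastblock
            self.lastblock = block.previous
            return block

    f = Frame()
    f.push_block(10)
    f.push_block(20)
    assert f.pop_block().handlerposition == 20
    assert f.pop_block().handlerposition == 10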
diff --git a/pypy/interpreter/typedef.py b/pypy/interpreter/typedef.py
--- a/pypy/interpreter/typedef.py
+++ b/pypy/interpreter/typedef.py
@@ -258,6 +258,11 @@
                     self.slots_w = [None] * nslots
             def setslotvalue(self, index, w_value):
                 self.slots_w[index] = w_value
+            def delslotvalue(self, index):
+                if self.slots_w[index] is None:
+                    return False
+                self.slots_w[index] = None
+                return True
             def getslotvalue(self, index):
                 return self.slots_w[index]
         add(Proto)
@@ -530,11 +535,10 @@
         """member.__delete__(obj)
         Delete the value of the slot 'member' from the given 'obj'."""
         self.typecheck(space, w_obj)
-        w_oldresult = w_obj.getslotvalue(self.index)
-        if w_oldresult is None:
+        success = w_obj.delslotvalue(self.index)
+        if not success:
             raise OperationError(space.w_AttributeError,
                                  space.wrap(self.name)) # XXX better message
-        w_obj.setslotvalue(self.index, None)
 
 Member.typedef = TypeDef(
     "member_descriptor",
diff --git a/pypy/jit/backend/llgraph/llimpl.py b/pypy/jit/backend/llgraph/llimpl.py
--- a/pypy/jit/backend/llgraph/llimpl.py
+++ b/pypy/jit/backend/llgraph/llimpl.py
@@ -1071,6 +1071,8 @@
         return heaptracker.adr2int(llmemory.cast_ptr_to_adr(x))
     if TP == llmemory.Address:
         return heaptracker.adr2int(x)
+    if TP is lltype.SingleFloat:
+        return longlong.singlefloat2int(x)
     return lltype.cast_primitive(lltype.Signed, x)
 
 def cast_from_int(TYPE, x):
@@ -1086,6 +1088,9 @@
             x = llmemory.cast_int_to_adr(x)
         assert lltype.typeOf(x) == llmemory.Address
         return x
+    elif TYPE is lltype.SingleFloat:
+        assert lltype.typeOf(x) is lltype.Signed
+        return longlong.int2singlefloat(x)
     else:
         if lltype.typeOf(x) == llmemory.Address:
             x = heaptracker.adr2int(x)
@@ -1140,6 +1145,7 @@
     del _future_values[:]
 
 def set_future_value_int(index, value):
+    assert lltype.typeOf(value) is lltype.Signed
     set_future_value_ref(index, value)
 
 def set_future_value_float(index, value):
@@ -1488,6 +1494,7 @@
     'i': lltype.Signed,
     'f': lltype.Float,
     'L': lltype.SignedLongLong,
+    'S': lltype.SingleFloat,
     'v': lltype.Void,
     }
 
diff --git a/pypy/jit/backend/llgraph/runner.py b/pypy/jit/backend/llgraph/runner.py
--- a/pypy/jit/backend/llgraph/runner.py
+++ b/pypy/jit/backend/llgraph/runner.py
@@ -91,6 +91,7 @@
 class BaseCPU(model.AbstractCPU):
     supports_floats = True
     supports_longlong = llimpl.IS_32_BIT
+    supports_singlefloats = True
 
     def __init__(self, rtyper, stats=None, opts=None,
                  translate_support_code=False,
@@ -327,12 +328,16 @@
 
     def calldescrof_dynamic(self, ffi_args, ffi_result, extrainfo=None):
         from pypy.jit.backend.llsupport.ffisupport import get_ffi_type_kind
+        from pypy.jit.backend.llsupport.ffisupport import UnsupportedKind
         arg_types = []
-        for arg in ffi_args:
-            kind = get_ffi_type_kind(arg)
-            if kind != history.VOID:
-                arg_types.append(kind)
-        reskind = get_ffi_type_kind(ffi_result)
+        try:
+            for arg in ffi_args:
+                kind = get_ffi_type_kind(self, arg)
+                if kind != history.VOID:
+                    arg_types.append(kind)
+            reskind = get_ffi_type_kind(self, ffi_result)
+        except UnsupportedKind:
+            return None
         return self.getdescr(0, reskind, extrainfo=extrainfo,
                              arg_types=''.join(arg_types))
 
diff --git a/pypy/jit/backend/llgraph/test/test_llgraph.py b/pypy/jit/backend/llgraph/test/test_llgraph.py
--- a/pypy/jit/backend/llgraph/test/test_llgraph.py
+++ b/pypy/jit/backend/llgraph/test/test_llgraph.py
@@ -19,6 +19,9 @@
     def setup_method(self, _):
         self.cpu = self.cpu_type(None)
 
+    def test_memoryerror(self):
+        py.test.skip("does not make much sense on the llgraph backend")
+
 
 def test_cast_adr_to_int_and_back():
     X = lltype.Struct('X', ('foo', lltype.Signed))
diff --git a/pypy/jit/backend/llsupport/descr.py b/pypy/jit/backend/llsupport/descr.py
--- a/pypy/jit/backend/llsupport/descr.py
+++ b/pypy/jit/backend/llsupport/descr.py
@@ -303,6 +303,8 @@
                 c = 'f'
             elif c == 'f' and longlong.supports_longlong:
                 return 'longlong.getrealfloat(%s)' % (process('L'),)
+            elif c == 'S':
+                return 'longlong.int2singlefloat(%s)' % (process('i'),)
             arg = 'args_%s[%d]' % (c, seen[c])
             seen[c] += 1
             return arg
@@ -318,6 +320,8 @@
                 return lltype.Void
             elif arg == 'L':
                 return lltype.SignedLongLong
+            elif arg == 'S':
+                return lltype.SingleFloat
             else:
                 raise AssertionError(arg)
 
@@ -334,6 +338,8 @@
             result = 'rffi.cast(lltype.SignedLongLong, res)'
         elif self.get_return_type() == history.VOID:
             result = 'None'
+        elif self.get_return_type() == 'S':
+            result = 'longlong.singlefloat2int(res)'
         else:
             assert 0
         source = py.code.Source("""
@@ -344,14 +350,15 @@
         """ % locals())
         ARGS = [TYPE(arg) for arg in self.arg_classes]
         FUNC = lltype.FuncType(ARGS, RESULT)
-        d = locals().copy()
-        d.update(globals())
+        d = globals().copy()
+        d.update(locals())
         exec source.compile() in d
         self.call_stub = d['call_stub']
 
     def verify_types(self, args_i, args_r, args_f, return_type):
         assert self._return_type in return_type
-        assert self.arg_classes.count('i') == len(args_i or ())
+        assert (self.arg_classes.count('i') +
+                self.arg_classes.count('S')) == len(args_i or ())
         assert self.arg_classes.count('r') == len(args_r or ())
         assert (self.arg_classes.count('f') +
                 self.arg_classes.count('L')) == len(args_f or ())
@@ -428,23 +435,39 @@
     def get_result_size(self, translate_support_code):
         return 0
 
+_SingleFloatCallDescr = None   # built lazily
+
 def getCallDescrClass(RESULT):
     if RESULT is lltype.Void:
         return VoidCallDescr
     if RESULT is lltype.Float:
         return FloatCallDescr
+    if RESULT is lltype.SingleFloat:
+        global _SingleFloatCallDescr
+        if _SingleFloatCallDescr is None:
+            assert rffi.sizeof(rffi.UINT) == rffi.sizeof(RESULT)
+            class SingleFloatCallDescr(getCallDescrClass(rffi.UINT)):
+                _clsname = 'SingleFloatCallDescr'
+                _return_type = 'S'
+            _SingleFloatCallDescr = SingleFloatCallDescr
+        return _SingleFloatCallDescr
     if is_longlong(RESULT):
         return LongLongCallDescr
     return getDescrClass(RESULT, BaseIntCallDescr, GcPtrCallDescr,
                          NonGcPtrCallDescr, 'Call', 'get_result_size',
                          Ellipsis,  # <= floatattrname should not be used here
                          '_is_result_signed')
+getCallDescrClass._annspecialcase_ = 'specialize:memo'
 
 def get_call_descr(gccache, ARGS, RESULT, extrainfo=None):
     arg_classes = []
     for ARG in ARGS:
         kind = getkind(ARG)
-        if   kind == 'int': arg_classes.append('i')
+        if   kind == 'int':
+            if ARG is lltype.SingleFloat:
+                arg_classes.append('S')
+            else:
+                arg_classes.append('i')
         elif kind == 'ref': arg_classes.append('r')
         elif kind == 'float':
             if is_longlong(ARG):
@@ -476,6 +499,9 @@
             return GcPtrDescr
         else:
             return NonGcPtrDescr
+    if TYPE is lltype.SingleFloat:
+        assert rffi.sizeof(rffi.UINT) == rffi.sizeof(TYPE)
+        TYPE = rffi.UINT
     try:
         return _cache[nameprefix, TYPE]
     except KeyError:
diff --git a/pypy/jit/backend/llsupport/ffisupport.py b/pypy/jit/backend/llsupport/ffisupport.py
--- a/pypy/jit/backend/llsupport/ffisupport.py
+++ b/pypy/jit/backend/llsupport/ffisupport.py
@@ -1,19 +1,21 @@
 from pypy.rlib.rarithmetic import intmask
 from pypy.jit.metainterp import history
-from pypy.jit.backend.llsupport.descr import DynamicIntCallDescr, NonGcPtrCallDescr,\
-    FloatCallDescr, VoidCallDescr
+from pypy.rpython.lltypesystem import rffi
+from pypy.jit.backend.llsupport.descr import (
+    DynamicIntCallDescr, NonGcPtrCallDescr, FloatCallDescr, VoidCallDescr,
+    LongLongCallDescr, getCallDescrClass)
 
 class UnsupportedKind(Exception):
     pass
 
-def get_call_descr_dynamic(ffi_args, ffi_result, extrainfo=None):
+def get_call_descr_dynamic(cpu, ffi_args, ffi_result, extrainfo=None):
     """Get a call descr: the types of result and args are represented by
     rlib.libffi.types.*"""
     try:
-        reskind = get_ffi_type_kind(ffi_result)
-        argkinds = [get_ffi_type_kind(arg) for arg in ffi_args]
+        reskind = get_ffi_type_kind(cpu, ffi_result)
+        argkinds = [get_ffi_type_kind(cpu, arg) for arg in ffi_args]
     except UnsupportedKind:
-        return None # ??
+        return None
     arg_classes = ''.join(argkinds)
     if reskind == history.INT:
         size = intmask(ffi_result.c_size)
@@ -25,17 +27,26 @@
         return FloatCallDescr(arg_classes, extrainfo)
     elif reskind == history.VOID:
         return VoidCallDescr(arg_classes, extrainfo)
+    elif reskind == 'L':
+        return LongLongCallDescr(arg_classes, extrainfo)
+    elif reskind == 'S':
+        SingleFloatCallDescr = getCallDescrClass(rffi.FLOAT)
+        return SingleFloatCallDescr(arg_classes, extrainfo)
     assert False
 
-def get_ffi_type_kind(ffi_type):
+def get_ffi_type_kind(cpu, ffi_type):
     from pypy.rlib.libffi import types
     kind = types.getkind(ffi_type)
     if kind == 'i' or kind == 'u':
         return history.INT
-    elif kind == 'f':
+    elif cpu.supports_floats and kind == 'f':
         return history.FLOAT
     elif kind == 'v':
         return history.VOID
+    elif cpu.supports_longlong and (kind == 'I' or kind == 'U'):     # longlong
+        return 'L'
+    elif cpu.supports_singlefloats and kind == 's':    # singlefloat
+        return 'S'
     raise UnsupportedKind("Unsupported kind '%s'" % kind)
 
 def is_ffi_type_signed(ffi_type):
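
get_call_descr_dynamic() above now consults the CPU capability flags before
accepting a kind; anything unsupported raises UnsupportedKind and the caller
returns None, so the call falls back to the non-JITted path.  A stand-alone
sketch of that gating (the cpu argument is a stand-in with the three boolean
flags, like the FakeCPU used in the tests further down; the string return
values are simplified):

class UnsupportedKind(Exception):
    pass

def kind_for(cpu, ffi_kind):
    # mirrors get_ffi_type_kind(): map a libffi kind character to a JIT kind,
    # but only for kinds the given CPU backend actually supports
    if ffi_kind in ('i', 'u'):
        return 'int'
    if cpu.supports_floats and ffi_kind == 'f':
        return 'float'
    if ffi_kind == 'v':
        return 'void'
    if cpu.supports_longlong and ffi_kind in ('I', 'U'):
        return 'L'
    if cpu.supports_singlefloats and ffi_kind == 's':
        return 'S'
    raise UnsupportedKind("Unsupported kind %r" % (ffi_kind,))

class _CPU(object):
    supports_floats = True
    supports_longlong = False
    supports_singlefloats = True

assert kind_for(_CPU(), 's') == 'S'
assert kind_for(_CPU(), 'i') == 'int'
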
diff --git a/pypy/jit/backend/llsupport/gc.py b/pypy/jit/backend/llsupport/gc.py
--- a/pypy/jit/backend/llsupport/gc.py
+++ b/pypy/jit/backend/llsupport/gc.py
@@ -544,18 +544,19 @@
         assert self.GCClass.inline_simple_malloc
         assert self.GCClass.inline_simple_malloc_varsize
 
-        # make a malloc function, with three arguments
+        # make a malloc function, with two arguments
         def malloc_basic(size, tid):
             type_id = llop.extract_ushort(llgroup.HALFWORD, tid)
             has_finalizer = bool(tid & (1<<llgroup.HALFSHIFT))
             check_typeid(type_id)
-            try:
-                res = llop1.do_malloc_fixedsize_clear(llmemory.GCREF,
-                                                      type_id, size, True,
-                                                      has_finalizer, False)
-            except MemoryError:
-                fatalerror("out of memory (from JITted code)")
-                res = lltype.nullptr(llmemory.GCREF.TO)
+            res = llop1.do_malloc_fixedsize_clear(llmemory.GCREF,
+                                                  type_id, size, True,
+                                                  has_finalizer, False)
+            # In case the operation above failed, we return NULL from this
+            # function to the assembler.  There is also an RPython
+            # exception set, typically MemoryError; but it's easier and
+            # faster to check for the NULL return value, as done by
+            # translator/exceptiontransform.py.
             #llop.debug_print(lltype.Void, "\tmalloc_basic", size, type_id,
             #                 "-->", res)
             return res
@@ -571,14 +572,10 @@
         def malloc_array(itemsize, tid, num_elem):
             type_id = llop.extract_ushort(llgroup.HALFWORD, tid)
             check_typeid(type_id)
-            try:
-                return llop1.do_malloc_varsize_clear(
-                    llmemory.GCREF,
-                    type_id, num_elem, self.array_basesize, itemsize,
-                    self.array_length_ofs, True)
-            except MemoryError:
-                fatalerror("out of memory (from JITted code)")
-                return lltype.nullptr(llmemory.GCREF.TO)
+            return llop1.do_malloc_varsize_clear(
+                llmemory.GCREF,
+                type_id, num_elem, self.array_basesize, itemsize,
+                self.array_length_ofs, True)
         self.malloc_array = malloc_array
         self.GC_MALLOC_ARRAY = lltype.Ptr(lltype.FuncType(
             [lltype.Signed] * 3, llmemory.GCREF))
@@ -591,23 +588,15 @@
         unicode_type_id = self.layoutbuilder.get_type_id(rstr.UNICODE)
         #
         def malloc_str(length):
-            try:
-                return llop1.do_malloc_varsize_clear(
-                    llmemory.GCREF,
-                    str_type_id, length, str_basesize, str_itemsize,
-                    str_ofs_length, True)
-            except MemoryError:
-                fatalerror("out of memory (from JITted code)")
-                return lltype.nullptr(llmemory.GCREF.TO)
+            return llop1.do_malloc_varsize_clear(
+                llmemory.GCREF,
+                str_type_id, length, str_basesize, str_itemsize,
+                str_ofs_length, True)
         def malloc_unicode(length):
-            try:
-                return llop1.do_malloc_varsize_clear(
-                    llmemory.GCREF,
-                    unicode_type_id, length, unicode_basesize,unicode_itemsize,
-                    unicode_ofs_length, True)
-            except MemoryError:
-                fatalerror("out of memory (from JITted code)")
-                return lltype.nullptr(llmemory.GCREF.TO)
+            return llop1.do_malloc_varsize_clear(
+                llmemory.GCREF,
+                unicode_type_id, length, unicode_basesize,unicode_itemsize,
+                unicode_ofs_length, True)
         self.malloc_str = malloc_str
         self.malloc_unicode = malloc_unicode
         self.GC_MALLOC_STR_UNICODE = lltype.Ptr(lltype.FuncType(
@@ -628,16 +617,12 @@
             if self.DEBUG:
                 random_usage_of_xmm_registers()
             assert size >= self.minimal_size_in_nursery
-            try:
-                # NB. although we call do_malloc_fixedsize_clear() here,
-                # it's a bit of a hack because we set tid to 0 and may
-                # also use it to allocate varsized objects.  The tid
-                # and possibly the length are both set afterward.
-                gcref = llop1.do_malloc_fixedsize_clear(llmemory.GCREF,
-                                            0, size, True, False, False)
-            except MemoryError:
-                fatalerror("out of memory (from JITted code)")
-                return 0
+            # NB. although we call do_malloc_fixedsize_clear() here,
+            # it's a bit of a hack because we set tid to 0 and may
+            # also use it to allocate varsized objects.  The tid
+            # and possibly the length are both set afterward.
+            gcref = llop1.do_malloc_fixedsize_clear(llmemory.GCREF,
+                                        0, size, True, False, False)
             return rffi.cast(lltype.Signed, gcref)
         self.malloc_slowpath = malloc_slowpath
         self.MALLOC_SLOWPATH = lltype.FuncType([lltype.Signed], lltype.Signed)
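
The try/except MemoryError blocks removed above caught the failure inside the
helper; after this change an allocation failure leaves a pending RPython
exception and returns NULL, and the generated assembler tests the result (see
propagate_memoryerror_if_eax_is_null further down).  A rough sketch of that
convention in plain Python, with made-up names:

_pending_exception = [None]

def do_malloc(nbytes, nursery_left=1 << 20):
    # pretend allocator: on failure, record the exception and return a NULL
    # marker instead of raising; the caller checks the return value
    if nbytes > nursery_left:
        _pending_exception[0] = MemoryError()
        return 0
    return 0x1000            # stand-in for the address of the new object

res = do_malloc(1 << 30)
assert res == 0 and isinstance(_pending_exception[0], MemoryError)
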
diff --git a/pypy/jit/backend/llsupport/llmodel.py b/pypy/jit/backend/llsupport/llmodel.py
--- a/pypy/jit/backend/llsupport/llmodel.py
+++ b/pypy/jit/backend/llsupport/llmodel.py
@@ -259,7 +259,7 @@
 
     def calldescrof_dynamic(self, ffi_args, ffi_result, extrainfo=None):
         from pypy.jit.backend.llsupport import ffisupport
-        return ffisupport.get_call_descr_dynamic(ffi_args, ffi_result,
+        return ffisupport.get_call_descr_dynamic(self, ffi_args, ffi_result,
                                                  extrainfo)
 
     def get_overflow_error(self):
@@ -499,7 +499,7 @@
     def bh_call_i(self, func, calldescr, args_i, args_r, args_f):
         assert isinstance(calldescr, BaseIntCallDescr)
         if not we_are_translated():
-            calldescr.verify_types(args_i, args_r, args_f, history.INT)
+            calldescr.verify_types(args_i, args_r, args_f, history.INT + 'S')
         return calldescr.call_stub(func, args_i, args_r, args_f)
 
     def bh_call_r(self, func, calldescr, args_i, args_r, args_f):
diff --git a/pypy/jit/backend/llsupport/test/test_descr.py b/pypy/jit/backend/llsupport/test/test_descr.py
--- a/pypy/jit/backend/llsupport/test/test_descr.py
+++ b/pypy/jit/backend/llsupport/test/test_descr.py
@@ -52,7 +52,8 @@
     S = lltype.GcStruct('S', ('x', lltype.Char),
                              ('y', lltype.Ptr(T)),
                              ('z', lltype.Ptr(U)),
-                             ('f', lltype.Float))
+                             ('f', lltype.Float),
+                             ('s', lltype.SingleFloat))
     assert getFieldDescrClass(lltype.Ptr(T)) is GcPtrFieldDescr
     assert getFieldDescrClass(lltype.Ptr(U)) is NonGcPtrFieldDescr
     cls = getFieldDescrClass(lltype.Char)
@@ -61,6 +62,10 @@
     clsf = getFieldDescrClass(lltype.Float)
     assert clsf != cls
     assert clsf == getFieldDescrClass(lltype.Float)
+    clss = getFieldDescrClass(lltype.SingleFloat)
+    assert clss not in (cls, clsf)
+    assert clss == getFieldDescrClass(lltype.SingleFloat)
+    assert clss == getFieldDescrClass(rffi.UINT)    # for now
     #
     c0 = GcCache(False)
     c1 = GcCache(True)
@@ -72,14 +77,17 @@
         descr_y = get_field_descr(c2, S, 'y')
         descr_z = get_field_descr(c2, S, 'z')
         descr_f = get_field_descr(c2, S, 'f')
+        descr_s = get_field_descr(c2, S, 's')
         assert descr_x.__class__ is cls
         assert descr_y.__class__ is GcPtrFieldDescr
         assert descr_z.__class__ is NonGcPtrFieldDescr
         assert descr_f.__class__ is clsf
+        assert descr_s.__class__ is clss
         assert descr_x.name == 'S.x'
         assert descr_y.name == 'S.y'
         assert descr_z.name == 'S.z'
         assert descr_f.name == 'S.f'
+        assert descr_s.name == 'S.s'
         if not tsc:
             assert descr_x.offset < descr_y.offset < descr_z.offset
             assert descr_x.sort_key() < descr_y.sort_key() < descr_z.sort_key()
@@ -87,23 +95,29 @@
             assert descr_y.get_field_size(False) == rffi.sizeof(lltype.Ptr(T))
             assert descr_z.get_field_size(False) == rffi.sizeof(lltype.Ptr(U))
             assert descr_f.get_field_size(False) == rffi.sizeof(lltype.Float)
+            assert descr_s.get_field_size(False) == rffi.sizeof(
+                                                            lltype.SingleFloat)
         else:
             assert isinstance(descr_x.offset, Symbolic)
             assert isinstance(descr_y.offset, Symbolic)
             assert isinstance(descr_z.offset, Symbolic)
             assert isinstance(descr_f.offset, Symbolic)
+            assert isinstance(descr_s.offset, Symbolic)
             assert isinstance(descr_x.get_field_size(True), Symbolic)
             assert isinstance(descr_y.get_field_size(True), Symbolic)
             assert isinstance(descr_z.get_field_size(True), Symbolic)
             assert isinstance(descr_f.get_field_size(True), Symbolic)
+            assert isinstance(descr_s.get_field_size(True), Symbolic)
         assert not descr_x.is_pointer_field()
         assert     descr_y.is_pointer_field()
         assert not descr_z.is_pointer_field()
         assert not descr_f.is_pointer_field()
+        assert not descr_s.is_pointer_field()
         assert not descr_x.is_float_field()
         assert not descr_y.is_float_field()
         assert not descr_z.is_float_field()
         assert     descr_f.is_float_field()
+        assert not descr_s.is_float_field()
 
 
 def test_get_field_descr_sign():
@@ -135,6 +149,7 @@
     A2 = lltype.GcArray(lltype.Ptr(T))
     A3 = lltype.GcArray(lltype.Ptr(U))
     A4 = lltype.GcArray(lltype.Float)
+    A5 = lltype.GcArray(lltype.SingleFloat)
     assert getArrayDescrClass(A2) is GcPtrArrayDescr
     assert getArrayDescrClass(A3) is NonGcPtrArrayDescr
     cls = getArrayDescrClass(A1)
@@ -143,25 +158,32 @@
     clsf = getArrayDescrClass(A4)
     assert clsf != cls
     assert clsf == getArrayDescrClass(lltype.GcArray(lltype.Float))
+    clss = getArrayDescrClass(A5)
+    assert clss not in (clsf, cls)
+    assert clss == getArrayDescrClass(lltype.GcArray(rffi.UINT))
     #
     c0 = GcCache(False)
     descr1 = get_array_descr(c0, A1)
     descr2 = get_array_descr(c0, A2)
     descr3 = get_array_descr(c0, A3)
     descr4 = get_array_descr(c0, A4)
+    descr5 = get_array_descr(c0, A5)
     assert descr1.__class__ is cls
     assert descr2.__class__ is GcPtrArrayDescr
     assert descr3.__class__ is NonGcPtrArrayDescr
     assert descr4.__class__ is clsf
+    assert descr5.__class__ is clss
     assert descr1 == get_array_descr(c0, lltype.GcArray(lltype.Char))
     assert not descr1.is_array_of_pointers()
     assert     descr2.is_array_of_pointers()
     assert not descr3.is_array_of_pointers()
     assert not descr4.is_array_of_pointers()
+    assert not descr5.is_array_of_pointers()
     assert not descr1.is_array_of_floats()
     assert not descr2.is_array_of_floats()
     assert not descr3.is_array_of_floats()
     assert     descr4.is_array_of_floats()
+    assert not descr5.is_array_of_floats()
     #
     def get_alignment(code):
         # Retrieve default alignment for the compiler/platform
@@ -170,27 +192,33 @@
     assert descr2.get_base_size(False) == get_alignment('p')
     assert descr3.get_base_size(False) == get_alignment('p')
     assert descr4.get_base_size(False) == get_alignment('d')
+    assert descr5.get_base_size(False) == get_alignment('f')
     assert descr1.get_ofs_length(False) == 0
     assert descr2.get_ofs_length(False) == 0
     assert descr3.get_ofs_length(False) == 0
     assert descr4.get_ofs_length(False) == 0
+    assert descr5.get_ofs_length(False) == 0
     assert descr1.get_item_size(False) == rffi.sizeof(lltype.Char)
     assert descr2.get_item_size(False) == rffi.sizeof(lltype.Ptr(T))
     assert descr3.get_item_size(False) == rffi.sizeof(lltype.Ptr(U))
     assert descr4.get_item_size(False) == rffi.sizeof(lltype.Float)
+    assert descr5.get_item_size(False) == rffi.sizeof(lltype.SingleFloat)
     #
     assert isinstance(descr1.get_base_size(True), Symbolic)
     assert isinstance(descr2.get_base_size(True), Symbolic)
     assert isinstance(descr3.get_base_size(True), Symbolic)
     assert isinstance(descr4.get_base_size(True), Symbolic)
+    assert isinstance(descr5.get_base_size(True), Symbolic)
     assert isinstance(descr1.get_ofs_length(True), Symbolic)
     assert isinstance(descr2.get_ofs_length(True), Symbolic)
     assert isinstance(descr3.get_ofs_length(True), Symbolic)
     assert isinstance(descr4.get_ofs_length(True), Symbolic)
+    assert isinstance(descr5.get_ofs_length(True), Symbolic)
     assert isinstance(descr1.get_item_size(True), Symbolic)
     assert isinstance(descr2.get_item_size(True), Symbolic)
     assert isinstance(descr3.get_item_size(True), Symbolic)
     assert isinstance(descr4.get_item_size(True), Symbolic)
+    assert isinstance(descr5.get_item_size(True), Symbolic)
     CA = rffi.CArray(lltype.Signed)
     descr = get_array_descr(c0, CA)
     assert not descr.is_array_of_floats()
@@ -210,6 +238,11 @@
     assert descr.is_array_of_floats()
     assert descr.get_base_size(False) == 0
     assert descr.get_ofs_length(False) == -1
+    CA = rffi.CArray(rffi.FLOAT)
+    descr = get_array_descr(c0, CA)
+    assert not descr.is_array_of_floats()
+    assert descr.get_base_size(False) == 0
+    assert descr.get_ofs_length(False) == -1
 
 
 def test_get_array_descr_sign():
@@ -257,6 +290,11 @@
     assert descr4.get_result_size(False) == rffi.sizeof(lltype.Float)
     assert descr4.get_return_type() == history.FLOAT
     assert descr4.arg_classes == "ff"
+    #
+    descr5 = get_call_descr(c0, [lltype.SingleFloat], lltype.SingleFloat)
+    assert descr5.get_result_size(False) == rffi.sizeof(lltype.SingleFloat)
+    assert descr5.get_return_type() == "S"
+    assert descr5.arg_classes == "S"
 
 def test_get_call_descr_not_translated_longlong():
     if sys.maxint > 2147483647:
@@ -286,6 +324,11 @@
     assert isinstance(descr4.get_result_size(True), Symbolic)
     assert descr4.get_return_type() == history.FLOAT
     assert descr4.arg_classes == "ff"
+    #
+    descr5 = get_call_descr(c1, [lltype.SingleFloat], lltype.SingleFloat)
+    assert isinstance(descr5.get_result_size(True), Symbolic)
+    assert descr5.get_return_type() == "S"
+    assert descr5.arg_classes == "S"
 
 def test_call_descr_extra_info():
     c1 = GcCache(True)
@@ -345,8 +388,11 @@
     #
     descr4f = get_call_descr(c0, [lltype.Char, lltype.Ptr(S)], lltype.Float)
     assert 'FloatCallDescr' in descr4f.repr_of_descr()
+    #
+    descr5f = get_call_descr(c0, [lltype.Char], lltype.SingleFloat)
+    assert 'SingleFloatCallDescr' in descr5f.repr_of_descr()
 
-def test_call_stubs():
+def test_call_stubs_1():
     c0 = GcCache(False)
     ARGS = [lltype.Char, lltype.Signed]
     RES = lltype.Char
@@ -360,6 +406,8 @@
     res = call_stub(rffi.cast(lltype.Signed, fnptr), [1, 2], None, None)
     assert res == ord('c')
 
+def test_call_stubs_2():
+    c0 = GcCache(False)
     ARRAY = lltype.GcArray(lltype.Signed)
     ARGS = [lltype.Float, lltype.Ptr(ARRAY)]
     RES = lltype.Float
@@ -375,3 +423,27 @@
     res = descr2.call_stub(rffi.cast(lltype.Signed, fnptr),
                            [], [opaquea], [longlong.getfloatstorage(3.5)])
     assert longlong.getrealfloat(res) == 4.5
+
+def test_call_stubs_single_float():
+    from pypy.rlib.longlong2float import uint2singlefloat, singlefloat2uint
+    from pypy.rlib.rarithmetic import r_singlefloat, intmask
+    #
+    c0 = GcCache(False)
+    ARGS = [lltype.SingleFloat, lltype.SingleFloat, lltype.SingleFloat]
+    RES = lltype.SingleFloat
+
+    def f(a, b, c):
+        a = float(a)
+        b = float(b)
+        c = float(c)
+        x = a - (b / c)
+        return r_singlefloat(x)
+
+    fnptr = llhelper(lltype.Ptr(lltype.FuncType(ARGS, RES)), f)
+    descr2 = get_call_descr(c0, ARGS, RES)
+    a = intmask(singlefloat2uint(r_singlefloat(-10.0)))
+    b = intmask(singlefloat2uint(r_singlefloat(3.0)))
+    c = intmask(singlefloat2uint(r_singlefloat(2.0)))
+    res = descr2.call_stub(rffi.cast(lltype.Signed, fnptr),
+                           [a, b, c], [], [])
+    assert float(uint2singlefloat(rffi.r_uint(res))) == -11.5
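
test_call_stubs_single_float above wraps singlefloat2uint() in intmask()
because the unsigned 32-bit pattern of a negative float has the sign bit set,
while the call stub expects a signed machine word.  A pure-Python illustration
of that wrap-around (struct stands in for the rffi casts; intmask32 is a
simplified 32-bit version of pypy.rlib.rarithmetic.intmask):

import struct

def singlefloat2uint(x):
    return struct.unpack('<I', struct.pack('<f', x))[0]

def intmask32(n):
    # wrap an unsigned 32-bit value into the signed range, like intmask()
    # does for the native word size
    n &= 0xFFFFFFFF
    return n - 0x100000000 if n >= 0x80000000 else n

bits = singlefloat2uint(-10.0)         # 0xC1200000: the sign bit is set
assert bits == 0xC1200000
assert intmask32(bits) == -1054867456  # same bits, now a signed word
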
diff --git a/pypy/jit/backend/llsupport/test/test_ffisupport.py b/pypy/jit/backend/llsupport/test/test_ffisupport.py
--- a/pypy/jit/backend/llsupport/test/test_ffisupport.py
+++ b/pypy/jit/backend/llsupport/test/test_ffisupport.py
@@ -1,24 +1,52 @@
 from pypy.rlib.libffi import types
-from pypy.jit.backend.llsupport.ffisupport import get_call_descr_dynamic, \
-    VoidCallDescr, DynamicIntCallDescr
-    
+from pypy.jit.codewriter.longlong import is_64_bit
+from pypy.jit.backend.llsupport.ffisupport import *
+
+
+class FakeCPU:
+    def __init__(self, supports_floats=False, supports_longlong=False,
+                 supports_singlefloats=False):
+        self.supports_floats = supports_floats
+        self.supports_longlong = supports_longlong
+        self.supports_singlefloats = supports_singlefloats
+
+
 def test_call_descr_dynamic():
+    args = [types.sint, types.pointer]
+    descr = get_call_descr_dynamic(FakeCPU(), args, types.sint)
+    assert isinstance(descr, DynamicIntCallDescr)
+    assert descr.arg_classes == 'ii'
 
     args = [types.sint, types.double, types.pointer]
-    descr = get_call_descr_dynamic(args, types.void)
+    descr = get_call_descr_dynamic(FakeCPU(), args, types.void)
+    assert descr is None    # missing floats
+    descr = get_call_descr_dynamic(FakeCPU(supports_floats=True),
+                                   args, types.void)
     assert isinstance(descr, VoidCallDescr)
     assert descr.arg_classes == 'ifi'
 
-    descr = get_call_descr_dynamic([], types.sint8)
+    descr = get_call_descr_dynamic(FakeCPU(), [], types.sint8)
     assert isinstance(descr, DynamicIntCallDescr)
     assert descr.get_result_size(False) == 1
     assert descr.is_result_signed() == True
 
-    descr = get_call_descr_dynamic([], types.uint8)
+    descr = get_call_descr_dynamic(FakeCPU(), [], types.uint8)
     assert isinstance(descr, DynamicIntCallDescr)
     assert descr.get_result_size(False) == 1
     assert descr.is_result_signed() == False
 
-    descr = get_call_descr_dynamic([], types.float)
-    assert descr is None # single floats are not supported so far
-    
+    if not is_64_bit:
+        descr = get_call_descr_dynamic(FakeCPU(), [], types.slonglong)
+        assert descr is None   # missing longlongs
+        descr = get_call_descr_dynamic(FakeCPU(supports_longlong=True),
+                                       [], types.slonglong)
+        assert isinstance(descr, LongLongCallDescr)
+    else:
+        assert types.slonglong is types.slong
+
+    descr = get_call_descr_dynamic(FakeCPU(), [], types.float)
+    assert descr is None   # missing singlefloats
+    descr = get_call_descr_dynamic(FakeCPU(supports_singlefloats=True),
+                                   [], types.float)
+    SingleFloatCallDescr = getCallDescrClass(rffi.FLOAT)
+    assert isinstance(descr, SingleFloatCallDescr)
diff --git a/pypy/jit/backend/model.py b/pypy/jit/backend/model.py
--- a/pypy/jit/backend/model.py
+++ b/pypy/jit/backend/model.py
@@ -8,12 +8,13 @@
     # ^^^ This is only useful on 32-bit platforms.  If True,
     # longlongs are supported by the JIT, but stored as doubles.
     # Boxes and Consts are BoxFloats and ConstFloats.
+    supports_singlefloats = False
 
     done_with_this_frame_void_v = -1
     done_with_this_frame_int_v = -1
     done_with_this_frame_ref_v = -1
     done_with_this_frame_float_v = -1
-    exit_frame_with_exception_v = -1
+    propagate_exception_v = -1
     total_compiled_loops = 0
     total_compiled_bridges = 0
     total_freed_loops = 0
diff --git a/pypy/jit/backend/test/calling_convention_test.py b/pypy/jit/backend/test/calling_convention_test.py
--- a/pypy/jit/backend/test/calling_convention_test.py
+++ b/pypy/jit/backend/test/calling_convention_test.py
@@ -290,3 +290,58 @@
                 assert abs(x - expected_result) < 0.0001
             finally:
                 del self.cpu.done_with_this_frame_float_v
+
+    def test_call_with_singlefloats(self):
+        cpu = self.cpu
+        if not cpu.supports_floats or not cpu.supports_singlefloats:
+            py.test.skip('requires floats and singlefloats')
+
+        import random
+        from pypy.rlib.libffi import types
+        from pypy.rlib.rarithmetic import r_singlefloat
+
+        def func(*args):
+            res = 0.0
+            for i, x in enumerate(args):
+                res += (i + 1.1) * float(x)
+            return res
+
+        F = lltype.Float
+        S = lltype.SingleFloat
+        I = lltype.Signed
+        floats = [random.random() - 0.5 for i in range(8)]
+        singlefloats = [r_singlefloat(random.random() - 0.5) for i in range(8)]
+        ints = [random.randrange(-99, 99) for i in range(8)]
+        for repeat in range(100):
+            args = []
+            argvalues = []
+            argslist = []
+            local_floats = list(floats)
+            local_singlefloats = list(singlefloats)
+            local_ints = list(ints)
+            for i in range(8):
+                case = random.randrange(0, 3)
+                if case == 0:
+                    args.append(F)
+                    arg = local_floats.pop()
+                    argslist.append(boxfloat(arg))
+                elif case == 1:
+                    args.append(S)
+                    arg = local_singlefloats.pop()
+                    argslist.append(BoxInt(longlong.singlefloat2int(arg)))
+                else:
+                    args.append(I)
+                    arg = local_ints.pop()
+                    argslist.append(BoxInt(arg))
+                argvalues.append(arg)
+            FUNC = self.FuncType(args, F)
+            FPTR = self.Ptr(FUNC)
+            func_ptr = llhelper(FPTR, func)
+            calldescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
+            funcbox = self.get_funcbox(cpu, func_ptr)
+
+            res = self.execute_operation(rop.CALL,
+                                         [funcbox] + argslist,
+                                         'float', descr=calldescr)
+            expected = func(*argvalues)
+            assert abs(res.getfloat() - expected) < 0.0001
diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py
--- a/pypy/jit/backend/test/runner_test.py
+++ b/pypy/jit/backend/test/runner_test.py
@@ -2734,6 +2734,65 @@
                                      'float', descr=calldescr)
         assert res.getfloatstorage() == expected
 
+    def test_singlefloat_result_of_call_direct(self):
+        if not self.cpu.supports_singlefloats:
+            py.test.skip("singlefloat test")
+        from pypy.translator.tool.cbuild import ExternalCompilationInfo
+        from pypy.rlib.rarithmetic import r_singlefloat
+        eci = ExternalCompilationInfo(
+            separate_module_sources=["""
+            float fn_test_result_of_call(float x)
+            {
+                return x / 2.0f;
+            }
+            """],
+            export_symbols=['fn_test_result_of_call'])
+        f = rffi.llexternal('fn_test_result_of_call', [lltype.SingleFloat],
+                            lltype.SingleFloat,
+                            compilation_info=eci, _nowrapper=True)
+        value = r_singlefloat(-42.5)
+        expected = r_singlefloat(-21.25)
+        assert f(value) == expected
+        #
+        FUNC = self.FuncType([lltype.SingleFloat], lltype.SingleFloat)
+        FPTR = self.Ptr(FUNC)
+        calldescr = self.cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
+        ivalue = longlong.singlefloat2int(value)
+        iexpected = longlong.singlefloat2int(expected)
+        x = self.cpu.bh_call_i(self.get_funcbox(self.cpu, f).value,
+                               calldescr, [ivalue], None, None)
+        assert x == iexpected
+
+    def test_singlefloat_result_of_call_compiled(self):
+        if not self.cpu.supports_singlefloats:
+            py.test.skip("test of singlefloat result")
+        from pypy.translator.tool.cbuild import ExternalCompilationInfo
+        from pypy.rlib.rarithmetic import r_singlefloat
+        eci = ExternalCompilationInfo(
+            separate_module_sources=["""
+            float fn_test_result_of_call(float x)
+            {
+                return x / 2.0f;
+            }
+            """],
+            export_symbols=['fn_test_result_of_call'])
+        f = rffi.llexternal('fn_test_result_of_call', [lltype.SingleFloat],
+                            lltype.SingleFloat,
+                            compilation_info=eci, _nowrapper=True)
+        value = r_singlefloat(-42.5)
+        expected = r_singlefloat(-21.25)
+        assert f(value) == expected
+        #
+        FUNC = self.FuncType([lltype.SingleFloat], lltype.SingleFloat)
+        FPTR = self.Ptr(FUNC)
+        calldescr = self.cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
+        funcbox = self.get_funcbox(self.cpu, f)
+        ivalue = longlong.singlefloat2int(value)
+        iexpected = longlong.singlefloat2int(expected)
+        res = self.execute_operation(rop.CALL, [funcbox, BoxInt(ivalue)],
+                                     'int', descr=calldescr)
+        assert res.value == iexpected
+
     def test_free_loop_and_bridges(self):
         from pypy.jit.backend.llsupport.llmodel import AbstractLLCPU
         if not isinstance(self.cpu, AbstractLLCPU):
@@ -2748,6 +2807,26 @@
         assert mem2 < mem1
         assert mem2 == mem0
 
+    def test_memoryerror(self):
+        excdescr = BasicFailDescr(666)
+        self.cpu.propagate_exception_v = self.cpu.get_fail_descr_number(
+            excdescr)
+        self.cpu.setup_once()    # xxx redo it, because we added
+                                 # propagate_exception_v
+        i0 = BoxInt()
+        p0 = BoxPtr()
+        operations = [
+            ResOperation(rop.NEWUNICODE, [i0], p0),
+            ResOperation(rop.FINISH, [p0], None, descr=BasicFailDescr(1))
+            ]
+        inputargs = [i0]
+        looptoken = LoopToken()
+        self.cpu.compile_loop(inputargs, operations, looptoken)
+        # overflowing value:
+        self.cpu.set_future_value_int(0, sys.maxint // 4 + 1)
+        fail = self.cpu.execute_token(looptoken)
+        assert fail.identifier == excdescr.identifier
+
 
 class OOtypeBackendTest(BaseBackendTest):
 
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -56,7 +56,9 @@
         self.exc = exc
         self.is_guard_not_invalidated = is_guard_not_invalidated
 
-DEBUG_COUNTER = lltype.Struct('DEBUG_COUNTER', ('i', lltype.Signed))
+DEBUG_COUNTER = lltype.Struct('DEBUG_COUNTER', ('i', lltype.Signed),
+                              ('bridge', lltype.Signed), # 0 or 1
+                              ('number', lltype.Signed))
 
 class Assembler386(object):
     _regalloc = None
@@ -89,6 +91,8 @@
         self._current_depths_cache = (0, 0)
         self.datablockwrapper = None
         self.stack_check_slowpath = 0
+        self.propagate_exception_path = 0
+        self.gcrootmap_retaddr_forced = 0
         self.teardown()
 
     def leave_jitted_hook(self):
@@ -125,6 +129,7 @@
             self._build_failure_recovery(True, withfloats=True)
             support.ensure_sse2_floats()
             self._build_float_constants()
+        self._build_propagate_exception_path()
         if gc_ll_descr.get_malloc_slowpath_addr is not None:
             self._build_malloc_slowpath()
         self._build_stack_check_slowpath()
@@ -138,6 +143,9 @@
         assert self.memcpy_addr != 0, "setup_once() not called?"
         self.current_clt = looptoken.compiled_loop_token
         self.pending_guard_tokens = []
+        if WORD == 8:
+            self.pending_memoryerror_trampoline_from = []
+            self.error_trampoline_64 = 0
         self.mc = codebuf.MachineCodeBlockWrapper()
         #assert self.datablockwrapper is None --- but obscure case
         # possible, e.g. getting MemoryError and continuing
@@ -147,6 +155,8 @@
 
     def teardown(self):
         self.pending_guard_tokens = None
+        if WORD == 8:
+            self.pending_memoryerror_trampoline_from = None
         self.mc = None
         self.looppos = -1
         self.currently_compiling_loop = None
@@ -155,9 +165,12 @@
     def finish_once(self):
         if self._debug:
             debug_start('jit-backend-counts')
-            for i in range(len(self.loop_run_counters)):
-                struct = self.loop_run_counters[i]
-                debug_print(str(i) + ':' + str(struct.i))
+            for struct in self.loop_run_counters:
+                if struct.bridge:
+                    prefix = 'bridge '
+                else:
+                    prefix = 'loop '
+                debug_print(prefix + str(struct.number) + ':' + str(struct.i))
             debug_stop('jit-backend-counts')
 
     def _build_float_constants(self):
@@ -232,15 +245,47 @@
         if self.cpu.supports_floats:          # restore the XMM registers
             for i in range(self.cpu.NUM_REGS):# from where they were saved
                 mc.MOVSD_xs(i, (WORD*2)+8*i)
+        #
+        # Note: we check this after the code above, just because the code
+        # above is more than 127 bytes on 64-bits...
+        mc.TEST_rr(eax.value, eax.value)
+        mc.J_il8(rx86.Conditions['Z'], 0) # patched later
+        jz_location = mc.get_relative_pos()
+        #
         nursery_free_adr = self.cpu.gc_ll_descr.get_nursery_free_addr()
         mc.MOV(edx, heap(nursery_free_adr))   # load this in EDX
         mc.RET()
+        #
+        # If the slowpath malloc failed, we raise a MemoryError that
+        # always interrupts the current loop, as a "good enough"
+        # approximation.  Also note that we didn't RET from this helper;
+        # but the code we jump to will actually restore the stack
+        # position based on EBP, which will get us out of here for free.
+        offset = mc.get_relative_pos() - jz_location
+        assert 0 < offset <= 127
+        mc.overwrite(jz_location-1, chr(offset))
+        mc.JMP(imm(self.propagate_exception_path))
+        #
         rawstart = mc.materialize(self.cpu.asmmemmgr, [])
         self.malloc_slowpath2 = rawstart
 
+    def _build_propagate_exception_path(self):
+        if self.cpu.propagate_exception_v < 0:
+            return      # not supported (for tests, or non-translated)
+        #
+        self.mc = codebuf.MachineCodeBlockWrapper()
+        # call on_leave_jitted_save_exc()
+        addr = self.cpu.get_on_leave_jitted_int(save_exception=True)
+        self.mc.CALL(imm(addr))
+        self.mc.MOV_ri(eax.value, self.cpu.propagate_exception_v)
+        self._call_footer()
+        rawstart = self.mc.materialize(self.cpu.asmmemmgr, [])
+        self.propagate_exception_path = rawstart
+        self.mc = None
+
     def _build_stack_check_slowpath(self):
         _, _, slowpathaddr = self.cpu.insert_stack_check()
-        if slowpathaddr == 0 or self.cpu.exit_frame_with_exception_v < 0:
+        if slowpathaddr == 0 or self.cpu.propagate_exception_v < 0:
             return      # no stack check (for tests, or non-translated)
         #
         # make a "function" that is called immediately at the start of
@@ -296,19 +341,11 @@
         offset = mc.get_relative_pos() - jnz_location
         assert 0 < offset <= 127
         mc.overwrite(jnz_location-1, chr(offset))
-        # clear the exception from the global position
-        mc.MOV(eax, heap(self.cpu.pos_exc_value()))
-        mc.MOV(heap(self.cpu.pos_exception()), imm0)
-        mc.MOV(heap(self.cpu.pos_exc_value()), imm0)
-        # save the current exception instance into fail_boxes_ptr[0]
-        adr = self.fail_boxes_ptr.get_addr_for_num(0)
-        mc.MOV(heap(adr), eax)
-        # call the helper function to set the GC flag on the fail_boxes_ptr
-        # array (note that there is no exception any more here)
-        addr = self.cpu.get_on_leave_jitted_int(save_exception=False)
+        # call on_leave_jitted_save_exc()
+        addr = self.cpu.get_on_leave_jitted_int(save_exception=True)
         mc.CALL(imm(addr))
         #
-        mc.MOV_ri(eax.value, self.cpu.exit_frame_with_exception_v)
+        mc.MOV_ri(eax.value, self.cpu.propagate_exception_v)
         #
         # footer -- note the ADD, which skips the return address of this
         # function, and will instead return to the caller's caller.  Note
@@ -321,6 +358,7 @@
         self.stack_check_slowpath = rawstart
 
     @staticmethod
+    @rgc.no_collect
     def _release_gil_asmgcc(css):
         # similar to trackgcroot.py:pypy_asm_stackwalk, first part
         from pypy.rpython.memory.gctransform import asmgcroot
@@ -336,6 +374,7 @@
             before()
 
     @staticmethod
+    @rgc.no_collect
     def _reacquire_gil_asmgcc(css):
         # first reacquire the GIL
         after = rffi.aroundstate.after
@@ -350,12 +389,14 @@
         next.prev = prev
 
     @staticmethod
+    @rgc.no_collect
     def _release_gil_shadowstack():
         before = rffi.aroundstate.before
         if before:
             before()
 
     @staticmethod
+    @rgc.no_collect
     def _reacquire_gil_shadowstack():
         after = rffi.aroundstate.after
         if after:
@@ -404,7 +445,7 @@
         self.setup(looptoken)
         self.currently_compiling_loop = looptoken
         if log:
-            self._register_counter()
+            self._register_counter(False, looptoken.number)
             operations = self._inject_debugging_code(looptoken, operations)
 
         regalloc = RegAlloc(self, self.cpu.translate_support_code)
@@ -473,7 +514,7 @@
 
         self.setup(original_loop_token)
         if log:
-            self._register_counter()
+            self._register_counter(True, descr_number)
             operations = self._inject_debugging_code(faildescr, operations)
 
         arglocs = self.rebuild_faillocs_from_descr(failure_recovery)
@@ -520,6 +561,8 @@
         # at the end of self.mc.
         for tok in self.pending_guard_tokens:
             tok.pos_recovery_stub = self.generate_quick_failure(tok)
+        if WORD == 8 and len(self.pending_memoryerror_trampoline_from) > 0:
+            self.error_trampoline_64 = self.generate_propagate_error_64()
 
     def patch_pending_failure_recoveries(self, rawstart):
         # after we wrote the assembler to raw memory, set up
@@ -556,6 +599,12 @@
                 # less, we would run into the issue that overwriting the
                 # 5 bytes here might get a few nonsense bytes at the
                 # return address of the following CALL.
+        if WORD == 8:
+            for pos_after_jz in self.pending_memoryerror_trampoline_from:
+                assert self.error_trampoline_64 != 0     # only if non-empty
+                mc = codebuf.MachineCodeBlockWrapper()
+                mc.writeimm32(self.error_trampoline_64 - pos_after_jz)
+                mc.copy_to_raw_memory(rawstart + pos_after_jz - 4)
 
     def get_asmmemmgr_blocks(self, looptoken):
         clt = looptoken.compiled_loop_token
@@ -570,7 +619,7 @@
         return self.mc.materialize(self.cpu.asmmemmgr, allblocks,
                                    self.cpu.gc_ll_descr.gcrootmap)
 
-    def _register_counter(self):
+    def _register_counter(self, bridge, number):
         if self._debug:
             # YYY very minor leak -- we need the counters to stay alive
             # forever, just because we want to report them at the end
@@ -578,6 +627,8 @@
             struct = lltype.malloc(DEBUG_COUNTER, flavor='raw',
                                    track_allocation=False)
             struct.i = 0
+            struct.bridge = int(bridge)
+            struct.number = number
             self.loop_run_counters.append(struct)
 
     def _find_failure_recovery_bytecode(self, faildescr):
@@ -1068,9 +1119,10 @@
                     self.implement_guard(guard_token, checkfalsecond)
         return genop_cmp_guard_float
 
-    def _emit_call(self, force_index, x, arglocs, start=0, tmp=eax):
+    def _emit_call(self, force_index, x, arglocs, start=0, tmp=eax,
+                   argtypes=None):
         if IS_X86_64:
-            return self._emit_call_64(force_index, x, arglocs, start)
+            return self._emit_call_64(force_index, x, arglocs, start, argtypes)
 
         p = 0
         n = len(arglocs)
@@ -1098,12 +1150,13 @@
         self.mc.CALL(x)
         self.mark_gc_roots(force_index)
 
-    def _emit_call_64(self, force_index, x, arglocs, start):
+    def _emit_call_64(self, force_index, x, arglocs, start, argtypes):
         src_locs = []
         dst_locs = []
         xmm_src_locs = []
         xmm_dst_locs = []
         pass_on_stack = []
+        singlefloats = None
 
         # In reverse order for use with pop()
         unused_gpr = [r9, r8, ecx, edx, esi, edi]
@@ -1123,6 +1176,11 @@
                     xmm_dst_locs.append(unused_xmm.pop())
                 else:
                     pass_on_stack.append(loc)
+            elif (argtypes is not None and argtypes[i-start] == 'S' and
+                  len(unused_xmm) > 0):
+                # Singlefloat argument
+                if singlefloats is None: singlefloats = []
+                singlefloats.append((loc, unused_xmm.pop()))
             else:
                 if len(unused_gpr) > 0:
                     src_locs.append(loc)
@@ -1150,9 +1208,15 @@
                 else:
                     self.mc.MOV_sr(i*WORD, loc.value)
 
-        # Handle register arguments
+        # Handle register arguments: first remap the xmm arguments
+        remap_frame_layout(self, xmm_src_locs, xmm_dst_locs,
+                           X86_64_XMM_SCRATCH_REG)
+        # Load the singlefloat arguments from main regs or stack to xmm regs
+        if singlefloats is not None:
+            for src, dst in singlefloats:
+                self.mc.MOVD(dst, src)
+        # Finally remap the arguments in the main regs
         remap_frame_layout(self, src_locs, dst_locs, X86_64_SCRATCH_REG)
-        remap_frame_layout(self, xmm_src_locs, xmm_dst_locs, X86_64_XMM_SCRATCH_REG)
 
         self._regalloc.reserve_param(len(pass_on_stack))
         self.mc.CALL(x)
@@ -1267,6 +1331,20 @@
     def genop_cast_int_to_float(self, op, arglocs, resloc):
         self.mc.CVTSI2SD(resloc, arglocs[0])
 
+    def genop_cast_float_to_singlefloat(self, op, arglocs, resloc):
+        loc0, loctmp = arglocs
+        self.mc.CVTSD2SS(loctmp, loc0)
+        assert isinstance(resloc, RegLoc)
+        assert isinstance(loctmp, RegLoc)
+        self.mc.MOVD_rx(resloc.value, loctmp.value)
+
+    def genop_cast_singlefloat_to_float(self, op, arglocs, resloc):
+        loc0, = arglocs
+        assert isinstance(resloc, RegLoc)
+        assert isinstance(loc0, RegLoc)
+        self.mc.MOVD_xr(resloc.value, loc0.value)
+        self.mc.CVTSS2SD_xx(resloc.value, resloc.value)
+
     def genop_guard_int_is_true(self, op, guard_op, guard_token, arglocs, resloc):
         guard_opnum = guard_op.getopnum()
         self.mc.CMP(arglocs[0], imm0)
@@ -1388,7 +1466,7 @@
         assert isinstance(loc_vtable, ImmedLoc)
         arglocs = arglocs[:-1]
         self.call(self.malloc_func_addr, arglocs, eax)
-        # xxx ignore NULL returns for now
+        self.propagate_memoryerror_if_eax_is_null()
         self.set_vtable(eax, loc_vtable)
 
     def set_vtable(self, loc, loc_vtable):
@@ -1407,18 +1485,35 @@
     def genop_new(self, op, arglocs, result_loc):
         assert result_loc is eax
         self.call(self.malloc_func_addr, arglocs, eax)
+        self.propagate_memoryerror_if_eax_is_null()
 
     def genop_new_array(self, op, arglocs, result_loc):
         assert result_loc is eax
         self.call(self.malloc_array_func_addr, arglocs, eax)
+        self.propagate_memoryerror_if_eax_is_null()
 
     def genop_newstr(self, op, arglocs, result_loc):
         assert result_loc is eax
         self.call(self.malloc_str_func_addr, arglocs, eax)
+        self.propagate_memoryerror_if_eax_is_null()
 
     def genop_newunicode(self, op, arglocs, result_loc):
         assert result_loc is eax
         self.call(self.malloc_unicode_func_addr, arglocs, eax)
+        self.propagate_memoryerror_if_eax_is_null()
+
+    def propagate_memoryerror_if_eax_is_null(self):
+        # if self.propagate_exception_path == 0 (tests), this may jump to 0
+        # and segfault.  Too bad.  The alternative is to continue anyway
+        # with eax==0, but that will segfault too.
+        self.mc.TEST_rr(eax.value, eax.value)
+        if WORD == 4:
+            self.mc.J_il(rx86.Conditions['Z'], self.propagate_exception_path)
+            self.mc.add_pending_relocation()
+        elif WORD == 8:
+            self.mc.J_il(rx86.Conditions['Z'], 0)
+            pos = self.mc.get_relative_pos()
+            self.pending_memoryerror_trampoline_from.append(pos)
 
     # ----------
 
@@ -1690,6 +1785,12 @@
         return GuardToken(faildescr, failargs, fail_locs, exc,
                           is_guard_not_invalidated)
 
+    def generate_propagate_error_64(self):
+        assert WORD == 8
+        startpos = self.mc.get_relative_pos()
+        self.mc.JMP(imm(self.propagate_exception_path))
+        return startpos
+
     def generate_quick_failure(self, guardtok):
         """Generate the initial code for handling a failure.  We try to
         keep it as compact as possible.
@@ -2025,7 +2126,8 @@
         else:
             tmp = eax
 
-        self._emit_call(force_index, x, arglocs, 3, tmp=tmp)
+        self._emit_call(force_index, x, arglocs, 3, tmp=tmp,
+                        argtypes=op.getdescr().get_arg_types())
 
         if IS_X86_32 and isinstance(resloc, StackLoc) and resloc.width == 8:
             # a float or a long long return
@@ -2037,7 +2139,19 @@
                 #     and this way is simpler also because the result loc
                 #     can just be always a stack location
             else:
-                self.mc.FSTP_b(resloc.value)   # float return
+                self.mc.FSTPL_b(resloc.value)   # float return
+        elif op.getdescr().get_return_type() == 'S':
+            # singlefloat return
+            assert resloc is eax
+            if IS_X86_32:
+                # must convert ST(0) to a 32-bit singlefloat and load it into EAX
+                # mess mess mess
+                self.mc.SUB_ri(esp.value, 4)
+                self.mc.FSTPS_s(0)
+                self.mc.POP_r(eax.value)
+            elif IS_X86_64:
+                # must copy from the lower 32 bits of XMM0 into eax
+                self.mc.MOVD_rx(eax.value, xmm0.value)
         elif size == WORD:
             assert resloc is eax or resloc is xmm0    # a full word
         elif size == 0:
@@ -2109,13 +2223,27 @@
                 css = get_ebp_ofs(pos + use_words - 1)
                 self._regalloc.close_stack_struct = css
             # The location where the future CALL will put its return address
-            # will be [ESP-WORD], so save that as the next frame's top address
-            self.mc.LEA_rs(eax.value, -WORD)        # LEA EAX, [ESP-4]
+            # will be [ESP-WORD].  But we can't use that as the next frame's
+            # top address!  As the code after releasegil() runs without the
+            # GIL, it might not be set yet by the time we need it (very
+            # unlikely), or it might be overwritten by the following call
+            # to reacquiregil() (much more likely).  So we hack even more
+            # and use a dummy location containing a dummy value (a pointer
+            # to itself) which we pretend is the return address :-/ :-/ :-/
+            # This prevents us from storing any %esp-based stack locations,
+            # but we don't do that so far.
+            adr = self.datablockwrapper.malloc_aligned(WORD, WORD)
+            rffi.cast(rffi.CArrayPtr(lltype.Signed), adr)[0] = adr
+            self.gcrootmap_retaddr_forced = adr
             frame_ptr = css + WORD * (2+asmgcroot.FRAME_PTR)
-            self.mc.MOV_br(frame_ptr, eax.value)    # MOV [css.frame], EAX
+            if rx86.fits_in_32bits(adr):
+                self.mc.MOV_bi(frame_ptr, adr)          # MOV [css.frame], adr
+            else:
+                self.mc.MOV_ri(eax.value, adr)          # MOV EAX, adr
+                self.mc.MOV_br(frame_ptr, eax.value)    # MOV [css.frame], EAX
             # Save ebp
             index_of_ebp = css + WORD * (2+asmgcroot.INDEX_OF_EBP)
-            self.mc.MOV_br(index_of_ebp, ebp.value) # MOV [css.ebp], EBP
+            self.mc.MOV_br(index_of_ebp, ebp.value)     # MOV [css.ebp], EBP
             # Call the closestack() function (also releasing the GIL)
             if IS_X86_32:
                 reg = eax
@@ -2143,6 +2271,9 @@
         if gcrootmap.is_shadow_stack:
             args = []
         else:
+            assert self.gcrootmap_retaddr_forced == -1, (
+                      "missing mark_gc_roots() in CALL_RELEASE_GIL")
+            self.gcrootmap_retaddr_forced = 0
             css = self._regalloc.close_stack_struct
             assert css != 0
             if IS_X86_32:
@@ -2195,7 +2326,7 @@
         self._emit_call(fail_index, imm(asm_helper_adr), [eax, arglocs[1]], 0,
                         tmp=ecx)
         if IS_X86_32 and isinstance(result_loc, StackLoc) and result_loc.type == FLOAT:
-            self.mc.FSTP_b(result_loc.value)
+            self.mc.FSTPL_b(result_loc.value)
         #else: result_loc is already either eax or None, checked below
         self.mc.JMP_l8(0) # jump to done, patched later
         jmp_location = self.mc.get_relative_pos()
@@ -2393,7 +2524,13 @@
             if gcrootmap.is_shadow_stack:
                 gcrootmap.write_callshape(mark, force_index)
             else:
-                self.mc.insert_gcroot_marker(mark)
+                if self.gcrootmap_retaddr_forced == 0:
+                    self.mc.insert_gcroot_marker(mark)   # common case
+                else:
+                    assert self.gcrootmap_retaddr_forced != -1, (
+                              "two mark_gc_roots() in a CALL_RELEASE_GIL")
+                    gcrootmap.put(self.gcrootmap_retaddr_forced, mark)
+                    self.gcrootmap_retaddr_forced = -1
 
     def target_arglocs(self, loop_token):
         return loop_token._x86_arglocs
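
In _emit_call_64() above, singlefloat arguments live in integer locations
(BoxInts holding the 32-bit pattern) but the x86-64 calling convention wants
them in XMM registers, hence the extra MOVD step after the double arguments
are remapped.  A simplified sketch of the classification, with made-up
location names and ignoring the scratch-register details:

def classify_call_args_64(arglocs, argtypes):
    # doubles and singlefloats both take XMM registers; singlefloats need a
    # MOVD from their integer location first; everything else takes the next
    # general-purpose register, and the overflow goes on the stack
    gprs = ['rdi', 'rsi', 'rdx', 'rcx', 'r8', 'r9']
    xmms = ['xmm%d' % i for i in range(8)]
    plan = []
    for loc, tp in zip(arglocs, argtypes):
        if tp == 'f' and xmms:
            plan.append(('mov', loc, xmms.pop(0)))
        elif tp == 'S' and xmms:
            plan.append(('movd', loc, xmms.pop(0)))
        elif tp not in ('f', 'S') and gprs:
            plan.append(('mov', loc, gprs.pop(0)))
        else:
            plan.append(('push', loc, 'stack'))
    return plan

plan = classify_call_args_64(['a', 'b', 'c'], ['f', 'S', 'i'])
assert plan == [('mov', 'a', 'xmm0'), ('movd', 'b', 'xmm1'),
                ('mov', 'c', 'rdi')]
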
diff --git a/pypy/jit/backend/x86/codebuf.py b/pypy/jit/backend/x86/codebuf.py
--- a/pypy/jit/backend/x86/codebuf.py
+++ b/pypy/jit/backend/x86/codebuf.py
@@ -25,8 +25,11 @@
         self.init_block_builder()
         # a list of relative positions; for each position p, the bytes
         # at [p-4:p] encode an absolute address that will need to be
-        # made relative.
-        self.relocations = []
+        # made relative.  Only works on 32-bit!
+        if WORD == 4:
+            self.relocations = []
+        else:
+            self.relocations = None
         #
         # ResOperation --> offset in the assembly.
         # ops_offset[None] represents the beginning of the code after the last op
@@ -42,9 +45,10 @@
 
     def copy_to_raw_memory(self, addr):
         self._copy_to_raw_memory(addr)
-        for reloc in self.relocations:
-            p = addr + reloc
-            adr = rffi.cast(rffi.LONGP, p - WORD)
-            adr[0] = intmask(adr[0] - p)
+        if self.relocations is not None:
+            for reloc in self.relocations:
+                p = addr + reloc
+                adr = rffi.cast(rffi.LONGP, p - WORD)
+                adr[0] = intmask(adr[0] - p)
         valgrind.discard_translations(addr, self.get_relative_pos())
         self._dump(addr, "jit-backend-dump", backend_name)
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -705,6 +705,17 @@
         self.Perform(op, [loc0], loc1)
         self.rm.possibly_free_var(op.getarg(0))
 
+    def consider_cast_float_to_singlefloat(self, op):
+        loc0 = self.xrm.make_sure_var_in_reg(op.getarg(0))
+        loc1 = self.rm.force_allocate_reg(op.result)
+        self.xrm.possibly_free_var(op.getarg(0))
+        tmpxvar = TempBox()
+        loctmp = self.xrm.force_allocate_reg(tmpxvar)   # may be equal to loc0
+        self.xrm.possibly_free_var(tmpxvar)
+        self.Perform(op, [loc0, loctmp], loc1)
+
+    consider_cast_singlefloat_to_float = consider_cast_int_to_float
+
     def _consider_llong_binop_xx(self, op):
         # must force both arguments into xmm registers, because we don't
         # know if they will be suitably aligned.  Exception: if the second
diff --git a/pypy/jit/backend/x86/regloc.py b/pypy/jit/backend/x86/regloc.py
--- a/pypy/jit/backend/x86/regloc.py
+++ b/pypy/jit/backend/x86/regloc.py
@@ -521,6 +521,8 @@
     UCOMISD = _binaryop('UCOMISD')
     CVTSI2SD = _binaryop('CVTSI2SD')
     CVTTSD2SI = _binaryop('CVTTSD2SI')
+    CVTSD2SS = _binaryop('CVTSD2SS')
+    CVTSS2SD = _binaryop('CVTSS2SD')
     
     SQRTSD = _binaryop('SQRTSD')
 
@@ -534,6 +536,8 @@
     PXOR  = _binaryop('PXOR')
     PCMPEQD = _binaryop('PCMPEQD')
 
+    MOVD = _binaryop('MOVD')
+
     CALL = _relative_unaryop('CALL')
     JMP = _relative_unaryop('JMP')
 
diff --git a/pypy/jit/backend/x86/runner.py b/pypy/jit/backend/x86/runner.py
--- a/pypy/jit/backend/x86/runner.py
+++ b/pypy/jit/backend/x86/runner.py
@@ -19,6 +19,7 @@
 class AbstractX86CPU(AbstractLLCPU):
     debug = True
     supports_floats = True
+    supports_singlefloats = True
 
     BOOTSTRAP_TP = lltype.FuncType([], lltype.Signed)
     dont_keepalive_stuff = False # for tests
diff --git a/pypy/jit/backend/x86/rx86.py b/pypy/jit/backend/x86/rx86.py
--- a/pypy/jit/backend/x86/rx86.py
+++ b/pypy/jit/backend/x86/rx86.py
@@ -573,7 +573,8 @@
     BTS_jr = insn(rex_w, '\x0F\xAB', register(2,8), abs_, immediate(1))
 
     # x87 instructions
-    FSTP_b = insn('\xDD', orbyte(3<<3), stack_bp(1))
+    FSTPL_b = insn('\xDD', orbyte(3<<3), stack_bp(1)) # rffi.DOUBLE ('as' wants L??)
+    FSTPS_s = insn('\xD9', orbyte(3<<3), stack_sp(1)) # lltype.SingleFloat
 
     # ------------------------------ Random mess -----------------------
     RDTSC = insn('\x0F\x31')
@@ -590,8 +591,18 @@
     CVTTSD2SI_rx = xmminsn('\xF2', rex_w, '\x0F\x2C', register(1, 8), register(2), '\xC0')
     CVTTSD2SI_rb = xmminsn('\xF2', rex_w, '\x0F\x2C', register(1, 8), stack_bp(2))
 
-    MOVD_rx = xmminsn('\x66', rex_w, '\x0F\x7E', register(2, 8), register(1), '\xC0')
-    MOVD_xr = xmminsn('\x66', rex_w, '\x0F\x6E', register(1, 8), register(2), '\xC0')
+    CVTSD2SS_xx = xmminsn('\xF2', rex_nw, '\x0F\x5A',
+                          register(1, 8), register(2), '\xC0')
+    CVTSD2SS_xb = xmminsn('\xF2', rex_nw, '\x0F\x5A',
+                          register(1, 8), stack_bp(2))
+    CVTSS2SD_xx = xmminsn('\xF3', rex_nw, '\x0F\x5A',
+                          register(1, 8), register(2), '\xC0')
+    CVTSS2SD_xb = xmminsn('\xF3', rex_nw, '\x0F\x5A',
+                          register(1, 8), stack_bp(2))
+
+    MOVD_rx = xmminsn('\x66', rex_nw, '\x0F\x7E', register(2, 8), register(1), '\xC0')
+    MOVD_xr = xmminsn('\x66', rex_nw, '\x0F\x6E', register(1, 8), register(2), '\xC0')
+    MOVD_xb = xmminsn('\x66', rex_nw, '\x0F\x6E', register(1, 8), stack_bp(2))
 
     PSRAD_xi = xmminsn('\x66', rex_nw, '\x0F\x72', register(1), '\xE0', immediate(2, 'b'))
 
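
CVTSD2SS and CVTSS2SD, encoded above, are the SSE conversions between double
and single precision that back the new cast_float_to_singlefloat and
cast_singlefloat_to_float operations.  Numerically they are a rounding step
and a widening step; a quick pure-Python check of the rounding (struct
performs the same single-precision rounding, illustration only):

import struct

def round_to_single(x):
    # what CVTSD2SS does to the value (ignoring the register plumbing)
    return struct.unpack('<f', struct.pack('<f', x))[0]

assert round_to_single(1.5) == 1.5      # exactly representable, unchanged
assert round_to_single(0.1) != 0.1      # 0.1 picks up single-precision error
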
diff --git a/pypy/jit/backend/x86/test/test_regloc.py b/pypy/jit/backend/x86/test/test_regloc.py
--- a/pypy/jit/backend/x86/test/test_regloc.py
+++ b/pypy/jit/backend/x86/test/test_regloc.py
@@ -62,7 +62,7 @@
             assert mc.relocations == [5]
             expected = "\xE8" + struct.pack('<i', target - (rawstart + 5))
         elif IS_X86_64:
-            assert mc.relocations == []
+            assert mc.relocations is None
             if 0 <= target <= 0xffffffff:
                 assert length == 9
                 expected = (
diff --git a/pypy/jit/backend/x86/test/test_runner.py b/pypy/jit/backend/x86/test/test_runner.py
--- a/pypy/jit/backend/x86/test/test_runner.py
+++ b/pypy/jit/backend/x86/test/test_runner.py
@@ -463,7 +463,7 @@
             self.cpu.finish_once()
         finally:
             debug._log = None
-        assert ('jit-backend-counts', [('debug_print', '0:10')]) in dlog
+        assert ('jit-backend-counts', [('debug_print', 'loop -1:10')]) in dlog
 
     def test_debugger_checksum(self):
         loop = """
diff --git a/pypy/jit/backend/x86/test/test_rx86_32_auto_encoding.py b/pypy/jit/backend/x86/test/test_rx86_32_auto_encoding.py
--- a/pypy/jit/backend/x86/test/test_rx86_32_auto_encoding.py
+++ b/pypy/jit/backend/x86/test/test_rx86_32_auto_encoding.py
@@ -36,6 +36,14 @@
 def hexdump(s):
     return ' '.join(["%02X" % ord(c) for c in s])
 
+def reduce_to_32bit(s):
+    if s[:2] != '%r':
+        return s
+    if s[2:].isdigit():
+        return s + 'd'
+    else:
+        return '%e' + s[2:]
+
 # ____________________________________________________________
 
 COUNT1 = 15
@@ -180,12 +188,14 @@
     ##        for m, extra in args:
     ##            if m in (i386.MODRM, i386.MODRM8) or all:
     ##                suffix = suffixes[sizes[m]] + suffix
-            if argmodes and not self.is_xmm_insn:
+            if (argmodes and not self.is_xmm_insn
+                         and not instrname.startswith('FSTP')):
                 suffix = suffixes[self.WORD]
             # Special case: On 64-bit CPUs, rx86 assumes 64-bit integer
             # operands when converting to/from floating point, so we need to
             # indicate that with a suffix
-            if (self.WORD == 8) and instrname.startswith('CVT'):
+            if (self.WORD == 8) and (instrname.startswith('CVT') and
+                                     'SI' in instrname):
                 suffix = suffixes[self.WORD]
 
             if instr_suffix is not None:
@@ -218,10 +228,10 @@
                 and ops[1].startswith('%r')):
                 # movq $xxx, %rax => movl $xxx, %eax
                 suffix = 'l'
-                if ops[1][2:].isdigit():
-                    ops[1] += 'd'
-                else:
-                    ops[1] = '%e' + ops[1][2:]
+                ops[1] = reduce_to_32bit(ops[1])
+            if instrname.lower() == 'movd':
+                ops[0] = reduce_to_32bit(ops[0])
+                ops[1] = reduce_to_32bit(ops[1])
             #
             op = '\t%s%s %s%s' % (instrname.lower(), suffix,
                                   ', '.join(ops), following)
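
The new reduce_to_32bit helper maps the GNU assembler's 64-bit register names to
their 32-bit aliases: numbered registers gain a "d" suffix, the classic ones swap
the "r" prefix for "e".  A standalone restatement of the mapping with the expected
outputs (plain Python, independent of the test harness):

    def reduce_to_32bit(s):
        if s[:2] != '%r':
            return s                 # not a 64-bit register operand
        if s[2:].isdigit():
            return s + 'd'           # %r8  -> %r8d
        return '%e' + s[2:]          # %rax -> %eax

    assert reduce_to_32bit('%rax') == '%eax'
    assert reduce_to_32bit('%r8') == '%r8d'
    assert reduce_to_32bit('$42') == '$42'    # immediates pass through unchanged
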
diff --git a/pypy/jit/codewriter/assembler.py b/pypy/jit/codewriter/assembler.py
--- a/pypy/jit/codewriter/assembler.py
+++ b/pypy/jit/codewriter/assembler.py
@@ -76,6 +76,8 @@
                 TYPE = llmemory.Address
             if TYPE == llmemory.Address:
                 value = heaptracker.adr2int(value)
+            if TYPE is lltype.SingleFloat:
+                value = longlong.singlefloat2int(value)
             if not isinstance(value, (llmemory.AddressAsInt,
                                       ComputedIntSymbolic)):
                 value = lltype.cast_primitive(lltype.Signed, value)
diff --git a/pypy/jit/codewriter/jtransform.py b/pypy/jit/codewriter/jtransform.py
--- a/pypy/jit/codewriter/jtransform.py
+++ b/pypy/jit/codewriter/jtransform.py
@@ -9,7 +9,7 @@
 from pypy.objspace.flow.model import SpaceOperation, Variable, Constant, c_last_exception
 from pypy.rlib import objectmodel
 from pypy.rlib.jit import _we_are_jitted
-from pypy.rpython.lltypesystem import lltype, llmemory, rstr, rclass
+from pypy.rpython.lltypesystem import lltype, llmemory, rstr, rclass, rffi
 from pypy.rpython.rclass import IR_QUASIIMMUTABLE, IR_QUASIIMMUTABLE_ARRAY
 from pypy.translator.simplify import get_funcobj
 from pypy.translator.unsimplify import varoftype
@@ -198,7 +198,6 @@
             self.vable_array_vars[op.result]= self.vable_array_vars[op.args[0]]
 
     rewrite_op_cast_pointer = rewrite_op_same_as
-    rewrite_op_cast_opaque_ptr = rewrite_op_same_as   # rlib.rerased
     def rewrite_op_cast_bool_to_int(self, op): pass
     def rewrite_op_cast_bool_to_uint(self, op): pass
     def rewrite_op_cast_char_to_int(self, op): pass
@@ -785,7 +784,6 @@
             op2.result = op.result
             return op2
         elif toll:
-            from pypy.rpython.lltypesystem import rffi
             size, unsigned = rffi.size_and_sign(op.args[0].concretetype)
             if unsigned:
                 INTERMEDIATE = lltype.Unsigned
@@ -807,20 +805,27 @@
             return self.force_cast_without_longlong(op.args[0], op.result)
 
     def force_cast_without_longlong(self, v_arg, v_result):
-        from pypy.rpython.lltypesystem.rffi import size_and_sign, sizeof, FLOAT
-        #
-        if (v_result.concretetype in (FLOAT, lltype.Float) or
-            v_arg.concretetype in (FLOAT, lltype.Float)):
-            assert (v_result.concretetype == lltype.Float and
-                    v_arg.concretetype == lltype.Float), "xxx unsupported cast"
+        if v_result.concretetype == v_arg.concretetype:
             return
-        #
-        size2, unsigned2 = size_and_sign(v_result.concretetype)
-        assert size2 <= sizeof(lltype.Signed)
-        if size2 == sizeof(lltype.Signed):
+        if v_arg.concretetype == rffi.FLOAT:
+            assert v_result.concretetype == lltype.Float, "cast %s -> %s" % (
+                v_arg.concretetype, v_result.concretetype)
+            return SpaceOperation('cast_singlefloat_to_float', [v_arg],
+                                  v_result)
+        if v_result.concretetype == rffi.FLOAT:
+            assert v_arg.concretetype == lltype.Float, "cast %s -> %s" % (
+                v_arg.concretetype, v_result.concretetype)
+            return SpaceOperation('cast_float_to_singlefloat', [v_arg],
+                                  v_result)
+        return self.force_cast_without_singlefloat(v_arg, v_result)
+
+    def force_cast_without_singlefloat(self, v_arg, v_result):
+        size2, unsigned2 = rffi.size_and_sign(v_result.concretetype)
+        assert size2 <= rffi.sizeof(lltype.Signed)
+        if size2 == rffi.sizeof(lltype.Signed):
             return     # the target type is LONG or ULONG
-        size1, unsigned1 = size_and_sign(v_arg.concretetype)
-        assert size1 <= sizeof(lltype.Signed)
+        size1, unsigned1 = rffi.size_and_sign(v_arg.concretetype)
+        assert size1 <= rffi.sizeof(lltype.Signed)
         #
         def bounds(size, unsigned):
             if unsigned:
@@ -849,7 +854,6 @@
         return result
 
     def rewrite_op_direct_ptradd(self, op):
-        from pypy.rpython.lltypesystem import rffi
         # xxx otherwise, not implemented:
         assert op.args[0].concretetype == rffi.CCHARP
         #
diff --git a/pypy/jit/codewriter/longlong.py b/pypy/jit/codewriter/longlong.py
--- a/pypy/jit/codewriter/longlong.py
+++ b/pypy/jit/codewriter/longlong.py
@@ -7,7 +7,8 @@
 """
 
 import sys
-from pypy.rpython.lltypesystem import lltype
+from pypy.rpython.lltypesystem import lltype, rffi
+from pypy.rlib import rarithmetic, longlong2float
 
 
 if sys.maxint > 2147483647:
@@ -31,8 +32,6 @@
     # ---------- 32-bit platform ----------
     # the type FloatStorage is r_longlong, and conversion is needed
 
-    from pypy.rlib import rarithmetic, longlong2float
-
     is_64_bit = False
     supports_longlong = True
     r_float_storage = rarithmetic.r_longlong
@@ -41,9 +40,19 @@
     getfloatstorage = longlong2float.float2longlong
     getrealfloat    = longlong2float.longlong2float
     gethash         = lambda xll: rarithmetic.intmask(xll - (xll >> 32))
-    is_longlong     = lambda TYPE: (TYPE == lltype.SignedLongLong or
-                                    TYPE == lltype.UnsignedLongLong)
+    is_longlong     = lambda TYPE: (TYPE is lltype.SignedLongLong or
+                                    TYPE is lltype.UnsignedLongLong)
 
     # -------------------------------------
 
 ZEROF = getfloatstorage(0.0)
+
+# ____________________________________________________________
+
+def int2singlefloat(x):
+    x = rffi.r_uint(x)
+    return longlong2float.uint2singlefloat(x)
+
+def singlefloat2int(x):
+    x = longlong2float.singlefloat2uint(x)
+    return rffi.cast(lltype.Signed, x)
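
The two new helpers store a single-precision float in an ordinary integer by
reinterpreting its 32-bit pattern (and back).  A minimal sketch of the same
round-trip in plain Python, using the standard struct module instead of
rffi/longlong2float (helper names here are only for illustration):

    import struct

    def singlefloat_to_int(x):
        # pack as a 32-bit float, reinterpret the same four bytes as an int
        return struct.unpack('<i', struct.pack('<f', x))[0]

    def int_to_singlefloat(n):
        # the inverse reinterpretation; values come back rounded to 32 bits
        return struct.unpack('<f', struct.pack('<i', n))[0]

    assert int_to_singlefloat(singlefloat_to_int(2.5)) == 2.5
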
diff --git a/pypy/jit/codewriter/policy.py b/pypy/jit/codewriter/policy.py
--- a/pypy/jit/codewriter/policy.py
+++ b/pypy/jit/codewriter/policy.py
@@ -12,6 +12,7 @@
         self.unsafe_loopy_graphs = set()
         self.supports_floats = False
         self.supports_longlong = False
+        self.supports_singlefloats = False
 
     def set_supports_floats(self, flag):
         self.supports_floats = flag
@@ -19,6 +20,9 @@
     def set_supports_longlong(self, flag):
         self.supports_longlong = flag
 
+    def set_supports_singlefloats(self, flag):
+        self.supports_singlefloats = flag
+
     def dump_unsafe_loops(self):
         f = udir.join("unsafe-loops.txt").open('w')
         strs = [str(graph) for graph in self.unsafe_loopy_graphs]
@@ -58,8 +62,9 @@
                     func, '_jit_unroll_safe_', False)
 
         unsupported = contains_unsupported_variable_type(graph,
-                                                         self.supports_floats,
-                                                         self.supports_longlong)
+                            self.supports_floats,
+                            self.supports_longlong,
+                            self.supports_singlefloats)
         res = see_function and not unsupported
         if res and contains_loop:
             self.unsafe_loopy_graphs.add(graph)
@@ -80,17 +85,24 @@
         return res
 
 def contains_unsupported_variable_type(graph, supports_floats,
-                                       supports_longlong):
+                                              supports_longlong,
+                                              supports_singlefloats):
     getkind = history.getkind
     try:
         for block in graph.iterblocks():
             for v in block.inputargs:
-                getkind(v.concretetype, supports_floats, supports_longlong)
+                getkind(v.concretetype, supports_floats,
+                                        supports_longlong,
+                                        supports_singlefloats)
             for op in block.operations:
                 for v in op.args:
-                    getkind(v.concretetype, supports_floats, supports_longlong)
+                    getkind(v.concretetype, supports_floats,
+                                            supports_longlong,
+                                            supports_singlefloats)
                 v = op.result
-                getkind(v.concretetype, supports_floats, supports_longlong)
+                getkind(v.concretetype, supports_floats,
+                                        supports_longlong,
+                                        supports_singlefloats)
     except NotImplementedError, e:
         log.WARNING('%s, ignoring graph' % (e,))
         log.WARNING('  %s' % (graph,))
diff --git a/pypy/jit/codewriter/test/test_longlong.py b/pypy/jit/codewriter/test/test_longlong.py
--- a/pypy/jit/codewriter/test/test_longlong.py
+++ b/pypy/jit/codewriter/test/test_longlong.py
@@ -230,3 +230,18 @@
             assert list(op1.args[3]) == []
             assert list(op1.args[4]) == vlist
             assert op1.result == v_result
+
+
+##def test_singlefloat_constants():
+##    v_x = varoftype(TYPE)
+##    vlist = [v_x, const(rffi.cast(TYPE, 7))]
+##    v_result = varoftype(TYPE)
+##    op = SpaceOperation('llong_add', vlist, v_result)
+##    tr = Transformer(FakeCPU(), FakeBuiltinCallControl())
+##    op1 = tr.rewrite_operation(op)
+##    #
+##    assert op1.opname == 'residual_call_irf_f'
+##    assert list(op1.args[2]) == []
+##    assert list(op1.args[3]) == []
+##    assert list(op1.args[4]) == vlist
+##    assert op1.result == v_result
diff --git a/pypy/jit/codewriter/test/test_policy.py b/pypy/jit/codewriter/test/test_policy.py
--- a/pypy/jit/codewriter/test/test_policy.py
+++ b/pypy/jit/codewriter/test/test_policy.py
@@ -12,24 +12,30 @@
     graph = support.getgraph(f, [5])
     for sf in [False, True]:
         for sll in [False, True]:
-            assert not contains_unsupported_variable_type(graph, sf, sll)
+            for ssf in [False, True]:
+                assert not contains_unsupported_variable_type(graph, sf,
+                                                              sll, ssf)
     #
     graph = support.getgraph(f, [5.5])
     for sf in [False, True]:
         for sll in [False, True]:
-            res = contains_unsupported_variable_type(graph, sf, sll)
-            assert res is not sf
+            for ssf in [False, True]:
+                res = contains_unsupported_variable_type(graph, sf, sll, ssf)
+                assert res is not sf
     #
     graph = support.getgraph(f, [r_singlefloat(5.5)])
     for sf in [False, True]:
         for sll in [False, True]:
-            assert contains_unsupported_variable_type(graph, sf, sll)
+            for ssf in [False, True]:
+                res = contains_unsupported_variable_type(graph, sf, sll, ssf)
+                assert res == (not ssf)
     #
     graph = support.getgraph(f, [r_longlong(5)])
     for sf in [False, True]:
         for sll in [False, True]:
-            res = contains_unsupported_variable_type(graph, sf, sll)
-            assert res == (sys.maxint == 2147483647 and not sll)
+            for ssf in [False, True]:
+                res = contains_unsupported_variable_type(graph, sf, sll, ssf)
+                assert res == (sys.maxint == 2147483647 and not sll)
 
 
 def test_regular_function():
diff --git a/pypy/jit/metainterp/blackhole.py b/pypy/jit/metainterp/blackhole.py
--- a/pypy/jit/metainterp/blackhole.py
+++ b/pypy/jit/metainterp/blackhole.py
@@ -500,6 +500,9 @@
     @arguments("r", returns="i")
     def bhimpl_ptr_nonzero(a):
         return bool(a)
+    @arguments("r", returns="r")
+    def bhimpl_cast_opaque_ptr(a):
+        return a
 
     @arguments("i", returns="i")
     def bhimpl_int_copy(a):
@@ -623,6 +626,19 @@
         x = float(a)
         return longlong.getfloatstorage(x)
 
+    @arguments("f", returns="i")
+    def bhimpl_cast_float_to_singlefloat(a):
+        from pypy.rlib.rarithmetic import r_singlefloat
+        a = longlong.getrealfloat(a)
+        a = r_singlefloat(a)
+        return longlong.singlefloat2int(a)
+
+    @arguments("i", returns="f")
+    def bhimpl_cast_singlefloat_to_float(a):
+        a = longlong.int2singlefloat(a)
+        a = float(a)
+        return longlong.getfloatstorage(a)
+
     # ----------
     # control flow operations
 
diff --git a/pypy/jit/metainterp/compile.py b/pypy/jit/metainterp/compile.py
--- a/pypy/jit/metainterp/compile.py
+++ b/pypy/jit/metainterp/compile.py
@@ -137,6 +137,10 @@
             jitdriver_sd.warmstate.attach_unoptimized_bridge_from_interp(
                 greenkey, loop.preamble.token)
             record_loop_or_bridge(metainterp_sd, loop.preamble)
+        elif token.short_preamble:
+            short = token.short_preamble[-1]
+            metainterp_sd.logger_ops.log_short_preamble(short.inputargs,
+                                                        short.operations)
         return token
     else:
         send_loop_to_backend(greenkey, jitdriver_sd, metainterp_sd, loop,
@@ -637,6 +641,7 @@
         debug_print("compile_new_bridge: got an InvalidLoop")
         # XXX I am fairly convinced that optimize_bridge cannot actually raise
         # InvalidLoop
+        debug_print('InvalidLoop in compile_new_bridge')
         return None
     # Did it work?
     if target_loop_token is not None:
@@ -668,10 +673,9 @@
     def handle_fail(self, metainterp_sd, jitdriver_sd):
         cpu = metainterp_sd.cpu
         exception = cpu.grab_exc_value()
+        assert exception, "PropagateExceptionDescr: no exception??"
         raise metainterp_sd.ExitFrameWithExceptionRef(cpu, exception)
 
-propagate_exception_descr = PropagateExceptionDescr()
-
 def compile_tmp_callback(cpu, jitdriver_sd, greenboxes, redboxes,
                          memory_manager=None):
     """Make a LoopToken that corresponds to assembler code that just
@@ -705,7 +709,7 @@
         finishargs = []
     #
     jd = jitdriver_sd
-    faildescr = propagate_exception_descr
+    faildescr = PropagateExceptionDescr()
     operations = [
         ResOperation(rop.CALL, callargs, result, descr=jd.portal_calldescr),
         ResOperation(rop.GUARD_NO_EXCEPTION, [], None, descr=faildescr),
diff --git a/pypy/jit/metainterp/executor.py b/pypy/jit/metainterp/executor.py
--- a/pypy/jit/metainterp/executor.py
+++ b/pypy/jit/metainterp/executor.py
@@ -50,7 +50,7 @@
     func = argboxes[0].getint()
     # do the call using the correct function from the cpu
     rettype = descr.get_return_type()
-    if rettype == INT:
+    if rettype == INT or rettype == 'S':       # *S*ingle float
         try:
             result = cpu.bh_call_i(func, descr, args_i, args_r, args_f)
         except Exception, e:
@@ -64,7 +64,7 @@
             metainterp.execute_raised(e)
             result = NULL
         return BoxPtr(result)
-    if rettype == FLOAT or rettype == 'L':
+    if rettype == FLOAT or rettype == 'L':     # *L*ong long
         try:
             result = cpu.bh_call_f(func, descr, args_i, args_r, args_f)
         except Exception, e:
diff --git a/pypy/jit/metainterp/history.py b/pypy/jit/metainterp/history.py
--- a/pypy/jit/metainterp/history.py
+++ b/pypy/jit/metainterp/history.py
@@ -20,12 +20,16 @@
 
 FAILARGS_LIMIT = 1000
 
-def getkind(TYPE, supports_floats=True, supports_longlong=True):
+def getkind(TYPE, supports_floats=True,
+                  supports_longlong=True,
+                  supports_singlefloats=True):
     if TYPE is lltype.Void:
         return "void"
     elif isinstance(TYPE, lltype.Primitive):
         if TYPE is lltype.Float and supports_floats:
             return 'float'
+        if TYPE is lltype.SingleFloat and supports_singlefloats:
+            return 'int'     # singlefloats are stored in an int
         if TYPE in (lltype.Float, lltype.SingleFloat):
             raise NotImplementedError("type %s not supported" % TYPE)
         # XXX fix this for oo...
@@ -145,6 +149,7 @@
         """ Implement in call descr.
         Must return INT, REF, FLOAT, or 'v' for void.
         On 32-bit (hack) it can also be 'L' for longlongs.
+        Additionally it can be 'S' for singlefloats.
         """
         raise NotImplementedError
 
diff --git a/pypy/jit/metainterp/optimize.py b/pypy/jit/metainterp/optimize.py
--- a/pypy/jit/metainterp/optimize.py
+++ b/pypy/jit/metainterp/optimize.py
@@ -1,4 +1,4 @@
-from pypy.rlib.debug import debug_start, debug_stop
+from pypy.rlib.debug import debug_start, debug_stop, debug_print
 from pypy.jit.metainterp.jitexc import JitException
 
 class InvalidLoop(JitException):
diff --git a/pypy/jit/metainterp/optimizeopt/__init__.py b/pypy/jit/metainterp/optimizeopt/__init__.py
--- a/pypy/jit/metainterp/optimizeopt/__init__.py
+++ b/pypy/jit/metainterp/optimizeopt/__init__.py
@@ -33,10 +33,6 @@
         if name in enable_opts:
             if opt is not None:
                 o = opt()
-                if unroll and name == 'string':
-                    o.enabled = False
-                # FIXME: Workaround to disable string optimisation
-                # during preamble but to keep it during the loop
                 optimizations.append(o)
             elif name == 'ffi' and config.translation.jit_ffi:
                 # we cannot put the class directly in the unrolling_iterable,
diff --git a/pypy/jit/metainterp/optimizeopt/fficall.py b/pypy/jit/metainterp/optimizeopt/fficall.py
--- a/pypy/jit/metainterp/optimizeopt/fficall.py
+++ b/pypy/jit/metainterp/optimizeopt/fficall.py
@@ -1,12 +1,11 @@
 from pypy.rpython.annlowlevel import cast_base_ptr_to_instance
 from pypy.rlib.objectmodel import we_are_translated
 from pypy.rlib.libffi import Func
-from pypy.rlib.debug import debug_start, debug_stop, debug_print
+from pypy.rlib.debug import debug_print
 from pypy.jit.codewriter.effectinfo import EffectInfo
 from pypy.jit.metainterp.resoperation import rop, ResOperation
 from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method
 from pypy.jit.metainterp.optimizeopt.optimizer import Optimization
-from pypy.jit.backend.llsupport.ffisupport import UnsupportedKind
 
 
 class FuncInfo(object):
@@ -20,11 +19,8 @@
         self.funcval = funcval
         self.opargs = []
         argtypes, restype = self._get_signature(funcval)
-        try:
-            self.descr = cpu.calldescrof_dynamic(argtypes, restype)
-        except UnsupportedKind:
-            # e.g., I or U for long longs
-            self.descr = None
+        self.descr = cpu.calldescrof_dynamic(argtypes, restype)
+        # ^^^ may be None if unsupported
         self.prepare_op = prepare_op
         self.delayed_ops = []
 
@@ -78,18 +74,9 @@
         else:
             self.logops = None
 
-    def propagate_begin_forward(self):
-        debug_start('jit-log-ffiopt')
-        Optimization.propagate_begin_forward(self)
-
-    def propagate_end_forward(self):
-        debug_stop('jit-log-ffiopt')
-        Optimization.propagate_end_forward(self)
-
-    def reconstruct_for_next_iteration(self, optimizer, valuemap):
+    def new(self):
         return OptFfiCall()
-        # FIXME: Should any status be saved for next iteration?
-
+    
     def begin_optimization(self, funcval, op):
         self.rollback_maybe('begin_optimization', op)
         self.funcinfo = FuncInfo(funcval, self.optimizer.cpu, op)
@@ -184,7 +171,8 @@
     def do_call(self, op):
         funcval = self._get_funcval(op)
         funcinfo = self.funcinfo
-        if not funcinfo or funcinfo.funcval is not funcval:
+        if (not funcinfo or funcinfo.funcval is not funcval or
+            funcinfo.descr is None):
             return [op] # cannot optimize
         funcsymval = self.getvalue(op.getarg(2))
         arglist = [funcsymval.force_box()]
diff --git a/pypy/jit/metainterp/optimizeopt/generalize.py b/pypy/jit/metainterp/optimizeopt/generalize.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/metainterp/optimizeopt/generalize.py
@@ -0,0 +1,19 @@
+from pypy.jit.metainterp.optimizeopt.optimizer import MININT, MAXINT
+
+class GeneralizationStrategy(object):
+    def __init__(self, optimizer):
+        self.optimizer = optimizer
+
+    def apply(self):
+        raise NotImplementedError
+
+class KillHugeIntBounds(GeneralizationStrategy):
+    def apply(self):
+        for v in self.optimizer.values.values():
+            if v.is_constant():
+                continue
+            if v.intbound.lower < MININT/2:
+                v.intbound.lower = MININT
+            if v.intbound.upper > MAXINT/2:
+                v.intbound.upper = MAXINT
+          
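
KillHugeIntBounds deliberately forgets bounds that lie in the outer half of the
integer range, so the peeled loop is not specialized on limits that only happened
to be huge in the preamble.  The effect on a single (lower, upper) pair, sketched
in plain Python (sys.maxint standing in for MAXINT, as in the RPython sources):

    import sys

    MAXINT = sys.maxint
    MININT = -sys.maxint - 1

    def generalize(lower, upper):
        # bounds beyond half of the integer range are widened to "unknown"
        if lower < MININT // 2:
            lower = MININT
        if upper > MAXINT // 2:
            upper = MAXINT
        return lower, upper

    assert generalize(-10, 10) == (-10, 10)          # small bounds survive
    assert generalize(MININT + 1, 5) == (MININT, 5)  # a huge lower bound is dropped
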
diff --git a/pypy/jit/metainterp/optimizeopt/heap.py b/pypy/jit/metainterp/optimizeopt/heap.py
--- a/pypy/jit/metainterp/optimizeopt/heap.py
+++ b/pypy/jit/metainterp/optimizeopt/heap.py
@@ -1,9 +1,10 @@
 import os
 
 from pypy.jit.metainterp.jitexc import JitException
-from pypy.jit.metainterp.optimizeopt.optimizer import Optimization
+from pypy.jit.metainterp.optimizeopt.optimizer import Optimization, MODE_ARRAY
+from pypy.jit.metainterp.history import ConstInt, Const
 from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method
-from pypy.jit.metainterp.resoperation import rop
+from pypy.jit.metainterp.resoperation import rop, ResOperation
 from pypy.rlib.objectmodel import we_are_translated
 
 
@@ -24,6 +25,7 @@
         #      'cached_fields'.
         #
         self._cached_fields = {}
+        self._cached_fields_getfield_op = {}        
         self._lazy_setfield = None
         self._lazy_setfield_registered = False
 
@@ -70,9 +72,10 @@
         else:
             return self._cached_fields.get(structvalue, None)
 
-    def remember_field_value(self, structvalue, fieldvalue):
+    def remember_field_value(self, structvalue, fieldvalue, getfield_op=None):
         assert self._lazy_setfield is None
         self._cached_fields[structvalue] = fieldvalue
+        self._cached_fields_getfield_op[structvalue] = getfield_op        
 
     def force_lazy_setfield(self, optheap, can_cache=True):
         op = self._lazy_setfield
@@ -81,7 +84,7 @@
             # Now we clear _cached_fields, because actually doing the
             # setfield might impact any of the stored result (because of
             # possible aliasing).
-            self._cached_fields.clear()
+            self.clear()
             self._lazy_setfield = None
             optheap.next_optimization.propagate_forward(op)
             if not can_cache:
@@ -91,19 +94,49 @@
             # field.
             structvalue = optheap.getvalue(op.getarg(0))
             fieldvalue  = optheap.getvalue(op.getarglist()[-1])
-            self.remember_field_value(structvalue, fieldvalue)
+            self.remember_field_value(structvalue, fieldvalue, op)
         elif not can_cache:
-            self._cached_fields.clear()
+            self.clear()
 
-    def get_reconstructed(self, optimizer, valuemap):
-        assert self._lazy_setfield is None
-        cf = CachedField()
-        for structvalue, fieldvalue in self._cached_fields.iteritems():
-            structvalue2 = structvalue.get_reconstructed(optimizer, valuemap)
-            fieldvalue2  = fieldvalue .get_reconstructed(optimizer, valuemap)
-            cf._cached_fields[structvalue2] = fieldvalue2
-        return cf
+    def clear(self):
+        self._cached_fields.clear()
+        self._cached_fields_getfield_op.clear()
 
+    def turned_constant(self, newvalue, value):
+        if newvalue not in self._cached_fields and value in self._cached_fields:
+            self._cached_fields[newvalue] = self._cached_fields[value]
+            op = self._cached_fields_getfield_op[value].clone()
+            constbox = value.box
+            assert isinstance(constbox, Const)
+            op.setarg(0, constbox)
+            self._cached_fields_getfield_op[newvalue] = op
+        for structvalue in self._cached_fields.keys():
+            if self._cached_fields[structvalue] is value:
+                self._cached_fields[structvalue] = newvalue
+
+    def produce_potential_short_preamble_ops(self, optimizer, shortboxes, descr):
+        if self._lazy_setfield is not None:
+            return
+        for structvalue in self._cached_fields_getfield_op.keys():
+            op = self._cached_fields_getfield_op[structvalue]
+            if not op:
+                continue
+            if optimizer.getvalue(op.getarg(0)) in optimizer.opaque_pointers:
+                continue
+            if structvalue in self._cached_fields:
+                if op.getopnum() == rop.SETFIELD_GC:
+                    result = op.getarg(1)
+                    if isinstance(result, Const):
+                        newresult = result.clonebox()
+                        optimizer.make_constant(newresult, result)
+                        result = newresult
+                    getop = ResOperation(rop.GETFIELD_GC, [op.getarg(0)],
+                                         result, op.getdescr())
+                    getop = shortboxes.add_potential(getop)
+                    self._cached_fields_getfield_op[structvalue] = getop
+                    self._cached_fields[structvalue] = optimizer.getvalue(result)
+                elif op.result is not None:
+                    shortboxes.add_potential(op)
 
 class BogusPureField(JitException):
     pass
@@ -122,24 +155,32 @@
         self._remove_guard_not_invalidated = False
         self._seen_guard_not_invalidated = False
 
-    def reconstruct_for_next_iteration(self, optimizer, valuemap):
-        new = OptHeap()
+    def force_at_end_of_preamble(self):
+        self.force_all_lazy_setfields_and_arrayitems()
 
-        if True:
-            self.force_all_lazy_setfields_and_arrayitems()
-        else:
-            assert 0   # was: new.lazy_setfields = self.lazy_setfields
+    def flush(self):
+        self.force_all_lazy_setfields_and_arrayitems()
 
-        for descr, d in self.cached_fields.items():
-            new.cached_fields[descr] = d.get_reconstructed(optimizer, valuemap)
+    def new(self):
+        return OptHeap()
+        
+    def produce_potential_short_preamble_ops(self, sb):
+        descrkeys = self.cached_fields.keys()
+        if not we_are_translated():
+            # XXX Pure operations on boxes that are cached in several places will
+            #     only be removed from the peeled loop when read from the first
+            #     place discovered here. This is far from ideal, as it makes
+            #     the effectiveness of our optimization a bit random. It should
+            #     however always generate correct results. For tests we don't
+            #     want this randomness.
+            descrkeys.sort(key=str, reverse=True)
+        for descr in descrkeys:
+            d = self.cached_fields[descr]
+            d.produce_potential_short_preamble_ops(self.optimizer, sb, descr)
 
         for descr, submap in self.cached_arrayitems.items():
-            newdict = {}
             for index, d in submap.items():
-                newdict[index] = d.get_reconstructed(optimizer, valuemap)
-            new.cached_arrayitems[descr] = newdict
-
-        return new
+                d.produce_potential_short_preamble_ops(self.optimizer, sb, descr)
 
     def clean_caches(self):
         del self._lazy_setfields_and_arrayitems[:]
@@ -225,12 +266,10 @@
         newvalue = self.getvalue(value.box)
         if value is not newvalue:
             for cf in self.cached_fields.itervalues():
-                if value in cf._cached_fields:
-                    cf._cached_fields[newvalue] = cf._cached_fields[value]
+                cf.turned_constant(newvalue, value)
             for submap in self.cached_arrayitems.itervalues():
                 for cf in submap.itervalues():
-                    if value in cf._cached_fields:
-                        cf._cached_fields[newvalue] = cf._cached_fields[value]
+                    cf.turned_constant(newvalue, value)
 
     def force_lazy_setfield(self, descr, can_cache=True):
         try:
@@ -239,13 +278,14 @@
             return
         cf.force_lazy_setfield(self, can_cache)
 
-    def force_lazy_setarrayitem(self, arraydescr, can_cache=True):
+    def force_lazy_setarrayitem(self, arraydescr, indexvalue=None, can_cache=True):
         try:
             submap = self.cached_arrayitems[arraydescr]
         except KeyError:
             return
-        for cf in submap.values():
-            cf.force_lazy_setfield(self, can_cache)
+        for idx, cf in submap.iteritems():
+            if indexvalue is None or indexvalue.intbound.contains(idx):
+                cf.force_lazy_setfield(self, can_cache)
 
     def fixup_guard_situation(self):
         # hackish: reverse the order of the last two operations if it makes
@@ -332,7 +372,7 @@
         self.emit_operation(op)
         # then remember the result of reading the field
         fieldvalue = self.getvalue(op.result)
-        cf.remember_field_value(structvalue, fieldvalue)
+        cf.remember_field_value(structvalue, fieldvalue, op)
 
     def optimize_SETFIELD_GC(self, op):
         if self.has_pure_result(rop.GETFIELD_GC_PURE, [op.getarg(0)],
@@ -349,6 +389,7 @@
         indexvalue = self.getvalue(op.getarg(1))
         cf = None
         if indexvalue.is_constant():
+            arrayvalue.make_len_gt(MODE_ARRAY, op.getdescr(), indexvalue.box.getint())
             # use the cache on (arraydescr, index), which is a constant
             cf = self.arrayitem_cache(op.getdescr(), indexvalue.box.getint())
             fieldvalue = cf.getfield_from_cache(self, arrayvalue)
@@ -357,14 +398,14 @@
                 return
         else:
             # variable index, so make sure the lazy setarrayitems are done
-            self.force_lazy_setarrayitem(op.getdescr())
+            self.force_lazy_setarrayitem(op.getdescr(), indexvalue=indexvalue)
         # default case: produce the operation
         arrayvalue.ensure_nonnull()
         self.emit_operation(op)
         # the remember the result of reading the array item
         if cf is not None:
             fieldvalue = self.getvalue(op.result)
-            cf.remember_field_value(arrayvalue, fieldvalue)
+            cf.remember_field_value(arrayvalue, fieldvalue, op)
 
     def optimize_SETARRAYITEM_GC(self, op):
         if self.has_pure_result(rop.GETARRAYITEM_GC_PURE, [op.getarg(0),
@@ -376,12 +417,14 @@
         #
         indexvalue = self.getvalue(op.getarg(1))
         if indexvalue.is_constant():
+            arrayvalue = self.getvalue(op.getarg(0))
+            arrayvalue.make_len_gt(MODE_ARRAY, op.getdescr(), indexvalue.box.getint())
             # use the cache on (arraydescr, index), which is a constant
             cf = self.arrayitem_cache(op.getdescr(), indexvalue.box.getint())
             cf.do_setfield(self, op)
         else:
             # variable index, so make sure the lazy setarrayitems are done
-            self.force_lazy_setarrayitem(op.getdescr(), can_cache=False)
+            self.force_lazy_setarrayitem(op.getdescr(), indexvalue=indexvalue, can_cache=False)
             # and then emit the operation
             self.emit_operation(op)
 
diff --git a/pypy/jit/metainterp/optimizeopt/intbounds.py b/pypy/jit/metainterp/optimizeopt/intbounds.py
--- a/pypy/jit/metainterp/optimizeopt/intbounds.py
+++ b/pypy/jit/metainterp/optimizeopt/intbounds.py
@@ -1,7 +1,8 @@
+from pypy.jit.metainterp.optimizeopt.optimizer import Optimization, CONST_1, CONST_0, \
+                                                  MODE_ARRAY, MODE_STR, MODE_UNICODE
 from pypy.jit.metainterp.history import ConstInt
 from pypy.jit.metainterp.optimizeopt.intutils import (IntBound, IntLowerBound,
     IntUpperBound)
-from pypy.jit.metainterp.optimizeopt.optimizer import Optimization, CONST_1, CONST_0
 from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method
 from pypy.jit.metainterp.resoperation import rop
 
@@ -14,18 +15,17 @@
         self.posponedop = None
         self.nextop = None
 
-    def reconstruct_for_next_iteration(self, optimizer, valuemap):
+    def new(self):
         assert self.posponedop is None
-        return self
+        return OptIntBounds()
+        
+    def flush(self):
+        assert self.posponedop is None
 
     def setup(self):
         self.posponedop = None
         self.nextop = None
 
-    def reconstruct_for_next_iteration(self, optimizer, valuemap):
-        assert self.posponedop is None
-        return self
-
     def propagate_forward(self, op):
         if op.is_ovf():
             self.posponedop = op
@@ -125,6 +125,17 @@
         r = self.getvalue(op.result)
         r.intbound.intersect(v1.intbound.div_bound(v2.intbound))
 
+    def optimize_INT_MOD(self, op):
+        self.emit_operation(op)
+        v2 = self.getvalue(op.getarg(1))
+        if v2.is_constant():
+            val = v2.box.getint()
+            r = self.getvalue(op.result)
+            if val < 0:
+                val = -val
+            r.intbound.make_gt(IntBound(-val, -val))
+            r.intbound.make_lt(IntBound(val, val))
+
     def optimize_INT_LSHIFT(self, op):
         v1 = self.getvalue(op.getarg(0))
         v2 = self.getvalue(op.getarg(1))
@@ -275,10 +286,27 @@
 
     def optimize_ARRAYLEN_GC(self, op):
         self.emit_operation(op)
-        v1 = self.getvalue(op.result)
-        v1.intbound.make_ge(IntLowerBound(0))
+        array  = self.getvalue(op.getarg(0))
+        result = self.getvalue(op.result)
+        array.make_len_gt(MODE_ARRAY, op.getdescr(), -1)
+        array.lenbound.bound.intersect(result.intbound)
+        result.intbound = array.lenbound.bound
 
-    optimize_STRLEN = optimize_UNICODELEN = optimize_ARRAYLEN_GC
+    def optimize_STRLEN(self, op):
+        self.emit_operation(op)
+        array  = self.getvalue(op.getarg(0))
+        result = self.getvalue(op.result)
+        array.make_len_gt(MODE_STR, op.getdescr(), -1)
+        array.lenbound.bound.intersect(result.intbound)
+        result.intbound = array.lenbound.bound
+
+    def optimize_UNICODELEN(self, op):
+        self.emit_operation(op)
+        array  = self.getvalue(op.getarg(0))
+        result = self.getvalue(op.result)
+        array.make_len_gt(MODE_UNICODE, op.getdescr(), -1)
+        array.lenbound.bound.intersect(result.intbound)
+        result.intbound = array.lenbound.bound
 
     def optimize_STRGETITEM(self, op):
         self.emit_operation(op)
diff --git a/pypy/jit/metainterp/optimizeopt/intutils.py b/pypy/jit/metainterp/optimizeopt/intutils.py
--- a/pypy/jit/metainterp/optimizeopt/intutils.py
+++ b/pypy/jit/metainterp/optimizeopt/intutils.py
@@ -1,4 +1,9 @@
 from pypy.rlib.rarithmetic import ovfcheck, ovfcheck_lshift, LONG_BIT
+from pypy.jit.metainterp.resoperation import rop, ResOperation
+from pypy.jit.metainterp.history import BoxInt, ConstInt
+import sys
+MAXINT = sys.maxint
+MININT = -sys.maxint - 1
 
 class IntBound(object):
     _attrs_ = ('has_upper', 'has_lower', 'upper', 'lower')
@@ -210,11 +215,11 @@
         
     def __repr__(self):
         if self.has_lower:
-            l = '%4d' % self.lower
+            l = '%d' % self.lower
         else:
             l = '-Inf'
         if self.has_upper:
-            u = '%3d' % self.upper
+            u = '%d' % self.upper
         else:
             u = 'Inf'
         return '%s <= x <= %s' % (l, u)
@@ -224,7 +229,24 @@
         res.has_lower = self.has_lower
         res.has_upper = self.has_upper
         return res
+
+    def make_guards(self, box, guards):
+        if self.has_lower and self.lower > MININT:
+            bound = self.lower
+            res = BoxInt()
+            op = ResOperation(rop.INT_GE, [box, ConstInt(bound)], res)
+            guards.append(op)
+            op = ResOperation(rop.GUARD_TRUE, [res], None)
+            guards.append(op)
+        if self.has_upper and self.upper < MAXINT:
+            bound = self.upper
+            res = BoxInt()
+            op = ResOperation(rop.INT_LE, [box, ConstInt(bound)], res)
+            guards.append(op)
+            op = ResOperation(rop.GUARD_TRUE, [res], None)
+            guards.append(op)
     
+
 class IntUpperBound(IntBound):
     def __init__(self, upper):
         self.has_upper = True
@@ -244,7 +266,23 @@
         self.has_upper = False
         self.has_lower = False
         self.upper = 0
-        self.lower = 0        
+        self.lower = 0
+
+class ImmutableIntUnbounded(IntUnbounded):
+    def _raise(self):
+        raise TypeError('ImmutableIntUnbounded is immutable')
+    def make_le(self, other):
+        self._raise()
+    def make_lt(self, other):
+        self._raise()
+    def make_ge(self, other):
+        self._raise()
+    def make_gt(self, other):
+        self._raise()
+    def make_constant(self, value):
+        self._raise()
+    def intersect(self, other):        
+        self._raise()
 
 def min4(t):
     return min(min(t[0], t[1]), min(t[2], t[3]))
diff --git a/pypy/jit/metainterp/optimizeopt/optimizer.py b/pypy/jit/metainterp/optimizeopt/optimizer.py
--- a/pypy/jit/metainterp/optimizeopt/optimizer.py
+++ b/pypy/jit/metainterp/optimizeopt/optimizer.py
@@ -1,64 +1,105 @@
 from pypy.jit.metainterp import jitprof, resume, compile
 from pypy.jit.metainterp.executor import execute_nonspec
 from pypy.jit.metainterp.history import BoxInt, BoxFloat, Const, ConstInt, REF
-from pypy.jit.metainterp.optimizeopt.intutils import IntBound, IntUnbounded
+from pypy.jit.metainterp.optimizeopt.intutils import IntBound, IntUnbounded, \
+                                                     ImmutableIntUnbounded, \
+                                                     IntLowerBound, MININT, MAXINT
 from pypy.jit.metainterp.optimizeopt.util import (make_dispatcher_method,
     args_dict)
 from pypy.jit.metainterp.resoperation import rop, ResOperation
 from pypy.jit.metainterp.typesystem import llhelper, oohelper
 from pypy.tool.pairtype import extendabletype
+from pypy.rlib.debug import debug_start, debug_stop, debug_print
 
 LEVEL_UNKNOWN    = '\x00'
 LEVEL_NONNULL    = '\x01'
 LEVEL_KNOWNCLASS = '\x02'     # might also mean KNOWNARRAYDESCR, for arrays
 LEVEL_CONSTANT   = '\x03'
 
-import sys
-MAXINT = sys.maxint
-MININT = -sys.maxint - 1
+MODE_ARRAY   = '\x00'
+MODE_STR     = '\x01'
+MODE_UNICODE = '\x02'
+class LenBound(object):
+    def __init__(self, mode, descr, bound):
+        self.mode = mode
+        self.descr = descr
+        self.bound = bound
 
 class OptValue(object):
     __metaclass__ = extendabletype
-    _attrs_ = ('box', 'known_class', 'last_guard_index', 'level', 'intbound')
+    _attrs_ = ('box', 'known_class', 'last_guard_index', 'level', 'intbound', 'lenbound')
     last_guard_index = -1
 
     level = LEVEL_UNKNOWN
     known_class = None
-    intbound = None
+    intbound = ImmutableIntUnbounded()
+    lenbound = None
 
-    def __init__(self, box):
+    def __init__(self, box, level=None, known_class=None, intbound=None):
         self.box = box
-        self.intbound = IntBound(MININT, MAXINT) #IntUnbounded()
+        if level is not None:
+            self.level = level
+        self.known_class = known_class
+        if intbound:
+            self.intbound = intbound
+        else:
+            if isinstance(box, BoxInt):
+                self.intbound = IntBound(MININT, MAXINT)
+            else:
+                self.intbound = IntUnbounded()
+
         if isinstance(box, Const):
             self.make_constant(box)
         # invariant: box is a Const if and only if level == LEVEL_CONSTANT
 
+    def make_len_gt(self, mode, descr, val):
+        if self.lenbound:
+            assert self.lenbound.mode == mode
+            assert self.lenbound.descr == descr
+            self.lenbound.bound.make_gt(IntBound(val, val))
+        else:
+            self.lenbound = LenBound(mode, descr, IntLowerBound(val + 1))
+
+    def make_guards(self, box):
+        guards = []
+        if self.level == LEVEL_CONSTANT:
+            op = ResOperation(rop.GUARD_VALUE, [box, self.box], None)
+            guards.append(op)
+        elif self.level == LEVEL_KNOWNCLASS:
+            op = ResOperation(rop.GUARD_NONNULL, [box], None)
+            guards.append(op)            
+            op = ResOperation(rop.GUARD_CLASS, [box, self.known_class], None)
+            guards.append(op)
+        else:
+            if self.level == LEVEL_NONNULL:
+                op = ResOperation(rop.GUARD_NONNULL, [box], None)
+                guards.append(op)
+            self.intbound.make_guards(box, guards)
+            if self.lenbound:
+                lenbox = BoxInt()
+                if self.lenbound.mode == MODE_ARRAY:
+                    op = ResOperation(rop.ARRAYLEN_GC, [box], lenbox, self.lenbound.descr)
+                elif self.lenbound.mode == MODE_STR:
+                    op = ResOperation(rop.STRLEN, [box], lenbox, self.lenbound.descr)
+                elif self.lenbound.mode == MODE_UNICODE:
+                    op = ResOperation(rop.UNICODELEN, [box], lenbox, self.lenbound.descr)
+                else:
+                    debug_print("Unknown lenbound mode")
+                    assert False
+                guards.append(op)
+                self.lenbound.bound.make_guards(lenbox, guards)
+
+        return guards
+
     def force_box(self):
         return self.box
 
     def get_key_box(self):
         return self.box
 
-    def enum_forced_boxes(self, boxes, already_seen):
-        key = self.get_key_box()
-        if key not in already_seen:
-            boxes.append(self.force_box())
-            already_seen[self.get_key_box()] = None
-
-    def get_reconstructed(self, optimizer, valuemap):
-        if self in valuemap:
-            return valuemap[self]
-        new = self.reconstruct_for_next_iteration(optimizer)
-        valuemap[self] = new
-        self.reconstruct_childs(new, valuemap)
-        return new
-
-    def reconstruct_for_next_iteration(self, optimizer):
+    def force_at_end_of_preamble(self, already_forced):
         return self
 
-    def reconstruct_childs(self, new, valuemap):
-        pass
-
     def get_args_for_fail(self, modifier):
         pass
 
@@ -82,6 +123,7 @@
         assert isinstance(constbox, Const)
         self.box = constbox
         self.level = LEVEL_CONSTANT
+        
         if isinstance(constbox, ConstInt):
             val = constbox.getint()
             self.intbound = IntBound(val, val)
@@ -222,7 +264,9 @@
 
     def pure(self, opnum, args, result):
         op = ResOperation(opnum, args, result)
-        self.optimizer.pure_operations[self.optimizer.make_args_key(op)] = op
+        key = self.optimizer.make_args_key(op)
+        if key not in self.optimizer.pure_operations:
+            self.optimizer.pure_operations[key] = op
 
     def has_pure_result(self, opnum, args, descr):
         op = ResOperation(opnum, args, None, descr)
@@ -235,16 +279,22 @@
     def setup(self):
         pass
 
+    def turned_constant(self, value):
+        pass
+
     def force_at_end_of_preamble(self):
         pass
 
-    def turned_constant(self, value):
+    # It is too late to force stuff here; it must be done in force_at_end_of_preamble
+    def new(self):
+        raise NotImplementedError
+
+    # Called after the last operation has been propagated to flush out any postponed ops
+    def flush(self):
         pass
 
-    def reconstruct_for_next_iteration(self, optimizer=None, valuemap=None):
-        #return self.__class__()
-        raise NotImplementedError
-
+    def produce_potential_short_preamble_ops(self, potential_ops):
+        pass
 
 class Optimizer(Optimization):
 
@@ -257,14 +307,17 @@
         self.interned_refs = self.cpu.ts.new_ref_dict()
         self.resumedata_memo = resume.ResumeDataLoopMemo(metainterp_sd)
         self.bool_boxes = {}
-        self.loop_invariant_results = {}
         self.pure_operations = args_dict()
+        self.emitted_pure_operations = {}
         self.producer = {}
         self.pendingfields = []
         self.posponedop = None
         self.exception_might_have_happened = False
         self.quasi_immutable_deps = None
+        self.opaque_pointers = {}
         self.newoperations = []
+        self.emitting_dissabled = False
+        self.emitted_guards = 0        
         if loop is not None:
             self.call_pure_results = loop.call_pure_results
 
@@ -286,39 +339,32 @@
         self.optimizations  = optimizations
 
     def force_at_end_of_preamble(self):
-        self.resumedata_memo = resume.ResumeDataLoopMemo(self.metainterp_sd)
         for o in self.optimizations:
             o.force_at_end_of_preamble()
 
-    def reconstruct_for_next_iteration(self, optimizer=None, valuemap=None):
-        assert optimizer is None
-        assert valuemap is None
-        valuemap = {}
+    def flush(self):
+        for o in self.optimizations:
+            o.flush()
+        assert self.posponedop is None
+
+    def new(self):
+        assert self.posponedop is None
         new = Optimizer(self.metainterp_sd, self.loop)
-        optimizations = [o.reconstruct_for_next_iteration(new, valuemap) for o in
-                         self.optimizations]
+        optimizations = [o.new() for o in self.optimizations]
         new.set_optimizations(optimizations)
-
-        new.values = {}
-        for box, value in self.values.items():
-            new.values[box] = value.get_reconstructed(new, valuemap)
-        new.interned_refs = self.interned_refs
-        new.bool_boxes = {}
-        for value in new.bool_boxes.keys():
-            new.bool_boxes[value.get_reconstructed(new, valuemap)] = None
-
-        # FIXME: Move to rewrite.py
-        new.loop_invariant_results = {}
-        for key, value in self.loop_invariant_results.items():
-            new.loop_invariant_results[key] = \
-                                 value.get_reconstructed(new, valuemap)
-
-        new.pure_operations = self.pure_operations
-        new.producer = self.producer
-        assert self.posponedop is None
         new.quasi_immutable_deps = self.quasi_immutable_deps
-
         return new
+        
+    def produce_potential_short_preamble_ops(self, sb):
+        for op in self.emitted_pure_operations:
+            if op.getopnum() == rop.GETARRAYITEM_GC_PURE or \
+               op.getopnum() == rop.STRGETITEM or \
+               op.getopnum() == rop.UNICODEGETITEM:
+                if not self.getvalue(op.getarg(1)).is_constant():
+                    continue
+            sb.add_potential(op)
+        for opt in self.optimizations:
+            opt.produce_potential_short_preamble_ops(sb)
 
     def turned_constant(self, value):
         for o in self.optimizations:
@@ -433,10 +479,11 @@
         return True
 
     def emit_operation(self, op):
-        ###self.heap_op_optimizer.emitting_operation(op)
-        self._emit_operation(op)
-
-    def _emit_operation(self, op):
+        if op.returns_bool_result():
+            self.bool_boxes[self.getvalue(op.result)] = None
+        if self.emitting_dissabled:
+            return
+        
         for i in range(op.numargs()):
             arg = op.getarg(i)
             if arg in self.values:
@@ -445,11 +492,10 @@
         self.metainterp_sd.profiler.count(jitprof.OPT_OPS)
         if op.is_guard():
             self.metainterp_sd.profiler.count(jitprof.OPT_GUARDS)
+            self.emitted_guards += 1 # FIXME: can we reuse above counter?
             op = self.store_final_boxes_in_guard(op)
         elif op.can_raise():
             self.exception_might_have_happened = True
-        elif op.returns_bool_result():
-            self.bool_boxes[self.getvalue(op.result)] = None
         self.newoperations.append(op)
 
     def store_final_boxes_in_guard(self, op):
@@ -533,6 +579,7 @@
                 return
             else:
                 self.pure_operations[args] = op
+                self.emitted_pure_operations[op] = True
 
         # otherwise, the operation remains
         self.emit_operation(op)
@@ -555,6 +602,35 @@
     def optimize_DEBUG_MERGE_POINT(self, op):
         self.emit_operation(op)
 
+    def optimize_CAST_OPAQUE_PTR(self, op):
+        value = self.getvalue(op.getarg(0))
+        self.opaque_pointers[value] = True
+        self.make_equal_to(op.result, value)
+
+    def optimize_GETARRAYITEM_GC_PURE(self, op):
+        indexvalue = self.getvalue(op.getarg(1))
+        if indexvalue.is_constant():
+            arrayvalue = self.getvalue(op.getarg(0))
+            arrayvalue.make_len_gt(MODE_ARRAY, op.getdescr(), indexvalue.box.getint())
+        self.optimize_default(op)
+
+    def optimize_STRGETITEM(self, op):
+        indexvalue = self.getvalue(op.getarg(1))
+        if indexvalue.is_constant():
+            arrayvalue = self.getvalue(op.getarg(0))
+            arrayvalue.make_len_gt(MODE_STR, op.getdescr(), indexvalue.box.getint())
+        self.optimize_default(op)
+
+    def optimize_UNICODEGETITEM(self, op):
+        indexvalue = self.getvalue(op.getarg(1))
+        if indexvalue.is_constant():
+            arrayvalue = self.getvalue(op.getarg(0))
+            arrayvalue.make_len_gt(MODE_UNICODE, op.getdescr(), indexvalue.box.getint())
+        self.optimize_default(op)
+        
+
+    
+
 dispatch_opt = make_dispatcher_method(Optimizer, 'optimize_',
         default=Optimizer.optimize_default)
 
diff --git a/pypy/jit/metainterp/optimizeopt/rewrite.py b/pypy/jit/metainterp/optimizeopt/rewrite.py
--- a/pypy/jit/metainterp/optimizeopt/rewrite.py
+++ b/pypy/jit/metainterp/optimizeopt/rewrite.py
@@ -13,9 +13,16 @@
     """Rewrite operations into equivalent, cheaper operations.
        This includes already executed operations and constants.
     """
+    def __init__(self):
+        self.loop_invariant_results = {}
+        self.loop_invariant_producer = {}
 
-    def reconstruct_for_next_iteration(self, optimizer, valuemap):
-        return self
+    def new(self):
+        return OptRewrite()
+        
+    def produce_potential_short_preamble_ops(self, sb):
+        for op in self.loop_invariant_producer.values():
+            sb.add_potential(op)
 
     def propagate_forward(self, op):
         args = self.optimizer.make_args_key(op)
@@ -344,16 +351,18 @@
         # expects a compile-time constant
         assert isinstance(arg, Const)
         key = make_hashable_int(arg.getint())
-        resvalue = self.optimizer.loop_invariant_results.get(key, None)
+        
+        resvalue = self.loop_invariant_results.get(key, None)
         if resvalue is not None:
             self.make_equal_to(op.result, resvalue)
             return
         # change the op to be a normal call, from the backend's point of view
         # there is no reason to have a separate operation for this
+        self.loop_invariant_producer[key] = op
         op = op.copy_and_change(rop.CALL)
         self.emit_operation(op)
         resvalue = self.getvalue(op.result)
-        self.optimizer.loop_invariant_results[key] = resvalue
+        self.loop_invariant_results[key] = resvalue
 
     def _optimize_nullness(self, op, box, expect_nonnull):
         value = self.getvalue(box)
diff --git a/pypy/jit/metainterp/optimizeopt/simplify.py b/pypy/jit/metainterp/optimizeopt/simplify.py
--- a/pypy/jit/metainterp/optimizeopt/simplify.py
+++ b/pypy/jit/metainterp/optimizeopt/simplify.py
@@ -25,6 +25,8 @@
         #     but it's a bit hard to implement robustly if heap.py is also run
         pass
 
+    optimize_CAST_OPAQUE_PTR = optimize_VIRTUAL_REF
+
 
 dispatch_opt = make_dispatcher_method(OptSimplify, 'optimize_',
         default=OptSimplify.emit_operation)
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py b/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
--- a/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
@@ -2742,11 +2742,11 @@
     def test_residual_call_invalidate_some_arrays(self):
         ops = """
         [p1, p2, i1]
-        p3 = getarrayitem_gc(p1, 0, descr=arraydescr2)
+        p3 = getarrayitem_gc(p2, 0, descr=arraydescr2)
         p4 = getarrayitem_gc(p2, 1, descr=arraydescr2)
         i2 = getarrayitem_gc(p1, 1, descr=arraydescr)
         i3 = call(i1, descr=writearraydescr)
-        p5 = getarrayitem_gc(p1, 0, descr=arraydescr2)
+        p5 = getarrayitem_gc(p2, 0, descr=arraydescr2)
         p6 = getarrayitem_gc(p2, 1, descr=arraydescr2)
         i4 = getarrayitem_gc(p1, 1, descr=arraydescr)
         escape(p3)
@@ -2759,7 +2759,7 @@
         """
         expected = """
         [p1, p2, i1]
-        p3 = getarrayitem_gc(p1, 0, descr=arraydescr2)
+        p3 = getarrayitem_gc(p2, 0, descr=arraydescr2)
         p4 = getarrayitem_gc(p2, 1, descr=arraydescr2)
         i2 = getarrayitem_gc(p1, 1, descr=arraydescr)
         i3 = call(i1, descr=writearraydescr)
@@ -4621,6 +4621,96 @@
         """
         self.optimize_strunicode_loop(ops, expected)
 
+    def test_intmod_bounds(self):
+        ops = """
+        [i0, i1]
+        i2 = int_mod(i0, 12)
+        i3 = int_gt(i2, 12)
+        guard_false(i3) []
+        i4 = int_lt(i2, -12)
+        guard_false(i4) []
+        i5 = int_mod(i1, -12)
+        i6 = int_lt(i5, -12)
+        guard_false(i6) []
+        i7 = int_gt(i5, 12)
+        guard_false(i7) []
+        jump(i2, i5)
+        """
+        expected = """
+        [i0, i1]
+        i2 = int_mod(i0, 12)
+        i5 = int_mod(i1, -12)
+        jump(i2, i5)
+        """
+        self.optimize_loop(ops, expected)
+
+        # This is the sequence of resoperations that is generated for a Python
+        # app-level int % int.  When the modulus is constant and when i0
+        # is known non-negative it should be optimized to a single int_mod.
+        ops = """
+        [i0]
+        i5 = int_ge(i0, 0)
+        guard_true(i5) []
+        i1 = int_mod(i0, 42)
+        i2 = int_rshift(i1, 63)
+        i3 = int_and(42, i2)
+        i4 = int_add(i1, i3)
+        finish(i4)
+        """
+        expected = """
+        [i0]
+        i5 = int_ge(i0, 0)
+        guard_true(i5) []
+        i1 = int_mod(i0, 42)
+        finish(i1)
+        """
+        py.test.skip("in-progress")
+        self.optimize_loop(ops, expected)
+
+        # Also, 'n % power-of-two' can be turned into int_and(),
+        # but that's a bit harder to detect here because it turns into
+        # several operations, and of course it is wrong to just turn
+        # int_mod(i0, 16) into int_and(i0, 15).
+        ops = """
+        [i0]
+        i1 = int_mod(i0, 16)
+        i2 = int_rshift(i1, 63)
+        i3 = int_and(16, i2)
+        i4 = int_add(i1, i3)
+        finish(i4)
+        """
+        expected = """
+        [i0]
+        i4 = int_and(i0, 15)
+        finish(i4)
+        """
+        py.test.skip("harder")
+        self.optimize_loop(ops, expected)
+
+    def test_bounded_lazy_setfield(self):
+        ops = """
+        [p0, i0]
+        i1 = int_gt(i0, 2)
+        guard_true(i1) []
+        setarrayitem_gc(p0, 0, 3)
+        setarrayitem_gc(p0, 2, 4)
+        setarrayitem_gc(p0, i0, 15)
+        i2 = getarrayitem_gc(p0, 2)
+        jump(p0, i2)
+        """
+        # Remove the getarrayitem_gc, because we know that p0[i0] does not alias
+        # p0[2]
+        expected = """
+        [p0, i0]
+        i1 = int_gt(i0, 2)
+        guard_true(i1) []
+        setarrayitem_gc(p0, i0, 15)
+        setarrayitem_gc(p0, 0, 3)
+        setarrayitem_gc(p0, 2, 4)
+        jump(p0, 4)
+        """
+        self.optimize_loop(ops, expected)
+
 
 class TestLLtype(BaseTestOptimizeBasic, LLtypeMixin):
     pass
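
The first skipped test records the resoperation sequence that an app-level
int % int expands to: a truncating int_mod followed by int_rshift/int_and/int_add,
which add the divisor back whenever the raw remainder is negative.  A plain-Python
sketch of why that correction becomes dead code once i0 is known non-negative
(helper names are illustrative; the shift by 63 assumes a 64-bit word, as in the
test):

    def trunc_mod(a, b):
        # remainder with the sign of the dividend, i.e. what int_mod produces
        q = abs(a) // abs(b)
        if (a < 0) != (b < 0):
            q = -q
        return a - b * q

    def app_level_mod(a, b):
        r = trunc_mod(a, b)      # i1 = int_mod(i0, 42)
        mask = r >> 63           # i2: -1 if r < 0, else 0
        return r + (b & mask)    # i3/i4: add the divisor back only when r < 0

    assert app_level_mod(-7, 42) == -7 % 42             # correction fires
    assert app_level_mod(7, 42) == trunc_mod(7, 42)     # non-negative i0: mask is 0
    assert trunc_mod(-1, 16) == -1 and -1 & 15 == 15    # why int_and(i0, 15) alone
                                                        # is wrong for negative i0
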
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py b/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
--- a/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
@@ -61,7 +61,9 @@
             boxes = []
         boxes = []
     def clone_if_mutable(self):
-        return self
+        return FakeDescr()
+    def __eq__(self, other):
+        return isinstance(other, Storage) or isinstance(other, FakeDescr)
 
 
 class BaseTestWithUnroll(BaseTest):
@@ -69,13 +71,14 @@
     enable_opts = "intbounds:rewrite:virtualize:string:heap:unroll"
 
     def optimize_loop(self, ops, expected, expected_preamble=None,
-                      call_pure_results=None):
+                      call_pure_results=None, expected_short=None):
         loop = self.parse(ops)
         if expected != "crash!":
             expected = self.parse(expected)
         if expected_preamble:
             expected_preamble = self.parse(expected_preamble)
-
+        if expected_short:
+            expected_short = self.parse(expected_short)
         loop.preamble = TreeLoop('preamble')
         loop.preamble.inputargs = loop.inputargs
         loop.preamble.token = LoopToken()
@@ -84,17 +87,33 @@
         self._do_optimize_loop(loop, call_pure_results)
         #
         print
+        print "Preamble:"
         print loop.preamble.inputargs
-        print '\n'.join([str(o) for o in loop.preamble.operations])
+        if loop.preamble.operations:
+            print '\n'.join([str(o) for o in loop.preamble.operations])
+        else:
+            print 'Failed!'
         print
+        print "Loop:"
         print loop.inputargs
         print '\n'.join([str(o) for o in loop.operations])
         print
+        if expected_short:
+            print "Short Preamble:"
+            short = loop.preamble.token.short_preamble[0]
+            print short.inputargs
+            print '\n'.join([str(o) for o in short.operations])        
+            print
+        
         assert expected != "crash!", "should have raised an exception"
         self.assert_equal(loop, expected)
         if expected_preamble:
             self.assert_equal(loop.preamble, expected_preamble,
                               text_right='expected preamble')
+        if expected_short:
+            self.assert_equal(short, expected_short,
+                              text_right='expected short preamble')
+            
         return loop
 
 class OptimizeOptTest(BaseTestWithUnroll):
@@ -840,7 +859,13 @@
         p3sub = new_with_vtable(ConstClass(node_vtable2))
         setfield_gc(p3sub, i1, descr=valuedescr)
         setfield_gc(p1, p3sub, descr=nextdescr)
-        jump(i1, p1, p3sub)
+        # XXX: We get two extra operations here because the setfield
+        #      above is the result of forcing p1 and thus not 
+        #      registered with the heap optimizer. I've marked tests
+        #      below with VIRTUALHEAP if they suffer from this issue
+        p3sub2 = getfield_gc(p1, descr=nextdescr) 
+        guard_nonnull_class(p3sub2, ConstClass(node_vtable2)) []
+        jump(i1, p1, p3sub2)
         """
         self.optimize_loop(ops, expected, preamble)
 
@@ -871,7 +896,9 @@
         guard_true(i2b) []
         p3 = new_with_vtable(ConstClass(node_vtable))
         setfield_gc(p3, i2, descr=nextdescr)
-        jump(p3, i2)
+        # XXX: VIRTUALHEAP (see above)
+        i3 = getfield_gc(p3, descr=nextdescr)
+        jump(p3, i3)
         """
         self.optimize_loop(ops, expected, preamble)
 
@@ -1166,6 +1193,29 @@
         """
         self.optimize_loop(ops, expected)
 
+    def test_virtual_field_forced_by_lazy_setfield(self):
+        ops = """
+        [i0, p1, p3]
+        i28 = int_add(i0, 1)
+        p30 = new_with_vtable(ConstClass(node_vtable))
+        setfield_gc(p30, i28, descr=nextdescr)
+        setfield_gc(p3, p30, descr=valuedescr)
+        p45 = getfield_gc(p3, descr=valuedescr)
+        i29 = int_add(i28, 1)
+        jump(i29, p45, p3)
+        """
+        preamble = """
+        [i0, p1, p3]
+        i28 = int_add(i0, 1)
+        i29 = int_add(i28, 1)
+        p30 = new_with_vtable(ConstClass(node_vtable))
+        setfield_gc(p30, i28, descr=nextdescr)
+        setfield_gc(p3, p30, descr=valuedescr)
+        jump(i29, p30, p3)
+        """
+        expected = preamble
+        self.optimize_loop(ops, expected, preamble)
+
     def test_nonvirtual_1(self):
         ops = """
         [i]
@@ -1308,15 +1358,78 @@
         ops = """
         [i]
         i1 = getfield_gc(ConstPtr(myptr), descr=valuedescr)
-        jump(i1)
-        """
-        preamble = ops
-        expected = """
+        call(i1, descr=nonwritedescr)
+        jump(i)
+        """
+        preamble = """
         [i]
-        jump(i)
+        i1 = getfield_gc(ConstPtr(myptr), descr=valuedescr)
+        call(i1, descr=nonwritedescr)
+        jump(i, i1)
+        """
+        expected = """
+        [i, i1]
+        call(i1, descr=nonwritedescr)
+        jump(i, i1)
         """
         self.optimize_loop(ops, expected, preamble)
 
+    def test_varray_boxed1(self):
+        ops = """
+        [p0, p8]
+        p11 = getfield_gc(p0, descr=otherdescr)
+        guard_nonnull(p11) [p0, p8]
+        guard_class(p11, ConstClass(node_vtable2)) [p0, p8]
+        p14 = getfield_gc(p11, descr=otherdescr)
+        guard_isnull(p14) [p0, p8]
+        p18 = getfield_gc(ConstPtr(myptr), descr=otherdescr)
+        guard_isnull(p18) [p0, p8]
+        p31 = new(descr=ssize)
+        setfield_gc(p31, 0, descr=adescr)
+        p33 = new_array(0, descr=arraydescr)
+        setfield_gc(p31, p33, descr=bdescr)
+        p35 = new_with_vtable(ConstClass(node_vtable))
+        setfield_gc(p35, p31, descr=valuedescr)
+        jump(p0, p35)
+        """
+        expected = """
+        [p0]
+        jump(p0)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_varray_boxed_simplified(self):
+        ops = """
+        [p0, p8]
+        p18 = getfield_gc(ConstPtr(myptr), descr=otherdescr)
+        guard_isnull(p18) [p0, p8]
+        p31 = new(descr=ssize)
+        p35 = new_with_vtable(ConstClass(node_vtable))
+        setfield_gc(p35, p31, descr=valuedescr)        
+        jump(p0, p35)
+        """
+        expected = """
+        [p0]
+        jump(p0)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_varray_boxed_noconst(self):
+        ops = """
+        [p0, p8, p18, p19]
+        guard_isnull(p18) [p0, p8]
+        p31 = new(descr=ssize)
+        p35 = new_with_vtable(ConstClass(node_vtable))
+        setfield_gc(p35, p31, descr=valuedescr)        
+        jump(p0, p35, p19, p18)
+        """
+        expected = """
+        [p0, p19]
+        guard_isnull(p19) [p0]
+        jump(p0, NULL)
+        """
+        self.optimize_loop(ops, expected)
+        
     def test_varray_1(self):
         ops = """
         [i1]
@@ -1552,6 +1665,24 @@
         """
         self.optimize_loop(ops, expected)
 
+    def test_duplicate_getfield_2(self):
+        ops = """
+        [p1, p2, i0]
+        i1 = getfield_gc(p1, descr=valuedescr)
+        i2 = getfield_gc(p2, descr=valuedescr)
+        i3 = getfield_gc(p1, descr=valuedescr)
+        i4 = getfield_gc(p2, descr=valuedescr)
+        i5 = int_add(i3, i4)
+        i6 = int_add(i0, i5)
+        jump(p1, p2, i6)
+        """
+        expected = """
+        [p1, p2, i0, i5]
+        i6 = int_add(i0, i5)
+        jump(p1, p2, i6, i5)
+        """
+        self.optimize_loop(ops, expected)
+
     def test_getfield_after_setfield(self):
         ops = """
         [p1, i1]
@@ -1728,6 +1859,7 @@
         """
         expected = """
         [p1, i1, i2]
+        setfield_gc(p1, i2, descr=valuedescr)
         jump(p1, i1, i2)
         """
         # in this case, all setfields are removed, because we can prove
@@ -1872,14 +2004,14 @@
         guard_true(i3) []
         i4 = int_neg(i2)
         setfield_gc(p1, i2, descr=valuedescr)
-        jump(p1, i1, i2, i4)
-        """
-        expected = """
-        [p1, i1, i2, i4]
+        jump(p1, i1, i2, i4, i4)
+        """
+        expected = """
+        [p1, i1, i2, i4, i5]
         setfield_gc(p1, i1, descr=valuedescr)
         guard_true(i4) []
         setfield_gc(p1, i2, descr=valuedescr)
-        jump(p1, i1, i2, 1)
+        jump(p1, i1, i2, i5, i5)
         """
         self.optimize_loop(ops, expected, preamble)
 
@@ -1902,14 +2034,14 @@
         i4 = int_neg(i2)
         setfield_gc(p1, NULL, descr=nextdescr)
         escape()
-        jump(p1, i2, i4)
-        """
-        expected = """
-        [p1, i2, i4]
+        jump(p1, i2, i4, i4)
+        """
+        expected = """
+        [p1, i2, i4, i5]
         guard_true(i4) [p1]
         setfield_gc(p1, NULL, descr=nextdescr)
         escape()
-        jump(p1, i2, 1)
+        jump(p1, i2, i5, i5)
         """
         self.optimize_loop(ops, expected, preamble)
 
@@ -1931,14 +2063,14 @@
         i4 = int_neg(i2)
         setfield_gc(p1, NULL, descr=nextdescr)
         escape()
-        jump(p1, i2, i4)
-        """
-        expected = """
-        [p1, i2, i4]
+        jump(p1, i2, i4, i4)
+        """
+        expected = """
+        [p1, i2, i4, i5]
         guard_true(i4) [i2, p1]
         setfield_gc(p1, NULL, descr=nextdescr)
         escape()
-        jump(p1, i2, 1)
+        jump(p1, i2, i5, i5)
         """
         self.optimize_loop(ops, expected)
 
@@ -1954,14 +2086,22 @@
         setfield_gc(p1, i2, descr=valuedescr)
         jump(p1, i1, i2, i4)
         """
-        preamble = ops
-        expected = """
-        [p1, i1, i2, i4]
+        preamble = """
+        [p1, i1, i2, i3]
+        setfield_gc(p1, i1, descr=valuedescr)
+        i5 = int_eq(i3, 5)
+        guard_true(i5) []
+        i4 = int_neg(i2)
+        setfield_gc(p1, i2, descr=valuedescr)
+        jump(p1, i1, i2, i4, i4)
+        """
+        expected = """
+        [p1, i1, i2, i4, i7]
         setfield_gc(p1, i1, descr=valuedescr)
         i5 = int_eq(i4, 5)
         guard_true(i5) []
         setfield_gc(p1, i2, descr=valuedescr)
-        jump(p1, i1, i2, 5)
+        jump(p1, i1, i2, i7, i7)
         """
         self.optimize_loop(ops, expected, preamble)
 
@@ -2035,7 +2175,25 @@
         jump(p1)
         """
         self.optimize_loop(ops, expected)
-
+        
+    def test_duplicate_getarrayitem_2(self):
+        ops = """
+        [p1, i0]
+        i2 = getarrayitem_gc(p1, 0, descr=arraydescr2)
+        i3 = getarrayitem_gc(p1, 1, descr=arraydescr2)
+        i4 = getarrayitem_gc(p1, 0, descr=arraydescr2)
+        i5 = getarrayitem_gc(p1, 1, descr=arraydescr2)
+        i6 = int_add(i3, i4)
+        i7 = int_add(i0, i6)
+        jump(p1, i7)
+        """
+        expected = """
+        [p1, i0, i6]
+        i7 = int_add(i0, i6)
+        jump(p1, i7, i6)
+        """
+        self.optimize_loop(ops, expected)
+        
     def test_duplicate_getarrayitem_after_setarrayitem_1(self):
         ops = """
         [p1, p2]
@@ -2161,15 +2319,15 @@
         p2 = new_with_vtable(ConstClass(node_vtable))
         setfield_gc(p2, p4, descr=nextdescr)
         setfield_gc(p1, p2, descr=nextdescr)
-        jump(p1, i2, i4, p4)
-        """
-        expected = """
-        [p1, i2, i4, p4]
+        jump(p1, i2, i4, p4, i4)
+        """
+        expected = """
+        [p1, i2, i4, p4, i5]
         guard_true(i4) [p1, p4]
         p2 = new_with_vtable(ConstClass(node_vtable))
         setfield_gc(p2, p4, descr=nextdescr)
         setfield_gc(p1, p2, descr=nextdescr)
-        jump(p1, i2, 1, p4)
+        jump(p1, i2, i5, p4, i5)
         """
         self.optimize_loop(ops, expected, preamble)
 
@@ -2625,6 +2783,101 @@
         """
         self.optimize_loop(ops, expected, preamble)
 
+    def test_remove_duplicate_pure_op_ovf_with_lazy_setfield(self):
+        py.test.skip('this optimization is not yet supported')
+        ops = """
+        [i1, p1]
+        i3 = int_add_ovf(i1, 1)
+        guard_no_overflow() []
+        i3b = int_is_true(i3)
+        guard_true(i3b) []
+        setfield_gc(p1, i1, descr=valuedescr)
+        i4 = int_add_ovf(i1, 1)
+        guard_no_overflow() []
+        i4b = int_is_true(i4)
+        guard_true(i4b) []
+        escape(i3)
+        escape(i4)
+        jump(i1, p1)
+        """
+        preamble = """
+        [i1, p1]
+        i3 = int_add_ovf(i1, 1)
+        guard_no_overflow() []
+        i3b = int_is_true(i3)
+        guard_true(i3b) []
+        setfield_gc(p1, i1, descr=valuedescr)        
+        escape(i3)
+        escape(i3)
+        jump(i1, p1, i3)
+        """
+        expected = """
+        [i1, p1, i3]
+        setfield_gc(p1, i1, descr=valuedescr)        
+        escape(i3)
+        escape(i3)
+        jump(i1, p1, i3)
+        """
+        self.optimize_loop(ops, expected, preamble)
+
+    def test_ovf_guard_in_short_preamble1(self):
+        ops = """
+        [p8, p11, i24]
+        p26 = new_with_vtable(ConstClass(node_vtable))
+        setfield_gc(p26, i24, descr=adescr)        
+        i34 = getfield_gc_pure(p11, descr=valuedescr)
+        i35 = getfield_gc_pure(p26, descr=adescr)
+        i36 = int_add_ovf(i34, i35)
+        guard_no_overflow() []
+        jump(p8, p11, i35)
+        """
+        expected = """
+        [p8, p11, i26]
+        jump(p8, p11, i26)        
+        """
+        self.optimize_loop(ops, expected)
+        
+    def test_ovf_guard_in_short_preamble2(self):
+        ops = """
+        [p8, p11, p12]
+        p16 = getfield_gc(p8, descr=valuedescr)
+        i17 = getfield_gc(p8, descr=nextdescr)
+        i19 = getfield_gc(p16, descr=valuedescr)
+        i20 = int_ge(i17, i19)
+        guard_false(i20) []
+        i21 = getfield_gc(p16, descr=otherdescr)
+        i22 = getfield_gc(p16, descr=nextdescr)
+        i23 = int_mul(i17, i22)
+        i24 = int_add(i21, i23)
+        p26 = new_with_vtable(ConstClass(node_vtable))
+        setfield_gc(p26, i24, descr=adescr)
+        i28 = int_add(i17, 1)
+        setfield_gc(p8, i28, descr=nextdescr)
+        i34 = getfield_gc_pure(p11, descr=valuedescr)
+        i35 = getfield_gc_pure(p26, descr=adescr)
+        guard_nonnull(p12) []
+        i36 = int_add_ovf(i34, i35)
+        guard_no_overflow() []
+        p38 = new_with_vtable(ConstClass(node_vtable))
+        setfield_gc(p38, i36, descr=adescr)
+        jump(p8, p11, p26)
+        """
+        expected = """
+        [p8, p11, i24, i39, i19, p16, i21, i34]
+        i40 = int_ge(i39, i19)
+        guard_false(i40) []
+        i41 = getfield_gc(p16, descr=nextdescr)
+        i42 = int_mul(i39, i41)
+        i43 = int_add(i21, i42)
+        i44 = int_add(i39, 1)
+        setfield_gc(p8, i44, descr=nextdescr)
+        i45 = int_add_ovf(i34, i43)
+        guard_no_overflow() []
+        jump(p8, p11, i43, i44, i19, p16, i21, i34)
+        """
+        self.optimize_loop(ops, expected)
+
+
     def test_int_and_or_with_zero(self):
         ops = """
         [i0, i1]
@@ -3899,11 +4152,13 @@
         jump(p4364)
         """
         expected = """
-        [i0, i1]
+        [i0]
+        i1 = int_sub_ovf(i0, 1)
+        guard_no_overflow() []
         escape(i1)
         i2 = int_add_ovf(i0, 1)
         guard_no_overflow() []
-        jump(i2, i0)
+        jump(i2)
         """
         self.optimize_loop(ops, expected)
 
@@ -4867,32 +5122,38 @@
 
     def test_invariant_ovf(self):
         ops = """
-        [i0, i1, i10, i11, i12]
+        [i0, i1, i10, i11, i20, i21]
         i2 = int_add_ovf(i0, i1)
         guard_no_overflow() []
         i3 = int_sub_ovf(i0, i1)
         guard_no_overflow() []
         i4 = int_mul_ovf(i0, i1)
         guard_no_overflow() []
+        escape(i2)
+        escape(i3)
+        escape(i4)
         i24 = int_mul_ovf(i10, i11)
         guard_no_overflow() []
         i23 = int_sub_ovf(i10, i11)
         guard_no_overflow() []
         i22 = int_add_ovf(i10, i11)
         guard_no_overflow() []
-        jump(i0, i1, i2, i3, i4)
-        """
-        expected = """
-        [i0, i1, i10, i11, i12]
+        jump(i0, i1, i20, i21, i20, i21)
+        """
+        expected = """
+        [i0, i1, i10, i11, i2, i3, i4]
+        escape(i2)
+        escape(i3)
+        escape(i4)        
         i24 = int_mul_ovf(i10, i11)
         guard_no_overflow() []
         i23 = int_sub_ovf(i10, i11)
         guard_no_overflow() []
         i22 = int_add_ovf(i10, i11)
         guard_no_overflow() []
-        jump(i0, i1, i10, i11, i12)
-        """
-        self.optimize_loop(ops, expected, ops)
+        jump(i0, i1, i10, i11, i2, i3, i4) 
+        """
+        self.optimize_loop(ops, expected)
 
     def test_value_proven_to_be_constant_after_two_iterations(self):
         class FakeDescr(AbstractDescr):
@@ -4908,8 +5169,8 @@
         ops = """
         [p0, p1, p2, p3, i4, p5, i6, p7, p8, p9, p14]
         guard_value(i4, 3) []
-        guard_class(p9, 17278984) []
-        guard_class(p9, 17278984) []
+        guard_class(p9, ConstClass(node_vtable)) []
+        guard_class(p9, ConstClass(node_vtable)) []
         p22 = getfield_gc(p9, descr=inst_w_seq)
         guard_nonnull(p22) []
         i23 = getfield_gc(p9, descr=inst_index)
@@ -4924,11 +5185,11 @@
         guard_class(p14, 17273920) []
         guard_class(p14, 17273920) []
 
-        p75 = new_with_vtable(17278984)
+        p75 = new_with_vtable(ConstClass(node_vtable))
         setfield_gc(p75, p14, descr=inst_w_seq)
         setfield_gc(p75, 0, descr=inst_index)
-        guard_class(p75, 17278984) []
-        guard_class(p75, 17278984) []
+        guard_class(p75, ConstClass(node_vtable)) []
+        guard_class(p75, ConstClass(node_vtable)) []
         p79 = getfield_gc(p75, descr=inst_w_seq)
         guard_nonnull(p79) []
         i80 = getfield_gc(p75, descr=inst_index)
@@ -4974,6 +5235,7 @@
         """
         expected = """
         [p0]
+        setfield_gc(p0, p0, descr=valuedescr)
         jump(p0)
         """
         self.optimize_loop(ops, expected, preamble)
@@ -5060,9 +5322,7 @@
         self.optimize_loop(ops, expected)
 
     # ----------
-    def optimize_strunicode_loop(self, ops, optops, preamble=None):
-        if not preamble:
-            preamble = ops # FIXME: Force proper testing of preamble
+    def optimize_strunicode_loop(self, ops, optops, preamble):
         # check with the arguments passed in
         self.optimize_loop(ops, optops, preamble)
         # check with replacing 'str' with 'unicode' everywhere
@@ -5082,7 +5342,7 @@
         [i0]
         jump(i0)
         """
-        self.optimize_strunicode_loop(ops, expected)
+        self.optimize_strunicode_loop(ops, expected, expected)
 
     def test_newstr_2(self):
         ops = """
@@ -5098,7 +5358,7 @@
         [i0, i1]
         jump(i1, i0)
         """
-        self.optimize_strunicode_loop(ops, expected)
+        self.optimize_strunicode_loop(ops, expected, expected)
 
     def test_str_concat_1(self):
         ops = """
@@ -5106,7 +5366,7 @@
         p3 = call(0, p1, p2, descr=strconcatdescr)
         jump(p2, p3)
         """
-        expected = """
+        preamble = """
         [p1, p2]
         i1 = strlen(p1)
         i2 = strlen(p2)
@@ -5114,9 +5374,18 @@
         p3 = newstr(i3)
         copystrcontent(p1, p3, 0, 0, i1)
         copystrcontent(p2, p3, 0, i1, i2)
-        jump(p2, p3)
-        """
-        self.optimize_strunicode_loop(ops, expected)
+        jump(p2, p3, i2)
+        """
+        expected = """
+        [p1, p2, i1]
+        i2 = strlen(p2)
+        i3 = int_add(i1, i2)
+        p3 = newstr(i3)
+        copystrcontent(p1, p3, 0, 0, i1)
+        copystrcontent(p2, p3, 0, i1, i2)
+        jump(p2, p3, i2)
+        """
+        self.optimize_strunicode_loop(ops, expected, preamble)
 
     def test_str_concat_vstr2_str(self):
         ops = """
@@ -5137,7 +5406,7 @@
         copystrcontent(p2, p3, 0, 2, i2)
         jump(i1, i0, p3)
         """
-        self.optimize_strunicode_loop(ops, expected)
+        self.optimize_strunicode_loop(ops, expected, expected)
 
     def test_str_concat_str_vstr2(self):
         ops = """
@@ -5160,7 +5429,7 @@
         i6 = int_add(i5, 1)      # will be killed by the backend
         jump(i1, i0, p3)
         """
-        self.optimize_strunicode_loop(ops, expected)
+        self.optimize_strunicode_loop(ops, expected, expected)
 
     def test_str_concat_str_str_str(self):
         ops = """
@@ -5169,7 +5438,7 @@
         p5 = call(0, p4, p3, descr=strconcatdescr)
         jump(p2, p3, p5)
         """
-        expected = """
+        preamble = """
         [p1, p2, p3]
         i1 = strlen(p1)
         i2 = strlen(p2)
@@ -5180,9 +5449,20 @@
         copystrcontent(p1, p5, 0, 0, i1)
         copystrcontent(p2, p5, 0, i1, i2)
         copystrcontent(p3, p5, 0, i12, i3)
-        jump(p2, p3, p5)
-        """
-        self.optimize_strunicode_loop(ops, expected)
+        jump(p2, p3, p5, i2, i3)
+        """
+        expected = """
+        [p1, p2, p3, i1, i2]
+        i12 = int_add(i1, i2)
+        i3 = strlen(p3)
+        i123 = int_add(i12, i3)
+        p5 = newstr(i123)
+        copystrcontent(p1, p5, 0, 0, i1)
+        copystrcontent(p2, p5, 0, i1, i2)
+        copystrcontent(p3, p5, 0, i12, i3)
+        jump(p2, p3, p5, i2, i3)
+        """
+        self.optimize_strunicode_loop(ops, expected, preamble)
 
     def test_str_concat_str_cstr1(self):
         ops = """
@@ -5199,7 +5479,7 @@
         strsetitem(p3, i2, 120)     # == ord('x')
         jump(p3)
         """
-        self.optimize_strunicode_loop(ops, expected)
+        self.optimize_strunicode_loop(ops, expected, expected)
 
     def test_str_concat_consts(self):
         ops = """
@@ -5210,17 +5490,58 @@
         escape(p3)
         jump()
         """
-        preamble = """
-        []
-        p3 = call(0, s"ab", s"cde", descr=strconcatdescr)
-        escape(p3)
-        jump()
-        """
         expected = """
         []
         escape(s"abcde")
         jump()
         """
+        self.optimize_strunicode_loop(ops, expected, expected)
+
+    def test_str_slice_len_surviving1(self):
+        ops = """
+        [p1, i1, i2, i3]
+        escape(i3)
+        p2 = call(0, p1, i1, i2, descr=strslicedescr)
+        i4 = strlen(p2)
+        jump(p1, i1, i2, i4)
+        """
+        preamble = """
+        [p1, i1, i2, i3]
+        escape(i3)
+        i4 = int_sub(i2, i1)
+        jump(p1, i1, i2, i4, i4)
+        """
+        expected = """
+        [p1, i1, i2, i3, i4]
+        escape(i3)
+        jump(p1, i1, i2, i4, i4)
+        """
+        self.optimize_strunicode_loop(ops, expected, preamble)
+
+    def test_str_slice_len_surviving2(self):
+        ops = """
+        [p1, i1, i2, p2]
+        i5 = getfield_gc(p2, descr=valuedescr)
+        escape(i5)
+        p3 = call(0, p1, i1, i2, descr=strslicedescr)
+        i4 = strlen(p3)
+        setfield_gc(p2, i4, descr=valuedescr)
+        jump(p1, i1, i2, p2)
+        """
+        preamble = """
+        [p1, i1, i2, p2]
+        i5 = getfield_gc(p2, descr=valuedescr)
+        escape(i5)
+        i4 = int_sub(i2, i1)
+        setfield_gc(p2, i4, descr=valuedescr)
+        jump(p1, i1, i2, p2, i4, i4)
+        """
+        expected = """
+        [p1, i1, i2, p2, i5, i6]
+        escape(i5)
+        setfield_gc(p2, i6, descr=valuedescr)
+        jump(p1, i1, i2, p2, i6, i6)
+        """
         self.optimize_strunicode_loop(ops, expected, preamble)
 
     def test_str_slice_1(self):
@@ -5229,14 +5550,20 @@
         p2 = call(0, p1, i1, i2, descr=strslicedescr)
         jump(p2, i1, i2)
         """
-        expected = """
+        preamble = """
         [p1, i1, i2]
         i3 = int_sub(i2, i1)
         p2 = newstr(i3)
         copystrcontent(p1, p2, i1, 0, i3)
-        jump(p2, i1, i2)
-        """
-        self.optimize_strunicode_loop(ops, expected)
+        jump(p2, i1, i2, i3)
+        """
+        expected = """
+        [p1, i1, i2, i3]
+        p2 = newstr(i3)
+        copystrcontent(p1, p2, i1, 0, i3)
+        jump(p2, i1, i2, i3)
+        """
+        self.optimize_strunicode_loop(ops, expected, preamble)
 
     def test_str_slice_2(self):
         ops = """
@@ -5250,7 +5577,7 @@
         copystrcontent(p1, p2, 0, 0, i2)
         jump(p2, i2)
         """
-        self.optimize_strunicode_loop(ops, expected)
+        self.optimize_strunicode_loop(ops, expected, expected)
 
     def test_str_slice_3(self):
         ops = """
@@ -5259,16 +5586,22 @@
         p3 = call(0, p2, i3, i4, descr=strslicedescr)
         jump(p3, i1, i2, i3, i4)
         """
-        expected = """
+        preamble = """
         [p1, i1, i2, i3, i4]
         i0 = int_sub(i2, i1)     # killed by the backend
         i5 = int_sub(i4, i3)
         i6 = int_add(i1, i3)
         p3 = newstr(i5)
         copystrcontent(p1, p3, i6, 0, i5)
-        jump(p3, i1, i2, i3, i4)
-        """
-        self.optimize_strunicode_loop(ops, expected)
+        jump(p3, i1, i2, i3, i4, i5, i6)
+        """
+        expected = """
+        [p1, i1, i2, i3, i4, i5, i6]
+        p3 = newstr(i5)
+        copystrcontent(p1, p3, i6, 0, i5)
+        jump(p3, i1, i2, i3, i4, i5, i6)
+        """
+        self.optimize_strunicode_loop(ops, expected, preamble)
 
     def test_str_slice_getitem1(self):
         ops = """
@@ -5278,15 +5611,21 @@
         escape(i4)
         jump(p1, i1, i2, i3)
         """
-        expected = """
+        preamble = """
         [p1, i1, i2, i3]
         i6 = int_sub(i2, i1)      # killed by the backend
         i5 = int_add(i1, i3)
         i4 = strgetitem(p1, i5)
         escape(i4)
-        jump(p1, i1, i2, i3)
-        """
-        self.optimize_strunicode_loop(ops, expected)
+        jump(p1, i1, i2, i3, i5)
+        """
+        expected = """
+        [p1, i1, i2, i3, i5]
+        i4 = strgetitem(p1, i5)
+        escape(i4)
+        jump(p1, i1, i2, i3, i5)
+        """
+        self.optimize_strunicode_loop(ops, expected, preamble)
 
     def test_str_slice_plain(self):
         ops = """
@@ -5304,7 +5643,7 @@
         escape(i4)
         jump(i3, i4)
         """
-        self.optimize_strunicode_loop(ops, expected)
+        self.optimize_strunicode_loop(ops, expected, expected)
 
     def test_str_slice_concat(self):
         ops = """
@@ -5313,7 +5652,7 @@
         p4 = call(0, p3, p2, descr=strconcatdescr)
         jump(p4, i1, i2, p2)
         """
-        expected = """
+        preamble = """
         [p1, i1, i2, p2]
         i3 = int_sub(i2, i1)     # length of p3
         i4 = strlen(p2)
@@ -5321,14 +5660,22 @@
         p4 = newstr(i5)
         copystrcontent(p1, p4, i1, 0, i3)
         copystrcontent(p2, p4, 0, i3, i4)
-        jump(p4, i1, i2, p2)
-        """
-        self.optimize_strunicode_loop(ops, expected)
+        jump(p4, i1, i2, p2, i5, i3, i4)
+        """
+        expected = """
+        [p1, i1, i2, p2, i5, i3, i4]
+        p4 = newstr(i5)
+        copystrcontent(p1, p4, i1, 0, i3)
+        copystrcontent(p2, p4, 0, i3, i4)
+        jump(p4, i1, i2, p2, i5, i3, i4)
+        """
+        self.optimize_strunicode_loop(ops, expected, preamble)
 
     def test_strgetitem_bounds(self):
         ops = """
         [p0, i0]
         i1 = strgetitem(p0, i0)
+        i10 = strgetitem(p0, i0)
         i2 = int_lt(i1, 256)
         guard_true(i2) []
         i3 = int_ge(i1, 0)
@@ -5337,6 +5684,7 @@
         """
         expected = """
         [p0, i0]
+        i1 = strgetitem(p0, i0)
         jump(p0, i0)
         """
         self.optimize_loop(ops, expected)
@@ -5345,12 +5693,14 @@
         ops = """
         [p0, i0]
         i1 = unicodegetitem(p0, i0)
+        i10 = unicodegetitem(p0, i0)        
         i2 = int_lt(i1, 0)
         guard_false(i2) []
         jump(p0, i0)
         """
         expected = """
         [p0, i0]
+        i1 = unicodegetitem(p0, i0)        
         jump(p0, i0)
         """
         self.optimize_loop(ops, expected)
@@ -5387,7 +5737,7 @@
         self.optimize_loop(ops, expected)
 
     # ----------
-    def optimize_strunicode_loop_extradescrs(self, ops, optops, preamble=None):
+    def optimize_strunicode_loop_extradescrs(self, ops, optops, preamble):
         class FakeCallInfoCollection:
             def callinfo_for_oopspec(self, oopspecindex):
                 calldescrtype = type(LLtypeMixin.strequaldescr)
@@ -5410,7 +5760,7 @@
         escape(i0)
         jump(p1, p2)
         """
-        self.optimize_strunicode_loop_extradescrs(ops, ops)
+        self.optimize_strunicode_loop_extradescrs(ops, ops, ops)
 
     def test_str_equal_noop2(self):
         ops = """
@@ -5420,7 +5770,7 @@
         escape(i0)
         jump(p1, p2, p3)
         """
-        expected = """
+        preamble = """
         [p1, p2, p3]
         i1 = strlen(p1)
         i2 = strlen(p2)
@@ -5430,10 +5780,19 @@
         copystrcontent(p2, p4, 0, i1, i2)
         i0 = call(0, p3, p4, descr=strequaldescr)
         escape(i0)
-        jump(p1, p2, p3)
-        """
-        self.optimize_strunicode_loop_extradescrs(ops,
-                                                  expected)
+        jump(p1, p2, p3, i3, i1, i2)
+        """
+        expected = """
+        [p1, p2, p3, i3, i1, i2]
+        p4 = newstr(i3)
+        copystrcontent(p1, p4, 0, 0, i1)
+        copystrcontent(p2, p4, 0, i1, i2)
+        i0 = call(0, p3, p4, descr=strequaldescr)
+        escape(i0)
+        jump(p1, p2, p3, i3, i1, i2)
+        """
+        self.optimize_strunicode_loop_extradescrs(ops, expected,
+                                                  preamble)
 
     def test_str_equal_slice1(self):
         ops = """
@@ -5443,15 +5802,21 @@
         escape(i0)
         jump(p1, i1, i2, p3)
         """
-        expected = """
+        preamble = """
         [p1, i1, i2, p3]
         i3 = int_sub(i2, i1)
         i0 = call(0, p1, i1, i3, p3, descr=streq_slice_checknull_descr)
         escape(i0)
-        jump(p1, i1, i2, p3)
-        """
-        self.optimize_strunicode_loop_extradescrs(ops,
-                                                  expected)
+        jump(p1, i1, i2, p3, i3)
+        """
+        expected = """
+        [p1, i1, i2, p3, i3]
+        i0 = call(0, p1, i1, i3, p3, descr=streq_slice_checknull_descr)
+        escape(i0)
+        jump(p1, i1, i2, p3, i3)
+        """
+        self.optimize_strunicode_loop_extradescrs(ops, expected,
+                                                  preamble)
 
     def test_str_equal_slice2(self):
         ops = """
@@ -5461,15 +5826,21 @@
         escape(i0)
         jump(p1, i1, i2, p3)
         """
-        expected = """
+        preamble = """
         [p1, i1, i2, p3]
         i4 = int_sub(i2, i1)
         i0 = call(0, p1, i1, i4, p3, descr=streq_slice_checknull_descr)
         escape(i0)
-        jump(p1, i1, i2, p3)
-        """
-        self.optimize_strunicode_loop_extradescrs(ops,
-                                                  expected)
+        jump(p1, i1, i2, p3, i4)
+        """
+        expected = """
+        [p1, i1, i2, p3, i4]
+        i0 = call(0, p1, i1, i4, p3, descr=streq_slice_checknull_descr)
+        escape(i0)
+        jump(p1, i1, i2, p3, i4)
+        """
+        self.optimize_strunicode_loop_extradescrs(ops, expected,
+                                                  preamble)
 
     def test_str_equal_slice3(self):
         ops = """
@@ -5481,14 +5852,21 @@
         jump(p1, i1, i2, p3)
         """
         expected = """
+        [p1, i1, i2, p3, i4]
+        i0 = call(0, p1, i1, i4, p3, descr=streq_slice_nonnull_descr)
+        escape(i0)
+        jump(p1, i1, i2, p3, i4)
+        """
+        preamble = """
         [p1, i1, i2, p3]
+        guard_nonnull(p3) []        
         i4 = int_sub(i2, i1)
         i0 = call(0, p1, i1, i4, p3, descr=streq_slice_nonnull_descr)
         escape(i0)
-        jump(p1, i1, i2, p3)
+        jump(p1, i1, i2, p3, i4)
         """
         self.optimize_strunicode_loop_extradescrs(ops,
-                                                  expected, ops)
+                                                  expected, preamble)
 
     def test_str_equal_slice4(self):
         ops = """
@@ -5498,15 +5876,21 @@
         escape(i0)
         jump(p1, i1, i2)
         """
-        expected = """
+        preamble = """
         [p1, i1, i2]
         i3 = int_sub(i2, i1)
         i0 = call(0, p1, i1, i3, 120, descr=streq_slice_char_descr)
         escape(i0)
-        jump(p1, i1, i2)
-        """
-        self.optimize_strunicode_loop_extradescrs(ops,
-                                                  expected)
+        jump(p1, i1, i2, i3)
+        """
+        expected = """
+        [p1, i1, i2, i3]
+        i0 = call(0, p1, i1, i3, 120, descr=streq_slice_char_descr)
+        escape(i0)
+        jump(p1, i1, i2, i3)
+        """
+        self.optimize_strunicode_loop_extradescrs(ops, expected,
+                                                  preamble)
 
     def test_str_equal_slice5(self):
         ops = """
@@ -5518,15 +5902,21 @@
         escape(i0)
         jump(p1, i1, i2, i3)
         """
-        expected = """
+        preamble = """
         [p1, i1, i2, i3]
         i4 = int_sub(i2, i1)
         i0 = call(0, p1, i1, i4, i3, descr=streq_slice_char_descr)
         escape(i0)
-        jump(p1, i1, i2, i3)
-        """
-        self.optimize_strunicode_loop_extradescrs(ops,
-                                                  expected)
+        jump(p1, i1, i2, i3, i4)
+        """
+        expected = """
+        [p1, i1, i2, i3, i4]
+        i0 = call(0, p1, i1, i4, i3, descr=streq_slice_char_descr)
+        escape(i0)
+        jump(p1, i1, i2, i3, i4)
+        """
+        self.optimize_strunicode_loop_extradescrs(ops, expected,
+                                                  preamble)
 
     def test_str_equal_none1(self):
         ops = """
@@ -5541,7 +5931,7 @@
         escape(i0)
         jump(p1)
         """
-        self.optimize_strunicode_loop_extradescrs(ops, expected)
+        self.optimize_strunicode_loop_extradescrs(ops, expected, expected)
 
     def test_str_equal_none2(self):
         ops = """
@@ -5556,7 +5946,7 @@
         escape(i0)
         jump(p1)
         """
-        self.optimize_strunicode_loop_extradescrs(ops, expected)
+        self.optimize_strunicode_loop_extradescrs(ops, expected, expected)
 
     def test_str_equal_nonnull1(self):
         ops = """
@@ -5572,7 +5962,14 @@
         escape(i0)
         jump(p1)
         """
-        self.optimize_strunicode_loop_extradescrs(ops, expected)
+        preamble = """
+        [p1]
+        guard_nonnull(p1) []
+        i0 = call(0, p1, s"hello world", descr=streq_nonnull_descr)
+        escape(i0)
+        jump(p1)
+        """
+        self.optimize_strunicode_loop_extradescrs(ops, expected, preamble)
 
     def test_str_equal_nonnull2(self):
         ops = """
@@ -5583,13 +5980,19 @@
         jump(p1)
         """
         expected = """
+        [p1, i0]
+        escape(i0)
+        jump(p1, i0)
+        """
+        preamble = """
         [p1]
+        guard_nonnull(p1) []
         i1 = strlen(p1)
         i0 = int_eq(i1, 0)
         escape(i0)
-        jump(p1)
-        """
-        self.optimize_strunicode_loop_extradescrs(ops, expected)
+        jump(p1, i0)
+        """
+        self.optimize_strunicode_loop_extradescrs(ops, expected, preamble)
 
     def test_str_equal_nonnull3(self):
         ops = """
@@ -5605,7 +6008,14 @@
         escape(i0)
         jump(p1)
         """
-        self.optimize_strunicode_loop_extradescrs(ops, expected)
+        preamble = """
+        [p1]
+        guard_nonnull(p1) []
+        i0 = call(0, p1, 120, descr=streq_nonnull_char_descr)
+        escape(i0)
+        jump(p1)
+        """
+        self.optimize_strunicode_loop_extradescrs(ops, expected, preamble)
 
     def test_str_equal_nonnull4(self):
         ops = """
@@ -5615,7 +6025,7 @@
         escape(i0)
         jump(p1, p2)
         """
-        expected = """
+        preamble = """
         [p1, p2]
         i1 = strlen(p1)
         i2 = strlen(p2)
@@ -5625,9 +6035,18 @@
         copystrcontent(p2, p4, 0, i1, i2)
         i0 = call(0, s"hello world", p4, descr=streq_nonnull_descr)
         escape(i0)
-        jump(p1, p2)
-        """
-        self.optimize_strunicode_loop_extradescrs(ops, expected)
+        jump(p1, p2, i3, i1, i2)
+        """
+        expected = """
+        [p1, p2, i3, i1, i2]
+        p4 = newstr(i3)
+        copystrcontent(p1, p4, 0, 0, i1)
+        copystrcontent(p2, p4, 0, i1, i2)
+        i0 = call(0, s"hello world", p4, descr=streq_nonnull_descr)
+        escape(i0)
+        jump(p1, p2, i3, i1, i2)
+        """
+        self.optimize_strunicode_loop_extradescrs(ops, expected, preamble)
 
     def test_str_equal_chars0(self):
         ops = """
@@ -5642,7 +6061,7 @@
         escape(1)
         jump(i1)
         """
-        self.optimize_strunicode_loop_extradescrs(ops, expected)
+        self.optimize_strunicode_loop_extradescrs(ops, expected, expected)
 
     def test_str_equal_chars1(self):
         ops = """
@@ -5653,13 +6072,42 @@
         escape(i0)
         jump(i1)
         """
-        expected = """
+        preamble = """
         [i1]
         i0 = int_eq(i1, 120)     # ord('x')
         escape(i0)
-        jump(i1)
-        """
-        self.optimize_strunicode_loop_extradescrs(ops, expected)
+        jump(i1, i0)
+        """
+        expected = """
+        [i1, i0]
+        escape(i0)
+        jump(i1, i0)
+        """
+        self.optimize_strunicode_loop_extradescrs(ops, expected, preamble)
+
+    def test_str_equal_nonconst(self):
+        ops = """
+        [i1, i2]
+        p1 = newstr(1)
+        strsetitem(p1, 0, i1)
+        p2 = newstr(1)
+        strsetitem(p2, 0, i2)
+        i0 = call(0, p1, p2, descr=strequaldescr)
+        escape(i0)
+        jump(i1, i2)
+        """
+        preamble = """
+        [i1, i2]
+        i0 = int_eq(i1, i2)
+        escape(i0)
+        jump(i1, i2, i0)
+        """
+        expected = """
+        [i1, i2, i0]
+        escape(i0)
+        jump(i1, i2, i0)
+        """
+        self.optimize_strunicode_loop_extradescrs(ops, expected, preamble)
 
     def test_str_equal_chars2(self):
         ops = """
@@ -5680,7 +6128,7 @@
         escape(i0)
         jump(i1, i2)
         """
-        self.optimize_strunicode_loop_extradescrs(ops, expected)
+        self.optimize_strunicode_loop_extradescrs(ops, expected, expected)
 
     def test_str_equal_chars3(self):
         ops = """
@@ -5695,7 +6143,7 @@
         escape(i0)
         jump(p1)
         """
-        self.optimize_strunicode_loop_extradescrs(ops, expected)
+        self.optimize_strunicode_loop_extradescrs(ops, expected, expected)
 
     def test_str_equal_lengthmismatch1(self):
         ops = """
@@ -5711,7 +6159,7 @@
         escape(0)
         jump(i1)
         """
-        self.optimize_strunicode_loop_extradescrs(ops, expected)
+        self.optimize_strunicode_loop_extradescrs(ops, expected, expected)
 
     def test_str2unicode_constant(self):
         ops = """
@@ -5725,7 +6173,7 @@
         escape(u"xy")
         jump()
         """
-        self.optimize_strunicode_loop_extradescrs(ops, expected)
+        self.optimize_strunicode_loop_extradescrs(ops, expected, expected)
 
     def test_str2unicode_nonconstant(self):
         ops = """
@@ -5734,7 +6182,7 @@
         escape(p1)
         jump(p1)
         """
-        self.optimize_strunicode_loop_extradescrs(ops, ops)
+        self.optimize_strunicode_loop_extradescrs(ops, ops, ops)
         # more generally, supporting non-constant but virtual cases is
         # not obvious, because of the exception UnicodeDecodeError that
         # can be raised by ll_str2unicode()
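A minimal plain-Python illustration of that point (not part of the test file): decoding a byte string to unicode can fail at runtime, so the conversion cannot simply be dropped or folded for non-constant input.

    try:
        '\xff'.decode('ascii')   # non-ASCII byte, the kind of input ll_str2unicode() might see
    except UnicodeDecodeError:
        pass                     # the operation can raise, so it must be kept in the trace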
@@ -5858,6 +6306,213 @@
         """
         self.optimize_loop(ops, expected)
 
+    def test_constant_getfield1(self):
+        ops = """
+        [p1, p187, i184]
+        p188 = getarrayitem_gc(p187, 42, descr=<GcPtrArrayDescr>)
+        guard_value(p188, ConstPtr(myptr)) []
+        p25 = getfield_gc(ConstPtr(myptr), descr=otherdescr)
+        jump(p25, p187, i184)
+        """
+        preamble = """
+        [p1, p187, i184]
+        p188 = getarrayitem_gc(p187, 42, descr=<GcPtrArrayDescr>)
+        guard_value(p188, ConstPtr(myptr)) []
+        p25 = getfield_gc(ConstPtr(myptr), descr=otherdescr)
+        jump(p25, p187, i184, p25)
+        """
+        short = """
+        [p1, p187, i184]
+        p188 = getarrayitem_gc(p187, 42, descr=<GcPtrArrayDescr>)
+        guard_value(p188, ConstPtr(myptr)) []
+        p25 = getfield_gc(ConstPtr(myptr), descr=otherdescr)
+        jump(p1, p187, i184, p25)
+        """
+        expected = """
+        [p25, p187, i184, p189]
+        jump(p189, p187, i184, p189)
+        """
+        self.optimize_loop(ops, expected, preamble, expected_short=short)
+
+    def test_constant_getfield1bis(self):
+        ops = """
+        [p1, p187, i184]
+        p188 = getarrayitem_gc(p187, 42, descr=<GcPtrArrayDescr>)
+        guard_value(p188, ConstPtr(myptr)) []
+        p25 = getfield_gc(ConstPtr(myptr), descr=otherdescr)
+        p26 = call(p25, descr=nonwritedescr)
+        jump(p26, p187, i184)
+        """
+        expected = """
+        [p24, p187, i184, p25]
+        p26 = call(p25, descr=nonwritedescr)
+        jump(p26, p187, i184, p25)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_constant_getfield2(self):
+        ops = """
+        [p19]
+        p22 = getfield_gc(p19, descr=otherdescr)
+        guard_value(p19, ConstPtr(myptr)) []
+        jump(p19)
+        """
+        expected = """
+        []
+        jump()
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_constant_getfield3(self):
+        ops = """
+        [p19, p20, p21]
+        p22 = getfield_gc(p19, descr=otherdescr)
+        guard_value(p19, ConstPtr(myptr)) []
+        p23 = getfield_gc(ConstPtr(myptr), descr=otherdescr)
+        jump(p20, p21, p21)
+        """
+        expected = """
+        [p20, p21]
+        p22 = getfield_gc(p20, descr=otherdescr)
+        guard_value(p20, ConstPtr(myptr)) []
+        jump(p21, p21)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_constant_getfield4(self):
+        ops = """
+        [p19, p20, p21]
+        p22 = getfield_gc(p19, descr=otherdescr)
+        p23 = getfield_gc(ConstPtr(myptr), descr=otherdescr)
+        guard_value(p19, ConstPtr(myptr)) []
+        jump(p20, p21, p21)
+        """
+        expected = """
+        [p20, p21]
+        p22 = getfield_gc(p20, descr=otherdescr)
+        guard_value(p20, ConstPtr(myptr)) []
+        jump(p21, p21)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_constants_among_virtual_fields(self):
+        ops = """
+        [p19, p20, p21]
+        p1 = getfield_gc(p20, descr=valuedescr)
+        p2 = getfield_gc(p1, descr=otherdescr)
+        pv = new_with_vtable(ConstClass(node_vtable))
+        setfield_gc(pv, p19, descr=valuedescr)
+        p22 = getfield_gc(p19, descr=otherdescr)
+        guard_value(p19, ConstPtr(myptr)) []
+        p23 = getfield_gc(ConstPtr(myptr), descr=otherdescr)
+        jump(p21, pv, p21)
+        """
+        expected = """
+        [p20]
+        p22 = getfield_gc(p20, descr=otherdescr)
+        guard_value(p20, ConstPtr(myptr)) []
+        jump(ConstPtr(myptr))
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_dont_cache_setfields(self):
+        # Naively caching the last two getfields here would specialize
+        # the loop to the state where the first two getfields return
+        # the same value. That state would need to be guarded for
+        # in the short preamble. Instead we make sure to keep the
+        # results of the two getfields as separate boxes.
+        ops = """
+        [p0, p1, ii, ii2]
+        i1 = getfield_gc(p0, descr=valuedescr)
+        i2 = getfield_gc(p1, descr=otherdescr)
+        i3 = int_add(i1, i2)
+        setfield_gc(p0, ii, descr=valuedescr)
+        setfield_gc(p1, ii, descr=otherdescr)
+        i4 = getfield_gc(p0, descr=valuedescr)
+        i5 = getfield_gc(p1, descr=otherdescr)
+        jump(p0, p1, ii2, ii)
+        """
+        preamble = """
+        [p0, p1, ii, ii2]
+        i1 = getfield_gc(p0, descr=valuedescr)
+        i2 = getfield_gc(p1, descr=otherdescr)
+        i3 = int_add(i1, i2)
+        setfield_gc(p0, ii, descr=valuedescr)
+        setfield_gc(p1, ii, descr=otherdescr)
+        jump(p0, p1, ii2, ii, ii, ii)
+        """
+        expected = """
+        [p0, p1, ii, ii2, i1, i2]
+        i3 = int_add(i1, i2)
+        setfield_gc(p0, ii, descr=valuedescr)
+        setfield_gc(p1, ii, descr=otherdescr)
+        jump(p0, p1, ii2, ii, ii, ii)
+        """
+        self.optimize_loop(ops, expected)
+
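A minimal plain-Python sketch of the point made in the comment above (not part of the test file; `Obj` is a hypothetical stand-in for the boxed objects): whether the two reads return equal values is a property of the heap state at loop entry, so it must not be baked into the specialized loop.

    class Obj(object):
        def __init__(self, v):
            self.value = v

    p0, p1 = Obj(1), Obj(2)
    assert p0.value != p1.value      # a perfectly legal entry state
    p0.value = p1.value = 7          # the state the two setfields create
    assert p0.value == p1.value      # equal here, but only by accident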
+    def test_dont_specialize_on_boxes_equal(self):
+        ops = """
+        [p0, p1, p3, ii, ii2]
+        i1 = getfield_gc(p0, descr=valuedescr)
+        i2 = getfield_gc(p1, descr=otherdescr)
+        setfield_gc(p3, i1, descr=adescr)
+        setfield_gc(p3, i2, descr=bdescr)
+        i4 = int_eq(i1, i2)
+        guard_true(i4) []
+        i5 = int_gt(ii, 42)
+        guard_true(i5) []
+        jump(p0, p1, p3, ii2, ii)
+        """
+        expected = """
+        [p0, p1, p3, ii, ii2, i1, i2]
+        setfield_gc(p3, i1, descr=adescr)
+        setfield_gc(p3, i2, descr=bdescr)
+        i5 = int_gt(ii, 42)
+        guard_true(i5) []        
+        jump(p0, p1, p3, ii2, ii, i1, i2)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_lazy_setfield_forced_by_jump_needing_additional_inputargs(self):
+        ops = """
+        [p0, p3]
+        i1 = getfield_gc(p0, descr=valuedescr)
+        setfield_gc(p3, i1, descr=otherdescr)
+        jump(p0, p3)
+        """
+        expected = """
+        [p0, p3, i1]
+        setfield_gc(p3, i1, descr=otherdescr)
+        jump(p0, p3, i1)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_guards_before_getfields_in_short_preamble(self):
+        ops = """
+        [p0]
+        guard_nonnull_class(p0, ConstClass(node_vtable)) []
+        p1 = getfield_gc(p0, descr=nextdescr)
+        guard_nonnull_class(p1, ConstClass(node_vtable)) []
+        p2 = getfield_gc(p1, descr=nextdescr)
+        guard_nonnull_class(p2, ConstClass(node_vtable)) []        
+        jump(p0)
+        """
+        expected = """
+        [p0]
+        jump(p0)
+        """
+        short = """
+        [p0]
+        p1 = getfield_gc(p0, descr=nextdescr)
+        guard_nonnull(p1) []
+        guard_class(p1, ConstClass(node_vtable)) []
+        p2 = getfield_gc(p1, descr=nextdescr)
+        guard_nonnull(p2) []
+        guard_class(p2, ConstClass(node_vtable)) []        
+        jump(p0)
+        """
+        self.optimize_loop(ops, expected, expected_short=short)
+        
     def test_forced_virtual_pure_getfield(self):
         ops = """
         [p0]
@@ -5906,5 +6561,533 @@
         """
         self.optimize_loop(ops, expected)
 
+    def test_setgetfield_counter(self):
+        ops = """
+        [p1]
+        i2 = getfield_gc(p1, descr=valuedescr)
+        i3 = int_add(i2, 1)
+        setfield_gc(p1, i3, descr=valuedescr)
+        jump(p1)
+        """
+        expected = """
+        [p1, i1]
+        i2 = int_add(i1, 1)
+        setfield_gc(p1, i2, descr=valuedescr)
+        jump(p1, i2)
+        """
+        self.optimize_loop(ops, expected)
+        
+    def test_loopinvariant_strlen(self):
+        ops = """
+        [p9]
+        i843 = strlen(p9)
+        call(i843, descr=nonwritedescr)
+        jump(p9)
+        """
+        preamble = """
+        [p9]
+        i843 = strlen(p9)
+        call(i843, descr=nonwritedescr)
+        jump(p9, i843)
+        """
+        short = """
+        [p9]
+        i843 = strlen(p9)
+        i848 = int_ge(i843, 0)
+        guard_true(i848) []
+        jump(p9, i843)
+        """
+        expected = """
+        [p9, i2]
+        call(i2, descr=nonwritedescr)
+        jump(p9, i2)
+        """
+        self.optimize_loop(ops, expected, preamble, expected_short=short)
+
+    def test_loopinvariant_strlen_with_bound(self):
+        ops = """
+        [p9]
+        i843 = strlen(p9)
+        i1 = int_gt(i843, 7)
+        guard_true(i1) []
+        call(i843, descr=nonwritedescr)
+        jump(p9)
+        """
+        expected = """
+        [p9, i2]
+        call(i2, descr=nonwritedescr)
+        jump(p9, i2)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_loopinvariant_strgetitem(self):
+        ops = """
+        [p9, i1]
+        i843 = strgetitem(p9, i1)
+        call(i843, descr=nonwritedescr)
+        jump(p9, i1)
+        """
+        self.optimize_loop(ops, ops)
+
+    def test_loopinvariant_unicodelen(self):
+        ops = """
+        [p9]
+        i843 = unicodelen(p9)
+        call(i843, descr=nonwritedescr)
+        jump(p9)
+        """
+        expected = """
+        [p9, i2]
+        call(i2, descr=nonwritedescr)
+        jump(p9, i2)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_loopinvariant_unicodegetitem(self):
+        ops = """
+        [p9, i1]
+        i843 = unicodegetitem(p9, i1)
+        call(i843, descr=nonwritedescr)
+        jump(p9, i1)
+        """
+        self.optimize_loop(ops, ops)
+
+    def test_loopinvariant_arraylen(self):
+        ops = """
+        [p9]
+        i843 = arraylen_gc(p9)
+        call(i843, descr=nonwritedescr)
+        jump(p9)
+        """
+        expected = """
+        [p9, i2]
+        call(i2, descr=nonwritedescr)
+        jump(p9, i2)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_loopinvariant_getarrayitem(self):
+        ops = """
+        [p0]
+        p1 = getfield_gc(p0, descr=nextdescr)
+        p2 = getarrayitem_gc(p1, 7, descr=<GcPtrArrayDescr>)
+        call(p2, descr=nonwritedescr)
+        jump(p0)
+        """
+        short = """
+        [p0]
+        p1 = getfield_gc(p0, descr=nextdescr)
+        guard_nonnull(p1) []
+        i1 = arraylen_gc(p1)
+        i2 = int_ge(i1, 8)
+        guard_true(i2) []
+        p2 = getarrayitem_gc(p1, 7, descr=<GcPtrArrayDescr>)
+        jump(p0, p2, p1)
+        """
+        expected = """
+        [p0, p2, p1]
+        call(p2, descr=nonwritedescr)
+        i3 = arraylen_gc(p1) # Should be killed by backend
+        jump(p0, p2, p1)
+        """
+        self.optimize_loop(ops, expected, expected_short=short)
+
+    def test_duplicated_virtual(self):
+        ops = """
+        [p1, p2]
+        p3 = new_with_vtable(ConstClass(node_vtable))
+        jump(p3, p3)
+        """
+        expected = """
+        []
+        jump()
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_chained_virtuals(self):
+        ops = """
+        [p0, p1]
+        p2 = new_with_vtable(ConstClass(node_vtable))
+        p3 = new_with_vtable(ConstClass(node_vtable))
+        setfield_gc(p2, p3, descr=nextdescr) 
+        jump(p2, p3)
+        """
+        expected = """
+        []
+        jump()
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_arraylen_bound(self):
+        ops = """
+        [p1, i]
+        p2 = getarrayitem_gc(p1, 7, descr=<GcPtrArrayDescr>)
+        i1 = arraylen_gc(p1)
+        i2 = int_ge(i1, 8)
+        guard_true(i2) []
+        jump(p2, i2)
+        """
+        expected = """
+        [p1]        
+        p2 = getarrayitem_gc(p1, 7, descr=<GcPtrArrayDescr>)
+        i1 = arraylen_gc(p1)
+        jump(p2)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_loopinvariant_getarrayitem_gc_pure(self):
+        ops = """
+        [p9, i1]
+        i843 = getarrayitem_gc_pure(p9, i1)
+        call(i843, descr=nonwritedescr)
+        jump(p9, i1)
+        """
+        self.optimize_loop(ops, ops)
+
+    def test_loopinvariant_constant_getarrayitem_pure(self):
+        ops = """
+        [p0]
+        p1 = getfield_gc(p0, descr=nextdescr)
+        p2 = getarrayitem_gc_pure(p1, 7, descr=<GcPtrArrayDescr>)
+        call(p2, descr=nonwritedescr)
+        jump(p0)
+        """
+        short = """
+        [p0]
+        p1 = getfield_gc(p0, descr=nextdescr)
+        guard_nonnull(p1) []
+        i1 = arraylen_gc(p1)
+        i2 = int_ge(i1, 8)
+        guard_true(i2) []
+        p2 = getarrayitem_gc_pure(p1, 7, descr=<GcPtrArrayDescr>)
+        jump(p0, p2, p1)
+        """
+        expected = """
+        [p0, p2, p1]
+        call(p2, descr=nonwritedescr)
+        i3 = arraylen_gc(p1) # Should be killed by backend
+        jump(p0, p2, p1)
+        """
+        self.optimize_loop(ops, expected, expected_short=short)
+        
+        
+    def test_loopinvariant_constant_strgetitem(self):
+        ops = """
+        [p0]
+        p1 = getfield_gc(p0, descr=nextdescr)
+        i22 = strgetitem(p1, 7)
+        call(i22, descr=nonwritedescr)
+        jump(p0)
+        """
+        short = """
+        [p0]
+        p1 = getfield_gc(p0, descr=nextdescr)
+        guard_nonnull(p1) []
+        i1 = strlen(p1)
+        i2 = int_ge(i1, 8)
+        guard_true(i2) []
+        i22 = strgetitem(p1, 7, descr=<GcPtrArrayDescr>)
+        i8 = int_ge(i22, 0)
+        guard_true(i8) []
+        i9 = int_le(i22, 255)
+        guard_true(i9) []
+        jump(p0, i22, p1)
+        """
+        expected = """
+        [p0, i22, p1]
+        call(i22, descr=nonwritedescr)
+        i3 = strlen(p1) # Should be killed by backend
+        jump(p0, i22, p1)
+        """
+        self.optimize_loop(ops, expected, expected_short=short)
+
+    def test_loopinvariant_constant_unicodegetitem(self):
+        ops = """
+        [p0]
+        p1 = getfield_gc(p0, descr=nextdescr)
+        i22 = unicodegetitem(p1, 7)
+        call(i22, descr=nonwritedescr)
+        jump(p0)
+        """
+        short = """
+        [p0]
+        p1 = getfield_gc(p0, descr=nextdescr)
+        guard_nonnull(p1) []
+        i1 = unicodelen(p1)
+        i2 = int_ge(i1, 8)
+        guard_true(i2) []
+        i22 = unicodegetitem(p1, 7, descr=<GcPtrArrayDescr>)
+        i8 = int_ge(i22, 0)
+        guard_true(i8) []
+        jump(p0, i22, p1)
+        """
+        expected = """
+        [p0, i22, p1]
+        call(i22, descr=nonwritedescr)
+        i3 = unicodelen(p1) # Should be killed by backend        
+        jump(p0, i22, p1)
+        """
+        self.optimize_loop(ops, expected, expected_short=short)
+        
+    def test_propagate_virtual_arraylen(self):
+        ops = """
+        [p0]
+        p404 = new_array(2, descr=arraydescr)
+        p403 = new_array(3, descr=arraydescr)
+        i405 = arraylen_gc(p404, descr=arraydescr)
+        i406 = arraylen_gc(p403, descr=arraydescr)
+        i407 = int_add_ovf(i405, i406)
+        guard_no_overflow() []
+        call(i407, descr=nonwritedescr)
+        jump(p0)
+        """
+        expected = """
+        [p0]
+        call(5, descr=nonwritedescr)
+        jump(p0)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_propagate_virtual_strunicodelen(self):
+        ops = """
+        [p0]
+        p404 = newstr(2)
+        p403 = newunicode(3)
+        i405 = strlen(p404)
+        i406 = unicodelen(p403)
+        i407 = int_add_ovf(i405, i406)
+        guard_no_overflow() []
+        call(i407, descr=nonwritedescr)
+        jump(p0)
+        """
+        expected = """
+        [p0]
+        call(5, descr=nonwritedescr)
+        jump(p0)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_keep_getfields_and_inputargs_separate(self):
+        ops = """
+        [p0]
+        call(p0, descr=nonwritedescr)
+        p1 = getfield_gc(ConstPtr(myptr), descr=nextdescr)
+        call(p1, descr=writeadescr)
+        jump(p1)
+        """
+        expected = """
+        [p0, p1]
+        call(p0, descr=nonwritedescr)
+        call(p1, descr=writeadescr)
+        jump(p1, p1)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_value_guard_arraylen_reused(self):
+        ops = """
+        [p0, p1]
+        p10 = getfield_gc(p0, descr=nextdescr)
+        p11 = getfield_gc(p1, descr=nextdescr)
+        i1 = arraylen_gc(p10, descr=arraydescr)
+        getarrayitem_gc(p11, 1, descr=arraydescr)
+        call(i1, descr=nonwritedescr)
+        jump(p1, p0)
+        """
+        expected = """
+        [p0, p1, p10, p11]
+        i1 = arraylen_gc(p10, descr=arraydescr)
+        getarrayitem_gc(p11, 1, descr=arraydescr)
+        call(i1, descr=nonwritedescr)        
+        jump(p1, p0, p11, p10)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_cache_constant_setfield(self):
+        ops = """
+        [p5]
+        i10 = getfield_gc(p5, descr=valuedescr)
+        call(i10, descr=nonwritedescr) 
+        setfield_gc(p5, 1, descr=valuedescr)
+        jump(p5)
+        """
+        preamble = """
+        [p5]
+        i10 = getfield_gc(p5, descr=valuedescr)
+        call(i10, descr=nonwritedescr) 
+        setfield_gc(p5, 1, descr=valuedescr)
+        jump(p5)
+        """
+        expected = """
+        [p5]
+        call(1, descr=nonwritedescr) 
+        jump(p5)
+        """
+        self.optimize_loop(ops, expected, preamble)
+
+    def test_dont_mixup_equal_boxes(self):
+        ops = """
+        [p8]
+        i9 = getfield_gc_pure(p8, descr=valuedescr)
+        i10 = int_gt(i9, 0)
+        guard_true(i10) []
+        i29 = int_lshift(i9, 1)
+        i30 = int_rshift(i29, 1)
+        i40 = int_ne(i30, i9)
+        guard_false(i40) []
+        jump(p8)
+        """
+        expected = """
+        [p8]
+        jump(p8)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_specialized_to_cached_constant_guard(self):
+        ops = """
+        [p9]
+        i16 = getfield_gc(p9, descr=valuedescr)
+        i17 = int_is_true(i16)
+        guard_false(i17) []
+        call_assembler(i17, descr=asmdescr)
+        i18 = getfield_gc(p9, descr=valuedescr)
+        guard_value(i18, 0) []
+        jump(p9)
+        """
+        expected = """
+        [p9]
+        call_assembler(0, descr=asmdescr)
+        i18 = getfield_gc(p9, descr=valuedescr)
+        guard_value(i18, 0) []        
+        jump(p9)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_specialized_to_cached_constant_setfield(self):
+        ops = """
+        [p9]
+        i16 = getfield_gc(p9, descr=valuedescr)
+        i17 = int_is_true(i16)
+        guard_false(i17) []
+        call_assembler(i17, descr=asmdescr)
+        i18 = setfield_gc(p9, 0, descr=valuedescr)
+        jump(p9)
+        """
+        expected = """
+        [p9]
+        call_assembler(0, descr=asmdescr)
+        i18 = setfield_gc(p9, 0, descr=valuedescr)
+        jump(p9)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_cached_equal_fields(self):
+        ops = """
+        [p5, p6]
+        i10 = getfield_gc(p5, descr=valuedescr)
+        i11 = getfield_gc(p6, descr=nextdescr)
+        call(i10, i11, descr=nonwritedescr)
+        setfield_gc(p6, i10, descr=nextdescr)        
+        jump(p5, p6)
+        """
+        expected = """
+        [p5, p6, i10, i11]
+        call(i10, i11, descr=nonwritedescr)
+        setfield_gc(p6, i10, descr=nextdescr)        
+        jump(p5, p6, i10, i10)
+        """
+        self.optimize_loop(ops, expected)
+        
+    def test_forced_counter(self):
+        # XXX: VIRTUALHEAP (see above)
+        py.test.skip("would be fixed by make heap optimizer aware of virtual setfields")
+        ops = """
+        [p5, p8]
+        i9 = getfield_gc_pure(p5, descr=valuedescr)
+        call(i9, descr=nonwritedescr)
+        i11 = getfield_gc_pure(p8, descr=valuedescr)
+        i13 = int_add_ovf(i11, 1)
+        guard_no_overflow() []
+        p22 = new_with_vtable(ConstClass(node_vtable))
+        setfield_gc(p22, i13, descr=valuedescr)
+        setfield_gc(ConstPtr(myptr), p22, descr=adescr)
+        jump(p22, p22)
+        """
+        expected = """
+        [p8, i9]
+        call(i9, descr=nonwritedescr)
+        i13 = int_add_ovf(i9, 1)
+        guard_no_overflow() []
+        p22 = new_with_vtable(ConstClass(node_vtable))
+        setfield_gc(p22, i13, descr=valuedescr)
+        setfield_gc(ConstPtr(myptr), p22, descr=adescr)
+        jump(p22, i13)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_constptr_samebox_getfield_setfield(self):
+        ops = """
+        [p0]
+        p10 = getfield_gc(ConstPtr(myptr), descr=valuedescr)
+        call(p10, descr=nonwritedescr)
+        setfield_gc(ConstPtr(myptr), p10, descr=valuedescr)
+        jump(p0)
+        """
+        expected = """
+        [p0, p10]
+        call(p10, descr=nonwritedescr)
+        jump(p0, p10)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_constptr_constptr_getfield_setfield(self):
+        ops = """
+        [p0]
+        p10 = getfield_gc(ConstPtr(myptr), descr=valuedescr)
+        guard_value(p10, ConstPtr(myptr2)) []
+        call(p10, descr=nonwritedescr)
+        setfield_gc(ConstPtr(myptr), ConstPtr(myptr2), descr=valuedescr)
+        jump(p0)
+        """
+        expected = """
+        [p0]
+        call(ConstPtr(myptr2), descr=nonwritedescr)
+        jump(p0)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_box_samebox_getfield_setfield(self):
+        ops = """
+        [p0]
+        p10 = getfield_gc(p0, descr=valuedescr)
+        call(p10, descr=nonwritedescr)
+        setfield_gc(p0, p10, descr=valuedescr)
+        jump(p0)
+        """
+        expected = """
+        [p0, p10]
+        call(p10, descr=nonwritedescr)
+        jump(p0, p10)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_box_constptr_getfield_setfield(self):
+        ops = """
+        [p0]
+        p10 = getfield_gc(p0, descr=valuedescr)
+        guard_value(p10, ConstPtr(myptr2)) []
+        call(p10, descr=nonwritedescr)
+        setfield_gc(p0, ConstPtr(myptr2), descr=valuedescr)
+        jump(p0)
+        """
+        expected = """
+        [p0]
+        call(ConstPtr(myptr2), descr=nonwritedescr)
+        jump(p0)
+        """
+        self.optimize_loop(ops, expected)
+
+        
+
 class TestLLtype(OptimizeOptTest, LLtypeMixin):
     pass
+        
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_util.py b/pypy/jit/metainterp/optimizeopt/test/test_util.py
--- a/pypy/jit/metainterp/optimizeopt/test/test_util.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_util.py
@@ -95,10 +95,13 @@
                                      ('other', lltype.Ptr(NODE)))
     node = lltype.malloc(NODE)
     node.parent.typeptr = node_vtable
+    node2 = lltype.malloc(NODE2)
+    node2.parent.parent.typeptr = node_vtable2
     nodebox = BoxPtr(lltype.cast_opaque_ptr(llmemory.GCREF, node))
     myptr = nodebox.value
     myptr2 = lltype.cast_opaque_ptr(llmemory.GCREF, lltype.malloc(NODE))
-    nodebox2 = BoxPtr(lltype.cast_opaque_ptr(llmemory.GCREF, node))
+    nullptr = lltype.nullptr(llmemory.GCREF.TO)
+    nodebox2 = BoxPtr(lltype.cast_opaque_ptr(llmemory.GCREF, node2))
     nodesize = cpu.sizeof(NODE)
     nodesize2 = cpu.sizeof(NODE2)
     valuedescr = cpu.fielddescrof(NODE, 'value')
@@ -317,6 +320,10 @@
         self.config = get_pypy_config(translating=True)
         self.config.translation.jit_ffi = True
 
+    class warmrunnerdesc:
+        class memory_manager:
+            retrace_limit = 5
+            max_retrace_guards = 15
 
 class Storage(compile.ResumeGuardDescr):
     "for tests."
diff --git a/pypy/jit/metainterp/optimizeopt/unroll.py b/pypy/jit/metainterp/optimizeopt/unroll.py
--- a/pypy/jit/metainterp/optimizeopt/unroll.py
+++ b/pypy/jit/metainterp/optimizeopt/unroll.py
@@ -1,74 +1,15 @@
 from pypy.jit.codewriter.effectinfo import EffectInfo
+from pypy.jit.metainterp.optimizeopt.virtualstate import VirtualStateAdder, ShortBoxes
 from pypy.jit.metainterp.compile import ResumeGuardDescr
 from pypy.jit.metainterp.history import TreeLoop, LoopToken
 from pypy.jit.metainterp.jitexc import JitException
 from pypy.jit.metainterp.optimize import InvalidLoop, RetraceLoop
 from pypy.jit.metainterp.optimizeopt.optimizer import *
+from pypy.jit.metainterp.optimizeopt.generalize import KillHugeIntBounds
 from pypy.jit.metainterp.resoperation import rop, ResOperation
 from pypy.jit.metainterp.resume import Snapshot
 from pypy.rlib.debug import debug_print
-
-# Assumptions
-# ===========
-#
-# For this to work some assumptions had to be made about the
-# optimizations performed. At least for the optimizations that are
-# allowed to operate across the loop boundaries. To enforce this, the
-# optimizer chain is recreated at the end of the preamble and only the
-# state of the optimizations that fulfill those assumptions are kept.
-# Since part of this state is stored in virtuals all OptValue objects
-# are also recreated to allow virtuals not supported to be forced.
-#
-# First of all, the optimizations are not allowed to introduce new
-# boxes. It is the unoptimized version of the trace that is inlined to
-# form the second iteration of the loop. Otherwise the
-# state of the virtuals would not be updated correctly. Whenever some
-# box from the first iteration is reused in the second iteration, it
-# is added to the input arguments of the loop as well as to the
-# arguments of the jump at the end of the preamble. This means that
-# inlining the jump from the unoptimized trace will not work since it
-# contains too few arguments.  Instead the jump at the end of the
-# preamble is inlined. If the arguments of that jump contains boxes
-# that were produced by one of the optimizations, and thus never seen
-# by the inliner, the inliner will not be able to inline them. There
-# is no way of known what these boxes are supposed to contain in the
-# third iteration.
-#
-# The second assumption is that the state of the optimizer should be the
-# same after the second iteration as after the first. This have forced
-# us to disable store sinking across loop boundaries. Consider the
-# following trace
-#
-#         [p1, p2]
-#         i1 = getfield_gc(p1, descr=nextdescr)
-#         i2 = int_sub(i1, 1)
-#         i2b = int_is_true(i2)
-#         guard_true(i2b) []
-#         setfield_gc(p2, i2, descr=nextdescr)
-#         p3 = new_with_vtable(ConstClass(node_vtable))
-#         jump(p2, p3)
-#
-# At the start of the preamble, p1 and p2 will be pointers. The
-# setfield_gc will be removed by the store sinking heap optimizer, and
-# p3 will become a virtual. Jumping to the loop will make p1 a pointer
-# and p2 a virtual at the start of the loop. The setfield_gc will now
-# be absorbed into the virtual p2 and never seen by the heap
-# optimizer. At the end of the loop both p2 and p3 are virtuals, but
-# the loop needs p2 to be a pointer to be able to call itself. So it
-# is forced producing the operations
-#
-#         p2 = new_with_vtable(ConstClass(node_vtable))
-#         setfield_gc(p2, i2, descr=nextdescr)
-#
-# In this case the setfield_gc is not store sinked, which means we are
-# not in the same state at the end of the loop as at the end of the
-# preamble. When we now call the loop again, the first 4 operations of
-# the trace were optimized under the wrong assumption that the
-# setfield_gc was store sinked which could lead to errors. In this
-# case what would happen is that it would be inserted once more in
-# front of the guard.
-
-
+import sys, os
 
 # FIXME: Introduce some VirtualOptimizer super class instead
 
@@ -81,7 +22,10 @@
         assert len(inputargs) == len(jump_args)
         self.argmap = {}
         for i in range(len(inputargs)):
-           self.argmap[inputargs[i]] = jump_args[i]
+            if inputargs[i] in self.argmap:
+                assert self.argmap[inputargs[i]] == jump_args[i]
+            else:
+                self.argmap[inputargs[i]] = jump_args[i]
         self.snapshot_map = {None: None}
 
     def inline_op(self, newop, ignore_result=False, clone=True,
@@ -126,112 +70,6 @@
         self.snapshot_map[snapshot] = new_snapshot
         return new_snapshot
 
-class VirtualState(object):
-    def __init__(self, state):
-        self.state = state
-
-    def generalization_of(self, other):
-        assert len(self.state) == len(other.state)
-        for i in range(len(self.state)):
-            if not self.state[i].generalization_of(other.state[i]):
-                return False
-        return True
-
-    def generate_guards(self, other, args, cpu, extra_guards):
-        assert len(self.state) == len(other.state) == len(args)
-        for i in range(len(self.state)):
-            self.state[i].generate_guards(other.state[i], args[i],
-                                          cpu, extra_guards)
-
-class VirtualStateAdder(resume.ResumeDataVirtualAdder):
-    def __init__(self, optimizer):
-        self.fieldboxes = {}
-        self.optimizer = optimizer
-        self.info = {}
-
-    def register_virtual_fields(self, keybox, fieldboxes):
-        self.fieldboxes[keybox] = fieldboxes
-
-    def already_seen_virtual(self, keybox):
-        return keybox in self.fieldboxes
-
-    def getvalue(self, box):
-        return self.optimizer.getvalue(box)
-
-    def state(self, box):
-        value = self.getvalue(box)
-        box = value.get_key_box()
-        try:
-            info = self.info[box]
-        except KeyError:
-            if value.is_virtual():
-                self.info[box] = info = value.make_virtual_info(self, None)
-                flds = self.fieldboxes[box]
-                info.fieldstate = [self.state(b) for b in flds]
-            else:
-                self.info[box] = info = self.make_not_virtual(value)
-        return info
-
-    def get_virtual_state(self, jump_args):
-        for box in jump_args:
-            value = self.getvalue(box)
-            value.get_args_for_fail(self)
-        return VirtualState([self.state(box) for box in jump_args])
-
-
-    def make_not_virtual(self, value):
-        return NotVirtualInfo(value)
-
-class NotVirtualInfo(resume.AbstractVirtualInfo):
-    def __init__(self, value):
-        self.known_class = value.known_class
-        self.level = value.level
-        if value.intbound is None:
-            self.intbound = IntBound(MININT, MAXINT)
-        else:
-            self.intbound = value.intbound.clone()
-        if value.is_constant():
-            self.constbox = value.box
-        else:
-            self.constbox = None
-
-    def generalization_of(self, other):
-        # XXX This will always retrace instead of forcing anything which
-        # might be what we want sometimes?
-        if not isinstance(other, NotVirtualInfo):
-            return False
-        if other.level < self.level:
-            return False
-        if self.level == LEVEL_CONSTANT:
-            if not self.constbox.same_constant(other.constbox):
-                return False
-        elif self.level == LEVEL_KNOWNCLASS:
-            if self.known_class != other.known_class: # FIXME: use issubclass?
-                return False
-        return self.intbound.contains_bound(other.intbound)
-
-    def _generate_guards(self, other, box, cpu, extra_guards):
-        if not isinstance(other, NotVirtualInfo):
-            raise InvalidLoop
-        if self.level == LEVEL_KNOWNCLASS and \
-           box.nonnull() and \
-           self.known_class.same_constant(cpu.ts.cls_of_box(box)):
-            # Note: This is only a hint on what the class of box was
-            # during the trace. There are actually no guarentees that this
-            # box realy comes from a trace. The hint is used here to choose
-            # between either eimtting a guard_class and jumping to an
-            # excisting compiled loop or retracing the loop. Both
-            # alternatives will always generate correct behaviour, but
-            # performace will differ.
-            op = ResOperation(rop.GUARD_CLASS, [box, self.known_class], None)
-            extra_guards.append(op)
-            return
-        # Remaining cases are probably not interesting
-        raise InvalidLoop
-        if self.level == LEVEL_CONSTANT:
-            import pdb; pdb.set_trace()
-            raise NotImplementedError
-
 
 class UnrollOptimizer(Optimization):
     """Unroll the loop into two iterations. The first one will
@@ -245,6 +83,17 @@
             newop = op.clone()
             self.cloned_operations.append(newop)
 
+    def fix_snapshot(self, loop, jump_args, snapshot):
+        if snapshot is None:
+            return None
+        snapshot_args = snapshot.boxes 
+        new_snapshot_args = []
+        for a in snapshot_args:
+            a = self.getvalue(a).get_key_box()
+            new_snapshot_args.append(a)
+        prev = self.fix_snapshot(loop, jump_args, snapshot.prev)
+        return Snapshot(prev, new_snapshot_args)
+            
     def propagate_all_forward(self):
         loop = self.optimizer.loop
         jumpop = loop.operations[-1]
@@ -260,43 +109,119 @@
             assert jumpop.getdescr() is loop.token
             jump_args = jumpop.getarglist()
             jumpop.initarglist([])
-            #virtual_state = [self.getvalue(a).is_virtual() for a in jump_args]
+            self.optimizer.flush()
+
+            KillHugeIntBounds(self.optimizer).apply()
+            
+            loop.preamble.operations = self.optimizer.newoperations
+            jump_args = [self.getvalue(a).get_key_box() for a in jump_args]
+
+            start_resumedescr = loop.preamble.start_resumedescr.clone_if_mutable()
+            self.start_resumedescr = start_resumedescr
+            assert isinstance(start_resumedescr, ResumeGuardDescr)
+            start_resumedescr.rd_snapshot = self.fix_snapshot(loop, jump_args,
+                                                              start_resumedescr.rd_snapshot)
+
             modifier = VirtualStateAdder(self.optimizer)
             virtual_state = modifier.get_virtual_state(jump_args)
+            
+            values = [self.getvalue(arg) for arg in jump_args]
+            inputargs = virtual_state.make_inputargs(values)
+            short_inputargs = virtual_state.make_inputargs(values, keyboxes=True)
 
-            loop.preamble.operations = self.optimizer.newoperations
+            self.constant_inputargs = {}
+            for box in jump_args: 
+                const = self.get_constant_box(box)
+                if const:
+                    self.constant_inputargs[box] = const
+
+            sb = ShortBoxes(self.optimizer, inputargs + self.constant_inputargs.keys())
+            self.short_boxes = sb
+            preamble_optimizer = self.optimizer
             loop.preamble.quasi_immutable_deps = (
                 self.optimizer.quasi_immutable_deps)
-            self.optimizer = self.optimizer.reconstruct_for_next_iteration()
-            inputargs = self.inline(self.cloned_operations,
-                                    loop.inputargs, jump_args)
+            self.optimizer = self.optimizer.new()
+            loop.quasi_immutable_deps = self.optimizer.quasi_immutable_deps
+
+            logops = self.optimizer.loop.logops
+            if logops:
+                args = ", ".join([logops.repr_of_arg(arg) for arg in inputargs])
+                debug_print('inputargs:       ' + args)
+                args = ", ".join([logops.repr_of_arg(arg) for arg in short_inputargs])
+                debug_print('short inputargs: ' + args)
+                self.short_boxes.debug_print(logops)
+
+            # Force virtuals among the jump_args of the preamble to get the
+            # operations needed to set up the proper state of those virtuals
+            # in the peeled loop
+            inputarg_setup_ops = []
+            preamble_optimizer.newoperations = []
+            seen = {}
+            for box in inputargs:
+                if box in seen:
+                    continue
+                seen[box] = True
+                value = preamble_optimizer.getvalue(box)
+                inputarg_setup_ops.extend(value.make_guards(box))
+            for box in short_inputargs:
+                if box in seen:
+                    continue
+                seen[box] = True
+                value = preamble_optimizer.getvalue(box)
+                value.force_box()
+            preamble_optimizer.flush()
+            inputarg_setup_ops += preamble_optimizer.newoperations
+
+            # Set up the state of the new optimizer by emitting the
+            # short preamble operations and discarding the result
+            self.optimizer.emitting_dissabled = True
+            for op in inputarg_setup_ops:
+                self.optimizer.send_extra_operation(op)
+            seen = {}
+            for op in self.short_boxes.operations():
+                self.ensure_short_op_emitted(op, self.optimizer, seen)
+                if op and op.result:
+                    # The order of these guards is not important as 
+                    # self.optimizer.emitting_dissabled is False
+                    value = preamble_optimizer.getvalue(op.result)
+                    for guard in value.make_guards(op.result):
+                        self.optimizer.send_extra_operation(guard)
+                    newresult = self.optimizer.getvalue(op.result).get_key_box()
+                    if newresult is not op.result:
+                        self.short_boxes.alias(newresult, op.result)
+            self.optimizer.flush()
+            self.optimizer.emitting_dissabled = False
+
+            # XXX Hack to prevent the arraylen/strlen/unicodelen ops generated
+            #     by value.make_guards() from ending up in pure_operations
+            for key, op in self.optimizer.pure_operations.items():
+                if not self.short_boxes.has_producer(op.result):
+                    del self.optimizer.pure_operations[key]
+
+            initial_inputargs_len = len(inputargs)
+            self.inliner = Inliner(loop.inputargs, jump_args)
+
+
+            short = self.inline(inputargs, self.cloned_operations,
+                                loop.inputargs, short_inputargs,
+                                virtual_state)
+            
             loop.inputargs = inputargs
-            jmp = ResOperation(rop.JUMP, loop.inputargs[:], None)
+            args = [preamble_optimizer.getvalue(self.short_boxes.original(a)).force_box()\
+                    for a in inputargs]
+            jmp = ResOperation(rop.JUMP, args, None)
             jmp.setdescr(loop.token)
             loop.preamble.operations.append(jmp)
 
             loop.operations = self.optimizer.newoperations
-            loop.quasi_immutable_deps = self.optimizer.quasi_immutable_deps
+            maxguards = self.optimizer.metainterp_sd.warmrunnerdesc.memory_manager.max_retrace_guards
+            
+            if self.optimizer.emitted_guards > maxguards:
+                loop.preamble.token.retraced_count = sys.maxint
 
-            start_resumedescr = loop.preamble.start_resumedescr.clone_if_mutable()
-            assert isinstance(start_resumedescr, ResumeGuardDescr)
-            snapshot = start_resumedescr.rd_snapshot
-            while snapshot is not None:
-                snapshot_args = snapshot.boxes
-                new_snapshot_args = []
-                for a in snapshot_args:
-                    if not isinstance(a, Const):
-                        a = loop.preamble.inputargs[jump_args.index(a)]
-                    new_snapshot_args.append(a)
-                snapshot.boxes = new_snapshot_args
-                snapshot = snapshot.prev
-
-            short = self.create_short_preamble(loop.preamble, loop)
             if short:
-                if False:
-                    # FIXME: This should save some memory but requires
-                    # a lot of tests to be fixed...
-                    loop.preamble.operations = short[:]
+                assert short[-1].getopnum() == rop.JUMP
+                short[-1].setdescr(loop.token)
 
                 # Turn guards into conditional jumps to the preamble
                 for i in range(len(short)):
@@ -304,20 +229,31 @@
                     if op.is_guard():
                         op = op.clone()
                         op.setfailargs(None)
-                        op.setdescr(start_resumedescr.clone_if_mutable())
+                        descr = self.start_resumedescr.clone_if_mutable()
+                        op.setdescr(descr)
                         short[i] = op
 
                 short_loop = TreeLoop('short preamble')
-                short_loop.inputargs = loop.preamble.inputargs[:]
+                short_loop.inputargs = short_inputargs
                 short_loop.operations = short
 
                 # Clone ops and boxes to get private versions and
-                newargs = [a.clonebox() for a in short_loop.inputargs]
+                boxmap = {}
+                newargs = [None] * len(short_loop.inputargs)
+                for i in range(len(short_loop.inputargs)):
+                    a = short_loop.inputargs[i]
+                    if a in boxmap:
+                        newargs[i] = boxmap[a]
+                    else:
+                        newargs[i] = a.clonebox()
+                        boxmap[a] = newargs[i]
                 inliner = Inliner(short_loop.inputargs, newargs)
+                for box, const in self.constant_inputargs.items():
+                    inliner.argmap[box] = const
                 short_loop.inputargs = newargs
                 ops = [inliner.inline_op(op) for op in short_loop.operations]
                 short_loop.operations = ops
-                descr = start_resumedescr.clone_if_mutable()
+                descr = self.start_resumedescr.clone_if_mutable()
                 inliner.inline_descr_inplace(descr)
                 short_loop.start_resumedescr = descr
 
@@ -335,322 +271,208 @@
                     if op.result:
                         op.result.forget_value()
 
-    def inline(self, loop_operations, loop_args, jump_args):
-        self.inliner = inliner = Inliner(loop_args, jump_args)
+    def inline(self, inputargs, loop_operations, loop_args, short_inputargs, virtual_state):
+        inliner = self.inliner
 
-        for v in self.optimizer.values.values():
-            v.last_guard_index = -1 # FIXME: Are there any more indexes stored?
+        short_jumpargs = inputargs[:]
 
-        inputargs = []
-        seen_inputargs = {}
-        for arg in jump_args:
-            boxes = []
-            self.getvalue(arg).enum_forced_boxes(boxes, seen_inputargs)
-            for a in boxes:
-                if not isinstance(a, Const):
-                    inputargs.append(a)
+        short = []
+        short_seen = {}
+        for box, const in self.constant_inputargs.items():
+            short_seen[box] = True
+
+        for op in self.short_boxes.operations():
+            if op is not None:
+                if len(self.getvalue(op.result).make_guards(op.result)) > 0:
+                    self.add_op_to_short(op, short, short_seen, False, True)
 
         # This loop is equivalent to the main optimization loop in
         # Optimizer.propagate_all_forward
+        jumpop = None
         for newop in loop_operations:
+            newop = inliner.inline_op(newop, clone=False)
             if newop.getopnum() == rop.JUMP:
-                newop.initarglist(inputargs)
-            newop = inliner.inline_op(newop, clone=False)
+                jumpop = newop
+                break
 
-            self.optimizer.first_optimization.propagate_forward(newop)
+            #self.optimizer.first_optimization.propagate_forward(newop)
+            self.optimizer.send_extra_operation(newop)
 
-        # Remove jump to make sure forced code are placed before it
-        newoperations = self.optimizer.newoperations
-        jmp = newoperations[-1]
-        assert jmp.getopnum() == rop.JUMP
-        self.optimizer.newoperations = newoperations[:-1]
+        self.boxes_created_this_iteration = {}
 
-        boxes_created_this_iteration = {}
-        jumpargs = jmp.getarglist()
+        assert jumpop
+        original_jumpargs = jumpop.getarglist()[:]
+        values = [self.getvalue(arg) for arg in jumpop.getarglist()]
+        jumpargs = virtual_state.make_inputargs(values)
+        jumpop.initarglist(jumpargs)
+        jmp_to_short_args = virtual_state.make_inputargs(values, keyboxes=True)
+        self.short_inliner = Inliner(short_inputargs, jmp_to_short_args)
+        
+        for box, const in self.constant_inputargs.items():
+            self.short_inliner.argmap[box] = const
 
-        # FIXME: Should also loop over operations added by forcing things in this loop
-        for op in newoperations:
-            boxes_created_this_iteration[op.result] = True
-            args = op.getarglist()
-            if op.is_guard():
-                args = args + op.getfailargs()
+        for op in short:
+            newop = self.short_inliner.inline_op(op)
+            self.optimizer.send_extra_operation(newop)
+        
+        self.optimizer.flush()
 
-            for a in args:
-                if not isinstance(a, Const) and not a in boxes_created_this_iteration:
-                    if a not in inputargs:
-                        inputargs.append(a)
-                        box = inliner.inline_arg(a)
-                        if box in self.optimizer.values:
-                            box = self.optimizer.values[box].force_box()
-                        jumpargs.append(box)
+        i = j = 0
+        while i < len(self.optimizer.newoperations) or j < len(jumpargs):
+            if i == len(self.optimizer.newoperations):
+                while j < len(jumpargs):
+                    a = jumpargs[j]
+                    if self.optimizer.loop.logops:
+                        debug_print('J:  ' + self.optimizer.loop.logops.repr_of_arg(a))
+                    self.import_box(a, inputargs, short, short_jumpargs,
+                                    jumpargs, short_seen)
+                    j += 1
+            else:
+                op = self.optimizer.newoperations[i]
 
-        jmp.initarglist(jumpargs)
-        self.optimizer.newoperations.append(jmp)
-        return inputargs
+                self.boxes_created_this_iteration[op.result] = True
+                args = op.getarglist()
+                if op.is_guard():
+                    args = args + op.getfailargs()
 
-    def sameop(self, op1, op2):
-        if op1.getopnum() != op2.getopnum():
-            return False
+                if self.optimizer.loop.logops:
+                    debug_print('OP: ' + self.optimizer.loop.logops.repr_of_resop(op))
+                for a in args:
+                    if self.optimizer.loop.logops:
+                        debug_print('A:  ' + self.optimizer.loop.logops.repr_of_arg(a))
+                    self.import_box(a, inputargs, short, short_jumpargs,
+                                    jumpargs, short_seen)
+                i += 1
 
-        args1 = op1.getarglist()
-        args2 = op2.getarglist()
-        if len(args1) != len(args2):
-            return False
-        for i in range(len(args1)):
-            box1, box2 = args1[i], args2[i]
-            val1 = self.optimizer.getvalue(box1)
-            val2 = self.optimizer.getvalue(box2)
-            if val1.is_constant() and val2.is_constant():
-                if not val1.box.same_constant(val2.box):
-                    return False
-            elif val1 is not val2:
-                return False
+        jumpop.initarglist(jumpargs)
+        self.optimizer.send_extra_operation(jumpop)
+        short.append(ResOperation(rop.JUMP, short_jumpargs, None))
 
-        if not op1.is_guard():
-            descr1 = op1.getdescr()
-            descr2 = op2.getdescr()
-            if descr1 is not descr2:
-                return False
+        modifier = VirtualStateAdder(self.optimizer)
+        final_virtual_state = modifier.get_virtual_state(original_jumpargs)
+        debug_start('jit-log-virtualstate')
+        virtual_state.debug_print('Closed loop with ')
+        bad = {}
+        if not virtual_state.generalization_of(final_virtual_state, bad):
+            # We ended up with a virtual state that is not compatible
+            # and we are thus unable to jump to the start of the loop
+            # XXX Is it possible to end up here? If so, consider:
+            #    - Fall back on having the preamble jump to itself?
+            #    - Would virtual_state.generate_guards make sense here?
+            final_virtual_state.debug_print("Bad virtual state at end of loop, ",
+                                            bad)
+            debug_stop('jit-log-virtualstate')
+            raise InvalidLoop
+        debug_stop('jit-log-virtualstate')
+        
+        return short
 
-        return True
+    def ensure_short_op_emitted(self, op, optimizer, seen):
+        if op is None:
+            return
+        if op.result is not None and op.result in seen:
+            return
+        for a in op.getarglist():
+            if not isinstance(a, Const) and a not in seen:
+                self.ensure_short_op_emitted(self.short_boxes.producer(a), optimizer, seen)
+        optimizer.send_extra_operation(op)
+        seen[op.result] = True
+        if op.is_ovf():
+            guard = ResOperation(rop.GUARD_NO_OVERFLOW, [], None)
+            optimizer.send_extra_operation(guard)
+        
+    def add_op_to_short(self, op, short, short_seen, emit=True, guards_needed=False):
+        if op is None:
+            return None
+        if op.result is not None and op.result in short_seen:
+            if emit:
+                return self.short_inliner.inline_arg(op.result)
+            else:
+                return None
+        
+        for a in op.getarglist():
+            if not isinstance(a, Const) and a not in short_seen:
+                self.add_op_to_short(self.short_boxes.producer(a), short, short_seen,
+                                     emit, guards_needed)
+        if op.is_guard():
+            descr = self.start_resumedescr.clone_if_mutable()
+            op.setdescr(descr)
 
-    def create_short_preamble(self, preamble, loop):
-        #return None # Dissable
+        if guards_needed and self.short_boxes.has_producer(op.result):
+            value_guards = self.getvalue(op.result).make_guards(op.result)
+        else:
+            value_guards = []            
 
-        preamble_ops = preamble.operations
-        loop_ops = loop.operations
+        short.append(op)
+        short_seen[op.result] = True
+        if emit:
+            newop = self.short_inliner.inline_op(op)
+            self.optimizer.send_extra_operation(newop)
+        else:
+            newop = None
 
-        boxmap = BoxMap()
-        state = ExeState(self.optimizer)
-        short_preamble = []
-        loop_i = preamble_i = 0
-        while preamble_i < len(preamble_ops):
+        if op.is_ovf():
+            # FIXME: ensure that ops guarded with GUARD_OVERFLOW do not end up here
+            guard = ResOperation(rop.GUARD_NO_OVERFLOW, [], None)
+            self.add_op_to_short(guard, short, short_seen, emit, guards_needed)
+        for guard in value_guards:
+            self.add_op_to_short(guard, short, short_seen, emit, guards_needed)
 
-            op = preamble_ops[preamble_i]
-            try:
-                newop = self.inliner.inline_op(op, ignore_result=True,
-                                               ignore_failargs=True)
-            except KeyError:
-                debug_print("create_short_preamble failed due to",
-                            "new boxes created during optimization.",
-                            "op:", op.getopnum(),
-                            "at preamble position: ", preamble_i,
-                            "loop position: ", loop_i)
-                return None
+        if newop:
+            return newop.result
+        return None
+        
+    def import_box(self, box, inputargs, short, short_jumpargs,
+                   jumpargs, short_seen):
+        if isinstance(box, Const) or box in inputargs:
+            return
+        if box in self.boxes_created_this_iteration:
+            return
 
-            if self.sameop(newop, loop_ops[loop_i]) \
-               and loop_i < len(loop_ops):
-                try:
-                    boxmap.link_ops(op, loop_ops[loop_i])
-                except ImpossibleLink:
-                    debug_print("create_short_preamble failed due to",
-                                "impossible link of "
-                                "op:", op.getopnum(),
-                                "at preamble position: ", preamble_i,
-                                "loop position: ", loop_i)
-                    return None
-                loop_i += 1
-            else:
-                if not state.safe_to_move(op):
-                    debug_print("create_short_preamble failed due to",
-                                "unsafe op:", op.getopnum(),
-                                "at preamble position: ", preamble_i,
-                                "loop position: ", loop_i)
-                    return None
-                short_preamble.append(op)
+        short_op = self.short_boxes.producer(box)
+        newresult = self.add_op_to_short(short_op, short, short_seen)
 
-            state.update(op)
-            preamble_i += 1
-
-        if loop_i < len(loop_ops):
-            debug_print("create_short_preamble failed due to",
-                        "loop contaning ops not in preamble"
-                        "at position", loop_i)
-            return None
-
-
-        jumpargs = []
-        for i in range(len(loop.inputargs)):
-            try:
-                jumpargs.append(boxmap.get_preamblebox(loop.inputargs[i]))
-            except KeyError:
-                debug_print("create_short_preamble failed due to",
-                            "input arguments not located")
-                return None
-
-        jmp = ResOperation(rop.JUMP, jumpargs[:], None)
-        jmp.setdescr(loop.token)
-        short_preamble.append(jmp)
-
-        # Check that boxes used as arguemts are produced.
-        seen = {}
-        for box in preamble.inputargs:
-            seen[box] = True
-        for op in short_preamble:
-            for box in op.getarglist():
-                if isinstance(box, Const):
-                    continue
-                if box not in seen:
-                    debug_print("create_short_preamble failed due to",
-                                "op arguments not produced")
-                    return None
-            if op.result:
-                seen[op.result] = True
-
-        return short_preamble
-
-class ExeState(object):
-    def __init__(self, optimizer):
-        self.optimizer = optimizer
-        self.heap_dirty = False
-        self.unsafe_getitem = {}
-        self.unsafe_getarrayitem = {}
-        self.unsafe_getarrayitem_indexes = {}
-
-    # Make sure it is safe to move the instrucions in short_preamble
-    # to the top making short_preamble followed by loop equvivalent
-    # to preamble
-    def safe_to_move(self, op):
-        opnum = op.getopnum()
-        descr = op.getdescr()
-        if op.is_always_pure() or op.is_foldable_guard():
-            return True
-        elif opnum == rop.JUMP:
-            return True
-        elif (opnum == rop.GETFIELD_GC or
-              opnum == rop.GETFIELD_RAW):
-            if self.heap_dirty:
-                return False
-            if descr in self.unsafe_getitem:
-                return False
-            return True
-        elif (opnum == rop.GETARRAYITEM_GC or
-              opnum == rop.GETARRAYITEM_RAW):
-            if self.heap_dirty:
-                return False
-            if descr in self.unsafe_getarrayitem:
-                return False
-            index = op.getarg(1)
-            if isinstance(index, Const):
-                d = self.unsafe_getarrayitem_indexes.get(descr, None)
-                if d is not None:
-                    if index.getint() in d:
-                        return False
-            else:
-                if descr in self.unsafe_getarrayitem_indexes:
-                    return False
-            return True
-        elif opnum == rop.CALL:
-            effectinfo = descr.get_extra_info()
-            if effectinfo is not None:
-                ef = effectinfo.extraeffect
-                if ef == EffectInfo.EF_LOOPINVARIANT or \
-                   ef == EffectInfo.EF_ELIDABLE_CANNOT_RAISE or \
-                   ef == EffectInfo.EF_ELIDABLE_CAN_RAISE:
-                    return True
-        return False
-
-    def update(self, op):
-        if (op.has_no_side_effect() or
-            op.is_ovf() or
-            op.is_guard()):
-            return
-        opnum = op.getopnum()
-        descr = op.getdescr()
-        if (opnum == rop.DEBUG_MERGE_POINT):
-            return
-        if (opnum == rop.SETFIELD_GC or
-            opnum == rop.SETFIELD_RAW):
-            self.unsafe_getitem[descr] = True
-            return
-        if (opnum == rop.SETARRAYITEM_GC or
-            opnum == rop.SETARRAYITEM_RAW):
-            index = op.getarg(1)
-            if isinstance(index, Const):
-                d = self.unsafe_getarrayitem_indexes.get(descr, None)
-                if d is None:
-                    d = self.unsafe_getarrayitem_indexes[descr] = {}
-                d[index.getint()] = True
-            else:
-                self.unsafe_getarrayitem[descr] = True
-            return
-        if opnum == rop.CALL:
-            effectinfo = descr.get_extra_info()
-            if effectinfo is not None:
-                for fielddescr in effectinfo.write_descrs_fields:
-                    self.unsafe_getitem[fielddescr] = True
-                for arraydescr in effectinfo.write_descrs_arrays:
-                    self.unsafe_getarrayitem[arraydescr] = True
-                return
-        debug_print("heap dirty due to op ", opnum)
-        self.heap_dirty = True
-
-class ImpossibleLink(JitException):
-    pass
-
-class BoxMap(object):
-    def __init__(self):
-        self.map = {}
-
-
-    def link_ops(self, preambleop, loopop):
-        pargs = preambleop.getarglist()
-        largs = loopop.getarglist()
-        if len(pargs) != len(largs):
-            raise ImpossibleLink
-        for i in range(len(largs)):
-            pbox, lbox = pargs[i], largs[i]
-            self.link_boxes(pbox, lbox)
-
-        if preambleop.result:
-            if not loopop.result:
-                raise ImpossibleLink
-            self.link_boxes(preambleop.result, loopop.result)
-
-
-    def link_boxes(self, pbox, lbox):
-        if lbox in self.map:
-            if self.map[lbox] is not pbox:
-                raise ImpossibleLink
-        else:
-            if isinstance(lbox, Const):
-                if not isinstance(pbox, Const) or not pbox.same_constant(lbox):
-                    raise ImpossibleLink
-            else:
-                self.map[lbox] = pbox
-
-
-    def get_preamblebox(self, loopbox):
-        return self.map[loopbox]
+        short_jumpargs.append(short_op.result)
+        inputargs.append(box)
+        box = newresult
+        if box in self.optimizer.values:
+            box = self.optimizer.values[box].force_box()
+        jumpargs.append(box)
+        
 
 class OptInlineShortPreamble(Optimization):
     def __init__(self, retraced):
         self.retraced = retraced
-        self.inliner = None
 
-
-    def reconstruct_for_next_iteration(self, optimizer, valuemap):
-        return self
+    def new(self):
+        return OptInlineShortPreamble(self.retraced)
 
     def propagate_forward(self, op):
         if op.getopnum() == rop.JUMP:
-            descr = op.getdescr()
-            assert isinstance(descr, LoopToken)
+            loop_token = op.getdescr()
+            assert isinstance(loop_token, LoopToken)
             # FIXME: Use a tree, similar to the tree formed by the full
             # preamble and its bridges, instead of a list to save time and
             # memory. This should also allow better behaviour in
             # situations that the is_emittable() chain currently can't
             # handle and where the inlining fails unexpectedly below.
-            short = descr.short_preamble
+            short = loop_token.short_preamble
             if short:
                 args = op.getarglist()
                 modifier = VirtualStateAdder(self.optimizer)
                 virtual_state = modifier.get_virtual_state(args)
+                debug_start('jit-log-virtualstate')
+                virtual_state.debug_print("Looking for ")
+
                 for sh in short:
                     ok = False
                     extra_guards = []
-                    if sh.virtual_state.generalization_of(virtual_state):
+
+                    bad = {}
+                    debugmsg = 'Did not match '
+                    if sh.virtual_state.generalization_of(virtual_state, bad):
                         ok = True
+                        debugmsg = 'Matched '
                     else:
                         try:
                             cpu = self.optimizer.cpu
@@ -659,38 +481,46 @@
                                                              extra_guards)
 
                             ok = True
+                            debugmsg = 'Guarded to match '
                         except InvalidLoop:
                             pass
+                    sh.virtual_state.debug_print(debugmsg, bad)
+                    
                     if ok:
-                        # FIXME: Do we still need the dry run
-                        #if self.inline(sh.operations, sh.inputargs,
-                        #               op.getarglist(), dryrun=True):
+                        debug_stop('jit-log-virtualstate')
+
+                        values = [self.getvalue(arg)
+                                  for arg in op.getarglist()]
+                        args = sh.virtual_state.make_inputargs(values,
+                                                               keyboxes=True)
+                        inliner = Inliner(sh.inputargs, args)
+                        
+                        for guard in extra_guards:
+                            if guard.is_guard():
+                                descr = sh.start_resumedescr.clone_if_mutable()
+                                inliner.inline_descr_inplace(descr)
+                                guard.setdescr(descr)
+                            self.emit_operation(guard)
+                        
                         try:
-                            self.inline(sh.operations, sh.inputargs,
-                                        op.getarglist())
+                            for shop in sh.operations:
+                                newop = inliner.inline_op(shop)
+                                self.emit_operation(newop)
                         except InvalidLoop:
                             debug_print("Inlining failed unexpectedly",
                                         "jumping to preamble instead")
                             self.emit_operation(op)
-                        else:
-                            jumpop = self.optimizer.newoperations.pop()
-                            assert jumpop.getopnum() == rop.JUMP
-                            for guard in extra_guards:
-                                d = sh.start_resumedescr.clone_if_mutable()
-                                self.inliner.inline_descr_inplace(d)
-                                guard.setdescr(d)
-                                self.emit_operation(guard)
-                            self.optimizer.newoperations.append(jumpop)
                         return
-                retraced_count = descr.retraced_count
-                descr.retraced_count += 1
+                debug_stop('jit-log-virtualstate')
+                retraced_count = loop_token.retraced_count
                 limit = self.optimizer.metainterp_sd.warmrunnerdesc.memory_manager.retrace_limit
                 if not self.retraced and retraced_count<limit:
-                    if not descr.failed_states:
+                    loop_token.retraced_count += 1
+                    if not loop_token.failed_states:
                         debug_print("Retracing (%d of %d)" % (retraced_count,
                                                               limit))
                         raise RetraceLoop
-                    for failed in descr.failed_states:
+                    for failed in loop_token.failed_states:
                         if failed.generalization_of(virtual_state):
                             # Retracing once more will most likely fail again
                             break
@@ -700,29 +530,12 @@
 
                         raise RetraceLoop
                 else:
-                    if not descr.failed_states:
-                        descr.failed_states=[virtual_state]
+                    if not loop_token.failed_states:
+                        loop_token.failed_states=[virtual_state]
                     else:
-                        descr.failed_states.append(virtual_state)
+                        loop_token.failed_states.append(virtual_state)
         self.emit_operation(op)
 
 
 
-    def inline(self, loop_operations, loop_args, jump_args, dryrun=False):
-        self.inliner = inliner = Inliner(loop_args, jump_args)
 
-        for op in loop_operations:
-            newop = inliner.inline_op(op)
-
-            if not dryrun:
-                self.emit_operation(newop)
-            else:
-                if not self.is_emittable(newop):
-                    return False
-
-        return True
-
-    #def inline_arg(self, arg):
-    #    if isinstance(arg, Const):
-    #        return arg
-    #    return self.argmap[arg]
diff --git a/pypy/jit/metainterp/optimizeopt/virtualize.py b/pypy/jit/metainterp/optimizeopt/virtualize.py
--- a/pypy/jit/metainterp/optimizeopt/virtualize.py
+++ b/pypy/jit/metainterp/optimizeopt/virtualize.py
@@ -6,6 +6,7 @@
     descrlist_dict, sort_descrs)
 from pypy.jit.metainterp.resoperation import rop, ResOperation
 from pypy.rlib.objectmodel import we_are_translated
+from pypy.jit.metainterp.optimizeopt.optimizer import OptValue
 
 
 class AbstractVirtualValue(optimizer.OptValue):
@@ -34,6 +35,12 @@
             self._really_force()
         return self.box
 
+    def force_at_end_of_preamble(self, already_forced):
+        value = already_forced.get(self, None)
+        if value:
+            return value
+        return OptValue(self.force_box())
+
     def make_virtual_info(self, modifier, fieldnums):
         if fieldnums is None:
             return self._make_virtual(modifier)
@@ -51,9 +58,6 @@
     def _really_force(self):
         raise NotImplementedError("abstract base")
 
-    def reconstruct_for_next_iteration(self, _optimizer):
-        return optimizer.OptValue(self.force_box())
-
 def get_fielddescrlist_cache(cpu):
     if not hasattr(cpu, '_optimizeopt_fielddescrlist_cache'):
         result = descrlist_dict()
@@ -90,6 +94,15 @@
                 return False
         return True
 
+    def force_at_end_of_preamble(self, already_forced):
+        if self in already_forced:
+            return self
+        already_forced[self] = self
+        if self._fields:
+            for ofs in self._fields.keys():
+                self._fields[ofs] = self._fields[ofs].force_at_end_of_preamble(already_forced)
+        return self
+
     def _really_force(self):
         op = self.source_op
         assert op is not None
@@ -161,30 +174,6 @@
                 fieldvalue = self._fields[ofs]
                 fieldvalue.get_args_for_fail(modifier)
 
-    def enum_forced_boxes(self, boxes, already_seen):
-        key = self.get_key_box()
-        if key in already_seen:
-            return
-        already_seen[key] = None
-        if self.box is None:
-            lst = self._get_field_descr_list()
-            for ofs in lst:
-                self._fields[ofs].enum_forced_boxes(boxes, already_seen)
-        else:
-            boxes.append(self.box)
-
-    def reconstruct_for_next_iteration(self, optimizer):
-        self.optimizer = optimizer
-        return self
-
-    def reconstruct_childs(self, new, valuemap):
-        assert isinstance(new, AbstractVirtualStructValue)
-        if new.box is None:
-            lst = self._get_field_descr_list()
-            for ofs in lst:
-                new._fields[ofs] = \
-                      self._fields[ofs].get_reconstructed(new.optimizer, valuemap)
-
 class VirtualValue(AbstractVirtualStructValue):
     level = optimizer.LEVEL_KNOWNCLASS
 
@@ -220,6 +209,7 @@
     def _get_descr(self):
         return self.structdescr
 
+
 class VArrayValue(AbstractVirtualValue):
 
     def __init__(self, optimizer, arraydescr, size, keybox, source_op=None):
@@ -239,6 +229,14 @@
         assert isinstance(itemvalue, optimizer.OptValue)
         self._items[index] = itemvalue
 
+    def force_at_end_of_preamble(self, already_forced):
+        if self in already_forced:
+            return self
+        already_forced[self] = self
+        for index in range(len(self._items)):
+            self._items[index] = self._items[index].force_at_end_of_preamble(already_forced)
+        return self
+    
     def _really_force(self):
         assert self.source_op is not None
         if not we_are_translated():
@@ -271,34 +269,12 @@
     def _make_virtual(self, modifier):
         return modifier.make_varray(self.arraydescr)
 
-    def enum_forced_boxes(self, boxes, already_seen):
-        key = self.get_key_box()
-        if key in already_seen:
-            return
-        already_seen[key] = None
-        if self.box is None:
-            for itemvalue in self._items:
-                itemvalue.enum_forced_boxes(boxes, already_seen)
-        else:
-            boxes.append(self.box)
-
-    def reconstruct_for_next_iteration(self, optimizer):
-        self.optimizer = optimizer
-        return self
-
-    def reconstruct_childs(self, new, valuemap):
-        assert isinstance(new, VArrayValue)
-        if new.box is None:
-            for i in range(len(self._items)):
-                new._items[i] = self._items[i].get_reconstructed(new.optimizer,
-                                                                 valuemap)
-
 class OptVirtualize(optimizer.Optimization):
     "Virtualize objects until they escape."
 
-    def reconstruct_for_next_iteration(self, optimizer, valuemap):
-        return self
-
+    def new(self):
+        return OptVirtualize()
+        
     def make_virtual(self, known_class, box, source_op=None):
         vvalue = VirtualValue(self.optimizer, known_class, box, source_op)
         self.make_equal_to(box, vvalue)
diff --git a/pypy/jit/metainterp/optimizeopt/virtualstate.py b/pypy/jit/metainterp/optimizeopt/virtualstate.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/metainterp/optimizeopt/virtualstate.py
@@ -0,0 +1,537 @@
+from pypy.jit.metainterp import resume
+from pypy.jit.metainterp.optimizeopt import virtualize
+from pypy.jit.metainterp.optimizeopt.optimizer import LEVEL_CONSTANT, \
+                                                      LEVEL_KNOWNCLASS, \
+                                                      LEVEL_NONNULL, \
+                                                      LEVEL_UNKNOWN, \
+                                                      MININT, MAXINT, OptValue
+from pypy.jit.metainterp.history import BoxInt, ConstInt, BoxPtr, Const
+from pypy.jit.metainterp.optimize import InvalidLoop
+from pypy.jit.metainterp.optimizeopt.intutils import IntBound, IntUnbounded
+from pypy.jit.metainterp.resoperation import rop, ResOperation
+from pypy.rlib.objectmodel import we_are_translated
+from pypy.rlib.debug import debug_start, debug_stop, debug_print
+from pypy.rlib.objectmodel import we_are_translated
+
+class AbstractVirtualStateInfo(resume.AbstractVirtualInfo):
+    position = -1
+    
+    def generalization_of(self, other, renum, bad):
+        raise NotImplementedError
+
+    def generate_guards(self, other, box, cpu, extra_guards, renum):
+        if self.generalization_of(other, renum, {}):
+            return
+        if renum[self.position] != other.position:
+            raise InvalidLoop
+        self._generate_guards(other, box, cpu, extra_guards)
+
+    def _generate_guards(self, other, box, cpu, extra_guards):
+        raise InvalidLoop
+
+    def enum_forced_boxes(self, boxes, value):
+        raise NotImplementedError
+
+    def enum(self, virtual_state):
+        if self.position != -1:
+            return
+        virtual_state.info_counter += 1
+        self.position = virtual_state.info_counter
+        self._enum(virtual_state)
+
+    def _enum(self, virtual_state):
+        raise NotImplementedError
+
+    def debug_print(self, indent, seen, bad):
+        mark = ''
+        if self in bad:
+            mark = '*'
+        self.debug_header(indent + mark)
+        if self not in seen:
+            seen[self] = True
+            for s in self.fieldstate:
+                s.debug_print(indent + "    ", seen, bad)
+        else:
+            debug_print(indent + "    ...")
+                
+
+    def debug_header(self, indent):
+        raise NotImplementedError
+
+
+class AbstractVirtualStructStateInfo(AbstractVirtualStateInfo):
+    def __init__(self, fielddescrs):
+        self.fielddescrs = fielddescrs
+
+    def generalization_of(self, other, renum, bad):
+        assert self.position != -1
+        if self.position in renum:
+            if renum[self.position] == other.position:
+                return True
+            bad[self] = True
+            bad[other] = True
+            return False
+        renum[self.position] = other.position
+        if not self._generalization_of(other):
+            bad[self] = True
+            bad[other] = True
+            return False
+        assert len(self.fielddescrs) == len(self.fieldstate)
+        assert len(other.fielddescrs) == len(other.fieldstate)
+        if len(self.fielddescrs) != len(other.fielddescrs):
+            bad[self] = True
+            bad[other] = True
+            return False
+        
+        for i in range(len(self.fielddescrs)):
+            if other.fielddescrs[i] is not self.fielddescrs[i]:
+                bad[self] = True
+                bad[other] = True
+                return False
+            if not self.fieldstate[i].generalization_of(other.fieldstate[i],
+                                                        renum, bad):
+                bad[self] = True
+                bad[other] = True
+                return False
+
+        return True
+
+    def _generalization_of(self, other):
+        raise NotImplementedError
+
+    def enum_forced_boxes(self, boxes, value):
+        assert isinstance(value, virtualize.AbstractVirtualStructValue)
+        assert value.is_virtual()
+        for i in range(len(self.fielddescrs)):
+            v = value._fields[self.fielddescrs[i]]
+            s = self.fieldstate[i]
+            if s.position > self.position:
+                s.enum_forced_boxes(boxes, v)
+
+    def _enum(self, virtual_state):
+        for s in self.fieldstate:
+            s.enum(virtual_state)
+        
+        
+class VirtualStateInfo(AbstractVirtualStructStateInfo):
+    def __init__(self, known_class, fielddescrs):
+        AbstractVirtualStructStateInfo.__init__(self, fielddescrs)
+        self.known_class = known_class
+
+    def _generalization_of(self, other):
+        if not isinstance(other, VirtualStateInfo):
+            return False
+        if not self.known_class.same_constant(other.known_class):
+            return False
+        return True
+
+    def debug_header(self, indent):
+        debug_print(indent + 'VirtualStateInfo(%d):' % self.position)
+        
+class VStructStateInfo(AbstractVirtualStructStateInfo):
+    def __init__(self, typedescr, fielddescrs):
+        AbstractVirtualStructStateInfo.__init__(self, fielddescrs)
+        self.typedescr = typedescr
+
+    def _generalization_of(self, other):        
+        if not isinstance(other, VStructStateInfo):
+            return False
+        if self.typedescr is not other.typedescr:
+            return False
+        return True
+
+    def debug_header(self, indent):
+        debug_print(indent + 'VStructStateInfo(%d):' % self.position)
+        
+class VArrayStateInfo(AbstractVirtualStateInfo):
+    def __init__(self, arraydescr):
+        self.arraydescr = arraydescr
+
+    def generalization_of(self, other, renum, bad):
+        assert self.position != -1
+        if self.position in renum:
+            if renum[self.position] == other.position:
+                return True
+            bad[self] = True
+            bad[other] = True
+            return False
+        renum[self.position] = other.position
+        if not isinstance(other, VArrayStateInfo):
+            bad[self] = True
+            bad[other] = True
+            return False
+        if self.arraydescr is not other.arraydescr:
+            bad[self] = True
+            bad[other] = True
+            return False
+        if len(self.fieldstate) != len(other.fieldstate):
+            bad[self] = True
+            bad[other] = True
+            return False
+        for i in range(len(self.fieldstate)):
+            if not self.fieldstate[i].generalization_of(other.fieldstate[i],
+                                                        renum, bad):
+                bad[self] = True
+                bad[other] = True
+                return False
+        return True
+
+    def enum_forced_boxes(self, boxes, value):
+        assert isinstance(value, virtualize.VArrayValue)
+        assert value.is_virtual()
+        for i in range(len(self.fieldstate)):
+            v = value._items[i]
+            s = self.fieldstate[i]
+            if s.position > self.position:
+                s.enum_forced_boxes(boxes, v)
+
+    def _enum(self, virtual_state):
+        for s in self.fieldstate:
+            s.enum(virtual_state)
+
+    def debug_header(self, indent):
+        debug_print(indent + 'VArrayStateInfo(%d):' % self.position)
+            
+        
+class NotVirtualStateInfo(AbstractVirtualStateInfo):
+    def __init__(self, value):
+        self.known_class = value.known_class
+        self.level = value.level
+        if value.intbound is None:
+            self.intbound = IntUnbounded()
+        else:
+            self.intbound = value.intbound.clone()
+        if value.is_constant():
+            self.constbox = value.box
+        else:
+            self.constbox = None
+        self.position_in_notvirtuals = -1
+        self.lenbound = value.lenbound
+
+    def generalization_of(self, other, renum, bad):
+        # XXX This will always retrace instead of forcing anything, which
+        # might be what we want sometimes?
+        assert self.position != -1
+        if self.position in renum:
+            if renum[self.position] == other.position:
+                return True
+            bad[self] = True
+            bad[other] = True
+            return False
+        renum[self.position] = other.position
+        if not isinstance(other, NotVirtualStateInfo):
+            bad[self] = True
+            bad[other] = True
+            return False
+        if other.level < self.level:
+            bad[self] = True
+            bad[other] = True
+            return False
+        if self.level == LEVEL_CONSTANT:
+            if not self.constbox.same_constant(other.constbox):
+                bad[self] = True
+                bad[other] = True
+                return False
+        elif self.level == LEVEL_KNOWNCLASS:
+            if not self.known_class.same_constant(other.known_class):
+                bad[self] = True
+                bad[other] = True
+                return False
+        if not self.intbound.contains_bound(other.intbound):
+            bad[self] = True
+            bad[other] = True
+            return False
+        if self.lenbound and other.lenbound:
+            if self.lenbound.mode != other.lenbound.mode or \
+               self.lenbound.descr != other.lenbound.descr or \
+               not self.lenbound.bound.contains_bound(other.lenbound.bound):
+                bad[self] = True
+                bad[other] = True
+                return False
+        elif self.lenbound:
+            bad[self] = True
+            bad[other] = True
+            return False
+        return True
+
+    def _generate_guards(self, other, box, cpu, extra_guards):
+        if not isinstance(other, NotVirtualStateInfo):
+            raise InvalidLoop
+
+        if self.lenbound or other.lenbound:
+            raise InvalidLoop
+
+        if self.level == LEVEL_KNOWNCLASS and \
+           box.nonnull() and \
+           self.known_class.same_constant(cpu.ts.cls_of_box(box)):
+            # Note: This is only a hint on what the class of box was
+            # during the trace. There are actually no guarantees that this
+            # box really comes from a trace. The hint is used here to choose
+            # between either emitting a guard_class and jumping to an
+            # existing compiled loop or retracing the loop. Both
+            # alternatives will always generate correct behaviour, but
+            # performance will differ.
+            op = ResOperation(rop.GUARD_NONNULL, [box], None)
+            extra_guards.append(op)
+            op = ResOperation(rop.GUARD_CLASS, [box, self.known_class], None)
+            extra_guards.append(op)
+            return
+        
+        if self.level == LEVEL_NONNULL and \
+               other.level == LEVEL_UNKNOWN and \
+               isinstance(box, BoxPtr) and \
+               box.nonnull():
+            op = ResOperation(rop.GUARD_NONNULL, [box], None)
+            extra_guards.append(op)
+            return
+        
+        if self.level == LEVEL_UNKNOWN and \
+               other.level == LEVEL_UNKNOWN and \
+               isinstance(box, BoxInt) and \
+               self.intbound.contains(box.getint()):
+            if self.intbound.has_lower:
+                bound = self.intbound.lower
+                if not (other.intbound.has_lower and \
+                        other.intbound.lower >= bound):
+                    res = BoxInt()
+                    op = ResOperation(rop.INT_GE, [box, ConstInt(bound)], res)
+                    extra_guards.append(op)
+                    op = ResOperation(rop.GUARD_TRUE, [res], None)
+                    extra_guards.append(op)
+            if self.intbound.has_upper:
+                bound = self.intbound.upper
+                if not (other.intbound.has_upper and \
+                        other.intbound.upper <= bound):
+                    res = BoxInt()
+                    op = ResOperation(rop.INT_LE, [box, ConstInt(bound)], res)
+                    extra_guards.append(op)
+                    op = ResOperation(rop.GUARD_TRUE, [res], None)
+                    extra_guards.append(op)
+            return
+        
+        # Remaining cases are probably not interesting
+        raise InvalidLoop
+        if self.level == LEVEL_CONSTANT:
+            import pdb; pdb.set_trace()
+            raise NotImplementedError
+
+    def enum_forced_boxes(self, boxes, value):
+        if self.level == LEVEL_CONSTANT:
+            return
+        assert 0 <= self.position_in_notvirtuals 
+        boxes[self.position_in_notvirtuals] = value.force_box()
+
+    def _enum(self, virtual_state):
+        if self.level == LEVEL_CONSTANT:
+            return
+        self.position_in_notvirtuals = len(virtual_state.notvirtuals)
+        virtual_state.notvirtuals.append(self)
+
+    def debug_print(self, indent, seen, bad):
+        mark = ''
+        if self in bad:
+            mark = '*'
+        if we_are_translated():
+            l = {LEVEL_UNKNOWN: 'Unknown',
+                 LEVEL_NONNULL: 'NonNull',
+                 LEVEL_KNOWNCLASS: 'KnownClass',
+                 LEVEL_CONSTANT: 'Constant',
+                 }[self.level]
+        else:
+            l = {LEVEL_UNKNOWN: 'Unknown',
+                 LEVEL_NONNULL: 'NonNull',
+                 LEVEL_KNOWNCLASS: 'KnownClass(%r)' % self.known_class,
+                 LEVEL_CONSTANT: 'Constant(%r)' % self.constbox,
+                 }[self.level]
+
+        lb = ''
+        if self.lenbound:
+            lb = ', ' + self.lenbound.bound.__repr__()
+        
+        debug_print(indent + mark + 'NotVirtualInfo(%d' % self.position +
+                    ', ' + l + ', ' + self.intbound.__repr__() + lb + ')')
+
+class VirtualState(object):
+    def __init__(self, state):
+        self.state = state
+        self.info_counter = -1
+        self.notvirtuals = [] # FIXME: We don't need this list, only its length
+        for s in state:
+            s.enum(self)
+
+    def generalization_of(self, other, bad=None):
+        if bad is None:
+            bad = {}
+        assert len(self.state) == len(other.state)
+        renum = {}
+        for i in range(len(self.state)):
+            if not self.state[i].generalization_of(other.state[i], renum, bad):
+                return False
+        return True
+
+    def generate_guards(self, other, args, cpu, extra_guards):        
+        assert len(self.state) == len(other.state) == len(args)
+        renum = {}
+        for i in range(len(self.state)):
+            self.state[i].generate_guards(other.state[i], args[i],
+                                          cpu, extra_guards, renum)
+
+    def make_inputargs(self, values, keyboxes=False):
+        assert len(values) == len(self.state)
+        inputargs = [None] * len(self.notvirtuals)
+        for i in range(len(values)):
+            self.state[i].enum_forced_boxes(inputargs, values[i])
+
+        if keyboxes:
+            for i in range(len(values)):
+                if not isinstance(self.state[i], NotVirtualStateInfo):
+                    box = values[i].get_key_box()
+                    assert not isinstance(box, Const)
+                    inputargs.append(box)
+
+        assert None not in inputargs
+            
+        return inputargs
+
+    def debug_print(self, hdr='', bad=None):
+        if bad is None:
+            bad = {}
+        debug_print(hdr + "VirtualState():")
+        seen = {}
+        for s in self.state:
+            s.debug_print("    ", seen, bad)
+
+class VirtualStateAdder(resume.ResumeDataVirtualAdder):
+    def __init__(self, optimizer):
+        self.fieldboxes = {}
+        self.optimizer = optimizer
+        self.info = {}
+
+    def register_virtual_fields(self, keybox, fieldboxes):
+        self.fieldboxes[keybox] = fieldboxes
+        
+    def already_seen_virtual(self, keybox):
+        return keybox in self.fieldboxes
+
+    def getvalue(self, box):
+        return self.optimizer.getvalue(box)
+
+    def state(self, box):
+        value = self.getvalue(box)
+        box = value.get_key_box()
+        try:
+            info = self.info[box]
+        except KeyError:
+            if value.is_virtual():
+                self.info[box] = info = value.make_virtual_info(self, None)
+                flds = self.fieldboxes[box]
+                info.fieldstate = [self.state(b) for b in flds]
+            else:
+                self.info[box] = info = self.make_not_virtual(value)
+        return info
+
+    def get_virtual_state(self, jump_args):
+        self.optimizer.force_at_end_of_preamble()
+        already_forced = {}
+        values = [self.getvalue(box).force_at_end_of_preamble(already_forced)
+                  for box in jump_args]
+
+        for value in values:
+            if value.is_virtual():
+                value.get_args_for_fail(self)
+            else:
+                self.make_not_virtual(value)
+        return VirtualState([self.state(box) for box in jump_args])
+
+    def make_not_virtual(self, value):
+        return NotVirtualStateInfo(value)
+
+    def make_virtual(self, known_class, fielddescrs):
+        return VirtualStateInfo(known_class, fielddescrs)
+
+    def make_vstruct(self, typedescr, fielddescrs):
+        return VStructStateInfo(typedescr, fielddescrs)
+
+    def make_varray(self, arraydescr):
+        return VArrayStateInfo(arraydescr)
+
+class BoxNotProducable(Exception):
+    pass
+
+class ShortBoxes(object):
+    def __init__(self, optimizer, surviving_boxes):
+        self.potential_ops = {}
+        self.duplicates = {}
+        self.aliases = {}
+        self.optimizer = optimizer
+        for box in surviving_boxes:
+            self.potential_ops[box] = None
+        optimizer.produce_potential_short_preamble_ops(self)
+
+        self.short_boxes = {}
+
+        for box in self.potential_ops.keys():
+            try:
+                self.produce_short_preamble_box(box)
+            except BoxNotProducable:
+                pass
+
+    def produce_short_preamble_box(self, box):
+        if box in self.short_boxes:
+            return 
+        if isinstance(box, Const):
+            return 
+        if box in self.potential_ops:
+            op = self.potential_ops[box]
+            if op:
+                for arg in op.getarglist():
+                    self.produce_short_preamble_box(arg)
+            self.short_boxes[box] = op
+        else:
+            raise BoxNotProducable
+
+    def add_potential(self, op):
+        if op.result not in self.potential_ops:
+            self.potential_ops[op.result] = op
+            return op
+        newop = op.clone()
+        newop.result = op.result.clonebox()
+        self.potential_ops[newop.result] = newop
+        if op.result in self.duplicates:
+            self.duplicates[op.result].append(newop.result)
+        else:
+            self.duplicates[op.result] = [newop.result]
+        self.optimizer.make_equal_to(newop.result, self.optimizer.getvalue(op.result))
+        return newop
+
+    def debug_print(self, logops):
+        debug_start('jit-short-boxes')
+        for box, op in self.short_boxes.items():
+            if op:
+                debug_print(logops.repr_of_arg(box) + ': ' + logops.repr_of_resop(op))
+            else:
+                debug_print(logops.repr_of_arg(box) + ': None')
+        debug_stop('jit-short-boxes')
+        
+    def operations(self):
+        if not we_are_translated(): # For tests
+            ops = self.short_boxes.values()
+            ops.sort(key=str, reverse=True)
+            return ops
+        return self.short_boxes.values()
+
+    def producer(self, box):
+        return self.short_boxes[box]
+
+    def has_producer(self, box):
+        return box in self.short_boxes
+
+    def alias(self, newbox, oldbox):
+        if not isinstance(oldbox, Const) and newbox not in self.short_boxes:
+            self.short_boxes[newbox] = self.short_boxes[oldbox]
+        self.aliases[newbox] = oldbox
+        
+    def original(self, box):
+        while box in self.aliases:
+            box = self.aliases[box]
+        return box
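The new virtualstate.py module gives the unrolling optimizer a way to compare the state of the jump arguments at the end of a trace against the state expected by an already compiled specialized loop: if the old state is a generalization of the new one, the loop can be reused as-is; otherwise generate_guards() tries to bridge the gap with extra guards (for example the GUARD_NONNULL/GUARD_CLASS pair emitted above) and raises InvalidLoop to force a retrace when it cannot. A minimal sketch of that decision, assuming a hypothetical try_reuse_compiled_loop() driver with an optimizer, the jump argument boxes, and a previously recorded target_state; the real logic sits in unroll.py, which is not part of this diff:

    from pypy.jit.metainterp.optimizeopt.virtualstate import VirtualStateAdder
    from pypy.jit.metainterp.optimize import InvalidLoop

    def try_reuse_compiled_loop(optimizer, jump_args, target_state, cpu):
        # Summarize what is currently known about the jump arguments.
        current_state = VirtualStateAdder(optimizer).get_virtual_state(jump_args)
        if target_state.generalization_of(current_state):
            return []          # jump to the existing loop with no extra guards
        extra_guards = []
        try:
            # Try to make the current state acceptable by emitting guards.
            target_state.generate_guards(current_state, jump_args, cpu,
                                         extra_guards)
            return extra_guards
        except InvalidLoop:
            return None        # caller should retrace the loop instead
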
diff --git a/pypy/jit/metainterp/optimizeopt/vstring.py b/pypy/jit/metainterp/optimizeopt/vstring.py
--- a/pypy/jit/metainterp/optimizeopt/vstring.py
+++ b/pypy/jit/metainterp/optimizeopt/vstring.py
@@ -57,7 +57,7 @@
         self.ensure_nonnull()
         box = self.force_box()
         lengthbox = BoxInt()
-        optimization.optimize_default(ResOperation(mode.STRLEN, [box], lengthbox))
+        optimization.propagate_forward(ResOperation(mode.STRLEN, [box], lengthbox))
         return lengthbox
 
     @specialize.arg(1)
@@ -332,7 +332,7 @@
     if optimizer is None:
         return None
     resbox = BoxInt()
-    optimizer.optimize_default(ResOperation(rop.INT_ADD, [box1, box2], resbox))
+    optimizer.propagate_forward(ResOperation(rop.INT_ADD, [box1, box2], resbox))
     return resbox
 
 def _int_sub(optimizer, box1, box2):
@@ -342,7 +342,7 @@
         if isinstance(box1, ConstInt):
             return ConstInt(box1.value - box2.value)
     resbox = BoxInt()
-    optimizer.optimize_default(ResOperation(rop.INT_SUB, [box1, box2], resbox))
+    optimizer.propagate_forward(ResOperation(rop.INT_SUB, [box1, box2], resbox))
     return resbox
 
 def _strgetitem(optimizer, strbox, indexbox, mode):
@@ -354,7 +354,7 @@
             s = strbox.getref(lltype.Ptr(rstr.UNICODE))
             return ConstInt(ord(s.chars[indexbox.getint()]))
     resbox = BoxInt()
-    optimizer.optimize_default(ResOperation(mode.STRGETITEM, [strbox, indexbox],
+    optimizer.propagate_forward(ResOperation(mode.STRGETITEM, [strbox, indexbox],
                                       resbox))
     return resbox
 
@@ -363,10 +363,9 @@
     "Handling of strings and unicodes."
     enabled = True
 
-    def reconstruct_for_next_iteration(self, optimizer, valuemap):
-        self.enabled = True
-        return self
-
+    def new(self):
+        return OptString()
+    
     def make_vstring_plain(self, box, source_op, mode):
         vvalue = VStringPlainValue(self.optimizer, box, source_op, mode)
         self.make_equal_to(box, vvalue)
diff --git a/pypy/jit/metainterp/pyjitpl.py b/pypy/jit/metainterp/pyjitpl.py
--- a/pypy/jit/metainterp/pyjitpl.py
+++ b/pypy/jit/metainterp/pyjitpl.py
@@ -15,7 +15,7 @@
 from pypy.jit.metainterp.jitprof import EmptyProfiler
 from pypy.jit.metainterp.jitprof import GUARDS, RECORDED_OPS, ABORT_ESCAPE
 from pypy.jit.metainterp.jitprof import ABORT_TOO_LONG, ABORT_BRIDGE, \
-                                        ABORT_FORCE_QUASIIMMUT
+                                        ABORT_FORCE_QUASIIMMUT, ABORT_BAD_LOOP
 from pypy.jit.metainterp.jitexc import JitException, get_llexception
 from pypy.rlib.objectmodel import specialize
 from pypy.jit.codewriter.jitcode import JitCode, SwitchDictDescr
@@ -215,6 +215,7 @@
 
     for _opimpl in ['int_is_true', 'int_is_zero', 'int_neg', 'int_invert',
                     'cast_float_to_int', 'cast_int_to_float',
+                    'cast_float_to_singlefloat', 'cast_singlefloat_to_float',
                     'float_neg', 'float_abs',
                     ]:
         exec py.code.Source('''
@@ -232,6 +233,10 @@
         return self.execute(rop.PTR_EQ, box, history.CONST_NULL)
 
     @arguments("box")
+    def opimpl_cast_opaque_ptr(self, box):
+        return self.execute(rop.CAST_OPAQUE_PTR, box)
+
+    @arguments("box")
     def _opimpl_any_return(self, box):
         self.metainterp.finishframe(box)
 
@@ -1227,7 +1232,7 @@
         src_i = src_r = src_f = 0
         i = 1
         for kind in descr.get_arg_types():
-            if kind == history.INT:
+            if kind == history.INT or kind == 'S':        # single float
                 while True:
                     box = argboxes[src_i]
                     src_i += 1
@@ -1378,9 +1383,9 @@
             num = self.cpu.get_fail_descr_number(tokens[0].finishdescr)
             setattr(self.cpu, 'done_with_this_frame_%s_v' % name, num)
         #
-        tokens = self.loop_tokens_exit_frame_with_exception_ref
-        num = self.cpu.get_fail_descr_number(tokens[0].finishdescr)
-        self.cpu.exit_frame_with_exception_v = num
+        exc_descr = compile.PropagateExceptionDescr()
+        num = self.cpu.get_fail_descr_number(exc_descr)
+        self.cpu.propagate_exception_v = num
         #
         self.globaldata = MetaInterpGlobalData(self)
 
@@ -1424,6 +1429,7 @@
                 # can change from run to run.
                 d = {}
                 for jitcode in self.indirectcalltargets:
+                    assert jitcode.fnaddr not in d
                     d[jitcode.fnaddr] = jitcode
                 self.globaldata.indirectcall_dict = d
             return d.get(fnaddress, None)
diff --git a/pypy/jit/metainterp/resoperation.py b/pypy/jit/metainterp/resoperation.py
--- a/pypy/jit/metainterp/resoperation.py
+++ b/pypy/jit/metainterp/resoperation.py
@@ -408,6 +408,8 @@
     'FLOAT_ABS/1',
     'CAST_FLOAT_TO_INT/1',
     'CAST_INT_TO_FLOAT/1',
+    'CAST_FLOAT_TO_SINGLEFLOAT/1',
+    'CAST_SINGLEFLOAT_TO_FLOAT/1',
     #
     'INT_LT/2b',
     'INT_LE/2b',
@@ -435,6 +437,7 @@
     #
     'PTR_EQ/2b',
     'PTR_NE/2b',
+    'CAST_OPAQUE_PTR/1b',
     #
     'ARRAYLEN_GC/1d',
     'STRLEN/1',
diff --git a/pypy/jit/metainterp/resume.py b/pypy/jit/metainterp/resume.py
--- a/pypy/jit/metainterp/resume.py
+++ b/pypy/jit/metainterp/resume.py
@@ -455,17 +455,6 @@
 
     def debug_prints(self):
         raise NotImplementedError
-
-    def generalization_of(self, other):
-        raise NotImplementedError
-
-    def generate_guards(self, other, box, cpu, extra_guards):
-        if self.generalization_of(other):
-            return
-        self._generate_guards(other, box, cpu, extra_guards)
-
-    def _generate_guards(self, other, box, cpu, extra_guards):
-        raise InvalidLoop
         
 class AbstractVirtualStructInfo(AbstractVirtualInfo):
     def __init__(self, fielddescrs):
@@ -486,26 +475,6 @@
                         str(self.fielddescrs[i]),
                         str(untag(self.fieldnums[i])))
 
-    def generalization_of(self, other):
-        if not self._generalization_of(other):
-            return False
-        assert len(self.fielddescrs) == len(self.fieldstate)
-        assert len(other.fielddescrs) == len(other.fieldstate)
-        if len(self.fielddescrs) != len(other.fielddescrs):
-            return False
-        
-        for i in range(len(self.fielddescrs)):
-            if other.fielddescrs[i] is not self.fielddescrs[i]:
-                return False
-            if not self.fieldstate[i].generalization_of(other.fieldstate[i]):
-                return False
-
-        return True
-
-    def _generalization_of(self, other):
-        raise NotImplementedError
-
-
 class VirtualInfo(AbstractVirtualStructInfo):
     def __init__(self, known_class, fielddescrs):
         AbstractVirtualStructInfo.__init__(self, fielddescrs)
@@ -521,13 +490,6 @@
         debug_print("\tvirtualinfo", self.known_class.repr_rpython())
         AbstractVirtualStructInfo.debug_prints(self)
 
-    def _generalization_of(self, other):        
-        if not isinstance(other, VirtualInfo):
-            return False
-        if not self.known_class.same_constant(other.known_class):
-            return False
-        return True
-        
 
 class VStructInfo(AbstractVirtualStructInfo):
     def __init__(self, typedescr, fielddescrs):
@@ -544,14 +506,6 @@
         debug_print("\tvstructinfo", self.typedescr.repr_rpython())
         AbstractVirtualStructInfo.debug_prints(self)
 
-    def _generalization_of(self, other):        
-        if not isinstance(other, VStructInfo):
-            return False
-        if self.typedescr is not other.typedescr:
-            return False
-        return True
-        
-
 class VArrayInfo(AbstractVirtualInfo):
     def __init__(self, arraydescr):
         self.arraydescr = arraydescr
@@ -583,17 +537,6 @@
         for i in self.fieldnums:
             debug_print("\t\t", str(untag(i)))
 
-    def generalization_of(self, other):
-        if self.arraydescr is not other.arraydescr:
-            return False
-        if len(self.fieldstate) != len(other.fieldstate):
-            return False
-        for i in range(len(self.fieldstate)):
-            if not self.fieldstate[i].generalization_of(other.fieldstate[i]):
-                return False
-        return True
-
-
 class VStrPlainInfo(AbstractVirtualInfo):
     """Stands for the string made out of the characters of all fieldnums."""
 
diff --git a/pypy/jit/metainterp/test/test_ajit.py b/pypy/jit/metainterp/test/test_ajit.py
--- a/pypy/jit/metainterp/test/test_ajit.py
+++ b/pypy/jit/metainterp/test/test_ajit.py
@@ -90,6 +90,40 @@
                     found += 1
             assert found == 1
 
+    def test_loop_variant_mul1(self):
+        myjitdriver = JitDriver(greens = [], reds = ['y', 'res', 'x'])
+        def f(x, y):
+            res = 0
+            while y > 0:
+                myjitdriver.can_enter_jit(x=x, y=y, res=res)
+                myjitdriver.jit_merge_point(x=x, y=y, res=res)
+                res += x * x
+                x += 1
+                res += x * x                
+                y -= 1
+            return res
+        res = self.meta_interp(f, [6, 7])
+        assert res == 1323
+        self.check_loop_count(1)
+        self.check_loops(int_mul=1)
+        
+    def test_loop_variant_mul_ovf(self):
+        myjitdriver = JitDriver(greens = [], reds = ['y', 'res', 'x'])
+        def f(x, y):
+            res = 0
+            while y > 0:
+                myjitdriver.can_enter_jit(x=x, y=y, res=res)
+                myjitdriver.jit_merge_point(x=x, y=y, res=res)
+                res += ovfcheck(x * x)
+                x += 1
+                res += ovfcheck(x * x)
+                y -= 1
+            return res
+        res = self.meta_interp(f, [6, 7])
+        assert res == 1323
+        self.check_loop_count(1)
+        self.check_loops(int_mul_ovf=1)
+
     def test_loop_invariant_mul1(self):
         myjitdriver = JitDriver(greens = [], reds = ['y', 'res', 'x'])
         def f(x, y):
@@ -1338,8 +1372,8 @@
             return x
         res = self.meta_interp(f, [299], listops=True)
         assert res == f(299)
-        self.check_loops(guard_class=0, guard_value=2)
-        self.check_loops(guard_class=0, guard_value=5, everywhere=True)
+        self.check_loops(guard_class=0, guard_value=3)        
+        self.check_loops(guard_class=0, guard_value=6, everywhere=True)
 
     def test_merge_guardnonnull_guardclass(self):
         from pypy.rlib.objectmodel import instantiate
@@ -1367,10 +1401,10 @@
             return x
         res = self.meta_interp(f, [299], listops=True)
         assert res == f(299)
-        self.check_loops(guard_class=0, guard_nonnull=0,
-                         guard_nonnull_class=2, guard_isnull=0)
-        self.check_loops(guard_class=0, guard_nonnull=0,
-                         guard_nonnull_class=4, guard_isnull=1,
+        self.check_loops(guard_class=0, guard_nonnull=2,
+                         guard_nonnull_class=2, guard_isnull=1)
+        self.check_loops(guard_class=0, guard_nonnull=4,
+                         guard_nonnull_class=4, guard_isnull=2,
                          everywhere=True)
 
     def test_merge_guardnonnull_guardvalue(self):
@@ -1398,9 +1432,9 @@
             return x
         res = self.meta_interp(f, [299], listops=True)
         assert res == f(299)
-        self.check_loops(guard_class=0, guard_nonnull=0, guard_value=1,
+        self.check_loops(guard_class=0, guard_nonnull=2, guard_value=2,
                          guard_nonnull_class=0, guard_isnull=1)
-        self.check_loops(guard_class=0, guard_nonnull=0, guard_value=3,
+        self.check_loops(guard_class=0, guard_nonnull=4, guard_value=4,
                          guard_nonnull_class=0, guard_isnull=2,
                          everywhere=True)
 
@@ -1429,10 +1463,10 @@
             return x
         res = self.meta_interp(f, [299], listops=True)
         assert res == f(299)
-        self.check_loops(guard_class=0, guard_nonnull=0, guard_value=2,
-                         guard_nonnull_class=0, guard_isnull=0)
-        self.check_loops(guard_class=0, guard_nonnull=0, guard_value=4,
-                         guard_nonnull_class=0, guard_isnull=1,
+        self.check_loops(guard_class=0, guard_nonnull=2, guard_value=2,
+                         guard_nonnull_class=0, guard_isnull=1)
+        self.check_loops(guard_class=0, guard_nonnull=4, guard_value=4,
+                         guard_nonnull_class=0, guard_isnull=2,
                          everywhere=True)
 
     def test_merge_guardnonnull_guardclass_guardvalue(self):
@@ -1463,10 +1497,10 @@
             return x
         res = self.meta_interp(f, [399], listops=True)
         assert res == f(399)
-        self.check_loops(guard_class=0, guard_nonnull=0, guard_value=2,
-                         guard_nonnull_class=0, guard_isnull=0)
-        self.check_loops(guard_class=0, guard_nonnull=0, guard_value=5,
-                         guard_nonnull_class=0, guard_isnull=1,
+        self.check_loops(guard_class=0, guard_nonnull=3, guard_value=3,
+                         guard_nonnull_class=0, guard_isnull=1)
+        self.check_loops(guard_class=0, guard_nonnull=6, guard_value=6,
+                         guard_nonnull_class=0, guard_isnull=2,
                          everywhere=True)
 
     def test_residual_call_doesnt_lose_info(self):
@@ -1754,10 +1788,8 @@
             return a1.val + b1.val
         res = self.meta_interp(g, [6, 14])
         assert res == g(6, 14)
-        self.check_loop_count(8)
-        self.check_loops(getarrayitem_gc=7, everywhere=True)
-        py.test.skip("for the following, we need setarrayitem(varindex)")
-        self.check_loops(getarrayitem_gc=6, everywhere=True)
+        self.check_loop_count(9)
+        self.check_loops(getarrayitem_gc=8, everywhere=True)
 
     def test_multiple_specialied_versions_bridge(self):
         myjitdriver = JitDriver(greens = [], reds = ['y', 'x', 'z', 'res'])
@@ -2064,6 +2096,109 @@
         assert res == 12
         self.check_tree_loop_count(2)
 
+    def test_caching_setfield(self):
+        myjitdriver = JitDriver(greens = [], reds = ['sa', 'i', 'n', 'a', 'node'])
+        class A:
+            pass
+        def f(n, a):
+            i = sa = 0
+            node = A()
+            node.val1 = node.val2 = 0
+            while i < n:
+                myjitdriver.can_enter_jit(sa=sa, i=i, n=n, a=a, node=node)
+                myjitdriver.jit_merge_point(sa=sa, i=i, n=n, a=a, node=node)
+                sa += node.val1 + node.val2
+                if i < n/2:
+                    node.val1 = a
+                    node.val2 = a
+                else:
+                    node.val1 = a
+                    node.val2 = a + 1
+                i += 1
+            return sa
+        res = self.meta_interp(f, [32, 7])
+        assert res == f(32, 7)
+        
+    def test_caching_setarrayitem_fixed(self):
+        myjitdriver = JitDriver(greens = [], reds = ['sa', 'i', 'n', 'a', 'node'])
+        def f(n, a):
+            i = sa = 0
+            node = [1, 2, 3]
+            while i < n:
+                myjitdriver.can_enter_jit(sa=sa, i=i, n=n, a=a, node=node)
+                myjitdriver.jit_merge_point(sa=sa, i=i, n=n, a=a, node=node)
+                sa += node[0] + node[1]
+                if i < n/2:
+                    node[0] = a
+                    node[1] = a
+                else:
+                    node[0] = a
+                    node[1] = a + 1
+                i += 1
+            return sa
+        res = self.meta_interp(f, [32, 7])
+        assert res == f(32, 7)
+        
+    def test_caching_setarrayitem_var(self):
+        myjitdriver = JitDriver(greens = [], reds = ['sa', 'i', 'n', 'a', 'b', 'node'])
+        def f(n, a, b):
+            i = sa = 0
+            node = [1, 2, 3]
+            while i < n:
+                myjitdriver.can_enter_jit(sa=sa, i=i, n=n, a=a, b=b, node=node)
+                myjitdriver.jit_merge_point(sa=sa, i=i, n=n, a=a, b=b, node=node)
+                sa += node[0] + node[b]
+                if i < n/2:
+                    node[0] = a
+                    node[b] = a
+                else:
+                    node[0] = a
+                    node[b] = a + 1
+                i += 1
+            return sa
+        res = self.meta_interp(f, [32, 7, 2])
+        assert res == f(32, 7, 2)
+
+    def test_getfield_result_with_intbound(self):
+        myjitdriver = JitDriver(greens = [], reds = ['sa', 'i', 'n', 'a', 'node'])
+        class A:
+            pass
+        def f(n, a):
+            i = sa = 0
+            node = A()
+            node.val1 = a
+            while i < n:
+                myjitdriver.can_enter_jit(sa=sa, i=i, n=n, a=a, node=node)
+                myjitdriver.jit_merge_point(sa=sa, i=i, n=n, a=a, node=node)
+                if node.val1 > 0:
+                    sa += 1
+                if i > n/2:
+                    node.val1 = -a
+                i += 1
+            return sa
+        res = self.meta_interp(f, [32, 7])
+        assert res == f(32, 7)
+
+    def test_getfield_result_constant(self):
+        myjitdriver = JitDriver(greens = [], reds = ['sa', 'i', 'n', 'a', 'node'])
+        class A:
+            pass
+        def f(n, a):
+            i = sa = 0
+            node = A()
+            node.val1 = 7
+            while i < n:
+                myjitdriver.can_enter_jit(sa=sa, i=i, n=n, a=a, node=node)
+                myjitdriver.jit_merge_point(sa=sa, i=i, n=n, a=a, node=node)
+                if node.val1 == 7:
+                    sa += 1
+                if i > n/2:
+                    node.val1 = -7
+                i += 1
+            return sa
+        res = self.meta_interp(f, [32, 7])
+        assert res == f(32, 7)
+
     def test_overflowing_shift_pos(self):
         myjitdriver = JitDriver(greens = [], reds = ['a', 'b', 'n', 'sa'])
         def f1(a, b):
@@ -2154,32 +2289,16 @@
             assert self.meta_interp(f, [bigval, 5]) == 0
             self.check_loops(int_rshift=3, everywhere=True)
 
-    def notest_overflowing_shift2(self):
-        myjitdriver = JitDriver(greens = [], reds = ['a', 'b', 'n', 'sa'])
-        def f(a, b):
-            n = sa = 0
-            while n < 10:
-                myjitdriver.jit_merge_point(a=a, b=b, n=n, sa=sa)
-                if 0 < a < promote(sys.maxint/2): pass
-                if 0 < b < 100: pass
-                sa += (a << b) >> b
-                n += 1
+    def test_pure_op_not_to_be_propagated(self):
+        myjitdriver = JitDriver(greens = [], reds = ['n', 'sa'])
+        def f(n):
+            sa = 0
+            while n > 0:
+                myjitdriver.jit_merge_point(n=n, sa=sa)
+                sa += n + 1
+                n -= 1
             return sa
-
-        assert self.meta_interp(f, [5, 5]) == 50
-        self.check_loops(int_rshift=0, everywhere=True)
-
-        assert self.meta_interp(f, [5, 10]) == 50
-        self.check_loops(int_rshift=1, everywhere=True)
-
-        assert self.meta_interp(f, [10, 5]) == 100
-        self.check_loops(int_rshift=1, everywhere=True)
-
-        assert self.meta_interp(f, [10, 10]) == 100
-        self.check_loops(int_rshift=1, everywhere=True)
-
-        assert self.meta_interp(f, [5, 100]) == 0
-        self.check_loops(int_rshift=1, everywhere=True)
+        assert self.meta_interp(f, [10]) == f(10)
 
     def test_inputarg_reset_bug(self):
         ## j = 0
@@ -2311,6 +2430,349 @@
         self.check_loops(getfield_gc_pure=0)
         self.check_loops(getfield_gc_pure=2, everywhere=True)
 
+    def test_args_becomming_equal(self):
+        myjitdriver = JitDriver(greens = [], reds = ['n', 'i', 'sa', 'a', 'b'])
+        def f(n, a, b):
+            sa = i = 0
+            while i < n:
+                myjitdriver.jit_merge_point(n=n, i=i, sa=sa, a=a, b=b)
+                sa += a
+                sa *= b
+                if i > n/2:
+                    a = b
+                i += 1
+            return sa
+        assert self.meta_interp(f, [20, 1, 2]) == f(20, 1, 2)
+
+    def test_args_becomming_equal_boxed1(self):
+        class A(object):
+            def __init__(self, a, b):
+                self.a = a
+                self.b = b
+        myjitdriver = JitDriver(greens = [], reds = ['n', 'i', 'sa', 'a', 'b', 'node'])
+
+        def f(n, a, b):
+            sa = i = 0
+            node = A(a,b)
+            while i < n:
+                myjitdriver.jit_merge_point(n=n, i=i, sa=sa, a=a, b=b, node=node)
+                sa += node.a
+                sa *= node.b
+                if i > n/2:
+                    node = A(b, b)
+                else:
+                    node = A(a, b)
+                i += 1
+            return sa
+        assert self.meta_interp(f, [20, 1, 2]) == f(20, 1, 2)
+
+    def test_args_becomming_not_equal_boxed1(self):
+        class A(object):
+            def __init__(self, a, b):
+                self.a = a
+                self.b = b
+        myjitdriver = JitDriver(greens = [], reds = ['n', 'i', 'sa', 'a', 'b', 'node'])
+
+        def f(n, a, b):
+            sa = i = 0
+            node = A(b, b)
+            while i < n:
+                myjitdriver.jit_merge_point(n=n, i=i, sa=sa, a=a, b=b, node=node)
+                sa += node.a
+                sa *= node.b
+                if i > n/2:
+                    node = A(a, b)
+                else:
+                    node = A(b, b)
+                i += 1
+            return sa
+        assert self.meta_interp(f, [20, 1, 2]) == f(20, 1, 2)
+
+    def test_args_becomming_equal_boxed2(self):
+        class A(object):
+            def __init__(self, a, b):
+                self.a = a
+                self.b = b
+        myjitdriver = JitDriver(greens = [], reds = ['n', 'i', 'sa', 'node'])
+
+        def f(n, a, b):
+            sa = i = 0
+            node = A(a, b)
+            while i < n:
+                myjitdriver.jit_merge_point(n=n, i=i, sa=sa, node=node)
+                sa += node.a
+                sa *= node.b
+                if i > n/2:
+                    node = A(node.b, node.b)
+                else:
+                    node = A(node.b, node.a)
+                i += 1
+            return sa
+        assert self.meta_interp(f, [20, 1, 2]) == f(20, 1, 2)
+
+    def test_inlined_short_preamble_guard_needed_in_loop1(self):
+        class A(object):
+            def __init__(self, a):
+                self.a = a
+        myjitdriver = JitDriver(greens = [], reds = ['n', 'i', 'sa',
+                                                     'a', 'b'])
+        def f(n, a, b):
+            sa = i = 0
+            while i < n:
+                myjitdriver.jit_merge_point(n=n, i=i, sa=sa, a=a, b=b)
+                if a.a < 10:
+                    sa += a.a
+                b.a = i
+                i += 1
+            return sa
+        def g(n):
+            return f(n, A(5), A(10))
+        assert self.meta_interp(g, [20]) == g(20)
+
+    def test_ovf_guard_in_short_preamble2(self):
+        class A(object):
+            def __init__(self, val):
+                self.val = val
+        myjitdriver = JitDriver(greens = [], reds = ['n', 'i', 'sa', 'a', 'node1', 'node2'])
+        def f(n, a):
+            node1 = node2 = A(0)
+            sa = i = 0
+            while i < n:
+                myjitdriver.jit_merge_point(n=n, i=i, sa=sa, a=a, node1=node1, node2=node2)
+                node2.val = 7
+                if a >= 100:
+                    sa += 1
+                sa += ovfcheck(i + i)
+                node1 = A(i)
+                i += 1
+        assert self.meta_interp(f, [20, 7]) == f(20, 7)
+
+    def test_intbounds_generalized(self):
+        myjitdriver = JitDriver(greens = [], reds = ['n', 'i', 'sa'])
+
+        def f(n):
+            sa = i = 0
+            while i < n:
+                myjitdriver.jit_merge_point(n=n, i=i, sa=sa)
+                if i > n/2:
+                    sa += 1
+                else:
+                    sa += 2
+                i += 1
+            return sa
+        assert self.meta_interp(f, [20]) == f(20)
+        self.check_loops(int_gt=1, int_lt=2, int_ge=0, int_le=0)
+
+    def test_intbounds_not_generalized1(self):
+        myjitdriver = JitDriver(greens = [], reds = ['n', 'i', 'sa'])
+
+        def f(n):
+            sa = i = 0
+            while i < n:
+                myjitdriver.jit_merge_point(n=n, i=i, sa=sa)
+                if i > n/2:
+                    sa += 1
+                else:
+                    sa += 2
+                    assert  -100 < i < 100
+                i += 1
+            return sa
+        assert self.meta_interp(f, [20]) == f(20)
+        self.check_loops(int_gt=1, int_lt=3, int_ge=2, int_le=1)
+
+    def test_intbounds_not_generalized2(self):
+        myjitdriver = JitDriver(greens = [], reds = ['n', 'i', 'sa', 'node'])
+        class A(object):
+            def __init__(self, val):
+                self.val = val
+        def f(n):
+            sa = i = 0
+            node = A(n)
+            while i < n:
+                myjitdriver.jit_merge_point(n=n, i=i, sa=sa, node=node)
+                if i > n/2:
+                    sa += 1
+                else:
+                    sa += 2
+                    assert  -100 <= node.val <= 100
+                i += 1
+            return sa
+        assert self.meta_interp(f, [20]) == f(20)
+        self.check_loops(int_gt=1, int_lt=2, int_ge=1, int_le=1)
+
+    def test_retrace_limit1(self):
+        myjitdriver = JitDriver(greens = [], reds = ['n', 'i', 'sa', 'a'])
+
+        def f(n, limit):
+            myjitdriver.set_param('retrace_limit', limit)
+            sa = i = a = 0
+            while i < n:
+                myjitdriver.jit_merge_point(n=n, i=i, sa=sa, a=a)
+                a = i/4
+                a = hint(a, promote=True)
+                sa += a
+                i += 1
+            return sa
+        assert self.meta_interp(f, [20, 2]) == f(20, 2)
+        self.check_tree_loop_count(4)
+        assert self.meta_interp(f, [20, 3]) == f(20, 3)
+        self.check_tree_loop_count(5)
+
+    def test_max_retrace_guards(self):
+        myjitdriver = JitDriver(greens = [], reds = ['n', 'i', 'sa', 'a'])
+
+        def f(n, limit):
+            myjitdriver.set_param('retrace_limit', 3)
+            myjitdriver.set_param('max_retrace_guards', limit)
+            sa = i = a = 0
+            while i < n:
+                myjitdriver.jit_merge_point(n=n, i=i, sa=sa, a=a)
+                a = i/4
+                a = hint(a, promote=True)
+                sa += a
+                i += 1
+            return sa
+        assert self.meta_interp(f, [20, 1]) == f(20, 1)
+        self.check_tree_loop_count(2)
+        assert self.meta_interp(f, [20, 10]) == f(20, 10)
+        self.check_tree_loop_count(5)
+
+
+    def test_retrace_limit_with_extra_guards(self):
+        myjitdriver = JitDriver(greens = [], reds = ['n', 'i', 'sa', 'a',
+                                                     'node'])
+        def f(n, limit):
+            myjitdriver.set_param('retrace_limit', limit)
+            sa = i = a = 0
+            node = [1, 2, 3]
+            node[1] = n
+            while i < n:
+                myjitdriver.jit_merge_point(n=n, i=i, sa=sa, a=a, node=node)
+                a = i/4
+                a = hint(a, promote=True)
+                if i&1 == 0:
+                    sa += node[i%3]
+                sa += a
+                i += 1
+            return sa
+        assert self.meta_interp(f, [20, 2]) == f(20, 2)
+        self.check_tree_loop_count(4)
+        assert self.meta_interp(f, [20, 3]) == f(20, 3)
+        self.check_tree_loop_count(5)
+
+    def test_retrace_ending_up_retrazing_another_loop(self):
+
+        myjitdriver = JitDriver(greens = ['pc'], reds = ['n', 'i', 'sa'])
+        bytecode = "0+sI0+SI"
+        def f(n):
+            myjitdriver.set_param('threshold', 3)
+            myjitdriver.set_param('trace_eagerness', 1)
+            myjitdriver.set_param('retrace_limit', 5)
+            myjitdriver.set_param('function_threshold', -1)            
+            pc = sa = i = 0
+            while pc < len(bytecode):
+                myjitdriver.jit_merge_point(pc=pc, n=n, sa=sa, i=i)
+                n = hint(n, promote=True)
+                op = bytecode[pc]
+                if op == '0':
+                    i = 0
+                elif op == '+':
+                    i += 1
+                elif op == 's':
+                    sa += i
+                elif op == 'S':
+                    sa += 2
+                elif op == 'I':
+                    if i < n:
+                        pc -= 2
+                        myjitdriver.can_enter_jit(pc=pc, n=n, sa=sa, i=i)
+                        continue
+                pc += 1
+            return sa
+
+        def g(n1, n2):
+            for i in range(10):
+                f(n1)
+            for i in range(10):                
+                f(n2)
+
+        nn = [10, 3]
+        assert self.meta_interp(g, nn) == g(*nn)
+        
+        # The attempts at retracing the first loop will end up retracing the
+        # second and thus fail 5 times, saturating the retrace_count. Instead a
+        # bridge back to the preamble of the first loop is produced. A guard in
+        # this bridge is later traced, resulting in a retrace of the second loop.
+        # Thus we end up with:
+        #   1 preamble and 1 specialized version of the first loop
+        #   1 preamble and 2 specialized versions of the second loop
+        self.check_tree_loop_count(2 + 3)
+
+        # FIXME: Add a global retrace counter and test that we are not trying more than 5 times.
+        
+        def g(n):
+            for i in range(n):
+                for j in range(10):
+                    f(n-i)
+
+        res = self.meta_interp(g, [10])
+        assert res == g(10)
+        # 1 preamble and 6 specialized versions of each loop
+        self.check_tree_loop_count(2*(1 + 6))
+
+    def test_nested_retrace(self):
+
+        myjitdriver = JitDriver(greens = ['pc'], reds = ['n', 'a', 'i', 'j', 'sa'])
+        bytecode = "ij+Jj+JI"
+        def f(n, a):
+            myjitdriver.set_param('threshold', 5)
+            myjitdriver.set_param('trace_eagerness', 1)
+            myjitdriver.set_param('retrace_limit', 2)
+            pc = sa = i = j = 0
+            while pc < len(bytecode):
+                myjitdriver.jit_merge_point(pc=pc, n=n, sa=sa, i=i, j=j, a=a)
+                a = hint(a, promote=True)
+                op = bytecode[pc]
+                if op == 'i':
+                    i = 0
+                elif op == 'j':
+                    j = 0
+                elif op == '+':
+                    sa += a
+                elif op == 'J':
+                    j += 1
+                    if j < 3:
+                        pc -= 1
+                        myjitdriver.can_enter_jit(pc=pc, n=n, sa=sa, i=i, j=j, a=a)
+                        continue
+                elif op == 'I':
+                    i += 1
+                    if i < n:
+                        pc -= 6
+                        myjitdriver.can_enter_jit(pc=pc, n=n, sa=sa, i=i, j=j, a=a)
+                        continue
+                pc += 1
+            return sa
+
+        res = self.meta_interp(f, [10, 7])
+        assert res == f(10, 7)
+        self.check_tree_loop_count(4)
+
+        def g(n):
+            return f(n, 2) + f(n, 3)
+
+        res = self.meta_interp(g, [10])
+        assert res == g(10)
+        self.check_tree_loop_count(6)
+
+
+        def g(n):
+            return f(n, 2) + f(n, 3) + f(n, 4) + f(n, 5) + f(n, 6) + f(n, 7)
+
+        res = self.meta_interp(g, [10])
+        assert res == g(10)
+        self.check_tree_loop_count(8)
+
     def test_frame_finished_during_retrace(self):
         class Base(object):
             pass
@@ -2452,7 +2914,46 @@
         # 1 preamble and 6 specialized versions of each loop
         self.check_tree_loop_count(2*(1 + 6))
 
+    def test_continue_tracing_with_boxes_in_start_snapshot_replaced_by_optimizer(self):
+        myjitdriver = JitDriver(greens = [], reds = ['sa', 'n', 'a', 'b'])
+        def f(n):
+            sa = a = 0
+            b = 10
+            while n:
+                myjitdriver.jit_merge_point(sa=sa, n=n, a=a, b=b)
+                sa += b
+                b += 1
+                if b > 7:
+                    pass
+                if a == 0:
+                    a = 1
+                elif a == 1:
+                    a = 2
+                elif a == 2:
+                    a = 0
+                sa += a
+                sa += 0
+                n -= 1
+            return sa
+        res = self.meta_interp(f, [16])
+        assert res == f(16)
 
+    def test_loopinvariant_array_shrinking1(self):
+        myjitdriver = JitDriver(greens = [], reds = ['sa', 'n', 'i', 'a'])
+        def f(n):
+            sa = i = 0
+            a = [0, 1, 2, 3, 4]
+            while i < n:
+                myjitdriver.jit_merge_point(sa=sa, n=n, a=a, i=i)
+                if i < n/2:
+                    sa += a[4]
+                elif i == n/2:
+                    a.pop()
+                i += 1
+        res = self.meta_interp(f, [32])
+        assert res == f(32)
+        self.check_loops(arraylen_gc=2)
+        
 class TestOOtype(BasicTests, OOJitMixin):
 
     def test_oohash(self):
@@ -2630,7 +3131,101 @@
         self.meta_interp(f, [], enable_opts='')
         self.check_loops(new_with_vtable=1)
 
+    def test_two_loopinvariant_arrays1(self):
+        from pypy.rpython.lltypesystem import lltype, llmemory, rffi
+        myjitdriver = JitDriver(greens = [], reds = ['sa', 'n', 'i', 'a'])
+        TP = lltype.GcArray(lltype.Signed)
+        def f(n):
+            sa = i = 0
+            a = lltype.malloc(TP, 5)
+            a[4] = 7
+            while i < n:
+                myjitdriver.jit_merge_point(sa=sa, n=n, a=a, i=i)
+                if i < n/2:
+                    sa += a[4]
+                if i == n/2:
+                    a = lltype.malloc(TP, 3)
+                i += 1
+            return sa
+        res = self.meta_interp(f, [32])
+        assert res == f(32)
+        self.check_tree_loop_count(3)
+
+    def test_two_loopinvariant_arrays2(self):
+        from pypy.rpython.lltypesystem import lltype, llmemory, rffi
+        myjitdriver = JitDriver(greens = [], reds = ['sa', 'n', 'i', 'a'])
+        TP = lltype.GcArray(lltype.Signed)
+        def f(n):
+            sa = i = 0
+            a = lltype.malloc(TP, 5)
+            a[4] = 7
+            while i < n:
+                myjitdriver.jit_merge_point(sa=sa, n=n, a=a, i=i)
+                if i < n/2:
+                    sa += a[4]
+                elif i > n/2:
+                    sa += a[2]
+                if i == n/2:
+                    a = lltype.malloc(TP, 3)
+                    a[2] = 42
+                i += 1
+            return sa
+        res = self.meta_interp(f, [32])
+        assert res == f(32)
+        self.check_tree_loop_count(3)
+        
+    def test_two_loopinvariant_arrays3(self):
+        from pypy.rpython.lltypesystem import lltype, llmemory, rffi
+        myjitdriver = JitDriver(greens = [], reds = ['sa', 'n', 'i', 'a'])
+        TP = lltype.GcArray(lltype.Signed)
+        def f(n):
+            sa = i = 0
+            a = lltype.malloc(TP, 5)
+            a[2] = 7
+            while i < n:
+                myjitdriver.jit_merge_point(sa=sa, n=n, a=a, i=i)
+                if i < n/2:
+                    sa += a[2]
+                elif i > n/2:
+                    sa += a[3]
+                if i == n/2:
+                    a = lltype.malloc(TP, 7)
+                    a[3] = 10
+                    a[2] = 42
+                i += 1
+            return sa
+        res = self.meta_interp(f, [32])
+        assert res == f(32)
+        self.check_tree_loop_count(2)
+        
+    def test_two_loopinvariant_arrays_boxed(self):
+        class A(object):
+            def __init__(self, a):
+                self.a  = a
+        from pypy.rpython.lltypesystem import lltype, llmemory, rffi
+        myjitdriver = JitDriver(greens = [], reds = ['sa', 'n', 'i', 'a'])
+        TP = lltype.GcArray(lltype.Signed)
+        a1 = A(lltype.malloc(TP, 5))
+        a2 = A(lltype.malloc(TP, 3))
+        def f(n):
+            sa = i = 0
+            a = a1
+            a.a[4] = 7
+            while i < n:
+                myjitdriver.jit_merge_point(sa=sa, n=n, a=a, i=i)
+                if i < n/2:
+                    sa += a.a[4]
+                if i == n/2:
+                    a = a2
+                i += 1
+            return sa
+        res = self.meta_interp(f, [32])
+        assert res == f(32)
+        self.check_loops(arraylen_gc=2, everywhere=True)
+        
     def test_release_gil_flush_heap_cache(self):
+        if sys.platform == "win32":
+            py.test.skip("needs 'time'")
         T = rffi.CArrayPtr(rffi.TIME_T)
 
         external = rffi.llexternal("time", [T], rffi.TIME_T, threadsafe=True)
@@ -2681,6 +3276,7 @@
             return n
 
         self.meta_interp(f, [10], repeat=3)
+        
 
 class TestLLtype(BaseLLtypeTests, LLJitMixin):
     pass
diff --git a/pypy/jit/metainterp/test/test_fficall.py b/pypy/jit/metainterp/test/test_fficall.py
--- a/pypy/jit/metainterp/test/test_fficall.py
+++ b/pypy/jit/metainterp/test/test_fficall.py
@@ -3,7 +3,7 @@
 from pypy.rlib.rarithmetic import r_singlefloat, r_longlong, r_ulonglong
 from pypy.rlib.jit import JitDriver, promote, dont_look_inside
 from pypy.rlib.unroll import unrolling_iterable
-from pypy.rlib.libffi import ArgChain, longlong2float, float2longlong
+from pypy.rlib.libffi import ArgChain
 from pypy.rlib.libffi import IS_32_BIT
 from pypy.rlib.test.test_libffi import TestLibffiCall as _TestLibffiCall
 from pypy.rpython.lltypesystem import lltype, rffi
@@ -12,10 +12,11 @@
 from pypy.jit.metainterp.test.support import LLJitMixin
 
 class TestFfiCall(LLJitMixin, _TestLibffiCall):
+    supports_all = False     # supports_{floats,longlong,singlefloats}
 
     # ===> ../../../rlib/test/test_libffi.py
 
-    def call(self, funcspec, args, RESULT, init_result=0, is_struct=False):
+    def call(self, funcspec, args, RESULT, is_struct=False, jitif=[]):
         """
         Call the function specified by funcspec in a loop, and let the jit
         see and optimize it.
@@ -24,14 +25,7 @@
         lib, name, argtypes, restype = funcspec
         method_and_args = []
         for argval in args:
-            if type(argval) is r_singlefloat:
-                method_name = 'arg_singlefloat'
-                argval = float(argval)
-            elif IS_32_BIT and type(argval) in [r_longlong, r_ulonglong]:
-                method_name = 'arg_longlong'
-                argval = rffi.cast(rffi.LONGLONG, argval)
-                argval = longlong2float(argval)
-            elif isinstance(argval, tuple):
+            if isinstance(argval, tuple):
                 method_name, argval = argval
             else:
                 method_name = 'arg'
@@ -39,10 +33,20 @@
         method_and_args = unrolling_iterable(method_and_args)
         #
         reds = ['n', 'res', 'func']
-        if (RESULT in [rffi.FLOAT, rffi.DOUBLE] or
+        if (RESULT is rffi.DOUBLE or
             IS_32_BIT and RESULT in [rffi.LONGLONG, rffi.ULONGLONG]):
-            reds = ['n', 'func', 'res'] # floats must be *after* refs
+            reds = ['n', 'func', 'res'] # 'double' floats must be *after* refs
         driver = JitDriver(reds=reds, greens=[])
+        init_result = rffi.cast(RESULT, 0)
+        #
+        def g(func):
+            # a different function, which is marked as "dont_look_inside"
+            # in case it uses an unsupported argument
+            argchain = ArgChain()
+            # this loop is unrolled
+            for method_name, argval in method_and_args:
+                getattr(argchain, method_name)(argval)
+            return func.call(argchain, RESULT, is_struct=is_struct)
         #
         def f(n):
             func = lib.getpointer(name, argtypes, restype)
@@ -50,18 +54,44 @@
             while n < 10:
                 driver.jit_merge_point(n=n, res=res, func=func)
                 promote(func)
-                argchain = ArgChain()
-                # this loop is unrolled
-                for method_name, argval in method_and_args:
-                    getattr(argchain, method_name)(argval)
-                res = func.call(argchain, RESULT, is_struct=is_struct)
+                res = g(func)
                 n += 1
             return res
         #
-        res = self.meta_interp(f, [0], backendopt=True)
+        res = self.meta_interp(f, [0], backendopt=True,
+                               supports_floats       = self.supports_all,
+                               supports_longlong     = self.supports_all,
+                               supports_singlefloats = self.supports_all)
+        d = {'floats': self.supports_all,
+             'longlong': self.supports_all or not IS_32_BIT,
+             'singlefloats': self.supports_all,
+             'byval': False}
+        supported = all(d[check] for check in jitif)
+        if supported:
+            self.check_loops(
+                call_release_gil=1,   # a CALL_RELEASE_GIL, and no other CALLs
+                call=0,
+                call_may_force=0,
+                guard_no_exception=1,
+                guard_not_forced=1,
+                int_add=1,
+                int_lt=1,
+                guard_true=1,
+                jump=1)
+        else:
+            self.check_loops(
+                call_release_gil=0,   # no CALL_RELEASE_GIL
+                int_add=1,
+                int_lt=1,
+                guard_true=1,
+                jump=1)
         return res
 
     def test_byval_result(self):
         _TestLibffiCall.test_byval_result(self)
     test_byval_result.__doc__ = _TestLibffiCall.test_byval_result.__doc__
     test_byval_result.dont_track_allocations = True
+
+
+class TestFfiCallSupportAll(TestFfiCall):
+    supports_all = True     # supports_{floats,longlong,singlefloats}
diff --git a/pypy/jit/metainterp/test/test_float.py b/pypy/jit/metainterp/test/test_float.py
--- a/pypy/jit/metainterp/test/test_float.py
+++ b/pypy/jit/metainterp/test/test_float.py
@@ -36,6 +36,15 @@
         res = self.interp_operations(f, [x])
         assert res == -x
 
+    def test_singlefloat(self):
+        from pypy.rlib.rarithmetic import r_singlefloat
+        def f(a):
+            a = float(r_singlefloat(a))
+            a *= 4.25
+            return float(r_singlefloat(a))
+        res = self.interp_operations(f, [-2.0])
+        assert res == -8.5
+
 
 class TestOOtype(FloatTests, OOJitMixin):
     pass
diff --git a/pypy/jit/metainterp/test/test_jitdriver.py b/pypy/jit/metainterp/test/test_jitdriver.py
--- a/pypy/jit/metainterp/test/test_jitdriver.py
+++ b/pypy/jit/metainterp/test/test_jitdriver.py
@@ -123,7 +123,11 @@
         res = self.meta_interp(loop2, [4, 40], repeat=7, inline=True)
         assert res == loop2(4, 40)
         # we expect no loop at all for 'loop1': it should always be inlined
-        self.check_tree_loop_count(2)    # 1 x loop, 1 x enter bridge
+        # we do, however, get several versions of 'loop2', all of which contain
+        # at least one int_add, while there are no int_adds in 'loop1'
+        self.check_tree_loop_count(5)
+        for loop in get_stats().loops:
+            assert loop.summary()['int_add'] >= 1
 
     def test_inactive_jitdriver(self):
         myjitdriver1 = JitDriver(greens=[], reds=['n', 'm'],
diff --git a/pypy/jit/metainterp/test/test_loop.py b/pypy/jit/metainterp/test/test_loop.py
--- a/pypy/jit/metainterp/test/test_loop.py
+++ b/pypy/jit/metainterp/test/test_loop.py
@@ -1,5 +1,5 @@
 import py
-from pypy.rlib.jit import JitDriver
+from pypy.rlib.jit import JitDriver, hint
 from pypy.rlib.objectmodel import compute_hash
 from pypy.jit.metainterp.warmspot import ll_meta_interp, get_stats
 from pypy.jit.metainterp.test.support import LLJitMixin, OOJitMixin
@@ -798,7 +798,70 @@
                 some_fn(Stuff(n), k, z)
             return 0
 
-        res = self.meta_interp(f, [200])
+        res = self.meta_interp(f, [200])        
+
+    def test_regular_pointers_in_short_preamble(self):
+        # XXX do we really care about this case?  If not, we should
+        # at least detect it and complain during codewriter/jtransform
+        from pypy.rpython.lltypesystem import lltype
+        BASE = lltype.GcStruct('BASE')
+        A = lltype.GcStruct('A', ('parent', BASE), ('val', lltype.Signed))
+        B = lltype.GcStruct('B', ('parent', BASE), ('charval', lltype.Char))
+        myjitdriver = JitDriver(greens = [], reds = ['n', 'm', 'i', 'j', 'sa', 'p'])
+        def f(n, m, j):
+            i = sa = 0
+            pa = lltype.malloc(A)
+            pa.val = 7
+            p = pa.parent
+            while i < n:
+                myjitdriver.jit_merge_point(n=n, m=m, i=i, j=j, sa=sa, p=p)
+                if i < m:
+                    pa = lltype.cast_pointer(lltype.Ptr(A), p)
+                    sa += pa.val
+                elif i == m:
+                    pb = lltype.malloc(B)
+                    pb.charval = 'y'
+                    p = pb.parent
+                else:
+                    pb = lltype.cast_pointer(lltype.Ptr(B), p)
+                    sa += ord(pb.charval)
+                sa += 100
+                assert n>0 and m>0
+                i += j
+            return sa
+        expected = f(20, 10, 1)
+        res = self.meta_interp(f, [20, 10, 1])
+        assert res == expected
+
+    def test_unerased_pointers_in_short_preamble(self):
+        from pypy.rlib.rerased import new_erasing_pair
+        from pypy.rpython.lltypesystem import lltype
+        class A(object):
+            def __init__(self, val):
+                self.val = val
+        erase_A, unerase_A = new_erasing_pair('A')
+        erase_TP, unerase_TP = new_erasing_pair('TP')
+        TP = lltype.GcArray(lltype.Signed)
+        myjitdriver = JitDriver(greens = [], reds = ['n', 'm', 'i', 'j', 'sa', 'p'])
+        def f(n, m, j):
+            i = sa = 0
+            p = erase_A(A(7))
+            while i < n:
+                myjitdriver.jit_merge_point(n=n, m=m, i=i, j=j, sa=sa, p=p)
+                if i < m:
+                    sa += unerase_A(p).val
+                elif i == m:
+                    a = lltype.malloc(TP, 5)
+                    a[0] = 42
+                    p = erase_TP(a)
+                else:
+                    sa += unerase_TP(p)[0]
+                sa += A(i).val
+                assert n>0 and m>0
+                i += j
+            return sa
+        res = self.meta_interp(f, [20, 10, 1])
+        assert res == f(20, 10, 1)
 
 class TestOOtype(LoopTest, OOJitMixin):
     pass
diff --git a/pypy/jit/metainterp/test/test_quasiimmut.py b/pypy/jit/metainterp/test/test_quasiimmut.py
--- a/pypy/jit/metainterp/test/test_quasiimmut.py
+++ b/pypy/jit/metainterp/test/test_quasiimmut.py
@@ -289,7 +289,7 @@
             return total
 
         res = self.meta_interp(main, [])
-        self.check_loop_count(7)
+        self.check_loop_count(9)
         assert res == main()
 
     def test_change_during_running(self):
diff --git a/pypy/jit/metainterp/test/test_send.py b/pypy/jit/metainterp/test/test_send.py
--- a/pypy/jit/metainterp/test/test_send.py
+++ b/pypy/jit/metainterp/test/test_send.py
@@ -384,9 +384,9 @@
         res = self.meta_interp(f, [198],
                                policy=StopAtXPolicy(State.externfn.im_func))
         assert res == f(198)
-        # we get two TreeLoops: an initial one, and one entering from
-        # the interpreter
-        self.check_tree_loop_count(2)
+        # we get four TreeLoops: one for each of the 3 getvalue functions,
+        # and one entering from the interpreter
+        self.check_tree_loop_count(4)
 
     def test_two_behaviors(self):
         py.test.skip("XXX fix me!!!!!!! problem in optimize.py")
@@ -436,10 +436,11 @@
             return x.value
         res = self.meta_interp(f, [len(cases)])
         assert res == 200
-        # we expect 1 loop, 1 entry bridge, and 1 bridge going from the
+        # we expect 2 versions of the loop, 1 entry bridge,
+        # and 1 bridge going from the
         # loop back to the start of the entry bridge
-        self.check_loop_count(2)        # 1 loop + 1 bridge
-        self.check_tree_loop_count(2)   # 1 loop + 1 entry bridge  (argh)
+        self.check_loop_count(3)        # 2 loops + 1 bridge
+        self.check_tree_loop_count(3)   # 2 loops + 1 entry bridge  (argh)
         self.check_aborted_count(0)
 
     def test_three_cases(self):
diff --git a/pypy/jit/metainterp/test/test_string.py b/pypy/jit/metainterp/test/test_string.py
--- a/pypy/jit/metainterp/test/test_string.py
+++ b/pypy/jit/metainterp/test/test_string.py
@@ -323,6 +323,163 @@
         self.meta_interp(f, [6, 7])
         self.check_loops(newstr=0, newunicode=0)
 
+    def test_str_slice_len_surviving(self):
+        _str = self._str
+        longstring = _str("Unrolling Trouble")
+        mydriver = JitDriver(reds = ['i', 'a', 'sa'], greens = []) 
+        def f(a):
+            i = sa = a
+            while i < len(longstring):
+                mydriver.jit_merge_point(i=i, a=a, sa=sa)
+                assert a >= 0 and i >= 0
+                i = len(longstring[a:i+1])
+                sa += i
+            return sa
+        assert self.meta_interp(f, [0]) == f(0)
+
+    def test_virtual_strings_direct(self):
+        _str = self._str
+        fillers = _str("abcdefghijklmnopqrstuvwxyz")
+        data = _str("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
+
+        mydriver = JitDriver(reds = ['line', 'noise', 'res'], greens = []) 
+        def f():
+            line = data
+            noise = fillers
+            ratio = len(line) // len(noise)
+            res = data[0:0]
+            while line and noise:
+                mydriver.jit_merge_point(line=line, noise=noise, res=res)
+                if len(line) // len(noise) > ratio:
+                    c, line = line[0], line[1:]
+                else:
+                    c, noise = noise[0], noise[1:]
+                res += c
+            return res + noise + line
+        s1 = self.meta_interp(f, [])
+        s2 = f()
+        for c1, c2 in zip(s1.chars, s2):
+            assert c1==c2
+
+    def test_virtual_strings_boxed(self):
+        _str = self._str
+        fillers = _str("abcdefghijklmnopqrstuvwxyz")
+        data = _str("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
+        class Str(object):
+            def __init__(self, value):
+                self.value = value
+        mydriver = JitDriver(reds = ['ratio', 'line', 'noise', 'res'],
+                             greens = []) 
+        def f():
+            line = Str(data)
+            noise = Str(fillers)
+            ratio = len(line.value) // len(noise.value)
+            res = Str(data[0:0])
+            while line.value and noise.value:
+                mydriver.jit_merge_point(line=line, noise=noise, res=res,
+                                         ratio=ratio)
+                if len(line.value) // len(noise.value) > ratio:
+                    c, line = line.value[0], Str(line.value[1:])
+                else:
+                    c, noise = noise.value[0], Str(noise.value[1:])
+                res = Str(res.value + c)
+            return res.value + noise.value + line.value
+        s1 = self.meta_interp(f, [])
+        s2 = f()
+        for c1, c2 in zip(s1.chars, s2):
+            assert c1==c2
+
+    def test_string_in_virtual_state(self):
+        _str = self._str
+        s1 = _str("a")
+        s2 = _str("AA")
+        mydriver = JitDriver(reds = ['i', 'n', 'sa'], greens = [])
+        def f(n):
+            sa = s1
+            i = 0
+            while i < n:
+                mydriver.jit_merge_point(i=i, n=n, sa=sa)
+                if i&4 == 0:
+                    sa += s1
+                else:
+                    sa += s2
+                i += 1
+            return len(sa)
+        assert self.meta_interp(f, [16]) == f(16)
+
+    def test_loop_invariant_string_slice(self):
+        _str = self._str
+        mydriver = JitDriver(reds = ['i', 'n', 'sa', 's', 's1'], greens = [])
+        def f(n, c):
+            s = s1 = _str(c*10)
+            sa = i = 0
+            while i < n:
+                mydriver.jit_merge_point(i=i, n=n, sa=sa, s=s, s1=s1)
+                sa += len(s)
+                if i < n/2:
+                    s = s1[1:3]
+                else:
+                    s = s1[2:3]
+                i += 1
+            return sa
+        assert self.meta_interp(f, [16, 'a']) == f(16, 'a')
+
+    def test_loop_invariant_string_slice_boxed(self):
+        class Str(object):
+            def __init__(self, value):
+                self.value = value
+        _str = self._str
+        mydriver = JitDriver(reds = ['i', 'n', 'sa', 's', 's1'], greens = [])
+        def f(n, c):
+            s = s1 = Str(_str(c*10))
+            sa = i = 0
+            while i < n:
+                mydriver.jit_merge_point(i=i, n=n, sa=sa, s=s, s1=s1)
+                sa += len(s.value)
+                if i < n/2:
+                    s = Str(s1.value[1:3])
+                else:
+                    s = Str(s1.value[2:3])
+                i += 1
+            return sa
+        assert self.meta_interp(f, [16, 'a']) == f(16, 'a')
+
+    def test_loop_invariant_string_slice_in_array(self):
+        _str = self._str
+        mydriver = JitDriver(reds = ['i', 'n', 'sa', 's', 's1'], greens = [])
+        def f(n, c):
+            s = s1 = [_str(c*10)]
+            sa = i = 0
+            while i < n:
+                mydriver.jit_merge_point(i=i, n=n, sa=sa, s=s, s1=s1)
+                sa += len(s[0])
+                if i < n/2:
+                    s = [s1[0][1:3]]
+                else:
+                    s = [s1[0][2:3]]
+                i += 1
+            return sa
+        assert self.meta_interp(f, [16, 'a']) == f(16, 'a')
+
+    def test_boxed_virtual_string_not_surviving(self):
+        class StrBox(object):
+            def __init__(self, val):
+                self.val = val
+        class IntBox(object):
+            def __init__(self, val):
+                self.val = val
+        _str = self._str
+        mydriver = JitDriver(reds = ['i', 'nt', 'sa'], greens = [])
+        def f(c):
+            nt = StrBox(_str(c*16))
+            sa = StrBox(_str(''))
+            i = IntBox(0)
+            while i.val < len(nt.val):
+                mydriver.jit_merge_point(i=i, nt=nt, sa=sa)
+                sa = StrBox(sa.val + StrBox(nt.val[i.val]).val)
+                i = IntBox(i.val + 1)
+            return len(sa.val)
+        assert self.meta_interp(f, ['a']) == f('a')
 
 #class TestOOtype(StringTests, OOJitMixin):
 #    CALL = "oosend"
diff --git a/pypy/jit/metainterp/test/test_virtual.py b/pypy/jit/metainterp/test/test_virtual.py
--- a/pypy/jit/metainterp/test/test_virtual.py
+++ b/pypy/jit/metainterp/test/test_virtual.py
@@ -898,6 +898,53 @@
         res = self.meta_interp(f, [], repeat=7)
         assert res == f()
 
+    def test_virtual_attribute_pure_function(self):
+        mydriver = JitDriver(reds = ['i', 'sa', 'n', 'node'], greens = [])
+        class A(object):
+            def __init__(self, v1, v2):
+                self.v1 = v1
+                self.v2 = v2
+        def f(n):
+            i = sa = 0
+            node = A(1, 2)
+            while i < n:
+                mydriver.jit_merge_point(i=i, sa=sa, n=n, node=node)
+                sa += node.v1 + node.v2 + 2*node.v1
+                if i < n/2:
+                    node = A(n, 2*n)
+                else:
+                    node = A(n, 3*n)
+                i += 1
+            return sa
+
+        res = self.meta_interp(f, [16])
+        assert res == f(16)
+
+    def test_virtual_loop_invariant_getitem(self):
+        mydriver = JitDriver(reds = ['i', 'sa', 'n', 'node1', 'node2'], greens = [])
+        class A(object):
+            def __init__(self, v1, v2):
+                self.v1 = v1
+                self.v2 = v2
+        def f(n):
+            i = sa = 0
+            node1 = A(1, 2)
+            node2 = A(n, n)
+            while i < n:
+                mydriver.jit_merge_point(i=i, sa=sa, n=n, node1=node1, node2=node2)
+                sa += node1.v1 + node2.v1 + node2.v2
+                if i < n/2:
+                    node1 = A(node2.v1, 2)
+                else:
+                    node1 = A(i, 2)
+                i += 1
+            return sa
+
+        res = self.meta_interp(f, [16])
+        assert res == f(16)
+        self.check_loops(getfield_gc=2)
+        
+
 # ____________________________________________________________
 # Run 1: all the tests instantiate a real RPython class
 
diff --git a/pypy/jit/metainterp/test/test_virtualstate.py b/pypy/jit/metainterp/test/test_virtualstate.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/metainterp/test/test_virtualstate.py
@@ -0,0 +1,911 @@
+import py
+from pypy.jit.metainterp.optimize import InvalidLoop
+from pypy.jit.metainterp.optimizeopt.virtualstate import VirtualStateInfo, VStructStateInfo, \
+     VArrayStateInfo, NotVirtualStateInfo, VirtualState
+from pypy.jit.metainterp.optimizeopt.optimizer import OptValue
+from pypy.jit.metainterp.history import BoxInt, BoxFloat, BoxPtr, ConstInt, ConstPtr
+from pypy.rpython.lltypesystem import lltype
+from pypy.jit.metainterp.optimizeopt.test.test_util import LLtypeMixin, BaseTest, equaloplists
+from pypy.jit.metainterp.optimizeopt.intutils import IntBound
+from pypy.jit.metainterp.history import TreeLoop, LoopToken
+from pypy.jit.metainterp.optimizeopt.test.test_optimizeopt import FakeDescr, FakeMetaInterpStaticData
+from pypy.jit.metainterp.optimize import RetraceLoop
+
+class TestBasic:
+    someptr1 = LLtypeMixin.myptr
+    someptr2 = LLtypeMixin.myptr2
+
+    def test_position_generalization(self):
+        def postest(info1, info2):
+            info1.position = 0
+            assert info1.generalization_of(info1, {}, {})
+            info2.position = 0
+            assert info1.generalization_of(info2, {}, {})
+            info2.position = 1
+            renum = {}
+            assert info1.generalization_of(info2, renum, {})
+            assert renum == {0:1}
+            assert info1.generalization_of(info2, {0:1}, {})
+            assert info1.generalization_of(info2, {1:1}, {})
+            bad = {}
+            assert not info1.generalization_of(info2, {0:0}, bad)
+            assert info1 in bad and info2 in bad
+
+        for BoxType in (BoxInt, BoxFloat, BoxPtr):
+            info1 = NotVirtualStateInfo(OptValue(BoxType()))
+            info2 = NotVirtualStateInfo(OptValue(BoxType()))
+            postest(info1, info2)
+            
+        info1, info2 = VArrayStateInfo(42), VArrayStateInfo(42)
+        info1.fieldstate = info2.fieldstate = []
+        postest(info1, info2)
+
+        info1, info2 = VStructStateInfo(42, []), VStructStateInfo(42, [])
+        info1.fieldstate = info2.fieldstate = []
+        postest(info1, info2)
+
+        info1, info2 = VirtualStateInfo(ConstInt(42), []), VirtualStateInfo(ConstInt(42), [])
+        info1.fieldstate = info2.fieldstate = []
+        postest(info1, info2)
+
+    def test_NotVirtualStateInfo_generalization(self):
+        def isgeneral(value1, value2):
+            info1 = NotVirtualStateInfo(value1)
+            info1.position = 0
+            info2 = NotVirtualStateInfo(value2)
+            info2.position = 0
+            return info1.generalization_of(info2, {}, {})
+
+        assert isgeneral(OptValue(BoxInt()), OptValue(ConstInt(7)))
+        assert not isgeneral(OptValue(ConstInt(7)), OptValue(BoxInt()))
+
+        ptr = OptValue(BoxPtr())
+        nonnull = OptValue(BoxPtr())
+        nonnull.make_nonnull(0)
+        knownclass = OptValue(BoxPtr())
+        knownclass.make_constant_class(ConstPtr(self.someptr1), 0)
+        const = OptValue(BoxPtr)
+        const.make_constant_class(ConstPtr(self.someptr1), 0)
+        const.make_constant(ConstPtr(self.someptr1))
+        inorder = [ptr, nonnull, knownclass, const]
+        for i in range(len(inorder)):
+            for j in range(i, len(inorder)):
+                assert isgeneral(inorder[i], inorder[j])
+                if i != j:
+                    assert not isgeneral(inorder[j], inorder[i])
+
+        value1 = OptValue(BoxInt())
+        value2 = OptValue(BoxInt())
+        value2.intbound.make_lt(IntBound(10, 10))
+        assert isgeneral(value1, value2)
+        assert not isgeneral(value2, value1)
+
+    def test_field_matching_generalization(self):
+        const1 = NotVirtualStateInfo(OptValue(ConstInt(1)))
+        const2 = NotVirtualStateInfo(OptValue(ConstInt(2)))
+        const1.position = const2.position = 1
+        assert not const1.generalization_of(const2, {}, {})
+        assert not const2.generalization_of(const1, {}, {})
+
+        def fldtst(info1, info2):
+            info1.position = info2.position = 0
+            info1.fieldstate = [const1]
+            info2.fieldstate = [const2]
+            assert not info1.generalization_of(info2, {}, {})
+            assert not info2.generalization_of(info1, {}, {})
+            assert info1.generalization_of(info1, {}, {})
+            assert info2.generalization_of(info2, {}, {})
+        fldtst(VArrayStateInfo(42), VArrayStateInfo(42))
+        fldtst(VStructStateInfo(42, [7]), VStructStateInfo(42, [7]))
+        fldtst(VirtualStateInfo(ConstInt(42), [7]), VirtualStateInfo(ConstInt(42), [7]))
+
+    def test_known_class_generalization(self):
+        knownclass1 = OptValue(BoxPtr())
+        knownclass1.make_constant_class(ConstPtr(self.someptr1), 0)
+        info1 = NotVirtualStateInfo(knownclass1)
+        info1.position = 0
+        knownclass2 = OptValue(BoxPtr())
+        knownclass2.make_constant_class(ConstPtr(self.someptr1), 0)
+        info2 = NotVirtualStateInfo(knownclass2)
+        info2.position = 0
+        assert info1.generalization_of(info2, {}, {})
+        assert info2.generalization_of(info1, {}, {})
+
+        knownclass3 = OptValue(BoxPtr())
+        knownclass3.make_constant_class(ConstPtr(self.someptr2), 0)
+        info3 = NotVirtualStateInfo(knownclass3)
+        info3.position = 0
+        assert not info1.generalization_of(info3, {}, {})
+        assert not info2.generalization_of(info3, {}, {})
+        assert not info3.generalization_of(info2, {}, {})
+        assert not info3.generalization_of(info1, {}, {})
+
+
+    def test_circular_generalization(self):
+        for info in (VArrayStateInfo(42), VStructStateInfo(42, [7]),
+                     VirtualStateInfo(ConstInt(42), [7])):
+            info.position = 0
+            info.fieldstate = [info]
+            assert info.generalization_of(info, {}, {})
+
+class BaseTestGenerateGuards(BaseTest):
+    def guards(self, info1, info2, box, expected):
+        info1.position = info2.position = 0
+        guards = []
+        info1.generate_guards(info2, box, self.cpu, guards, {})
+        self.compare(guards, expected, [box])
+
+    def compare(self, guards, expected, inputargs):
+        loop = self.parse(expected)
+        boxmap = {}
+        assert len(loop.inputargs) == len(inputargs)
+        for a, b in zip(loop.inputargs, inputargs):
+            boxmap[a] = b
+        for op in loop.operations:
+            if op.is_guard():
+                op.setdescr(None)
+        assert equaloplists(guards, loop.operations, False,
+                            boxmap)        
+    def test_intbounds(self):
+        value1 = OptValue(BoxInt())
+        value1.intbound.make_ge(IntBound(0, 10))
+        value1.intbound.make_le(IntBound(20, 30))
+        info1 = NotVirtualStateInfo(value1)
+        info2 = NotVirtualStateInfo(OptValue(BoxInt()))
+        expected = """
+        [i0]
+        i1 = int_ge(i0, 0)
+        guard_true(i1) []
+        i2 = int_le(i0, 30)
+        guard_true(i2) []
+        """
+        self.guards(info1, info2, BoxInt(15), expected)
+        py.test.raises(InvalidLoop, self.guards,
+                       info1, info2, BoxInt(50), expected)
+
+
+    def test_known_class(self):
+        value1 = OptValue(self.nodebox)
+        classbox = self.cpu.ts.cls_of_box(self.nodebox)
+        value1.make_constant_class(classbox, -1)
+        info1 = NotVirtualStateInfo(value1)
+        info2 = NotVirtualStateInfo(OptValue(self.nodebox))
+        expected = """
+        [p0]
+        guard_nonnull(p0) []        
+        guard_class(p0, ConstClass(node_vtable)) []
+        """
+        self.guards(info1, info2, self.nodebox, expected)
+        py.test.raises(InvalidLoop, self.guards,
+                       info1, info2, BoxPtr(), expected)
+
+    def test_known_class_value(self):
+        value1 = OptValue(self.nodebox)
+        classbox = self.cpu.ts.cls_of_box(self.nodebox)
+        value1.make_constant_class(classbox, -1)
+        box = self.nodebox
+        guards = value1.make_guards(box)
+        expected = """
+        [p0]
+        guard_nonnull(p0) []        
+        guard_class(p0, ConstClass(node_vtable)) []
+        """
+        self.compare(guards, expected, [box])
+
+    def test_equal_inputargs(self):
+        value = OptValue(self.nodebox)        
+        classbox = self.cpu.ts.cls_of_box(self.nodebox)
+        value.make_constant_class(classbox, -1)
+        knownclass_info = NotVirtualStateInfo(value)
+        vstate1 = VirtualState([knownclass_info, knownclass_info])
+        assert vstate1.generalization_of(vstate1)
+
+        unknown_info1 = NotVirtualStateInfo(OptValue(self.nodebox))
+        vstate2 = VirtualState([unknown_info1, unknown_info1])
+        assert vstate2.generalization_of(vstate2)
+        assert not vstate1.generalization_of(vstate2)
+        assert vstate2.generalization_of(vstate1)
+
+        unknown_info1 = NotVirtualStateInfo(OptValue(self.nodebox))
+        unknown_info2 = NotVirtualStateInfo(OptValue(self.nodebox))
+        vstate3 = VirtualState([unknown_info1, unknown_info2])
+        assert vstate3.generalization_of(vstate2)
+        assert vstate3.generalization_of(vstate1)
+        assert not vstate2.generalization_of(vstate3)
+        assert not vstate1.generalization_of(vstate3)
+
+        expected = """
+        [p0]
+        guard_nonnull(p0) []        
+        guard_class(p0, ConstClass(node_vtable)) []
+        """
+        guards = []
+        vstate1.generate_guards(vstate2, [self.nodebox, self.nodebox], self.cpu, guards)
+        self.compare(guards, expected, [self.nodebox])
+
+        with py.test.raises(InvalidLoop):
+            guards = []
+            vstate1.generate_guards(vstate3, [self.nodebox, self.nodebox],
+                                    self.cpu, guards)
+        with py.test.raises(InvalidLoop):
+            guards = []
+            vstate2.generate_guards(vstate3, [self.nodebox, self.nodebox],
+                                    self.cpu, guards)
+        
+    def test_virtuals_with_equal_fields(self):
+        info1 = VirtualStateInfo(ConstInt(42), [1, 2])
+        value = OptValue(self.nodebox)
+        classbox = self.cpu.ts.cls_of_box(self.nodebox)
+        value.make_constant_class(classbox, -1)
+        knownclass_info = NotVirtualStateInfo(value)
+        info1.fieldstate = [knownclass_info, knownclass_info]
+        vstate1 = VirtualState([info1])
+        assert vstate1.generalization_of(vstate1)
+
+        info2 = VirtualStateInfo(ConstInt(42), [1, 2])
+        unknown_info1 = NotVirtualStateInfo(OptValue(self.nodebox))
+        info2.fieldstate = [unknown_info1, unknown_info1]
+        vstate2 = VirtualState([info2])
+        assert vstate2.generalization_of(vstate2)
+        assert not vstate1.generalization_of(vstate2)
+        assert vstate2.generalization_of(vstate1)
+
+        info3 = VirtualStateInfo(ConstInt(42), [1, 2])
+        unknown_info1 = NotVirtualStateInfo(OptValue(self.nodebox))
+        unknown_info2 = NotVirtualStateInfo(OptValue(self.nodebox))
+        info3.fieldstate = [unknown_info1, unknown_info2]
+        vstate3 = VirtualState([info3])        
+        assert vstate3.generalization_of(vstate2)
+        assert vstate3.generalization_of(vstate1)
+        assert not vstate2.generalization_of(vstate3)
+        assert not vstate1.generalization_of(vstate3)
+
+    def test_virtuals_with_nonmatching_fields(self):
+        info1 = VirtualStateInfo(ConstInt(42), [1, 2])
+        value = OptValue(self.nodebox)
+        classbox = self.cpu.ts.cls_of_box(self.nodebox)
+        value.make_constant_class(classbox, -1)
+        knownclass_info = NotVirtualStateInfo(value)
+        info1.fieldstate = [knownclass_info, knownclass_info]
+        vstate1 = VirtualState([info1])
+        assert vstate1.generalization_of(vstate1)
+
+        info2 = VirtualStateInfo(ConstInt(42), [1, 2])
+        value = OptValue(self.nodebox2)
+        classbox = self.cpu.ts.cls_of_box(self.nodebox2)
+        value.make_constant_class(classbox, -1)
+        knownclass_info = NotVirtualStateInfo(value)
+        info2.fieldstate = [knownclass_info, knownclass_info]
+        vstate2 = VirtualState([info2])
+        assert vstate2.generalization_of(vstate2)
+
+        assert not vstate2.generalization_of(vstate1)
+        assert not vstate1.generalization_of(vstate2)
+
+    def test_virtuals_with_nonmatching_descrs(self):
+        info1 = VirtualStateInfo(ConstInt(42), [10, 20])
+        value = OptValue(self.nodebox)
+        classbox = self.cpu.ts.cls_of_box(self.nodebox)
+        value.make_constant_class(classbox, -1)
+        knownclass_info = NotVirtualStateInfo(value)
+        info1.fieldstate = [knownclass_info, knownclass_info]
+        vstate1 = VirtualState([info1])
+        assert vstate1.generalization_of(vstate1)
+
+        info2 = VirtualStateInfo(ConstInt(42), [1, 2])
+        value = OptValue(self.nodebox2)
+        classbox = self.cpu.ts.cls_of_box(self.nodebox2)
+        value.make_constant_class(classbox, -1)
+        knownclass_info = NotVirtualStateInfo(value)
+        info2.fieldstate = [knownclass_info, knownclass_info]
+        vstate2 = VirtualState([info2])
+        assert vstate2.generalization_of(vstate2)
+
+        assert not vstate2.generalization_of(vstate1)
+        assert not vstate1.generalization_of(vstate2)
+        
+    def test_virtuals_with_nonmatching_classes(self):
+        info1 = VirtualStateInfo(ConstInt(42), [1, 2])
+        value = OptValue(self.nodebox)
+        classbox = self.cpu.ts.cls_of_box(self.nodebox)
+        value.make_constant_class(classbox, -1)
+        knownclass_info = NotVirtualStateInfo(value)
+        info1.fieldstate = [knownclass_info, knownclass_info]
+        vstate1 = VirtualState([info1])
+        assert vstate1.generalization_of(vstate1)
+
+        info2 = VirtualStateInfo(ConstInt(7), [1, 2])
+        value = OptValue(self.nodebox2)
+        classbox = self.cpu.ts.cls_of_box(self.nodebox2)
+        value.make_constant_class(classbox, -1)
+        knownclass_info = NotVirtualStateInfo(value)
+        info2.fieldstate = [knownclass_info, knownclass_info]
+        vstate2 = VirtualState([info2])
+        assert vstate2.generalization_of(vstate2)
+
+        assert not vstate2.generalization_of(vstate1)
+        assert not vstate1.generalization_of(vstate2)
+        
+    def test_nonvirtual_is_not_virtual(self):
+        info1 = VirtualStateInfo(ConstInt(42), [1, 2])
+        value = OptValue(self.nodebox)
+        classbox = self.cpu.ts.cls_of_box(self.nodebox)
+        value.make_constant_class(classbox, -1)
+        knownclass_info = NotVirtualStateInfo(value)
+        info1.fieldstate = [knownclass_info, knownclass_info]
+        vstate1 = VirtualState([info1])
+        assert vstate1.generalization_of(vstate1)
+
+        info2 = NotVirtualStateInfo(value)
+        vstate2 = VirtualState([info2])
+        assert vstate2.generalization_of(vstate2)
+
+        assert not vstate2.generalization_of(vstate1)
+        assert not vstate1.generalization_of(vstate2)
+
+    def test_arrays_with_nonmatching_fields(self):
+        info1 = VArrayStateInfo(42)
+        value = OptValue(self.nodebox)
+        classbox = self.cpu.ts.cls_of_box(self.nodebox)
+        value.make_constant_class(classbox, -1)
+        knownclass_info = NotVirtualStateInfo(value)
+        info1.fieldstate = [knownclass_info, knownclass_info]
+        vstate1 = VirtualState([info1])
+        assert vstate1.generalization_of(vstate1)
+
+        info2 = VArrayStateInfo(42)
+        value = OptValue(self.nodebox2)
+        classbox = self.cpu.ts.cls_of_box(self.nodebox2)
+        value.make_constant_class(classbox, -1)
+        knownclass_info = NotVirtualStateInfo(value)
+        info2.fieldstate = [knownclass_info, knownclass_info]
+        vstate2 = VirtualState([info2])
+        assert vstate2.generalization_of(vstate2)
+
+        assert not vstate2.generalization_of(vstate1)
+        assert not vstate1.generalization_of(vstate2)
+
+    def test_arrays_of_different_sizes(self):
+        info1 = VArrayStateInfo(42)
+        value = OptValue(self.nodebox)
+        classbox = self.cpu.ts.cls_of_box(self.nodebox)
+        value.make_constant_class(classbox, -1)
+        knownclass_info = NotVirtualStateInfo(value)
+        info1.fieldstate = [knownclass_info, knownclass_info]
+        vstate1 = VirtualState([info1])
+        assert vstate1.generalization_of(vstate1)
+
+        info2 = VArrayStateInfo(42)
+        value = OptValue(self.nodebox)
+        classbox = self.cpu.ts.cls_of_box(self.nodebox)
+        value.make_constant_class(classbox, -1)
+        knownclass_info = NotVirtualStateInfo(value)
+        info2.fieldstate = [knownclass_info]
+        vstate2 = VirtualState([info2])
+        assert vstate2.generalization_of(vstate2)
+
+        assert not vstate2.generalization_of(vstate1)
+        assert not vstate1.generalization_of(vstate2)
+
+    def test_arrays_with_nonmatching_types(self):
+        info1 = VArrayStateInfo(42)
+        value = OptValue(self.nodebox)
+        classbox = self.cpu.ts.cls_of_box(self.nodebox)
+        value.make_constant_class(classbox, -1)
+        knownclass_info = NotVirtualStateInfo(value)
+        info1.fieldstate = [knownclass_info, knownclass_info]
+        vstate1 = VirtualState([info1])
+        assert vstate1.generalization_of(vstate1)
+
+        info2 = VArrayStateInfo(7)
+        value = OptValue(self.nodebox2)
+        classbox = self.cpu.ts.cls_of_box(self.nodebox2)
+        value.make_constant_class(classbox, -1)
+        knownclass_info = NotVirtualStateInfo(value)
+        info2.fieldstate = [knownclass_info, knownclass_info]
+        vstate2 = VirtualState([info2])
+        assert vstate2.generalization_of(vstate2)
+
+        assert not vstate2.generalization_of(vstate1)
+        assert not vstate1.generalization_of(vstate2)
+        
+    def test_nonvirtual_is_not_array(self):
+        info1 = VArrayStateInfo(42)
+        value = OptValue(self.nodebox)
+        classbox = self.cpu.ts.cls_of_box(self.nodebox)
+        value.make_constant_class(classbox, -1)
+        knownclass_info = NotVirtualStateInfo(value)
+        info1.fieldstate = [knownclass_info, knownclass_info]
+        vstate1 = VirtualState([info1])
+        assert vstate1.generalization_of(vstate1)
+
+        info2 = NotVirtualStateInfo(value)
+        vstate2 = VirtualState([info2])
+        assert vstate2.generalization_of(vstate2)
+
+        assert not vstate2.generalization_of(vstate1)
+        assert not vstate1.generalization_of(vstate2)
+        
+
+class BaseTestBridges(BaseTest):
+    enable_opts = "intbounds:rewrite:virtualize:string:heap:unroll"
+
+    def _do_optimize_bridge(self, bridge, call_pure_results):
+        from pypy.jit.metainterp.optimizeopt import optimize_bridge_1, build_opt_chain
+        from pypy.jit.metainterp.optimizeopt.util import args_dict
+
+        self.bridge = bridge
+        bridge.call_pure_results = args_dict()
+        if call_pure_results is not None:

