[pypy-commit] pypy celldict-versions: merge default

cfbolz noreply at buildbot.pypy.org
Fri Aug 26 18:47:12 CEST 2011


Author: Carl Friedrich Bolz <cfbolz at gmx.de>
Branch: celldict-versions
Changeset: r46799:dfdfeb0e6f88
Date: 2011-08-26 18:53 +0200
http://bitbucket.org/pypy/pypy/changeset/dfdfeb0e6f88/

Log:	merge default

diff too long, truncating to 10000 out of 33964 lines

diff --git a/.hgignore b/.hgignore
--- a/.hgignore
+++ b/.hgignore
@@ -1,6 +1,7 @@
 syntax: glob
 *.py[co]
 *~
+.*.swp
 
 syntax: regexp
 ^testresult$
@@ -38,6 +39,8 @@
 ^pypy/translator/benchmark/shootout_benchmarks$
 ^pypy/translator/goal/pypy-translation-snapshot$
 ^pypy/translator/goal/pypy-c
+^pypy/translator/goal/pypy-jvm
+^pypy/translator/goal/pypy-jvm.jar
 ^pypy/translator/goal/.+\.exe$
 ^pypy/translator/goal/.+\.dll$
 ^pypy/translator/goal/target.+-c$
diff --git a/.hgtags b/.hgtags
--- a/.hgtags
+++ b/.hgtags
@@ -1,1 +1,2 @@
 b590cf6de4190623aad9aa698694c22e614d67b9 release-1.5
+b48df0bf4e75b81d98f19ce89d4a7dc3e1dab5e5 benchmarked
diff --git a/LICENSE b/LICENSE
--- a/LICENSE
+++ b/LICENSE
@@ -37,22 +37,22 @@
     Armin Rigo
     Maciej Fijalkowski
     Carl Friedrich Bolz
+    Antonio Cuni
     Amaury Forgeot d'Arc
-    Antonio Cuni
     Samuele Pedroni
     Michael Hudson
     Holger Krekel
+    Benjamin Peterson
     Christian Tismer
-    Benjamin Peterson
+    Hakan Ardo
+    Alex Gaynor
     Eric van Riet Paap
-    Anders Chrigström
-    Håkan Ardö
+    Anders Chrigstrom
+    David Schneider
     Richard Emslie
     Dan Villiom Podlaski Christiansen
     Alexander Schremmer
-    Alex Gaynor
-    David Schneider
-    Aurelién Campeas
+    Aurelien Campeas
     Anders Lehmann
     Camillo Bruni
     Niklaus Haldimann
@@ -63,16 +63,17 @@
     Bartosz Skowron
     Jakub Gustak
     Guido Wesdorp
+    Daniel Roberts
     Adrien Di Mascio
     Laura Creighton
     Ludovic Aubry
     Niko Matsakis
-    Daniel Roberts
     Jason Creighton
-    Jacob Hallén
+    Jacob Hallen
     Alex Martelli
     Anders Hammarquist
     Jan de Mooij
+    Wim Lavrijsen
     Stephan Diehl
     Michael Foord
     Stefan Schwarzer
@@ -83,9 +84,13 @@
     Alexandre Fayolle
     Marius Gedminas
     Simon Burton
+    Justin Peel
     Jean-Paul Calderone
     John Witulski
+    Lukas Diekmann
+    holger krekel
     Wim Lavrijsen
+    Dario Bertini
     Andreas Stührk
     Jean-Philippe St. Pierre
     Guido van Rossum
@@ -97,15 +102,16 @@
     Georg Brandl
     Gerald Klix
     Wanja Saatkamp
+    Ronny Pfannschmidt
     Boris Feigin
     Oscar Nierstrasz
-    Dario Bertini
     David Malcolm
     Eugene Oden
     Henry Mason
+    Sven Hager
     Lukas Renggli
+    Ilya Osadchiy
     Guenter Jantzen
-    Ronny Pfannschmidt
     Bert Freudenberg
     Amit Regmi
     Ben Young
@@ -122,8 +128,8 @@
     Jared Grubb
     Karl Bartel
     Gabriel Lavoie
+    Victor Stinner
     Brian Dorsey
-    Victor Stinner
     Stuart Williams
     Toby Watson
     Antoine Pitrou
@@ -134,19 +140,23 @@
     Jonathan David Riehl
     Elmo Mäntynen
     Anders Qvist
-    Beatrice Düring
+    Beatrice During
     Alexander Sedov
+    Timo Paulssen
+    Corbin Simpson
     Vincent Legoll
+    Romain Guillebert
     Alan McIntyre
-    Romain Guillebert
     Alex Perry
     Jens-Uwe Mager
+    Simon Cross
     Dan Stromberg
-    Lukas Diekmann
+    Guillebert Romain
     Carl Meyer
     Pieter Zieschang
     Alejandro J. Cura
     Sylvain Thenault
+    Christoph Gerum
     Travis Francis Athougies
     Henrik Vendelbo
     Lutz Paelike
@@ -157,6 +167,7 @@
     Miguel de Val Borro
     Ignas Mikalajunas
     Artur Lisiecki
+    Philip Jenvey
     Joshua Gilbert
     Godefroid Chappelle
     Yusei Tahara
@@ -165,26 +176,31 @@
     Gustavo Niemeyer
     William Leslie
     Akira Li
-    Kristján Valur Jónsson
+    Kristjan Valur Jonsson
     Bobby Impollonia
+    Michael Hudson-Doyle
     Andrew Thompson
     Anders Sigfridsson
+    Floris Bruynooghe
     Jacek Generowicz
     Dan Colish
-    Sven Hager
     Zooko Wilcox-O Hearn
+    Dan Villiom Podlaski Christiansen
     Anders Hammarquist
+    Chris Lambacher
     Dinu Gherman
     Dan Colish
+    Brett Cannon
     Daniel Neuhäuser
     Michael Chermside
     Konrad Delong
     Anna Ravencroft
     Greg Price
     Armin Ronacher
+    Christian Muirhead
     Jim Baker
-    Philip Jenvey
     Rodrigo Araújo
+    Romain Guillebert
 
     Heinrich-Heine University, Germany 
     Open End AB (formerly AB Strakt), Sweden
diff --git a/ctypes_configure/configure.py b/ctypes_configure/configure.py
--- a/ctypes_configure/configure.py
+++ b/ctypes_configure/configure.py
@@ -559,7 +559,9 @@
 C_HEADER = """
 #include <stdio.h>
 #include <stddef.h>   /* for offsetof() */
-#include <stdint.h>   /* FreeBSD: for uint64_t */
+#ifndef _WIN32
+#  include <stdint.h>   /* FreeBSD: for uint64_t */
+#endif
 
 void dump(char* key, int value) {
     printf("%s: %d\\n", key, value);
diff --git a/ctypes_configure/stdoutcapture.py b/ctypes_configure/stdoutcapture.py
--- a/ctypes_configure/stdoutcapture.py
+++ b/ctypes_configure/stdoutcapture.py
@@ -15,6 +15,15 @@
             not hasattr(os, 'fdopen')):
             self.dummy = 1
         else:
+            try:
+                self.tmpout = os.tmpfile()
+                if mixed_out_err:
+                    self.tmperr = self.tmpout
+                else:
+                    self.tmperr = os.tmpfile()
+            except OSError:     # bah?  on at least one Windows box
+                self.dummy = 1
+                return
             self.dummy = 0
             # make new stdout/stderr files if needed
             self.localoutfd = os.dup(1)
@@ -29,11 +38,6 @@
                 sys.stderr = os.fdopen(self.localerrfd, 'w', 0)
             else:
                 self.saved_stderr = None
-            self.tmpout = os.tmpfile()
-            if mixed_out_err:
-                self.tmperr = self.tmpout
-            else:
-                self.tmperr = os.tmpfile()
             os.dup2(self.tmpout.fileno(), 1)
             os.dup2(self.tmperr.fileno(), 2)
 
diff --git a/lib-python/conftest.py b/lib-python/conftest.py
--- a/lib-python/conftest.py
+++ b/lib-python/conftest.py
@@ -154,18 +154,18 @@
     RegrTest('test_cmd.py'),
     RegrTest('test_cmd_line_script.py'),
     RegrTest('test_codeccallbacks.py', core=True),
-    RegrTest('test_codecencodings_cn.py'),
-    RegrTest('test_codecencodings_hk.py'),
-    RegrTest('test_codecencodings_jp.py'),
-    RegrTest('test_codecencodings_kr.py'),
-    RegrTest('test_codecencodings_tw.py'),
+    RegrTest('test_codecencodings_cn.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecencodings_hk.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecencodings_jp.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecencodings_kr.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecencodings_tw.py', usemodules='_multibytecodec'),
 
-    RegrTest('test_codecmaps_cn.py'),
-    RegrTest('test_codecmaps_hk.py'),
-    RegrTest('test_codecmaps_jp.py'),
-    RegrTest('test_codecmaps_kr.py'),
-    RegrTest('test_codecmaps_tw.py'),
-    RegrTest('test_codecs.py', core=True),
+    RegrTest('test_codecmaps_cn.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecmaps_hk.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecmaps_jp.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecmaps_kr.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecmaps_tw.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecs.py', core=True, usemodules='_multibytecodec'),
     RegrTest('test_codeop.py', core=True),
     RegrTest('test_coercion.py', core=True),
     RegrTest('test_collections.py'),
@@ -314,7 +314,7 @@
     RegrTest('test_mmap.py'),
     RegrTest('test_module.py', core=True),
     RegrTest('test_modulefinder.py'),
-    RegrTest('test_multibytecodec.py'),
+    RegrTest('test_multibytecodec.py', usemodules='_multibytecodec'),
     RegrTest('test_multibytecodec_support.py', skip="not a test"),
     RegrTest('test_multifile.py'),
     RegrTest('test_multiprocessing.py', skip='FIXME leaves subprocesses'),
diff --git a/lib-python/modified-2.7/ctypes/__init__.py b/lib-python/modified-2.7/ctypes/__init__.py
--- a/lib-python/modified-2.7/ctypes/__init__.py
+++ b/lib-python/modified-2.7/ctypes/__init__.py
@@ -489,9 +489,12 @@
         _flags_ = _FUNCFLAG_CDECL | _FUNCFLAG_PYTHONAPI
     return CFunctionType
 
-_cast = PYFUNCTYPE(py_object, c_void_p, py_object, py_object)(_cast_addr)
 def cast(obj, typ):
-    return _cast(obj, obj, typ)
+    try:
+        c_void_p.from_param(obj)
+    except TypeError, e:
+        raise ArgumentError(str(e))
+    return _cast_addr(obj, obj, typ)
 
 _string_at = PYFUNCTYPE(py_object, c_void_p, c_int)(_string_at_addr)
 def string_at(ptr, size=-1):
diff --git a/lib-python/modified-2.7/distutils/cygwinccompiler.py b/lib-python/modified-2.7/distutils/cygwinccompiler.py
--- a/lib-python/modified-2.7/distutils/cygwinccompiler.py
+++ b/lib-python/modified-2.7/distutils/cygwinccompiler.py
@@ -75,6 +75,9 @@
         elif msc_ver == '1500':
             # VS2008 / MSVC 9.0
             return ['msvcr90']
+        elif msc_ver == '1600':
+            # VS2010 / MSVC 10.0
+            return ['msvcr100']
         else:
             raise ValueError("Unknown MS Compiler version %s " % msc_ver)
 
diff --git a/lib-python/modified-2.7/distutils/sysconfig_pypy.py b/lib-python/modified-2.7/distutils/sysconfig_pypy.py
--- a/lib-python/modified-2.7/distutils/sysconfig_pypy.py
+++ b/lib-python/modified-2.7/distutils/sysconfig_pypy.py
@@ -116,6 +116,12 @@
     if compiler.compiler_type == "unix":
         compiler.compiler_so.extend(['-fPIC', '-Wimplicit'])
         compiler.shared_lib_extension = get_config_var('SO')
+        if "CFLAGS" in os.environ:
+            cflags = os.environ["CFLAGS"]
+            compiler.compiler.append(cflags)
+            compiler.compiler_so.append(cflags)
+            compiler.linker_so.append(cflags)
+
 
 from sysconfig_cpython import (
     parse_makefile, _variable_rx, expand_makefile_vars)
diff --git a/lib-python/modified-2.7/distutils/unixccompiler.py b/lib-python/modified-2.7/distutils/unixccompiler.py
--- a/lib-python/modified-2.7/distutils/unixccompiler.py
+++ b/lib-python/modified-2.7/distutils/unixccompiler.py
@@ -324,7 +324,7 @@
             # On OSX users can specify an alternate SDK using
             # '-isysroot', calculate the SDK root if it is specified
             # (and use it further on)
-            cflags = sysconfig.get_config_var('CFLAGS')
+            cflags = sysconfig.get_config_var('CFLAGS') or ''
             m = re.search(r'-isysroot\s+(\S+)', cflags)
             if m is None:
                 sysroot = '/'
diff --git a/lib-python/modified-2.7/pickle.py b/lib-python/modified-2.7/pickle.py
--- a/lib-python/modified-2.7/pickle.py
+++ b/lib-python/modified-2.7/pickle.py
@@ -168,7 +168,7 @@
 
 # Pickling machinery
 
-class Pickler:
+class Pickler(object):
 
     def __init__(self, file, protocol=None):
         """This takes a file-like object for writing a pickle data stream.
diff --git a/lib-python/modified-2.7/test/regrtest.py b/lib-python/modified-2.7/test/regrtest.py
--- a/lib-python/modified-2.7/test/regrtest.py
+++ b/lib-python/modified-2.7/test/regrtest.py
@@ -1403,7 +1403,26 @@
         test_zipimport
         test_zlib
         """,
-    'openbsd3':
+    'openbsd4':
+        """
+        test_ascii_formatd
+        test_bsddb
+        test_bsddb3
+        test_ctypes
+        test_dl
+        test_epoll
+        test_gdbm
+        test_locale
+        test_normalization
+        test_ossaudiodev
+        test_pep277
+        test_tcl
+        test_tk
+        test_ttk_guionly
+        test_ttk_textonly
+        test_multiprocessing
+        """,
+    'openbsd5':
         """
         test_ascii_formatd
         test_bsddb
diff --git a/lib-python/modified-2.7/test/test_bz2.py b/lib-python/modified-2.7/test/test_bz2.py
--- a/lib-python/modified-2.7/test/test_bz2.py
+++ b/lib-python/modified-2.7/test/test_bz2.py
@@ -50,6 +50,7 @@
         self.filename = TESTFN
 
     def tearDown(self):
+        test_support.gc_collect()
         if os.path.isfile(self.filename):
             os.unlink(self.filename)
 
diff --git a/lib-python/modified-2.7/test/test_fcntl.py b/lib-python/modified-2.7/test/test_fcntl.py
new file mode 100644
--- /dev/null
+++ b/lib-python/modified-2.7/test/test_fcntl.py
@@ -0,0 +1,108 @@
+"""Test program for the fcntl C module.
+
+OS/2+EMX doesn't support the file locking operations.
+
+"""
+import os
+import struct
+import sys
+import unittest
+from test.test_support import (verbose, TESTFN, unlink, run_unittest,
+    import_module)
+
+# Skip test if no fcntl module.
+fcntl = import_module('fcntl')
+
+
+# TODO - Write tests for flock() and lockf().
+
+def get_lockdata():
+    if sys.platform.startswith('atheos'):
+        start_len = "qq"
+    else:
+        try:
+            os.O_LARGEFILE
+        except AttributeError:
+            start_len = "ll"
+        else:
+            start_len = "qq"
+
+    if sys.platform in ('netbsd1', 'netbsd2', 'netbsd3',
+                        'Darwin1.2', 'darwin',
+                        'freebsd2', 'freebsd3', 'freebsd4', 'freebsd5',
+                        'freebsd6', 'freebsd7', 'freebsd8',
+                        'bsdos2', 'bsdos3', 'bsdos4',
+                        'openbsd', 'openbsd2', 'openbsd3', 'openbsd4', 'openbsd5'):
+        if struct.calcsize('l') == 8:
+            off_t = 'l'
+            pid_t = 'i'
+        else:
+            off_t = 'lxxxx'
+            pid_t = 'l'
+        lockdata = struct.pack(off_t + off_t + pid_t + 'hh', 0, 0, 0,
+                               fcntl.F_WRLCK, 0)
+    elif sys.platform in ['aix3', 'aix4', 'hp-uxB', 'unixware7']:
+        lockdata = struct.pack('hhlllii', fcntl.F_WRLCK, 0, 0, 0, 0, 0, 0)
+    elif sys.platform in ['os2emx']:
+        lockdata = None
+    else:
+        lockdata = struct.pack('hh'+start_len+'hh', fcntl.F_WRLCK, 0, 0, 0, 0, 0)
+    if lockdata:
+        if verbose:
+            print 'struct.pack: ', repr(lockdata)
+    return lockdata
+
+lockdata = get_lockdata()
+
+
+class TestFcntl(unittest.TestCase):
+
+    def setUp(self):
+        self.f = None
+
+    def tearDown(self):
+        if self.f and not self.f.closed:
+            self.f.close()
+        unlink(TESTFN)
+
+    def test_fcntl_fileno(self):
+        # the example from the library docs
+        self.f = open(TESTFN, 'w')
+        rv = fcntl.fcntl(self.f.fileno(), fcntl.F_SETFL, os.O_NONBLOCK)
+        if verbose:
+            print 'Status from fcntl with O_NONBLOCK: ', rv
+        if sys.platform not in ['os2emx']:
+            rv = fcntl.fcntl(self.f.fileno(), fcntl.F_SETLKW, lockdata)
+            if verbose:
+                print 'String from fcntl with F_SETLKW: ', repr(rv)
+        self.f.close()
+
+    def test_fcntl_file_descriptor(self):
+        # again, but pass the file rather than numeric descriptor
+        self.f = open(TESTFN, 'w')
+        rv = fcntl.fcntl(self.f, fcntl.F_SETFL, os.O_NONBLOCK)
+        if sys.platform not in ['os2emx']:
+            rv = fcntl.fcntl(self.f, fcntl.F_SETLKW, lockdata)
+        self.f.close()
+
+    def test_fcntl_64_bit(self):
+        # Issue #1309352: fcntl shouldn't fail when the third arg fits in a
+        # C 'long' but not in a C 'int'.
+        try:
+            cmd = fcntl.F_NOTIFY
+            # This flag is larger than 2**31 in 64-bit builds
+            flags = fcntl.DN_MULTISHOT
+        except AttributeError:
+            self.skipTest("F_NOTIFY or DN_MULTISHOT unavailable")
+        fd = os.open(os.path.dirname(os.path.abspath(TESTFN)), os.O_RDONLY)
+        try:
+            fcntl.fcntl(fd, cmd, flags)
+        finally:
+            os.close(fd)
+
+
+def test_main():
+    run_unittest(TestFcntl)
+
+if __name__ == '__main__':
+    test_main()
diff --git a/lib-python/modified-2.7/test/test_multibytecodec.py b/lib-python/modified-2.7/test/test_multibytecodec.py
--- a/lib-python/modified-2.7/test/test_multibytecodec.py
+++ b/lib-python/modified-2.7/test/test_multibytecodec.py
@@ -148,7 +148,8 @@
 class Test_StreamReader(unittest.TestCase):
     def test_bug1728403(self):
         try:
-            open(TESTFN, 'w').write('\xa1')
+            with open(TESTFN, 'w') as f:
+                f.write('\xa1')
             f = codecs.open(TESTFN, encoding='cp949')
             self.assertRaises(UnicodeDecodeError, f.read, 2)
         finally:
diff --git a/lib-python/2.7/test/test_sets.py b/lib-python/modified-2.7/test/test_sets.py
copy from lib-python/2.7/test/test_sets.py
copy to lib-python/modified-2.7/test/test_sets.py
--- a/lib-python/2.7/test/test_sets.py
+++ b/lib-python/modified-2.7/test/test_sets.py
@@ -686,7 +686,9 @@
         set_list = sorted(self.set)
         self.assertEqual(len(dup_list), len(set_list))
         for i, el in enumerate(dup_list):
-            self.assertIs(el, set_list[i])
+            # Object identity is not guaranteed for immutable objects, so we
+            # can't use assertIs here.
+            self.assertEqual(el, set_list[i])
 
     def test_deep_copy(self):
         dup = copy.deepcopy(self.set)
diff --git a/lib-python/2.7/test/test_tarfile.py b/lib-python/modified-2.7/test/test_tarfile.py
copy from lib-python/2.7/test/test_tarfile.py
copy to lib-python/modified-2.7/test/test_tarfile.py
--- a/lib-python/2.7/test/test_tarfile.py
+++ b/lib-python/modified-2.7/test/test_tarfile.py
@@ -169,6 +169,7 @@
         except tarfile.ReadError:
             self.fail("tarfile.open() failed on empty archive")
         self.assertListEqual(tar.getmembers(), [])
+        tar.close()
 
     def test_null_tarfile(self):
         # Test for issue6123: Allow opening empty archives.
@@ -207,16 +208,21 @@
         fobj = open(self.tarname, "rb")
         tar = tarfile.open(fileobj=fobj, mode=self.mode)
         self.assertEqual(tar.name, os.path.abspath(fobj.name))
+        tar.close()
 
     def test_no_name_attribute(self):
-        data = open(self.tarname, "rb").read()
+        f = open(self.tarname, "rb")
+        data = f.read()
+        f.close()
         fobj = StringIO.StringIO(data)
         self.assertRaises(AttributeError, getattr, fobj, "name")
         tar = tarfile.open(fileobj=fobj, mode=self.mode)
         self.assertEqual(tar.name, None)
 
     def test_empty_name_attribute(self):
-        data = open(self.tarname, "rb").read()
+        f = open(self.tarname, "rb")
+        data = f.read()
+        f.close()
         fobj = StringIO.StringIO(data)
         fobj.name = ""
         tar = tarfile.open(fileobj=fobj, mode=self.mode)
@@ -515,6 +521,7 @@
         self.tar = tarfile.open(self.tarname, mode=self.mode, encoding="iso8859-1")
         tarinfo = self.tar.getmember("pax/umlauts-&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;")
         self._test_member(tarinfo, size=7011, chksum=md5_regtype)
+        self.tar.close()
 
 
 class LongnameTest(ReadTest):
@@ -675,6 +682,7 @@
             tar = tarfile.open(tmpname, self.mode)
             tarinfo = tar.gettarinfo(path)
             self.assertEqual(tarinfo.size, 0)
+            tar.close()
         finally:
             os.rmdir(path)
 
@@ -692,6 +700,7 @@
                 tar.gettarinfo(target)
                 tarinfo = tar.gettarinfo(link)
                 self.assertEqual(tarinfo.size, 0)
+                tar.close()
             finally:
                 os.remove(target)
                 os.remove(link)
@@ -704,6 +713,7 @@
                 tar = tarfile.open(tmpname, self.mode)
                 tarinfo = tar.gettarinfo(path)
                 self.assertEqual(tarinfo.size, 0)
+                tar.close()
             finally:
                 os.remove(path)
 
@@ -722,6 +732,7 @@
         tar.add(dstname)
         os.chdir(cwd)
         self.assertTrue(tar.getnames() == [], "added the archive to itself")
+        tar.close()
 
     def test_exclude(self):
         tempdir = os.path.join(TEMPDIR, "exclude")
@@ -742,6 +753,7 @@
             tar = tarfile.open(tmpname, "r")
             self.assertEqual(len(tar.getmembers()), 1)
             self.assertEqual(tar.getnames()[0], "empty_dir")
+            tar.close()
         finally:
             shutil.rmtree(tempdir)
 
@@ -859,7 +871,9 @@
             fobj.close()
         elif self.mode.endswith("bz2"):
             dec = bz2.BZ2Decompressor()
-            data = open(tmpname, "rb").read()
+            f = open(tmpname, "rb")
+            data = f.read()
+            f.close()
             data = dec.decompress(data)
             self.assertTrue(len(dec.unused_data) == 0,
                     "found trailing data")
@@ -938,6 +952,7 @@
                 "unable to read longname member")
         self.assertEqual(tarinfo.linkname, member.linkname,
                 "unable to read longname member")
+        tar.close()
 
     def test_longname_1023(self):
         self._test(("longnam/" * 127) + "longnam")
@@ -1030,6 +1045,7 @@
         else:
             n = tar.getmembers()[0].name
             self.assertTrue(name == n, "PAX longname creation failed")
+        tar.close()
 
     def test_pax_global_header(self):
         pax_headers = {
@@ -1058,6 +1074,7 @@
                     tarfile.PAX_NUMBER_FIELDS[key](val)
                 except (TypeError, ValueError):
                     self.fail("unable to convert pax header field")
+        tar.close()
 
     def test_pax_extended_header(self):
         # The fields from the pax header have priority over the
@@ -1077,6 +1094,7 @@
         self.assertEqual(t.pax_headers, pax_headers)
         self.assertEqual(t.name, "foo")
         self.assertEqual(t.uid, 123)
+        tar.close()
 
 
 class UstarUnicodeTest(unittest.TestCase):
@@ -1120,6 +1138,7 @@
         tarinfo.name = "foo"
         tarinfo.uname = u"&#65533;&#65533;&#65533;"
         self.assertRaises(UnicodeError, tar.addfile, tarinfo)
+        tar.close()
 
     def test_unicode_argument(self):
         tar = tarfile.open(tarname, "r", encoding="iso8859-1", errors="strict")
@@ -1174,6 +1193,7 @@
             tar = tarfile.open(tmpname, format=self.format, encoding="ascii",
                     errors=handler)
             self.assertEqual(tar.getnames()[0], name)
+            tar.close()
 
         self.assertRaises(UnicodeError, tarfile.open, tmpname,
                 encoding="ascii", errors="strict")
@@ -1186,6 +1206,7 @@
         tar = tarfile.open(tmpname, format=self.format, encoding="iso8859-1",
                 errors="utf-8")
         self.assertEqual(tar.getnames()[0], "&#65533;&#65533;&#65533;/" + u"&#65533;".encode("utf8"))
+        tar.close()
 
 
 class AppendTest(unittest.TestCase):
@@ -1213,6 +1234,7 @@
     def _test(self, names=["bar"], fileobj=None):
         tar = tarfile.open(self.tarname, fileobj=fileobj)
         self.assertEqual(tar.getnames(), names)
+        tar.close()
 
     def test_non_existing(self):
         self._add_testfile()
@@ -1231,7 +1253,9 @@
 
     def test_fileobj(self):
         self._create_testtar()
-        data = open(self.tarname).read()
+        f = open(self.tarname)
+        data = f.read()
+        f.close()
         fobj = StringIO.StringIO(data)
         self._add_testfile(fobj)
         fobj.seek(0)
@@ -1257,7 +1281,9 @@
     # Append mode is supposed to fail if the tarfile to append to
     # does not end with a zero block.
     def _test_error(self, data):
-        open(self.tarname, "wb").write(data)
+        f = open(self.tarname, "wb")
+        f.write(data)
+        f.close()
         self.assertRaises(tarfile.ReadError, self._add_testfile)
 
     def test_null(self):
diff --git a/lib-python/modified-2.7/test/test_tempfile.py b/lib-python/modified-2.7/test/test_tempfile.py
--- a/lib-python/modified-2.7/test/test_tempfile.py
+++ b/lib-python/modified-2.7/test/test_tempfile.py
@@ -23,8 +23,8 @@
 
 # TEST_FILES may need to be tweaked for systems depending on the maximum
 # number of files that can be opened at one time (see ulimit -n)
-if sys.platform in ('openbsd3', 'openbsd4'):
-    TEST_FILES = 48
+if sys.platform.startswith("openbsd"):
+    TEST_FILES = 64 # ulimit -n defaults to 128 for normal users
 else:
     TEST_FILES = 100
 
diff --git a/lib_pypy/_ctypes/__init__.py b/lib_pypy/_ctypes/__init__.py
--- a/lib_pypy/_ctypes/__init__.py
+++ b/lib_pypy/_ctypes/__init__.py
@@ -18,7 +18,16 @@
 if _os.name in ("nt", "ce"):
     from _rawffi import FormatError
     from _rawffi import check_HRESULT as _check_HRESULT
-    CopyComPointer = None # XXX
+
+    def CopyComPointer(src, dst):
+        from ctypes import c_void_p, cast
+        if src:
+            hr = src[0][0].AddRef(src)
+            if hr & 0x80000000:
+                return hr
+        dst[0] = cast(src, c_void_p).value
+        return 0
+
     LoadLibrary = dlopen
 
 from _rawffi import FUNCFLAG_STDCALL, FUNCFLAG_CDECL, FUNCFLAG_PYTHONAPI
diff --git a/lib_pypy/_ctypes/basics.py b/lib_pypy/_ctypes/basics.py
--- a/lib_pypy/_ctypes/basics.py
+++ b/lib_pypy/_ctypes/basics.py
@@ -48,7 +48,8 @@
             return self.from_param(as_parameter)
 
     def get_ffi_param(self, value):
-        return self.from_param(value)._to_ffi_param()
+        cdata = self.from_param(value)
+        return cdata, cdata._to_ffi_param()
 
     def get_ffi_argtype(self):
         if self._ffiargtype:
@@ -139,7 +140,10 @@
         return buffer(self._buffer)
 
     def _get_b_base(self):
-        return self._base
+        try:
+            return self._base
+        except AttributeError:
+            return None
     _b_base_ = property(_get_b_base)
     _b_needsfree_ = False
 
@@ -218,5 +222,7 @@
     'z' : _ffi.types.void_p,
     'O' : _ffi.types.void_p,
     'Z' : _ffi.types.void_p,
+    'X' : _ffi.types.void_p,
+    'v' : _ffi.types.sshort,
     }
 
diff --git a/lib_pypy/_ctypes/function.py b/lib_pypy/_ctypes/function.py
--- a/lib_pypy/_ctypes/function.py
+++ b/lib_pypy/_ctypes/function.py
@@ -78,8 +78,6 @@
     _com_iid = None
     _is_fastpath = False
 
-    __restype_set = False
-
     def _getargtypes(self):
         return self._argtypes_
 
@@ -93,13 +91,15 @@
                     raise TypeError(
                         "item %d in _argtypes_ has no from_param method" % (
                             i + 1,))
-            #
-            if all([hasattr(argtype, '_ffiargshape') for argtype in argtypes]):
-                fastpath_cls = make_fastpath_subclass(self.__class__)
-                fastpath_cls.enable_fastpath_maybe(self)
             self._argtypes_ = list(argtypes)
+            self._check_argtypes_for_fastpath()
     argtypes = property(_getargtypes, _setargtypes)
 
+    def _check_argtypes_for_fastpath(self):
+        if all([hasattr(argtype, '_ffiargshape') for argtype in self._argtypes_]):
+            fastpath_cls = make_fastpath_subclass(self.__class__)
+            fastpath_cls.enable_fastpath_maybe(self)
+
     def _getparamflags(self):
         return self._paramflags
 
@@ -149,7 +149,6 @@
         return self._restype_
 
     def _setrestype(self, restype):
-        self.__restype_set = True
         self._ptr = None
         if restype is int:
             from ctypes import c_int
@@ -219,6 +218,7 @@
                 import ctypes
                 restype = ctypes.c_int
             self._ptr = self._getfuncptr_fromaddress(self._argtypes_, restype)
+            self._check_argtypes_for_fastpath()
             return
 
         
@@ -296,13 +296,12 @@
                     "This function takes %d argument%s (%s given)"
                     % (len(self._argtypes_), plural, len(args)))
 
-            # check that arguments are convertible
-            ## XXX Not as long as ctypes.cast is a callback function with
-            ## py_object arguments...
-            ## self._convert_args(self._argtypes_, args, {})
-
             try:
-                res = self.callable(*args)
+                newargs = self._convert_args_for_callback(argtypes, args)
+            except (UnicodeError, TypeError, ValueError), e:
+                raise ArgumentError(str(e))
+            try:
+                res = self.callable(*newargs)
             except:
                 exc_info = sys.exc_info()
                 traceback.print_tb(exc_info[2], file=sys.stderr)
@@ -316,26 +315,22 @@
             warnings.warn('C function without declared arguments called',
                           RuntimeWarning, stacklevel=2)
             argtypes = []
-            
-        if not self.__restype_set:
-            warnings.warn('C function without declared return type called',
-                          RuntimeWarning, stacklevel=2)
 
         if self._com_index:
-            assert False, 'TODO2'
             from ctypes import cast, c_void_p, POINTER
             if not args:
                 raise ValueError(
                     "native COM method call without 'this' parameter"
                     )
-            thisarg = cast(args[0], POINTER(POINTER(c_void_p))).contents
-            argtypes = [c_void_p] + list(argtypes)
-            args = list(args)
-            args[0] = args[0].value
+            thisarg = cast(args[0], POINTER(POINTER(c_void_p)))
+            keepalives, newargs, argtypes, outargs = self._convert_args(argtypes,
+                                                                        args[1:], kwargs)
+            newargs.insert(0, args[0].value)
+            argtypes.insert(0, c_void_p)
         else:
             thisarg = None
-            
-        newargs, argtypes, outargs = self._convert_args(argtypes, args, kwargs)
+            keepalives, newargs, argtypes, outargs = self._convert_args(argtypes,
+                                                                        args, kwargs)
 
         funcptr = self._getfuncptr(argtypes, self._restype_, thisarg)
         result = self._call_funcptr(funcptr, *newargs)
@@ -343,6 +338,11 @@
 
         if not outargs:
             return result
+
+        simple_cdata = type(c_void_p()).__bases__[0]
+        outargs = [x.value if type(x).__bases__[0] is simple_cdata else x
+                   for x in outargs]
+
         if len(outargs) == 1:
             return outargs[0]
         return tuple(outargs)
@@ -361,7 +361,10 @@
             if self._flags_ & _rawffi.FUNCFLAG_USE_LASTERROR:
                 set_last_error(_rawffi.get_last_error())
         #
-        return self._build_result(self._restype_, result, newargs)
+        try:
+            return self._build_result(self._restype_, result, newargs)
+        finally:
+            funcptr.free_temp_buffers()
 
     def _do_errcheck(self, result, args):
         # The 'errcheck' protocol
@@ -398,10 +401,10 @@
             # extract the address from the object's virtual table
             if not thisarg:
                 raise ValueError("COM method call without VTable")
-            ptr = thisarg[self._com_index - 0x1000]
-            argshapes = [arg._ffiargshape for arg in argtypes]
-            resshape = restype._ffiargshape
-            return _rawffi.FuncPtr(ptr, argshapes, resshape, self._flags_)
+            ptr = thisarg[0][self._com_index - 0x1000]
+            ffiargs = [argtype.get_ffi_argtype() for argtype in argtypes]
+            ffires = restype.get_ffi_argtype()
+            return _ffi.FuncPtr.fromaddr(ptr, '', ffiargs, ffires)
         
         cdll = self.dll._handle
         try:
@@ -434,16 +437,15 @@
     @classmethod
     def _conv_param(cls, argtype, arg):
         if isinstance(argtype, _CDataMeta):
-            #arg = argtype.from_param(arg)
-            arg = argtype.get_ffi_param(arg)
-            return arg, argtype
+            cobj, ffiparam = argtype.get_ffi_param(arg)
+            return cobj, ffiparam, argtype
         
         if argtype is not None:
             arg = argtype.from_param(arg)
         if hasattr(arg, '_as_parameter_'):
             arg = arg._as_parameter_
         if isinstance(arg, _CData):
-            return arg._to_ffi_param(), type(arg)
+            return arg, arg._to_ffi_param(), type(arg)
         #
         # non-usual case: we do the import here to save a lot of code in the
         # jit trace of the normal case
@@ -460,19 +462,28 @@
         else:
             raise TypeError("Don't know how to handle %s" % (arg,))
 
-        return cobj._to_ffi_param(), type(cobj)
+        return cobj, cobj._to_ffi_param(), type(cobj)
+
+    def _convert_args_for_callback(self, argtypes, args):
+        assert len(argtypes) == len(args)
+        newargs = []
+        for argtype, arg in zip(argtypes, args):
+            param = argtype.from_param(arg)
+            if argtype._type_ == 'P': # special-case for c_void_p
+                param = param._get_buffer_value()
+            elif self._is_primitive(argtype):
+                param = param.value
+            newargs.append(param)
+        return newargs
 
     def _convert_args(self, argtypes, args, kwargs, marker=object()):
         newargs = []
         outargs = []
+        keepalives = []
         newargtypes = []
         total = len(args)
         paramflags = self._paramflags
-
-        if self._com_index:
-            inargs_idx = 1
-        else:
-            inargs_idx = 0
+        inargs_idx = 0
 
         if not paramflags and total < len(argtypes):
             raise TypeError("not enough arguments")
@@ -496,7 +507,8 @@
                     val = defval
                     if val is marker:
                         val = 0
-                    newarg, newargtype = self._conv_param(argtype, val)
+                    keepalive, newarg, newargtype = self._conv_param(argtype, val)
+                    keepalives.append(keepalive)
                     newargs.append(newarg)
                     newargtypes.append(newargtype)
                 elif flag in (0, PARAMFLAG_FIN):
@@ -512,28 +524,32 @@
                         raise TypeError("required argument '%s' missing" % name)
                     else:
                         raise TypeError("not enough arguments")
-                    newarg, newargtype = self._conv_param(argtype, val)
+                    keepalive, newarg, newargtype = self._conv_param(argtype, val)
+                    keepalives.append(keepalive)
                     newargs.append(newarg)
                     newargtypes.append(newargtype)
                 elif flag == PARAMFLAG_FOUT:
                     if defval is not marker:
                         outargs.append(defval)
-                        newarg, newargtype = self._conv_param(argtype, defval)
+                        keepalive, newarg, newargtype = self._conv_param(argtype, defval)
                     else:
                         import ctypes
                         val = argtype._type_()
                         outargs.append(val)
+                        keepalive = None
                         newarg = ctypes.byref(val)
                         newargtype = type(newarg)
+                    keepalives.append(keepalive)
                     newargs.append(newarg)
                     newargtypes.append(newargtype)
                 else:
                     raise ValueError("paramflag %d not yet implemented" % flag)
             else:
                 try:
-                    newarg, newargtype = self._conv_param(argtype, args[i])
+                    keepalive, newarg, newargtype = self._conv_param(argtype, args[i])
                 except (UnicodeError, TypeError, ValueError), e:
                     raise ArgumentError(str(e))
+                keepalives.append(keepalive)
                 newargs.append(newarg)
                 newargtypes.append(newargtype)
                 inargs_idx += 1
@@ -542,13 +558,17 @@
             extra = args[len(newargs):]
             for i, arg in enumerate(extra):
                 try:
-                    newarg, newargtype = self._conv_param(None, arg)
+                    keepalive, newarg, newargtype = self._conv_param(None, arg)
                 except (UnicodeError, TypeError, ValueError), e:
                     raise ArgumentError(str(e))
+                keepalives.append(keepalive)
                 newargs.append(newarg)
                 newargtypes.append(newargtype)
-        return newargs, newargtypes, outargs
+        return keepalives, newargs, newargtypes, outargs
 
+    @staticmethod
+    def _is_primitive(argtype):
+        return argtype.__bases__[0] is _SimpleCData
     
     def _wrap_result(self, restype, result):
         """
@@ -557,7 +577,7 @@
         """
         # hack for performance: if restype is a "simple" primitive type, don't
         # allocate the buffer because it's going to be thrown away immediately
-        if restype.__bases__[0] is _SimpleCData and not restype._is_pointer_like():
+        if self._is_primitive(restype) and not restype._is_pointer_like():
             return result
         #
         shape = restype._ffishape
@@ -587,13 +607,7 @@
 
         retval = None
 
-        if self._com_index:
-            if resbuffer[0] & 0x80000000:
-                raise get_com_error(resbuffer[0],
-                                    self._com_iid, argsandobjs[0])
-            else:
-                retval = int(resbuffer[0])
-        elif restype is not None:
+        if restype is not None:
             checker = getattr(self.restype, '_check_retval_', None)
             if checker:
                 val = restype(result)
@@ -601,7 +615,13 @@
                 # classes defining a new type, and their subclasses
                 if '_type_' in restype.__dict__:
                     val = val.value
-                retval = checker(val)
+                # XXX Raise a COMError when restype is HRESULT and
+                # checker(val) fails.  How to check for restype == HRESULT?
+                if self._com_index:
+                    if result & 0x80000000:
+                        raise get_com_error(result, None, None)
+                else:
+                    retval = checker(val)
             elif not isinstance(restype, _CDataMeta):
                 retval = restype(result)
             else:
@@ -673,7 +693,7 @@
             try:
                 result = self._call_funcptr(funcptr, *args)
                 result = self._do_errcheck(result, args)
-            except (TypeError, ArgumentError): # XXX, should be FFITypeError
+            except (TypeError, ArgumentError, UnicodeDecodeError):
                 assert self._slowpath_allowed
                 return CFuncPtr.__call__(self, *args)
             return result
diff --git a/lib_pypy/_ctypes/primitive.py b/lib_pypy/_ctypes/primitive.py
--- a/lib_pypy/_ctypes/primitive.py
+++ b/lib_pypy/_ctypes/primitive.py
@@ -10,6 +10,8 @@
 from _ctypes.builtin import ConvMode
 from _ctypes.array import Array
 from _ctypes.pointer import _Pointer, as_ffi_pointer
+#from _ctypes.function import CFuncPtr # this import is moved to the bottom
+                                       # because otherwise it's circular
 
 class NULL(object):
     pass
@@ -86,7 +88,7 @@
         return res
     if isinstance(value, Array):
         return value
-    if isinstance(value, _Pointer):
+    if isinstance(value, (_Pointer, CFuncPtr)):
         return cls.from_address(value._buffer.buffer)
     if isinstance(value, (int, long)):
         return cls(value)
@@ -216,10 +218,15 @@
             result.value = property(_getvalue, _setvalue)
 
         elif tp == 'X':
-            from ctypes import windll
-            SysAllocStringLen = windll.oleaut32.SysAllocStringLen
-            SysStringLen = windll.oleaut32.SysStringLen
-            SysFreeString = windll.oleaut32.SysFreeString
+            from ctypes import WinDLL
+            # Use WinDLL("oleaut32") instead of windll.oleaut32
+            # because the latter is a shared (cached) object; and
+            # other code may set their own restypes. We need our own
+            # restype here.
+            oleaut32 = WinDLL("oleaut32")
+            SysAllocStringLen = oleaut32.SysAllocStringLen
+            SysStringLen = oleaut32.SysStringLen
+            SysFreeString = oleaut32.SysFreeString
             def _getvalue(self):
                 addr = self._buffer[0]
                 if addr == 0:
@@ -333,3 +340,5 @@
 
     def __nonzero__(self):
         return self._buffer[0] not in (0, '\x00')
+
+from _ctypes.function import CFuncPtr
diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py
--- a/lib_pypy/_ctypes/structure.py
+++ b/lib_pypy/_ctypes/structure.py
@@ -14,6 +14,15 @@
             raise TypeError("Expected CData subclass, got %s" % (tp,))
         if isinstance(tp, StructOrUnionMeta):
             tp._make_final()
+        if len(f) == 3:
+            if (not hasattr(tp, '_type_')
+                or not isinstance(tp._type_, str)
+                or tp._type_ not in "iIhHbBlL"):
+                #XXX: are those all types?
+                #     we just don't get the type name
+                #     in the interp-level TypeError thrown
+                #     by rawffi if there are more
+                raise TypeError('bit fields not allowed for type ' + tp.__name__)
 
     all_fields = []
     for cls in reversed(inspect.getmro(superclass)):
@@ -34,34 +43,37 @@
     for i, field in enumerate(all_fields):
         name = field[0]
         value = field[1]
+        is_bitfield = (len(field) == 3)
         fields[name] = Field(name,
                              self._ffistruct.fieldoffset(name),
                              self._ffistruct.fieldsize(name),
-                             value, i)
+                             value, i, is_bitfield)
 
     if anonymous_fields:
         resnames = []
         for i, field in enumerate(all_fields):
             name = field[0]
             value = field[1]
+            is_bitfield = (len(field) == 3)
             startpos = self._ffistruct.fieldoffset(name)
             if name in anonymous_fields:
                 for subname in value._names:
                     resnames.append(subname)
-                    relpos = startpos + value._fieldtypes[subname].offset
-                    subvalue = value._fieldtypes[subname].ctype
+                    subfield = getattr(value, subname)
+                    relpos = startpos + subfield.offset
+                    subvalue = subfield.ctype
                     fields[subname] = Field(subname,
                                             relpos, subvalue._sizeofinstances(),
-                                            subvalue, i)
+                                            subvalue, i, is_bitfield)
             else:
                 resnames.append(name)
         names = resnames
     self._names = names
-    self._fieldtypes = fields
+    self.__dict__.update(fields)
 
 class Field(object):
-    def __init__(self, name, offset, size, ctype, num):
-        for k in ('name', 'offset', 'size', 'ctype', 'num'):
+    def __init__(self, name, offset, size, ctype, num, is_bitfield):
+        for k in ('name', 'offset', 'size', 'ctype', 'num', 'is_bitfield'):
             self.__dict__[k] = locals()[k]
 
     def __setattr__(self, name, value):
@@ -71,6 +83,35 @@
         return "<Field '%s' offset=%d size=%d>" % (self.name, self.offset,
                                                    self.size)
 
+    def __get__(self, obj, cls=None):
+        if obj is None:
+            return self
+        if self.is_bitfield:
+            # bitfield member, use direct access
+            return obj._buffer.__getattr__(self.name)
+        else:
+            fieldtype = self.ctype
+            offset = self.num
+            suba = obj._subarray(fieldtype, self.name)
+            return fieldtype._CData_output(suba, obj, offset)
+
+
+    def __set__(self, obj, value):
+        fieldtype = self.ctype
+        cobj = fieldtype.from_param(value)
+        if ensure_objects(cobj) is not None:
+            key = keepalive_key(self.num)
+            store_reference(obj, key, cobj._objects)
+        arg = cobj._get_buffer_value()
+        if fieldtype._fficompositesize is not None:
+            from ctypes import memmove
+            dest = obj._buffer.fieldaddress(self.name)
+            memmove(dest, arg, fieldtype._fficompositesize)
+        else:
+            obj._buffer.__setattr__(self.name, arg)
+
+
+
 # ________________________________________________________________
 
 def _set_shape(tp, rawfields, is_union=False):
@@ -79,17 +120,12 @@
     tp._ffiargshape = tp._ffishape = (tp._ffistruct, 1)
     tp._fficompositesize = tp._ffistruct.size
 
-def struct_getattr(self, name):
-    if name not in ('_fields_', '_fieldtypes'):
-        if hasattr(self, '_fieldtypes') and name in self._fieldtypes:
-            return self._fieldtypes[name]
-    return _CDataMeta.__getattribute__(self, name)
 
 def struct_setattr(self, name, value):
     if name == '_fields_':
         if self.__dict__.get('_fields_', None) is not None:
             raise AttributeError("_fields_ is final")
-        if self in [v for k, v in value]:
+        if self in [f[1] for f in value]:
             raise AttributeError("Structure or union cannot contain itself")
         names_and_fields(
             self,
@@ -127,10 +163,8 @@
         if '_fields_' not in self.__dict__:
             self._fields_ = []
             self._names = []
-            self._fieldtypes = {}
             _set_shape(self, [], self._is_union)
 
-    __getattr__ = struct_getattr
     __setattr__ = struct_setattr
 
     def from_address(self, address):
@@ -200,40 +234,6 @@
         A = _rawffi.Array(fieldtype._ffishape)
         return A.fromaddress(address, 1)
 
-    def __setattr__(self, name, value):
-        try:
-            field = self._fieldtypes[name]
-        except KeyError:
-            return _CData.__setattr__(self, name, value)
-        fieldtype = field.ctype
-        cobj = fieldtype.from_param(value)
-        if ensure_objects(cobj) is not None:
-            key = keepalive_key(field.num)
-            store_reference(self, key, cobj._objects)
-        arg = cobj._get_buffer_value()
-        if fieldtype._fficompositesize is not None:
-            from ctypes import memmove
-            dest = self._buffer.fieldaddress(name)
-            memmove(dest, arg, fieldtype._fficompositesize)
-        else:
-            self._buffer.__setattr__(name, arg)
-
-    def __getattribute__(self, name):
-        if name == '_fieldtypes':
-            return _CData.__getattribute__(self, '_fieldtypes')
-        try:
-            field = self._fieldtypes[name]
-        except KeyError:
-            return _CData.__getattribute__(self, name)
-        if field.size >> 16:
-            # bitfield member, use direct access
-            return self._buffer.__getattr__(name)
-        else:
-            fieldtype = field.ctype
-            offset = field.num
-            suba = self._subarray(fieldtype, name)
-            return fieldtype._CData_output(suba, self, offset)
-
     def _get_buffer_for_param(self):
         return self
 
diff --git a/lib_pypy/_sqlite3.py b/lib_pypy/_sqlite3.py
--- a/lib_pypy/_sqlite3.py
+++ b/lib_pypy/_sqlite3.py
@@ -724,13 +724,12 @@
             self.statement.reset()
             raise self.connection._get_exception(ret)
 
-        if self.statement.kind == "DQL":
-            if ret == SQLITE_ROW:
-                self.statement._build_row_cast_map()
-                self.statement._readahead()
-            else:
-                self.statement.item = None
-                self.statement.exhausted = True
+        if self.statement.kind == "DQL" and ret == SQLITE_ROW:
+            self.statement._build_row_cast_map()
+            self.statement._readahead()
+        else:
+            self.statement.item = None
+            self.statement.exhausted = True
 
         if self.statement.kind in ("DML", "DDL"):
             self.statement.reset()
@@ -891,7 +890,8 @@
 
         self.statement = c_void_p()
         next_char = c_char_p()
-        ret = sqlite.sqlite3_prepare_v2(self.con.db, sql, -1, byref(self.statement), byref(next_char))
+        sql_char = c_char_p(sql)
+        ret = sqlite.sqlite3_prepare_v2(self.con.db, sql_char, -1, byref(self.statement), byref(next_char))
         if ret == SQLITE_OK and self.statement.value is None:
             # an empty statement, we work around that, as it's the least trouble
             ret = sqlite.sqlite3_prepare_v2(self.con.db, "select 42", -1, byref(self.statement), byref(next_char))
@@ -901,7 +901,9 @@
             raise self.con._get_exception(ret)
         self.con._remember_statement(self)
         if _check_remaining_sql(next_char.value):
-            raise Warning, "One and only one statement required"
+            raise Warning, "One and only one statement required: %r" % (
+                next_char.value,)
+        # sql_char should remain alive until here
 
         self._build_row_cast_map()
 
diff --git a/lib_pypy/_subprocess.py b/lib_pypy/_subprocess.py
--- a/lib_pypy/_subprocess.py
+++ b/lib_pypy/_subprocess.py
@@ -35,7 +35,7 @@
 _DuplicateHandle.restype = ctypes.c_int
     
 _WaitForSingleObject = _kernel32.WaitForSingleObject
-_WaitForSingleObject.argtypes = [ctypes.c_int, ctypes.c_int]
+_WaitForSingleObject.argtypes = [ctypes.c_int, ctypes.c_uint]
 _WaitForSingleObject.restype = ctypes.c_int
 
 _GetExitCodeProcess = _kernel32.GetExitCodeProcess
diff --git a/lib_pypy/binascii.py b/lib_pypy/binascii.py
--- a/lib_pypy/binascii.py
+++ b/lib_pypy/binascii.py
@@ -659,7 +659,7 @@
         crc = crc_32_tab[(crc ^ long(ord(c))) & 0xffL] ^ (crc >> 8)
         #/* Note:  (crc >> 8) MUST zero fill on left
 
-        result = crc ^ 0xffffffffL
+    result = crc ^ 0xffffffffL
     
     if result > 2**31:
         result = ((result + 2**31) % 2**32) - 2**31
diff --git a/lib_pypy/cPickle.py b/lib_pypy/cPickle.py
--- a/lib_pypy/cPickle.py
+++ b/lib_pypy/cPickle.py
@@ -27,9 +27,9 @@
             PythonPickler.__init__(self, self.__f, args[0], **kw)
         else:
             PythonPickler.__init__(self, *args, **kw)
-            
+
     def memoize(self, obj):
-        self.memo[None] = None   # cPickle starts counting at one
+        self.memo[id(None)] = None   # cPickle starts counting at one
         return PythonPickler.memoize(self, obj)
 
     def getvalue(self):
diff --git a/lib_pypy/pyrepl/readline.py b/lib_pypy/pyrepl/readline.py
--- a/lib_pypy/pyrepl/readline.py
+++ b/lib_pypy/pyrepl/readline.py
@@ -33,7 +33,7 @@
 from pyrepl.unix_console import UnixConsole, _error
 
 
-ENCODING = 'latin1'     # XXX hard-coded
+ENCODING = sys.getfilesystemencoding() or 'latin1'     # XXX review
 
 __all__ = ['add_history',
            'clear_history',
diff --git a/lib_pypy/pyrepl/unix_console.py b/lib_pypy/pyrepl/unix_console.py
--- a/lib_pypy/pyrepl/unix_console.py
+++ b/lib_pypy/pyrepl/unix_console.py
@@ -384,15 +384,19 @@
 
         self.__maybe_write_code(self._smkx)
 
-        self.old_sigwinch = signal.signal(
-            signal.SIGWINCH, self.__sigwinch)
+        try:
+            self.old_sigwinch = signal.signal(
+                signal.SIGWINCH, self.__sigwinch)
+        except ValueError:
+            pass
 
     def restore(self):
         self.__maybe_write_code(self._rmkx)
         self.flushoutput()
         tcsetattr(self.input_fd, termios.TCSADRAIN, self.__svtermstate)
 
-        signal.signal(signal.SIGWINCH, self.old_sigwinch)
+        if hasattr(self, 'old_sigwinch'):
+            signal.signal(signal.SIGWINCH, self.old_sigwinch)
 
     def __sigwinch(self, signum, frame):
         self.height, self.width = self.getheightwidth()
diff --git a/pypy/annotation/builtin.py b/pypy/annotation/builtin.py
--- a/pypy/annotation/builtin.py
+++ b/pypy/annotation/builtin.py
@@ -308,9 +308,6 @@
             clsdef = clsdef.commonbase(cdef)
     return SomeInstance(clsdef)
 
-def robjmodel_we_are_translated():
-    return immutablevalue(True)
-
 def robjmodel_r_dict(s_eqfn, s_hashfn, s_force_non_null=None):
     if s_force_non_null is None:
         force_non_null = False
@@ -376,8 +373,6 @@
 
 BUILTIN_ANALYZERS[pypy.rlib.rarithmetic.intmask] = rarith_intmask
 BUILTIN_ANALYZERS[pypy.rlib.objectmodel.instantiate] = robjmodel_instantiate
-BUILTIN_ANALYZERS[pypy.rlib.objectmodel.we_are_translated] = (
-    robjmodel_we_are_translated)
 BUILTIN_ANALYZERS[pypy.rlib.objectmodel.r_dict] = robjmodel_r_dict
 BUILTIN_ANALYZERS[pypy.rlib.objectmodel.hlinvoke] = robjmodel_hlinvoke
 BUILTIN_ANALYZERS[pypy.rlib.objectmodel.keepalive_until_here] = robjmodel_keepalive_until_here
@@ -416,7 +411,8 @@
 from pypy.annotation.model import SomePtr
 from pypy.rpython.lltypesystem import lltype
 
-def malloc(s_T, s_n=None, s_flavor=None, s_zero=None, s_track_allocation=None):
+def malloc(s_T, s_n=None, s_flavor=None, s_zero=None, s_track_allocation=None,
+           s_add_memory_pressure=None):
     assert (s_n is None or s_n.knowntype == int
             or issubclass(s_n.knowntype, pypy.rlib.rarithmetic.base_int))
     assert s_T.is_constant()
@@ -432,6 +428,8 @@
     else:
         assert s_flavor.is_constant()
         assert s_track_allocation is None or s_track_allocation.is_constant()
+        assert (s_add_memory_pressure is None or
+                s_add_memory_pressure.is_constant())
         # not sure how to call malloc() for the example 'p' in the
         # presence of s_extraargs
         r = SomePtr(lltype.Ptr(s_T.const))
diff --git a/pypy/config/config.py b/pypy/config/config.py
--- a/pypy/config/config.py
+++ b/pypy/config/config.py
@@ -81,6 +81,12 @@
                                  (self.__class__, name))
         return self._cfgimpl_values[name]
 
+    def __dir__(self):
+        from_type = dir(type(self))
+        from_dict = list(self.__dict__)
+        extras = list(self._cfgimpl_values)
+        return sorted(set(extras + from_type + from_dict))
+
     def __delattr__(self, name):
         # XXX if you use delattr you are responsible for all bad things
         # happening
diff --git a/pypy/config/makerestdoc.py b/pypy/config/makerestdoc.py
--- a/pypy/config/makerestdoc.py
+++ b/pypy/config/makerestdoc.py
@@ -134,7 +134,7 @@
         for child in self._children:
             subpath = fullpath + "." + child._name
             toctree.append(subpath)
-        content.add(Directive("toctree", *toctree, maxdepth=4))
+        content.add(Directive("toctree", *toctree, **{'maxdepth': 4}))
         content.join(
             ListItem(Strong("name:"), self._name),
             ListItem(Strong("description:"), self.doc))
diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py
--- a/pypy/config/pypyoption.py
+++ b/pypy/config/pypyoption.py
@@ -327,6 +327,9 @@
         BoolOption("mutable_builtintypes",
                    "Allow the changing of builtin types", default=False,
                    requires=[("objspace.std.builtinshortcut", True)]),
+        BoolOption("withidentitydict",
+                   "track types that override __hash__, __eq__ or __cmp__ and use a special dict strategy for those which do not",
+                   default=True),
      ]),
 ])
 
diff --git a/pypy/config/support.py b/pypy/config/support.py
--- a/pypy/config/support.py
+++ b/pypy/config/support.py
@@ -9,7 +9,7 @@
         return 1    # don't override MAKEFLAGS.  This will call 'make' without any '-j' option
     if sys.platform == 'darwin':
         return darwin_get_cpu_count()
-    elif sys.platform != 'linux2':
+    elif not sys.platform.startswith('linux'):
         return 1    # implement me
     try:
         if isinstance(filename_or_file, str):
diff --git a/pypy/config/test/test_config.py b/pypy/config/test/test_config.py
--- a/pypy/config/test/test_config.py
+++ b/pypy/config/test/test_config.py
@@ -1,5 +1,5 @@
 from pypy.config.config import *
-import py
+import py, sys
 
 def make_description():
     gcoption = ChoiceOption('name', 'GC name', ['ref', 'framework'], 'ref')
@@ -63,6 +63,22 @@
     py.test.raises(ConfigError, 'config.gc.name = "ref"')
     config.gc.name = "framework"
 
+def test___dir__():
+    descr = make_description()
+    config = Config(descr, bool=False)
+    attrs = dir(config)
+    assert '__repr__' in attrs        # from the type
+    assert '_cfgimpl_values' in attrs # from self
+    if sys.version_info >= (2, 6):
+        assert 'gc' in attrs              # custom attribute
+        assert 'objspace' in attrs        # custom attribute
+    #
+    attrs = dir(config.gc)
+    if sys.version_info >= (2, 6):
+        assert 'name' in attrs
+        assert 'dummy' in attrs
+        assert 'float' in attrs
+
 def test_arbitrary_option():
     descr = OptionDescription("top", "", [
         ArbitraryOption("a", "no help", default=None)
diff --git a/pypy/config/test/test_support.py b/pypy/config/test/test_support.py
--- a/pypy/config/test/test_support.py
+++ b/pypy/config/test/test_support.py
@@ -40,7 +40,7 @@
         return self._value
 
 def test_cpuinfo_linux():
-    if sys.platform != 'linux2':
+    if not sys.platform.startswith('linux'):
         py.test.skip("linux only")
     saved = os.environ
     try:
diff --git a/pypy/config/translationoption.py b/pypy/config/translationoption.py
--- a/pypy/config/translationoption.py
+++ b/pypy/config/translationoption.py
@@ -13,6 +13,10 @@
 DEFL_LOW_INLINE_THRESHOLD = DEFL_INLINE_THRESHOLD / 2.0
 
 DEFL_GC = "minimark"
+if sys.platform.startswith("linux"):
+    DEFL_ROOTFINDER_WITHJIT = "asmgcc"
+else:
+    DEFL_ROOTFINDER_WITHJIT = "shadowstack"
 
 IS_64_BITS = sys.maxint > 2147483647
 
@@ -109,7 +113,7 @@
     BoolOption("jit", "generate a JIT",
                default=False,
                suggests=[("translation.gc", DEFL_GC),
-                         ("translation.gcrootfinder", "asmgcc"),
+                         ("translation.gcrootfinder", DEFL_ROOTFINDER_WITHJIT),
                          ("translation.list_comprehension_operations", True)]),
     ChoiceOption("jit_backend", "choose the backend for the JIT",
                  ["auto", "x86", "x86-without-sse2", "llvm"],
@@ -140,7 +144,10 @@
                  ["annotate", "rtype", "backendopt", "database", "source",
                   "pyjitpl"],
                  default=None, cmdline="--fork-before"),
-
+    BoolOption("dont_write_c_files",
+               "Make the C backend write everything to /dev/null. " +
+               "Useful for benchmarking, so you don't actually involve the disk",
+               default=False, cmdline="--dont-write-c-files"),
     ArbitraryOption("instrumentctl", "internal",
                default=None),
     StrOption("output", "Output file name", cmdline="--output"),
diff --git a/pypy/doc/coding-guide.rst b/pypy/doc/coding-guide.rst
--- a/pypy/doc/coding-guide.rst
+++ b/pypy/doc/coding-guide.rst
@@ -929,6 +929,19 @@
 located in the ``py/bin/`` directory.  For switches to
 modify test execution pass the ``-h`` option.
 
+Coverage reports
+----------------
+
+In order to get coverage reports, the `pytest-cov`_ plugin is included.
+It adds some extra requirements (coverage_ and `cov-core`_);
+once they are installed, coverage testing can be invoked via::
+
+  python test_all.py --cov file_or_directory_to_cover file_or_directory
+
+.. _`pytest-cov`: http://pypi.python.org/pypi/pytest-cov
+.. _`coverage`: http://pypi.python.org/pypi/coverage
+.. _`cov-core`: http://pypi.python.org/pypi/cov-core
+
 Test conventions
 ----------------
 
diff --git a/pypy/doc/conf.py b/pypy/doc/conf.py
--- a/pypy/doc/conf.py
+++ b/pypy/doc/conf.py
@@ -45,9 +45,9 @@
 # built documents.
 #
 # The short X.Y version.
-version = '1.5'
+version = '1.6'
 # The full version, including alpha/beta/rc tags.
-release = '1.5'
+release = '1.6'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
diff --git a/pypy/doc/config/objspace.std.withidentitydict.txt b/pypy/doc/config/objspace.std.withidentitydict.txt
new file mode 100644
--- /dev/null
+++ b/pypy/doc/config/objspace.std.withidentitydict.txt
@@ -0,0 +1,21 @@
+=============================
+objspace.std.withidentitydict
+=============================
+
+* **name:** withidentitydict
+
+* **description:** enable a dictionary strategy for "by identity" comparisons
+
+* **command-line:** --objspace-std-withidentitydict
+
+* **command-line for negation:** --no-objspace-std-withidentitydict
+
+* **option type:** boolean option
+
+* **default:** True
+
+
+Enable a dictionary strategy specialized for instances of classes which
+compares "by identity", which is the default unless you override ``__hash__``,
+``__eq__`` or ``__cmp__``.  This strategy will be used only with new-style
+classes.
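
For illustration, a sketch of which classes qualify (the class names are
invented here)::

    class Plain(object):        # default __hash__/__eq__/__cmp__: eligible
        pass

    class Custom(object):       # overrides __eq__: not eligible
        def __eq__(self, other):
            return NotImplemented

    d1 = {Plain(): 1}           # may use the identity-dict strategy
    d2 = {Custom(): 1}          # uses an ordinary strategy
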
diff --git a/pypy/doc/config/translation.dont_write_c_files.txt b/pypy/doc/config/translation.dont_write_c_files.txt
new file mode 100644
--- /dev/null
+++ b/pypy/doc/config/translation.dont_write_c_files.txt
@@ -0,0 +1,4 @@
+write the generated C files to ``/dev/null`` instead of to the disk. Useful if
+you want to use translate.py as a benchmark and don't want to access the disk.
+
+.. _`translation documentation`: ../translation.html
diff --git a/pypy/doc/config/translation.gc.txt b/pypy/doc/config/translation.gc.txt
--- a/pypy/doc/config/translation.gc.txt
+++ b/pypy/doc/config/translation.gc.txt
@@ -1,4 +1,6 @@
-Choose the Garbage Collector used by the translated program:
+Choose the Garbage Collector used by the translated program.
+The well-performing collectors are "hybrid" and "minimark".
+The default is "minimark".
 
   - "ref": reference counting. Takes very long to translate and the result is
     slow.
@@ -11,3 +13,12 @@
     older generation.
 
   - "boehm": use the Boehm conservative GC.
+
+  - "hybrid": a hybrid collector of "generation" together with a
+    mark-n-sweep old space
+
+  - "markcompact": a slow, but memory-efficient collector,
+    influenced e.g. by Smalltalk systems.
+
+  - "minimark": a generational mark-n-sweep collector with good
+    performance.  Includes page marking for large arrays.
diff --git a/pypy/doc/contributor.rst b/pypy/doc/contributor.rst
--- a/pypy/doc/contributor.rst
+++ b/pypy/doc/contributor.rst
@@ -9,22 +9,22 @@
     Armin Rigo
     Maciej Fijalkowski
     Carl Friedrich Bolz
+    Antonio Cuni
     Amaury Forgeot d'Arc
-    Antonio Cuni
     Samuele Pedroni
     Michael Hudson
     Holger Krekel
+    Benjamin Peterson
     Christian Tismer
-    Benjamin Peterson
+    Hakan Ardo
+    Alex Gaynor
     Eric van Riet Paap
-    Anders Chrigström
-    Håkan Ardö
+    Anders Chrigstrom
+    David Schneider
     Richard Emslie
     Dan Villiom Podlaski Christiansen
     Alexander Schremmer
-    Alex Gaynor
-    David Schneider
-    Aurelién Campeas
+    Aurelien Campeas
     Anders Lehmann
     Camillo Bruni
     Niklaus Haldimann
@@ -35,16 +35,17 @@
     Bartosz Skowron
     Jakub Gustak
     Guido Wesdorp
+    Daniel Roberts
     Adrien Di Mascio
     Laura Creighton
     Ludovic Aubry
     Niko Matsakis
-    Daniel Roberts
     Jason Creighton
-    Jacob Hallén
+    Jacob Hallen
     Alex Martelli
     Anders Hammarquist
     Jan de Mooij
+    Wim Lavrijsen
     Stephan Diehl
     Michael Foord
     Stefan Schwarzer
@@ -55,9 +56,13 @@
     Alexandre Fayolle
     Marius Gedminas
     Simon Burton
+    Justin Peel
     Jean-Paul Calderone
     John Witulski
+    Lukas Diekmann
+    holger krekel
     Wim Lavrijsen
+    Dario Bertini
     Andreas Stührk
     Jean-Philippe St. Pierre
     Guido van Rossum
@@ -69,15 +74,16 @@
     Georg Brandl
     Gerald Klix
     Wanja Saatkamp
+    Ronny Pfannschmidt
     Boris Feigin
     Oscar Nierstrasz
-    Dario Bertini
     David Malcolm
     Eugene Oden
     Henry Mason
+    Sven Hager
     Lukas Renggli
+    Ilya Osadchiy
     Guenter Jantzen
-    Ronny Pfannschmidt
     Bert Freudenberg
     Amit Regmi
     Ben Young
@@ -94,8 +100,8 @@
     Jared Grubb
     Karl Bartel
     Gabriel Lavoie
+    Victor Stinner
     Brian Dorsey
-    Victor Stinner
     Stuart Williams
     Toby Watson
     Antoine Pitrou
@@ -106,19 +112,23 @@
     Jonathan David Riehl
     Elmo Mäntynen
     Anders Qvist
-    Beatrice Düring
+    Beatrice During
     Alexander Sedov
+    Timo Paulssen
+    Corbin Simpson
     Vincent Legoll
+    Romain Guillebert
     Alan McIntyre
-    Romain Guillebert
     Alex Perry
     Jens-Uwe Mager
+    Simon Cross
     Dan Stromberg
-    Lukas Diekmann
+    Guillebert Romain
     Carl Meyer
     Pieter Zieschang
     Alejandro J. Cura
     Sylvain Thenault
+    Christoph Gerum
     Travis Francis Athougies
     Henrik Vendelbo
     Lutz Paelike
@@ -129,6 +139,7 @@
     Miguel de Val Borro
     Ignas Mikalajunas
     Artur Lisiecki
+    Philip Jenvey
     Joshua Gilbert
     Godefroid Chappelle
     Yusei Tahara
@@ -137,24 +148,29 @@
     Gustavo Niemeyer
     William Leslie
     Akira Li
-    Kristján Valur Jónsson
+    Kristjan Valur Jonsson
     Bobby Impollonia
+    Michael Hudson-Doyle
     Andrew Thompson
     Anders Sigfridsson
+    Floris Bruynooghe
     Jacek Generowicz
     Dan Colish
-    Sven Hager
     Zooko Wilcox-O Hearn
+    Dan Villiom Podlaski Christiansen
     Anders Hammarquist
+    Chris Lambacher
     Dinu Gherman
     Dan Colish
+    Brett Cannon
     Daniel Neuhäuser
     Michael Chermside
     Konrad Delong
     Anna Ravencroft
     Greg Price
     Armin Ronacher
+    Christian Muirhead
     Jim Baker
-    Philip Jenvey
     Rodrigo Araújo
+    Romain Guillebert
 
diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst
--- a/pypy/doc/cpython_differences.rst
+++ b/pypy/doc/cpython_differences.rst
@@ -88,6 +88,13 @@
 
     _stackless
 
+  Note that only some of these modules are built-in in a typical
+  CPython installation; the rest comes from non-built-in extension
+  modules.  This means that e.g. ``import parser`` will, on CPython,
+  find a local file ``parser.py``, while ``import sys`` will not find a
+  local file ``sys.py``.  In PyPy the difference does not exist: all
+  these modules are built-in.
+
 * Supported by being rewritten in pure Python (possibly using ``ctypes``):
   see the `lib_pypy/`_ directory.  Examples of modules that we
   support this way: ``ctypes``, ``cPickle``, ``cmath``, ``dbm``, ``datetime``...
@@ -211,6 +218,38 @@
     >>>> print d1['a']
     42
 
+Mutating classes of objects which are already used as dictionary keys
+---------------------------------------------------------------------
+
+Consider the following snippet of code::
+
+    class X(object):
+        pass
+
+    def __evil_eq__(self, other):
+        print 'hello world'
+        return False
+
+    def evil(y):
+        d = {X(): 1}
+        X.__eq__ = __evil_eq__
+        d[y] # might trigger a call to __eq__?
+
+In CPython, __evil_eq__ **might** be called, although there is no way to write
+a test which reliably calls it.  It happens if ``y is not x`` and ``hash(y) ==
+hash(x)``, where ``hash(x)`` is computed when ``x`` is inserted into the
+dictionary.  If **by chance** the condition is satisfied, then ``__evil_eq__``
+is called.
+
+PyPy uses a special strategy to optimize dictionaries whose keys are instances
+of user-defined classes which do not override the default ``__hash__``,
+``__eq__`` and ``__cmp__``: when using this strategy, ``__eq__`` and
+``__cmp__`` are never called, but instead the lookup is done by identity, so
+in the case above it is guaranteed that ``__eq__`` won't be called.
+
+Note that in all other cases (e.g., if you have a custom ``__hash__`` and
+``__eq__`` in ``y``) the behavior is exactly the same as CPython.
+
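
For contrast, a small sketch of the unaffected case (``Key`` is an invented
name)::

    class Key(object):
        def __init__(self, n):
            self.n = n
        def __hash__(self):
            return hash(self.n)
        def __eq__(self, other):
            return isinstance(other, Key) and self.n == other.n

    d = {Key(1): 'one'}
    print d[Key(1)]    # prints 'one': __eq__ is called, on PyPy as on CPython
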
 
 Ignored exceptions
 -----------------------
@@ -248,7 +287,14 @@
   never a dictionary as it sometimes is in CPython. Assigning to
   ``__builtins__`` has no effect.
 
-* object identity of immutable keys in dictionaries is not necessarily preserved.
-  Never compare immutable objects with ``is``.
+* Do not compare immutable objects with ``is``.  For example on CPython
+  it is true that ``x is 0`` works, i.e. does the same as ``type(x) is
+  int and x == 0``, but it is so by accident.  If you do instead
+  ``x is 1000``, then it stops working, because 1000 is too large and
+  doesn't come from the internal cache.  In PyPy it fails to work in
+  both cases, because we have no need for a cache at all.
+
+* Also, object identity of immutable keys in dictionaries is not necessarily
+  preserved.
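
In other words, spell such checks with ``==`` (plus an explicit type check if
you need one) rather than with ``is``; schematically::

    x = 1000
    # fragile: relies on implementation-level caching of immutable objects
    if x is 1000:
        pass
    # portable: behaves the same on CPython and PyPy
    if type(x) is int and x == 1000:
        pass
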
 
 .. include:: _ref.txt
diff --git a/pypy/doc/extending.rst b/pypy/doc/extending.rst
--- a/pypy/doc/extending.rst
+++ b/pypy/doc/extending.rst
@@ -19,12 +19,12 @@
   section
 
 * Write them in pure python and use direct libffi low-level bindings, See
-  \_rawffi_ module description.
+  \_ffi_ module description.
 
 * Write them in RPython as mixedmodule_, using *rffi* as bindings.
 
 .. _ctypes: #CTypes
-.. _\_rawffi: #LibFFI
+.. _\_ffi: #LibFFI
 .. _mixedmodule: #Mixed Modules
 
 CTypes
@@ -42,41 +42,50 @@
 platform-dependent details (compiling small snippets of C code and running
 them), so it'll benefit not pypy-related ctypes-based modules as well.
 
+ctypes calls are optimized by the JIT and the resulting machine code contains a
+direct call to the target C function.  However, due to the very dynamic nature
+of ctypes, some overhead over a bare C call is still present, in particular to
+check/convert the types of the parameters.  Moreover, even if most calls are
+optimized, some cannot and thus need to follow the slow path, not optimized by
+the JIT.
+
 .. _`ctypes-configure`: ctypes-implementation.html#ctypes-configure
+.. _`CPython ctypes`: http://docs.python.org/library/ctypes.html
 
 Pros
 ----
 
-Stable, CPython-compatible API
+Stable, CPython-compatible API.  Most calls are fast, optimized by JIT.
 
 Cons
 ----
 
-Only pure-python code (slow), problems with platform-dependency (although
-we partially solve those). PyPy implementation is now very slow.
+Problems with platform-dependency (although we partially solve
+those). Although the JIT optimizes ctypes calls, some overhead is still
+present.  The slow-path is very slow.
 
-_`CPython ctypes`: http://python.net/crew/theller/ctypes/
 
 LibFFI
 ======
 
 Mostly in order to be able to write a ctypes module, we developed a very
-low-level libffi bindings. (libffi is a C-level library for dynamic calling,
+low-level libffi bindings called ``_ffi``. (libffi is a C-level library for dynamic calling,
 which is used by CPython ctypes). This library provides stable and usable API,
 although its API is a very low-level one. It does not contain any
-magic.
+magic.  It is also optimized by the JIT, but has much less overhead than ctypes.
 
 Pros
 ----
 
-Works. Combines disadvantages of using ctypes with disadvantages of
-using mixed modules. Probably more suitable for a delicate code
-where ctypes magic goes in a way.
+It works. Probably more suitable for delicate code where ctypes magic gets
+in the way.  All calls are optimized by the JIT; there is no slow path as in
+ctypes.
 
 Cons
 ----
 
-Slow. CPython-incompatible API, very rough and low-level
+It combines the disadvantages of using ctypes with the disadvantages of using
+mixed modules. The API is CPython-incompatible, very rough and low-level.
 
 Mixed Modules
 =============
@@ -87,15 +96,15 @@
 * a mixed module needs to be written in RPython, which is far more
   complicated than Python (XXX link)
 
-* due to lack of separate compilation (as of April 2008), each
+* due to lack of separate compilation (as of July 2011), each
   compilation-check requires to recompile whole PyPy python interpreter,
   which takes 0.5-1h. We plan to solve this at some point in near future.
 
 * although rpython is a garbage-collected language, the border between
   C and RPython needs to be managed by hand (each object that goes into the
-  C level must be explicitly freed) XXX we try to solve this
+  C level must be explicitly freed).
 
-Some document is available `here`_
+Some documentation is available `here`_
 
 .. _`here`: rffi.html
 
diff --git a/pypy/doc/garbage_collection.rst b/pypy/doc/garbage_collection.rst
--- a/pypy/doc/garbage_collection.rst
+++ b/pypy/doc/garbage_collection.rst
@@ -147,7 +147,7 @@
 You can read more about them at the start of
 `pypy/rpython/memory/gc/minimark.py`_.
 
-In more details:
+In more detail:
 
 - The small newly malloced objects are allocated in the nursery (case 1).
   All objects living in the nursery are "young".
diff --git a/pypy/doc/getting-started-python.rst b/pypy/doc/getting-started-python.rst
--- a/pypy/doc/getting-started-python.rst
+++ b/pypy/doc/getting-started-python.rst
@@ -32,7 +32,10 @@
 .. _`windows document`: windows.html
 
 You can translate the whole of PyPy's Python interpreter to low level C code,
-or `CLI code`_.
+or `CLI code`_.  If you intend to build using gcc, check to make sure that
+the version you have is not 4.2 or you will run into `this bug`_.
+
+.. _`this bug`: https://bugs.launchpad.net/ubuntu/+source/gcc-4.2/+bug/187391
 
 1. First `download a pre-built PyPy`_ for your architecture which you will
    use to translate your Python interpreter.  It is, of course, possible to
@@ -102,7 +105,7 @@
 
     $ ./pypy-c
     Python 2.7.0 (61ef2a11b56a, Mar 02 2011, 03:00:11)
-    [PyPy 1.5.0-alpha0 with GCC 4.4.3] on linux2
+    [PyPy 1.6.0 with GCC 4.4.3] on linux2
     Type "help", "copyright", "credits" or "license" for more information.
     And now for something completely different: ``this sentence is false''
     >>>> 46 - 4
@@ -162,7 +165,7 @@
 
     $ ./pypy-cli
     Python 2.7.0 (61ef2a11b56a, Mar 02 2011, 03:00:11)
-    [PyPy 1.5.0-alpha0] on linux2
+    [PyPy 1.6.0] on linux2
     Type "help", "copyright", "credits" or "license" for more information.
     And now for something completely different: ``distopian and utopian chairs''
     >>>> 
@@ -199,7 +202,7 @@
 
         $ ./pypy-jvm 
         Python 2.7.0 (61ef2a11b56a, Mar 02 2011, 03:00:11)
-        [PyPy 1.5.0-alpha0] on linux2
+        [PyPy 1.6.0] on linux2
         Type "help", "copyright", "credits" or "license" for more information.
         And now for something completely different: ``# assert did not crash''
         >>>> 
@@ -238,7 +241,7 @@
 the ``bin/pypy`` executable.
 
 To install PyPy system wide on unix-like systems, it is recommended to put the
-whole hierarchy alone (e.g. in ``/opt/pypy1.5``) and put a symlink to the
+whole hierarchy alone (e.g. in ``/opt/pypy1.6``) and put a symlink to the
 ``pypy`` executable into ``/usr/bin`` or ``/usr/local/bin``
 
 If the executable fails to find suitable libraries, it will report
diff --git a/pypy/doc/getting-started.rst b/pypy/doc/getting-started.rst
--- a/pypy/doc/getting-started.rst
+++ b/pypy/doc/getting-started.rst
@@ -51,13 +51,13 @@
 ---------------
 
 PyPy is ready to be executed as soon as you unpack the tarball or the zip
-file, with no need install it in any specific location::
+file, with no need to install it in any specific location::
 
-    $ tar xf pypy-1.5-linux.tar.bz2
+    $ tar xf pypy-1.6-linux.tar.bz2
 
-    $ ./pypy-1.5-linux/bin/pypy
+    $ ./pypy-1.6/bin/pypy
     Python 2.7.1 (?, Apr 27 2011, 12:44:21)
-    [PyPy 1.5.0-alpha0 with GCC 4.4.3] on linux2
+    [PyPy 1.6.0 with GCC 4.4.3] on linux2
     Type "help", "copyright", "credits" or "license" for more information.
     And now for something completely different: ``implementing LOGO in LOGO:
     "turtles all the way down"''
@@ -73,16 +73,16 @@
 
     $ curl -O http://python-distribute.org/distribute_setup.py
 
-    $ curl -O https://github.com/pypa/pip/raw/master/contrib/get-pip.py
+    $ curl -O https://raw.github.com/pypa/pip/master/contrib/get-pip.py
 
-    $ ./pypy-1.5-linux/bin/pypy distribute_setup.py
+    $ ./pypy-1.6/bin/pypy distribute_setup.py
 
-    $ ./pypy-1.5-linux/bin/pypy get-pip.py
+    $ ./pypy-1.6/bin/pypy get-pip.py
 
-    $ ./pypy-1.5-linux/bin/pip install pygments  # for example
+    $ ./pypy-1.6/bin/pip install pygments  # for example
 
-3rd party libraries will be installed in ``pypy-1.5-linux/site-packages``, and
-the scripts in ``pypy-1.5-linux/bin``.
+3rd party libraries will be installed in ``pypy-1.6/site-packages``, and
+the scripts in ``pypy-1.6/bin``.
 
 Installing using virtualenv
 ---------------------------
diff --git a/pypy/doc/how-to-release.rst b/pypy/doc/how-to-release.rst
--- a/pypy/doc/how-to-release.rst
+++ b/pypy/doc/how-to-release.rst
@@ -21,8 +21,8 @@
 Release Steps
 ----------------
 
-* at code freeze make a release branch under
-  http://codepeak.net/svn/pypy/release/x.y(.z). IMPORTANT: bump the
+* at code freeze make a release branch using release-x.x.x in mercurial.
+  IMPORTANT: bump the
   pypy version number in module/sys/version.py and in
   module/cpyext/include/patchlevel.h, notice that the branch
   will capture the revision number of this change for the release;
@@ -48,12 +48,6 @@
   the release announcement should contain a direct link to the download page
 * update pypy.org (under extradoc/pypy.org), rebuild and commit
 
-* update http://codespeak.net/pypy/trunk:
-   code0> + chmod -R yourname:users /www/codespeak.net/htdocs/pypy/trunk
-   local> cd ..../pypy/doc && py.test
-   local> cd ..../pypy
-   local> rsync -az doc codespeak.net:/www/codespeak.net/htdocs/pypy/trunk/pypy/
-
 * post announcement on morepypy.blogspot.com
 * send announcements to pypy-dev, python-list,
   python-announce, python-dev ...
diff --git a/pypy/doc/index-of-release-notes.rst b/pypy/doc/index-of-release-notes.rst
--- a/pypy/doc/index-of-release-notes.rst
+++ b/pypy/doc/index-of-release-notes.rst
@@ -16,3 +16,4 @@
    release-1.4.0beta.rst
    release-1.4.1.rst
    release-1.5.0.rst
+   release-1.6.0.rst
diff --git a/pypy/doc/index.rst b/pypy/doc/index.rst
--- a/pypy/doc/index.rst
+++ b/pypy/doc/index.rst
@@ -11,7 +11,11 @@
 Getting into PyPy ... 
 =============================================
 
-* `Release 1.5`_: the latest official release
+* `Getting started`_: how to install and run the PyPy Python interpreter
+
+* `FAQ`_: some frequently asked questions.
+
+* `Release 1.6`_: the latest official release
 
 * `PyPy Blog`_: news and status info about PyPy 
 
@@ -26,13 +30,6 @@
 Documentation for the PyPy Python Interpreter
 ===============================================
 
-`getting started`_ provides hands-on instructions 
-including a two-liner to run the PyPy Python interpreter 
-on your system, examples on advanced features and 
-entry points for using the `RPython toolchain`_.
-
-`FAQ`_ contains some frequently asked questions.
-
 New features of PyPy's Python Interpreter and 
 Translation Framework: 
 
@@ -80,7 +77,7 @@
 .. _`Getting Started`: getting-started.html
 .. _`Papers`: extradoc.html
 .. _`Videos`: video-index.html
-.. _`Release 1.5`: http://pypy.org/download.html
+.. _`Release 1.6`: http://pypy.org/download.html
 .. _`speed.pypy.org`: http://speed.pypy.org
 .. _`RPython toolchain`: translation.html
 .. _`potential project ideas`: project-ideas.html
@@ -125,9 +122,9 @@
 Windows, on top of .NET, and on top of Java.
 To dig into PyPy it is recommended to try out the current
 Mercurial default branch, which is always working or mostly working,
-instead of the latest release, which is `1.5`__.
+instead of the latest release, which is `1.6`__.
 
-.. __: release-1.5.0.html
+.. __: release-1.6.0.html
 
 PyPy is mainly developed on Linux and Mac OS X.  Windows is supported,
 but platform-specific bugs tend to take longer before we notice and fix
diff --git a/pypy/doc/interpreter-optimizations.rst b/pypy/doc/interpreter-optimizations.rst
--- a/pypy/doc/interpreter-optimizations.rst
+++ b/pypy/doc/interpreter-optimizations.rst
@@ -157,32 +157,6 @@
 A more advanced version of sharing dicts, called *map dicts,* is available
 with the :config:`objspace.std.withmapdict` option.
 
-Builtin-Shadowing
-+++++++++++++++++
-
-Usually the calling of builtins in Python requires two dictionary lookups: first
-to see whether the current global dictionary contains an object with the same
-name, then a lookup in the ``__builtin__`` dictionary. This is somehow
-circumvented by storing an often used builtin into a local variable to get
-the fast local lookup (which is a rather strange and ugly hack).
-
-The same problem is solved in a different way by "wary" dictionaries. They are
-another dictionary representation used together with multidicts. This
-representation is used only for module dictionaries. The representation checks on
-every setitem whether the key that is used is the name of a builtin. If this is
-the case, the dictionary is marked as shadowing that particular builtin.
-
-To identify calls to builtins easily, a new bytecode (``CALL_LIKELY_BUILTIN``)
-is introduced. Whenever it is executed, the globals dictionary is checked
-to see whether it masks the builtin (which is possible without a dictionary
-lookup).  Then the ``__builtin__`` dict is checked in the same way,
-to see whether somebody replaced the real builtin with something else. In the
-common case, the program didn't do any of these; the proper builtin can then
-be called without using any dictionary lookup at all.
-
-You can enable this feature with the
-:config:`objspace.opcodes.CALL_LIKELY_BUILTIN` option.
-
 
 List Optimizations
 ------------------
@@ -289,34 +263,6 @@
 You can enable this feature with the :config:`objspace.opcodes.CALL_METHOD`
 option.
 
-.. _`call likely builtin`:
-
-CALL_LIKELY_BUILTIN
-+++++++++++++++++++
-
-A often heard "tip" for speeding up Python programs is to give an often used
-builtin a local name, since local lookups are faster than lookups of builtins,
-which involve doing two dictionary lookups: one in the globals dictionary and
-one in the the builtins dictionary. PyPy approaches this problem at the
-implementation level, with the introduction of the new ``CALL_LIKELY_BUILTIN``
-bytecode. This bytecode is produced by the compiler for a call whose target is
-the name of a builtin.  Since such a syntactic construct is very often actually
-invoking the expected builtin at run-time, this information can be used to make
-the call to the builtin directly, without going through any dictionary lookup.
-
-However, it can occur that the name is shadowed by a global name from the
-current module.  To catch this case, a special dictionary implementation for
-multidicts is introduced, which is used for the dictionaries of modules. This
-implementation keeps track which builtin name is shadowed by it.  The
-``CALL_LIKELY_BUILTIN`` bytecode asks the dictionary whether it is shadowing the
-builtin that is about to be called and asks the dictionary of ``__builtin__``
-whether the original builtin was changed.  These two checks are cheaper than
-full lookups.  In the common case, neither of these cases is true, so the
-builtin can be directly invoked.
-
-You can enable this feature with the
-:config:`objspace.opcodes.CALL_LIKELY_BUILTIN` option.
-
 .. more here?
 
 Overall Effects
diff --git a/pypy/doc/project-ideas.rst b/pypy/doc/project-ideas.rst
--- a/pypy/doc/project-ideas.rst
+++ b/pypy/doc/project-ideas.rst
@@ -48,12 +48,6 @@
 
 .. image:: image/jitviewer.png
 
-We would like to add one level to this hierarchy, by showing the generated
-machine code for each jit operation.  The necessary information is already in
-the log file produced by the JIT, so it is "only" a matter of teaching the
-jitviewer to display it.  Ideally, the machine code should be hidden by
-default and viewable on request.
-
 The jitviewer is a web application based on flask and jinja2 (and jQuery on
 the client): if you have great web developing skills and want to help PyPy,
 this is an ideal task to get started, because it does not require any deep
diff --git a/pypy/doc/release-1.6.0.rst b/pypy/doc/release-1.6.0.rst
new file mode 100644
--- /dev/null
+++ b/pypy/doc/release-1.6.0.rst
@@ -0,0 +1,95 @@
+========================
+PyPy 1.6 - kickass panda
+========================
+
+We're pleased to announce the 1.6 release of PyPy. This release brings a lot
+of bugfixes and performance improvements over 1.5, and improves support for
+Windows 32bit and OS X 64bit. This version fully implements Python 2.7.1 and
+has beta level support for loading CPython C extensions.  You can download it
+here:
+
+    http://pypy.org/download.html
+
+What is PyPy?
+=============
+
+PyPy is a very compliant Python interpreter, almost a drop-in replacement for
+CPython 2.7.1. It's fast (`pypy 1.5 and cpython 2.6.2`_ performance comparison)
+due to its integrated tracing JIT compiler.
+
+This release supports x86 machines running Linux 32/64 or Mac OS X.  Windows 32
+is beta (it roughly works but a lot of small issues have not been fixed so
+far).  Windows 64 is not yet supported.
+
+The main topics of this release are speed and stability: on average on
+our benchmark suite, PyPy 1.6 is between **20% and 30%** faster than PyPy 1.5,
+which was already much faster than CPython on our set of benchmarks.
+
+The speed improvements have been made possible by optimizing many of the
+layers which compose PyPy.  In particular, we improved: the Garbage Collector,
+the JIT warmup time, the optimizations performed by the JIT, the quality of
+the generated machine code and the implementation of our Python interpreter.
+
+.. _`pypy 1.5 and cpython 2.6.2`: http://speed.pypy.org
+
+
+Highlights
+==========
+
+* Numerous performance improvements, overall giving considerable speedups:
+
+  - better GC behavior when dealing with very large objects and arrays
+
+  - **fast ctypes:** now calls to ctypes functions are seen and optimized
+    by the JIT, and they are up to 60 times faster than PyPy 1.5 and 10 times
+    faster than CPython
+
+  - improved generators (1): simple generators are now inlined into the caller
+    loop, making them up to 3.5 times faster than in PyPy 1.5.
+
+  - improved generators (2): thanks to other optimizations, even generators
+    that are not inlined are between 10% and 20% faster than in PyPy 1.5.
+
+  - faster warmup time for the JIT
+
+  - JIT support for single floats (e.g., for ``array('f')``)
+
+  - optimized dictionaries: the internal representation of dictionaries is now
+    dynamically selected depending on the type of stored objects, resulting in
+    faster code and smaller memory footprint.  For example, dictionaries whose
+    keys are all strings, or all integers, get a specialized representation.
+    Other dictionaries are also smaller due to bugfixes.
+
+* JitViewer: this is the first official release which includes the JitViewer,
+  a web-based tool which helps you to see which parts of your Python code have
+  been compiled by the JIT, down until the assembler. The `jitviewer`_ 0.1 has
+  already been release and works well with PyPy 1.6.
+
+* The CPython extension module API has been improved and now supports many
+  more extensions. For information on which ones are supported, please refer to
+  our `compatibility wiki`_.
+
+* Multibyte encoding support: this was one of the last areas in which we were
+  still behind CPython, but now we fully support them.
+
+* Preliminary support for NumPy: this release includes a preview of a very
+  fast NumPy module integrated with the PyPy JIT.  Unfortunately, this does
+  not mean that you can expect to take an existing NumPy program and run it on
+  PyPy, because the module is still unfinished and supports only some of the
+  numpy API. However, barring some details, what works should be
+  blazingly fast :-)
+
+* Bugfixes: since the 1.5 release we fixed 53 bugs in our `bug tracker`_, not
+  counting the numerous bugs that were found and reported through other
+  channels than the bug tracker.
+
+Cheers,
+
+Hakan Ardo, Carl Friedrich Bolz, Laura Creighton, Antonio Cuni,
+Maciej Fijalkowski, Amaury Forgeot d'Arc, Alex Gaynor,
+Armin Rigo and the PyPy team
+
+.. _`jitviewer`: http://morepypy.blogspot.com/2011/08/visualization-of-jitted-code.html
+.. _`bug tracker`: https://bugs.pypy.org
+.. _`compatibility wiki`: https://bitbucket.org/pypy/compatibility/wiki/Home
+
diff --git a/pypy/doc/windows.rst b/pypy/doc/windows.rst
--- a/pypy/doc/windows.rst
+++ b/pypy/doc/windows.rst
@@ -32,6 +32,24 @@
 modules that relies on third-party libraries.  See below how to get
 and build them.
 
+Prepping Windows for the Large Build
+------------------------------------
+
+Normally 32bit programs are limited to 2GB of memory on Windows. It is
+possible to raise this limit, to 3GB on Windows 32bit, and almost 4GB
+on Windows 64bit.
+
+On Windows 32bit, it is necessary to modify the system: follow
+http://usa.autodesk.com/adsk/servlet/ps/dl/item?siteID=123112&id=9583842&linkID=9240617
+to enable the "3GB" feature, and reboot. This step is not necessary on
+Windows 64bit.
+
+Then you need to execute::
+
+    editbin /largeaddressaware pypy.exe
+
+on the pypy.exe file you compiled.
+
 Installing external packages
 ----------------------------
 
diff --git a/pypy/interpreter/astcompiler/ast.py b/pypy/interpreter/astcompiler/ast.py
--- a/pypy/interpreter/astcompiler/ast.py
+++ b/pypy/interpreter/astcompiler/ast.py
@@ -2541,8 +2541,9 @@
 class ASTVisitor(object):
 
     def visit_sequence(self, seq):
-        for node in seq:
-            node.walkabout(self)
+        if seq is not None:
+            for node in seq:
+                node.walkabout(self)
 
     def default_visitor(self, node):
         raise NodeVisitorNotImplemented
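
With the ``None`` check moved into ``visit_sequence`` itself, the many
``if node.xyz:`` guards below become redundant.  A reduced, self-contained
sketch of the new behaviour::

    class Visitor(object):
        def visit_sequence(self, seq):
            if seq is not None:              # the guard now lives here
                for node in seq:
                    node.walkabout(self)

    class Leaf(object):
        def walkabout(self, visitor):
            print 'visited'

    v = Visitor()
    v.visit_sequence([Leaf(), Leaf()])       # visits both children
    v.visit_sequence(None)                   # a missing child list is a no-op
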
@@ -2673,46 +2674,36 @@
 class GenericASTVisitor(ASTVisitor):
 
     def visit_Module(self, node):
-        if node.body:
-            self.visit_sequence(node.body)
+        self.visit_sequence(node.body)
 
     def visit_Interactive(self, node):
-        if node.body:
-            self.visit_sequence(node.body)
+        self.visit_sequence(node.body)
 
     def visit_Expression(self, node):
         node.body.walkabout(self)
 
     def visit_Suite(self, node):
-        if node.body:
-            self.visit_sequence(node.body)
+        self.visit_sequence(node.body)
 
     def visit_FunctionDef(self, node):
         node.args.walkabout(self)
-        if node.body:
-            self.visit_sequence(node.body)
-        if node.decorator_list:
-            self.visit_sequence(node.decorator_list)
+        self.visit_sequence(node.body)
+        self.visit_sequence(node.decorator_list)
 
     def visit_ClassDef(self, node):
-        if node.bases:
-            self.visit_sequence(node.bases)
-        if node.body:
-            self.visit_sequence(node.body)
-        if node.decorator_list:
-            self.visit_sequence(node.decorator_list)
+        self.visit_sequence(node.bases)
+        self.visit_sequence(node.body)
+        self.visit_sequence(node.decorator_list)
 
     def visit_Return(self, node):
         if node.value:
             node.value.walkabout(self)
 
     def visit_Delete(self, node):
-        if node.targets:
-            self.visit_sequence(node.targets)
+        self.visit_sequence(node.targets)
 
     def visit_Assign(self, node):
-        if node.targets:
-            self.visit_sequence(node.targets)
+        self.visit_sequence(node.targets)
         node.value.walkabout(self)
 
     def visit_AugAssign(self, node):
@@ -2722,37 +2713,29 @@
     def visit_Print(self, node):
         if node.dest:
             node.dest.walkabout(self)
-        if node.values:
-            self.visit_sequence(node.values)
+        self.visit_sequence(node.values)
 
     def visit_For(self, node):
         node.target.walkabout(self)
         node.iter.walkabout(self)
-        if node.body:
-            self.visit_sequence(node.body)
-        if node.orelse:
-            self.visit_sequence(node.orelse)
+        self.visit_sequence(node.body)
+        self.visit_sequence(node.orelse)
 
     def visit_While(self, node):
         node.test.walkabout(self)
-        if node.body:
-            self.visit_sequence(node.body)
-        if node.orelse:
-            self.visit_sequence(node.orelse)
+        self.visit_sequence(node.body)
+        self.visit_sequence(node.orelse)
 
     def visit_If(self, node):
         node.test.walkabout(self)
-        if node.body:
-            self.visit_sequence(node.body)
-        if node.orelse:
-            self.visit_sequence(node.orelse)
+        self.visit_sequence(node.body)
+        self.visit_sequence(node.orelse)
 
     def visit_With(self, node):
         node.context_expr.walkabout(self)
         if node.optional_vars:
             node.optional_vars.walkabout(self)
-        if node.body:
-            self.visit_sequence(node.body)
+        self.visit_sequence(node.body)
 
     def visit_Raise(self, node):
         if node.type:
@@ -2763,18 +2746,13 @@
             node.tback.walkabout(self)
 
     def visit_TryExcept(self, node):
-        if node.body:
-            self.visit_sequence(node.body)
-        if node.handlers:
-            self.visit_sequence(node.handlers)
-        if node.orelse:
-            self.visit_sequence(node.orelse)
+        self.visit_sequence(node.body)
+        self.visit_sequence(node.handlers)
+        self.visit_sequence(node.orelse)
 
     def visit_TryFinally(self, node):
-        if node.body:
-            self.visit_sequence(node.body)
-        if node.finalbody:
-            self.visit_sequence(node.finalbody)
+        self.visit_sequence(node.body)
+        self.visit_sequence(node.finalbody)
 
     def visit_Assert(self, node):
         node.test.walkabout(self)
@@ -2782,12 +2760,10 @@
             node.msg.walkabout(self)
 
     def visit_Import(self, node):
-        if node.names:
-            self.visit_sequence(node.names)
+        self.visit_sequence(node.names)
 
     def visit_ImportFrom(self, node):
-        if node.names:
-            self.visit_sequence(node.names)
+        self.visit_sequence(node.names)
 
     def visit_Exec(self, node):
         node.body.walkabout(self)
@@ -2812,8 +2788,7 @@
         pass
 
     def visit_BoolOp(self, node):
-        if node.values:
-            self.visit_sequence(node.values)
+        self.visit_sequence(node.values)
 
     def visit_BinOp(self, node):
         node.left.walkabout(self)
@@ -2832,35 +2807,28 @@
         node.orelse.walkabout(self)
 
     def visit_Dict(self, node):
-        if node.keys:
-            self.visit_sequence(node.keys)
-        if node.values:
-            self.visit_sequence(node.values)
+        self.visit_sequence(node.keys)
+        self.visit_sequence(node.values)
 
     def visit_Set(self, node):
-        if node.elts:
-            self.visit_sequence(node.elts)
+        self.visit_sequence(node.elts)
 
     def visit_ListComp(self, node):
         node.elt.walkabout(self)
-        if node.generators:
-            self.visit_sequence(node.generators)
+        self.visit_sequence(node.generators)
 
     def visit_SetComp(self, node):
         node.elt.walkabout(self)
-        if node.generators:
-            self.visit_sequence(node.generators)
+        self.visit_sequence(node.generators)
 
     def visit_DictComp(self, node):
         node.key.walkabout(self)
         node.value.walkabout(self)
-        if node.generators:
-            self.visit_sequence(node.generators)
+        self.visit_sequence(node.generators)
 
     def visit_GeneratorExp(self, node):
         node.elt.walkabout(self)
-        if node.generators:
-            self.visit_sequence(node.generators)
+        self.visit_sequence(node.generators)
 
     def visit_Yield(self, node):
         if node.value:
@@ -2868,15 +2836,12 @@
 
     def visit_Compare(self, node):
         node.left.walkabout(self)
-        if node.comparators:
-            self.visit_sequence(node.comparators)
+        self.visit_sequence(node.comparators)
 
     def visit_Call(self, node):
         node.func.walkabout(self)
-        if node.args:
-            self.visit_sequence(node.args)
-        if node.keywords:
-            self.visit_sequence(node.keywords)
+        self.visit_sequence(node.args)
+        self.visit_sequence(node.keywords)
         if node.starargs:
             node.starargs.walkabout(self)
         if node.kwargs:
@@ -2902,12 +2867,10 @@
         pass
 
     def visit_List(self, node):
-        if node.elts:
-            self.visit_sequence(node.elts)
+        self.visit_sequence(node.elts)
 
     def visit_Tuple(self, node):
-        if node.elts:
-            self.visit_sequence(node.elts)
+        self.visit_sequence(node.elts)
 
     def visit_Const(self, node):
         pass
@@ -2924,8 +2887,7 @@
             node.step.walkabout(self)
 
     def visit_ExtSlice(self, node):
-        if node.dims:
-            self.visit_sequence(node.dims)
+        self.visit_sequence(node.dims)
 
     def visit_Index(self, node):
         node.value.walkabout(self)
@@ -2933,22 +2895,18 @@
     def visit_comprehension(self, node):
         node.target.walkabout(self)
         node.iter.walkabout(self)
-        if node.ifs:
-            self.visit_sequence(node.ifs)
+        self.visit_sequence(node.ifs)
 
     def visit_ExceptHandler(self, node):
         if node.type:
             node.type.walkabout(self)
         if node.name:
             node.name.walkabout(self)
-        if node.body:
-            self.visit_sequence(node.body)
+        self.visit_sequence(node.body)
 
     def visit_arguments(self, node):
-        if node.args:
-            self.visit_sequence(node.args)
-        if node.defaults:
-            self.visit_sequence(node.defaults)
+        self.visit_sequence(node.args)
+        self.visit_sequence(node.defaults)
 
     def visit_keyword(self, node):
         node.value.walkabout(self)
@@ -3069,6 +3027,7 @@
             raise
         w_self.setdictvalue(space, 'body', w_new_value)
         return
+    w_self.deldictvalue(space, 'body')
     w_self.initialization_state |= 1
 
 _Expression_field_unroller = unrolling_iterable(['body'])
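
The many new ``deldictvalue`` calls below all follow one pattern: once a field
has been converted successfully, any raw object stored earlier in the instance
dict (after a previously failed conversion) must stop shadowing it.
Schematically, in plain Python (``Node`` and its attributes are invented; this
is not PyPy's real object API)::

    class Node(object):
        def set_value(self, w_new_value):
            try:
                self._value = int(w_new_value)         # stands in for interp_w()
            except (TypeError, ValueError):
                # conversion failed: remember the raw object (setdictvalue)
                self.__dict__['value'] = w_new_value
                return
            # conversion succeeded: drop a stale shadowing entry (deldictvalue)
            self.__dict__.pop('value', None)

    n = Node()
    n.set_value("not an int")    # stored raw in the instance dict
    n.set_value(42)              # converted; the stale raw value is dropped
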
@@ -3157,6 +3116,7 @@
             raise
         w_self.setdictvalue(space, 'lineno', w_new_value)
         return
+    w_self.deldictvalue(space, 'lineno')
     w_self.initialization_state |= w_self._lineno_mask
 
 def stmt_get_col_offset(space, w_self):
@@ -3178,6 +3138,7 @@
             raise
         w_self.setdictvalue(space, 'col_offset', w_new_value)
         return
+    w_self.deldictvalue(space, 'col_offset')
     w_self.initialization_state |= w_self._col_offset_mask
 
 stmt.typedef = typedef.TypeDef("stmt",
@@ -3208,6 +3169,7 @@
             raise
         w_self.setdictvalue(space, 'name', w_new_value)
         return
+    w_self.deldictvalue(space, 'name')
     w_self.initialization_state |= 1
 
 def FunctionDef_get_args(space, w_self):
@@ -3229,6 +3191,7 @@
             raise
         w_self.setdictvalue(space, 'args', w_new_value)
         return
+    w_self.deldictvalue(space, 'args')
     w_self.initialization_state |= 2
 
 def FunctionDef_get_body(space, w_self):
@@ -3315,6 +3278,7 @@
             raise
         w_self.setdictvalue(space, 'name', w_new_value)
         return
+    w_self.deldictvalue(space, 'name')
     w_self.initialization_state |= 1
 
 def ClassDef_get_bases(space, w_self):
@@ -3420,6 +3384,7 @@
             raise
         w_self.setdictvalue(space, 'value', w_new_value)
         return
+    w_self.deldictvalue(space, 'value')
     w_self.initialization_state |= 1
 
 _Return_field_unroller = unrolling_iterable(['value'])
@@ -3526,6 +3491,7 @@
             raise
         w_self.setdictvalue(space, 'value', w_new_value)
         return
+    w_self.deldictvalue(space, 'value')
     w_self.initialization_state |= 2
 
 _Assign_field_unroller = unrolling_iterable(['targets', 'value'])
@@ -3573,6 +3539,7 @@
             raise
         w_self.setdictvalue(space, 'target', w_new_value)
         return
+    w_self.deldictvalue(space, 'target')
     w_self.initialization_state |= 1
 
 def AugAssign_get_op(space, w_self):
@@ -3590,13 +3557,13 @@
     try:
         obj = space.interp_w(operator, w_new_value)
         w_self.op = obj.to_simple_int(space)
-        # need to save the original object too
-        w_self.setdictvalue(space, 'op', w_new_value)
     except OperationError, e:
         if not e.match(space, space.w_TypeError):
             raise
         w_self.setdictvalue(space, 'op', w_new_value)
         return
+    # need to save the original object too
+    w_self.setdictvalue(space, 'op', w_new_value)
     w_self.initialization_state |= 2
 
 def AugAssign_get_value(space, w_self):
@@ -3618,6 +3585,7 @@
             raise
         w_self.setdictvalue(space, 'value', w_new_value)
         return
+    w_self.deldictvalue(space, 'value')
     w_self.initialization_state |= 4
 
 _AugAssign_field_unroller = unrolling_iterable(['target', 'op', 'value'])
@@ -3665,6 +3633,7 @@
             raise
         w_self.setdictvalue(space, 'dest', w_new_value)
         return
+    w_self.deldictvalue(space, 'dest')
     w_self.initialization_state |= 1
 
 def Print_get_values(space, w_self):
@@ -3704,6 +3673,7 @@
             raise
         w_self.setdictvalue(space, 'nl', w_new_value)
         return
+    w_self.deldictvalue(space, 'nl')
     w_self.initialization_state |= 4
 
 _Print_field_unroller = unrolling_iterable(['dest', 'values', 'nl'])
@@ -3752,6 +3722,7 @@
             raise
         w_self.setdictvalue(space, 'target', w_new_value)
         return
+    w_self.deldictvalue(space, 'target')
     w_self.initialization_state |= 1
 
 def For_get_iter(space, w_self):
@@ -3773,6 +3744,7 @@
             raise
         w_self.setdictvalue(space, 'iter', w_new_value)
         return
+    w_self.deldictvalue(space, 'iter')
     w_self.initialization_state |= 2
 
 def For_get_body(space, w_self):
@@ -3859,6 +3831,7 @@
             raise
         w_self.setdictvalue(space, 'test', w_new_value)
         return
+    w_self.deldictvalue(space, 'test')
     w_self.initialization_state |= 1
 
 def While_get_body(space, w_self):
@@ -3944,6 +3917,7 @@
             raise
         w_self.setdictvalue(space, 'test', w_new_value)
         return
+    w_self.deldictvalue(space, 'test')
     w_self.initialization_state |= 1
 
 def If_get_body(space, w_self):
@@ -4029,6 +4003,7 @@
             raise
         w_self.setdictvalue(space, 'context_expr', w_new_value)
         return
+    w_self.deldictvalue(space, 'context_expr')
     w_self.initialization_state |= 1
 
 def With_get_optional_vars(space, w_self):
@@ -4050,6 +4025,7 @@
             raise
         w_self.setdictvalue(space, 'optional_vars', w_new_value)
         return
+    w_self.deldictvalue(space, 'optional_vars')
     w_self.initialization_state |= 2
 
 def With_get_body(space, w_self):
@@ -4116,6 +4092,7 @@
             raise
         w_self.setdictvalue(space, 'type', w_new_value)
         return
+    w_self.deldictvalue(space, 'type')
     w_self.initialization_state |= 1
 
 def Raise_get_inst(space, w_self):
@@ -4137,6 +4114,7 @@
             raise
         w_self.setdictvalue(space, 'inst', w_new_value)
         return
+    w_self.deldictvalue(space, 'inst')
     w_self.initialization_state |= 2
 
 def Raise_get_tback(space, w_self):
@@ -4158,6 +4136,7 @@
             raise
         w_self.setdictvalue(space, 'tback', w_new_value)
         return
+    w_self.deldictvalue(space, 'tback')
     w_self.initialization_state |= 4
 
 _Raise_field_unroller = unrolling_iterable(['type', 'inst', 'tback'])
@@ -4351,6 +4330,7 @@
             raise
         w_self.setdictvalue(space, 'test', w_new_value)
         return
+    w_self.deldictvalue(space, 'test')
     w_self.initialization_state |= 1
 
 def Assert_get_msg(space, w_self):
@@ -4372,6 +4352,7 @@
             raise
         w_self.setdictvalue(space, 'msg', w_new_value)
         return
+    w_self.deldictvalue(space, 'msg')
     w_self.initialization_state |= 2
 
 _Assert_field_unroller = unrolling_iterable(['test', 'msg'])
@@ -4464,6 +4445,7 @@
             raise
         w_self.setdictvalue(space, 'module', w_new_value)
         return
+    w_self.deldictvalue(space, 'module')
     w_self.initialization_state |= 1
 
 def ImportFrom_get_names(space, w_self):
@@ -4503,6 +4485,7 @@
             raise
         w_self.setdictvalue(space, 'level', w_new_value)
         return
+    w_self.deldictvalue(space, 'level')
     w_self.initialization_state |= 4
 
 _ImportFrom_field_unroller = unrolling_iterable(['module', 'names', 'level'])
@@ -4551,6 +4534,7 @@
             raise
         w_self.setdictvalue(space, 'body', w_new_value)
         return
+    w_self.deldictvalue(space, 'body')
     w_self.initialization_state |= 1
 
 def Exec_get_globals(space, w_self):
@@ -4572,6 +4556,7 @@
             raise
         w_self.setdictvalue(space, 'globals', w_new_value)
         return
+    w_self.deldictvalue(space, 'globals')
     w_self.initialization_state |= 2
 
 def Exec_get_locals(space, w_self):
@@ -4593,6 +4578,7 @@
             raise
         w_self.setdictvalue(space, 'locals', w_new_value)
         return
+    w_self.deldictvalue(space, 'locals')
     w_self.initialization_state |= 4
 
 _Exec_field_unroller = unrolling_iterable(['body', 'globals', 'locals'])
@@ -4683,6 +4669,7 @@
             raise
         w_self.setdictvalue(space, 'value', w_new_value)
         return
+    w_self.deldictvalue(space, 'value')
     w_self.initialization_state |= 1
 
 _Expr_field_unroller = unrolling_iterable(['value'])
@@ -4779,6 +4766,7 @@
             raise
         w_self.setdictvalue(space, 'lineno', w_new_value)
         return
+    w_self.deldictvalue(space, 'lineno')
     w_self.initialization_state |= w_self._lineno_mask
 
 def expr_get_col_offset(space, w_self):
@@ -4800,6 +4788,7 @@
             raise
         w_self.setdictvalue(space, 'col_offset', w_new_value)
         return
+    w_self.deldictvalue(space, 'col_offset')
     w_self.initialization_state |= w_self._col_offset_mask
 
 expr.typedef = typedef.TypeDef("expr",
@@ -4826,13 +4815,13 @@
     try:
         obj = space.interp_w(boolop, w_new_value)
         w_self.op = obj.to_simple_int(space)
-        # need to save the original object too
-        w_self.setdictvalue(space, 'op', w_new_value)
     except OperationError, e:
         if not e.match(space, space.w_TypeError):
             raise
         w_self.setdictvalue(space, 'op', w_new_value)
         return
+    # need to save the original object too
+    w_self.setdictvalue(space, 'op', w_new_value)
     w_self.initialization_state |= 1
 
 def BoolOp_get_values(space, w_self):
@@ -4898,6 +4887,7 @@
             raise
         w_self.setdictvalue(space, 'left', w_new_value)
         return
+    w_self.deldictvalue(space, 'left')
     w_self.initialization_state |= 1
 
 def BinOp_get_op(space, w_self):
@@ -4915,13 +4905,13 @@
     try:
         obj = space.interp_w(operator, w_new_value)
         w_self.op = obj.to_simple_int(space)
-        # need to save the original object too
-        w_self.setdictvalue(space, 'op', w_new_value)
     except OperationError, e:
         if not e.match(space, space.w_TypeError):
             raise
         w_self.setdictvalue(space, 'op', w_new_value)
         return
+    # need to save the original object too
+    w_self.setdictvalue(space, 'op', w_new_value)
     w_self.initialization_state |= 2
 
 def BinOp_get_right(space, w_self):
@@ -4943,6 +4933,7 @@
             raise
         w_self.setdictvalue(space, 'right', w_new_value)
         return
+    w_self.deldictvalue(space, 'right')
     w_self.initialization_state |= 4
 
 _BinOp_field_unroller = unrolling_iterable(['left', 'op', 'right'])
@@ -4986,13 +4977,13 @@
     try:
         obj = space.interp_w(unaryop, w_new_value)
         w_self.op = obj.to_simple_int(space)
-        # need to save the original object too
-        w_self.setdictvalue(space, 'op', w_new_value)
     except OperationError, e:
         if not e.match(space, space.w_TypeError):
             raise
         w_self.setdictvalue(space, 'op', w_new_value)
         return
+    # need to save the original object too
+    w_self.setdictvalue(space, 'op', w_new_value)
     w_self.initialization_state |= 1
 
 def UnaryOp_get_operand(space, w_self):
@@ -5014,6 +5005,7 @@
             raise
         w_self.setdictvalue(space, 'operand', w_new_value)
         return
+    w_self.deldictvalue(space, 'operand')
     w_self.initialization_state |= 2
 
 _UnaryOp_field_unroller = unrolling_iterable(['op', 'operand'])
@@ -5060,6 +5052,7 @@
             raise
         w_self.setdictvalue(space, 'args', w_new_value)
         return
+    w_self.deldictvalue(space, 'args')
     w_self.initialization_state |= 1
 
 def Lambda_get_body(space, w_self):
@@ -5081,6 +5074,7 @@
             raise
         w_self.setdictvalue(space, 'body', w_new_value)
         return
+    w_self.deldictvalue(space, 'body')
     w_self.initialization_state |= 2
 
 _Lambda_field_unroller = unrolling_iterable(['args', 'body'])
@@ -5127,6 +5121,7 @@
             raise
         w_self.setdictvalue(space, 'test', w_new_value)
         return
+    w_self.deldictvalue(space, 'test')
     w_self.initialization_state |= 1
 
 def IfExp_get_body(space, w_self):
@@ -5148,6 +5143,7 @@
             raise
         w_self.setdictvalue(space, 'body', w_new_value)
         return
+    w_self.deldictvalue(space, 'body')
     w_self.initialization_state |= 2
 
 def IfExp_get_orelse(space, w_self):
@@ -5169,6 +5165,7 @@
             raise
         w_self.setdictvalue(space, 'orelse', w_new_value)
         return
+    w_self.deldictvalue(space, 'orelse')
     w_self.initialization_state |= 4
 
 _IfExp_field_unroller = unrolling_iterable(['test', 'body', 'orelse'])
@@ -5322,6 +5319,7 @@
             raise
         w_self.setdictvalue(space, 'elt', w_new_value)
         return
+    w_self.deldictvalue(space, 'elt')
     w_self.initialization_state |= 1
 
 def ListComp_get_generators(space, w_self):
@@ -5387,6 +5385,7 @@
             raise
         w_self.setdictvalue(space, 'elt', w_new_value)
         return
+    w_self.deldictvalue(space, 'elt')
     w_self.initialization_state |= 1
 
 def SetComp_get_generators(space, w_self):
@@ -5452,6 +5451,7 @@
             raise
         w_self.setdictvalue(space, 'key', w_new_value)
         return
+    w_self.deldictvalue(space, 'key')
     w_self.initialization_state |= 1
 
 def DictComp_get_value(space, w_self):
@@ -5473,6 +5473,7 @@
             raise
         w_self.setdictvalue(space, 'value', w_new_value)
         return
+    w_self.deldictvalue(space, 'value')
     w_self.initialization_state |= 2
 
 def DictComp_get_generators(space, w_self):
@@ -5539,6 +5540,7 @@
             raise
         w_self.setdictvalue(space, 'elt', w_new_value)
         return
+    w_self.deldictvalue(space, 'elt')
     w_self.initialization_state |= 1
 
 def GeneratorExp_get_generators(space, w_self):
@@ -5604,6 +5606,7 @@
             raise
         w_self.setdictvalue(space, 'value', w_new_value)
         return
+    w_self.deldictvalue(space, 'value')
     w_self.initialization_state |= 1
 
 _Yield_field_unroller = unrolling_iterable(['value'])
@@ -5649,6 +5652,7 @@
             raise
         w_self.setdictvalue(space, 'left', w_new_value)
         return
+    w_self.deldictvalue(space, 'left')
     w_self.initialization_state |= 1
 
 def Compare_get_ops(space, w_self):
@@ -5734,6 +5738,7 @@
             raise
         w_self.setdictvalue(space, 'func', w_new_value)
         return
+    w_self.deldictvalue(space, 'func')
     w_self.initialization_state |= 1
 
 def Call_get_args(space, w_self):
@@ -5791,6 +5796,7 @@
             raise
         w_self.setdictvalue(space, 'starargs', w_new_value)
         return
+    w_self.deldictvalue(space, 'starargs')
     w_self.initialization_state |= 8
 
 def Call_get_kwargs(space, w_self):
@@ -5812,6 +5818,7 @@
             raise
         w_self.setdictvalue(space, 'kwargs', w_new_value)
         return
+    w_self.deldictvalue(space, 'kwargs')
     w_self.initialization_state |= 16
 
 _Call_field_unroller = unrolling_iterable(['func', 'args', 'keywords', 'starargs', 'kwargs'])
@@ -5863,6 +5870,7 @@
             raise
         w_self.setdictvalue(space, 'value', w_new_value)
         return
+    w_self.deldictvalue(space, 'value')
     w_self.initialization_state |= 1
 
 _Repr_field_unroller = unrolling_iterable(['value'])
@@ -5908,6 +5916,7 @@
             raise
         w_self.setdictvalue(space, 'n', w_new_value)
         return
+    w_self.deldictvalue(space, 'n')
     w_self.initialization_state |= 1
 
 _Num_field_unroller = unrolling_iterable(['n'])
@@ -5953,6 +5962,7 @@
             raise
         w_self.setdictvalue(space, 's', w_new_value)
         return
+    w_self.deldictvalue(space, 's')
     w_self.initialization_state |= 1
 
 _Str_field_unroller = unrolling_iterable(['s'])
@@ -5998,6 +6008,7 @@
             raise
         w_self.setdictvalue(space, 'value', w_new_value)
         return
+    w_self.deldictvalue(space, 'value')
     w_self.initialization_state |= 1
 
 def Attribute_get_attr(space, w_self):
@@ -6019,6 +6030,7 @@
             raise
         w_self.setdictvalue(space, 'attr', w_new_value)
         return
+    w_self.deldictvalue(space, 'attr')
     w_self.initialization_state |= 2
 
 def Attribute_get_ctx(space, w_self):
@@ -6036,13 +6048,13 @@
     try:
         obj = space.interp_w(expr_context, w_new_value)
         w_self.ctx = obj.to_simple_int(space)
-        # need to save the original object too
-        w_self.setdictvalue(space, 'ctx', w_new_value)
     except OperationError, e:
         if not e.match(space, space.w_TypeError):
             raise
         w_self.setdictvalue(space, 'ctx', w_new_value)
         return
+    # need to save the original object too
+    w_self.setdictvalue(space, 'ctx', w_new_value)
     w_self.initialization_state |= 4
 
 _Attribute_field_unroller = unrolling_iterable(['value', 'attr', 'ctx'])
@@ -6090,6 +6102,7 @@
             raise
         w_self.setdictvalue(space, 'value', w_new_value)
         return
+    w_self.deldictvalue(space, 'value')
     w_self.initialization_state |= 1
 
 def Subscript_get_slice(space, w_self):
@@ -6111,6 +6124,7 @@
             raise
         w_self.setdictvalue(space, 'slice', w_new_value)
         return
+    w_self.deldictvalue(space, 'slice')
     w_self.initialization_state |= 2
 
 def Subscript_get_ctx(space, w_self):
@@ -6128,13 +6142,13 @@
     try:
         obj = space.interp_w(expr_context, w_new_value)
         w_self.ctx = obj.to_simple_int(space)
-        # need to save the original object too
-        w_self.setdictvalue(space, 'ctx', w_new_value)
     except OperationError, e:
         if not e.match(space, space.w_TypeError):
             raise
         w_self.setdictvalue(space, 'ctx', w_new_value)
         return
+    # need to save the original object too
+    w_self.setdictvalue(space, 'ctx', w_new_value)
     w_self.initialization_state |= 4
 
 _Subscript_field_unroller = unrolling_iterable(['value', 'slice', 'ctx'])
@@ -6182,6 +6196,7 @@
             raise
         w_self.setdictvalue(space, 'id', w_new_value)
         return
+    w_self.deldictvalue(space, 'id')
     w_self.initialization_state |= 1
 
 def Name_get_ctx(space, w_self):
@@ -6199,13 +6214,13 @@
     try:
         obj = space.interp_w(expr_context, w_new_value)
         w_self.ctx = obj.to_simple_int(space)
-        # need to save the original object too
-        w_self.setdictvalue(space, 'ctx', w_new_value)
     except OperationError, e:
         if not e.match(space, space.w_TypeError):
             raise
         w_self.setdictvalue(space, 'ctx', w_new_value)
         return
+    # need to save the original object too
+    w_self.setdictvalue(space, 'ctx', w_new_value)
     w_self.initialization_state |= 2
 
 _Name_field_unroller = unrolling_iterable(['id', 'ctx'])
@@ -6266,13 +6281,13 @@
     try:
         obj = space.interp_w(expr_context, w_new_value)
         w_self.ctx = obj.to_simple_int(space)
-        # need to save the original object too
-        w_self.setdictvalue(space, 'ctx', w_new_value)
     except OperationError, e:
         if not e.match(space, space.w_TypeError):
             raise
         w_self.setdictvalue(space, 'ctx', w_new_value)
         return
+    # need to save the original object too
+    w_self.setdictvalue(space, 'ctx', w_new_value)
     w_self.initialization_state |= 2
 
 _List_field_unroller = unrolling_iterable(['elts', 'ctx'])
@@ -6334,13 +6349,13 @@
     try:
         obj = space.interp_w(expr_context, w_new_value)
         w_self.ctx = obj.to_simple_int(space)
-        # need to save the original object too
-        w_self.setdictvalue(space, 'ctx', w_new_value)
     except OperationError, e:
         if not e.match(space, space.w_TypeError):
             raise
         w_self.setdictvalue(space, 'ctx', w_new_value)
         return
+    # need to save the original object too
+    w_self.setdictvalue(space, 'ctx', w_new_value)
     w_self.initialization_state |= 2
 
 _Tuple_field_unroller = unrolling_iterable(['elts', 'ctx'])
@@ -6388,6 +6403,7 @@
             raise
         w_self.setdictvalue(space, 'value', w_new_value)
         return
+    w_self.deldictvalue(space, 'value')
     w_self.initialization_state |= 1
 
 _Const_field_unroller = unrolling_iterable(['value'])
@@ -6506,6 +6522,7 @@
             raise
         w_self.setdictvalue(space, 'lower', w_new_value)
         return
+    w_self.deldictvalue(space, 'lower')
     w_self.initialization_state |= 1
 
 def Slice_get_upper(space, w_self):
@@ -6527,6 +6544,7 @@
             raise
         w_self.setdictvalue(space, 'upper', w_new_value)
         return
+    w_self.deldictvalue(space, 'upper')
     w_self.initialization_state |= 2
 
 def Slice_get_step(space, w_self):
@@ -6548,6 +6566,7 @@
             raise
         w_self.setdictvalue(space, 'step', w_new_value)
         return
+    w_self.deldictvalue(space, 'step')
     w_self.initialization_state |= 4
 
 _Slice_field_unroller = unrolling_iterable(['lower', 'upper', 'step'])
@@ -6638,6 +6657,7 @@
             raise
         w_self.setdictvalue(space, 'value', w_new_value)
         return
+    w_self.deldictvalue(space, 'value')
     w_self.initialization_state |= 1
 
 _Index_field_unroller = unrolling_iterable(['value'])
@@ -6907,6 +6927,7 @@
             raise
         w_self.setdictvalue(space, 'target', w_new_value)
         return
+    w_self.deldictvalue(space, 'target')
     w_self.initialization_state |= 1
 
 def comprehension_get_iter(space, w_self):
@@ -6928,6 +6949,7 @@
             raise
         w_self.setdictvalue(space, 'iter', w_new_value)
         return
+    w_self.deldictvalue(space, 'iter')
     w_self.initialization_state |= 2
 
 def comprehension_get_ifs(space, w_self):
@@ -6994,6 +7016,7 @@
             raise
         w_self.setdictvalue(space, 'lineno', w_new_value)
         return
+    w_self.deldictvalue(space, 'lineno')
     w_self.initialization_state |= w_self._lineno_mask
 
 def excepthandler_get_col_offset(space, w_self):
@@ -7015,6 +7038,7 @@
             raise
         w_self.setdictvalue(space, 'col_offset', w_new_value)
         return
+    w_self.deldictvalue(space, 'col_offset')
     w_self.initialization_state |= w_self._col_offset_mask
 
 excepthandler.typedef = typedef.TypeDef("excepthandler",
@@ -7045,6 +7069,7 @@
             raise
         w_self.setdictvalue(space, 'type', w_new_value)
         return
+    w_self.deldictvalue(space, 'type')
     w_self.initialization_state |= 1
 
 def ExceptHandler_get_name(space, w_self):
@@ -7066,6 +7091,7 @@
             raise
         w_self.setdictvalue(space, 'name', w_new_value)
         return
+    w_self.deldictvalue(space, 'name')
     w_self.initialization_state |= 2
 
 def ExceptHandler_get_body(space, w_self):
@@ -7153,6 +7179,7 @@
             raise
         w_self.setdictvalue(space, 'vararg', w_new_value)
         return
+    w_self.deldictvalue(space, 'vararg')
     w_self.initialization_state |= 2
 
 def arguments_get_kwarg(space, w_self):
@@ -7177,6 +7204,7 @@
             raise
         w_self.setdictvalue(space, 'kwarg', w_new_value)
         return
+    w_self.deldictvalue(space, 'kwarg')
     w_self.initialization_state |= 4
 
 def arguments_get_defaults(space, w_self):
@@ -7245,6 +7273,7 @@
             raise
         w_self.setdictvalue(space, 'arg', w_new_value)
         return
+    w_self.deldictvalue(space, 'arg')
     w_self.initialization_state |= 1
 
 def keyword_get_value(space, w_self):
@@ -7266,6 +7295,7 @@
             raise
         w_self.setdictvalue(space, 'value', w_new_value)
         return
+    w_self.deldictvalue(space, 'value')
     w_self.initialization_state |= 2
 
 _keyword_field_unroller = unrolling_iterable(['arg', 'value'])
@@ -7312,6 +7342,7 @@
             raise
         w_self.setdictvalue(space, 'name', w_new_value)
         return
+    w_self.deldictvalue(space, 'name')
     w_self.initialization_state |= 1
 
 def alias_get_asname(space, w_self):
@@ -7336,6 +7367,7 @@
             raise
         w_self.setdictvalue(space, 'asname', w_new_value)
         return
+    w_self.deldictvalue(space, 'asname')
     w_self.initialization_state |= 2
 
 _alias_field_unroller = unrolling_iterable(['name', 'asname'])
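
The long run of ast.py hunks above all apply the same mechanical change to the generated field setters: on a successful typed store the setter now removes any stale copy of the field from the instance dict, and for the expr_context fields the setdictvalue call moves out of the except block onto the success path. A self-contained sketch of the resulting shape, with hypothetical FakeNode/set_n names (the real setters are generated into pypy/interpreter/astcompiler/ast.py by the asdl_py.py change further below):

    class FakeNode(object):
        # hypothetical stand-in for a generated AST node class
        def __init__(self):
            self._dict = {}                    # models the app-level instance dict
            self.initialization_state = 0

        def setdictvalue(self, name, value):
            self._dict[name] = value

        def deldictvalue(self, name):
            self._dict.pop(name, None)

    def set_n(node, new_value):
        # mirrors the generated Num_set_n above (simplified, no object space)
        try:
            if not isinstance(new_value, int):
                raise TypeError
            node.n = new_value                 # typed, interp-level storage
        except TypeError:
            node.setdictvalue('n', new_value)  # odd values live in the dict
            return
        node.deldictvalue('n')                 # new: drop any stale shadow copy
        node.initialization_state |= 1

    node = FakeNode()
    set_n(node, "not an int")                  # falls back to the instance dict
    set_n(node, 42)                            # typed store wins, shadow removed
    assert node.n == 42 and 'n' not in node._dict
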
diff --git a/pypy/interpreter/astcompiler/codegen.py b/pypy/interpreter/astcompiler/codegen.py
--- a/pypy/interpreter/astcompiler/codegen.py
+++ b/pypy/interpreter/astcompiler/codegen.py
@@ -295,15 +295,11 @@
     def visit_FunctionDef(self, func):
         self.update_position(func.lineno, True)
         # Load decorators first, but apply them after the function is created.
-        if func.decorator_list:
-            self.visit_sequence(func.decorator_list)
+        self.visit_sequence(func.decorator_list)
         args = func.args
         assert isinstance(args, ast.arguments)
-        if args.defaults:
-            self.visit_sequence(args.defaults)
-            num_defaults = len(args.defaults)
-        else:
-            num_defaults = 0
+        self.visit_sequence(args.defaults)
+        num_defaults = len(args.defaults) if args.defaults is not None else 0
         code = self.sub_scope(FunctionCodeGenerator, func.name, func,
                               func.lineno)
         self._make_function(code, num_defaults)
@@ -317,24 +313,17 @@
         self.update_position(lam.lineno)
         args = lam.args
         assert isinstance(args, ast.arguments)
-        if args.defaults:
-            self.visit_sequence(args.defaults)
-            default_count = len(args.defaults)
-        else:
-            default_count = 0
+        self.visit_sequence(args.defaults)
+        default_count = len(args.defaults) if args.defaults is not None else 0
         code = self.sub_scope(LambdaCodeGenerator, "<lambda>", lam, lam.lineno)
         self._make_function(code, default_count)
 
     def visit_ClassDef(self, cls):
         self.update_position(cls.lineno, True)
-        if cls.decorator_list:
-            self.visit_sequence(cls.decorator_list)
+        self.visit_sequence(cls.decorator_list)
         self.load_const(self.space.wrap(cls.name))
-        if cls.bases:
-            bases_count = len(cls.bases)
-            self.visit_sequence(cls.bases)
-        else:
-            bases_count = 0
+        self.visit_sequence(cls.bases)
+        bases_count = len(cls.bases) if cls.bases is not None else 0
         self.emit_op_arg(ops.BUILD_TUPLE, bases_count)
         code = self.sub_scope(ClassCodeGenerator, cls.name, cls, cls.lineno)
         self._make_function(code, 0)
@@ -446,8 +435,7 @@
         end = self.new_block()
         test_constant = if_.test.as_constant_truth(self.space)
         if test_constant == optimize.CONST_FALSE:
-            if if_.orelse:
-                self.visit_sequence(if_.orelse)
+            self.visit_sequence(if_.orelse)
         elif test_constant == optimize.CONST_TRUE:
             self.visit_sequence(if_.body)
         else:
@@ -515,16 +503,14 @@
         self.use_next_block(cleanup)
         self.emit_op(ops.POP_BLOCK)
         self.pop_frame_block(F_BLOCK_LOOP, start)
-        if fr.orelse:
-            self.visit_sequence(fr.orelse)
+        self.visit_sequence(fr.orelse)
         self.use_next_block(end)
 
     def visit_While(self, wh):
         self.update_position(wh.lineno, True)
         test_constant = wh.test.as_constant_truth(self.space)
         if test_constant == optimize.CONST_FALSE:
-            if wh.orelse:
-                self.visit_sequence(wh.orelse)
+            self.visit_sequence(wh.orelse)
         else:
             end = self.new_block()
             anchor = None
@@ -544,8 +530,7 @@
                 self.use_next_block(anchor)
                 self.emit_op(ops.POP_BLOCK)
             self.pop_frame_block(F_BLOCK_LOOP, loop)
-            if wh.orelse:
-                self.visit_sequence(wh.orelse)
+            self.visit_sequence(wh.orelse)
             self.use_next_block(end)
 
     def visit_TryExcept(self, te):
@@ -581,8 +566,7 @@
             self.use_next_block(next_except)
         self.emit_op(ops.END_FINALLY)
         self.use_next_block(otherwise)
-        if te.orelse:
-            self.visit_sequence(te.orelse)
+        self.visit_sequence(te.orelse)
         self.use_next_block(end)
 
     def visit_TryFinally(self, tf):
@@ -893,27 +877,19 @@
 
     def visit_Tuple(self, tup):
         self.update_position(tup.lineno)
-        if tup.elts:
-            elt_count = len(tup.elts)
-        else:
-            elt_count = 0
+        elt_count = len(tup.elts) if tup.elts is not None else 0
         if tup.ctx == ast.Store:
             self.emit_op_arg(ops.UNPACK_SEQUENCE, elt_count)
-        if elt_count:
-            self.visit_sequence(tup.elts)
+        self.visit_sequence(tup.elts)
         if tup.ctx == ast.Load:
             self.emit_op_arg(ops.BUILD_TUPLE, elt_count)
 
     def visit_List(self, l):
         self.update_position(l.lineno)
-        if l.elts:
-            elt_count = len(l.elts)
-        else:
-            elt_count = 0
+        elt_count = len(l.elts) if l.elts is not None else 0
         if l.ctx == ast.Store:
             self.emit_op_arg(ops.UNPACK_SEQUENCE, elt_count)
-        if elt_count:
-            self.visit_sequence(l.elts)
+        self.visit_sequence(l.elts)
         if l.ctx == ast.Load:
             self.emit_op_arg(ops.BUILD_LIST, elt_count)
 
@@ -944,11 +920,9 @@
         if self._optimize_method_call(call):
             return
         call.func.walkabout(self)
-        arg = 0
+        arg = len(call.args) if call.args is not None else 0
         call_type = 0
-        if call.args:
-            arg = len(call.args)
-            self.visit_sequence(call.args)
+        self.visit_sequence(call.args)
         if call.keywords:
             self.visit_sequence(call.keywords)
             arg |= len(call.keywords) << 8
@@ -984,16 +958,10 @@
         assert isinstance(attr_lookup, ast.Attribute)
         attr_lookup.value.walkabout(self)
         self.emit_op_name(ops.LOOKUP_METHOD, self.names, attr_lookup.attr)
-        if call.args:
-            self.visit_sequence(call.args)
-            arg_count = len(call.args)
-        else:
-            arg_count = 0
-        if call.keywords:
-            self.visit_sequence(call.keywords)
-            kwarg_count = len(call.keywords)
-        else:
-            kwarg_count = 0
+        self.visit_sequence(call.args)
+        arg_count = len(call.args) if call.args is not None else 0
+        self.visit_sequence(call.keywords)
+        kwarg_count = len(call.keywords) if call.keywords is not None else 0
         self.emit_op_arg(ops.CALL_METHOD, (kwarg_count << 8) | arg_count)
         return True
 
@@ -1251,7 +1219,10 @@
     def _compile(self, func):
         assert isinstance(func, ast.FunctionDef)
         # If there's a docstring, store it as the first constant.
-        doc_expr = self.possible_docstring(func.body[0])
+        if func.body:
+            doc_expr = self.possible_docstring(func.body[0])
+        else:
+            doc_expr = None
         if doc_expr is not None:
             self.add_const(doc_expr.s)
             start = 1
@@ -1263,8 +1234,9 @@
         if args.args:
             self._handle_nested_args(args.args)
             self.argcount = len(args.args)
-        for i in range(start, len(func.body)):
-            func.body[i].walkabout(self)
+        if func.body:
+            for i in range(start, len(func.body)):
+                func.body[i].walkabout(self)
 
 
 class LambdaCodeGenerator(AbstractFunctionCodeGenerator):
diff --git a/pypy/interpreter/astcompiler/misc.py b/pypy/interpreter/astcompiler/misc.py
--- a/pypy/interpreter/astcompiler/misc.py
+++ b/pypy/interpreter/astcompiler/misc.py
@@ -27,9 +27,10 @@
     _emit_syntax_warning(space, w_msg, w_filename, w_lineno, w_offset)
 
 
-def parse_future(tree):
+def parse_future(tree, feature_flags):
     future_lineno = 0
     future_column = 0
+    flags = 0
     have_docstring = False
     body = None
     if isinstance(tree, ast.Module):
@@ -37,7 +38,7 @@
     elif isinstance(tree, ast.Interactive):
         body = tree.body
     if body is None:
-        return 0, 0
+        return 0, 0, 0
     for stmt in body:
         if isinstance(stmt, ast.Expr) and isinstance(stmt.value, ast.Str):
             if have_docstring:
@@ -48,11 +49,16 @@
             if stmt.module == "__future__":
                 future_lineno = stmt.lineno
                 future_column = stmt.col_offset
+                for alias in stmt.names:
+                    assert isinstance(alias, ast.alias)
+                    # If this is an invalid flag, it will be caught later in
+                    # codegen.py.
+                    flags |= feature_flags.get(alias.name, 0)
             else:
                 break
         else:
             break
-    return future_lineno, future_column
+    return flags, future_lineno, future_column
 
 
 class ForbiddenNameAssignment(Exception):
diff --git a/pypy/interpreter/astcompiler/symtable.py b/pypy/interpreter/astcompiler/symtable.py
--- a/pypy/interpreter/astcompiler/symtable.py
+++ b/pypy/interpreter/astcompiler/symtable.py
@@ -356,10 +356,8 @@
         # Function defaults and decorators happen in the outer scope.
         args = func.args
         assert isinstance(args, ast.arguments)
-        if args.defaults:
-            self.visit_sequence(args.defaults)
-        if func.decorator_list:
-            self.visit_sequence(func.decorator_list)
+        self.visit_sequence(args.defaults)
+        self.visit_sequence(func.decorator_list)
         new_scope = FunctionScope(func.name, func.lineno, func.col_offset)
         self.push_scope(new_scope, func)
         func.args.walkabout(self)
@@ -372,10 +370,8 @@
 
     def visit_ClassDef(self, clsdef):
         self.note_symbol(clsdef.name, SYM_ASSIGNED)
-        if clsdef.bases:
-            self.visit_sequence(clsdef.bases)
-        if clsdef.decorator_list:
-            self.visit_sequence(clsdef.decorator_list)
+        self.visit_sequence(clsdef.bases)
+        self.visit_sequence(clsdef.decorator_list)
         self.push_scope(ClassScope(clsdef), clsdef)
         self.visit_sequence(clsdef.body)
         self.pop_scope()
@@ -431,8 +427,7 @@
     def visit_Lambda(self, lamb):
         args = lamb.args
         assert isinstance(args, ast.arguments)
-        if args.defaults:
-            self.visit_sequence(args.defaults)
+        self.visit_sequence(args.defaults)
         new_scope = FunctionScope("lambda", lamb.lineno, lamb.col_offset)
         self.push_scope(new_scope, lamb)
         lamb.args.walkabout(self)
@@ -447,8 +442,7 @@
         self.push_scope(new_scope, node)
         self.implicit_arg(0)
         outer.target.walkabout(self)
-        if outer.ifs:
-            self.visit_sequence(outer.ifs)
+        self.visit_sequence(outer.ifs)
         self.visit_sequence(comps[1:])
         for item in list(consider):
             item.walkabout(self)
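
The codegen.py and symtable.py hunks above assume that visit_sequence now tolerates None (the asdl_py.py change just below emits the None check), so callers drop their "if xs:" guards and compute counts with "len(xs) if xs is not None else 0". A minimal self-contained model with hypothetical Node/Visitor classes (the real visitor base class is generated into ast.py):

    class Node(object):
        def walkabout(self, visitor):
            visitor.default_visit(self)

    class Visitor(object):
        def __init__(self):
            self.seen = []

        def visit_sequence(self, seq):
            if seq is not None:                # the None check now lives here, once
                for node in seq:
                    node.walkabout(self)

        def default_visit(self, node):
            self.seen.append(node)

    v = Visitor()
    v.visit_sequence(None)                     # a missing optional field is a no-op
    v.visit_sequence([Node(), Node()])
    assert len(v.seen) == 2
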
diff --git a/pypy/interpreter/astcompiler/tools/asdl_py.py b/pypy/interpreter/astcompiler/tools/asdl_py.py
--- a/pypy/interpreter/astcompiler/tools/asdl_py.py
+++ b/pypy/interpreter/astcompiler/tools/asdl_py.py
@@ -221,8 +221,9 @@
         self.emit("class ASTVisitor(object):")
         self.emit("")
         self.emit("def visit_sequence(self, seq):", 1)
-        self.emit("for node in seq:", 2)
-        self.emit("node.walkabout(self)", 3)
+        self.emit("if seq is not None:", 2)
+        self.emit("for node in seq:", 3)
+        self.emit("node.walkabout(self)", 4)
         self.emit("")
         self.emit("def default_visitor(self, node):", 1)
         self.emit("raise NodeVisitorNotImplemented", 2)
@@ -280,15 +281,13 @@
     def visitField(self, field):
         if field.type.value not in asdl.builtin_types and \
                 field.type.value not in self.data.simple_types:
-            if field.seq or field.opt:
+            level = 2
+            template = "node.%s.walkabout(self)"
+            if field.seq:
+                template = "self.visit_sequence(node.%s)"
+            elif field.opt:
                 self.emit("if node.%s:" % (field.name,), 2)
                 level = 3
-            else:
-                level = 2
-            if field.seq:
-                template = "self.visit_sequence(node.%s)"
-            else:
-                template = "node.%s.walkabout(self)"
             self.emit(template % (field.name,), level)
             return True
         return False
@@ -446,6 +445,7 @@
         if field.seq:
             self.emit("w_self.w_%s = w_new_value" % (field.name,), 1)
         else:
+            save_original_object = False
             self.emit("try:", 1)
             if field.type.value not in asdl.builtin_types:
                 # These are always other AST nodes.
@@ -454,9 +454,7 @@
                                   (field.type,), 2)
                     self.emit("w_self.%s = obj.to_simple_int(space)" %
                               (field.name,), 2)
-                    self.emit("# need to save the original object too", 2)
-                    self.emit("w_self.setdictvalue(space, '%s', w_new_value)"
-                              % (field.name,), 2)
+                    save_original_object = True
                 else:
                     config = (field.name, field.type, repr(field.opt))
                     self.emit("w_self.%s = space.interp_w(%s, w_new_value, %s)" %
@@ -480,6 +478,12 @@
             self.emit("    w_self.setdictvalue(space, '%s', w_new_value)"
                       % (field.name,), 1)
             self.emit("    return", 1)
+            if save_original_object:
+                self.emit("# need to save the original object too", 1)
+                self.emit("w_self.setdictvalue(space, '%s', w_new_value)"
+                          % (field.name,), 1)
+            else:
+                self.emit("w_self.deldictvalue(space, '%s')" %(field.name,), 1)
         self.emit("w_self.initialization_state |= %s" % (flag,), 1)
         self.emit("")
 
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -44,11 +44,11 @@
             return True
         return False
 
-    def deldictvalue(self, space, w_name):
+    def deldictvalue(self, space, attr):
         w_dict = self.getdict(space)
         if w_dict is not None:
             try:
-                space.delitem(w_dict, w_name)
+                space.delitem(w_dict, space.wrap(attr))
                 return True
             except OperationError, ex:
                 if not ex.match(space, space.w_KeyError):
@@ -111,6 +111,9 @@
     def setslotvalue(self, index, w_val):
         raise NotImplementedError
 
+    def delslotvalue(self, index):
+        raise NotImplementedError
+
     def descr_call_mismatch(self, space, opname, RequiredClass, args):
         if RequiredClass is None:
             classname = '?'
@@ -130,6 +133,9 @@
         raise operationerrfmt(space.w_TypeError,
             "cannot create weak reference to '%s' object", typename)
 
+    def delweakref(self):
+        pass
+
     def clear_all_weakrefs(self):
         """Call this at the beginning of interp-level __del__() methods
         in subclasses.  It ensures that weakrefs (if any) are cleared
@@ -143,29 +149,28 @@
             # app-level, e.g. a user-defined __del__(), and this code
             # tries to use weakrefs again, it won't reuse the broken
             # (already-cleared) weakrefs from this lifeline.
-            self.setweakref(lifeline.space, None)
+            self.delweakref()
             lifeline.clear_all_weakrefs()
 
-    __already_enqueued_for_destruction = False
+    __already_enqueued_for_destruction = ()
 
-    def _enqueue_for_destruction(self, space, call_user_del=True):
+    def enqueue_for_destruction(self, space, callback, descrname):
         """Put the object in the destructor queue of the space.
-        At a later, safe point in time, UserDelAction will use
-        space.userdel() to call the object's app-level __del__ method.
+        At a later, safe point in time, UserDelAction will call
+        callback(self).  If that raises OperationError, prints it
+        to stderr with the descrname string.
+
+        Note that 'callback' will usually need to start with:
+            assert isinstance(self, W_SpecificClass)
         """
         # this function always resurect the object, so when
         # running on top of CPython we must manually ensure that
         # we enqueue it only once
         if not we_are_translated():
-            if self.__already_enqueued_for_destruction:
+            if callback in self.__already_enqueued_for_destruction:
                 return
-            self.__already_enqueued_for_destruction = True
-        self.clear_all_weakrefs()
-        if call_user_del:
-            space.user_del_action.register_dying_object(self)
-
-    def _call_builtin_destructor(self):
-        pass     # method overridden in typedef.py
+            self.__already_enqueued_for_destruction += (callback,)
+        space.user_del_action.register_callback(self, callback, descrname)
 
     # hooks that the mapdict implementations needs:
     def _get_mapdict_map(self):
@@ -237,7 +242,7 @@
 
 class ObjSpace(object):
     """Base class for the interpreter-level implementations of object spaces.
-    http://codespeak.net/pypy/dist/pypy/doc/objspace.html"""
+    http://pypy.readthedocs.org/en/latest/objspace.html"""
 
     full_exceptions = True  # full support for exceptions (normalization & more)
 
@@ -925,6 +930,9 @@
                 return self.w_True
         return self.w_False
 
+    def issequence_w(self, w_obj):
+        return (self.findattr(w_obj, self.wrap("__getitem__")) is not None)
+
     def isinstance_w(self, w_obj, w_type):
         return self.is_true(self.isinstance(w_obj, w_type))
 
@@ -1279,6 +1287,17 @@
                                  self.wrap("expected a 32-bit integer"))
         return value
 
+    def truncatedint(self, w_obj):
+        # Like space.gateway_int_w(), but return the integer truncated
+        # instead of raising OverflowError.  For obscure cases only.
+        try:
+            return self.int_w(w_obj)
+        except OperationError, e:
+            if not e.match(self, self.w_OverflowError):
+                raise
+            from pypy.rlib.rarithmetic import intmask
+            return intmask(self.bigint_w(w_obj).uintmask())
+
     def c_filedescriptor_w(self, w_fd):
         # This is only used sometimes in CPython, e.g. for os.fsync() but
         # not os.close().  It's likely designed for 'select'.  It's irregular
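
The new space.truncatedint() above falls back to intmask(bigint.uintmask()) on overflow, i.e. it keeps only the low machine word and reinterprets it as signed. A self-contained model of that arithmetic, assuming a 32-bit word for the example (the real helpers come from pypy.rlib.rarithmetic, and gateway.py below adds a matching 'truncatedint' unwrap kind):

    def truncatedint(value, word_bits=32):
        value &= (1 << word_bits) - 1          # uintmask(): keep the low word
        if value >= 1 << (word_bits - 1):      # intmask(): reinterpret as signed
            value -= 1 << word_bits
        return value

    assert truncatedint(5) == 5
    assert truncatedint(2**31 + 5) == -2147483643   # truncated, no OverflowError
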
diff --git a/pypy/interpreter/error.py b/pypy/interpreter/error.py
--- a/pypy/interpreter/error.py
+++ b/pypy/interpreter/error.py
@@ -189,7 +189,7 @@
             if space.is_w(w_value, space.w_None):
                 # raise Type: we assume we have to instantiate Type
                 w_value = space.call_function(w_type)
-                w_type = space.exception_getclass(w_value)
+                w_type = self._exception_getclass(space, w_value)
             else:
                 w_valuetype = space.exception_getclass(w_value)
                 if space.exception_issubclass_w(w_valuetype, w_type):
@@ -204,18 +204,12 @@
                     else:
                         # raise Type, X: assume X is the constructor argument
                         w_value = space.call_function(w_type, w_value)
-                    w_type = space.exception_getclass(w_value)
+                    w_type = self._exception_getclass(space, w_value)
 
         else:
             # the only case left here is (inst, None), from a 'raise inst'.
             w_inst = w_type
-            w_instclass = space.exception_getclass(w_inst)
-            if not space.exception_is_valid_class_w(w_instclass):
-                instclassname = w_instclass.getname(space)
-                msg = ("exceptions must be old-style classes or derived "
-                       "from BaseException, not %s")
-                raise operationerrfmt(space.w_TypeError, msg, instclassname)
-
+            w_instclass = self._exception_getclass(space, w_inst)
             if not space.is_w(w_value, space.w_None):
                 raise OperationError(space.w_TypeError,
                                      space.wrap("instance exception may not "
@@ -226,6 +220,15 @@
         self.w_type   = w_type
         self._w_value = w_value
 
+    def _exception_getclass(self, space, w_inst):
+        w_type = space.exception_getclass(w_inst)
+        if not space.exception_is_valid_class_w(w_type):
+            typename = w_type.getname(space)
+            msg = ("exceptions must be old-style classes or derived "
+                   "from BaseException, not %s")
+            raise operationerrfmt(space.w_TypeError, msg, typename)
+        return w_type
+
     def write_unraisable(self, space, where, w_object=None):
         if w_object is None:
             objrepr = ''
diff --git a/pypy/interpreter/executioncontext.py b/pypy/interpreter/executioncontext.py
--- a/pypy/interpreter/executioncontext.py
+++ b/pypy/interpreter/executioncontext.py
@@ -484,44 +484,31 @@
 
     def __init__(self, space):
         AsyncAction.__init__(self, space)
-        self.dying_objects_w = []
-        self.weakrefs_w = []
+        self.dying_objects = []
         self.finalizers_lock_count = 0
 
-    def register_dying_object(self, w_obj):
-        self.dying_objects_w.append(w_obj)
-        self.fire()
-
-    def register_weakref_callback(self, w_ref):
-        self.weakrefs_w.append(w_ref)
+    def register_callback(self, w_obj, callback, descrname):
+        self.dying_objects.append((w_obj, callback, descrname))
         self.fire()
 
     def perform(self, executioncontext, frame):
         if self.finalizers_lock_count > 0:
             return
-        # Each call to perform() first grabs the self.dying_objects_w
+        # Each call to perform() first grabs the self.dying_objects
         # and replaces it with an empty list.  We do this to try to
         # avoid too deep recursions of the kind of __del__ being called
         # while in the middle of another __del__ call.
-        pending_w = self.dying_objects_w
-        self.dying_objects_w = []
+        pending = self.dying_objects
+        self.dying_objects = []
         space = self.space
-        for i in range(len(pending_w)):
-            w_obj = pending_w[i]
-            pending_w[i] = None
+        for i in range(len(pending)):
+            w_obj, callback, descrname = pending[i]
+            pending[i] = (None, None, None)
             try:
-                space.userdel(w_obj)
+                callback(w_obj)
             except OperationError, e:
-                e.write_unraisable(space, 'method __del__ of ', w_obj)
+                e.write_unraisable(space, descrname, w_obj)
                 e.clear(space)   # break up reference cycles
-            # finally, this calls the interp-level destructor for the
-            # cases where there is both an app-level and a built-in __del__.
-            w_obj._call_builtin_destructor()
-        pending_w = self.weakrefs_w
-        self.weakrefs_w = []
-        for i in range(len(pending_w)):
-            w_ref = pending_w[i]
-            w_ref.activate_callback()
 
 class FrameTraceAction(AsyncAction):
     """An action that calls the local trace functions (w_f_trace)."""
diff --git a/pypy/interpreter/function.py b/pypy/interpreter/function.py
--- a/pypy/interpreter/function.py
+++ b/pypy/interpreter/function.py
@@ -31,7 +31,8 @@
     _immutable_fields_ = ['code?',
                           'w_func_globals?',
                           'closure?',
-                          'defs_w?[*]']
+                          'defs_w?[*]',
+                          'name?']
 
     def __init__(self, space, code, w_globals=None, defs_w=[], closure=None,
                  forcename=None):
diff --git a/pypy/interpreter/gateway.py b/pypy/interpreter/gateway.py
--- a/pypy/interpreter/gateway.py
+++ b/pypy/interpreter/gateway.py
@@ -64,7 +64,7 @@
                 self.visit_self(el[1], *args)
             else:
                 self.visit_function(el, *args)
-        else:
+        elif isinstance(el, type):
             for typ in self.bases_order:
                 if issubclass(el, typ):
                     visit = getattr(self, "visit__%s" % (typ.__name__,))
@@ -73,6 +73,8 @@
             else:
                 raise Exception("%s: no match for unwrap_spec element %s" % (
                     self.__class__.__name__, el))
+        else:
+            raise Exception("unable to dispatch, %s, perhaps your parameter should have started with w_?" % el)
 
     def apply_over(self, unwrap_spec, *extra):
         dispatch = self.dispatch
@@ -140,6 +142,9 @@
     def visit_c_nonnegint(self, el, app_sig):
         self.checked_space_method(el, app_sig)
 
+    def visit_truncatedint(self, el, app_sig):
+        self.checked_space_method(el, app_sig)
+
     def visit__Wrappable(self, el, app_sig):
         name = el.__name__
         argname = self.orig_arg()
@@ -257,6 +262,9 @@
     def visit_c_nonnegint(self, typ):
         self.run_args.append("space.c_nonnegint_w(%s)" % (self.scopenext(),))
 
+    def visit_truncatedint(self, typ):
+        self.run_args.append("space.truncatedint(%s)" % (self.scopenext(),))
+
     def _make_unwrap_activation_class(self, unwrap_spec, cache={}):
         try:
             key = tuple(unwrap_spec)
@@ -387,6 +395,9 @@
     def visit_c_nonnegint(self, typ):
         self.unwrap.append("space.c_nonnegint_w(%s)" % (self.nextarg(),))
 
+    def visit_truncatedint(self, typ):
+        self.unwrap.append("space.truncatedint(%s)" % (self.nextarg(),))
+
     def make_fastfunc(unwrap_spec, func):
         unwrap_info = UnwrapSpec_FastFunc_Unwrap()
         unwrap_info.apply_over(unwrap_spec)
@@ -396,11 +407,14 @@
             fastfunc = func
         else:
             # try to avoid excessive bloat
-            if func.__module__ == 'pypy.interpreter.astcompiler.ast':
+            mod = func.__module__
+            if mod is None:
+                mod = ""
+            if mod == 'pypy.interpreter.astcompiler.ast':
                 raise FastFuncNotSupported
-            if (not func.__module__.startswith('pypy.module.__builtin__') and
-                not func.__module__.startswith('pypy.module.sys') and
-                not func.__module__.startswith('pypy.module.math')):
+            if (not mod.startswith('pypy.module.__builtin__') and
+                not mod.startswith('pypy.module.sys') and
+                not mod.startswith('pypy.module.math')):
                 if not func.__name__.startswith('descr'):
                     raise FastFuncNotSupported
             d = {}
diff --git a/pypy/interpreter/generator.py b/pypy/interpreter/generator.py
--- a/pypy/interpreter/generator.py
+++ b/pypy/interpreter/generator.py
@@ -114,6 +114,7 @@
  
     def descr_close(self):
         """x.close(arg) -> raise GeneratorExit inside generator."""
+        assert isinstance(self, GeneratorIterator)
         space = self.space
         try:
             w_retval = self.throw(space.w_GeneratorExit, space.w_None,
@@ -141,22 +142,16 @@
         code_name = self.pycode.co_name
         return space.wrap(code_name)
 
-    def descr__del__(self):        
-        """
-        applevel __del__, which is called at a safe point after the
-        interp-level __del__ enqueued the object for destruction
-        """
-        self.descr_close()
-
     def __del__(self):
         # Only bother enqueuing self to raise an exception if the frame is
         # still not finished and finally or except blocks are present.
-        must_call_close = False
+        self.clear_all_weakrefs()
         if self.frame is not None:
             block = self.frame.lastblock
             while block is not None:
                 if not isinstance(block, LoopBlock):
-                    must_call_close = True
+                    self.enqueue_for_destruction(self.space,
+                                                 GeneratorIterator.descr_close,
+                                                 "interrupting generator of ")
                     break
                 block = block.previous
-        self._enqueue_for_destruction(self.space, must_call_close)
diff --git a/pypy/interpreter/module.py b/pypy/interpreter/module.py
--- a/pypy/interpreter/module.py
+++ b/pypy/interpreter/module.py
@@ -9,6 +9,8 @@
 class Module(Wrappable):
     """A module."""
 
+    _immutable_fields_ = ["w_dict?"]
+
     _frozen = False
 
     def __init__(self, space, w_name, w_dict=None, add_package=True):
diff --git a/pypy/interpreter/pycompiler.py b/pypy/interpreter/pycompiler.py
--- a/pypy/interpreter/pycompiler.py
+++ b/pypy/interpreter/pycompiler.py
@@ -119,7 +119,10 @@
             raise OperationError(self.space.w_TypeError, self.space.wrap(
                 "invalid node type"))
 
-        future_pos = misc.parse_future(node)
+        fut = misc.parse_future(node, self.future_flags.compiler_features)
+        f_flags, f_lineno, f_col = fut
+        future_pos = f_lineno, f_col
+        flags |= f_flags
         info = pyparse.CompileInfo(filename, mode, flags, future_pos)
         return self._compile_ast(node, info)
 
diff --git a/pypy/interpreter/pyframe.py b/pypy/interpreter/pyframe.py
--- a/pypy/interpreter/pyframe.py
+++ b/pypy/interpreter/pyframe.py
@@ -52,6 +52,9 @@
     escaped                  = False  # see mark_as_escaped()
 
     def __init__(self, space, code, w_globals, closure):
+        if not we_are_translated():
+            assert type(self) in (space.FrameClass, CPythonFrame), (
+                "use space.FrameClass(), not directly PyFrame()")
         self = hint(self, access_directly=True, fresh_virtualizable=True)
         assert isinstance(code, pycode.PyCode)
         self.pycode = code
@@ -80,7 +83,7 @@
         self.escaped = True
 
     def append_block(self, block):
-        block.previous = self.lastblock
+        assert block.previous is self.lastblock
         self.lastblock = block
 
     def pop_block(self):
@@ -106,7 +109,8 @@
         while i >= 0:
             block = lst[i]
             i -= 1
-            self.append_block(block)
+            block.previous = self.lastblock
+            self.lastblock = block
 
     def get_builtin(self):
         if self.space.config.objspace.honor__builtins__:
diff --git a/pypy/interpreter/pyopcode.py b/pypy/interpreter/pyopcode.py
--- a/pypy/interpreter/pyopcode.py
+++ b/pypy/interpreter/pyopcode.py
@@ -892,32 +892,31 @@
         raise BytecodeCorruption, "old opcode, no longer in use"
 
     def SETUP_LOOP(self, offsettoend, next_instr):
-        block = LoopBlock(self, next_instr + offsettoend)
-        self.append_block(block)
+        block = LoopBlock(self, next_instr + offsettoend, self.lastblock)
+        self.lastblock = block
 
     def SETUP_EXCEPT(self, offsettoend, next_instr):
-        block = ExceptBlock(self, next_instr + offsettoend)
-        self.append_block(block)
+        block = ExceptBlock(self, next_instr + offsettoend, self.lastblock)
+        self.lastblock = block
 
     def SETUP_FINALLY(self, offsettoend, next_instr):
-        block = FinallyBlock(self, next_instr + offsettoend)
-        self.append_block(block)
+        block = FinallyBlock(self, next_instr + offsettoend, self.lastblock)
+        self.lastblock = block
 
     def SETUP_WITH(self, offsettoend, next_instr):
         w_manager = self.peekvalue()
+        w_enter = self.space.lookup(w_manager, "__enter__")
         w_descr = self.space.lookup(w_manager, "__exit__")
-        if w_descr is None:
-            raise OperationError(self.space.w_AttributeError,
-                                 self.space.wrap("__exit__"))
+        if w_enter is None or w_descr is None:
+            typename = self.space.type(w_manager).getname(self.space)
+            raise operationerrfmt(self.space.w_AttributeError,
+                "'%s' object is not a context manager"
+                " (no __enter__/__exit__ method)", typename)
         w_exit = self.space.get(w_descr, w_manager)
         self.settopvalue(w_exit)
-        w_enter = self.space.lookup(w_manager, "__enter__")
-        if w_enter is None:
-            raise OperationError(self.space.w_AttributeError,
-                                 self.space.wrap("__enter__"))
         w_result = self.space.get_and_call_function(w_enter, w_manager)
-        block = WithBlock(self, next_instr + offsettoend)
-        self.append_block(block)
+        block = WithBlock(self, next_instr + offsettoend, self.lastblock)
+        self.lastblock = block
         self.pushvalue(w_result)
 
     def WITH_CLEANUP(self, oparg, next_instr):
@@ -1248,10 +1247,10 @@
 
     _immutable_ = True
 
-    def __init__(self, frame, handlerposition):
+    def __init__(self, frame, handlerposition, previous):
         self.handlerposition = handlerposition
         self.valuestackdepth = frame.valuestackdepth
-        self.previous = None # this makes a linked list of blocks
+        self.previous = previous   # this makes a linked list of blocks
 
     def __eq__(self, other):
         return (self.__class__ is other.__class__ and
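
The pyframe.py and pyopcode.py hunks above move the linked-list wiring of frame blocks into the block constructor, so SETUP_LOOP and friends push a block in one step instead of going through append_block(). A tiny self-contained model with illustrative names only:

    class Block(object):
        def __init__(self, handlerposition, previous):
            self.handlerposition = handlerposition
            self.previous = previous           # this makes a linked list of blocks

    class Frame(object):
        lastblock = None
        def setup_loop(self, handlerposition):
            self.lastblock = Block(handlerposition, self.lastblock)  # one-step push

    f = Frame()
    f.setup_loop(10)
    f.setup_loop(20)
    assert f.lastblock.handlerposition == 20
    assert f.lastblock.previous.handlerposition == 10
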
diff --git a/pypy/interpreter/test/test_raise.py b/pypy/interpreter/test/test_raise.py
--- a/pypy/interpreter/test/test_raise.py
+++ b/pypy/interpreter/test/test_raise.py
@@ -274,3 +274,9 @@
             pass
         except A:
             pass
+
+    def test_new_returns_bad_instance(self):
+        class MyException(Exception):
+            def __new__(cls, *args):
+                return object()
+        raises(TypeError, "raise MyException")
diff --git a/pypy/interpreter/test/test_typedef.py b/pypy/interpreter/test/test_typedef.py
--- a/pypy/interpreter/test/test_typedef.py
+++ b/pypy/interpreter/test/test_typedef.py
@@ -1,3 +1,4 @@
+import gc
 from pypy.interpreter import typedef
 from pypy.tool.udir import udir
 from pypy.interpreter.baseobjspace import Wrappable
@@ -180,6 +181,85 @@
             assert err.value.message == "'some_type' objects are unhashable"
             """)
 
+    def test_destructor(self):
+        space = self.space
+        class W_Level1(Wrappable):
+            def __init__(self, space1):
+                assert space1 is space
+            def __del__(self):
+                space.call_method(w_seen, 'append', space.wrap(1))
+        class W_Level2(Wrappable):
+            def __init__(self, space1):
+                assert space1 is space
+            def __del__(self):
+                self.enqueue_for_destruction(space, W_Level2.destructormeth,
+                                             'FOO ')
+            def destructormeth(self):
+                space.call_method(w_seen, 'append', space.wrap(2))
+        W_Level1.typedef = typedef.TypeDef(
+            'level1',
+            __new__ = typedef.generic_new_descr(W_Level1))
+        W_Level2.typedef = typedef.TypeDef(
+            'level2',
+            __new__ = typedef.generic_new_descr(W_Level2))
+        #
+        w_seen = space.newlist([])
+        W_Level1(space)
+        gc.collect(); gc.collect()
+        assert space.unwrap(w_seen) == [1]
+        #
+        w_seen = space.newlist([])
+        W_Level2(space)
+        gc.collect(); gc.collect()
+        assert space.str_w(space.repr(w_seen)) == "[]"  # not called yet
+        ec = space.getexecutioncontext()
+        self.space.user_del_action.perform(ec, None)
+        assert space.unwrap(w_seen) == [2]
+        #
+        w_seen = space.newlist([])
+        self.space.appexec([self.space.gettypeobject(W_Level1.typedef)],
+        """(level1):
+            class A3(level1):
+                pass
+            A3()
+        """)
+        gc.collect(); gc.collect()
+        assert space.unwrap(w_seen) == [1]
+        #
+        w_seen = space.newlist([])
+        self.space.appexec([self.space.gettypeobject(W_Level1.typedef),
+                            w_seen],
+        """(level1, seen):
+            class A4(level1):
+                def __del__(self):
+                    seen.append(4)
+            A4()
+        """)
+        gc.collect(); gc.collect()
+        assert space.unwrap(w_seen) == [4, 1]
+        #
+        w_seen = space.newlist([])
+        self.space.appexec([self.space.gettypeobject(W_Level2.typedef)],
+        """(level2):
+            class A5(level2):
+                pass
+            A5()
+        """)
+        gc.collect(); gc.collect()
+        assert space.unwrap(w_seen) == [2]
+        #
+        w_seen = space.newlist([])
+        self.space.appexec([self.space.gettypeobject(W_Level2.typedef),
+                            w_seen],
+        """(level2, seen):
+            class A6(level2):
+                def __del__(self):
+                    seen.append(6)
+            A6()
+        """)
+        gc.collect(); gc.collect()
+        assert space.unwrap(w_seen) == [6, 2]
+
 
 class AppTestTypeDef:
 
diff --git a/pypy/interpreter/typedef.py b/pypy/interpreter/typedef.py
--- a/pypy/interpreter/typedef.py
+++ b/pypy/interpreter/typedef.py
@@ -23,7 +23,7 @@
             self.hasdict     |= __base.hasdict
             self.weakrefable |= __base.weakrefable
         self.rawdict = {}
-        self.acceptable_as_base_class = True
+        self.acceptable_as_base_class = '__new__' in rawdict
         self.applevel_subclasses_base = None
         # xxx used by faking
         self.fakedcpytype = None
@@ -228,21 +228,26 @@
                 return self._lifeline_
             def setweakref(self, space, weakreflifeline):
                 self._lifeline_ = weakreflifeline
+            def delweakref(self):
+                self._lifeline_ = None
         add(Proto)
 
     if "del" in features:
+        parent_destructor = getattr(supercls, '__del__', None)
+        def call_parent_del(self):
+            assert isinstance(self, subcls)
+            parent_destructor(self)
+        def call_applevel_del(self):
+            assert isinstance(self, subcls)
+            self.space.userdel(self)
         class Proto(object):
             def __del__(self):
-                self._enqueue_for_destruction(self.space)
-        # if the base class needs its own interp-level __del__,
-        # we override the _call_builtin_destructor() method to invoke it
-        # after the app-level destructor.
-        parent_destructor = getattr(supercls, '__del__', None)
-        if parent_destructor is not None:
-            def _call_builtin_destructor(self):
-                parent_destructor(self)
-            Proto._call_builtin_destructor = _call_builtin_destructor
-
+                self.clear_all_weakrefs()
+                self.enqueue_for_destruction(self.space, call_applevel_del,
+                                             'method __del__ of ')
+                if parent_destructor is not None:
+                    self.enqueue_for_destruction(self.space, call_parent_del,
+                                                 'internal destructor of ')
         add(Proto)
 
     if "slots" in features:
@@ -253,6 +258,11 @@
                     self.slots_w = [None] * nslots
             def setslotvalue(self, index, w_value):
                 self.slots_w[index] = w_value
+            def delslotvalue(self, index):
+                if self.slots_w[index] is None:
+                    return False
+                self.slots_w[index] = None
+                return True
             def getslotvalue(self, index):
                 return self.slots_w[index]
         add(Proto)
@@ -525,11 +535,10 @@
         """member.__delete__(obj)
         Delete the value of the slot 'member' from the given 'obj'."""
         self.typecheck(space, w_obj)
-        w_oldresult = w_obj.getslotvalue(self.index)
-        if w_oldresult is None:
+        success = w_obj.delslotvalue(self.index)
+        if not success:
             raise OperationError(space.w_AttributeError,
                                  space.wrap(self.name)) # XXX better message
-        w_obj.setslotvalue(self.index, None)
 
 Member.typedef = TypeDef(
     "member_descriptor",
@@ -630,9 +639,12 @@
         return self._lifeline_
     def setweakref(self, space, weakreflifeline):
         self._lifeline_ = weakreflifeline
+    def delweakref(self):
+        self._lifeline_ = None
     cls._lifeline_ = None
     cls.getweakref = getweakref
     cls.setweakref = setweakref
+    cls.delweakref = delweakref
     return weakref_descr
 
 
@@ -858,8 +870,6 @@
                             descrmismatch='close'),
     __iter__   = interp2app(GeneratorIterator.descr__iter__,
                             descrmismatch='__iter__'),
-    __del__    = interp2app(GeneratorIterator.descr__del__,
-                            descrmismatch='__del__'),
     gi_running = interp_attrproperty('running', cls=GeneratorIterator),
     gi_frame   = GetSetProperty(GeneratorIterator.descr_gi_frame),
     gi_code    = GetSetProperty(GeneratorIterator.descr_gi_code),
diff --git a/pypy/jit/backend/llgraph/llimpl.py b/pypy/jit/backend/llgraph/llimpl.py
--- a/pypy/jit/backend/llgraph/llimpl.py
+++ b/pypy/jit/backend/llgraph/llimpl.py
@@ -1071,6 +1071,8 @@
         return heaptracker.adr2int(llmemory.cast_ptr_to_adr(x))
     if TP == llmemory.Address:
         return heaptracker.adr2int(x)
+    if TP is lltype.SingleFloat:
+        return longlong.singlefloat2int(x)
     return lltype.cast_primitive(lltype.Signed, x)
 
 def cast_from_int(TYPE, x):
@@ -1086,6 +1088,9 @@
             x = llmemory.cast_int_to_adr(x)
         assert lltype.typeOf(x) == llmemory.Address
         return x
+    elif TYPE is lltype.SingleFloat:
+        assert lltype.typeOf(x) is lltype.Signed
+        return longlong.int2singlefloat(x)
     else:
         if lltype.typeOf(x) == llmemory.Address:
             x = heaptracker.adr2int(x)
@@ -1140,6 +1145,7 @@
     del _future_values[:]
 
 def set_future_value_int(index, value):
+    assert lltype.typeOf(value) is lltype.Signed
     set_future_value_ref(index, value)
 
 def set_future_value_float(index, value):
@@ -1488,6 +1494,7 @@
     'i': lltype.Signed,
     'f': lltype.Float,
     'L': lltype.SignedLongLong,
+    'S': lltype.SingleFloat,
     'v': lltype.Void,
     }
 
diff --git a/pypy/jit/backend/llgraph/runner.py b/pypy/jit/backend/llgraph/runner.py
--- a/pypy/jit/backend/llgraph/runner.py
+++ b/pypy/jit/backend/llgraph/runner.py
@@ -91,6 +91,7 @@
 class BaseCPU(model.AbstractCPU):
     supports_floats = True
     supports_longlong = llimpl.IS_32_BIT
+    supports_singlefloats = True
 
     def __init__(self, rtyper, stats=None, opts=None,
                  translate_support_code=False,
@@ -327,12 +328,16 @@
 
     def calldescrof_dynamic(self, ffi_args, ffi_result, extrainfo=None):
         from pypy.jit.backend.llsupport.ffisupport import get_ffi_type_kind
+        from pypy.jit.backend.llsupport.ffisupport import UnsupportedKind
         arg_types = []
-        for arg in ffi_args:
-            kind = get_ffi_type_kind(arg)
-            if kind != history.VOID:
-                arg_types.append(kind)
-        reskind = get_ffi_type_kind(ffi_result)
+        try:
+            for arg in ffi_args:
+                kind = get_ffi_type_kind(self, arg)
+                if kind != history.VOID:
+                    arg_types.append(kind)
+            reskind = get_ffi_type_kind(self, ffi_result)
+        except UnsupportedKind:
+            return None
         return self.getdescr(0, reskind, extrainfo=extrainfo,
                              arg_types=''.join(arg_types))
 
diff --git a/pypy/jit/backend/llgraph/test/test_llgraph.py b/pypy/jit/backend/llgraph/test/test_llgraph.py
--- a/pypy/jit/backend/llgraph/test/test_llgraph.py
+++ b/pypy/jit/backend/llgraph/test/test_llgraph.py
@@ -19,6 +19,9 @@
     def setup_method(self, _):
         self.cpu = self.cpu_type(None)
 
+    def test_memoryerror(self):
+        py.test.skip("does not make much sense on the llgraph backend")
+
 
 def test_cast_adr_to_int_and_back():
     X = lltype.Struct('X', ('foo', lltype.Signed))
diff --git a/pypy/jit/backend/llsupport/descr.py b/pypy/jit/backend/llsupport/descr.py
--- a/pypy/jit/backend/llsupport/descr.py
+++ b/pypy/jit/backend/llsupport/descr.py
@@ -303,6 +303,8 @@
                 c = 'f'
             elif c == 'f' and longlong.supports_longlong:
                 return 'longlong.getrealfloat(%s)' % (process('L'),)
+            elif c == 'S':
+                return 'longlong.int2singlefloat(%s)' % (process('i'),)
             arg = 'args_%s[%d]' % (c, seen[c])
             seen[c] += 1
             return arg
@@ -318,6 +320,8 @@
                 return lltype.Void
             elif arg == 'L':
                 return lltype.SignedLongLong
+            elif arg == 'S':
+                return lltype.SingleFloat
             else:
                 raise AssertionError(arg)
 
@@ -334,6 +338,8 @@
             result = 'rffi.cast(lltype.SignedLongLong, res)'
         elif self.get_return_type() == history.VOID:
             result = 'None'
+        elif self.get_return_type() == 'S':
+            result = 'longlong.singlefloat2int(res)'
         else:
             assert 0
         source = py.code.Source("""
@@ -344,14 +350,15 @@
         """ % locals())
         ARGS = [TYPE(arg) for arg in self.arg_classes]
         FUNC = lltype.FuncType(ARGS, RESULT)
-        d = locals().copy()
-        d.update(globals())
+        d = globals().copy()
+        d.update(locals())
         exec source.compile() in d
         self.call_stub = d['call_stub']
 
     def verify_types(self, args_i, args_r, args_f, return_type):
         assert self._return_type in return_type
-        assert self.arg_classes.count('i') == len(args_i or ())
+        assert (self.arg_classes.count('i') +
+                self.arg_classes.count('S')) == len(args_i or ())
         assert self.arg_classes.count('r') == len(args_r or ())
         assert (self.arg_classes.count('f') +
                 self.arg_classes.count('L')) == len(args_f or ())
@@ -428,23 +435,39 @@
     def get_result_size(self, translate_support_code):
         return 0
 
+_SingleFloatCallDescr = None   # built lazily
+
 def getCallDescrClass(RESULT):
     if RESULT is lltype.Void:
         return VoidCallDescr
     if RESULT is lltype.Float:
         return FloatCallDescr
+    if RESULT is lltype.SingleFloat:
+        global _SingleFloatCallDescr
+        if _SingleFloatCallDescr is None:
+            assert rffi.sizeof(rffi.UINT) == rffi.sizeof(RESULT)
+            class SingleFloatCallDescr(getCallDescrClass(rffi.UINT)):
+                _clsname = 'SingleFloatCallDescr'
+                _return_type = 'S'
+            _SingleFloatCallDescr = SingleFloatCallDescr
+        return _SingleFloatCallDescr
     if is_longlong(RESULT):
         return LongLongCallDescr
     return getDescrClass(RESULT, BaseIntCallDescr, GcPtrCallDescr,
                          NonGcPtrCallDescr, 'Call', 'get_result_size',
                          Ellipsis,  # <= floatattrname should not be used here
                          '_is_result_signed')
+getCallDescrClass._annspecialcase_ = 'specialize:memo'
 
 def get_call_descr(gccache, ARGS, RESULT, extrainfo=None):
     arg_classes = []
     for ARG in ARGS:
         kind = getkind(ARG)
-        if   kind == 'int': arg_classes.append('i')
+        if   kind == 'int':
+            if ARG is lltype.SingleFloat:
+                arg_classes.append('S')
+            else:
+                arg_classes.append('i')
         elif kind == 'ref': arg_classes.append('r')
         elif kind == 'float':
             if is_longlong(ARG):
@@ -476,6 +499,9 @@
             return GcPtrDescr
         else:
             return NonGcPtrDescr
+    if TYPE is lltype.SingleFloat:
+        assert rffi.sizeof(rffi.UINT) == rffi.sizeof(TYPE)
+        TYPE = rffi.UINT
     try:
         return _cache[nameprefix, TYPE]
     except KeyError:
diff --git a/pypy/jit/backend/llsupport/ffisupport.py b/pypy/jit/backend/llsupport/ffisupport.py
--- a/pypy/jit/backend/llsupport/ffisupport.py
+++ b/pypy/jit/backend/llsupport/ffisupport.py
@@ -1,19 +1,21 @@
 from pypy.rlib.rarithmetic import intmask
 from pypy.jit.metainterp import history
-from pypy.jit.backend.llsupport.descr import DynamicIntCallDescr, NonGcPtrCallDescr,\
-    FloatCallDescr, VoidCallDescr
+from pypy.rpython.lltypesystem import rffi
+from pypy.jit.backend.llsupport.descr import (
+    DynamicIntCallDescr, NonGcPtrCallDescr, FloatCallDescr, VoidCallDescr,
+    LongLongCallDescr, getCallDescrClass)
 
 class UnsupportedKind(Exception):
     pass
 
-def get_call_descr_dynamic(ffi_args, ffi_result, extrainfo=None):
+def get_call_descr_dynamic(cpu, ffi_args, ffi_result, extrainfo=None):
     """Get a call descr: the types of result and args are represented by
     rlib.libffi.types.*"""
     try:
-        reskind = get_ffi_type_kind(ffi_result)
-        argkinds = [get_ffi_type_kind(arg) for arg in ffi_args]
+        reskind = get_ffi_type_kind(cpu, ffi_result)
+        argkinds = [get_ffi_type_kind(cpu, arg) for arg in ffi_args]
     except UnsupportedKind:
-        return None # ??
+        return None
     arg_classes = ''.join(argkinds)
     if reskind == history.INT:
         size = intmask(ffi_result.c_size)
@@ -25,17 +27,26 @@
         return FloatCallDescr(arg_classes, extrainfo)
     elif reskind == history.VOID:
         return VoidCallDescr(arg_classes, extrainfo)
+    elif reskind == 'L':
+        return LongLongCallDescr(arg_classes, extrainfo)
+    elif reskind == 'S':
+        SingleFloatCallDescr = getCallDescrClass(rffi.FLOAT)
+        return SingleFloatCallDescr(arg_classes, extrainfo)
     assert False
 
-def get_ffi_type_kind(ffi_type):
+def get_ffi_type_kind(cpu, ffi_type):
     from pypy.rlib.libffi import types
     kind = types.getkind(ffi_type)
     if kind == 'i' or kind == 'u':
         return history.INT
-    elif kind == 'f':
+    elif cpu.supports_floats and kind == 'f':
         return history.FLOAT
     elif kind == 'v':
         return history.VOID
+    elif cpu.supports_longlong and (kind == 'I' or kind == 'U'):     # longlong
+        return 'L'
+    elif cpu.supports_singlefloats and kind == 's':    # singlefloat
+        return 'S'
     raise UnsupportedKind("Unsupported kind '%s'" % kind)
 
 def is_ffi_type_signed(ffi_type):
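
A standalone sketch, not part of the patch, of the capability check that
get_call_descr_dynamic() and get_ffi_type_kind() now perform: a libffi kind letter
is only translated into a call-descr kind when the CPU object advertises support
for it, otherwise the whole descr request comes back as None.  FakeCPU and the
helper below are illustrative only; the kind letters follow
rlib.libffi.types.getkind().

class FakeCPU(object):
    def __init__(self, floats=False, longlongs=False, singlefloats=False):
        self.supports_floats = floats
        self.supports_longlong = longlongs
        self.supports_singlefloats = singlefloats

def ffi_kind_to_arg_class(cpu, kind):
    if kind in 'iu':
        return 'i'                   # plain machine integer
    if kind == 'v':
        return 'v'                   # void
    if kind == 'f' and cpu.supports_floats:
        return 'f'                   # double
    if kind in 'IU' and cpu.supports_longlong:
        return 'L'                   # longlong (32-bit backends only)
    if kind == 's' and cpu.supports_singlefloats:
        return 'S'                   # single float, carried in an int box
    return None                      # stands in for raising UnsupportedKind

assert ffi_kind_to_arg_class(FakeCPU(), 'f') is None
assert ffi_kind_to_arg_class(FakeCPU(floats=True), 'f') == 'f'
assert ffi_kind_to_arg_class(FakeCPU(singlefloats=True), 's') == 'S'
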
diff --git a/pypy/jit/backend/llsupport/gc.py b/pypy/jit/backend/llsupport/gc.py
--- a/pypy/jit/backend/llsupport/gc.py
+++ b/pypy/jit/backend/llsupport/gc.py
@@ -453,21 +453,33 @@
 
 class WriteBarrierDescr(AbstractDescr):
     def __init__(self, gc_ll_descr):
+        GCClass = gc_ll_descr.GCClass
         self.llop1 = gc_ll_descr.llop1
         self.WB_FUNCPTR = gc_ll_descr.WB_FUNCPTR
         self.WB_ARRAY_FUNCPTR = gc_ll_descr.WB_ARRAY_FUNCPTR
-        self.fielddescr_tid = get_field_descr(gc_ll_descr,
-                                              gc_ll_descr.GCClass.HDR, 'tid')
-        self.jit_wb_if_flag = gc_ll_descr.GCClass.JIT_WB_IF_FLAG
-        # if convenient for the backend, we also compute the info about
+        self.fielddescr_tid = get_field_descr(gc_ll_descr, GCClass.HDR, 'tid')
+        #
+        self.jit_wb_if_flag = GCClass.JIT_WB_IF_FLAG
+        self.jit_wb_if_flag_byteofs, self.jit_wb_if_flag_singlebyte = (
+            self.extract_flag_byte(self.jit_wb_if_flag))
+        #
+        if hasattr(GCClass, 'JIT_WB_CARDS_SET'):
+            self.jit_wb_cards_set = GCClass.JIT_WB_CARDS_SET
+            self.jit_wb_card_page_shift = GCClass.JIT_WB_CARD_PAGE_SHIFT
+            self.jit_wb_cards_set_byteofs, self.jit_wb_cards_set_singlebyte = (
+                self.extract_flag_byte(self.jit_wb_cards_set))
+        else:
+            self.jit_wb_cards_set = 0
+
+    def extract_flag_byte(self, flag_word):
+        # if convenient for the backend, we compute the info about
         # the flag as (byte-offset, single-byte-flag).
         import struct
-        value = struct.pack("l", self.jit_wb_if_flag)
+        value = struct.pack("l", flag_word)
         assert value.count('\x00') == len(value) - 1    # only one byte is != 0
         i = 0
         while value[i] == '\x00': i += 1
-        self.jit_wb_if_flag_byteofs = i
-        self.jit_wb_if_flag_singlebyte = struct.unpack('b', value[i])[0]
+        return (i, struct.unpack('b', value[i])[0])
 
     def get_write_barrier_fn(self, cpu):
         llop1 = self.llop1
@@ -532,18 +544,19 @@
         assert self.GCClass.inline_simple_malloc
         assert self.GCClass.inline_simple_malloc_varsize
 
-        # make a malloc function, with three arguments
+        # make a malloc function, with two arguments
         def malloc_basic(size, tid):
             type_id = llop.extract_ushort(llgroup.HALFWORD, tid)
             has_finalizer = bool(tid & (1<<llgroup.HALFSHIFT))
             check_typeid(type_id)
-            try:
-                res = llop1.do_malloc_fixedsize_clear(llmemory.GCREF,
-                                                      type_id, size, True,
-                                                      has_finalizer, False)
-            except MemoryError:
-                fatalerror("out of memory (from JITted code)")
-                res = lltype.nullptr(llmemory.GCREF.TO)
+            res = llop1.do_malloc_fixedsize_clear(llmemory.GCREF,
+                                                  type_id, size, True,
+                                                  has_finalizer, False)
+            # In case the operation above failed, we return NULL from this
+            # function to the assembler.  There is also an RPython
+            # exception set, typically MemoryError; but it's easier and
+            # faster to check for the NULL return value, as done by
+            # translator/exceptiontransform.py.
             #llop.debug_print(lltype.Void, "\tmalloc_basic", size, type_id,
             #                 "-->", res)
             return res
@@ -559,14 +572,10 @@
         def malloc_array(itemsize, tid, num_elem):
             type_id = llop.extract_ushort(llgroup.HALFWORD, tid)
             check_typeid(type_id)
-            try:
-                return llop1.do_malloc_varsize_clear(
-                    llmemory.GCREF,
-                    type_id, num_elem, self.array_basesize, itemsize,
-                    self.array_length_ofs, True)
-            except MemoryError:
-                fatalerror("out of memory (from JITted code)")
-                return lltype.nullptr(llmemory.GCREF.TO)
+            return llop1.do_malloc_varsize_clear(
+                llmemory.GCREF,
+                type_id, num_elem, self.array_basesize, itemsize,
+                self.array_length_ofs, True)
         self.malloc_array = malloc_array
         self.GC_MALLOC_ARRAY = lltype.Ptr(lltype.FuncType(
             [lltype.Signed] * 3, llmemory.GCREF))
@@ -579,23 +588,15 @@
         unicode_type_id = self.layoutbuilder.get_type_id(rstr.UNICODE)
         #
         def malloc_str(length):
-            try:
-                return llop1.do_malloc_varsize_clear(
-                    llmemory.GCREF,
-                    str_type_id, length, str_basesize, str_itemsize,
-                    str_ofs_length, True)
-            except MemoryError:
-                fatalerror("out of memory (from JITted code)")
-                return lltype.nullptr(llmemory.GCREF.TO)
+            return llop1.do_malloc_varsize_clear(
+                llmemory.GCREF,
+                str_type_id, length, str_basesize, str_itemsize,
+                str_ofs_length, True)
         def malloc_unicode(length):
-            try:
-                return llop1.do_malloc_varsize_clear(
-                    llmemory.GCREF,
-                    unicode_type_id, length, unicode_basesize,unicode_itemsize,
-                    unicode_ofs_length, True)
-            except MemoryError:
-                fatalerror("out of memory (from JITted code)")
-                return lltype.nullptr(llmemory.GCREF.TO)
+            return llop1.do_malloc_varsize_clear(
+                llmemory.GCREF,
+                unicode_type_id, length, unicode_basesize,unicode_itemsize,
+                unicode_ofs_length, True)
         self.malloc_str = malloc_str
         self.malloc_unicode = malloc_unicode
         self.GC_MALLOC_STR_UNICODE = lltype.Ptr(lltype.FuncType(
@@ -616,16 +617,12 @@
             if self.DEBUG:
                 random_usage_of_xmm_registers()
             assert size >= self.minimal_size_in_nursery
-            try:
-                # NB. although we call do_malloc_fixedsize_clear() here,
-                # it's a bit of a hack because we set tid to 0 and may
-                # also use it to allocate varsized objects.  The tid
-                # and possibly the length are both set afterward.
-                gcref = llop1.do_malloc_fixedsize_clear(llmemory.GCREF,
-                                            0, size, True, False, False)
-            except MemoryError:
-                fatalerror("out of memory (from JITted code)")
-                return 0
+            # NB. although we call do_malloc_fixedsize_clear() here,
+            # it's a bit of a hack because we set tid to 0 and may
+            # also use it to allocate varsized objects.  The tid
+            # and possibly the length are both set afterward.
+            gcref = llop1.do_malloc_fixedsize_clear(llmemory.GCREF,
+                                        0, size, True, False, False)
             return rffi.cast(lltype.Signed, gcref)
         self.malloc_slowpath = malloc_slowpath
         self.MALLOC_SLOWPATH = lltype.FuncType([lltype.Signed], lltype.Signed)
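
A standalone worked example, not part of the patch, of what the new
extract_flag_byte() helper above computes: for a flag word with a single bit set,
find which byte of the in-memory word holds that bit and what that byte is worth
as a signed char, so the backend can emit a one-byte TEST instead of testing the
whole word.  Plain Python 2, with the usual little-endian result shown.

import struct

def extract_flag_byte(flag_word):
    value = struct.pack("l", flag_word)
    assert value.count('\x00') == len(value) - 1   # exactly one byte is non-zero
    i = 0
    while value[i] == '\x00':
        i += 1
    return i, struct.unpack('b', value[i])[0]

byteofs, singlebyte = extract_flag_byte(4096)      # JIT_WB_IF_FLAG == 1 << 12
print 'byte offset %d, single-byte value 0x%x' % (byteofs, singlebyte)
# on a little-endian machine: byte offset 1, single-byte value 0x10, which is
# what runner_test.py recomputes with struct.pack("i", 4096).index('\x10')
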
diff --git a/pypy/jit/backend/llsupport/llmodel.py b/pypy/jit/backend/llsupport/llmodel.py
--- a/pypy/jit/backend/llsupport/llmodel.py
+++ b/pypy/jit/backend/llsupport/llmodel.py
@@ -259,7 +259,7 @@
 
     def calldescrof_dynamic(self, ffi_args, ffi_result, extrainfo=None):
         from pypy.jit.backend.llsupport import ffisupport
-        return ffisupport.get_call_descr_dynamic(ffi_args, ffi_result,
+        return ffisupport.get_call_descr_dynamic(self, ffi_args, ffi_result,
                                                  extrainfo)
 
     def get_overflow_error(self):
@@ -499,7 +499,7 @@
     def bh_call_i(self, func, calldescr, args_i, args_r, args_f):
         assert isinstance(calldescr, BaseIntCallDescr)
         if not we_are_translated():
-            calldescr.verify_types(args_i, args_r, args_f, history.INT)
+            calldescr.verify_types(args_i, args_r, args_f, history.INT + 'S')
         return calldescr.call_stub(func, args_i, args_r, args_f)
 
     def bh_call_r(self, func, calldescr, args_i, args_r, args_f):
diff --git a/pypy/jit/backend/llsupport/test/test_descr.py b/pypy/jit/backend/llsupport/test/test_descr.py
--- a/pypy/jit/backend/llsupport/test/test_descr.py
+++ b/pypy/jit/backend/llsupport/test/test_descr.py
@@ -52,7 +52,8 @@
     S = lltype.GcStruct('S', ('x', lltype.Char),
                              ('y', lltype.Ptr(T)),
                              ('z', lltype.Ptr(U)),
-                             ('f', lltype.Float))
+                             ('f', lltype.Float),
+                             ('s', lltype.SingleFloat))
     assert getFieldDescrClass(lltype.Ptr(T)) is GcPtrFieldDescr
     assert getFieldDescrClass(lltype.Ptr(U)) is NonGcPtrFieldDescr
     cls = getFieldDescrClass(lltype.Char)
@@ -61,6 +62,10 @@
     clsf = getFieldDescrClass(lltype.Float)
     assert clsf != cls
     assert clsf == getFieldDescrClass(lltype.Float)
+    clss = getFieldDescrClass(lltype.SingleFloat)
+    assert clss not in (cls, clsf)
+    assert clss == getFieldDescrClass(lltype.SingleFloat)
+    assert clss == getFieldDescrClass(rffi.UINT)    # for now
     #
     c0 = GcCache(False)
     c1 = GcCache(True)
@@ -72,14 +77,17 @@
         descr_y = get_field_descr(c2, S, 'y')
         descr_z = get_field_descr(c2, S, 'z')
         descr_f = get_field_descr(c2, S, 'f')
+        descr_s = get_field_descr(c2, S, 's')
         assert descr_x.__class__ is cls
         assert descr_y.__class__ is GcPtrFieldDescr
         assert descr_z.__class__ is NonGcPtrFieldDescr
         assert descr_f.__class__ is clsf
+        assert descr_s.__class__ is clss
         assert descr_x.name == 'S.x'
         assert descr_y.name == 'S.y'
         assert descr_z.name == 'S.z'
         assert descr_f.name == 'S.f'
+        assert descr_s.name == 'S.s'
         if not tsc:
             assert descr_x.offset < descr_y.offset < descr_z.offset
             assert descr_x.sort_key() < descr_y.sort_key() < descr_z.sort_key()
@@ -87,23 +95,29 @@
             assert descr_y.get_field_size(False) == rffi.sizeof(lltype.Ptr(T))
             assert descr_z.get_field_size(False) == rffi.sizeof(lltype.Ptr(U))
             assert descr_f.get_field_size(False) == rffi.sizeof(lltype.Float)
+            assert descr_s.get_field_size(False) == rffi.sizeof(
+                                                            lltype.SingleFloat)
         else:
             assert isinstance(descr_x.offset, Symbolic)
             assert isinstance(descr_y.offset, Symbolic)
             assert isinstance(descr_z.offset, Symbolic)
             assert isinstance(descr_f.offset, Symbolic)
+            assert isinstance(descr_s.offset, Symbolic)
             assert isinstance(descr_x.get_field_size(True), Symbolic)
             assert isinstance(descr_y.get_field_size(True), Symbolic)
             assert isinstance(descr_z.get_field_size(True), Symbolic)
             assert isinstance(descr_f.get_field_size(True), Symbolic)
+            assert isinstance(descr_s.get_field_size(True), Symbolic)
         assert not descr_x.is_pointer_field()
         assert     descr_y.is_pointer_field()
         assert not descr_z.is_pointer_field()
         assert not descr_f.is_pointer_field()
+        assert not descr_s.is_pointer_field()
         assert not descr_x.is_float_field()
         assert not descr_y.is_float_field()
         assert not descr_z.is_float_field()
         assert     descr_f.is_float_field()
+        assert not descr_s.is_float_field()
 
 
 def test_get_field_descr_sign():
@@ -135,6 +149,7 @@
     A2 = lltype.GcArray(lltype.Ptr(T))
     A3 = lltype.GcArray(lltype.Ptr(U))
     A4 = lltype.GcArray(lltype.Float)
+    A5 = lltype.GcArray(lltype.SingleFloat)
     assert getArrayDescrClass(A2) is GcPtrArrayDescr
     assert getArrayDescrClass(A3) is NonGcPtrArrayDescr
     cls = getArrayDescrClass(A1)
@@ -143,25 +158,32 @@
     clsf = getArrayDescrClass(A4)
     assert clsf != cls
     assert clsf == getArrayDescrClass(lltype.GcArray(lltype.Float))
+    clss = getArrayDescrClass(A5)
+    assert clss not in (clsf, cls)
+    assert clss == getArrayDescrClass(lltype.GcArray(rffi.UINT))
     #
     c0 = GcCache(False)
     descr1 = get_array_descr(c0, A1)
     descr2 = get_array_descr(c0, A2)
     descr3 = get_array_descr(c0, A3)
     descr4 = get_array_descr(c0, A4)
+    descr5 = get_array_descr(c0, A5)
     assert descr1.__class__ is cls
     assert descr2.__class__ is GcPtrArrayDescr
     assert descr3.__class__ is NonGcPtrArrayDescr
     assert descr4.__class__ is clsf
+    assert descr5.__class__ is clss
     assert descr1 == get_array_descr(c0, lltype.GcArray(lltype.Char))
     assert not descr1.is_array_of_pointers()
     assert     descr2.is_array_of_pointers()
     assert not descr3.is_array_of_pointers()
     assert not descr4.is_array_of_pointers()
+    assert not descr5.is_array_of_pointers()
     assert not descr1.is_array_of_floats()
     assert not descr2.is_array_of_floats()
     assert not descr3.is_array_of_floats()
     assert     descr4.is_array_of_floats()
+    assert not descr5.is_array_of_floats()
     #
     def get_alignment(code):
         # Retrieve default alignment for the compiler/platform
@@ -170,27 +192,33 @@
     assert descr2.get_base_size(False) == get_alignment('p')
     assert descr3.get_base_size(False) == get_alignment('p')
     assert descr4.get_base_size(False) == get_alignment('d')
+    assert descr5.get_base_size(False) == get_alignment('f')
     assert descr1.get_ofs_length(False) == 0
     assert descr2.get_ofs_length(False) == 0
     assert descr3.get_ofs_length(False) == 0
     assert descr4.get_ofs_length(False) == 0
+    assert descr5.get_ofs_length(False) == 0
     assert descr1.get_item_size(False) == rffi.sizeof(lltype.Char)
     assert descr2.get_item_size(False) == rffi.sizeof(lltype.Ptr(T))
     assert descr3.get_item_size(False) == rffi.sizeof(lltype.Ptr(U))
     assert descr4.get_item_size(False) == rffi.sizeof(lltype.Float)
+    assert descr5.get_item_size(False) == rffi.sizeof(lltype.SingleFloat)
     #
     assert isinstance(descr1.get_base_size(True), Symbolic)
     assert isinstance(descr2.get_base_size(True), Symbolic)
     assert isinstance(descr3.get_base_size(True), Symbolic)
     assert isinstance(descr4.get_base_size(True), Symbolic)
+    assert isinstance(descr5.get_base_size(True), Symbolic)
     assert isinstance(descr1.get_ofs_length(True), Symbolic)
     assert isinstance(descr2.get_ofs_length(True), Symbolic)
     assert isinstance(descr3.get_ofs_length(True), Symbolic)
     assert isinstance(descr4.get_ofs_length(True), Symbolic)
+    assert isinstance(descr5.get_ofs_length(True), Symbolic)
     assert isinstance(descr1.get_item_size(True), Symbolic)
     assert isinstance(descr2.get_item_size(True), Symbolic)
     assert isinstance(descr3.get_item_size(True), Symbolic)
     assert isinstance(descr4.get_item_size(True), Symbolic)
+    assert isinstance(descr5.get_item_size(True), Symbolic)
     CA = rffi.CArray(lltype.Signed)
     descr = get_array_descr(c0, CA)
     assert not descr.is_array_of_floats()
@@ -210,6 +238,11 @@
     assert descr.is_array_of_floats()
     assert descr.get_base_size(False) == 0
     assert descr.get_ofs_length(False) == -1
+    CA = rffi.CArray(rffi.FLOAT)
+    descr = get_array_descr(c0, CA)
+    assert not descr.is_array_of_floats()
+    assert descr.get_base_size(False) == 0
+    assert descr.get_ofs_length(False) == -1
 
 
 def test_get_array_descr_sign():
@@ -257,6 +290,11 @@
     assert descr4.get_result_size(False) == rffi.sizeof(lltype.Float)
     assert descr4.get_return_type() == history.FLOAT
     assert descr4.arg_classes == "ff"
+    #
+    descr5 = get_call_descr(c0, [lltype.SingleFloat], lltype.SingleFloat)
+    assert descr5.get_result_size(False) == rffi.sizeof(lltype.SingleFloat)
+    assert descr5.get_return_type() == "S"
+    assert descr5.arg_classes == "S"
 
 def test_get_call_descr_not_translated_longlong():
     if sys.maxint > 2147483647:
@@ -286,6 +324,11 @@
     assert isinstance(descr4.get_result_size(True), Symbolic)
     assert descr4.get_return_type() == history.FLOAT
     assert descr4.arg_classes == "ff"
+    #
+    descr5 = get_call_descr(c1, [lltype.SingleFloat], lltype.SingleFloat)
+    assert isinstance(descr5.get_result_size(True), Symbolic)
+    assert descr5.get_return_type() == "S"
+    assert descr5.arg_classes == "S"
 
 def test_call_descr_extra_info():
     c1 = GcCache(True)
@@ -345,8 +388,11 @@
     #
     descr4f = get_call_descr(c0, [lltype.Char, lltype.Ptr(S)], lltype.Float)
     assert 'FloatCallDescr' in descr4f.repr_of_descr()
+    #
+    descr5f = get_call_descr(c0, [lltype.Char], lltype.SingleFloat)
+    assert 'SingleFloatCallDescr' in descr5f.repr_of_descr()
 
-def test_call_stubs():
+def test_call_stubs_1():
     c0 = GcCache(False)
     ARGS = [lltype.Char, lltype.Signed]
     RES = lltype.Char
@@ -360,6 +406,8 @@
     res = call_stub(rffi.cast(lltype.Signed, fnptr), [1, 2], None, None)
     assert res == ord('c')
 
+def test_call_stubs_2():
+    c0 = GcCache(False)
     ARRAY = lltype.GcArray(lltype.Signed)
     ARGS = [lltype.Float, lltype.Ptr(ARRAY)]
     RES = lltype.Float
@@ -375,3 +423,27 @@
     res = descr2.call_stub(rffi.cast(lltype.Signed, fnptr),
                            [], [opaquea], [longlong.getfloatstorage(3.5)])
     assert longlong.getrealfloat(res) == 4.5
+
+def test_call_stubs_single_float():
+    from pypy.rlib.longlong2float import uint2singlefloat, singlefloat2uint
+    from pypy.rlib.rarithmetic import r_singlefloat, intmask
+    #
+    c0 = GcCache(False)
+    ARGS = [lltype.SingleFloat, lltype.SingleFloat, lltype.SingleFloat]
+    RES = lltype.SingleFloat
+
+    def f(a, b, c):
+        a = float(a)
+        b = float(b)
+        c = float(c)
+        x = a - (b / c)
+        return r_singlefloat(x)
+
+    fnptr = llhelper(lltype.Ptr(lltype.FuncType(ARGS, RES)), f)
+    descr2 = get_call_descr(c0, ARGS, RES)
+    a = intmask(singlefloat2uint(r_singlefloat(-10.0)))
+    b = intmask(singlefloat2uint(r_singlefloat(3.0)))
+    c = intmask(singlefloat2uint(r_singlefloat(2.0)))
+    res = descr2.call_stub(rffi.cast(lltype.Signed, fnptr),
+                           [a, b, c], [], [])
+    assert float(uint2singlefloat(rffi.r_uint(res))) == -11.5
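
A plain-Python sketch, not part of the patch, of how test_call_stubs_single_float
above moves its values around: a 32-bit float is reinterpreted as its raw bit
pattern, travels through the call stub as an ordinary integer, and is
reinterpreted back on the other side.  struct stands in here for
rlib.longlong2float.singlefloat2uint / uint2singlefloat.

import struct

def singlefloat_to_bits(x):
    return struct.unpack('I', struct.pack('f', x))[0]

def bits_to_singlefloat(bits):
    return struct.unpack('f', struct.pack('I', bits))[0]

a = singlefloat_to_bits(-10.0)
b = singlefloat_to_bits(3.0)
c = singlefloat_to_bits(2.0)
# the callee unpacks its integer arguments, computes a - b / c, and packs
# the single-precision result back into an integer
res = singlefloat_to_bits(bits_to_singlefloat(a) -
                          bits_to_singlefloat(b) / bits_to_singlefloat(c))
assert bits_to_singlefloat(res) == -11.5
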
diff --git a/pypy/jit/backend/llsupport/test/test_ffisupport.py b/pypy/jit/backend/llsupport/test/test_ffisupport.py
--- a/pypy/jit/backend/llsupport/test/test_ffisupport.py
+++ b/pypy/jit/backend/llsupport/test/test_ffisupport.py
@@ -1,24 +1,52 @@
 from pypy.rlib.libffi import types
-from pypy.jit.backend.llsupport.ffisupport import get_call_descr_dynamic, \
-    VoidCallDescr, DynamicIntCallDescr
-    
+from pypy.jit.codewriter.longlong import is_64_bit
+from pypy.jit.backend.llsupport.ffisupport import *
+
+
+class FakeCPU:
+    def __init__(self, supports_floats=False, supports_longlong=False,
+                 supports_singlefloats=False):
+        self.supports_floats = supports_floats
+        self.supports_longlong = supports_longlong
+        self.supports_singlefloats = supports_singlefloats
+
+
 def test_call_descr_dynamic():
+    args = [types.sint, types.pointer]
+    descr = get_call_descr_dynamic(FakeCPU(), args, types.sint)
+    assert isinstance(descr, DynamicIntCallDescr)
+    assert descr.arg_classes == 'ii'
 
     args = [types.sint, types.double, types.pointer]
-    descr = get_call_descr_dynamic(args, types.void)
+    descr = get_call_descr_dynamic(FakeCPU(), args, types.void)
+    assert descr is None    # missing floats
+    descr = get_call_descr_dynamic(FakeCPU(supports_floats=True),
+                                   args, types.void)
     assert isinstance(descr, VoidCallDescr)
     assert descr.arg_classes == 'ifi'
 
-    descr = get_call_descr_dynamic([], types.sint8)
+    descr = get_call_descr_dynamic(FakeCPU(), [], types.sint8)
     assert isinstance(descr, DynamicIntCallDescr)
     assert descr.get_result_size(False) == 1
     assert descr.is_result_signed() == True
 
-    descr = get_call_descr_dynamic([], types.uint8)
+    descr = get_call_descr_dynamic(FakeCPU(), [], types.uint8)
     assert isinstance(descr, DynamicIntCallDescr)
     assert descr.get_result_size(False) == 1
     assert descr.is_result_signed() == False
 
-    descr = get_call_descr_dynamic([], types.float)
-    assert descr is None # single floats are not supported so far
-    
+    if not is_64_bit:
+        descr = get_call_descr_dynamic(FakeCPU(), [], types.slonglong)
+        assert descr is None   # missing longlongs
+        descr = get_call_descr_dynamic(FakeCPU(supports_longlong=True),
+                                       [], types.slonglong)
+        assert isinstance(descr, LongLongCallDescr)
+    else:
+        assert types.slonglong is types.slong
+
+    descr = get_call_descr_dynamic(FakeCPU(), [], types.float)
+    assert descr is None   # missing singlefloats
+    descr = get_call_descr_dynamic(FakeCPU(supports_singlefloats=True),
+                                   [], types.float)
+    SingleFloatCallDescr = getCallDescrClass(rffi.FLOAT)
+    assert isinstance(descr, SingleFloatCallDescr)
diff --git a/pypy/jit/backend/llvm/llvm_rffi.py b/pypy/jit/backend/llvm/llvm_rffi.py
--- a/pypy/jit/backend/llvm/llvm_rffi.py
+++ b/pypy/jit/backend/llvm/llvm_rffi.py
@@ -3,7 +3,7 @@
 from pypy.rpython.lltypesystem import lltype, rffi
 from pypy.translator.tool.cbuild import ExternalCompilationInfo, log
 
-if sys.platform != 'linux2':
+if not sys.platform.startswith('linux'):
     py.test.skip("Linux only for now")
 
 # ____________________________________________________________
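
Context for the startswith() change above, not part of the patch: sys.platform on
CPython 2.x is normally 'linux2', but an interpreter built on a Linux 3.x kernel
can report 'linux3', so matching the prefix is the robust check.

import sys

on_linux = sys.platform.startswith('linux')   # accepts 'linux2', 'linux3', 'linux'
print 'running on Linux:', on_linux
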
diff --git a/pypy/jit/backend/model.py b/pypy/jit/backend/model.py
--- a/pypy/jit/backend/model.py
+++ b/pypy/jit/backend/model.py
@@ -8,12 +8,13 @@
     # ^^^ This is only useful on 32-bit platforms.  If True,
     # longlongs are supported by the JIT, but stored as doubles.
     # Boxes and Consts are BoxFloats and ConstFloats.
+    supports_singlefloats = False
 
     done_with_this_frame_void_v = -1
     done_with_this_frame_int_v = -1
     done_with_this_frame_ref_v = -1
     done_with_this_frame_float_v = -1
-    exit_frame_with_exception_v = -1
+    propagate_exception_v = -1
     total_compiled_loops = 0
     total_compiled_bridges = 0
     total_freed_loops = 0
diff --git a/pypy/jit/backend/test/calling_convention_test.py b/pypy/jit/backend/test/calling_convention_test.py
--- a/pypy/jit/backend/test/calling_convention_test.py
+++ b/pypy/jit/backend/test/calling_convention_test.py
@@ -290,3 +290,58 @@
                 assert abs(x - expected_result) < 0.0001
             finally:
                 del self.cpu.done_with_this_frame_float_v
+
+    def test_call_with_singlefloats(self):
+        cpu = self.cpu
+        if not cpu.supports_floats or not cpu.supports_singlefloats:
+            py.test.skip('requires floats and singlefloats')
+
+        import random
+        from pypy.rlib.libffi import types
+        from pypy.rlib.rarithmetic import r_singlefloat
+
+        def func(*args):
+            res = 0.0
+            for i, x in enumerate(args):
+                res += (i + 1.1) * float(x)
+            return res
+
+        F = lltype.Float
+        S = lltype.SingleFloat
+        I = lltype.Signed
+        floats = [random.random() - 0.5 for i in range(8)]
+        singlefloats = [r_singlefloat(random.random() - 0.5) for i in range(8)]
+        ints = [random.randrange(-99, 99) for i in range(8)]
+        for repeat in range(100):
+            args = []
+            argvalues = []
+            argslist = []
+            local_floats = list(floats)
+            local_singlefloats = list(singlefloats)
+            local_ints = list(ints)
+            for i in range(8):
+                case = random.randrange(0, 3)
+                if case == 0:
+                    args.append(F)
+                    arg = local_floats.pop()
+                    argslist.append(boxfloat(arg))
+                elif case == 1:
+                    args.append(S)
+                    arg = local_singlefloats.pop()
+                    argslist.append(BoxInt(longlong.singlefloat2int(arg)))
+                else:
+                    args.append(I)
+                    arg = local_ints.pop()
+                    argslist.append(BoxInt(arg))
+                argvalues.append(arg)
+            FUNC = self.FuncType(args, F)
+            FPTR = self.Ptr(FUNC)
+            func_ptr = llhelper(FPTR, func)
+            calldescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
+            funcbox = self.get_funcbox(cpu, func_ptr)
+
+            res = self.execute_operation(rop.CALL,
+                                         [funcbox] + argslist,
+                                         'float', descr=calldescr)
+            expected = func(*argvalues)
+            assert abs(res.getfloat() - expected) < 0.0001
diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py
--- a/pypy/jit/backend/test/runner_test.py
+++ b/pypy/jit/backend/test/runner_test.py
@@ -1707,6 +1707,7 @@
             jit_wb_if_flag = 4096
             jit_wb_if_flag_byteofs = struct.pack("i", 4096).index('\x10')
             jit_wb_if_flag_singlebyte = 0x10
+            jit_wb_cards_set = 0
             def get_write_barrier_from_array_fn(self, cpu):
                 return funcbox.getint()
         #
@@ -1728,6 +1729,72 @@
             else:
                 assert record == []
 
+    def test_cond_call_gc_wb_array_card_marking_fast_path(self):
+        def func_void(a, b, c):
+            record.append((a, b, c))
+        record = []
+        #
+        S = lltype.Struct('S', ('tid', lltype.Signed))
+        S_WITH_CARDS = lltype.Struct('S_WITH_CARDS',
+                                     ('card0', lltype.Char),
+                                     ('card1', lltype.Char),
+                                     ('card2', lltype.Char),
+                                     ('card3', lltype.Char),
+                                     ('card4', lltype.Char),
+                                     ('card5', lltype.Char),
+                                     ('card6', lltype.Char),
+                                     ('card7', lltype.Char),
+                                     ('data',  S))
+        FUNC = self.FuncType([lltype.Ptr(S), lltype.Signed, lltype.Ptr(S)],
+                             lltype.Void)
+        func_ptr = llhelper(lltype.Ptr(FUNC), func_void)
+        funcbox = self.get_funcbox(self.cpu, func_ptr)
+        class WriteBarrierDescr(AbstractDescr):
+            jit_wb_if_flag = 4096
+            jit_wb_if_flag_byteofs = struct.pack("i", 4096).index('\x10')
+            jit_wb_if_flag_singlebyte = 0x10
+            jit_wb_cards_set = 8192
+            jit_wb_cards_set_byteofs = struct.pack("i", 8192).index('\x20')
+            jit_wb_cards_set_singlebyte = 0x20
+            jit_wb_card_page_shift = 7
+            def get_write_barrier_from_array_fn(self, cpu):
+                return funcbox.getint()
+        #
+        for BoxIndexCls in [BoxInt, ConstInt]:
+            for cond in [False, True]:
+                print
+                print '_'*79
+                print 'BoxIndexCls =', BoxIndexCls
+                print 'JIT_WB_CARDS_SET =', cond
+                print
+                value = random.randrange(-sys.maxint, sys.maxint)
+                value |= 4096
+                if cond:
+                    value |= 8192
+                else:
+                    value &= ~8192
+                s = lltype.malloc(S_WITH_CARDS, immortal=True, zero=True)
+                s.data.tid = value
+                sgcref = rffi.cast(llmemory.GCREF, s.data)
+                del record[:]
+                box_index = BoxIndexCls((9<<7) + 17)
+                self.execute_operation(rop.COND_CALL_GC_WB_ARRAY,
+                           [BoxPtr(sgcref), box_index, BoxPtr(sgcref)],
+                           'void', descr=WriteBarrierDescr())
+                if cond:
+                    assert record == []
+                    assert s.card6 == '\x02'
+                else:
+                    assert record == [(s.data, (9<<7) + 17, s.data)]
+                    assert s.card6 == '\x00'
+                assert s.card0 == '\x00'
+                assert s.card1 == '\x00'
+                assert s.card2 == '\x00'
+                assert s.card3 == '\x00'
+                assert s.card4 == '\x00'
+                assert s.card5 == '\x00'
+                assert s.card7 == '\x00'
+
     def test_force_operations_returning_void(self):
         values = []
         def maybe_force(token, flag):
@@ -2667,6 +2734,65 @@
                                      'float', descr=calldescr)
         assert res.getfloatstorage() == expected
 
+    def test_singlefloat_result_of_call_direct(self):
+        if not self.cpu.supports_singlefloats:
+            py.test.skip("singlefloat test")
+        from pypy.translator.tool.cbuild import ExternalCompilationInfo
+        from pypy.rlib.rarithmetic import r_singlefloat
+        eci = ExternalCompilationInfo(
+            separate_module_sources=["""
+            float fn_test_result_of_call(float x)
+            {
+                return x / 2.0f;
+            }
+            """],
+            export_symbols=['fn_test_result_of_call'])
+        f = rffi.llexternal('fn_test_result_of_call', [lltype.SingleFloat],
+                            lltype.SingleFloat,
+                            compilation_info=eci, _nowrapper=True)
+        value = r_singlefloat(-42.5)
+        expected = r_singlefloat(-21.25)
+        assert f(value) == expected
+        #
+        FUNC = self.FuncType([lltype.SingleFloat], lltype.SingleFloat)
+        FPTR = self.Ptr(FUNC)
+        calldescr = self.cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
+        ivalue = longlong.singlefloat2int(value)
+        iexpected = longlong.singlefloat2int(expected)
+        x = self.cpu.bh_call_i(self.get_funcbox(self.cpu, f).value,
+                               calldescr, [ivalue], None, None)
+        assert x == iexpected
+
+    def test_singlefloat_result_of_call_compiled(self):
+        if not self.cpu.supports_singlefloats:
+            py.test.skip("test of singlefloat result")
+        from pypy.translator.tool.cbuild import ExternalCompilationInfo
+        from pypy.rlib.rarithmetic import r_singlefloat
+        eci = ExternalCompilationInfo(
+            separate_module_sources=["""
+            float fn_test_result_of_call(float x)
+            {
+                return x / 2.0f;
+            }
+            """],
+            export_symbols=['fn_test_result_of_call'])
+        f = rffi.llexternal('fn_test_result_of_call', [lltype.SingleFloat],
+                            lltype.SingleFloat,
+                            compilation_info=eci, _nowrapper=True)
+        value = r_singlefloat(-42.5)
+        expected = r_singlefloat(-21.25)
+        assert f(value) == expected
+        #
+        FUNC = self.FuncType([lltype.SingleFloat], lltype.SingleFloat)
+        FPTR = self.Ptr(FUNC)
+        calldescr = self.cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
+        funcbox = self.get_funcbox(self.cpu, f)
+        ivalue = longlong.singlefloat2int(value)
+        iexpected = longlong.singlefloat2int(expected)
+        res = self.execute_operation(rop.CALL, [funcbox, BoxInt(ivalue)],
+                                     'int', descr=calldescr)
+        assert res.value == iexpected
+
     def test_free_loop_and_bridges(self):
         from pypy.jit.backend.llsupport.llmodel import AbstractLLCPU
         if not isinstance(self.cpu, AbstractLLCPU):
@@ -2681,6 +2807,26 @@
         assert mem2 < mem1
         assert mem2 == mem0
 
+    def test_memoryerror(self):
+        excdescr = BasicFailDescr(666)
+        self.cpu.propagate_exception_v = self.cpu.get_fail_descr_number(
+            excdescr)
+        self.cpu.setup_once()    # xxx redo it, because we added
+                                 # propagate_exception_v
+        i0 = BoxInt()
+        p0 = BoxPtr()
+        operations = [
+            ResOperation(rop.NEWUNICODE, [i0], p0),
+            ResOperation(rop.FINISH, [p0], None, descr=BasicFailDescr(1))
+            ]
+        inputargs = [i0]
+        looptoken = LoopToken()
+        self.cpu.compile_loop(inputargs, operations, looptoken)
+        # overflowing value:
+        self.cpu.set_future_value_int(0, sys.maxint // 4 + 1)
+        fail = self.cpu.execute_token(looptoken)
+        assert fail.identifier == excdescr.identifier
+
 
 class OOtypeBackendTest(BaseBackendTest):
 
diff --git a/pypy/jit/backend/x86/arch.py b/pypy/jit/backend/x86/arch.py
--- a/pypy/jit/backend/x86/arch.py
+++ b/pypy/jit/backend/x86/arch.py
@@ -27,3 +27,6 @@
 # which are used in the malloc itself.  They are:
 #   ecx, ebx, esi, edi               [32 and 64 bits]
 #   r8, r9, r10, r12, r13, r14, r15    [64 bits only]
+#
+# Note that with asmgcc, the locations corresponding to callee-save registers
+# are never used.
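
A standalone worked example, not part of the patch, of the card arithmetic behind
test_cond_call_gc_wb_array_card_marking_fast_path in runner_test.py above and the
SHR/XOR/BTS sequence emitted by assembler.py below: card bits sit just before the
object, one bit per 2**jit_wb_card_page_shift array items, counted backwards from
the start of the object.

jit_wb_card_page_shift = 7
index = (9 << 7) + 17                    # the array index used by the test

card = index >> jit_wb_card_page_shift   # which card bit: 1169 >> 7 == 9
byte_before_obj = (card >> 3) + 1        # lives in the 2nd byte before the object
bit_value = 1 << (card & 7)              # as bit 1 of that byte, i.e. 0x02

assert (card, byte_before_obj, bit_value) == (9, 2, 0x02)
# S_WITH_CARDS puts 'data' after eight card bytes, so "two bytes before data"
# is the field card6: hence the test's  assert s.card6 == '\x02'.
#
# The backend reaches the same bit with  SHR idx, 7;  XOR idx, -8;  BTS [obj], idx
# because, for non-negative i,  i ^ -8 == -(((i >> 3) + 1) * 8) + (i & 7):
assert card ^ -8 == -(byte_before_obj * 8) + (card & 7)
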
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -56,7 +56,9 @@
         self.exc = exc
         self.is_guard_not_invalidated = is_guard_not_invalidated
 
-DEBUG_COUNTER = lltype.Struct('DEBUG_COUNTER', ('i', lltype.Signed))
+DEBUG_COUNTER = lltype.Struct('DEBUG_COUNTER', ('i', lltype.Signed),
+                              ('bridge', lltype.Signed), # 0 or 1
+                              ('number', lltype.Signed))
 
 class Assembler386(object):
     _regalloc = None
@@ -89,6 +91,8 @@
         self._current_depths_cache = (0, 0)
         self.datablockwrapper = None
         self.stack_check_slowpath = 0
+        self.propagate_exception_path = 0
+        self.gcrootmap_retaddr_forced = 0
         self.teardown()
 
     def leave_jitted_hook(self):
@@ -125,6 +129,7 @@
             self._build_failure_recovery(True, withfloats=True)
             support.ensure_sse2_floats()
             self._build_float_constants()
+        self._build_propagate_exception_path()
         if gc_ll_descr.get_malloc_slowpath_addr is not None:
             self._build_malloc_slowpath()
         self._build_stack_check_slowpath()
@@ -138,6 +143,9 @@
         assert self.memcpy_addr != 0, "setup_once() not called?"
         self.current_clt = looptoken.compiled_loop_token
         self.pending_guard_tokens = []
+        if WORD == 8:
+            self.pending_memoryerror_trampoline_from = []
+            self.error_trampoline_64 = 0
         self.mc = codebuf.MachineCodeBlockWrapper()
         #assert self.datablockwrapper is None --- but obscure case
         # possible, e.g. getting MemoryError and continuing
@@ -147,6 +155,8 @@
 
     def teardown(self):
         self.pending_guard_tokens = None
+        if WORD == 8:
+            self.pending_memoryerror_trampoline_from = None
         self.mc = None
         self.looppos = -1
         self.currently_compiling_loop = None
@@ -155,9 +165,12 @@
     def finish_once(self):
         if self._debug:
             debug_start('jit-backend-counts')
-            for i in range(len(self.loop_run_counters)):
-                struct = self.loop_run_counters[i]
-                debug_print(str(i) + ':' + str(struct.i))
+            for struct in self.loop_run_counters:
+                if struct.bridge:
+                    prefix = 'bridge '
+                else:
+                    prefix = 'loop '
+                debug_print(prefix + str(struct.number) + ':' + str(struct.i))
             debug_stop('jit-backend-counts')
 
     def _build_float_constants(self):
@@ -181,6 +194,7 @@
         # instructions in assembler, with a mark_gc_roots in between.
         # With shadowstack, this is not needed, so we produce a single helper.
         gcrootmap = self.cpu.gc_ll_descr.gcrootmap
+        shadow_stack = (gcrootmap is not None and gcrootmap.is_shadow_stack)
         #
         # ---------- first helper for the slow path of malloc ----------
         mc = codebuf.MachineCodeBlockWrapper()
@@ -190,10 +204,19 @@
         mc.SUB_rr(edx.value, eax.value)       # compute the size we want
         addr = self.cpu.gc_ll_descr.get_malloc_slowpath_addr()
         #
-        if gcrootmap is not None and gcrootmap.is_shadow_stack:
+        # The registers to save in the copy area: with shadowstack, most
+        # registers need to be saved.  With asmgcc, the callee-saved registers
+        # don't need to.
+        save_in_copy_area = gpr_reg_mgr_cls.REGLOC_TO_COPY_AREA_OFS.items()
+        if not shadow_stack:
+            save_in_copy_area = [(reg, ofs) for (reg, ofs) in save_in_copy_area
+                   if reg not in gpr_reg_mgr_cls.REGLOC_TO_GCROOTMAP_REG_INDEX]
+        #
+        for reg, ofs in save_in_copy_area:
+            mc.MOV_br(ofs, reg.value)
+        #
+        if shadow_stack:
             # ---- shadowstack ----
-            for reg, ofs in gpr_reg_mgr_cls.REGLOC_TO_COPY_AREA_OFS.items():
-                mc.MOV_br(ofs, reg.value)
             mc.SUB_ri(esp.value, 16 - WORD)      # stack alignment of 16 bytes
             if IS_X86_32:
                 mc.MOV_sr(0, edx.value)          # push argument
@@ -201,15 +224,13 @@
                 mc.MOV_rr(edi.value, edx.value)
             mc.CALL(imm(addr))
             mc.ADD_ri(esp.value, 16 - WORD)
-            for reg, ofs in gpr_reg_mgr_cls.REGLOC_TO_COPY_AREA_OFS.items():
-                mc.MOV_rb(reg.value, ofs)
         else:
             # ---- asmgcc ----
             if IS_X86_32:
                 mc.MOV_sr(WORD, edx.value)       # save it as the new argument
             elif IS_X86_64:
-                # rdi can be clobbered: its content was forced to the stack
-                # by _fastpath_malloc(), like all other save_around_call_regs.
+                # rdi can be clobbered: its content was saved in the
+                # copy area of the stack
                 mc.MOV_rr(edi.value, edx.value)
             mc.JMP(imm(addr))                    # tail call to the real malloc
             rawstart = mc.materialize(self.cpu.asmmemmgr, [])
@@ -217,18 +238,54 @@
             # ---------- second helper for the slow path of malloc ----------
             mc = codebuf.MachineCodeBlockWrapper()
         #
+        for reg, ofs in save_in_copy_area:
+            mc.MOV_rb(reg.value, ofs)
+            assert reg is not eax and reg is not edx
+        #
         if self.cpu.supports_floats:          # restore the XMM registers
             for i in range(self.cpu.NUM_REGS):# from where they were saved
                 mc.MOVSD_xs(i, (WORD*2)+8*i)
+        #
+        # Note: we check this after the code above, just because the code
+        # above is more than 127 bytes on 64-bits...
+        mc.TEST_rr(eax.value, eax.value)
+        mc.J_il8(rx86.Conditions['Z'], 0) # patched later
+        jz_location = mc.get_relative_pos()
+        #
         nursery_free_adr = self.cpu.gc_ll_descr.get_nursery_free_addr()
         mc.MOV(edx, heap(nursery_free_adr))   # load this in EDX
         mc.RET()
+        #
+        # If the slowpath malloc failed, we raise a MemoryError that
+        # always interrupts the current loop, as a "good enough"
+        # approximation.  Also note that we didn't RET from this helper;
+        # but the code we jump to will actually restore the stack
+        # position based on EBP, which will get us out of here for free.
+        offset = mc.get_relative_pos() - jz_location
+        assert 0 < offset <= 127
+        mc.overwrite(jz_location-1, chr(offset))
+        mc.JMP(imm(self.propagate_exception_path))
+        #
         rawstart = mc.materialize(self.cpu.asmmemmgr, [])
         self.malloc_slowpath2 = rawstart
 
+    def _build_propagate_exception_path(self):
+        if self.cpu.propagate_exception_v < 0:
+            return      # not supported (for tests, or non-translated)
+        #
+        self.mc = codebuf.MachineCodeBlockWrapper()
+        # call on_leave_jitted_save_exc()
+        addr = self.cpu.get_on_leave_jitted_int(save_exception=True)
+        self.mc.CALL(imm(addr))
+        self.mc.MOV_ri(eax.value, self.cpu.propagate_exception_v)
+        self._call_footer()
+        rawstart = self.mc.materialize(self.cpu.asmmemmgr, [])
+        self.propagate_exception_path = rawstart
+        self.mc = None
+
     def _build_stack_check_slowpath(self):
         _, _, slowpathaddr = self.cpu.insert_stack_check()
-        if slowpathaddr == 0 or self.cpu.exit_frame_with_exception_v < 0:
+        if slowpathaddr == 0 or self.cpu.propagate_exception_v < 0:
             return      # no stack check (for tests, or non-translated)
         #
         # make a "function" that is called immediately at the start of
@@ -284,19 +341,11 @@
         offset = mc.get_relative_pos() - jnz_location
         assert 0 < offset <= 127
         mc.overwrite(jnz_location-1, chr(offset))
-        # clear the exception from the global position
-        mc.MOV(eax, heap(self.cpu.pos_exc_value()))
-        mc.MOV(heap(self.cpu.pos_exception()), imm0)
-        mc.MOV(heap(self.cpu.pos_exc_value()), imm0)
-        # save the current exception instance into fail_boxes_ptr[0]
-        adr = self.fail_boxes_ptr.get_addr_for_num(0)
-        mc.MOV(heap(adr), eax)
-        # call the helper function to set the GC flag on the fail_boxes_ptr
-        # array (note that there is no exception any more here)
-        addr = self.cpu.get_on_leave_jitted_int(save_exception=False)
+        # call on_leave_jitted_save_exc()
+        addr = self.cpu.get_on_leave_jitted_int(save_exception=True)
         mc.CALL(imm(addr))
         #
-        mc.MOV_ri(eax.value, self.cpu.exit_frame_with_exception_v)
+        mc.MOV_ri(eax.value, self.cpu.propagate_exception_v)
         #
         # footer -- note the ADD, which skips the return address of this
         # function, and will instead return to the caller's caller.  Note
@@ -309,6 +358,7 @@
         self.stack_check_slowpath = rawstart
 
     @staticmethod
+    @rgc.no_collect
     def _release_gil_asmgcc(css):
         # similar to trackgcroot.py:pypy_asm_stackwalk, first part
         from pypy.rpython.memory.gctransform import asmgcroot
@@ -324,6 +374,7 @@
             before()
 
     @staticmethod
+    @rgc.no_collect
     def _reacquire_gil_asmgcc(css):
         # first reacquire the GIL
         after = rffi.aroundstate.after
@@ -338,12 +389,14 @@
         next.prev = prev
 
     @staticmethod
+    @rgc.no_collect
     def _release_gil_shadowstack():
         before = rffi.aroundstate.before
         if before:
             before()
 
     @staticmethod
+    @rgc.no_collect
     def _reacquire_gil_shadowstack():
         after = rffi.aroundstate.after
         if after:
@@ -392,7 +445,7 @@
         self.setup(looptoken)
         self.currently_compiling_loop = looptoken
         if log:
-            self._register_counter()
+            self._register_counter(False, looptoken.number)
             operations = self._inject_debugging_code(looptoken, operations)
 
         regalloc = RegAlloc(self, self.cpu.translate_support_code)
@@ -416,10 +469,13 @@
         fullsize = self.mc.get_relative_pos()
         #
         rawstart = self.materialize_loop(looptoken)
-        debug_print("Loop #%d (%s) has address %x to %x" % (
+        debug_start("jit-backend-addr")
+        debug_print("Loop %d (%s) has address %x to %x (bootstrap %x)" % (
             looptoken.number, loopname,
             rawstart + self.looppos,
-            rawstart + directbootstrappos))
+            rawstart + directbootstrappos,
+            rawstart))
+        debug_stop("jit-backend-addr")
         self._patch_stackadjust(rawstart + stackadjustpos,
                                 frame_depth + param_depth)
         self.patch_pending_failure_recoveries(rawstart)
@@ -458,7 +514,7 @@
 
         self.setup(original_loop_token)
         if log:
-            self._register_counter()
+            self._register_counter(True, descr_number)
             operations = self._inject_debugging_code(faildescr, operations)
 
         arglocs = self.rebuild_faillocs_from_descr(failure_recovery)
@@ -478,9 +534,10 @@
         fullsize = self.mc.get_relative_pos()
         #
         rawstart = self.materialize_loop(original_loop_token)
-
-        debug_print("Bridge out of guard %d has address %x to %x" %
+        debug_start("jit-backend-addr")
+        debug_print("bridge out of Guard %d has address %x to %x" %
                     (descr_number, rawstart, rawstart + codeendpos))
+        debug_stop("jit-backend-addr")
         self._patch_stackadjust(rawstart + stackadjustpos,
                                 frame_depth + param_depth)
         self.patch_pending_failure_recoveries(rawstart)
@@ -504,6 +561,8 @@
         # at the end of self.mc.
         for tok in self.pending_guard_tokens:
             tok.pos_recovery_stub = self.generate_quick_failure(tok)
+        if WORD == 8 and len(self.pending_memoryerror_trampoline_from) > 0:
+            self.error_trampoline_64 = self.generate_propagate_error_64()
 
     def patch_pending_failure_recoveries(self, rawstart):
         # after we wrote the assembler to raw memory, set up
@@ -540,6 +599,12 @@
                 # less, we would run into the issue that overwriting the
                 # 5 bytes here might get a few nonsense bytes at the
                 # return address of the following CALL.
+        if WORD == 8:
+            for pos_after_jz in self.pending_memoryerror_trampoline_from:
+                assert self.error_trampoline_64 != 0     # only if non-empty
+                mc = codebuf.MachineCodeBlockWrapper()
+                mc.writeimm32(self.error_trampoline_64 - pos_after_jz)
+                mc.copy_to_raw_memory(rawstart + pos_after_jz - 4)
 
     def get_asmmemmgr_blocks(self, looptoken):
         clt = looptoken.compiled_loop_token
@@ -554,7 +619,7 @@
         return self.mc.materialize(self.cpu.asmmemmgr, allblocks,
                                    self.cpu.gc_ll_descr.gcrootmap)
 
-    def _register_counter(self):
+    def _register_counter(self, bridge, number):
         if self._debug:
             # YYY very minor leak -- we need the counters to stay alive
             # forever, just because we want to report them at the end
@@ -562,6 +627,8 @@
             struct = lltype.malloc(DEBUG_COUNTER, flavor='raw',
                                    track_allocation=False)
             struct.i = 0
+            struct.bridge = int(bridge)
+            struct.number = number
             self.loop_run_counters.append(struct)
 
     def _find_failure_recovery_bytecode(self, faildescr):
@@ -1052,9 +1119,10 @@
                     self.implement_guard(guard_token, checkfalsecond)
         return genop_cmp_guard_float
 
-    def _emit_call(self, force_index, x, arglocs, start=0, tmp=eax):
+    def _emit_call(self, force_index, x, arglocs, start=0, tmp=eax,
+                   argtypes=None):
         if IS_X86_64:
-            return self._emit_call_64(force_index, x, arglocs, start)
+            return self._emit_call_64(force_index, x, arglocs, start, argtypes)
 
         p = 0
         n = len(arglocs)
@@ -1082,12 +1150,13 @@
         self.mc.CALL(x)
         self.mark_gc_roots(force_index)
 
-    def _emit_call_64(self, force_index, x, arglocs, start):
+    def _emit_call_64(self, force_index, x, arglocs, start, argtypes):
         src_locs = []
         dst_locs = []
         xmm_src_locs = []
         xmm_dst_locs = []
         pass_on_stack = []
+        singlefloats = None
 
         # In reverse order for use with pop()
         unused_gpr = [r9, r8, ecx, edx, esi, edi]
@@ -1107,6 +1176,11 @@
                     xmm_dst_locs.append(unused_xmm.pop())
                 else:
                     pass_on_stack.append(loc)
+            elif (argtypes is not None and argtypes[i-start] == 'S' and
+                  len(unused_xmm) > 0):
+                # Singlefloat argument
+                if singlefloats is None: singlefloats = []
+                singlefloats.append((loc, unused_xmm.pop()))
             else:
                 if len(unused_gpr) > 0:
                     src_locs.append(loc)
@@ -1134,9 +1208,15 @@
                 else:
                     self.mc.MOV_sr(i*WORD, loc.value)
 
-        # Handle register arguments
+        # Handle register arguments: first remap the xmm arguments
+        remap_frame_layout(self, xmm_src_locs, xmm_dst_locs,
+                           X86_64_XMM_SCRATCH_REG)
+        # Load the singlefloat arguments from main regs or stack to xmm regs
+        if singlefloats is not None:
+            for src, dst in singlefloats:
+                self.mc.MOVD(dst, src)
+        # Finally remap the arguments in the main regs
         remap_frame_layout(self, src_locs, dst_locs, X86_64_SCRATCH_REG)
-        remap_frame_layout(self, xmm_src_locs, xmm_dst_locs, X86_64_XMM_SCRATCH_REG)
 
         self._regalloc.reserve_param(len(pass_on_stack))
         self.mc.CALL(x)
@@ -1251,6 +1331,20 @@
     def genop_cast_int_to_float(self, op, arglocs, resloc):
         self.mc.CVTSI2SD(resloc, arglocs[0])
 
+    def genop_cast_float_to_singlefloat(self, op, arglocs, resloc):
+        loc0, loctmp = arglocs
+        self.mc.CVTSD2SS(loctmp, loc0)
+        assert isinstance(resloc, RegLoc)
+        assert isinstance(loctmp, RegLoc)
+        self.mc.MOVD_rx(resloc.value, loctmp.value)
+
+    def genop_cast_singlefloat_to_float(self, op, arglocs, resloc):
+        loc0, = arglocs
+        assert isinstance(resloc, RegLoc)
+        assert isinstance(loc0, RegLoc)
+        self.mc.MOVD_xr(resloc.value, loc0.value)
+        self.mc.CVTSS2SD_xx(resloc.value, resloc.value)
+
     def genop_guard_int_is_true(self, op, guard_op, guard_token, arglocs, resloc):
         guard_opnum = guard_op.getopnum()
         self.mc.CMP(arglocs[0], imm0)
@@ -1372,7 +1466,7 @@
         assert isinstance(loc_vtable, ImmedLoc)
         arglocs = arglocs[:-1]
         self.call(self.malloc_func_addr, arglocs, eax)
-        # xxx ignore NULL returns for now
+        self.propagate_memoryerror_if_eax_is_null()
         self.set_vtable(eax, loc_vtable)
 
     def set_vtable(self, loc, loc_vtable):
@@ -1391,18 +1485,35 @@
     def genop_new(self, op, arglocs, result_loc):
         assert result_loc is eax
         self.call(self.malloc_func_addr, arglocs, eax)
+        self.propagate_memoryerror_if_eax_is_null()
 
     def genop_new_array(self, op, arglocs, result_loc):
         assert result_loc is eax
         self.call(self.malloc_array_func_addr, arglocs, eax)
+        self.propagate_memoryerror_if_eax_is_null()
 
     def genop_newstr(self, op, arglocs, result_loc):
         assert result_loc is eax
         self.call(self.malloc_str_func_addr, arglocs, eax)
+        self.propagate_memoryerror_if_eax_is_null()
 
     def genop_newunicode(self, op, arglocs, result_loc):
         assert result_loc is eax
         self.call(self.malloc_unicode_func_addr, arglocs, eax)
+        self.propagate_memoryerror_if_eax_is_null()
+
+    def propagate_memoryerror_if_eax_is_null(self):
+        # If self.propagate_exception_path == 0 (in tests), this may jump to 0
+        # and segfault.  Too bad.  The alternative is to continue anyway
+        # with eax == 0, but that would segfault too.
+        self.mc.TEST_rr(eax.value, eax.value)
+        if WORD == 4:
+            self.mc.J_il(rx86.Conditions['Z'], self.propagate_exception_path)
+            self.mc.add_pending_relocation()
+        elif WORD == 8:
+            self.mc.J_il(rx86.Conditions['Z'], 0)
+            pos = self.mc.get_relative_pos()
+            self.pending_memoryerror_trampoline_from.append(pos)
 
     # ----------
 
@@ -1674,6 +1785,12 @@
         return GuardToken(faildescr, failargs, fail_locs, exc,
                           is_guard_not_invalidated)
 
+    def generate_propagate_error_64(self):
+        assert WORD == 8
+        startpos = self.mc.get_relative_pos()
+        self.mc.JMP(imm(self.propagate_exception_path))
+        return startpos
+
     def generate_quick_failure(self, guardtok):
         """Generate the initial code for handling a failure.  We try to
         keep it as compact as possible.
@@ -2009,7 +2126,8 @@
         else:
             tmp = eax
 
-        self._emit_call(force_index, x, arglocs, 3, tmp=tmp)
+        self._emit_call(force_index, x, arglocs, 3, tmp=tmp,
+                        argtypes=op.getdescr().get_arg_types())
 
         if IS_X86_32 and isinstance(resloc, StackLoc) and resloc.width == 8:
             # a float or a long long return
@@ -2021,7 +2139,19 @@
                 #     and this way is simpler also because the result loc
                 #     can just be always a stack location
             else:
-                self.mc.FSTP_b(resloc.value)   # float return
+                self.mc.FSTPL_b(resloc.value)   # float return
+        elif op.getdescr().get_return_type() == 'S':
+            # singlefloat return
+            assert resloc is eax
+            if IS_X86_32:
+                # must convert ST(0) to a 32-bit singlefloat and load it into EAX
+                # mess mess mess
+                self.mc.SUB_ri(esp.value, 4)
+                self.mc.FSTPS_s(0)
+                self.mc.POP_r(eax.value)
+            elif IS_X86_64:
+                # must copy from the lower 32 bits of XMM0 into eax
+                self.mc.MOVD_rx(eax.value, xmm0.value)
         elif size == WORD:
             assert resloc is eax or resloc is xmm0    # a full word
         elif size == 0:
@@ -2093,13 +2223,27 @@
                 css = get_ebp_ofs(pos + use_words - 1)
                 self._regalloc.close_stack_struct = css
             # The location where the future CALL will put its return address
-            # will be [ESP-WORD], so save that as the next frame's top address
-            self.mc.LEA_rs(eax.value, -WORD)        # LEA EAX, [ESP-4]
+            # will be [ESP-WORD].  But we can't use that as the next frame's
+            # top address!  As the code after releasegil() runs without the
+            # GIL, it might not be set yet by the time we need it (very
+            # unlikely), or it might be overwritten by the following call
+            # to reacquiregil() (much more likely).  So we hack even more
+            # and use a dummy location containing a dummy value (a pointer
+            # to itself) which we pretend is the return address :-/ :-/ :-/
+            # It prevents us from storing any %esp-based stack locations,
+            # but we don't need to so far.
+            adr = self.datablockwrapper.malloc_aligned(WORD, WORD)
+            rffi.cast(rffi.CArrayPtr(lltype.Signed), adr)[0] = adr
+            self.gcrootmap_retaddr_forced = adr
             frame_ptr = css + WORD * (2+asmgcroot.FRAME_PTR)
-            self.mc.MOV_br(frame_ptr, eax.value)    # MOV [css.frame], EAX
+            if rx86.fits_in_32bits(adr):
+                self.mc.MOV_bi(frame_ptr, adr)          # MOV [css.frame], adr
+            else:
+                self.mc.MOV_ri(eax.value, adr)          # MOV EAX, adr
+                self.mc.MOV_br(frame_ptr, eax.value)    # MOV [css.frame], EAX
             # Save ebp
             index_of_ebp = css + WORD * (2+asmgcroot.INDEX_OF_EBP)
-            self.mc.MOV_br(index_of_ebp, ebp.value) # MOV [css.ebp], EBP
+            self.mc.MOV_br(index_of_ebp, ebp.value)     # MOV [css.ebp], EBP
             # Call the closestack() function (also releasing the GIL)
             if IS_X86_32:
                 reg = eax
@@ -2127,6 +2271,9 @@
         if gcrootmap.is_shadow_stack:
             args = []
         else:
+            assert self.gcrootmap_retaddr_forced == -1, (
+                      "missing mark_gc_roots() in CALL_RELEASE_GIL")
+            self.gcrootmap_retaddr_forced = 0
             css = self._regalloc.close_stack_struct
             assert css != 0
             if IS_X86_32:
@@ -2179,7 +2326,7 @@
         self._emit_call(fail_index, imm(asm_helper_adr), [eax, arglocs[1]], 0,
                         tmp=ecx)
         if IS_X86_32 and isinstance(result_loc, StackLoc) and result_loc.type == FLOAT:
-            self.mc.FSTP_b(result_loc.value)
+            self.mc.FSTPL_b(result_loc.value)
         #else: result_loc is already either eax or None, checked below
         self.mc.JMP_l8(0) # jump to done, patched later
         jmp_location = self.mc.get_relative_pos()
@@ -2242,10 +2389,12 @@
         if opnum == rop.COND_CALL_GC_WB:
             N = 2
             func = descr.get_write_barrier_fn(self.cpu)
+            card_marking = False
         elif opnum == rop.COND_CALL_GC_WB_ARRAY:
             N = 3
             func = descr.get_write_barrier_from_array_fn(self.cpu)
             assert func != 0
+            card_marking = descr.jit_wb_cards_set != 0
         else:
             raise AssertionError(opnum)
         #
@@ -2254,6 +2403,18 @@
                       imm(descr.jit_wb_if_flag_singlebyte))
         self.mc.J_il8(rx86.Conditions['Z'], 0) # patched later
         jz_location = self.mc.get_relative_pos()
+
+        # for cond_call_gc_wb_array, also add another fast path:
+        # if GCFLAG_CARDS_SET, then we can just set one bit and be done
+        if card_marking:
+            self.mc.TEST8(addr_add_const(loc_base,
+                                         descr.jit_wb_cards_set_byteofs),
+                          imm(descr.jit_wb_cards_set_singlebyte))
+            self.mc.J_il8(rx86.Conditions['NZ'], 0) # patched later
+            jnz_location = self.mc.get_relative_pos()
+        else:
+            jnz_location = 0
+
         # the following is supposed to be the slow path, so whenever possible
         # we choose the most compact encoding over the most efficient one.
         if IS_X86_32:
@@ -2293,6 +2454,43 @@
             loc = arglocs[i]
             assert isinstance(loc, RegLoc)
             self.mc.POP_r(loc.value)
+
+        # if GCFLAG_CARDS_SET, then we can do the whole thing that would
+        # be done in the CALL above with just a few instructions, so here
+        # is an inline copy of them
+        if card_marking:
+            self.mc.JMP_l8(0) # jump to the exit, patched later
+            jmp_location = self.mc.get_relative_pos()
+            # patch the JNZ above
+            offset = self.mc.get_relative_pos() - jnz_location
+            assert 0 < offset <= 127
+            self.mc.overwrite(jnz_location-1, chr(offset))
+            #
+            loc_index = arglocs[1]
+            if isinstance(loc_index, RegLoc):
+                # choose a scratch register
+                tmp1 = loc_index
+                self.mc.PUSH_r(tmp1.value)
+                # SHR tmp, card_page_shift
+                self.mc.SHR_ri(tmp1.value, descr.jit_wb_card_page_shift)
+                # XOR tmp, -8
+                self.mc.XOR_ri(tmp1.value, -8)
+                # BTS [loc_base], tmp
+                self.mc.BTS(addr_add_const(loc_base, 0), tmp1)
+                # done
+                self.mc.POP_r(tmp1.value)
+            elif isinstance(loc_index, ImmedLoc):
+                byte_index = loc_index.value >> descr.jit_wb_card_page_shift
+                byte_ofs = ~(byte_index >> 3)
+                byte_val = 1 << (byte_index & 7)
+                self.mc.OR8(addr_add_const(loc_base, byte_ofs), imm(byte_val))
+            else:
+                raise AssertionError("index is neither RegLoc nor ImmedLoc")
+            # patch the JMP above
+            offset = self.mc.get_relative_pos() - jmp_location
+            assert 0 < offset <= 127
+            self.mc.overwrite(jmp_location-1, chr(offset))
+        #
         # patch the JZ above
         offset = self.mc.get_relative_pos() - jz_location
         assert 0 < offset <= 127
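(For readers following the card-marking arithmetic in the ImmedLoc branch above,
here is a minimal Python sketch of the same computation; the value of
card_page_shift and the array index are hypothetical, chosen only for
illustration, and this is not part of the patch.)

    card_page_shift = 7            # assumed value, for illustration only
    array_index = 1000             # the constant ImmedLoc value
    byte_index = array_index >> card_page_shift   # which card bit to set
    byte_ofs = ~(byte_index >> 3)                 # byte offset, counted
                                                  # backwards from loc_base
    byte_val = 1 << (byte_index & 7)              # bit within that byte
    # the single emitted instruction is then equivalent to:
    #     OR8 [loc_base + byte_ofs], byte_val
    assert (byte_ofs, byte_val) == (-1, 128)      # since 1000 >> 7 == 7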
@@ -2326,7 +2524,13 @@
             if gcrootmap.is_shadow_stack:
                 gcrootmap.write_callshape(mark, force_index)
             else:
-                self.mc.insert_gcroot_marker(mark)
+                if self.gcrootmap_retaddr_forced == 0:
+                    self.mc.insert_gcroot_marker(mark)   # common case
+                else:
+                    assert self.gcrootmap_retaddr_forced != -1, (
+                              "two mark_gc_roots() in a CALL_RELEASE_GIL")
+                    gcrootmap.put(self.gcrootmap_retaddr_forced, mark)
+                    self.gcrootmap_retaddr_forced = -1
 
     def target_arglocs(self, loop_token):
         return loop_token._x86_arglocs
@@ -2369,8 +2573,7 @@
             # there are two helpers to call only with asmgcc
             slowpath_addr1 = self.malloc_slowpath1
             self.mc.CALL(imm(slowpath_addr1))
-        self.mark_gc_roots(self.write_new_force_index(),
-                           use_copy_area=shadow_stack)
+        self.mark_gc_roots(self.write_new_force_index(), use_copy_area=True)
         slowpath_addr2 = self.malloc_slowpath2
         self.mc.CALL(imm(slowpath_addr2))
 
diff --git a/pypy/jit/backend/x86/codebuf.py b/pypy/jit/backend/x86/codebuf.py
--- a/pypy/jit/backend/x86/codebuf.py
+++ b/pypy/jit/backend/x86/codebuf.py
@@ -25,8 +25,11 @@
         self.init_block_builder()
         # a list of relative positions; for each position p, the bytes
         # at [p-4:p] encode an absolute address that will need to be
-        # made relative.
-        self.relocations = []
+        # made relative.  Only works on 32-bit!
+        if WORD == 4:
+            self.relocations = []
+        else:
+            self.relocations = None
         #
         # ResOperation --> offset in the assembly.
         # ops_offset[None] represents the beginning of the code after the last op
@@ -42,9 +45,10 @@
 
     def copy_to_raw_memory(self, addr):
         self._copy_to_raw_memory(addr)
-        for reloc in self.relocations:
-            p = addr + reloc
-            adr = rffi.cast(rffi.LONGP, p - WORD)
-            adr[0] = intmask(adr[0] - p)
+        if self.relocations is not None:
+            for reloc in self.relocations:
+                p = addr + reloc
+                adr = rffi.cast(rffi.LONGP, p - WORD)
+                adr[0] = intmask(adr[0] - p)
         valgrind.discard_translations(addr, self.get_relative_pos())
         self._dump(addr, "jit-backend-dump", backend_name)
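(A rough sketch of the 32-bit relocation pass kept above; the function name and
the use of bytearray/struct are illustrative assumptions, not the rffi-based
code the backend really runs.)

    import struct

    def apply_relocations(code, relocations, addr):
        # 'code' is a bytearray already copied to its final address 'addr';
        # each recorded position p marks the end of a 4-byte field holding an
        # absolute target, rewritten in place as a displacement relative to p
        # (the address just after the field, as CALL expects)
        for reloc in relocations:
            p = addr + reloc
            target, = struct.unpack('<I', bytes(code[reloc - 4:reloc]))
            code[reloc - 4:reloc] = struct.pack('<I', (target - p) & 0xffffffff)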
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -705,6 +705,17 @@
         self.Perform(op, [loc0], loc1)
         self.rm.possibly_free_var(op.getarg(0))
 
+    def consider_cast_float_to_singlefloat(self, op):
+        loc0 = self.xrm.make_sure_var_in_reg(op.getarg(0))
+        loc1 = self.rm.force_allocate_reg(op.result)
+        self.xrm.possibly_free_var(op.getarg(0))
+        tmpxvar = TempBox()
+        loctmp = self.xrm.force_allocate_reg(tmpxvar)   # may be equal to loc0
+        self.xrm.possibly_free_var(tmpxvar)
+        self.Perform(op, [loc0, loctmp], loc1)
+
+    consider_cast_singlefloat_to_float = consider_cast_int_to_float
+
     def _consider_llong_binop_xx(self, op):
         # must force both arguments into xmm registers, because we don't
         # know if they will be suitably aligned.  Exception: if the second
@@ -921,27 +932,13 @@
     def _do_fastpath_malloc(self, op, size, tid):
         gc_ll_descr = self.assembler.cpu.gc_ll_descr
         self.rm.force_allocate_reg(op.result, selected_reg=eax)
-
-        if gc_ll_descr.gcrootmap and gc_ll_descr.gcrootmap.is_shadow_stack:
-            # ---- shadowstack ----
-            # We need edx as a temporary, but otherwise don't save any more
-            # register.  See comments in _build_malloc_slowpath().
-            tmp_box = TempBox()
-            self.rm.force_allocate_reg(tmp_box, selected_reg=edx)
-            self.rm.possibly_free_var(tmp_box)
-        else:
-            # ---- asmgcc ----
-            # We need to force-allocate each of save_around_call_regs now.
-            # The alternative would be to save and restore them around the
-            # actual call to malloc(), in the rare case where we need to do
-            # it; however, mark_gc_roots() would need to be adapted to know
-            # where the variables end up being saved.  Messy.
-            for reg in self.rm.save_around_call_regs:
-                if reg is not eax:
-                    tmp_box = TempBox()
-                    self.rm.force_allocate_reg(tmp_box, selected_reg=reg)
-                    self.rm.possibly_free_var(tmp_box)
-
+        #
+        # We need edx as a temporary, but otherwise don't save any more
+        # registers.  See comments in _build_malloc_slowpath().
+        tmp_box = TempBox()
+        self.rm.force_allocate_reg(tmp_box, selected_reg=edx)
+        self.rm.possibly_free_var(tmp_box)
+        #
         self.assembler.malloc_cond(
             gc_ll_descr.get_nursery_free_addr(),
             gc_ll_descr.get_nursery_top_addr(),
@@ -1337,14 +1334,26 @@
             if reg is eax:
                 continue      # ok to ignore this one
             if (isinstance(v, BoxPtr) and self.rm.stays_alive(v)):
-                if use_copy_area:
-                    assert reg in self.rm.REGLOC_TO_COPY_AREA_OFS
-                    area_offset = self.rm.REGLOC_TO_COPY_AREA_OFS[reg]
-                    gcrootmap.add_frame_offset(shape, area_offset)
-                else:
-                    assert reg in self.rm.REGLOC_TO_GCROOTMAP_REG_INDEX
-                    gcrootmap.add_callee_save_reg(
-                        shape, self.rm.REGLOC_TO_GCROOTMAP_REG_INDEX[reg])
+                #
+                # The register 'reg' is alive across this call.
+                gcrootmap = self.assembler.cpu.gc_ll_descr.gcrootmap
+                if gcrootmap is None or not gcrootmap.is_shadow_stack:
+                    #
+                    # Asmgcc: if reg is a callee-save register, we can
+                    # explicitly mark it as containing a BoxPtr.
+                    if reg in self.rm.REGLOC_TO_GCROOTMAP_REG_INDEX:
+                        gcrootmap.add_callee_save_reg(
+                            shape, self.rm.REGLOC_TO_GCROOTMAP_REG_INDEX[reg])
+                        continue
+                #
+                # Else, 'use_copy_area' must be True (otherwise this BoxPtr
+                # should not be in a register).  The copy area contains the
+                # real value of the register.
+                assert use_copy_area
+                assert reg in self.rm.REGLOC_TO_COPY_AREA_OFS
+                area_offset = self.rm.REGLOC_TO_COPY_AREA_OFS[reg]
+                gcrootmap.add_frame_offset(shape, area_offset)
+        #
         return gcrootmap.compress_callshape(shape,
                                             self.assembler.datablockwrapper)
 
diff --git a/pypy/jit/backend/x86/regloc.py b/pypy/jit/backend/x86/regloc.py
--- a/pypy/jit/backend/x86/regloc.py
+++ b/pypy/jit/backend/x86/regloc.py
@@ -476,6 +476,7 @@
 
     AND = _binaryop('AND')
     OR  = _binaryop('OR')
+    OR8 = _binaryop('OR8')
     XOR = _binaryop('XOR')
     NOT = _unaryop('NOT')
     SHL = _binaryop('SHL')
@@ -483,6 +484,7 @@
     SAR = _binaryop('SAR')
     TEST = _binaryop('TEST')
     TEST8 = _binaryop('TEST8')
+    BTS = _binaryop('BTS')
 
     ADD = _binaryop('ADD')
     SUB = _binaryop('SUB')
@@ -519,6 +521,8 @@
     UCOMISD = _binaryop('UCOMISD')
     CVTSI2SD = _binaryop('CVTSI2SD')
     CVTTSD2SI = _binaryop('CVTTSD2SI')
+    CVTSD2SS = _binaryop('CVTSD2SS')
+    CVTSS2SD = _binaryop('CVTSS2SD')
     
     SQRTSD = _binaryop('SQRTSD')
 
@@ -532,6 +536,8 @@
     PXOR  = _binaryop('PXOR')
     PCMPEQD = _binaryop('PCMPEQD')
 
+    MOVD = _binaryop('MOVD')
+
     CALL = _relative_unaryop('CALL')
     JMP = _relative_unaryop('JMP')
 
diff --git a/pypy/jit/backend/x86/runner.py b/pypy/jit/backend/x86/runner.py
--- a/pypy/jit/backend/x86/runner.py
+++ b/pypy/jit/backend/x86/runner.py
@@ -19,6 +19,7 @@
 class AbstractX86CPU(AbstractLLCPU):
     debug = True
     supports_floats = True
+    supports_singlefloats = True
 
     BOOTSTRAP_TP = lltype.FuncType([], lltype.Signed)
     dont_keepalive_stuff = False # for tests
diff --git a/pypy/jit/backend/x86/rx86.py b/pypy/jit/backend/x86/rx86.py
--- a/pypy/jit/backend/x86/rx86.py
+++ b/pypy/jit/backend/x86/rx86.py
@@ -464,7 +464,7 @@
 
     # ------------------------------ MOV ------------------------------
 
-    MOV_ri = insn(rex_w, register(1), '\xB8', immediate(2, 'q'))
+    MOV_ri = insn(register(1), '\xB8', immediate(2))
     MOV8_ri = insn(rex_fw, byte_register(1), '\xB0', immediate(2, 'b'))
 
     # ------------------------------ Arithmetic ------------------------------
@@ -496,6 +496,10 @@
     AND8_rr = insn(rex_fw, '\x20', byte_register(1), byte_register(2,8), '\xC0')
 
     OR8_rr = insn(rex_fw, '\x08', byte_register(1), byte_register(2,8), '\xC0')
+    OR8_mi = insn(rex_fw, '\x80', orbyte(1<<3), mem_reg_plus_const(1),
+                  immediate(2, 'b'))
+    OR8_ji = insn(rex_fw, '\x80', orbyte(1<<3), abs_, immediate(1),
+                  immediate(2, 'b'))
 
     NEG_r = insn(rex_w, '\xF7', register(1), '\xD8')
 
@@ -565,8 +569,12 @@
     TEST8_ji = insn(rex_nw, '\xF6', orbyte(0<<3), abs_, immediate(1), immediate(2, 'b'))
     TEST_rr = insn(rex_w, '\x85', register(2,8), register(1), '\xC0')
 
+    BTS_mr = insn(rex_w, '\x0F\xAB', register(2,8), mem_reg_plus_const(1))
+    BTS_jr = insn(rex_w, '\x0F\xAB', register(2,8), abs_, immediate(1))
+
     # x87 instructions
-    FSTP_b = insn('\xDD', orbyte(3<<3), stack_bp(1))
+    FSTPL_b = insn('\xDD', orbyte(3<<3), stack_bp(1)) # rffi.DOUBLE ('as' wants L??)
+    FSTPS_s = insn('\xD9', orbyte(3<<3), stack_sp(1)) # lltype.SingleFloat
 
     # ------------------------------ Random mess -----------------------
     RDTSC = insn('\x0F\x31')
@@ -583,8 +591,18 @@
     CVTTSD2SI_rx = xmminsn('\xF2', rex_w, '\x0F\x2C', register(1, 8), register(2), '\xC0')
     CVTTSD2SI_rb = xmminsn('\xF2', rex_w, '\x0F\x2C', register(1, 8), stack_bp(2))
 
-    MOVD_rx = xmminsn('\x66', rex_w, '\x0F\x7E', register(2, 8), register(1), '\xC0')
-    MOVD_xr = xmminsn('\x66', rex_w, '\x0F\x6E', register(1, 8), register(2), '\xC0')
+    CVTSD2SS_xx = xmminsn('\xF2', rex_nw, '\x0F\x5A',
+                          register(1, 8), register(2), '\xC0')
+    CVTSD2SS_xb = xmminsn('\xF2', rex_nw, '\x0F\x5A',
+                          register(1, 8), stack_bp(2))
+    CVTSS2SD_xx = xmminsn('\xF3', rex_nw, '\x0F\x5A',
+                          register(1, 8), register(2), '\xC0')
+    CVTSS2SD_xb = xmminsn('\xF3', rex_nw, '\x0F\x5A',
+                          register(1, 8), stack_bp(2))
+
+    MOVD_rx = xmminsn('\x66', rex_nw, '\x0F\x7E', register(2, 8), register(1), '\xC0')
+    MOVD_xr = xmminsn('\x66', rex_nw, '\x0F\x6E', register(1, 8), register(2), '\xC0')
+    MOVD_xb = xmminsn('\x66', rex_nw, '\x0F\x6E', register(1, 8), stack_bp(2))
 
     PSRAD_xi = xmminsn('\x66', rex_nw, '\x0F\x72', register(1), '\xE0', immediate(2, 'b'))
 
@@ -632,16 +650,20 @@
 
     CQO = insn(rex_w, '\x99')
 
-    # MOV_ri from the parent class is not wrong, but here is a better encoding
-    # for the common case where the immediate fits in 32 bits
+    # Three different encodings... following what gcc does.  From the
+    # shortest encoding to the longest one.
+    MOV_riu32 = insn(rex_nw, register(1), '\xB8', immediate(2, 'i'))
     MOV_ri32 = insn(rex_w, '\xC7', register(1), '\xC0', immediate(2, 'i'))
-    MOV_ri64 = AbstractX86CodeBuilder.MOV_ri
+    MOV_ri64 = insn(rex_w, register(1), '\xB8', immediate(2, 'q'))
 
     def MOV_ri(self, reg, immed):
-        if fits_in_32bits(immed):
+        if 0 <= immed <= 4294967295:
+            immed = intmask(rffi.cast(rffi.INT, immed))
+            self.MOV_riu32(reg, immed)
+        elif fits_in_32bits(immed):    # for negative values that fit in 32 bit
             self.MOV_ri32(reg, immed)
         else:
-            AbstractX86CodeBuilder.MOV_ri(self, reg, immed)
+            self.MOV_ri64(reg, immed)
 
 def define_modrm_modes(insnname_template, before_modrm, after_modrm=[], regtype='GPR'):
     def add_insn(code, *modrm):
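(A small sketch of the three-way choice made by the new MOV_ri above; the
helper name is made up for illustration, but the boundaries follow the
unsigned-32-bit test and fits_in_32bits as used in the code.)

    def pick_mov_ri_encoding(immed):
        # mirrors the dispatch in the 64-bit MOV_ri
        if 0 <= immed <= 4294967295:
            return 'MOV_riu32'       # zero-extending 32-bit form, shortest
        elif -2147483648 <= immed <= 2147483647:
            return 'MOV_ri32'        # REX.W sign-extending form (negative values)
        else:
            return 'MOV_ri64'        # full 64-bit immediate, longest

    assert pick_mov_ri_encoding(42) == 'MOV_riu32'
    assert pick_mov_ri_encoding(-2) == 'MOV_ri32'
    assert pick_mov_ri_encoding(0x100000007) == 'MOV_ri64'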
diff --git a/pypy/jit/backend/x86/test/test_regloc.py b/pypy/jit/backend/x86/test/test_regloc.py
--- a/pypy/jit/backend/x86/test/test_regloc.py
+++ b/pypy/jit/backend/x86/test/test_regloc.py
@@ -24,9 +24,14 @@
     assert_encodes_as(cb64, "MOV16", (r8, ebx), '\x66\x41\x89\xD8')  # 11 011 000
     assert_encodes_as(cb64, "MOV16", (ebx, r8), '\x66\x44\x89\xC3')  # 11 000 011
     assert_encodes_as(cb64, "MOV16", (ecx, ebx), '\x66\x40\x89\xD9')
-    # XXX: What we are testing for here is actually not the most compact
-    # encoding.
-    assert_encodes_as(cb64, "MOV16", (ecx, ImmedLoc(12345)), '\x66\x40\xC7\xC1\x39\x30')
+    assert_encodes_as(cb64, "MOV16", (ecx, ImmedLoc(12345)), '\x66\xB9\x39\x30')
+    # for the next case we don't pick the most efficient encoding, but well
+    expected = '\x66\x40\xC7\xC1\xC7\xCF'  # could be '\x66\xB9\xC7\xCF'
+    assert_encodes_as(cb64, "MOV16", (ecx, ImmedLoc(-12345)), expected)
+    assert_encodes_as(cb64, "MOV16", (r9, ImmedLoc(12345)), '\x66\x41\xB9\x39\x30')
+    # for the next case we don't pick the most efficient encoding, but well
+    expected = '\x66\x41\xC7\xC1\xC7\xCF'  # could be '\x66\x41\xB9\xC7\xCF'
+    assert_encodes_as(cb64, "MOV16", (r9, ImmedLoc(-12345)), expected)
     assert_encodes_as(cb64, "MOV16", (AddressLoc(r13, ImmedLoc(0), 0, 0), ImmedLoc(12345)), '\x66\x41\xC7\x45\x00\x39\x30')
 
 def test_cmp_16():
@@ -44,7 +49,7 @@
 def test_relocation():
     from pypy.rpython.lltypesystem import lltype, rffi
     from pypy.jit.backend.x86 import codebuf
-    for target in [0x01020304, 0x0102030405060708]:
+    for target in [0x01020304, -0x05060708, 0x0102030405060708]:
         if target > sys.maxint:
             continue
         mc = codebuf.MachineCodeBlockWrapper()
@@ -57,11 +62,16 @@
             assert mc.relocations == [5]
             expected = "\xE8" + struct.pack('<i', target - (rawstart + 5))
         elif IS_X86_64:
-            assert mc.relocations == []
-            if target <= 0x7fffffff:
+            assert mc.relocations is None
+            if 0 <= target <= 0xffffffff:
+                assert length == 9
+                expected = (
+                    "\x41\xBB\x04\x03\x02\x01"      # MOV %r11, target
+                    "\x41\xFF\xD3")                 # CALL *%r11
+            elif -0x80000000 <= target < 0:
                 assert length == 10
                 expected = (
-                    "\x49\xC7\xC3\x04\x03\x02\x01"  # MOV %r11, target
+                    "\x49\xC7\xC3\xF8\xF8\xF9\xFA"  # MOV %r11, target
                     "\x41\xFF\xD3")                 # CALL *%r11
             else:
                 assert length == 13
diff --git a/pypy/jit/backend/x86/test/test_runner.py b/pypy/jit/backend/x86/test/test_runner.py
--- a/pypy/jit/backend/x86/test/test_runner.py
+++ b/pypy/jit/backend/x86/test/test_runner.py
@@ -463,7 +463,7 @@
             self.cpu.finish_once()
         finally:
             debug._log = None
-        assert ('jit-backend-counts', [('debug_print', '0:10')]) in dlog
+        assert ('jit-backend-counts', [('debug_print', 'loop -1:10')]) in dlog
 
     def test_debugger_checksum(self):
         loop = """
diff --git a/pypy/jit/backend/x86/test/test_rx86.py b/pypy/jit/backend/x86/test/test_rx86.py
--- a/pypy/jit/backend/x86/test/test_rx86.py
+++ b/pypy/jit/backend/x86/test/test_rx86.py
@@ -198,9 +198,19 @@
 def test_mov_ri_64():
     s = CodeBuilder64()
     s.MOV_ri(ecx, -2)
+    s.MOV_ri(r15, -3)
+    s.MOV_ri(ebx, -0x80000003)
+    s.MOV_ri(r13, -0x80000002)
+    s.MOV_ri(ecx, 42)
     s.MOV_ri(r12, 0x80000042)
+    s.MOV_ri(r12, 0x100000007)
     assert s.getvalue() == ('\x48\xC7\xC1\xFE\xFF\xFF\xFF' +
-                            '\x49\xBC\x42\x00\x00\x80\x00\x00\x00\x00')
+                            '\x49\xC7\xC7\xFD\xFF\xFF\xFF' +
+                            '\x48\xBB\xFD\xFF\xFF\x7F\xFF\xFF\xFF\xFF' +
+                            '\x49\xBD\xFE\xFF\xFF\x7F\xFF\xFF\xFF\xFF' +
+                            '\xB9\x2A\x00\x00\x00' +
+                            '\x41\xBC\x42\x00\x00\x80' +
+                            '\x49\xBC\x07\x00\x00\x00\x01\x00\x00\x00')
 
 def test_mov_rm_64():
     s = CodeBuilder64()
diff --git a/pypy/jit/backend/x86/test/test_rx86_32_auto_encoding.py b/pypy/jit/backend/x86/test/test_rx86_32_auto_encoding.py
--- a/pypy/jit/backend/x86/test/test_rx86_32_auto_encoding.py
+++ b/pypy/jit/backend/x86/test/test_rx86_32_auto_encoding.py
@@ -36,6 +36,14 @@
 def hexdump(s):
     return ' '.join(["%02X" % ord(c) for c in s])
 
+def reduce_to_32bit(s):
+    if s[:2] != '%r':
+        return s
+    if s[2:].isdigit():
+        return s + 'd'
+    else:
+        return '%e' + s[2:]
+
 # ____________________________________________________________
 
 COUNT1 = 15
@@ -180,12 +188,14 @@
     ##        for m, extra in args:
     ##            if m in (i386.MODRM, i386.MODRM8) or all:
     ##                suffix = suffixes[sizes[m]] + suffix
-            if argmodes and not self.is_xmm_insn:
+            if (argmodes and not self.is_xmm_insn
+                         and not instrname.startswith('FSTP')):
                 suffix = suffixes[self.WORD]
             # Special case: On 64-bit CPUs, rx86 assumes 64-bit integer
             # operands when converting to/from floating point, so we need to
             # indicate that with a suffix
-            if (self.WORD == 8) and instrname.startswith('CVT'):
+            if (self.WORD == 8) and (instrname.startswith('CVT') and
+                                     'SI' in instrname):
                 suffix = suffixes[self.WORD]
 
             if instr_suffix is not None:
@@ -212,6 +222,17 @@
             for mode, v in zip(argmodes, args):
                 ops.append(assembler_operand[mode](v))
             ops.reverse()
+            #
+            if (instrname.lower() == 'mov' and suffix == 'q' and
+                ops[0].startswith('$') and 0 <= int(ops[0][1:]) <= 4294967295
+                and ops[1].startswith('%r')):
+                # movq $xxx, %rax => movl $xxx, %eax
+                suffix = 'l'
+                ops[1] = reduce_to_32bit(ops[1])
+            if instrname.lower() == 'movd':
+                ops[0] = reduce_to_32bit(ops[0])
+                ops[1] = reduce_to_32bit(ops[1])
+            #
             op = '\t%s%s %s%s' % (instrname.lower(), suffix,
                                   ', '.join(ops), following)
             g.write('%s\n' % op)
diff --git a/pypy/jit/backend/x86/test/test_zrpy_gc.py b/pypy/jit/backend/x86/test/test_zrpy_gc.py
--- a/pypy/jit/backend/x86/test/test_zrpy_gc.py
+++ b/pypy/jit/backend/x86/test/test_zrpy_gc.py
@@ -524,6 +524,76 @@
     def test_compile_framework_8(self):
         self.run('compile_framework_8')
 
+    def define_compile_framework_9(cls):
+        # Like compile_framework_8, but with variable indexes and large
+        # arrays, testing the card_marking case
+        def before(n, x):
+            return n, x, None, None, None, None, None, None, None, None, [X(123)], None
+        def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
+            if n < 1900:
+                check(l[0].x == 123)
+                num = 512 + (n & 7)
+                l = [None] * num
+                l[0] = X(123)
+                l[1] = X(n)
+                l[2] = X(n+10)
+                l[3] = X(n+20)
+                l[4] = X(n+30)
+                l[5] = X(n+40)
+                l[6] = X(n+50)
+                l[7] = X(n+60)
+                l[num-8] = X(n+70)
+                l[num-9] = X(n+80)
+                l[num-10] = X(n+90)
+                l[num-11] = X(n+100)
+                l[-12] = X(n+110)
+                l[-13] = X(n+120)
+                l[-14] = X(n+130)
+                l[-15] = X(n+140)
+            if n < 1800:
+                num = 512 + (n & 7)
+                check(len(l) == num)
+                check(l[0].x == 123)
+                check(l[1].x == n)
+                check(l[2].x == n+10)
+                check(l[3].x == n+20)
+                check(l[4].x == n+30)
+                check(l[5].x == n+40)
+                check(l[6].x == n+50)
+                check(l[7].x == n+60)
+                check(l[num-8].x == n+70)
+                check(l[num-9].x == n+80)
+                check(l[num-10].x == n+90)
+                check(l[num-11].x == n+100)
+                check(l[-12].x == n+110)
+                check(l[-13].x == n+120)
+                check(l[-14].x == n+130)
+                check(l[-15].x == n+140)
+            n -= x.foo
+            return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
+        def after(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
+            check(len(l) >= 512)
+            check(l[0].x == 123)
+            check(l[1].x == 2)
+            check(l[2].x == 12)
+            check(l[3].x == 22)
+            check(l[4].x == 32)
+            check(l[5].x == 42)
+            check(l[6].x == 52)
+            check(l[7].x == 62)
+            check(l[-8].x == 72)
+            check(l[-9].x == 82)
+            check(l[-10].x == 92)
+            check(l[-11].x == 102)
+            check(l[-12].x == 112)
+            check(l[-13].x == 122)
+            check(l[-14].x == 132)
+            check(l[-15].x == 142)
+        return before, f, after
+
+    def test_compile_framework_9(self):
+        self.run('compile_framework_9')
+
     def define_compile_framework_external_exception_handling(cls):
         def before(n, x):
             x = X(0)
diff --git a/pypy/jit/codewriter/assembler.py b/pypy/jit/codewriter/assembler.py
--- a/pypy/jit/codewriter/assembler.py
+++ b/pypy/jit/codewriter/assembler.py
@@ -76,6 +76,8 @@
                 TYPE = llmemory.Address
             if TYPE == llmemory.Address:
                 value = heaptracker.adr2int(value)
+            if TYPE is lltype.SingleFloat:
+                value = longlong.singlefloat2int(value)
             if not isinstance(value, (llmemory.AddressAsInt,
                                       ComputedIntSymbolic)):
                 value = lltype.cast_primitive(lltype.Signed, value)
diff --git a/pypy/jit/codewriter/call.py b/pypy/jit/codewriter/call.py
--- a/pypy/jit/codewriter/call.py
+++ b/pypy/jit/codewriter/call.py
@@ -5,10 +5,9 @@
 
 from pypy.jit.codewriter import support
 from pypy.jit.codewriter.jitcode import JitCode
-from pypy.jit.codewriter.effectinfo import VirtualizableAnalyzer
-from pypy.jit.codewriter.effectinfo import QuasiImmutAnalyzer
-from pypy.jit.codewriter.effectinfo import effectinfo_from_writeanalyze
-from pypy.jit.codewriter.effectinfo import EffectInfo, CallInfoCollection
+from pypy.jit.codewriter.effectinfo import (VirtualizableAnalyzer,
+    QuasiImmutAnalyzer, CanReleaseGILAnalyzer, effectinfo_from_writeanalyze,
+    EffectInfo, CallInfoCollection)
 from pypy.translator.simplify import get_funcobj, get_functype
 from pypy.rpython.lltypesystem import lltype, llmemory
 from pypy.translator.backendopt.canraise import RaiseAnalyzer
@@ -32,6 +31,7 @@
             self.readwrite_analyzer = ReadWriteAnalyzer(translator)
             self.virtualizable_analyzer = VirtualizableAnalyzer(translator)
             self.quasiimmut_analyzer = QuasiImmutAnalyzer(translator)
+            self.canreleasegil_analyzer = CanReleaseGILAnalyzer(translator)
         #
         for index, jd in enumerate(jitdrivers_sd):
             jd.index = index
@@ -219,15 +219,19 @@
                 assert not NON_VOID_ARGS, ("arguments not supported for "
                                            "loop-invariant function!")
         # build the extraeffect
-        can_invalidate = self.quasiimmut_analyzer.analyze(op)
+        can_release_gil = self.canreleasegil_analyzer.analyze(op)
+        # can_release_gil implies can_invalidate
+        can_invalidate = can_release_gil or self.quasiimmut_analyzer.analyze(op)
         if extraeffect is None:
             if self.virtualizable_analyzer.analyze(op):
                 extraeffect = EffectInfo.EF_FORCES_VIRTUAL_OR_VIRTUALIZABLE
             elif loopinvariant:
                 extraeffect = EffectInfo.EF_LOOPINVARIANT
             elif elidable:
-                # XXX check what to do about exceptions (also MemoryError?)
-                extraeffect = EffectInfo.EF_ELIDABLE
+                if self._canraise(op):
+                    extraeffect = EffectInfo.EF_ELIDABLE_CAN_RAISE
+                else:
+                    extraeffect = EffectInfo.EF_ELIDABLE_CANNOT_RAISE
             elif self._canraise(op):
                 extraeffect = EffectInfo.EF_CAN_RAISE
             else:
@@ -235,7 +239,7 @@
         #
         effectinfo = effectinfo_from_writeanalyze(
             self.readwrite_analyzer.analyze(op), self.cpu, extraeffect,
-            oopspecindex, can_invalidate)
+            oopspecindex, can_invalidate, can_release_gil)
         #
         if oopspecindex != EffectInfo.OS_NONE:
             assert effectinfo is not None
@@ -261,7 +265,7 @@
     def calldescr_canraise(self, calldescr):
         effectinfo = calldescr.get_extra_info()
         return (effectinfo is None or
-                effectinfo.extraeffect >= EffectInfo.EF_CAN_RAISE)
+                effectinfo.extraeffect > EffectInfo.EF_CANNOT_RAISE)
 
     def jitdriver_sd_from_portal_graph(self, graph):
         for jd in self.jitdrivers_sd:
diff --git a/pypy/jit/codewriter/effectinfo.py b/pypy/jit/codewriter/effectinfo.py
--- a/pypy/jit/codewriter/effectinfo.py
+++ b/pypy/jit/codewriter/effectinfo.py
@@ -9,10 +9,11 @@
     _cache = {}
 
     # the 'extraeffect' field is one of the following values:
-    EF_ELIDABLE                        = 0 #elidable function (and cannot raise)
+    EF_ELIDABLE_CANNOT_RAISE           = 0 #elidable function (and cannot raise)
     EF_LOOPINVARIANT                   = 1 #special: call it only once per loop
     EF_CANNOT_RAISE                    = 2 #a function which cannot raise
-    EF_CAN_RAISE                       = 3 #normal function (can raise)
+    EF_ELIDABLE_CAN_RAISE              = 3 #elidable function (but can raise)
+    EF_CAN_RAISE                       = 4 #normal function (can raise)
     EF_FORCES_VIRTUAL_OR_VIRTUALIZABLE = 5 #can raise and force virtualizables
 
     # the 'oopspecindex' field is one of the following values:
@@ -79,20 +80,23 @@
                 write_descrs_fields, write_descrs_arrays,
                 extraeffect=EF_CAN_RAISE,
                 oopspecindex=OS_NONE,
-                can_invalidate=False):
+                can_invalidate=False, can_release_gil=False):
         key = (frozenset(readonly_descrs_fields),
                frozenset(readonly_descrs_arrays),
                frozenset(write_descrs_fields),
                frozenset(write_descrs_arrays),
                extraeffect,
-               oopspecindex)
+               oopspecindex,
+               can_invalidate,
+               can_release_gil)
         if key in cls._cache:
             return cls._cache[key]
         result = object.__new__(cls)
         result.readonly_descrs_fields = readonly_descrs_fields
         result.readonly_descrs_arrays = readonly_descrs_arrays
         if extraeffect == EffectInfo.EF_LOOPINVARIANT or \
-           extraeffect == EffectInfo.EF_ELIDABLE:
+           extraeffect == EffectInfo.EF_ELIDABLE_CANNOT_RAISE or \
+           extraeffect == EffectInfo.EF_ELIDABLE_CAN_RAISE:
             result.write_descrs_fields = []
             result.write_descrs_arrays = []
         else:
@@ -100,6 +104,7 @@
             result.write_descrs_arrays = write_descrs_arrays
         result.extraeffect = extraeffect
         result.can_invalidate = can_invalidate
+        result.can_release_gil = can_release_gil
         result.oopspecindex = oopspecindex
         cls._cache[key] = result
         return result
@@ -111,12 +116,13 @@
         return self.extraeffect >= self.EF_FORCES_VIRTUAL_OR_VIRTUALIZABLE
 
     def has_random_effects(self):
-        return self.oopspecindex == self.OS_LIBFFI_CALL
+        return self.oopspecindex == self.OS_LIBFFI_CALL or self.can_release_gil
 
 def effectinfo_from_writeanalyze(effects, cpu,
                                  extraeffect=EffectInfo.EF_CAN_RAISE,
                                  oopspecindex=EffectInfo.OS_NONE,
-                                 can_invalidate=False):
+                                 can_invalidate=False,
+                                 can_release_gil=False):
     from pypy.translator.backendopt.writeanalyze import top_set
     if effects is top_set:
         return None
@@ -158,7 +164,8 @@
                       write_descrs_arrays,
                       extraeffect,
                       oopspecindex,
-                      can_invalidate)
+                      can_invalidate,
+                      can_release_gil)
 
 def consider_struct(TYPE, fieldname):
     if fieldType(TYPE, fieldname) is lltype.Void:
@@ -194,6 +201,16 @@
     def analyze_simple_operation(self, op, graphinfo):
         return op.opname == 'jit_force_quasi_immutable'
 
+class CanReleaseGILAnalyzer(BoolGraphAnalyzer):
+    def analyze_direct_call(self, graph, seen=None):
+        releases_gil = False
+        if hasattr(graph, "func") and hasattr(graph.func, "_ptr"):
+            releases_gil = graph.func._ptr._obj.releases_gil
+        return releases_gil or super(CanReleaseGILAnalyzer, self).analyze_direct_call(graph, seen)
+
+    def analyze_simple_operation(self, op, graphinfo):
+        return False
+
 # ____________________________________________________________
 
 class CallInfoCollection(object):
diff --git a/pypy/jit/codewriter/jitcode.py b/pypy/jit/codewriter/jitcode.py
--- a/pypy/jit/codewriter/jitcode.py
+++ b/pypy/jit/codewriter/jitcode.py
@@ -1,7 +1,6 @@
 from pypy.jit.metainterp.history import AbstractDescr
 from pypy.jit.codewriter import heaptracker
 from pypy.rlib.objectmodel import we_are_translated
-from pypy.rpython.lltypesystem import llmemory
 
 
 class JitCode(AbstractDescr):
@@ -102,7 +101,7 @@
 
     def _clone_if_mutable(self):
         raise NotImplementedError
-    
+
 class MissingLiveness(Exception):
     pass
 
diff --git a/pypy/jit/codewriter/jtransform.py b/pypy/jit/codewriter/jtransform.py
--- a/pypy/jit/codewriter/jtransform.py
+++ b/pypy/jit/codewriter/jtransform.py
@@ -1,18 +1,16 @@
-import py, sys
-from pypy.rpython.lltypesystem import lltype, llmemory, rstr, rclass
-from pypy.rpython import rlist
-from pypy.jit.metainterp.history import getkind
-from pypy.objspace.flow.model import SpaceOperation, Variable, Constant
-from pypy.objspace.flow.model import Block, Link, c_last_exception
-from pypy.jit.codewriter.flatten import ListOfKind, IndirectCallTargets
+import py
 from pypy.jit.codewriter import support, heaptracker, longlong
 from pypy.jit.codewriter.effectinfo import EffectInfo
+from pypy.jit.codewriter.flatten import ListOfKind, IndirectCallTargets
 from pypy.jit.codewriter.policy import log
+from pypy.jit.metainterp import quasiimmut
+from pypy.jit.metainterp.history import getkind
 from pypy.jit.metainterp.typesystem import deref, arrayItem
-from pypy.jit.metainterp import quasiimmut
-from pypy.rpython.rclass import IR_QUASIIMMUTABLE, IR_QUASIIMMUTABLE_ARRAY
+from pypy.objspace.flow.model import SpaceOperation, Variable, Constant, c_last_exception
 from pypy.rlib import objectmodel
 from pypy.rlib.jit import _we_are_jitted
+from pypy.rpython.lltypesystem import lltype, llmemory, rstr, rclass, rffi
+from pypy.rpython.rclass import IR_QUASIIMMUTABLE, IR_QUASIIMMUTABLE_ARRAY
 from pypy.translator.simplify import get_funcobj
 from pypy.translator.unsimplify import varoftype
 
@@ -200,7 +198,6 @@
             self.vable_array_vars[op.result]= self.vable_array_vars[op.args[0]]
 
     rewrite_op_cast_pointer = rewrite_op_same_as
-    rewrite_op_cast_opaque_ptr = rewrite_op_same_as   # rlib.rerased
     def rewrite_op_cast_bool_to_int(self, op): pass
     def rewrite_op_cast_bool_to_uint(self, op): pass
     def rewrite_op_cast_char_to_int(self, op): pass
@@ -574,6 +571,7 @@
                 pure = '_pure'
         else:
             pure = ''
+        self.check_field_access(v_inst.concretetype.TO)
         argname = getattr(v_inst.concretetype.TO, '_gckind', 'gc')
         descr = self.cpu.fielddescrof(v_inst.concretetype.TO,
                                       c_fieldname.value)
@@ -607,6 +605,7 @@
             return [SpaceOperation('-live-', [], None),
                     SpaceOperation('setfield_vable_%s' % kind,
                                    [v_inst, descr, v_value], None)]
+        self.check_field_access(v_inst.concretetype.TO)
         argname = getattr(v_inst.concretetype.TO, '_gckind', 'gc')
         descr = self.cpu.fielddescrof(v_inst.concretetype.TO,
                                       c_fieldname.value)
@@ -619,6 +618,22 @@
         return (op.args[1].value == 'typeptr' and
                 op.args[0].concretetype.TO._hints.get('typeptr'))
 
+    def check_field_access(self, STRUCT):
+        # check against a GcStruct with a nested GcStruct as its first field
+        # but which is not an object at all; see metainterp/test/test_loop,
+        # test_regular_pointers_in_short_preamble.
+        if not isinstance(STRUCT, lltype.GcStruct):
+            return
+        if STRUCT._first_struct() == (None, None):
+            return
+        PARENT = STRUCT
+        while not PARENT._hints.get('typeptr'):
+            _, PARENT = PARENT._first_struct()
+            if PARENT is None:
+                raise NotImplementedError("%r is a GcStruct using nesting but "
+                                          "not inheriting from object" %
+                                          (STRUCT,))
+
     def get_vinfo(self, v_virtualizable):
         if self.callcontrol is None:      # for tests
             return None
@@ -765,13 +780,70 @@
             raise NotImplementedError("cast_ptr_to_int")
 
     def rewrite_op_force_cast(self, op):
-        from pypy.rpython.lltypesystem.rffi import size_and_sign, sizeof
-        from pypy.rlib.rarithmetic import intmask
         assert not self._is_gc(op.args[0])
-        size2, unsigned2 = size_and_sign(op.result.concretetype)
-        if size2 >= sizeof(lltype.Signed):
+        fromll = longlong.is_longlong(op.args[0].concretetype)
+        toll   = longlong.is_longlong(op.result.concretetype)
+        if fromll and toll:
+            return
+        if fromll:
+            args = op.args
+            opname = 'truncate_longlong_to_int'
+            RESULT = lltype.Signed
+            v = varoftype(RESULT)
+            op1 = SpaceOperation(opname, args, v)
+            op2 = self.rewrite_operation(op1)
+            oplist = self.force_cast_without_longlong(op2.result, op.result)
+            if oplist:
+                return [op2] + oplist
+            #
+            # force a renaming to put the correct result in place, even though
+            # it might be slightly mistyped (e.g. Signed versus Unsigned)
+            assert op2.result is v
+            op2.result = op.result
+            return op2
+        elif toll:
+            size, unsigned = rffi.size_and_sign(op.args[0].concretetype)
+            if unsigned:
+                INTERMEDIATE = lltype.Unsigned
+            else:
+                INTERMEDIATE = lltype.Signed
+            v = varoftype(INTERMEDIATE)
+            oplist = self.force_cast_without_longlong(op.args[0], v)
+            if not oplist:
+                v = op.args[0]
+                oplist = []
+            if unsigned:
+                opname = 'cast_uint_to_longlong'
+            else:
+                opname = 'cast_int_to_longlong'
+            op1 = SpaceOperation(opname, [v], op.result)
+            op2 = self.rewrite_operation(op1)
+            return oplist + [op2]
+        else:
+            return self.force_cast_without_longlong(op.args[0], op.result)
+
+    def force_cast_without_longlong(self, v_arg, v_result):
+        if v_result.concretetype == v_arg.concretetype:
+            return
+        if v_arg.concretetype == rffi.FLOAT:
+            assert v_result.concretetype == lltype.Float, "cast %s -> %s" % (
+                v_arg.concretetype, v_result.concretetype)
+            return SpaceOperation('cast_singlefloat_to_float', [v_arg],
+                                  v_result)
+        if v_result.concretetype == rffi.FLOAT:
+            assert v_arg.concretetype == lltype.Float, "cast %s -> %s" % (
+                v_arg.concretetype, v_result.concretetype)
+            return SpaceOperation('cast_float_to_singlefloat', [v_arg],
+                                  v_result)
+        return self.force_cast_without_singlefloat(v_arg, v_result)
+
+    def force_cast_without_singlefloat(self, v_arg, v_result):
+        size2, unsigned2 = rffi.size_and_sign(v_result.concretetype)
+        assert size2 <= rffi.sizeof(lltype.Signed)
+        if size2 == rffi.sizeof(lltype.Signed):
             return     # the target type is LONG or ULONG
-        size1, unsigned1 = size_and_sign(op.args[0].concretetype)
+        size1, unsigned1 = rffi.size_and_sign(v_arg.concretetype)
+        assert size1 <= rffi.sizeof(lltype.Signed)
         #
         def bounds(size, unsigned):
             if unsigned:
@@ -784,22 +856,27 @@
             return     # the target type includes the source range
         #
         result = []
-        v1 = op.args[0]
         if min2:
             c_min2 = Constant(min2, lltype.Signed)
-            v2 = Variable(); v2.concretetype = lltype.Signed
-            result.append(SpaceOperation('int_sub', [v1, c_min2], v2))
+            v2 = varoftype(lltype.Signed)
+            result.append(SpaceOperation('int_sub', [v_arg, c_min2], v2))
         else:
-            v2 = v1
+            v2 = v_arg
         c_mask = Constant(int((1<<(8*size2))-1), lltype.Signed)
-        v3 = Variable(); v3.concretetype = lltype.Signed
+        v3 = varoftype(lltype.Signed)
         result.append(SpaceOperation('int_and', [v2, c_mask], v3))
         if min2:
-            result.append(SpaceOperation('int_add', [v3, c_min2], op.result))
+            result.append(SpaceOperation('int_add', [v3, c_min2], v_result))
         else:
-            result[-1].result = op.result
+            result[-1].result = v_result
         return result
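(For the narrowing case handled above, the emitted int_sub/int_and/int_add
sequence is equivalent to the following Python sketch, shown here for a
hypothetical Signed -> SIGNEDCHAR cast, i.e. size2 = 1 and min2 = -128; it is
only an illustration of the transformation, not code from the patch.)

    def truncate_to_signed_char(x):
        min2 = -128                  # lower bound of the signed target type
        mask = (1 << 8) - 1          # (1 << (8*size2)) - 1
        v2 = x - min2                # int_sub
        v3 = v2 & mask               # int_and
        return v3 + min2             # int_add

    assert truncate_to_signed_char(200) == -56
    assert truncate_to_signed_char(-1) == -1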
 
+    def rewrite_op_direct_ptradd(self, op):
+        # xxx otherwise, not implemented:
+        assert op.args[0].concretetype == rffi.CCHARP
+        #
+        return SpaceOperation('int_add', [op.args[0], op.args[1]], op.result)
+
     # ----------
     # Long longs, for 32-bit only.  Supported operations are left unmodified,
     # and unsupported ones are turned into a call to a function from
@@ -847,7 +924,7 @@
                 op1 = self.prepare_builtin_call(op, "llong_%s", args)
                 op2 = self._handle_oopspec_call(op1, args,
                                                 EffectInfo.OS_LLONG_%s,
-                                                EffectInfo.EF_ELIDABLE)
+                                           EffectInfo.EF_ELIDABLE_CANNOT_RAISE)
                 if %r == "TO_INT":
                     assert op2.result.concretetype == lltype.Signed
                 return op2
@@ -883,30 +960,7 @@
     rewrite_op_ullong_is_true = rewrite_op_llong_is_true
 
     def rewrite_op_cast_primitive(self, op):
-        fromll = longlong.is_longlong(op.args[0].concretetype)
-        toll   = longlong.is_longlong(op.result.concretetype)
-        if fromll != toll:
-            args = op.args
-            if fromll:
-                opname = 'truncate_longlong_to_int'
-                RESULT = lltype.Signed
-            else:
-                from pypy.rpython.lltypesystem import rffi
-                if rffi.cast(op.args[0].concretetype, -1) < 0:
-                    opname = 'cast_int_to_longlong'
-                else:
-                    opname = 'cast_uint_to_longlong'
-                RESULT = lltype.SignedLongLong
-            v = varoftype(RESULT)
-            op1 = SpaceOperation(opname, args, v)
-            op2 = self.rewrite_operation(op1)
-            #
-            # force a renaming to put the correct result in place, even though
-            # it might be slightly mistyped (e.g. Signed versus Unsigned)
-            assert op2.result is v
-            op2.result = op.result
-            #
-            return op2
+        return self.rewrite_op_force_cast(op)
 
     # ----------
     # Renames, from the _old opname to the _new one.
@@ -1083,6 +1137,9 @@
         return meth(op, args, *descrs)
 
     def _get_list_nonneg_canraise_flags(self, op):
+        # XXX as far as I can see, this function will always return True
+        # because functions that are neither nonneg nor fast don't have an
+        # oopspec any more
         # xxx break of abstraction:
         func = get_funcobj(op.args[0].value)._callable
         # base hints on the name of the ll function, which is a bit xxx-ish
@@ -1240,7 +1297,7 @@
         calldescr = self.callcontrol.getcalldescr(op, oopspecindex,
                                                   extraeffect)
         if extraeffect is not None:
-            assert (type(calldescr) is str      # for tests
+            assert (is_test_calldescr(calldescr)      # for tests
                     or calldescr.get_extra_info().extraeffect == extraeffect)
         if isinstance(op.args[0].value, str):
             pass  # for tests only
@@ -1328,15 +1385,15 @@
                     otherindex += EffectInfo._OS_offset_uni
                 self._register_extra_helper(otherindex, othername,
                                             argtypes, resulttype,
-                                            EffectInfo.EF_ELIDABLE)
+                                           EffectInfo.EF_ELIDABLE_CANNOT_RAISE)
         #
         return self._handle_oopspec_call(op, args, dict[oopspec_name],
-                                         EffectInfo.EF_ELIDABLE)
+                                         EffectInfo.EF_ELIDABLE_CANNOT_RAISE)
 
     def _handle_str2unicode_call(self, op, oopspec_name, args):
-        # ll_str2unicode is not EF_ELIDABLE, because it can raise
-        # UnicodeDecodeError...
-        return self._handle_oopspec_call(op, args, EffectInfo.OS_STR2UNICODE)
+        # ll_str2unicode can raise UnicodeDecodeError
+        return self._handle_oopspec_call(op, args, EffectInfo.OS_STR2UNICODE,
+                                         EffectInfo.EF_ELIDABLE_CAN_RAISE)
 
     # ----------
     # VirtualRefs.
@@ -1374,13 +1431,13 @@
         assert vinfo is not None
         self.vable_flags[op.args[0]] = op.args[2].value
         return []
-        
+
     # ---------
     # ll_math.sqrt_nonneg()
-    
+
     def _handle_math_sqrt_call(self, op, oopspec_name, args):
         return self._handle_oopspec_call(op, args, EffectInfo.OS_MATH_SQRT,
-                                         EffectInfo.EF_ELIDABLE)
+                                         EffectInfo.EF_ELIDABLE_CANNOT_RAISE)
 
     def rewrite_op_jit_force_quasi_immutable(self, op):
         v_inst, c_fieldname = op.args
@@ -1401,6 +1458,9 @@
         return "using virtualizable array in illegal way in %r" % (
             self.args[0],)
 
+def is_test_calldescr(calldescr):
+    return type(calldescr) is str or getattr(calldescr, '_for_tests_only', False)
+
 def _with_prefix(prefix):
     result = {}
     for name in dir(Transformer):
diff --git a/pypy/jit/codewriter/longlong.py b/pypy/jit/codewriter/longlong.py
--- a/pypy/jit/codewriter/longlong.py
+++ b/pypy/jit/codewriter/longlong.py
@@ -7,7 +7,8 @@
 """
 
 import sys
-from pypy.rpython.lltypesystem import lltype
+from pypy.rpython.lltypesystem import lltype, rffi
+from pypy.rlib import rarithmetic, longlong2float
 
 
 if sys.maxint > 2147483647:
@@ -31,8 +32,6 @@
     # ---------- 32-bit platform ----------
     # the type FloatStorage is r_longlong, and conversion is needed
 
-    from pypy.rlib import rarithmetic, longlong2float
-
     is_64_bit = False
     supports_longlong = True
     r_float_storage = rarithmetic.r_longlong
@@ -41,9 +40,19 @@
     getfloatstorage = longlong2float.float2longlong
     getrealfloat    = longlong2float.longlong2float
     gethash         = lambda xll: rarithmetic.intmask(xll - (xll >> 32))
-    is_longlong     = lambda TYPE: (TYPE == lltype.SignedLongLong or
-                                    TYPE == lltype.UnsignedLongLong)
+    is_longlong     = lambda TYPE: (TYPE is lltype.SignedLongLong or
+                                    TYPE is lltype.UnsignedLongLong)
 
     # -------------------------------------
 
 ZEROF = getfloatstorage(0.0)
+
+# ____________________________________________________________
+
+def int2singlefloat(x):
+    x = rffi.r_uint(x)
+    return longlong2float.uint2singlefloat(x)
+
+def singlefloat2int(x):
+    x = longlong2float.singlefloat2uint(x)
+    return rffi.cast(lltype.Signed, x)
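(A rough equivalent of the two new helpers above, written with the struct
module instead of rffi/longlong2float; purely illustrative, not how the
translated code actually works.)

    import struct

    def singlefloat2int(value):
        # reinterpret the 32-bit single-precision encoding of 'value' as an int
        return struct.unpack('<I', struct.pack('<f', value))[0]

    def int2singlefloat(bits):
        # inverse direction: an int bit pattern back to a single-precision float
        return struct.unpack('<f', struct.pack('<I', bits & 0xffffffff))[0]

    assert int2singlefloat(singlefloat2int(1.5)) == 1.5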
diff --git a/pypy/jit/codewriter/policy.py b/pypy/jit/codewriter/policy.py
--- a/pypy/jit/codewriter/policy.py
+++ b/pypy/jit/codewriter/policy.py
@@ -1,9 +1,7 @@
-from pypy.translator.simplify import get_funcobj
 from pypy.jit.metainterp import history
-from pypy.rpython.lltypesystem import lltype, rclass
 from pypy.tool.udir import udir
 
-import py, sys
+import py
 from pypy.tool.ansi_print import ansi_log
 log = py.log.Producer('jitcodewriter')
 py.log.setconsumer('jitcodewriter', ansi_log)
@@ -14,6 +12,7 @@
         self.unsafe_loopy_graphs = set()
         self.supports_floats = False
         self.supports_longlong = False
+        self.supports_singlefloats = False
 
     def set_supports_floats(self, flag):
         self.supports_floats = flag
@@ -21,6 +20,9 @@
     def set_supports_longlong(self, flag):
         self.supports_longlong = flag
 
+    def set_supports_singlefloats(self, flag):
+        self.supports_singlefloats = flag
+
     def dump_unsafe_loops(self):
         f = udir.join("unsafe-loops.txt").open('w')
         strs = [str(graph) for graph in self.unsafe_loopy_graphs]
@@ -60,8 +62,9 @@
                     func, '_jit_unroll_safe_', False)
 
         unsupported = contains_unsupported_variable_type(graph,
-                                                         self.supports_floats,
-                                                         self.supports_longlong)
+                            self.supports_floats,
+                            self.supports_longlong,
+                            self.supports_singlefloats)
         res = see_function and not unsupported
         if res and contains_loop:
             self.unsafe_loopy_graphs.add(graph)
@@ -82,17 +85,24 @@
         return res
 
 def contains_unsupported_variable_type(graph, supports_floats,
-                                       supports_longlong):
+                                              supports_longlong,
+                                              supports_singlefloats):
     getkind = history.getkind
     try:
         for block in graph.iterblocks():
             for v in block.inputargs:
-                getkind(v.concretetype, supports_floats, supports_longlong)
+                getkind(v.concretetype, supports_floats,
+                                        supports_longlong,
+                                        supports_singlefloats)
             for op in block.operations:
                 for v in op.args:
-                    getkind(v.concretetype, supports_floats, supports_longlong)
+                    getkind(v.concretetype, supports_floats,
+                                            supports_longlong,
+                                            supports_singlefloats)
                 v = op.result
-                getkind(v.concretetype, supports_floats, supports_longlong)
+                getkind(v.concretetype, supports_floats,
+                                        supports_longlong,
+                                        supports_singlefloats)
     except NotImplementedError, e:
         log.WARNING('%s, ignoring graph' % (e,))
         log.WARNING('  %s' % (graph,))
diff --git a/pypy/jit/codewriter/regalloc.py b/pypy/jit/codewriter/regalloc.py
--- a/pypy/jit/codewriter/regalloc.py
+++ b/pypy/jit/codewriter/regalloc.py
@@ -1,128 +1,8 @@
-import sys
-from pypy.objspace.flow.model import Variable
-from pypy.tool.algo.color import DependencyGraph
-from pypy.tool.algo.unionfind import UnionFind
+from pypy.tool.algo import regalloc
 from pypy.jit.metainterp.history import getkind
 from pypy.jit.codewriter.flatten import ListOfKind
 
+
 def perform_register_allocation(graph, kind):
-    """Perform register allocation for the Variables of the given 'kind'
-    in the 'graph'."""
-    regalloc = RegAllocator(graph, kind)
-    regalloc.make_dependencies()
-    regalloc.coalesce_variables()
-    regalloc.find_node_coloring()
-    return regalloc
-
-
-class RegAllocator(object):
-    DEBUG_REGALLOC = False
-
-    def __init__(self, graph, kind):
-        self.graph = graph
-        self.kind = kind
-
-    def make_dependencies(self):
-        dg = DependencyGraph()
-        for block in self.graph.iterblocks():
-            # Compute die_at = {Variable: index_of_operation_with_last_usage}
-            die_at = dict.fromkeys(block.inputargs, 0)
-            for i, op in enumerate(block.operations):
-                for v in op.args:
-                    if isinstance(v, Variable):
-                        die_at[v] = i
-                    elif isinstance(v, ListOfKind):
-                        for v1 in v:
-                            if isinstance(v1, Variable):
-                                die_at[v1] = i
-                if op.result is not None:
-                    die_at[op.result] = i + 1
-            if isinstance(block.exitswitch, tuple):
-                for x in block.exitswitch:
-                    die_at.pop(x, None)
-            else:
-                die_at.pop(block.exitswitch, None)
-            for link in block.exits:
-                for v in link.args:
-                    die_at.pop(v, None)
-            die_at = [(value, key) for (key, value) in die_at.items()]
-            die_at.sort()
-            die_at.append((sys.maxint,))
-            # Done.  XXX the code above this line runs 3 times
-            # (for kind in KINDS) to produce the same result...
-            livevars = [v for v in block.inputargs
-                          if getkind(v.concretetype) == self.kind]
-            # Add the variables of this block to the dependency graph
-            for i, v in enumerate(livevars):
-                dg.add_node(v)
-                for j in range(i):
-                    dg.add_edge(livevars[j], v)
-            livevars = set(livevars)
-            die_index = 0
-            for i, op in enumerate(block.operations):
-                while die_at[die_index][0] == i:
-                    try:
-                        livevars.remove(die_at[die_index][1])
-                    except KeyError:
-                        pass
-                    die_index += 1
-                if (op.result is not None and
-                    getkind(op.result.concretetype) == self.kind):
-                    dg.add_node(op.result)
-                    for v in livevars:
-                        if getkind(v.concretetype) == self.kind:
-                            dg.add_edge(v, op.result)
-                    livevars.add(op.result)
-        self._depgraph = dg
-
-    def coalesce_variables(self):
-        self._unionfind = UnionFind()
-        pendingblocks = list(self.graph.iterblocks())
-        while pendingblocks:
-            block = pendingblocks.pop()
-            # Aggressively try to coalesce each source variable with its
-            # target.  We start from the end of the graph instead of
-            # from the beginning.  This is a bit arbitrary, but the idea
-            # is that the end of the graph runs typically more often
-            # than the start, given that we resume execution from the
-            # middle during blackholing.
-            for link in block.exits:
-                if link.last_exception is not None:
-                    self._depgraph.add_node(link.last_exception)
-                if link.last_exc_value is not None:
-                    self._depgraph.add_node(link.last_exc_value)
-                for i, v in enumerate(link.args):
-                    self._try_coalesce(v, link.target.inputargs[i])
-
-    def _try_coalesce(self, v, w):
-        if isinstance(v, Variable) and getkind(v.concretetype) == self.kind:
-            dg = self._depgraph
-            uf = self._unionfind
-            v0 = uf.find_rep(v)
-            w0 = uf.find_rep(w)
-            if v0 is not w0 and v0 not in dg.neighbours[w0]:
-                _, rep, _ = uf.union(v0, w0)
-                assert uf.find_rep(v0) is uf.find_rep(w0) is rep
-                if rep is v0:
-                    dg.coalesce(w0, v0)
-                else:
-                    assert rep is w0
-                    dg.coalesce(v0, w0)
-
-    def find_node_coloring(self):
-        self._coloring = self._depgraph.find_node_coloring()
-        if self.DEBUG_REGALLOC:
-            for block in self.graph.iterblocks():
-                print block
-                for v in block.getvariables():
-                    print '\t', v, '\t', self.getcolor(v)
-
-    def getcolor(self, v):
-        return self._coloring[self._unionfind.find_rep(v)]
-
-    def swapcolors(self, col1, col2):
-        for key, value in self._coloring.items():
-            if value == col1:
-                self._coloring[key] = col2
-            elif value == col2:
-                self._coloring[key] = col1
+    checkkind = lambda v: getkind(v.concretetype) == kind
+    return regalloc.perform_register_allocation(graph, checkkind, ListOfKind)
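
Note: the RegAllocator removed above worked in three steps: it computed, per
block, the operation index at which each variable is last used ("die_at"),
added an interference edge between every pair of variables live at the same
time, coalesced link arguments with their targets, and finally coloured the
interference graph to assign register numbers.  The standalone sketch below
illustrates the same interference-graph colouring idea on a hypothetical
mini-IR (plain tuples, not PyPy's flow-graph objects):

    def color_variables(inputargs, operations):
        """operations: list of (result_or_None, list_of_argument_names).
        Returns {variable: colour}; interfering variables get distinct colours."""
        # last index at which each variable is needed
        die_at = dict.fromkeys(inputargs, 0)
        for i, (result, args) in enumerate(operations):
            for v in args:
                die_at[v] = i
            if result is not None:
                die_at[result] = i + 1
        # interference graph: the input arguments all interfere with each
        # other, and each result interferes with whatever is still live
        neighbours = {}
        def add_node(v):
            neighbours.setdefault(v, set())
        def add_edge(v, w):
            neighbours[v].add(w)
            neighbours[w].add(v)
        for v in inputargs:
            add_node(v)
        for i, v in enumerate(inputargs):
            for w in inputargs[:i]:
                add_edge(w, v)
        live = set(inputargs)
        for i, (result, args) in enumerate(operations):
            live = set(v for v in live if die_at[v] > i)
            if result is not None:
                add_node(result)
                for v in live:
                    add_edge(v, result)
                live.add(result)
        # greedy colouring: smallest colour not taken by a neighbour
        coloring = {}
        for v in neighbours:
            used = set(coloring.get(w) for w in neighbours[v])
            c = 0
            while c in used:
                c += 1
            coloring[v] = c
        return coloring

    # 'c' may reuse the register of 'a', which dies at the first operation;
    # the exact numbers depend on dict iteration order, but the colouring is
    # always consistent with the interference edges.
    print color_variables(['a', 'b'], [('c', ['a', 'b']), ('d', ['c', 'b'])])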
diff --git a/pypy/jit/codewriter/support.py b/pypy/jit/codewriter/support.py
--- a/pypy/jit/codewriter/support.py
+++ b/pypy/jit/codewriter/support.py
@@ -20,6 +20,7 @@
 from pypy.rpython.annlowlevel import MixLevelHelperAnnotator
 from pypy.jit.metainterp.typesystem import deref
 from pypy.rlib import rgc
+from pypy.rlib.jit import elidable
 from pypy.rlib.rarithmetic import r_longlong, r_ulonglong, r_uint, intmask
 
 def getargtypes(annotator, values):
@@ -167,9 +168,14 @@
 
 _ll_5_list_ll_arraycopy = rgc.ll_arraycopy
 
+ at elidable
 def _ll_1_gc_identityhash(x):
     return lltype.identityhash(x)
 
+# the following function should not be "@elidable": I can think of
+# a corner case in which id(const) is constant-folded, and then 'const'
+# disappears and is collected too early (possibly causing another object
+# with the same id() to appear).
 def _ll_1_gc_id(ptr):
     return llop.gc_id(lltype.Signed, ptr)
 
@@ -185,7 +191,7 @@
     return llop.int_floordiv(lltype.Signed, x, y)
 
 def _ll_2_int_floordiv_ovf(x, y):
-    if x == -sys.maxint - 1 and y == -1:        
+    if x == -sys.maxint - 1 and y == -1:
         raise OverflowError
     return llop.int_floordiv(lltype.Signed, x, y)
 
@@ -222,7 +228,7 @@
         return -x
     else:
         return x
-        
+
 # math support
 # ------------
 
@@ -395,7 +401,7 @@
     ('int_lshift_ovf',       [lltype.Signed, lltype.Signed], lltype.Signed),
     ('int_abs',              [lltype.Signed],                lltype.Signed),
     ('ll_math.ll_math_sqrt', [lltype.Float],                 lltype.Float),
-    ]
+]
 
 
 class LLtypeHelpers:
@@ -420,10 +426,6 @@
     _ll_1_dict_values.need_result_type = True
     _ll_1_dict_items .need_result_type = True
 
-    def _ll_1_newdictiter(ITER, d):
-        return ll_rdict.ll_dictiter(lltype.Ptr(ITER), d)
-    _ll_1_newdictiter.need_result_type = True
-
     _dictnext_keys   = staticmethod(ll_rdict.ll_dictnext_group['keys'])
     _dictnext_values = staticmethod(ll_rdict.ll_dictnext_group['values'])
     _dictnext_items  = staticmethod(ll_rdict.ll_dictnext_group['items'])
@@ -574,10 +576,6 @@
     _ll_1_dict_values.need_result_type = True
     _ll_1_dict_items .need_result_type = True
 
-    def _ll_1_newdictiter(ITER, d):
-        return oo_rdict.ll_dictiter(ITER, d)
-    _ll_1_newdictiter.need_result_type = True
-
     _dictnext_keys   = staticmethod(oo_rdict.ll_dictnext_group['keys'])
     _dictnext_values = staticmethod(oo_rdict.ll_dictnext_group['values'])
     _dictnext_items  = staticmethod(oo_rdict.ll_dictnext_group['items'])
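
Note: the new @elidable hint on _ll_1_gc_identityhash promises the JIT that
the helper is a pure function of its arguments, so a second call with the
same constant argument can be constant-folded away; the comment above spells
out why _ll_1_gc_id must not make the same promise.  A minimal sketch of how
an RPython helper uses the hint (the `config.mapping` lookup is a made-up
example, not part of this changeset):

    from pypy.rlib.jit import elidable

    @elidable
    def lookup(config, key):
        # must have no side effects and always return the same result for
        # the same arguments -- that is what allows the JIT to fold it
        return config.mapping[key]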
diff --git a/pypy/jit/codewriter/test/test_call.py b/pypy/jit/codewriter/test/test_call.py
--- a/pypy/jit/codewriter/test/test_call.py
+++ b/pypy/jit/codewriter/test/test_call.py
@@ -1,6 +1,6 @@
 import py
 from pypy.objspace.flow.model import SpaceOperation, Constant, Variable
-from pypy.rpython.lltypesystem import lltype
+from pypy.rpython.lltypesystem import lltype, rffi
 from pypy.translator.unsimplify import varoftype
 from pypy.rlib import jit
 from pypy.jit.codewriter.call import CallControl
@@ -103,7 +103,7 @@
 
     op = SpaceOperation('direct_call', [Constant(object())],
                         Variable())
-    assert cc.guess_call_kind(op) == 'residual'        
+    assert cc.guess_call_kind(op) == 'residual'
 
     class funcptr:
         class graph:
@@ -118,7 +118,7 @@
     op = SpaceOperation('direct_call', [Constant(funcptr)],
                         Variable())
     res = cc.graphs_from(op)
-    assert res == [g]        
+    assert res == [g]
     assert cc.guess_call_kind(op) == 'regular'
 
     class funcptr:
@@ -126,7 +126,7 @@
     op = SpaceOperation('direct_call', [Constant(funcptr)],
                         Variable())
     res = cc.graphs_from(op)
-    assert res is None        
+    assert res is None
     assert cc.guess_call_kind(op) == 'residual'
 
     h = object()
@@ -142,7 +142,7 @@
                         Variable())
     res = cc.graphs_from(op)
     assert res is None
-    assert cc.guess_call_kind(op) == 'residual'        
+    assert cc.guess_call_kind(op) == 'residual'
 
 # ____________________________________________________________
 
@@ -171,3 +171,24 @@
 
 def test_jit_force_virtualizable_effectinfo():
     py.test.skip("XXX add a test for CallControl.getcalldescr() -> EF_xxx")
+
+def test_releases_gil_analyzer():
+    from pypy.jit.backend.llgraph.runner import LLtypeCPU
+
+    T = rffi.CArrayPtr(rffi.TIME_T)
+    external = rffi.llexternal("time", [T], rffi.TIME_T, threadsafe=True)
+
+    @jit.dont_look_inside
+    def f():
+        return external(lltype.nullptr(T.TO))
+
+    rtyper = support.annotate(f, [])
+    jitdriver_sd = FakeJitDriverSD(rtyper.annotator.translator.graphs[0])
+    cc = CallControl(LLtypeCPU(rtyper), jitdrivers_sd=[jitdriver_sd])
+    res = cc.find_all_graphs(FakePolicy())
+
+    [f_graph] = [x for x in res if x.func is f]
+    [block, _] = list(f_graph.iterblocks())
+    [op] = block.operations
+    call_descr = cc.getcalldescr(op)
+    assert call_descr.extrainfo.can_release_gil
\ No newline at end of file
diff --git a/pypy/jit/codewriter/test/test_flatten.py b/pypy/jit/codewriter/test/test_flatten.py
--- a/pypy/jit/codewriter/test/test_flatten.py
+++ b/pypy/jit/codewriter/test/test_flatten.py
@@ -3,6 +3,7 @@
 from pypy.jit.codewriter.flatten import flatten_graph, reorder_renaming_list
 from pypy.jit.codewriter.flatten import GraphFlattener, ListOfKind, Register
 from pypy.jit.codewriter.format import assert_format
+from pypy.jit.codewriter import longlong
 from pypy.jit.metainterp.history import AbstractDescr
 from pypy.rpython.lltypesystem import lltype, rclass, rstr
 from pypy.objspace.flow.model import SpaceOperation, Variable, Constant
@@ -30,6 +31,9 @@
             'float': FakeRegAlloc()}
 
 class FakeDescr(AbstractDescr):
+    _for_tests_only = True
+    def __init__(self, oopspecindex=None):
+        self.oopspecindex = oopspecindex
     def __repr__(self):
         return '<Descr>'
     def as_vtable_size_descr(self):
@@ -55,19 +59,24 @@
     def arraydescrof(self, ARRAY):
         return FakeDescr()
 
+class FakeCallInfoCollection:
+    def add(self, *args):
+        pass
+
 class FakeCallControl:
     _descr_cannot_raise = FakeDescr()
+    callinfocollection = FakeCallInfoCollection()
     def guess_call_kind(self, op):
         return 'residual'
-    def getcalldescr(self, op):
+    def getcalldescr(self, op, oopspecindex=None, extraeffect=None):
         try:
             if 'cannot_raise' in op.args[0].value._obj.graph.name:
                 return self._descr_cannot_raise
         except AttributeError:
             pass
-        return FakeDescr()
+        return FakeDescr(oopspecindex)
     def calldescr_canraise(self, calldescr):
-        return calldescr is not self._descr_cannot_raise
+        return calldescr is not self._descr_cannot_raise and calldescr.oopspecindex is None
     def get_vinfo(self, VTYPEPTR):
         return None
 
@@ -734,7 +743,9 @@
 
     def test_force_cast(self):
         from pypy.rpython.lltypesystem import rffi
-
+        # NB: we don't need to test for INT here: the logic in jtransform
+        # is general enough that if the cases below work, they also
+        # generalize to INT
         for FROM, TO, expected in [
             (rffi.SIGNEDCHAR, rffi.SIGNEDCHAR, ""),
             (rffi.SIGNEDCHAR, rffi.UCHAR, "int_and %i0, $255 -> %i1"),
@@ -797,14 +808,44 @@
             expected = [s.strip() for s in expected.splitlines()]
             check_force_cast(FROM, TO, expected, 42)
             check_force_cast(FROM, TO, expected, -42)
-            expected.append('int_return %i' + str(len(expected)))
-            expected = '\n'.join(expected)
+            returnvar = "%i" + str(len(expected))
+            expected.append('int_return ' + returnvar)
+            expectedstr = '\n'.join(expected)
             #
             def f(n):
                 return rffi.cast(TO, n)
-            self.encoding_test(f, [rffi.cast(FROM, 42)], expected,
+            self.encoding_test(f, [rffi.cast(FROM, 42)], expectedstr,
                                transform=True)
 
+            if not longlong.is_64_bit:
+                if FROM in (rffi.LONG, rffi.ULONG):
+                    if FROM == rffi.LONG:
+                        FROM = rffi.LONGLONG
+                    else:
+                        FROM = rffi.ULONGLONG
+                    expected.insert(0,
+                        "residual_call_irf_i $<* fn llong_to_int>, <Descr>, I[], R[], F[%f0] -> %i0")
+                    expectedstr = '\n'.join(expected)
+                    self.encoding_test(f, [rffi.cast(FROM, 42)], expectedstr,
+                                       transform=True)
+                elif TO in (rffi.LONG, rffi.ULONG):
+                    if TO == rffi.LONG:
+                        TO = rffi.LONGLONG
+                    else:
+                        TO = rffi.ULONGLONG
+                    if rffi.cast(FROM, -1) < 0:
+                        fnname = "llong_from_int"
+                    else:
+                        fnname = "llong_from_uint"
+                    expected.pop()   # remove int_return
+                    expected.append(
+                        "residual_call_irf_f $<* fn %s>, <Descr>, I[%s], R[], F[] -> %%f0"
+                        % (fnname, returnvar))
+                    expected.append("float_return %f0")
+                    expectedstr = '\n'.join(expected)
+                    self.encoding_test(f, [rffi.cast(FROM, 42)], expectedstr,
+                                       transform=True)
+
     def test_force_cast_pointer(self):
         from pypy.rpython.lltypesystem import rffi
         def h(p):
@@ -813,6 +854,23 @@
             int_return %i0
         """, transform=True)
 
+    def test_force_cast_float(self):
+        from pypy.rpython.lltypesystem import rffi
+        def f(n):
+            return rffi.cast(lltype.Float, n)
+        self.encoding_test(f, [12.456], """
+            float_return %f0
+        """, transform=True)
+
+    def test_direct_ptradd(self):
+        from pypy.rpython.lltypesystem import rffi
+        def f(p, n):
+            return lltype.direct_ptradd(p, n)
+        self.encoding_test(f, [lltype.nullptr(rffi.CCHARP.TO), 123], """
+            int_add %i0, %i1 -> %i2
+            int_return %i2
+        """, transform=True)
+
 
 def check_force_cast(FROM, TO, operations, value):
     """Check that the test is correctly written..."""
diff --git a/pypy/jit/codewriter/test/test_jtransform.py b/pypy/jit/codewriter/test/test_jtransform.py
--- a/pypy/jit/codewriter/test/test_jtransform.py
+++ b/pypy/jit/codewriter/test/test_jtransform.py
@@ -120,9 +120,9 @@
             assert argtypes[0] == [v.concretetype for v in op.args[1:]]
             assert argtypes[1] == op.result.concretetype
             if oopspecindex == EI.OS_STR2UNICODE:
-                assert extraeffect == None    # not pure, can raise!
+                assert extraeffect == EI.EF_ELIDABLE_CAN_RAISE
             else:
-                assert extraeffect == EI.EF_ELIDABLE
+                assert extraeffect == EI.EF_ELIDABLE_CANNOT_RAISE
         return 'calldescr-%d' % oopspecindex
     def calldescr_canraise(self, calldescr):
         return False
@@ -769,7 +769,7 @@
         def get_vinfo(self, v):
             return None
         def could_be_green_field(self, S1, name1):
-            assert S1 is S
+            assert S1 == S
             assert name1 == 'x'
             return True
     S = lltype.GcStruct('S', ('x', lltype.Char),
@@ -1014,3 +1014,13 @@
         assert op1.opname == 'jit_force_quasi_immutable'
         assert op1.args[0] == v_x
         assert op1.args[1] == ('fielddescr', STRUCT, 'mutate_x')
+
+def test_no_gcstruct_nesting_outside_of_OBJECT():
+    PARENT = lltype.GcStruct('parent')
+    STRUCT = lltype.GcStruct('struct', ('parent', PARENT),
+                                       ('x', lltype.Signed))
+    v_x = varoftype(lltype.Ptr(STRUCT))
+    op = SpaceOperation('getfield', [v_x, Constant('x', lltype.Void)],
+                        varoftype(lltype.Signed))
+    tr = Transformer(None, None)
+    raises(NotImplementedError, tr.rewrite_operation, op)
diff --git a/pypy/jit/codewriter/test/test_longlong.py b/pypy/jit/codewriter/test/test_longlong.py
--- a/pypy/jit/codewriter/test/test_longlong.py
+++ b/pypy/jit/codewriter/test/test_longlong.py
@@ -37,7 +37,7 @@
 
 class TestLongLong:
     def setup_class(cls):
-        if sys.maxint > 2147483647:
+        if longlong.is_64_bit:
             py.test.skip("only for 32-bit platforms")
 
     def do_check(self, opname, oopspecindex, ARGS, RESULT):
@@ -46,6 +46,8 @@
         op = SpaceOperation(opname, vlist, v_result)
         tr = Transformer(FakeCPU(), FakeBuiltinCallControl())
         op1 = tr.rewrite_operation(op)
+        if isinstance(op1, list):
+            [op1] = op1
         #
         def is_llf(TYPE):
             return (TYPE == lltype.SignedLongLong or
@@ -196,6 +198,23 @@
             for T2 in [lltype.Signed, lltype.Unsigned]:
                 self.do_check('cast_primitive', EffectInfo.OS_LLONG_TO_INT,
                               [T1], T2)
+                self.do_check('force_cast', EffectInfo.OS_LLONG_TO_INT,
+                              [T1], T2)
+                if T2 == lltype.Signed:
+                    expected = EffectInfo.OS_LLONG_FROM_INT
+                else:
+                    expected = EffectInfo.OS_LLONG_FROM_UINT
+                self.do_check('cast_primitive', expected, [T2], T1)
+                self.do_check('force_cast', expected, [T2], T1)
+        #
+        for T1 in [lltype.SignedLongLong, lltype.UnsignedLongLong]:
+            for T2 in [lltype.SignedLongLong, lltype.UnsignedLongLong]:
+                vlist = [varoftype(T1)]
+                v_result = varoftype(T2)
+                op = SpaceOperation('force_cast', vlist, v_result)
+                tr = Transformer(FakeCPU(), FakeBuiltinCallControl())
+                op1 = tr.rewrite_operation(op)
+                assert op1 is None
 
     def test_constants(self):
         for TYPE in [lltype.SignedLongLong, lltype.UnsignedLongLong]:
@@ -211,3 +230,18 @@
             assert list(op1.args[3]) == []
             assert list(op1.args[4]) == vlist
             assert op1.result == v_result
+
+
+##def test_singlefloat_constants():
+##    v_x = varoftype(TYPE)
+##    vlist = [v_x, const(rffi.cast(TYPE, 7))]
+##    v_result = varoftype(TYPE)
+##    op = SpaceOperation('llong_add', vlist, v_result)
+##    tr = Transformer(FakeCPU(), FakeBuiltinCallControl())
+##    op1 = tr.rewrite_operation(op)
+##    #
+##    assert op1.opname == 'residual_call_irf_f'
+##    assert list(op1.args[2]) == []
+##    assert list(op1.args[3]) == []
+##    assert list(op1.args[4]) == vlist
+##    assert op1.result == v_result
diff --git a/pypy/jit/codewriter/test/test_policy.py b/pypy/jit/codewriter/test/test_policy.py
--- a/pypy/jit/codewriter/test/test_policy.py
+++ b/pypy/jit/codewriter/test/test_policy.py
@@ -12,24 +12,30 @@
     graph = support.getgraph(f, [5])
     for sf in [False, True]:
         for sll in [False, True]:
-            assert not contains_unsupported_variable_type(graph, sf, sll)
+            for ssf in [False, True]:
+                assert not contains_unsupported_variable_type(graph, sf,
+                                                              sll, ssf)
     #
     graph = support.getgraph(f, [5.5])
     for sf in [False, True]:
         for sll in [False, True]:
-            res = contains_unsupported_variable_type(graph, sf, sll)
-            assert res is not sf
+            for ssf in [False, True]:
+                res = contains_unsupported_variable_type(graph, sf, sll, ssf)
+                assert res is not sf
     #
     graph = support.getgraph(f, [r_singlefloat(5.5)])
     for sf in [False, True]:
         for sll in [False, True]:
-            assert contains_unsupported_variable_type(graph, sf, sll)
+            for ssf in [False, True]:
+                res = contains_unsupported_variable_type(graph, sf, sll, ssf)
+                assert res == (not ssf)
     #
     graph = support.getgraph(f, [r_longlong(5)])
     for sf in [False, True]:
         for sll in [False, True]:
-            res = contains_unsupported_variable_type(graph, sf, sll)
-            assert res == (sys.maxint == 2147483647 and not sll)
+            for ssf in [False, True]:
+                res = contains_unsupported_variable_type(graph, sf, sll, ssf)
+                assert res == (sys.maxint == 2147483647 and not sll)
 
 
 def test_regular_function():
diff --git a/pypy/jit/metainterp/blackhole.py b/pypy/jit/metainterp/blackhole.py
--- a/pypy/jit/metainterp/blackhole.py
+++ b/pypy/jit/metainterp/blackhole.py
@@ -500,6 +500,9 @@
     @arguments("r", returns="i")
     def bhimpl_ptr_nonzero(a):
         return bool(a)
+    @arguments("r", returns="r")
+    def bhimpl_cast_opaque_ptr(a):
+        return a
 
     @arguments("i", returns="i")
     def bhimpl_int_copy(a):
@@ -623,6 +626,19 @@
         x = float(a)
         return longlong.getfloatstorage(x)
 
+    @arguments("f", returns="i")
+    def bhimpl_cast_float_to_singlefloat(a):
+        from pypy.rlib.rarithmetic import r_singlefloat
+        a = longlong.getrealfloat(a)
+        a = r_singlefloat(a)
+        return longlong.singlefloat2int(a)
+
+    @arguments("i", returns="f")
+    def bhimpl_cast_singlefloat_to_float(a):
+        a = longlong.int2singlefloat(a)
+        a = float(a)
+        return longlong.getfloatstorage(a)
+
     # ----------
     # control flow operations
 
diff --git a/pypy/jit/metainterp/compile.py b/pypy/jit/metainterp/compile.py
--- a/pypy/jit/metainterp/compile.py
+++ b/pypy/jit/metainterp/compile.py
@@ -137,6 +137,10 @@
             jitdriver_sd.warmstate.attach_unoptimized_bridge_from_interp(
                 greenkey, loop.preamble.token)
             record_loop_or_bridge(metainterp_sd, loop.preamble)
+        elif token.short_preamble:
+            short = token.short_preamble[-1]
+            metainterp_sd.logger_ops.log_short_preamble(short.inputargs,
+                                                        short.operations)
         return token
     else:
         send_loop_to_backend(greenkey, jitdriver_sd, metainterp_sd, loop,
@@ -637,6 +641,7 @@
         debug_print("compile_new_bridge: got an InvalidLoop")
         # XXX I am fairly convinced that optimize_bridge cannot actually raise
         # InvalidLoop
+        debug_print('InvalidLoop in compile_new_bridge')
         return None
     # Did it work?
     if target_loop_token is not None:
@@ -668,10 +673,9 @@
     def handle_fail(self, metainterp_sd, jitdriver_sd):
         cpu = metainterp_sd.cpu
         exception = cpu.grab_exc_value()
+        assert exception, "PropagateExceptionDescr: no exception??"
         raise metainterp_sd.ExitFrameWithExceptionRef(cpu, exception)
 
-propagate_exception_descr = PropagateExceptionDescr()
-
 def compile_tmp_callback(cpu, jitdriver_sd, greenboxes, redboxes,
                          memory_manager=None):
     """Make a LoopToken that corresponds to assembler code that just
@@ -705,7 +709,7 @@
         finishargs = []
     #
     jd = jitdriver_sd
-    faildescr = propagate_exception_descr
+    faildescr = PropagateExceptionDescr()
     operations = [
         ResOperation(rop.CALL, callargs, result, descr=jd.portal_calldescr),
         ResOperation(rop.GUARD_NO_EXCEPTION, [], None, descr=faildescr),
diff --git a/pypy/jit/metainterp/executor.py b/pypy/jit/metainterp/executor.py
--- a/pypy/jit/metainterp/executor.py
+++ b/pypy/jit/metainterp/executor.py
@@ -50,7 +50,7 @@
     func = argboxes[0].getint()
     # do the call using the correct function from the cpu
     rettype = descr.get_return_type()
-    if rettype == INT:
+    if rettype == INT or rettype == 'S':       # *S*ingle float
         try:
             result = cpu.bh_call_i(func, descr, args_i, args_r, args_f)
         except Exception, e:
@@ -64,7 +64,7 @@
             metainterp.execute_raised(e)
             result = NULL
         return BoxPtr(result)
-    if rettype == FLOAT or rettype == 'L':
+    if rettype == FLOAT or rettype == 'L':     # *L*ong long
         try:
             result = cpu.bh_call_f(func, descr, args_i, args_r, args_f)
         except Exception, e:
diff --git a/pypy/jit/metainterp/history.py b/pypy/jit/metainterp/history.py
--- a/pypy/jit/metainterp/history.py
+++ b/pypy/jit/metainterp/history.py
@@ -20,12 +20,16 @@
 
 FAILARGS_LIMIT = 1000
 
-def getkind(TYPE, supports_floats=True, supports_longlong=True):
+def getkind(TYPE, supports_floats=True,
+                  supports_longlong=True,
+                  supports_singlefloats=True):
     if TYPE is lltype.Void:
         return "void"
     elif isinstance(TYPE, lltype.Primitive):
         if TYPE is lltype.Float and supports_floats:
             return 'float'
+        if TYPE is lltype.SingleFloat and supports_singlefloats:
+            return 'int'     # singlefloats are stored in an int
         if TYPE in (lltype.Float, lltype.SingleFloat):
             raise NotImplementedError("type %s not supported" % TYPE)
         # XXX fix this for oo...
@@ -145,6 +149,7 @@
         """ Implement in call descr.
         Must return INT, REF, FLOAT, or 'v' for void.
         On 32-bit (hack) it can also be 'L' for longlongs.
+        Additionally it can be 'S' for singlefloats.
         """
         raise NotImplementedError
 
diff --git a/pypy/jit/metainterp/optimize.py b/pypy/jit/metainterp/optimize.py
--- a/pypy/jit/metainterp/optimize.py
+++ b/pypy/jit/metainterp/optimize.py
@@ -1,4 +1,4 @@
-from pypy.rlib.debug import debug_start, debug_stop
+from pypy.rlib.debug import debug_start, debug_stop, debug_print
 from pypy.jit.metainterp.jitexc import JitException
 
 class InvalidLoop(JitException):
diff --git a/pypy/jit/metainterp/optimizeopt/__init__.py b/pypy/jit/metainterp/optimizeopt/__init__.py
--- a/pypy/jit/metainterp/optimizeopt/__init__.py
+++ b/pypy/jit/metainterp/optimizeopt/__init__.py
@@ -33,10 +33,6 @@
         if name in enable_opts:
             if opt is not None:
                 o = opt()
-                if unroll and name == 'string':
-                    o.enabled = False
-                # FIXME: Workaround to disable string optimisation
-                # during preamble but to keep it during the loop
                 optimizations.append(o)
             elif name == 'ffi' and config.translation.jit_ffi:
                 # we cannot put the class directly in the unrolling_iterable,
@@ -55,17 +51,16 @@
 
 
 def optimize_loop_1(metainterp_sd, loop, enable_opts,
-                    inline_short_preamble=True, retraced=False):
+                    inline_short_preamble=True, retraced=False, bridge=False):
     """Optimize loop.operations to remove internal overheadish operations.
     """
 
     optimizations, unroll = build_opt_chain(metainterp_sd, enable_opts,
                                             inline_short_preamble, retraced)
-
     if unroll:
         optimize_unroll(metainterp_sd, loop, optimizations)
     else:
-        optimizer = Optimizer(metainterp_sd, loop, optimizations)
+        optimizer = Optimizer(metainterp_sd, loop, optimizations, bridge)
         optimizer.propagate_all_forward()
 
 def optimize_bridge_1(metainterp_sd, bridge, enable_opts,
@@ -77,7 +72,7 @@
     except KeyError:
         pass
     optimize_loop_1(metainterp_sd, bridge, enable_opts,
-                    inline_short_preamble, retraced)
+                    inline_short_preamble, retraced, bridge=True)
 
 if __name__ == '__main__':
     print ALL_OPTS_NAMES
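
Note: build_opt_chain assembles the enabled optimizations into a pipeline in
which each pass handles the operations it understands and forwards everything
else to the next one via next_optimization.propagate_forward (the heap pass
below uses exactly that call when it forces a lazy setfield).  A toy version
of the chaining pattern, with made-up pass classes rather than the real
Optimization API:

    class ToyOptimization(object):
        def __init__(self):
            self.next_optimization = None
        def propagate_forward(self, op):
            self.next_optimization.propagate_forward(op)

    class RemoveConsecutiveDuplicates(ToyOptimization):
        def __init__(self):
            ToyOptimization.__init__(self)
            self.last = None
        def propagate_forward(self, op):
            if op != self.last:
                self.last = op
                ToyOptimization.propagate_forward(self, op)

    class Emit(ToyOptimization):
        def __init__(self):
            ToyOptimization.__init__(self)
            self.newoperations = []
        def propagate_forward(self, op):
            self.newoperations.append(op)

    def build_chain(passes):
        for i in range(len(passes) - 1):
            passes[i].next_optimization = passes[i + 1]
        return passes[0]

    emit = Emit()
    first = build_chain([RemoveConsecutiveDuplicates(), emit])
    for op in ['int_add', 'int_add', 'guard_true']:
        first.propagate_forward(op)
    assert emit.newoperations == ['int_add', 'guard_true']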
diff --git a/pypy/jit/metainterp/optimizeopt/fficall.py b/pypy/jit/metainterp/optimizeopt/fficall.py
--- a/pypy/jit/metainterp/optimizeopt/fficall.py
+++ b/pypy/jit/metainterp/optimizeopt/fficall.py
@@ -1,12 +1,11 @@
 from pypy.rpython.annlowlevel import cast_base_ptr_to_instance
 from pypy.rlib.objectmodel import we_are_translated
 from pypy.rlib.libffi import Func
-from pypy.rlib.debug import debug_start, debug_stop, debug_print, have_debug_prints
+from pypy.rlib.debug import debug_print
 from pypy.jit.codewriter.effectinfo import EffectInfo
 from pypy.jit.metainterp.resoperation import rop, ResOperation
 from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method
 from pypy.jit.metainterp.optimizeopt.optimizer import Optimization
-from pypy.jit.backend.llsupport.ffisupport import UnsupportedKind
 
 
 class FuncInfo(object):
@@ -20,11 +19,8 @@
         self.funcval = funcval
         self.opargs = []
         argtypes, restype = self._get_signature(funcval)
-        try:
-            self.descr = cpu.calldescrof_dynamic(argtypes, restype)
-        except UnsupportedKind:
-            # e.g., I or U for long longs
-            self.descr = None
+        self.descr = cpu.calldescrof_dynamic(argtypes, restype)
+        # ^^^ may be None if unsupported
         self.prepare_op = prepare_op
         self.delayed_ops = []
 
@@ -48,7 +44,7 @@
           inst_argtypes is actually a low-level array, but we can use it
           directly since the only thing we do with it is to read its items
         """
-        
+
         llfunc = funcval.box.getref_base()
         if we_are_translated():
             func = cast_base_ptr_to_instance(Func, llfunc)
@@ -78,18 +74,9 @@
         else:
             self.logops = None
 
-    def propagate_begin_forward(self):
-        debug_start('jit-log-ffiopt')
-        Optimization.propagate_begin_forward(self)
-
-    def propagate_end_forward(self):
-        debug_stop('jit-log-ffiopt')
-        Optimization.propagate_end_forward(self)
-
-    def reconstruct_for_next_iteration(self, optimizer, valuemap):
+    def new(self):
         return OptFfiCall()
-        # FIXME: Should any status be saved for next iteration?
-
+    
     def begin_optimization(self, funcval, op):
         self.rollback_maybe('begin_optimization', op)
         self.funcinfo = FuncInfo(funcval, self.optimizer.cpu, op)
@@ -184,7 +171,8 @@
     def do_call(self, op):
         funcval = self._get_funcval(op)
         funcinfo = self.funcinfo
-        if not funcinfo or funcinfo.funcval is not funcval:
+        if (not funcinfo or funcinfo.funcval is not funcval or
+            funcinfo.descr is None):
             return [op] # cannot optimize
         funcsymval = self.getvalue(op.getarg(2))
         arglist = [funcsymval.force_box()]
diff --git a/pypy/jit/metainterp/optimizeopt/generalize.py b/pypy/jit/metainterp/optimizeopt/generalize.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/metainterp/optimizeopt/generalize.py
@@ -0,0 +1,19 @@
+from pypy.jit.metainterp.optimizeopt.optimizer import MININT, MAXINT
+
+class GeneralizationStrategy(object):
+    def __init__(self, optimizer):
+        self.optimizer = optimizer
+
+    def apply(self):
+        raise NotImplementedError
+
+class KillHugeIntBounds(GeneralizationStrategy):
+    def apply(self):
+        for v in self.optimizer.values.values():
+            if v.is_constant():
+                continue
+            if v.intbound.lower < MININT/2:
+                v.intbound.lower = MININT
+            if v.intbound.upper > MAXINT/2:
+                v.intbound.upper = MAXINT
+          
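
Note: KillHugeIntBounds resets any integer bound that already spans more than
half of the machine-integer range back to "unbounded", so that the optimized
loop is not specialized on incidentally huge, but not genuinely meaningful,
intervals.  In plain numbers (a stand-in Bound class instead of the real
IntBound):

    import sys

    MAXINT = sys.maxint
    MININT = -sys.maxint - 1

    class Bound(object):
        def __init__(self, lower, upper):
            self.lower = lower
            self.upper = upper

    def kill_huge_bounds(bound):
        # anything looser than half the integer range is treated as unknown
        if bound.lower < MININT / 2:
            bound.lower = MININT
        if bound.upper > MAXINT / 2:
            bound.upper = MAXINT

    b = Bound(0, MAXINT - 5)     # tight lower bound, "almost unbounded" upper
    kill_huge_bounds(b)
    assert (b.lower, b.upper) == (0, MAXINT)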
diff --git a/pypy/jit/metainterp/optimizeopt/heap.py b/pypy/jit/metainterp/optimizeopt/heap.py
--- a/pypy/jit/metainterp/optimizeopt/heap.py
+++ b/pypy/jit/metainterp/optimizeopt/heap.py
@@ -1,9 +1,11 @@
 import os
+
+from pypy.jit.metainterp.jitexc import JitException
+from pypy.jit.metainterp.optimizeopt.optimizer import Optimization, MODE_ARRAY
+from pypy.jit.metainterp.history import ConstInt, Const
 from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method
 from pypy.jit.metainterp.resoperation import rop, ResOperation
 from pypy.rlib.objectmodel import we_are_translated
-from pypy.jit.metainterp.jitexc import JitException
-from pypy.jit.metainterp.optimizeopt.optimizer import Optimization
 
 
 class CachedField(object):
@@ -23,6 +25,7 @@
         #      'cached_fields'.
         #
         self._cached_fields = {}
+        self._cached_fields_getfield_op = {}        
         self._lazy_setfield = None
         self._lazy_setfield_registered = False
 
@@ -69,36 +72,71 @@
         else:
             return self._cached_fields.get(structvalue, None)
 
-    def remember_field_value(self, structvalue, fieldvalue):
+    def remember_field_value(self, structvalue, fieldvalue, getfield_op=None):
         assert self._lazy_setfield is None
         self._cached_fields[structvalue] = fieldvalue
+        self._cached_fields_getfield_op[structvalue] = getfield_op        
 
-    def force_lazy_setfield(self, optheap):
+    def force_lazy_setfield(self, optheap, can_cache=True):
         op = self._lazy_setfield
         if op is not None:
             # This is the way _lazy_setfield is usually reset to None.
             # Now we clear _cached_fields, because actually doing the
             # setfield might impact any of the stored result (because of
             # possible aliasing).
-            self._cached_fields.clear()
+            self.clear()
             self._lazy_setfield = None
             optheap.next_optimization.propagate_forward(op)
+            if not can_cache:
+                return
             # Once it is done, we can put at least one piece of information
             # back in the cache: the value of this particular structure's
             # field.
             structvalue = optheap.getvalue(op.getarg(0))
             fieldvalue  = optheap.getvalue(op.getarglist()[-1])
-            self.remember_field_value(structvalue, fieldvalue)
+            self.remember_field_value(structvalue, fieldvalue, op)
+        elif not can_cache:
+            self.clear()
 
-    def get_reconstructed(self, optimizer, valuemap):
-        assert self._lazy_setfield is None
-        cf = CachedField()
-        for structvalue, fieldvalue in self._cached_fields.iteritems():
-            structvalue2 = structvalue.get_reconstructed(optimizer, valuemap)
-            fieldvalue2  = fieldvalue .get_reconstructed(optimizer, valuemap)
-            cf._cached_fields[structvalue2] = fieldvalue2
-        return cf
+    def clear(self):
+        self._cached_fields.clear()
+        self._cached_fields_getfield_op.clear()
 
+    def turned_constant(self, newvalue, value):
+        if newvalue not in self._cached_fields and value in self._cached_fields:
+            self._cached_fields[newvalue] = self._cached_fields[value]
+            op = self._cached_fields_getfield_op[value].clone()
+            constbox = value.box
+            assert isinstance(constbox, Const)
+            op.setarg(0, constbox)
+            self._cached_fields_getfield_op[newvalue] = op
+        for structvalue in self._cached_fields.keys():
+            if self._cached_fields[structvalue] is value:
+                self._cached_fields[structvalue] = newvalue
+
+    def produce_potential_short_preamble_ops(self, optimizer, shortboxes, descr):
+        if self._lazy_setfield is not None:
+            return
+        for structvalue in self._cached_fields_getfield_op.keys():
+            op = self._cached_fields_getfield_op[structvalue]
+            if not op:
+                continue
+            if optimizer.getvalue(op.getarg(0)) in optimizer.opaque_pointers:
+                continue
+            if structvalue in self._cached_fields:
+                if op.getopnum() == rop.SETFIELD_GC:
+                    result = op.getarg(1)
+                    if isinstance(result, Const):
+                        newresult = result.clonebox()
+                        optimizer.make_constant(newresult, result)
+                        result = newresult
+                    getop = ResOperation(rop.GETFIELD_GC, [op.getarg(0)],
+                                         result, op.getdescr())
+                    getop = shortboxes.add_potential(getop)
+                    self._cached_fields_getfield_op[structvalue] = getop
+                    self._cached_fields[structvalue] = optimizer.getvalue(result)
+                elif op.result is not None:
+                    shortboxes.add_potential(op)
 
 class BogusPureField(JitException):
     pass
@@ -117,24 +155,32 @@
         self._remove_guard_not_invalidated = False
         self._seen_guard_not_invalidated = False
 
-    def reconstruct_for_next_iteration(self, optimizer, valuemap):
-        new = OptHeap()
+    def force_at_end_of_preamble(self):
+        self.force_all_lazy_setfields_and_arrayitems()
 
-        if True:
-            self.force_all_lazy_setfields_and_arrayitems()
-        else:
-            assert 0   # was: new.lazy_setfields = self.lazy_setfields
+    def flush(self):
+        self.force_all_lazy_setfields_and_arrayitems()
 
-        for descr, d in self.cached_fields.items():
-            new.cached_fields[descr] = d.get_reconstructed(optimizer, valuemap)
+    def new(self):
+        return OptHeap()
+        
+    def produce_potential_short_preamble_ops(self, sb):
+        descrkeys = self.cached_fields.keys()
+        if not we_are_translated():
+            # XXX Pure operations on boxes that are cached in several places
+            #     will only be removed from the peeled loop when read from the
+            #     first place discovered here. This is far from ideal, as it
+            #     makes the effectiveness of our optimization a bit random. It
+            #     should however always generate correct results. For tests we
+            #     don't want this randomness.
+            descrkeys.sort(key=str, reverse=True)
+        for descr in descrkeys:
+            d = self.cached_fields[descr]
+            d.produce_potential_short_preamble_ops(self.optimizer, sb, descr)
 
         for descr, submap in self.cached_arrayitems.items():
-            newdict = {}
             for index, d in submap.items():
-                newdict[index] = d.get_reconstructed(optimizer, valuemap)
-            new.cached_arrayitems[descr] = newdict
-
-        return new
+                d.produce_potential_short_preamble_ops(self.optimizer, sb, descr)
 
     def clean_caches(self):
         del self._lazy_setfields_and_arrayitems[:]
@@ -202,20 +248,9 @@
                 for arraydescr in effectinfo.readonly_descrs_arrays:
                     self.force_lazy_setarrayitem(arraydescr)
                 for fielddescr in effectinfo.write_descrs_fields:
-                    self.force_lazy_setfield(fielddescr)
-                    try:
-                        cf = self.cached_fields[fielddescr]
-                        cf._cached_fields.clear()
-                    except KeyError:
-                        pass
+                    self.force_lazy_setfield(fielddescr, can_cache=False)
                 for arraydescr in effectinfo.write_descrs_arrays:
-                    self.force_lazy_setarrayitem(arraydescr)
-                    try:
-                        submap = self.cached_arrayitems[arraydescr]
-                        for cf in submap.itervalues():
-                            cf._cached_fields.clear()
-                    except KeyError:
-                        pass
+                    self.force_lazy_setarrayitem(arraydescr, can_cache=False)
                 if effectinfo.check_forces_virtual_or_virtualizable():
                     vrefinfo = self.optimizer.metainterp_sd.virtualref_info
                     self.force_lazy_setfield(vrefinfo.descr_forced)
@@ -231,27 +266,26 @@
         newvalue = self.getvalue(value.box)
         if value is not newvalue:
             for cf in self.cached_fields.itervalues():
-                if value in cf._cached_fields:
-                    cf._cached_fields[newvalue] = cf._cached_fields[value]
+                cf.turned_constant(newvalue, value)
             for submap in self.cached_arrayitems.itervalues():
                 for cf in submap.itervalues():
-                    if value in cf._cached_fields:
-                        cf._cached_fields[newvalue] = cf._cached_fields[value]
+                    cf.turned_constant(newvalue, value)
 
-    def force_lazy_setfield(self, descr):
+    def force_lazy_setfield(self, descr, can_cache=True):
         try:
             cf = self.cached_fields[descr]
         except KeyError:
             return
-        cf.force_lazy_setfield(self)
+        cf.force_lazy_setfield(self, can_cache)
 
-    def force_lazy_setarrayitem(self, arraydescr):
+    def force_lazy_setarrayitem(self, arraydescr, indexvalue=None, can_cache=True):
         try:
             submap = self.cached_arrayitems[arraydescr]
         except KeyError:
             return
-        for cf in submap.values():
-            cf.force_lazy_setfield(self)
+        for idx, cf in submap.iteritems():
+            if indexvalue is None or indexvalue.intbound.contains(idx):
+                cf.force_lazy_setfield(self, can_cache)
 
     def fixup_guard_situation(self):
         # hackish: reverse the order of the last two operations if it makes
@@ -338,7 +372,7 @@
         self.emit_operation(op)
         # then remember the result of reading the field
         fieldvalue = self.getvalue(op.result)
-        cf.remember_field_value(structvalue, fieldvalue)
+        cf.remember_field_value(structvalue, fieldvalue, op)
 
     def optimize_SETFIELD_GC(self, op):
         if self.has_pure_result(rop.GETFIELD_GC_PURE, [op.getarg(0)],
@@ -355,6 +389,7 @@
         indexvalue = self.getvalue(op.getarg(1))
         cf = None
         if indexvalue.is_constant():
+            arrayvalue.make_len_gt(MODE_ARRAY, op.getdescr(), indexvalue.box.getint())
             # use the cache on (arraydescr, index), which is a constant
             cf = self.arrayitem_cache(op.getdescr(), indexvalue.box.getint())
             fieldvalue = cf.getfield_from_cache(self, arrayvalue)
@@ -363,14 +398,14 @@
                 return
         else:
             # variable index, so make sure the lazy setarrayitems are done
-            self.force_lazy_setarrayitem(op.getdescr())
+            self.force_lazy_setarrayitem(op.getdescr(), indexvalue=indexvalue)
         # default case: produce the operation
         arrayvalue.ensure_nonnull()
         self.emit_operation(op)
         # then remember the result of reading the array item
         if cf is not None:
             fieldvalue = self.getvalue(op.result)
-            cf.remember_field_value(arrayvalue, fieldvalue)
+            cf.remember_field_value(arrayvalue, fieldvalue, op)
 
     def optimize_SETARRAYITEM_GC(self, op):
         if self.has_pure_result(rop.GETARRAYITEM_GC_PURE, [op.getarg(0),
@@ -382,12 +417,14 @@
         #
         indexvalue = self.getvalue(op.getarg(1))
         if indexvalue.is_constant():
+            arrayvalue = self.getvalue(op.getarg(0))
+            arrayvalue.make_len_gt(MODE_ARRAY, op.getdescr(), indexvalue.box.getint())
             # use the cache on (arraydescr, index), which is a constant
             cf = self.arrayitem_cache(op.getdescr(), indexvalue.box.getint())
             cf.do_setfield(self, op)
         else:
             # variable index, so make sure the lazy setarrayitems are done
-            self.force_lazy_setarrayitem(op.getdescr())
+            self.force_lazy_setarrayitem(op.getdescr(), indexvalue=indexvalue, can_cache=False)
             # and then emit the operation
             self.emit_operation(op)
 
diff --git a/pypy/jit/metainterp/optimizeopt/intbounds.py b/pypy/jit/metainterp/optimizeopt/intbounds.py
--- a/pypy/jit/metainterp/optimizeopt/intbounds.py
+++ b/pypy/jit/metainterp/optimizeopt/intbounds.py
@@ -1,9 +1,11 @@
-from pypy.jit.metainterp.optimizeopt.optimizer import Optimization, CONST_1, CONST_0
+from pypy.jit.metainterp.optimizeopt.optimizer import Optimization, CONST_1, CONST_0, \
+                                                  MODE_ARRAY, MODE_STR, MODE_UNICODE
+from pypy.jit.metainterp.history import ConstInt
+from pypy.jit.metainterp.optimizeopt.intutils import (IntBound, IntLowerBound,
+    IntUpperBound)
 from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method
-from pypy.jit.metainterp.optimizeopt.intutils import (IntBound, IntUnbounded,
-    IntLowerBound, IntUpperBound)
-from pypy.jit.metainterp.history import Const, ConstInt
-from pypy.jit.metainterp.resoperation import rop, ResOperation
+from pypy.jit.metainterp.resoperation import rop
+
 
 class OptIntBounds(Optimization):
     """Keeps track of the bounds placed on integers by guards and remove
@@ -13,18 +15,17 @@
         self.posponedop = None
         self.nextop = None
 
-    def reconstruct_for_next_iteration(self, optimizer, valuemap):
+    def new(self):
         assert self.posponedop is None
-        return self
+        return OptIntBounds()
+        
+    def flush(self):
+        assert self.posponedop is None
 
     def setup(self):
         self.posponedop = None
         self.nextop = None
 
-    def reconstruct_for_next_iteration(self, optimizer, valuemap):
-        assert self.posponedop is None
-        return self
-
     def propagate_forward(self, op):
         if op.is_ovf():
             self.posponedop = op
@@ -124,6 +125,17 @@
         r = self.getvalue(op.result)
         r.intbound.intersect(v1.intbound.div_bound(v2.intbound))
 
+    def optimize_INT_MOD(self, op):
+        self.emit_operation(op)
+        v2 = self.getvalue(op.getarg(1))
+        if v2.is_constant():
+            val = v2.box.getint()
+            r = self.getvalue(op.result)
+            if val < 0:
+                val = -val
+            r.intbound.make_gt(IntBound(-val, -val))
+            r.intbound.make_lt(IntBound(val, val))
+
     def optimize_INT_LSHIFT(self, op):
         v1 = self.getvalue(op.getarg(0))
         v2 = self.getvalue(op.getarg(1))
@@ -274,10 +286,27 @@
 
     def optimize_ARRAYLEN_GC(self, op):
         self.emit_operation(op)
-        v1 = self.getvalue(op.result)
-        v1.intbound.make_ge(IntLowerBound(0))
+        array  = self.getvalue(op.getarg(0))
+        result = self.getvalue(op.result)
+        array.make_len_gt(MODE_ARRAY, op.getdescr(), -1)
+        array.lenbound.bound.intersect(result.intbound)
+        result.intbound = array.lenbound.bound
 
-    optimize_STRLEN = optimize_UNICODELEN = optimize_ARRAYLEN_GC
+    def optimize_STRLEN(self, op):
+        self.emit_operation(op)
+        array  = self.getvalue(op.getarg(0))
+        result = self.getvalue(op.result)
+        array.make_len_gt(MODE_STR, op.getdescr(), -1)
+        array.lenbound.bound.intersect(result.intbound)
+        result.intbound = array.lenbound.bound
+
+    def optimize_UNICODELEN(self, op):
+        self.emit_operation(op)
+        array  = self.getvalue(op.getarg(0))
+        result = self.getvalue(op.result)
+        array.make_len_gt(MODE_UNICODE, op.getdescr(), -1)
+        array.lenbound.bound.intersect(result.intbound)
+        result.intbound = array.lenbound.bound
 
     def optimize_STRGETITEM(self, op):
         self.emit_operation(op)
diff --git a/pypy/jit/metainterp/optimizeopt/intutils.py b/pypy/jit/metainterp/optimizeopt/intutils.py
--- a/pypy/jit/metainterp/optimizeopt/intutils.py
+++ b/pypy/jit/metainterp/optimizeopt/intutils.py
@@ -1,4 +1,9 @@
 from pypy.rlib.rarithmetic import ovfcheck, ovfcheck_lshift, LONG_BIT
+from pypy.jit.metainterp.resoperation import rop, ResOperation
+from pypy.jit.metainterp.history import BoxInt, ConstInt
+import sys
+MAXINT = sys.maxint
+MININT = -sys.maxint - 1
 
 class IntBound(object):
     _attrs_ = ('has_upper', 'has_lower', 'upper', 'lower')
@@ -210,11 +215,11 @@
         
     def __repr__(self):
         if self.has_lower:
-            l = '%4d' % self.lower
+            l = '%d' % self.lower
         else:
             l = '-Inf'
         if self.has_upper:
-            u = '%3d' % self.upper
+            u = '%d' % self.upper
         else:
             u = 'Inf'
         return '%s <= x <= %s' % (l, u)
@@ -224,7 +229,24 @@
         res.has_lower = self.has_lower
         res.has_upper = self.has_upper
         return res
+
+    def make_guards(self, box, guards):
+        if self.has_lower and self.lower > MININT:
+            bound = self.lower
+            res = BoxInt()
+            op = ResOperation(rop.INT_GE, [box, ConstInt(bound)], res)
+            guards.append(op)
+            op = ResOperation(rop.GUARD_TRUE, [res], None)
+            guards.append(op)
+        if self.has_upper and self.upper < MAXINT:
+            bound = self.upper
+            res = BoxInt()
+            op = ResOperation(rop.INT_LE, [box, ConstInt(bound)], res)
+            guards.append(op)
+            op = ResOperation(rop.GUARD_TRUE, [res], None)
+            guards.append(op)
     
+
 class IntUpperBound(IntBound):
     def __init__(self, upper):
         self.has_upper = True
@@ -244,7 +266,23 @@
         self.has_upper = False
         self.has_lower = False
         self.upper = 0
-        self.lower = 0        
+        self.lower = 0
+
+class ImmutableIntUnbounded(IntUnbounded):
+    def _raise(self):
+        raise TypeError('ImmutableIntUnbounded is immutable')
+    def make_le(self, other):
+        self._raise()
+    def make_lt(self, other):
+        self._raise()
+    def make_ge(self, other):
+        self._raise()
+    def make_gt(self, other):
+        self._raise()
+    def make_constant(self, value):
+        self._raise()
+    def intersect(self, other):        
+        self._raise()
 
 def min4(t):
     return min(min(t[0], t[1]), min(t[2], t[3]))
diff --git a/pypy/jit/metainterp/optimizeopt/optimizer.py b/pypy/jit/metainterp/optimizeopt/optimizer.py
--- a/pypy/jit/metainterp/optimizeopt/optimizer.py
+++ b/pypy/jit/metainterp/optimizeopt/optimizer.py
@@ -1,70 +1,105 @@
-from pypy.jit.metainterp.history import Box, BoxInt, LoopToken, BoxFloat,\
-     ConstFloat
-from pypy.jit.metainterp.history import Const, ConstInt, ConstPtr, ConstObj, REF
+from pypy.jit.metainterp import jitprof, resume, compile
+from pypy.jit.metainterp.executor import execute_nonspec
+from pypy.jit.metainterp.history import BoxInt, BoxFloat, Const, ConstInt, REF
+from pypy.jit.metainterp.optimizeopt.intutils import IntBound, IntUnbounded, \
+                                                     ImmutableIntUnbounded, \
+                                                     IntLowerBound, MININT, MAXINT
+from pypy.jit.metainterp.optimizeopt.util import (make_dispatcher_method,
+    args_dict)
 from pypy.jit.metainterp.resoperation import rop, ResOperation
-from pypy.jit.metainterp import jitprof
-from pypy.jit.metainterp.executor import execute_nonspec
-from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method, sort_descrs
-from pypy.jit.metainterp.optimizeopt.util import descrlist_dict, args_dict
-from pypy.jit.metainterp.optimize import InvalidLoop
-from pypy.jit.metainterp import resume, compile
 from pypy.jit.metainterp.typesystem import llhelper, oohelper
-from pypy.rpython.lltypesystem import lltype
-from pypy.jit.metainterp.history import AbstractDescr, make_hashable_int
-from pypy.jit.metainterp.optimizeopt.intutils import IntBound, IntUnbounded
 from pypy.tool.pairtype import extendabletype
+from pypy.rlib.debug import debug_start, debug_stop, debug_print
 
 LEVEL_UNKNOWN    = '\x00'
 LEVEL_NONNULL    = '\x01'
 LEVEL_KNOWNCLASS = '\x02'     # might also mean KNOWNARRAYDESCR, for arrays
 LEVEL_CONSTANT   = '\x03'
 
-import sys
-MAXINT = sys.maxint
-MININT = -sys.maxint - 1
+MODE_ARRAY   = '\x00'
+MODE_STR     = '\x01'
+MODE_UNICODE = '\x02'
+class LenBound(object):
+    def __init__(self, mode, descr, bound):
+        self.mode = mode
+        self.descr = descr
+        self.bound = bound
 
 class OptValue(object):
     __metaclass__ = extendabletype
-    _attrs_ = ('box', 'known_class', 'last_guard_index', 'level', 'intbound')
+    _attrs_ = ('box', 'known_class', 'last_guard_index', 'level', 'intbound', 'lenbound')
     last_guard_index = -1
 
     level = LEVEL_UNKNOWN
     known_class = None
-    intbound = None
+    intbound = ImmutableIntUnbounded()
+    lenbound = None
 
-    def __init__(self, box):
+    def __init__(self, box, level=None, known_class=None, intbound=None):
         self.box = box
-        self.intbound = IntBound(MININT, MAXINT) #IntUnbounded()
+        if level is not None:
+            self.level = level
+        self.known_class = known_class
+        if intbound:
+            self.intbound = intbound
+        else:
+            if isinstance(box, BoxInt):
+                self.intbound = IntBound(MININT, MAXINT)
+            else:
+                self.intbound = IntUnbounded()
+
         if isinstance(box, Const):
             self.make_constant(box)
         # invariant: box is a Const if and only if level == LEVEL_CONSTANT
 
+    def make_len_gt(self, mode, descr, val):
+        if self.lenbound:
+            assert self.lenbound.mode == mode
+            assert self.lenbound.descr == descr
+            self.lenbound.bound.make_gt(IntBound(val, val))
+        else:
+            self.lenbound = LenBound(mode, descr, IntLowerBound(val + 1))
+
+    def make_guards(self, box):
+        guards = []
+        if self.level == LEVEL_CONSTANT:
+            op = ResOperation(rop.GUARD_VALUE, [box, self.box], None)
+            guards.append(op)
+        elif self.level == LEVEL_KNOWNCLASS:
+            op = ResOperation(rop.GUARD_NONNULL, [box], None)
+            guards.append(op)            
+            op = ResOperation(rop.GUARD_CLASS, [box, self.known_class], None)
+            guards.append(op)
+        else:
+            if self.level == LEVEL_NONNULL:
+                op = ResOperation(rop.GUARD_NONNULL, [box], None)
+                guards.append(op)
+            self.intbound.make_guards(box, guards)
+            if self.lenbound:
+                lenbox = BoxInt()
+                if self.lenbound.mode == MODE_ARRAY:
+                    op = ResOperation(rop.ARRAYLEN_GC, [box], lenbox, self.lenbound.descr)
+                elif self.lenbound.mode == MODE_STR:
+                    op = ResOperation(rop.STRLEN, [box], lenbox, self.lenbound.descr)
+                elif self.lenbound.mode == MODE_UNICODE:
+                    op = ResOperation(rop.UNICODELEN, [box], lenbox, self.lenbound.descr)
+                else:
+                    debug_print("Unknown lenbound mode")
+                    assert False
+                guards.append(op)
+                self.lenbound.bound.make_guards(lenbox, guards)
+
+        return guards
+
     def force_box(self):
         return self.box
 
     def get_key_box(self):
         return self.box
 
-    def enum_forced_boxes(self, boxes, already_seen):
-        key = self.get_key_box()
-        if key not in already_seen:
-            boxes.append(self.force_box())
-            already_seen[self.get_key_box()] = None
-
-    def get_reconstructed(self, optimizer, valuemap):
-        if self in valuemap:
-            return valuemap[self]
-        new = self.reconstruct_for_next_iteration(optimizer)
-        valuemap[self] = new
-        self.reconstruct_childs(new, valuemap)
-        return new
-
-    def reconstruct_for_next_iteration(self, optimizer):
+    def force_at_end_of_preamble(self, already_forced):
         return self
 
-    def reconstruct_childs(self, new, valuemap):
-        pass
-
     def get_args_for_fail(self, modifier):
         pass
 
@@ -88,6 +123,7 @@
         assert isinstance(constbox, Const)
         self.box = constbox
         self.level = LEVEL_CONSTANT
+        
         if isinstance(constbox, ConstInt):
             val = constbox.getint()
             self.intbound = IntBound(val, val)
@@ -228,7 +264,9 @@
 
     def pure(self, opnum, args, result):
         op = ResOperation(opnum, args, result)
-        self.optimizer.pure_operations[self.optimizer.make_args_key(op)] = op
+        key = self.optimizer.make_args_key(op)
+        if key not in self.optimizer.pure_operations:
+            self.optimizer.pure_operations[key] = op
 
     def has_pure_result(self, opnum, args, descr):
         op = ResOperation(opnum, args, None, descr)
@@ -241,35 +279,45 @@
     def setup(self):
         pass
 
+    def turned_constant(self, value):
+        pass
+
     def force_at_end_of_preamble(self):
         pass
 
-    def turned_constant(self, value):
+    # It is too late to force stuff here; it must be done in force_at_end_of_preamble
+    def new(self):
+        raise NotImplementedError
+
+    # Called after the last operation has been propagated, to flush out any postponed ops
+    def flush(self):
         pass
 
-    def reconstruct_for_next_iteration(self, optimizer=None, valuemap=None):
-        #return self.__class__()
-        raise NotImplementedError
-
+    def produce_potential_short_preamble_ops(self, potential_ops):
+        pass
 
 class Optimizer(Optimization):
 
-    def __init__(self, metainterp_sd, loop, optimizations=None):
+    def __init__(self, metainterp_sd, loop, optimizations=None, bridge=False):
         self.metainterp_sd = metainterp_sd
         self.cpu = metainterp_sd.cpu
         self.loop = loop
+        self.bridge = bridge
         self.values = {}
         self.interned_refs = self.cpu.ts.new_ref_dict()
         self.resumedata_memo = resume.ResumeDataLoopMemo(metainterp_sd)
         self.bool_boxes = {}
-        self.loop_invariant_results = {}
         self.pure_operations = args_dict()
+        self.emitted_pure_operations = {}
         self.producer = {}
         self.pendingfields = []
         self.posponedop = None
         self.exception_might_have_happened = False
         self.quasi_immutable_deps = None
+        self.opaque_pointers = {}
         self.newoperations = []
+        self.emitting_dissabled = False
+        self.emitted_guards = 0        
         if loop is not None:
             self.call_pure_results = loop.call_pure_results
 
@@ -291,39 +339,32 @@
         self.optimizations  = optimizations
 
     def force_at_end_of_preamble(self):
-        self.resumedata_memo = resume.ResumeDataLoopMemo(self.metainterp_sd)
         for o in self.optimizations:
             o.force_at_end_of_preamble()
 
-    def reconstruct_for_next_iteration(self, optimizer=None, valuemap=None):
-        assert optimizer is None
-        assert valuemap is None
-        valuemap = {}
+    def flush(self):
+        for o in self.optimizations:
+            o.flush()
+        assert self.posponedop is None
+
+    def new(self):
+        assert self.posponedop is None
         new = Optimizer(self.metainterp_sd, self.loop)
-        optimizations = [o.reconstruct_for_next_iteration(new, valuemap) for o in
-                         self.optimizations]
+        optimizations = [o.new() for o in self.optimizations]
         new.set_optimizations(optimizations)
-
-        new.values = {}
-        for box, value in self.values.items():
-            new.values[box] = value.get_reconstructed(new, valuemap)
-        new.interned_refs = self.interned_refs
-        new.bool_boxes = {}
-        for value in new.bool_boxes.keys():
-            new.bool_boxes[value.get_reconstructed(new, valuemap)] = None
-
-        # FIXME: Move to rewrite.py
-        new.loop_invariant_results = {}
-        for key, value in self.loop_invariant_results.items():
-            new.loop_invariant_results[key] = \
-                                 value.get_reconstructed(new, valuemap)
-
-        new.pure_operations = self.pure_operations
-        new.producer = self.producer
-        assert self.posponedop is None
         new.quasi_immutable_deps = self.quasi_immutable_deps
-
         return new
+        
+    def produce_potential_short_preamble_ops(self, sb):
+        for op in self.emitted_pure_operations:
+            if op.getopnum() == rop.GETARRAYITEM_GC_PURE or \
+               op.getopnum() == rop.STRGETITEM or \
+               op.getopnum() == rop.UNICODEGETITEM:
+                if not self.getvalue(op.getarg(1)).is_constant():
+                    continue
+            sb.add_potential(op)
+        for opt in self.optimizations:
+            opt.produce_potential_short_preamble_ops(sb)
 
     def turned_constant(self, value):
         for o in self.optimizations:
@@ -413,9 +454,7 @@
             return CVAL_ZERO
 
     def propagate_all_forward(self):
-        self.exception_might_have_happened = True
-        # ^^^ at least at the start of bridges.  For loops, we could set
-        # it to False, but we probably don't care
+        self.exception_might_have_happened = self.bridge
         self.newoperations = []
         self.first_optimization.propagate_begin_forward()
         self.i = 0
@@ -440,10 +479,11 @@
         return True
 
     def emit_operation(self, op):
-        ###self.heap_op_optimizer.emitting_operation(op)
-        self._emit_operation(op)
-
-    def _emit_operation(self, op):
+        if op.returns_bool_result():
+            self.bool_boxes[self.getvalue(op.result)] = None
+        if self.emitting_dissabled:
+            return
+        
         for i in range(op.numargs()):
             arg = op.getarg(i)
             if arg in self.values:
@@ -452,11 +492,10 @@
         self.metainterp_sd.profiler.count(jitprof.OPT_OPS)
         if op.is_guard():
             self.metainterp_sd.profiler.count(jitprof.OPT_GUARDS)
+            self.emitted_guards += 1 # FIXME: can we reuse above counter?
             op = self.store_final_boxes_in_guard(op)
         elif op.can_raise():
             self.exception_might_have_happened = True
-        elif op.returns_bool_result():
-            self.bool_boxes[self.getvalue(op.result)] = None
         self.newoperations.append(op)
 
     def store_final_boxes_in_guard(self, op):
@@ -540,6 +579,7 @@
                 return
             else:
                 self.pure_operations[args] = op
+                self.emitted_pure_operations[op] = True
 
         # otherwise, the operation remains
         self.emit_operation(op)
@@ -562,6 +602,35 @@
     def optimize_DEBUG_MERGE_POINT(self, op):
         self.emit_operation(op)
 
+    def optimize_CAST_OPAQUE_PTR(self, op):
+        value = self.getvalue(op.getarg(0))
+        self.opaque_pointers[value] = True
+        self.make_equal_to(op.result, value)
+
+    def optimize_GETARRAYITEM_GC_PURE(self, op):
+        indexvalue = self.getvalue(op.getarg(1))
+        if indexvalue.is_constant():
+            arrayvalue = self.getvalue(op.getarg(0))
+            arrayvalue.make_len_gt(MODE_ARRAY, op.getdescr(), indexvalue.box.getint())
+        self.optimize_default(op)
+
+    def optimize_STRGETITEM(self, op):
+        indexvalue = self.getvalue(op.getarg(1))
+        if indexvalue.is_constant():
+            arrayvalue = self.getvalue(op.getarg(0))
+            arrayvalue.make_len_gt(MODE_STR, op.getdescr(), indexvalue.box.getint())
+        self.optimize_default(op)
+
+    def optimize_UNICODEGETITEM(self, op):
+        indexvalue = self.getvalue(op.getarg(1))
+        if indexvalue.is_constant():
+            arrayvalue = self.getvalue(op.getarg(0))
+            arrayvalue.make_len_gt(MODE_UNICODE, op.getdescr(), indexvalue.box.getint())
+        self.optimize_default(op)
+        
+
+    
+
 dispatch_opt = make_dispatcher_method(Optimizer, 'optimize_',
         default=Optimizer.optimize_default)
 
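The pure-operation bookkeeping added above comes down to two small ideas: pure()
records at most one canonical operation per argument key (an earlier recording is
never overwritten), and every pure operation that actually gets emitted is
remembered in emitted_pure_operations so that produce_potential_short_preamble_ops
can later offer it to the short preamble. A minimal standalone sketch of that
idea, not part of the patch; the Op tuple and the plain tuple key stand in for
ResOperation and make_args_key(), which are simplified here:

    from collections import namedtuple

    # Stand-in for ResOperation: just an opnum, its arguments and a result box.
    Op = namedtuple("Op", ["opnum", "args", "result"])

    class PureOpRegistry(object):
        def __init__(self):
            self.pure_operations = {}          # key -> first op recorded
            self.emitted_pure_operations = {}  # op -> True, once emitted

        def pure(self, opnum, args, result):
            op = Op(opnum, tuple(args), result)
            key = (opnum, tuple(args))         # stand-in for make_args_key(op)
            if key not in self.pure_operations:
                self.pure_operations[key] = op # keep the first recording only

        def record_emitted(self, op):
            self.emitted_pure_operations[op] = True

        def produce_potential_short_preamble_ops(self, add_potential):
            # every emitted pure op is a candidate for the short preamble
            for op in self.emitted_pure_operations:
                add_potential(op)

In the real method above, GETARRAYITEM_GC_PURE, STRGETITEM and UNICODEGETITEM
operations whose index is not constant are additionally skipped before being
offered as candidates.
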
diff --git a/pypy/jit/metainterp/optimizeopt/rewrite.py b/pypy/jit/metainterp/optimizeopt/rewrite.py
--- a/pypy/jit/metainterp/optimizeopt/rewrite.py
+++ b/pypy/jit/metainterp/optimizeopt/rewrite.py
@@ -1,10 +1,11 @@
+from pypy.jit.codewriter.effectinfo import EffectInfo
+from pypy.jit.metainterp.history import ConstInt, make_hashable_int
+from pypy.jit.metainterp.optimize import InvalidLoop
+from pypy.jit.metainterp.optimizeopt.intutils import IntBound
 from pypy.jit.metainterp.optimizeopt.optimizer import *
-from pypy.jit.metainterp.resoperation import opboolinvers, opboolreflex
-from pypy.jit.metainterp.history import ConstInt
 from pypy.jit.metainterp.optimizeopt.util import _findall, make_dispatcher_method
-from pypy.jit.metainterp.resoperation import rop, ResOperation
-from pypy.jit.codewriter.effectinfo import EffectInfo
-from pypy.jit.metainterp.optimizeopt.intutils import IntBound
+from pypy.jit.metainterp.resoperation import (opboolinvers, opboolreflex, rop,
+    ResOperation)
 from pypy.rlib.rarithmetic import highest_bit
 
 
@@ -12,9 +13,16 @@
     """Rewrite operations into equivalent, cheaper operations.
        This includes already executed operations and constants.
     """
+    def __init__(self):
+        self.loop_invariant_results = {}
+        self.loop_invariant_producer = {}
 
-    def reconstruct_for_next_iteration(self, optimizer, valuemap):
-        return self
+    def new(self):
+        return OptRewrite()
+        
+    def produce_potential_short_preamble_ops(self, sb):
+        for op in self.loop_invariant_producer.values():
+            sb.add_potential(op)
 
     def propagate_forward(self, op):
         args = self.optimizer.make_args_key(op)
@@ -199,6 +207,7 @@
                     ))
                     return
         self.emit_operation(op)
+        self.pure(rop.FLOAT_MUL, [arg2, arg1], op.result)
 
     def optimize_FLOAT_NEG(self, op):
         v1 = op.getarg(0)
@@ -342,16 +351,18 @@
         # expects a compile-time constant
         assert isinstance(arg, Const)
         key = make_hashable_int(arg.getint())
-        resvalue = self.optimizer.loop_invariant_results.get(key, None)
+        
+        resvalue = self.loop_invariant_results.get(key, None)
         if resvalue is not None:
             self.make_equal_to(op.result, resvalue)
             return
         # change the op to be a normal call, from the backend's point of view
         # there is no reason to have a separate operation for this
+        self.loop_invariant_producer[key] = op
         op = op.copy_and_change(rop.CALL)
         self.emit_operation(op)
         resvalue = self.getvalue(op.result)
-        self.optimizer.loop_invariant_results[key] = resvalue
+        self.loop_invariant_results[key] = resvalue
 
     def _optimize_nullness(self, op, box, expect_nonnull):
         value = self.getvalue(box)
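The CALL_LOOPINVARIANT handling above now keeps its cache on the OptRewrite pass
itself instead of on the Optimizer, and additionally records which operation
produced each cached value so it can be offered to the short preamble. A rough
standalone sketch of that caching, not part of the patch; the key stands in for
make_hashable_int() of the call target, and execute_call for emitting the CALL:

    class LoopInvariantCache(object):
        def __init__(self):
            self.loop_invariant_results = {}   # key -> cached result value
            self.loop_invariant_producer = {}  # key -> op that produced it

        def lookup_or_call(self, key, op, execute_call):
            cached = self.loop_invariant_results.get(key)
            if cached is not None:
                return cached                  # later calls reuse the value
            self.loop_invariant_producer[key] = op  # short-preamble candidate
            result = execute_call(op)          # emitted once as a plain CALL
            self.loop_invariant_results[key] = result
            return result

    cache = LoopInvariantCache()
    v1 = cache.lookup_or_call("f", "call f()", lambda op: 42)
    v2 = cache.lookup_or_call("f", "call f()", lambda op: 43)
    assert v1 == v2 == 42   # the second call is folded away
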
diff --git a/pypy/jit/metainterp/optimizeopt/simplify.py b/pypy/jit/metainterp/optimizeopt/simplify.py
--- a/pypy/jit/metainterp/optimizeopt/simplify.py
+++ b/pypy/jit/metainterp/optimizeopt/simplify.py
@@ -1,7 +1,7 @@
-
-from pypy.jit.metainterp.resoperation import ResOperation, rop
 from pypy.jit.metainterp.optimizeopt.optimizer import Optimization
 from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method
+from pypy.jit.metainterp.resoperation import ResOperation, rop
+
 
 class OptSimplify(Optimization):
     def optimize_CALL_PURE(self, op):
@@ -25,6 +25,8 @@
         #     but it's a bit hard to implement robustly if heap.py is also run
         pass
 
+    optimize_CAST_OPAQUE_PTR = optimize_VIRTUAL_REF
+
 
 dispatch_opt = make_dispatcher_method(OptSimplify, 'optimize_',
         default=OptSimplify.emit_operation)
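Both optimizer.py and simplify.py route operations through dispatchers built by
make_dispatcher_method (imported from optimizeopt/util.py), which looks up an
optimize_<OPNAME> method and falls back to a default handler. The real helper is
more elaborate so that it translates well under RPython; a plain dict lookup is
enough to show the idea in a standalone sketch, not part of the patch:

    def make_simple_dispatcher(cls, prefix, default):
        table = {}
        for name in dir(cls):
            if name.startswith(prefix):
                table[name[len(prefix):]] = getattr(cls, name)

        def dispatch(self, op):
            handler = table.get(op.opname, default)
            return handler(self, op)
        return dispatch

    class Op(object):
        def __init__(self, opname):
            self.opname = opname

    class DemoOpt(object):
        def optimize_INT_ADD(self, op):
            return "specialized handling of %s" % op.opname
        def emit_operation(self, op):
            return "default: just emit %s" % op.opname

    DemoOpt.propagate = make_simple_dispatcher(DemoOpt, "optimize_",
                                               DemoOpt.emit_operation)

    opt = DemoOpt()
    assert opt.propagate(Op("INT_ADD")).startswith("specialized")
    assert opt.propagate(Op("INT_MUL")).startswith("default")
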
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py b/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
--- a/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
@@ -693,7 +693,6 @@
         """
         expected = """
         [i]
-        guard_no_exception() []
         i1 = int_add(i, 3)
         i2 = call(i1, descr=nonwritedescr)
         guard_no_exception() [i1, i2]
@@ -1755,6 +1754,48 @@
         """
         self.optimize_loop(ops, expected)
 
+    def test_duplicate_getarrayitem_after_setarrayitem_bug(self):
+        ops = """
+        [p0, i0, i1]
+        setarrayitem_gc(p0, 0, i0, descr=arraydescr)
+        i6 = int_add(i0, 1)
+        setarrayitem_gc(p0, i1, i6, descr=arraydescr)
+        i10 = getarrayitem_gc(p0, 0, descr=arraydescr)
+        i11 = int_add(i10, i0)
+        jump(p0, i11, i1)
+        """
+        expected = """
+        [p0, i0, i1]
+        i6 = int_add(i0, 1)
+        setarrayitem_gc(p0, 0, i0, descr=arraydescr)
+        setarrayitem_gc(p0, i1, i6, descr=arraydescr)
+        i10 = getarrayitem_gc(p0, 0, descr=arraydescr)
+        i11 = int_add(i10, i0)
+        jump(p0, i11, i1)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_duplicate_getarrayitem_after_setarrayitem_bug2(self):
+        ops = """
+        [p0, i0, i1]
+        i2 = getarrayitem_gc(p0, 0, descr=arraydescr)
+        i6 = int_add(i0, 1)
+        setarrayitem_gc(p0, i1, i6, descr=arraydescr)
+        i10 = getarrayitem_gc(p0, 0, descr=arraydescr)
+        i11 = int_add(i10, i2)
+        jump(p0, i11, i1)
+        """
+        expected = """
+        [p0, i0, i1]
+        i2 = getarrayitem_gc(p0, 0, descr=arraydescr)
+        i6 = int_add(i0, 1)
+        setarrayitem_gc(p0, i1, i6, descr=arraydescr)
+        i10 = getarrayitem_gc(p0, 0, descr=arraydescr)
+        i11 = int_add(i10, i2)
+        jump(p0, i11, i1)
+        """
+        self.optimize_loop(ops, expected)
+
     def test_bug_1(self):
         ops = """
         [i0, p1]
@@ -2701,11 +2742,11 @@
     def test_residual_call_invalidate_some_arrays(self):
         ops = """
         [p1, p2, i1]
-        p3 = getarrayitem_gc(p1, 0, descr=arraydescr2)
+        p3 = getarrayitem_gc(p2, 0, descr=arraydescr2)
         p4 = getarrayitem_gc(p2, 1, descr=arraydescr2)
         i2 = getarrayitem_gc(p1, 1, descr=arraydescr)
         i3 = call(i1, descr=writearraydescr)
-        p5 = getarrayitem_gc(p1, 0, descr=arraydescr2)
+        p5 = getarrayitem_gc(p2, 0, descr=arraydescr2)
         p6 = getarrayitem_gc(p2, 1, descr=arraydescr2)
         i4 = getarrayitem_gc(p1, 1, descr=arraydescr)
         escape(p3)
@@ -2718,7 +2759,7 @@
         """
         expected = """
         [p1, p2, i1]
-        p3 = getarrayitem_gc(p1, 0, descr=arraydescr2)
+        p3 = getarrayitem_gc(p2, 0, descr=arraydescr2)
         p4 = getarrayitem_gc(p2, 1, descr=arraydescr2)
         i2 = getarrayitem_gc(p1, 1, descr=arraydescr)
         i3 = call(i1, descr=writearraydescr)
@@ -3916,11 +3957,8 @@
         i2 = strlen(p2)
         i3 = int_add(i1, i2)
         p3 = newstr(i3)
-        i4 = strlen(p1)
-        copystrcontent(p1, p3, 0, 0, i4)
-        i5 = strlen(p2)
-        i6 = int_add(i4, i5)      # will be killed by the backend
-        copystrcontent(p2, p3, 0, i4, i5)
+        copystrcontent(p1, p3, 0, 0, i1)
+        copystrcontent(p2, p3, 0, i1, i2)
         jump(p2, p3)
         """
         self.optimize_strunicode_loop(ops, expected)
@@ -3941,9 +3979,7 @@
         p3 = newstr(i3)
         strsetitem(p3, 0, i0)
         strsetitem(p3, 1, i1)
-        i4 = strlen(p2)
-        i5 = int_add(2, i4)      # will be killed by the backend
-        copystrcontent(p2, p3, 0, 2, i4)
+        copystrcontent(p2, p3, 0, 2, i2)
         jump(i1, i0, p3)
         """
         self.optimize_strunicode_loop(ops, expected)
@@ -3962,10 +3998,9 @@
         i2 = strlen(p2)
         i3 = int_add(i2, 2)
         p3 = newstr(i3)
-        i4 = strlen(p2)
-        copystrcontent(p2, p3, 0, 0, i4)
-        strsetitem(p3, i4, i0)
-        i5 = int_add(i4, 1)
+        copystrcontent(p2, p3, 0, 0, i2)
+        strsetitem(p3, i2, i0)
+        i5 = int_add(i2, 1)
         strsetitem(p3, i5, i1)
         i6 = int_add(i5, 1)      # will be killed by the backend
         jump(i1, i0, p3)
@@ -3987,14 +4022,9 @@
         i3 = strlen(p3)
         i123 = int_add(i12, i3)
         p5 = newstr(i123)
-        i1b = strlen(p1)
-        copystrcontent(p1, p5, 0, 0, i1b)
-        i2b = strlen(p2)
-        i12b = int_add(i1b, i2b)
-        copystrcontent(p2, p5, 0, i1b, i2b)
-        i3b = strlen(p3)
-        i123b = int_add(i12b, i3b)      # will be killed by the backend
-        copystrcontent(p3, p5, 0, i12b, i3b)
+        copystrcontent(p1, p5, 0, 0, i1)
+        copystrcontent(p2, p5, 0, i1, i2)
+        copystrcontent(p3, p5, 0, i12, i3)
         jump(p2, p3, p5)
         """
         self.optimize_strunicode_loop(ops, expected)
@@ -4010,10 +4040,8 @@
         i2 = strlen(p2)
         i3 = int_add(i2, 1)
         p3 = newstr(i3)
-        i4 = strlen(p2)
-        copystrcontent(p2, p3, 0, 0, i4)
-        strsetitem(p3, i4, 120)     # == ord('x')
-        i5 = int_add(i4, 1)      # will be killed by the backend
+        copystrcontent(p2, p3, 0, 0, i2)
+        strsetitem(p3, i2, 120)     # == ord('x')
         jump(p3)
         """
         self.optimize_strunicode_loop(ops, expected)
@@ -4131,9 +4159,7 @@
         i5 = int_add(i3, i4)
         p4 = newstr(i5)
         copystrcontent(p1, p4, i1, 0, i3)
-        i4b = strlen(p2)
-        i6 = int_add(i3, i4b)    # killed by the backend
-        copystrcontent(p2, p4, 0, i3, i4b)
+        copystrcontent(p2, p4, 0, i3, i4)
         jump(p4, i1, i2, p2)
         """
         self.optimize_strunicode_loop(ops, expected)
@@ -4178,11 +4204,8 @@
         i2 = strlen(p2)
         i3 = int_add(i1, i2)
         p4 = newstr(i3)
-        i4 = strlen(p1)
-        copystrcontent(p1, p4, 0, 0, i4)
-        i5 = strlen(p2)
-        i6 = int_add(i4, i5)      # will be killed by the backend
-        copystrcontent(p2, p4, 0, i4, i5)
+        copystrcontent(p1, p4, 0, 0, i1)
+        copystrcontent(p2, p4, 0, i1, i2)
         i0 = call(0, p3, p4, descr=strequaldescr)
         escape(i0)
         jump(p1, p2, p3)
@@ -4374,11 +4397,8 @@
         i2 = strlen(p2)
         i3 = int_add(i1, i2)
         p4 = newstr(i3)
-        i4 = strlen(p1)
-        copystrcontent(p1, p4, 0, 0, i4)
-        i5 = strlen(p2)
-        i6 = int_add(i4, i5)      # will be killed by the backend
-        copystrcontent(p2, p4, 0, i4, i5)
+        copystrcontent(p1, p4, 0, 0, i1)
+        copystrcontent(p2, p4, 0, i1, i2)
         i0 = call(0, s"hello world", p4, descr=streq_nonnull_descr)
         escape(i0)
         jump(p1, p2)
@@ -4511,7 +4531,7 @@
         escape(i1)
         jump(p0, i0)
         """
-        self.optimize_loop(ops, expected)
+        self.optimize_strunicode_loop(ops, expected)
 
     def test_int_is_true_bounds(self):
         ops = """
@@ -4530,6 +4550,165 @@
         guard_true(i1) []
         jump(p0)
         """
+        self.optimize_strunicode_loop(ops, expected)
+
+    def test_strslice_subtraction_folds(self):
+        ops = """
+        [p0, i0]
+        i1 = int_add(i0, 1)
+        p1 = call(0, p0, i0, i1, descr=strslicedescr)
+        escape(p1)
+        jump(p0, i1)
+        """
+        expected = """
+        [p0, i0]
+        i1 = int_add(i0, 1)
+        p1 = newstr(1)
+        i2 = strgetitem(p0, i0)
+        strsetitem(p1, 0, i2)
+        escape(p1)
+        jump(p0, i1)
+        """
+        self.optimize_strunicode_loop(ops, expected)
+
+    def test_float_mul_reversed(self):
+        ops = """
+        [f0, f1]
+        f2 = float_mul(f0, f1)
+        f3 = float_mul(f1, f0)
+        jump(f2, f3)
+        """
+        expected = """
+        [f0, f1]
+        f2 = float_mul(f0, f1)
+        jump(f2, f2)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_null_char_str(self):
+        ops = """
+        [p0]
+        p1 = newstr(4)
+        setfield_gc(p0, p1, descr=valuedescr)
+        jump(p0)
+        """
+        # It used to be the case that this would emit a series of
+        # strsetitem(p1, idx, 0), which was silly because newly allocated
+        # memory is already zero-filled.
+        expected = """
+        [p0]
+        p1 = newstr(4)
+        setfield_gc(p0, p1, descr=valuedescr)
+        jump(p0)
+        """
+        self.optimize_strunicode_loop(ops, expected)
+
+    def test_newstr_strlen(self):
+        ops = """
+        [i0]
+        p0 = newstr(i0)
+        escape(p0)
+        i1 = strlen(p0)
+        i2 = int_add(i1, 1)
+        jump(i2)
+        """
+        expected = """
+        [i0]
+        p0 = newstr(i0)
+        escape(p0)
+        i1 = int_add(i0, 1)
+        jump(i1)
+        """
+        self.optimize_strunicode_loop(ops, expected)
+
+    def test_intmod_bounds(self):
+        ops = """
+        [i0, i1]
+        i2 = int_mod(i0, 12)
+        i3 = int_gt(i2, 12)
+        guard_false(i3) []
+        i4 = int_lt(i2, -12)
+        guard_false(i4) []
+        i5 = int_mod(i1, -12)
+        i6 = int_lt(i5, -12)
+        guard_false(i6) []
+        i7 = int_gt(i5, 12)
+        guard_false(i7) []
+        jump(i2, i5)
+        """
+        expected = """
+        [i0, i1]
+        i2 = int_mod(i0, 12)
+        i5 = int_mod(i1, -12)
+        jump(i2, i5)
+        """
+        self.optimize_loop(ops, expected)
+
+        # This is the sequence of resoperations that is generated for a Python
+        # app-level int % int.  When the modulus is constant and when i0
+        # is known non-negative it should be optimized to a single int_mod.
+        ops = """
+        [i0]
+        i5 = int_ge(i0, 0)
+        guard_true(i5) []
+        i1 = int_mod(i0, 42)
+        i2 = int_rshift(i1, 63)
+        i3 = int_and(42, i2)
+        i4 = int_add(i1, i3)
+        finish(i4)
+        """
+        expected = """
+        [i0]
+        i5 = int_ge(i0, 0)
+        guard_true(i5) []
+        i1 = int_mod(i0, 42)
+        finish(i1)
+        """
+        py.test.skip("in-progress")
+        self.optimize_loop(ops, expected)
+
+        # Also, 'n % power-of-two' can be turned into int_and(),
+        # but that's a bit harder to detect here because it turns into
+        # several operations, and of course it is wrong to just turn
+        # int_mod(i0, 16) into int_and(i0, 15).
+        ops = """
+        [i0]
+        i1 = int_mod(i0, 16)
+        i2 = int_rshift(i1, 63)
+        i3 = int_and(16, i2)
+        i4 = int_add(i1, i3)
+        finish(i4)
+        """
+        expected = """
+        [i0]
+        i4 = int_and(i0, 15)
+        finish(i4)
+        """
+        py.test.skip("harder")
+        self.optimize_loop(ops, expected)
+
+    def test_bounded_lazy_setfield(self):
+        ops = """
+        [p0, i0]
+        i1 = int_gt(i0, 2)
+        guard_true(i1) []
+        setarrayitem_gc(p0, 0, 3)
+        setarrayitem_gc(p0, 2, 4)
+        setarrayitem_gc(p0, i0, 15)
+        i2 = getarrayitem_gc(p0, 2)
+        jump(p0, i2)
+        """
+        # Remove the getarrayitem_gc, because we know that p0[i0] does not
+        # alias p0[2].
+        expected = """
+        [p0, i0]
+        i1 = int_gt(i0, 2)
+        guard_true(i1) []
+        setarrayitem_gc(p0, i0, 15)
+        setarrayitem_gc(p0, 0, 3)
+        setarrayitem_gc(p0, 2, 4)
+        jump(p0, 4)
+        """
         self.optimize_loop(ops, expected)
 
 
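The bound reasoning behind test_intmod_bounds above can be checked in isolation:
whatever the rounding convention of int_mod (truncating like C or flooring like
Python), the absolute value of i0 % m is strictly smaller than abs(m), so the
int_gt(i2, 12) and int_lt(i2, -12) comparisons (and likewise for modulus -12)
are known false and their guards can be dropped. A quick standalone check, not
part of the patch:

    for i0 in range(-100, 100):
        r_pos = i0 % 12     # Python's flooring modulo; a truncating modulo
        r_neg = i0 % -12    # stays within the same (-12, 12) interval
        assert -12 < r_pos < 12
        assert -12 < r_neg < 12
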
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py b/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
--- a/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
@@ -61,7 +61,9 @@
             boxes = []
         boxes = []
     def clone_if_mutable(self):
-        return self
+        return FakeDescr()
+    def __eq__(self, other):
+        return isinstance(other, Storage) or isinstance(other, FakeDescr)
 
 
 class BaseTestWithUnroll(BaseTest):
@@ -69,13 +71,14 @@
     enable_opts = "intbounds:rewrite:virtualize:string:heap:unroll"
 
     def optimize_loop(self, ops, expected, expected_preamble=None,
-                      call_pure_results=None):
+                      call_pure_results=None, expected_short=None):
         loop = self.parse(ops)
         if expected != "crash!":
             expected = self.parse(expected)
         if expected_preamble:
             expected_preamble = self.parse(expected_preamble)
-
+        if expected_short:
+            expected_short = self.parse(expected_short)
         loop.preamble = TreeLoop('preamble')
         loop.preamble.inputargs = loop.inputargs
         loop.preamble.token = LoopToken()
@@ -84,17 +87,33 @@
         self._do_optimize_loop(loop, call_pure_results)
         #
         print
+        print "Preamble:"
         print loop.preamble.inputargs
-        print '\n'.join([str(o) for o in loop.preamble.operations])
+        if loop.preamble.operations:
+            print '\n'.join([str(o) for o in loop.preamble.operations])
+        else:
+            print 'Failed!'
         print
+        print "Loop:"
         print loop.inputargs
         print '\n'.join([str(o) for o in loop.operations])
         print
+        if expected_short:
+            print "Short Preamble:"
+            short = loop.preamble.token.short_preamble[0]
+            print short.inputargs
+            print '\n'.join([str(o) for o in short.operations])        
+            print
+        
         assert expected != "crash!", "should have raised an exception"
         self.assert_equal(loop, expected)
         if expected_preamble:
             self.assert_equal(loop.preamble, expected_preamble,
                               text_right='expected preamble')
+        if expected_short:
+            self.assert_equal(short, expected_short,
+                              text_right='expected short preamble')
+            
         return loop
 
 class OptimizeOptTest(BaseTestWithUnroll):
@@ -840,7 +859,13 @@
         p3sub = new_with_vtable(ConstClass(node_vtable2))
         setfield_gc(p3sub, i1, descr=valuedescr)
         setfield_gc(p1, p3sub, descr=nextdescr)
-        jump(i1, p1, p3sub)
+        # XXX: We get two extra operations here because the setfield
+        #      above is the result of forcing p1 and is thus not
+        #      registered with the heap optimizer. I've marked tests
+        #      below with VIRTUALHEAP if they suffer from this issue.
+        p3sub2 = getfield_gc(p1, descr=nextdescr) 
+        guard_nonnull_class(p3sub2, ConstClass(node_vtable2)) []
+        jump(i1, p1, p3sub2)
         """
         self.optimize_loop(ops, expected, preamble)
 
@@ -871,7 +896,9 @@
         guard_true(i2b) []
         p3 = new_with_vtable(ConstClass(node_vtable))
         setfield_gc(p3, i2, descr=nextdescr)
-        jump(p3, i2)
+        # XXX: VIRTUALHEAP (see above)
+        i3 = getfield_gc(p3, descr=nextdescr)
+        jump(p3, i3)
         """
         self.optimize_loop(ops, expected, preamble)
 
@@ -889,12 +916,10 @@
         i3 = call(i2, descr=nonwritedescr)
         jump(i1)       # the exception is considered lost when we loop back
         """
-        # note that 'guard_no_exception' at the very start must be kept
-        # around: bridges may start with one.  (In case of loops we could
-        # remove it, but we probably don't care.)
+        # note that 'guard_no_exception' at the very start is kept around
+        # for bridges, but not for loops
         preamble = """
         [i]
-        guard_no_exception() []
         i1 = int_add(i, 3)
         i2 = call(i1, descr=nonwritedescr)
         guard_no_exception() [i1, i2]
@@ -1168,6 +1193,29 @@
         """
         self.optimize_loop(ops, expected)
 
+    def test_virtual_field_forced_by_lazy_setfield(self):
+        ops = """
+        [i0, p1, p3]
+        i28 = int_add(i0, 1)
+        p30 = new_with_vtable(ConstClass(node_vtable))
+        setfield_gc(p30, i28, descr=nextdescr)
+        setfield_gc(p3, p30, descr=valuedescr)
+        p45 = getfield_gc(p3, descr=valuedescr)
+        i29 = int_add(i28, 1)
+        jump(i29, p45, p3)
+        """
+        preamble = """
+        [i0, p1, p3]
+        i28 = int_add(i0, 1)
+        i29 = int_add(i28, 1)
+        p30 = new_with_vtable(ConstClass(node_vtable))
+        setfield_gc(p30, i28, descr=nextdescr)
+        setfield_gc(p3, p30, descr=valuedescr)
+        jump(i29, p30, p3)
+        """
+        expected = preamble
+        self.optimize_loop(ops, expected, preamble)
+
     def test_nonvirtual_1(self):
         ops = """
         [i]
@@ -1310,15 +1358,78 @@
         ops = """
         [i]
         i1 = getfield_gc(ConstPtr(myptr), descr=valuedescr)
-        jump(i1)
-        """
-        preamble = ops
-        expected = """
+        call(i1, descr=nonwritedescr)
+        jump(i)
+        """
+        preamble = """
         [i]
-        jump(i)
+        i1 = getfield_gc(ConstPtr(myptr), descr=valuedescr)
+        call(i1, descr=nonwritedescr)
+        jump(i, i1)
+        """
+        expected = """
+        [i, i1]
+        call(i1, descr=nonwritedescr)
+        jump(i, i1)
         """
         self.optimize_loop(ops, expected, preamble)
 
+    def test_varray_boxed1(self):
+        ops = """
+        [p0, p8]
+        p11 = getfield_gc(p0, descr=otherdescr)
+        guard_nonnull(p11) [p0, p8]
+        guard_class(p11, ConstClass(node_vtable2)) [p0, p8]
+        p14 = getfield_gc(p11, descr=otherdescr)
+        guard_isnull(p14) [p0, p8]
+        p18 = getfield_gc(ConstPtr(myptr), descr=otherdescr)
+        guard_isnull(p18) [p0, p8]
+        p31 = new(descr=ssize)
+        setfield_gc(p31, 0, descr=adescr)
+        p33 = new_array(0, descr=arraydescr)
+        setfield_gc(p31, p33, descr=bdescr)
+        p35 = new_with_vtable(ConstClass(node_vtable))
+        setfield_gc(p35, p31, descr=valuedescr)
+        jump(p0, p35)
+        """
+        expected = """
+        [p0]
+        jump(p0)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_varray_boxed_simplified(self):
+        ops = """
+        [p0, p8]
+        p18 = getfield_gc(ConstPtr(myptr), descr=otherdescr)
+        guard_isnull(p18) [p0, p8]
+        p31 = new(descr=ssize)
+        p35 = new_with_vtable(ConstClass(node_vtable))
+        setfield_gc(p35, p31, descr=valuedescr)        
+        jump(p0, p35)
+        """
+        expected = """
+        [p0]
+        jump(p0)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_varray_boxed_noconst(self):
+        ops = """
+        [p0, p8, p18, p19]
+        guard_isnull(p18) [p0, p8]
+        p31 = new(descr=ssize)
+        p35 = new_with_vtable(ConstClass(node_vtable))
+        setfield_gc(p35, p31, descr=valuedescr)        
+        jump(p0, p35, p19, p18)
+        """
+        expected = """
+        [p0, p19]
+        guard_isnull(p19) [p0]
+        jump(p0, NULL)
+        """
+        self.optimize_loop(ops, expected)
+        
     def test_varray_1(self):
         ops = """
         [i1]
@@ -1554,6 +1665,24 @@
         """
         self.optimize_loop(ops, expected)
 
+    def test_duplicate_getfield_2(self):
+        ops = """
+        [p1, p2, i0]
+        i1 = getfield_gc(p1, descr=valuedescr)
+        i2 = getfield_gc(p2, descr=valuedescr)
+        i3 = getfield_gc(p1, descr=valuedescr)
+        i4 = getfield_gc(p2, descr=valuedescr)
+        i5 = int_add(i3, i4)
+        i6 = int_add(i0, i5)
+        jump(p1, p2, i6)
+        """
+        expected = """
+        [p1, p2, i0, i5]
+        i6 = int_add(i0, i5)
+        jump(p1, p2, i6, i5)
+        """
+        self.optimize_loop(ops, expected)
+
     def test_getfield_after_setfield(self):
         ops = """
         [p1, i1]
@@ -1730,6 +1859,7 @@
         """
         expected = """
         [p1, i1, i2]
+        setfield_gc(p1, i2, descr=valuedescr)
         jump(p1, i1, i2)
         """
         # in this case, all setfields are removed, because we can prove
@@ -1874,14 +2004,14 @@
         guard_true(i3) []
         i4 = int_neg(i2)
         setfield_gc(p1, i2, descr=valuedescr)
-        jump(p1, i1, i2, i4)
-        """
-        expected = """
-        [p1, i1, i2, i4]
+        jump(p1, i1, i2, i4, i4)
+        """
+        expected = """
+        [p1, i1, i2, i4, i5]
         setfield_gc(p1, i1, descr=valuedescr)
         guard_true(i4) []
         setfield_gc(p1, i2, descr=valuedescr)
-        jump(p1, i1, i2, 1)
+        jump(p1, i1, i2, i5, i5)
         """
         self.optimize_loop(ops, expected, preamble)
 
@@ -1904,14 +2034,14 @@
         i4 = int_neg(i2)
         setfield_gc(p1, NULL, descr=nextdescr)
         escape()
-        jump(p1, i2, i4)
-        """
-        expected = """
-        [p1, i2, i4]
+        jump(p1, i2, i4, i4)
+        """
+        expected = """
+        [p1, i2, i4, i5]
         guard_true(i4) [p1]
         setfield_gc(p1, NULL, descr=nextdescr)
         escape()
-        jump(p1, i2, 1)
+        jump(p1, i2, i5, i5)
         """
         self.optimize_loop(ops, expected, preamble)
 
@@ -1933,14 +2063,14 @@
         i4 = int_neg(i2)
         setfield_gc(p1, NULL, descr=nextdescr)
         escape()
-        jump(p1, i2, i4)
-        """
-        expected = """
-        [p1, i2, i4]
+        jump(p1, i2, i4, i4)
+        """
+        expected = """
+        [p1, i2, i4, i5]
         guard_true(i4) [i2, p1]
         setfield_gc(p1, NULL, descr=nextdescr)
         escape()
-        jump(p1, i2, 1)
+        jump(p1, i2, i5, i5)
         """
         self.optimize_loop(ops, expected)
 
@@ -1956,14 +2086,22 @@
         setfield_gc(p1, i2, descr=valuedescr)
         jump(p1, i1, i2, i4)
         """
-        preamble = ops
-        expected = """
-        [p1, i1, i2, i4]
+        preamble = """
+        [p1, i1, i2, i3]
+        setfield_gc(p1, i1, descr=valuedescr)
+        i5 = int_eq(i3, 5)
+        guard_true(i5) []
+        i4 = int_neg(i2)
+        setfield_gc(p1, i2, descr=valuedescr)
+        jump(p1, i1, i2, i4, i4)
+        """
+        expected = """
+        [p1, i1, i2, i4, i7]
         setfield_gc(p1, i1, descr=valuedescr)
         i5 = int_eq(i4, 5)
         guard_true(i5) []
         setfield_gc(p1, i2, descr=valuedescr)
-        jump(p1, i1, i2, 5)
+        jump(p1, i1, i2, i7, i7)
         """
         self.optimize_loop(ops, expected, preamble)
 
@@ -2037,7 +2175,25 @@
         jump(p1)
         """
         self.optimize_loop(ops, expected)
-
+        
+    def test_duplicate_getarrayitem_2(self):
+        ops = """
+        [p1, i0]
+        i2 = getarrayitem_gc(p1, 0, descr=arraydescr2)
+        i3 = getarrayitem_gc(p1, 1, descr=arraydescr2)
+        i4 = getarrayitem_gc(p1, 0, descr=arraydescr2)
+        i5 = getarrayitem_gc(p1, 1, descr=arraydescr2)
+        i6 = int_add(i3, i4)
+        i7 = int_add(i0, i6)
+        jump(p1, i7)
+        """
+        expected = """
+        [p1, i0, i6]
+        i7 = int_add(i0, i6)
+        jump(p1, i7, i6)
+        """
+        self.optimize_loop(ops, expected)
+        
     def test_duplicate_getarrayitem_after_setarrayitem_1(self):
         ops = """
         [p1, p2]
@@ -2163,15 +2319,15 @@
         p2 = new_with_vtable(ConstClass(node_vtable))
         setfield_gc(p2, p4, descr=nextdescr)
         setfield_gc(p1, p2, descr=nextdescr)
-        jump(p1, i2, i4, p4)
-        """
-        expected = """
-        [p1, i2, i4, p4]
+        jump(p1, i2, i4, p4, i4)
+        """
+        expected = """
+        [p1, i2, i4, p4, i5]
         guard_true(i4) [p1, p4]
         p2 = new_with_vtable(ConstClass(node_vtable))
         setfield_gc(p2, p4, descr=nextdescr)
         setfield_gc(p1, p2, descr=nextdescr)
-        jump(p1, i2, 1, p4)
+        jump(p1, i2, i5, p4, i5)
         """
         self.optimize_loop(ops, expected, preamble)
 
@@ -2627,6 +2783,101 @@
         """
         self.optimize_loop(ops, expected, preamble)
 
+    def test_remove_duplicate_pure_op_ovf_with_lazy_setfield(self):
+        py.test.skip('this optimization is not yet supported')
+        ops = """
+        [i1, p1]
+        i3 = int_add_ovf(i1, 1)
+        guard_no_overflow() []
+        i3b = int_is_true(i3)
+        guard_true(i3b) []
+        setfield_gc(p1, i1, descr=valuedescr)
+        i4 = int_add_ovf(i1, 1)
+        guard_no_overflow() []
+        i4b = int_is_true(i4)
+        guard_true(i4b) []
+        escape(i3)
+        escape(i4)
+        jump(i1, p1)
+        """
+        preamble = """
+        [i1, p1]
+        i3 = int_add_ovf(i1, 1)
+        guard_no_overflow() []
+        i3b = int_is_true(i3)
+        guard_true(i3b) []
+        setfield_gc(p1, i1, descr=valuedescr)        
+        escape(i3)
+        escape(i3)
+        jump(i1, p1, i3)
+        """
+        expected = """
+        [i1, p1, i3]
+        setfield_gc(p1, i1, descr=valuedescr)        
+        escape(i3)
+        escape(i3)
+        jump(i1, p1, i3)
+        """
+        self.optimize_loop(ops, expected, preamble)
+
+    def test_ovf_guard_in_short_preamble1(self):
+        ops = """
+        [p8, p11, i24]
+        p26 = new_with_vtable(ConstClass(node_vtable))
+        setfield_gc(p26, i24, descr=adescr)        
+        i34 = getfield_gc_pure(p11, descr=valuedescr)
+        i35 = getfield_gc_pure(p26, descr=adescr)
+        i36 = int_add_ovf(i34, i35)
+        guard_no_overflow() []
+        jump(p8, p11, i35)
+        """
+        expected = """
+        [p8, p11, i26]
+        jump(p8, p11, i26)        
+        """
+        self.optimize_loop(ops, expected)
+        
+    def test_ovf_guard_in_short_preamble2(self):
+        ops = """
+        [p8, p11, p12]
+        p16 = getfield_gc(p8, descr=valuedescr)
+        i17 = getfield_gc(p8, descr=nextdescr)
+        i19 = getfield_gc(p16, descr=valuedescr)
+        i20 = int_ge(i17, i19)
+        guard_false(i20) []
+        i21 = getfield_gc(p16, descr=otherdescr)
+        i22 = getfield_gc(p16, descr=nextdescr)
+        i23 = int_mul(i17, i22)
+        i24 = int_add(i21, i23)
+        p26 = new_with_vtable(ConstClass(node_vtable))
+        setfield_gc(p26, i24, descr=adescr)
+        i28 = int_add(i17, 1)
+        setfield_gc(p8, i28, descr=nextdescr)
+        i34 = getfield_gc_pure(p11, descr=valuedescr)
+        i35 = getfield_gc_pure(p26, descr=adescr)
+        guard_nonnull(p12) []
+        i36 = int_add_ovf(i34, i35)
+        guard_no_overflow() []
+        p38 = new_with_vtable(ConstClass(node_vtable))
+        setfield_gc(p38, i36, descr=adescr)
+        jump(p8, p11, p26)
+        """
+        expected = """
+        [p8, p11, i24, i39, i19, p16, i21, i34]
+        i40 = int_ge(i39, i19)
+        guard_false(i40) []
+        i41 = getfield_gc(p16, descr=nextdescr)
+        i42 = int_mul(i39, i41)
+        i43 = int_add(i21, i42)
+        i44 = int_add(i39, 1)
+        setfield_gc(p8, i44, descr=nextdescr)
+        i45 = int_add_ovf(i34, i43)
+        guard_no_overflow() []
+        jump(p8, p11, i43, i44, i19, p16, i21, i34)
+        """
+        self.optimize_loop(ops, expected)
+
+
     def test_int_and_or_with_zero(self):
         ops = """
         [i0, i1]
@@ -2820,11 +3071,11 @@
     def test_residual_call_invalidate_some_arrays(self):
         ops = """
         [p1, p2, i1]
-        p3 = getarrayitem_gc(p1, 0, descr=arraydescr2)
+        p3 = getarrayitem_gc(p2, 0, descr=arraydescr2)
         p4 = getarrayitem_gc(p2, 1, descr=arraydescr2)
         i2 = getarrayitem_gc(p1, 1, descr=arraydescr)
         i3 = call(i1, descr=writearraydescr)
-        p5 = getarrayitem_gc(p1, 0, descr=arraydescr2)
+        p5 = getarrayitem_gc(p2, 0, descr=arraydescr2)
         p6 = getarrayitem_gc(p2, 1, descr=arraydescr2)
         i4 = getarrayitem_gc(p1, 1, descr=arraydescr)
         escape(p3)
@@ -2837,7 +3088,7 @@
         """
         expected = """
         [p1, p2, i1]
-        p3 = getarrayitem_gc(p1, 0, descr=arraydescr2)
+        p3 = getarrayitem_gc(p2, 0, descr=arraydescr2)
         p4 = getarrayitem_gc(p2, 1, descr=arraydescr2)
         i2 = getarrayitem_gc(p1, 1, descr=arraydescr)
         i3 = call(i1, descr=writearraydescr)
@@ -2993,6 +3244,38 @@
         '''
         self.optimize_loop(ops, expected, preamble, call_pure_results)
 
+    def test_call_pure_constant_folding_exc(self):
+        # CALL_PURE may be followed by GUARD_NO_EXCEPTION
+        arg_consts = [ConstInt(i) for i in (123456, 4, 5, 6)]
+        call_pure_results = {tuple(arg_consts): ConstInt(42)}
+        ops = '''
+        [i0, i1, i2]
+        escape(i1)
+        escape(i2)
+        i3 = call_pure(123456, 4, 5, 6, descr=plaincalldescr)
+        guard_no_exception() []
+        i4 = call_pure(123456, 4, i0, 6, descr=plaincalldescr)
+        guard_no_exception() []
+        jump(i0, i3, i4)
+        '''
+        preamble = '''
+        [i0, i1, i2]
+        escape(i1)
+        escape(i2)
+        i4 = call(123456, 4, i0, 6, descr=plaincalldescr)
+        guard_no_exception() []
+        jump(i0, i4)
+        '''
+        expected = '''
+        [i0, i2]
+        escape(42)
+        escape(i2)
+        i4 = call(123456, 4, i0, 6, descr=plaincalldescr)
+        guard_no_exception() []
+        jump(i0, i4)
+        '''
+        self.optimize_loop(ops, expected, preamble, call_pure_results)
+
     # ----------
 
     def test_vref_nonvirtual_nonescape(self):
@@ -3869,11 +4152,13 @@
         jump(p4364)
         """
         expected = """
-        [i0, i1]
+        [i0]
+        i1 = int_sub_ovf(i0, 1)
+        guard_no_overflow() []
         escape(i1)
         i2 = int_add_ovf(i0, 1)
         guard_no_overflow() []
-        jump(i2, i0)
+        jump(i2)
         """
         self.optimize_loop(ops, expected)
 
@@ -4837,32 +5122,38 @@
 
     def test_invariant_ovf(self):
         ops = """
-        [i0, i1, i10, i11, i12]
+        [i0, i1, i10, i11, i20, i21]
         i2 = int_add_ovf(i0, i1)
         guard_no_overflow() []
         i3 = int_sub_ovf(i0, i1)
         guard_no_overflow() []
         i4 = int_mul_ovf(i0, i1)
         guard_no_overflow() []
+        escape(i2)
+        escape(i3)
+        escape(i4)
         i24 = int_mul_ovf(i10, i11)
         guard_no_overflow() []
         i23 = int_sub_ovf(i10, i11)
         guard_no_overflow() []
         i22 = int_add_ovf(i10, i11)
         guard_no_overflow() []
-        jump(i0, i1, i2, i3, i4)
-        """
-        expected = """
-        [i0, i1, i10, i11, i12]
+        jump(i0, i1, i20, i21, i20, i21)
+        """
+        expected = """
+        [i0, i1, i10, i11, i2, i3, i4]
+        escape(i2)
+        escape(i3)
+        escape(i4)        
         i24 = int_mul_ovf(i10, i11)
         guard_no_overflow() []
         i23 = int_sub_ovf(i10, i11)
         guard_no_overflow() []
         i22 = int_add_ovf(i10, i11)
         guard_no_overflow() []
-        jump(i0, i1, i10, i11, i12)
-        """
-        self.optimize_loop(ops, expected, ops)
+        jump(i0, i1, i10, i11, i2, i3, i4) 
+        """
+        self.optimize_loop(ops, expected)
 
     def test_value_proven_to_be_constant_after_two_iterations(self):
         class FakeDescr(AbstractDescr):
@@ -4878,8 +5169,8 @@
         ops = """
         [p0, p1, p2, p3, i4, p5, i6, p7, p8, p9, p14]
         guard_value(i4, 3) []
-        guard_class(p9, 17278984) []
-        guard_class(p9, 17278984) []
+        guard_class(p9, ConstClass(node_vtable)) []
+        guard_class(p9, ConstClass(node_vtable)) []
         p22 = getfield_gc(p9, descr=inst_w_seq)
         guard_nonnull(p22) []
         i23 = getfield_gc(p9, descr=inst_index)
@@ -4894,11 +5185,11 @@
         guard_class(p14, 17273920) []
         guard_class(p14, 17273920) []
 
-        p75 = new_with_vtable(17278984)
+        p75 = new_with_vtable(ConstClass(node_vtable))
         setfield_gc(p75, p14, descr=inst_w_seq)
         setfield_gc(p75, 0, descr=inst_index)
-        guard_class(p75, 17278984) []
-        guard_class(p75, 17278984) []
+        guard_class(p75, ConstClass(node_vtable)) []
+        guard_class(p75, ConstClass(node_vtable)) []
         p79 = getfield_gc(p75, descr=inst_w_seq)
         guard_nonnull(p79) []
         i80 = getfield_gc(p75, descr=inst_index)
@@ -4944,6 +5235,7 @@
         """
         expected = """
         [p0]
+        setfield_gc(p0, p0, descr=valuedescr)
         jump(p0)
         """
         self.optimize_loop(ops, expected, preamble)
@@ -5030,9 +5322,7 @@
         self.optimize_loop(ops, expected)
 
     # ----------
-    def optimize_strunicode_loop(self, ops, optops, preamble=None):
-        if not preamble:
-            preamble = ops # FIXME: Force proper testing of preamble
+    def optimize_strunicode_loop(self, ops, optops, preamble):
         # check with the arguments passed in
         self.optimize_loop(ops, optops, preamble)
         # check with replacing 'str' with 'unicode' everywhere
@@ -5052,7 +5342,7 @@
         [i0]
         jump(i0)
         """
-        self.optimize_strunicode_loop(ops, expected)
+        self.optimize_strunicode_loop(ops, expected, expected)
 
     def test_newstr_2(self):
         ops = """
@@ -5068,7 +5358,7 @@
         [i0, i1]
         jump(i1, i0)
         """
-        self.optimize_strunicode_loop(ops, expected)
+        self.optimize_strunicode_loop(ops, expected, expected)
 
     def test_str_concat_1(self):
         ops = """
@@ -5076,20 +5366,26 @@
         p3 = call(0, p1, p2, descr=strconcatdescr)
         jump(p2, p3)
         """
-        expected = """
+        preamble = """
         [p1, p2]
         i1 = strlen(p1)
         i2 = strlen(p2)
         i3 = int_add(i1, i2)
         p3 = newstr(i3)
-        i4 = strlen(p1)
-        copystrcontent(p1, p3, 0, 0, i4)
-        i5 = strlen(p2)
-        i6 = int_add(i4, i5)      # will be killed by the backend
-        copystrcontent(p2, p3, 0, i4, i5)
-        jump(p2, p3)
-        """
-        self.optimize_strunicode_loop(ops, expected)
+        copystrcontent(p1, p3, 0, 0, i1)
+        copystrcontent(p2, p3, 0, i1, i2)
+        jump(p2, p3, i2)
+        """
+        expected = """
+        [p1, p2, i1]
+        i2 = strlen(p2)
+        i3 = int_add(i1, i2)
+        p3 = newstr(i3)
+        copystrcontent(p1, p3, 0, 0, i1)
+        copystrcontent(p2, p3, 0, i1, i2)
+        jump(p2, p3, i2)
+        """
+        self.optimize_strunicode_loop(ops, expected, preamble)
 
     def test_str_concat_vstr2_str(self):
         ops = """
@@ -5107,12 +5403,10 @@
         p3 = newstr(i3)
         strsetitem(p3, 0, i0)
         strsetitem(p3, 1, i1)
-        i4 = strlen(p2)
-        i5 = int_add(2, i4)      # will be killed by the backend
-        copystrcontent(p2, p3, 0, 2, i4)
+        copystrcontent(p2, p3, 0, 2, i2)
         jump(i1, i0, p3)
         """
-        self.optimize_strunicode_loop(ops, expected)
+        self.optimize_strunicode_loop(ops, expected, expected)
 
     def test_str_concat_str_vstr2(self):
         ops = """
@@ -5128,15 +5422,14 @@
         i2 = strlen(p2)
         i3 = int_add(i2, 2)


More information about the pypy-commit mailing list