[pypy-commit] pypy reflex-support: merge default into branch

wlav noreply at buildbot.pypy.org
Thu Aug 4 03:07:25 CEST 2011


Author: Wim Lavrijsen <WLavrijsen at lbl.gov>
Branch: reflex-support
Changeset: r46262:f451e0e52a5b
Date: 2011-08-03 12:31 -0700
http://bitbucket.org/pypy/pypy/changeset/f451e0e52a5b/

Log:	merge default into branch

diff too long, truncating to 10000 out of 19403 lines

diff --git a/.hgtags b/.hgtags
--- a/.hgtags
+++ b/.hgtags
@@ -1,1 +1,2 @@
 b590cf6de4190623aad9aa698694c22e614d67b9 release-1.5
+b48df0bf4e75b81d98f19ce89d4a7dc3e1dab5e5 benchmarked
diff --git a/LICENSE b/LICENSE
--- a/LICENSE
+++ b/LICENSE
@@ -37,22 +37,22 @@
     Armin Rigo
     Maciej Fijalkowski
     Carl Friedrich Bolz
+    Antonio Cuni
     Amaury Forgeot d'Arc
-    Antonio Cuni
     Samuele Pedroni
     Michael Hudson
     Holger Krekel
+    Benjamin Peterson
     Christian Tismer
-    Benjamin Peterson
+    Hakan Ardo
+    Alex Gaynor
     Eric van Riet Paap
-    Anders Chrigstr&#246;m
-    H&#229;kan Ard&#246;
+    Anders Chrigstrom
+    David Schneider
     Richard Emslie
     Dan Villiom Podlaski Christiansen
     Alexander Schremmer
-    Alex Gaynor
-    David Schneider
-    Aureli&#233;n Campeas
+    Aurelien Campeas
     Anders Lehmann
     Camillo Bruni
     Niklaus Haldimann
@@ -63,16 +63,17 @@
     Bartosz Skowron
     Jakub Gustak
     Guido Wesdorp
+    Daniel Roberts
     Adrien Di Mascio
     Laura Creighton
     Ludovic Aubry
     Niko Matsakis
-    Daniel Roberts
     Jason Creighton
-    Jacob Hall&#233;n
+    Jacob Hallen
     Alex Martelli
     Anders Hammarquist
     Jan de Mooij
+    Wim Lavrijsen
     Stephan Diehl
     Michael Foord
     Stefan Schwarzer
@@ -83,9 +84,13 @@
     Alexandre Fayolle
     Marius Gedminas
     Simon Burton
+    Justin Peel
     Jean-Paul Calderone
     John Witulski
+    Lukas Diekmann
+    holger krekel
     Wim Lavrijsen
+    Dario Bertini
     Andreas St&#252;hrk
     Jean-Philippe St. Pierre
     Guido van Rossum
@@ -97,15 +102,16 @@
     Georg Brandl
     Gerald Klix
     Wanja Saatkamp
+    Ronny Pfannschmidt
     Boris Feigin
     Oscar Nierstrasz
-    Dario Bertini
     David Malcolm
     Eugene Oden
     Henry Mason
+    Sven Hager
     Lukas Renggli
+    Ilya Osadchiy
     Guenter Jantzen
-    Ronny Pfannschmidt
     Bert Freudenberg
     Amit Regmi
     Ben Young
@@ -122,8 +128,8 @@
     Jared Grubb
     Karl Bartel
     Gabriel Lavoie
+    Victor Stinner
     Brian Dorsey
-    Victor Stinner
     Stuart Williams
     Toby Watson
     Antoine Pitrou
@@ -134,19 +140,23 @@
     Jonathan David Riehl
     Elmo M&#228;ntynen
     Anders Qvist
-    Beatrice D&#252;ring
+    Beatrice During
     Alexander Sedov
+    Timo Paulssen
+    Corbin Simpson
     Vincent Legoll
+    Romain Guillebert
     Alan McIntyre
-    Romain Guillebert
     Alex Perry
     Jens-Uwe Mager
+    Simon Cross
     Dan Stromberg
-    Lukas Diekmann
+    Guillebert Romain
     Carl Meyer
     Pieter Zieschang
     Alejandro J. Cura
     Sylvain Thenault
+    Christoph Gerum
     Travis Francis Athougies
     Henrik Vendelbo
     Lutz Paelike
@@ -157,6 +167,7 @@
     Miguel de Val Borro
     Ignas Mikalajunas
     Artur Lisiecki
+    Philip Jenvey
     Joshua Gilbert
     Godefroid Chappelle
     Yusei Tahara
@@ -165,26 +176,31 @@
     Gustavo Niemeyer
     William Leslie
     Akira Li
-    Kristj&#225;n Valur J&#243;nsson
+    Kristjan Valur Jonsson
     Bobby Impollonia
+    Michael Hudson-Doyle
     Andrew Thompson
     Anders Sigfridsson
+    Floris Bruynooghe
     Jacek Generowicz
     Dan Colish
-    Sven Hager
     Zooko Wilcox-O Hearn
+    Dan Villiom Podlaski Christiansen
     Anders Hammarquist
+    Chris Lambacher
     Dinu Gherman
     Dan Colish
+    Brett Cannon
     Daniel Neuh&#228;user
     Michael Chermside
     Konrad Delong
     Anna Ravencroft
     Greg Price
     Armin Ronacher
+    Christian Muirhead
     Jim Baker
-    Philip Jenvey
     Rodrigo Ara&#250;jo
+    Romain Guillebert
 
     Heinrich-Heine University, Germany 
     Open End AB (formerly AB Strakt), Sweden
diff --git a/lib-python/conftest.py b/lib-python/conftest.py
--- a/lib-python/conftest.py
+++ b/lib-python/conftest.py
@@ -154,18 +154,18 @@
     RegrTest('test_cmd.py'),
     RegrTest('test_cmd_line_script.py'),
     RegrTest('test_codeccallbacks.py', core=True),
-    RegrTest('test_codecencodings_cn.py'),
-    RegrTest('test_codecencodings_hk.py'),
-    RegrTest('test_codecencodings_jp.py'),
-    RegrTest('test_codecencodings_kr.py'),
-    RegrTest('test_codecencodings_tw.py'),
+    RegrTest('test_codecencodings_cn.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecencodings_hk.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecencodings_jp.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecencodings_kr.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecencodings_tw.py', usemodules='_multibytecodec'),
 
-    RegrTest('test_codecmaps_cn.py'),
-    RegrTest('test_codecmaps_hk.py'),
-    RegrTest('test_codecmaps_jp.py'),
-    RegrTest('test_codecmaps_kr.py'),
-    RegrTest('test_codecmaps_tw.py'),
-    RegrTest('test_codecs.py', core=True),
+    RegrTest('test_codecmaps_cn.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecmaps_hk.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecmaps_jp.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecmaps_kr.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecmaps_tw.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecs.py', core=True, usemodules='_multibytecodec'),
     RegrTest('test_codeop.py', core=True),
     RegrTest('test_coercion.py', core=True),
     RegrTest('test_collections.py'),
@@ -314,7 +314,7 @@
     RegrTest('test_mmap.py'),
     RegrTest('test_module.py', core=True),
     RegrTest('test_modulefinder.py'),
-    RegrTest('test_multibytecodec.py'),
+    RegrTest('test_multibytecodec.py', usemodules='_multibytecodec'),
     RegrTest('test_multibytecodec_support.py', skip="not a test"),
     RegrTest('test_multifile.py'),
     RegrTest('test_multiprocessing.py', skip='FIXME leaves subprocesses'),
diff --git a/lib-python/modified-2.7/ctypes/__init__.py b/lib-python/modified-2.7/ctypes/__init__.py
--- a/lib-python/modified-2.7/ctypes/__init__.py
+++ b/lib-python/modified-2.7/ctypes/__init__.py
@@ -489,9 +489,12 @@
         _flags_ = _FUNCFLAG_CDECL | _FUNCFLAG_PYTHONAPI
     return CFunctionType
 
-_cast = PYFUNCTYPE(py_object, c_void_p, py_object, py_object)(_cast_addr)
 def cast(obj, typ):
-    return _cast(obj, obj, typ)
+    try:
+        c_void_p.from_param(obj)
+    except TypeError, e:
+        raise ArgumentError(str(e))
+    return _cast_addr(obj, obj, typ)
 
 _string_at = PYFUNCTYPE(py_object, c_void_p, c_int)(_string_at_addr)
 def string_at(ptr, size=-1):
diff --git a/lib-python/modified-2.7/distutils/sysconfig_pypy.py b/lib-python/modified-2.7/distutils/sysconfig_pypy.py
--- a/lib-python/modified-2.7/distutils/sysconfig_pypy.py
+++ b/lib-python/modified-2.7/distutils/sysconfig_pypy.py
@@ -116,6 +116,12 @@
     if compiler.compiler_type == "unix":
         compiler.compiler_so.extend(['-fPIC', '-Wimplicit'])
         compiler.shared_lib_extension = get_config_var('SO')
+        if "CFLAGS" in os.environ:
+            cflags = os.environ["CFLAGS"]
+            compiler.compiler.append(cflags)
+            compiler.compiler_so.append(cflags)
+            compiler.linker_so.append(cflags)
+
 
 from sysconfig_cpython import (
     parse_makefile, _variable_rx, expand_makefile_vars)
diff --git a/lib-python/modified-2.7/test/test_multibytecodec.py b/lib-python/modified-2.7/test/test_multibytecodec.py
--- a/lib-python/modified-2.7/test/test_multibytecodec.py
+++ b/lib-python/modified-2.7/test/test_multibytecodec.py
@@ -148,7 +148,8 @@
 class Test_StreamReader(unittest.TestCase):
     def test_bug1728403(self):
         try:
-            open(TESTFN, 'w').write('\xa1')
+            with open(TESTFN, 'w') as f:
+                f.write('\xa1')
             f = codecs.open(TESTFN, encoding='cp949')
             self.assertRaises(UnicodeDecodeError, f.read, 2)
         finally:
diff --git a/lib-python/2.7/test/test_tarfile.py b/lib-python/modified-2.7/test/test_tarfile.py
copy from lib-python/2.7/test/test_tarfile.py
copy to lib-python/modified-2.7/test/test_tarfile.py
--- a/lib-python/2.7/test/test_tarfile.py
+++ b/lib-python/modified-2.7/test/test_tarfile.py
@@ -169,6 +169,7 @@
         except tarfile.ReadError:
             self.fail("tarfile.open() failed on empty archive")
         self.assertListEqual(tar.getmembers(), [])
+        tar.close()
 
     def test_null_tarfile(self):
         # Test for issue6123: Allow opening empty archives.
@@ -207,16 +208,21 @@
         fobj = open(self.tarname, "rb")
         tar = tarfile.open(fileobj=fobj, mode=self.mode)
         self.assertEqual(tar.name, os.path.abspath(fobj.name))
+        tar.close()
 
     def test_no_name_attribute(self):
-        data = open(self.tarname, "rb").read()
+        f = open(self.tarname, "rb")
+        data = f.read()
+        f.close()
         fobj = StringIO.StringIO(data)
         self.assertRaises(AttributeError, getattr, fobj, "name")
         tar = tarfile.open(fileobj=fobj, mode=self.mode)
         self.assertEqual(tar.name, None)
 
     def test_empty_name_attribute(self):
-        data = open(self.tarname, "rb").read()
+        f = open(self.tarname, "rb")
+        data = f.read()
+        f.close()
         fobj = StringIO.StringIO(data)
         fobj.name = ""
         tar = tarfile.open(fileobj=fobj, mode=self.mode)
@@ -515,6 +521,7 @@
         self.tar = tarfile.open(self.tarname, mode=self.mode, encoding="iso8859-1")
         tarinfo = self.tar.getmember("pax/umlauts-&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;")
         self._test_member(tarinfo, size=7011, chksum=md5_regtype)
+        self.tar.close()
 
 
 class LongnameTest(ReadTest):
@@ -675,6 +682,7 @@
             tar = tarfile.open(tmpname, self.mode)
             tarinfo = tar.gettarinfo(path)
             self.assertEqual(tarinfo.size, 0)
+            tar.close()
         finally:
             os.rmdir(path)
 
@@ -692,6 +700,7 @@
                 tar.gettarinfo(target)
                 tarinfo = tar.gettarinfo(link)
                 self.assertEqual(tarinfo.size, 0)
+                tar.close()
             finally:
                 os.remove(target)
                 os.remove(link)
@@ -704,6 +713,7 @@
                 tar = tarfile.open(tmpname, self.mode)
                 tarinfo = tar.gettarinfo(path)
                 self.assertEqual(tarinfo.size, 0)
+                tar.close()
             finally:
                 os.remove(path)
 
@@ -722,6 +732,7 @@
         tar.add(dstname)
         os.chdir(cwd)
         self.assertTrue(tar.getnames() == [], "added the archive to itself")
+        tar.close()
 
     def test_exclude(self):
         tempdir = os.path.join(TEMPDIR, "exclude")
@@ -742,6 +753,7 @@
             tar = tarfile.open(tmpname, "r")
             self.assertEqual(len(tar.getmembers()), 1)
             self.assertEqual(tar.getnames()[0], "empty_dir")
+            tar.close()
         finally:
             shutil.rmtree(tempdir)
 
@@ -859,7 +871,9 @@
             fobj.close()
         elif self.mode.endswith("bz2"):
             dec = bz2.BZ2Decompressor()
-            data = open(tmpname, "rb").read()
+            f = open(tmpname, "rb")
+            data = f.read()
+            f.close()
             data = dec.decompress(data)
             self.assertTrue(len(dec.unused_data) == 0,
                     "found trailing data")
@@ -938,6 +952,7 @@
                 "unable to read longname member")
         self.assertEqual(tarinfo.linkname, member.linkname,
                 "unable to read longname member")
+        tar.close()
 
     def test_longname_1023(self):
         self._test(("longnam/" * 127) + "longnam")
@@ -1030,6 +1045,7 @@
         else:
             n = tar.getmembers()[0].name
             self.assertTrue(name == n, "PAX longname creation failed")
+        tar.close()
 
     def test_pax_global_header(self):
         pax_headers = {
@@ -1058,6 +1074,7 @@
                     tarfile.PAX_NUMBER_FIELDS[key](val)
                 except (TypeError, ValueError):
                     self.fail("unable to convert pax header field")
+        tar.close()
 
     def test_pax_extended_header(self):
         # The fields from the pax header have priority over the
@@ -1077,6 +1094,7 @@
         self.assertEqual(t.pax_headers, pax_headers)
         self.assertEqual(t.name, "foo")
         self.assertEqual(t.uid, 123)
+        tar.close()
 
 
 class UstarUnicodeTest(unittest.TestCase):
@@ -1120,6 +1138,7 @@
         tarinfo.name = "foo"
         tarinfo.uname = u"&#65533;&#65533;&#65533;"
         self.assertRaises(UnicodeError, tar.addfile, tarinfo)
+        tar.close()
 
     def test_unicode_argument(self):
         tar = tarfile.open(tarname, "r", encoding="iso8859-1", errors="strict")
@@ -1174,6 +1193,7 @@
             tar = tarfile.open(tmpname, format=self.format, encoding="ascii",
                     errors=handler)
             self.assertEqual(tar.getnames()[0], name)
+            tar.close()
 
         self.assertRaises(UnicodeError, tarfile.open, tmpname,
                 encoding="ascii", errors="strict")
@@ -1186,6 +1206,7 @@
         tar = tarfile.open(tmpname, format=self.format, encoding="iso8859-1",
                 errors="utf-8")
         self.assertEqual(tar.getnames()[0], "&#65533;&#65533;&#65533;/" + u"&#65533;".encode("utf8"))
+        tar.close()
 
 
 class AppendTest(unittest.TestCase):
@@ -1213,6 +1234,7 @@
     def _test(self, names=["bar"], fileobj=None):
         tar = tarfile.open(self.tarname, fileobj=fileobj)
         self.assertEqual(tar.getnames(), names)
+        tar.close()
 
     def test_non_existing(self):
         self._add_testfile()
@@ -1231,7 +1253,9 @@
 
     def test_fileobj(self):
         self._create_testtar()
-        data = open(self.tarname).read()
+        f = open(self.tarname)
+        data = f.read()
+        f.close()
         fobj = StringIO.StringIO(data)
         self._add_testfile(fobj)
         fobj.seek(0)
@@ -1257,7 +1281,9 @@
     # Append mode is supposed to fail if the tarfile to append to
     # does not end with a zero block.
     def _test_error(self, data):
-        open(self.tarname, "wb").write(data)
+        f = open(self.tarname, "wb")
+        f.write(data)
+        f.close()
         self.assertRaises(tarfile.ReadError, self._add_testfile)
 
     def test_null(self):
diff --git a/lib_pypy/_ctypes/function.py b/lib_pypy/_ctypes/function.py
--- a/lib_pypy/_ctypes/function.py
+++ b/lib_pypy/_ctypes/function.py
@@ -78,8 +78,6 @@
     _com_iid = None
     _is_fastpath = False
 
-    __restype_set = False
-
     def _getargtypes(self):
         return self._argtypes_
 
@@ -93,13 +91,15 @@
                     raise TypeError(
                         "item %d in _argtypes_ has no from_param method" % (
                             i + 1,))
-            #
-            if all([hasattr(argtype, '_ffiargshape') for argtype in argtypes]):
-                fastpath_cls = make_fastpath_subclass(self.__class__)
-                fastpath_cls.enable_fastpath_maybe(self)
             self._argtypes_ = list(argtypes)
+            self._check_argtypes_for_fastpath()
     argtypes = property(_getargtypes, _setargtypes)
 
+    def _check_argtypes_for_fastpath(self):
+        if all([hasattr(argtype, '_ffiargshape') for argtype in self._argtypes_]):
+            fastpath_cls = make_fastpath_subclass(self.__class__)
+            fastpath_cls.enable_fastpath_maybe(self)
+
     def _getparamflags(self):
         return self._paramflags
 
@@ -149,7 +149,6 @@
         return self._restype_
 
     def _setrestype(self, restype):
-        self.__restype_set = True
         self._ptr = None
         if restype is int:
             from ctypes import c_int
@@ -219,6 +218,7 @@
                 import ctypes
                 restype = ctypes.c_int
             self._ptr = self._getfuncptr_fromaddress(self._argtypes_, restype)
+            self._check_argtypes_for_fastpath()
             return
 
         
@@ -296,13 +296,12 @@
                     "This function takes %d argument%s (%s given)"
                     % (len(self._argtypes_), plural, len(args)))
 
-            # check that arguments are convertible
-            ## XXX Not as long as ctypes.cast is a callback function with
-            ## py_object arguments...
-            ## self._convert_args(self._argtypes_, args, {})
-
             try:
-                res = self.callable(*args)
+                newargs = self._convert_args_for_callback(argtypes, args)
+            except (UnicodeError, TypeError, ValueError), e:
+                raise ArgumentError(str(e))
+            try:
+                res = self.callable(*newargs)
             except:
                 exc_info = sys.exc_info()
                 traceback.print_tb(exc_info[2], file=sys.stderr)
@@ -316,10 +315,6 @@
             warnings.warn('C function without declared arguments called',
                           RuntimeWarning, stacklevel=2)
             argtypes = []
-            
-        if not self.__restype_set:
-            warnings.warn('C function without declared return type called',
-                          RuntimeWarning, stacklevel=2)
 
         if self._com_index:
             from ctypes import cast, c_void_p, POINTER
@@ -366,7 +361,10 @@
             if self._flags_ & _rawffi.FUNCFLAG_USE_LASTERROR:
                 set_last_error(_rawffi.get_last_error())
         #
-        return self._build_result(self._restype_, result, newargs)
+        try:
+            return self._build_result(self._restype_, result, newargs)
+        finally:
+            funcptr.free_temp_buffers()
 
     def _do_errcheck(self, result, args):
         # The 'errcheck' protocol
@@ -466,6 +464,18 @@
 
         return cobj, cobj._to_ffi_param(), type(cobj)
 
+    def _convert_args_for_callback(self, argtypes, args):
+        assert len(argtypes) == len(args)
+        newargs = []
+        for argtype, arg in zip(argtypes, args):
+            param = argtype.from_param(arg)
+            if argtype._type_ == 'P': # special-case for c_void_p
+                param = param._get_buffer_value()
+            elif self._is_primitive(argtype):
+                param = param.value
+            newargs.append(param)
+        return newargs
+
     def _convert_args(self, argtypes, args, kwargs, marker=object()):
         newargs = []
         outargs = []
@@ -556,6 +566,9 @@
                 newargtypes.append(newargtype)
         return keepalives, newargs, newargtypes, outargs
 
+    @staticmethod
+    def _is_primitive(argtype):
+        return argtype.__bases__[0] is _SimpleCData
     
     def _wrap_result(self, restype, result):
         """
@@ -564,7 +577,7 @@
         """
         # hack for performance: if restype is a "simple" primitive type, don't
         # allocate the buffer because it's going to be thrown away immediately
-        if restype.__bases__[0] is _SimpleCData and not restype._is_pointer_like():
+        if self._is_primitive(restype) and not restype._is_pointer_like():
             return result
         #
         shape = restype._ffishape
@@ -680,7 +693,7 @@
             try:
                 result = self._call_funcptr(funcptr, *args)
                 result = self._do_errcheck(result, args)
-            except (TypeError, ArgumentError): # XXX, should be FFITypeError
+            except (TypeError, ArgumentError, UnicodeDecodeError):
                 assert self._slowpath_allowed
                 return CFuncPtr.__call__(self, *args)
             return result
diff --git a/lib_pypy/_ctypes/primitive.py b/lib_pypy/_ctypes/primitive.py
--- a/lib_pypy/_ctypes/primitive.py
+++ b/lib_pypy/_ctypes/primitive.py
@@ -10,6 +10,8 @@
 from _ctypes.builtin import ConvMode
 from _ctypes.array import Array
 from _ctypes.pointer import _Pointer, as_ffi_pointer
+#from _ctypes.function import CFuncPtr # this import is moved at the bottom
+                                       # because else it's circular
 
 class NULL(object):
     pass
@@ -86,7 +88,7 @@
         return res
     if isinstance(value, Array):
         return value
-    if isinstance(value, _Pointer):
+    if isinstance(value, (_Pointer, CFuncPtr)):
         return cls.from_address(value._buffer.buffer)
     if isinstance(value, (int, long)):
         return cls(value)
@@ -338,3 +340,5 @@
 
     def __nonzero__(self):
         return self._buffer[0] not in (0, '\x00')
+
+from _ctypes.function import CFuncPtr
diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py
--- a/lib_pypy/_ctypes/structure.py
+++ b/lib_pypy/_ctypes/structure.py
@@ -34,16 +34,18 @@
     for i, field in enumerate(all_fields):
         name = field[0]
         value = field[1]
+        is_bitfield = (len(field) == 3)
         fields[name] = Field(name,
                              self._ffistruct.fieldoffset(name),
                              self._ffistruct.fieldsize(name),
-                             value, i)
+                             value, i, is_bitfield)
 
     if anonymous_fields:
         resnames = []
         for i, field in enumerate(all_fields):
             name = field[0]
             value = field[1]
+            is_bitfield = (len(field) == 3)
             startpos = self._ffistruct.fieldoffset(name)
             if name in anonymous_fields:
                 for subname in value._names:
@@ -52,7 +54,7 @@
                     subvalue = value._fieldtypes[subname].ctype
                     fields[subname] = Field(subname,
                                             relpos, subvalue._sizeofinstances(),
-                                            subvalue, i)
+                                            subvalue, i, is_bitfield)
             else:
                 resnames.append(name)
         names = resnames
@@ -60,8 +62,8 @@
     self._fieldtypes = fields
 
 class Field(object):
-    def __init__(self, name, offset, size, ctype, num):
-        for k in ('name', 'offset', 'size', 'ctype', 'num'):
+    def __init__(self, name, offset, size, ctype, num, is_bitfield):
+        for k in ('name', 'offset', 'size', 'ctype', 'num', 'is_bitfield'):
             self.__dict__[k] = locals()[k]
 
     def __setattr__(self, name, value):
@@ -225,7 +227,7 @@
             field = self._fieldtypes[name]
         except KeyError:
             return _CData.__getattribute__(self, name)
-        if field.size >> 16:
+        if field.is_bitfield:
             # bitfield member, use direct access
             return self._buffer.__getattr__(name)
         else:
diff --git a/lib_pypy/pyrepl/unix_console.py b/lib_pypy/pyrepl/unix_console.py
--- a/lib_pypy/pyrepl/unix_console.py
+++ b/lib_pypy/pyrepl/unix_console.py
@@ -384,15 +384,19 @@
 
         self.__maybe_write_code(self._smkx)
 
-        self.old_sigwinch = signal.signal(
-            signal.SIGWINCH, self.__sigwinch)
+        try:
+            self.old_sigwinch = signal.signal(
+                signal.SIGWINCH, self.__sigwinch)
+        except ValueError:
+            pass
 
     def restore(self):
         self.__maybe_write_code(self._rmkx)
         self.flushoutput()
         tcsetattr(self.input_fd, termios.TCSADRAIN, self.__svtermstate)
 
-        signal.signal(signal.SIGWINCH, self.old_sigwinch)
+        if hasattr(self, 'old_sigwinch'):
+            signal.signal(signal.SIGWINCH, self.old_sigwinch)
 
     def __sigwinch(self, signum, frame):
         self.height, self.width = self.getheightwidth()
diff --git a/pypy/annotation/builtin.py b/pypy/annotation/builtin.py
--- a/pypy/annotation/builtin.py
+++ b/pypy/annotation/builtin.py
@@ -416,7 +416,8 @@
 from pypy.annotation.model import SomePtr
 from pypy.rpython.lltypesystem import lltype
 
-def malloc(s_T, s_n=None, s_flavor=None, s_zero=None, s_track_allocation=None):
+def malloc(s_T, s_n=None, s_flavor=None, s_zero=None, s_track_allocation=None,
+           s_add_memory_pressure=None):
     assert (s_n is None or s_n.knowntype == int
             or issubclass(s_n.knowntype, pypy.rlib.rarithmetic.base_int))
     assert s_T.is_constant()
@@ -432,6 +433,8 @@
     else:
         assert s_flavor.is_constant()
         assert s_track_allocation is None or s_track_allocation.is_constant()
+        assert (s_add_memory_pressure is None or
+                s_add_memory_pressure.is_constant())
         # not sure how to call malloc() for the example 'p' in the
         # presence of s_extraargs
         r = SomePtr(lltype.Ptr(s_T.const))
diff --git a/pypy/config/config.py b/pypy/config/config.py
--- a/pypy/config/config.py
+++ b/pypy/config/config.py
@@ -81,6 +81,12 @@
                                  (self.__class__, name))
         return self._cfgimpl_values[name]
 
+    def __dir__(self):
+        from_type = dir(type(self))
+        from_dict = list(self.__dict__)
+        extras = list(self._cfgimpl_values)
+        return sorted(set(extras + from_type + from_dict))
+
     def __delattr__(self, name):
         # XXX if you use delattr you are responsible for all bad things
         # happening
diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py
--- a/pypy/config/pypyoption.py
+++ b/pypy/config/pypyoption.py
@@ -327,6 +327,9 @@
         BoolOption("mutable_builtintypes",
                    "Allow the changing of builtin types", default=False,
                    requires=[("objspace.std.builtinshortcut", True)]),
+        BoolOption("withidentitydict",
+                   "track types that override __hash__, __eq__ or __cmp__ and use a special dict strategy for those which do not",
+                   default=True),
      ]),
 ])
 
diff --git a/pypy/config/support.py b/pypy/config/support.py
--- a/pypy/config/support.py
+++ b/pypy/config/support.py
@@ -9,7 +9,7 @@
         return 1    # don't override MAKEFLAGS.  This will call 'make' without any '-j' option
     if sys.platform == 'darwin':
         return darwin_get_cpu_count()
-    elif sys.platform != 'linux2':
+    elif not sys.platform.startswith('linux'):
         return 1    # implement me
     try:
         if isinstance(filename_or_file, str):
diff --git a/pypy/config/test/test_config.py b/pypy/config/test/test_config.py
--- a/pypy/config/test/test_config.py
+++ b/pypy/config/test/test_config.py
@@ -63,6 +63,20 @@
     py.test.raises(ConfigError, 'config.gc.name = "ref"')
     config.gc.name = "framework"
 
+def test___dir__():
+    descr = make_description()
+    config = Config(descr, bool=False)
+    attrs = dir(config)
+    assert '__repr__' in attrs        # from the type
+    assert '_cfgimpl_values' in attrs # from self
+    assert 'gc' in attrs              # custom attribute
+    assert 'objspace' in attrs        # custom attribute
+    #
+    attrs = dir(config.gc)
+    assert 'name' in attrs
+    assert 'dummy' in attrs
+    assert 'float' in attrs
+
 def test_arbitrary_option():
     descr = OptionDescription("top", "", [
         ArbitraryOption("a", "no help", default=None)
diff --git a/pypy/config/test/test_support.py b/pypy/config/test/test_support.py
--- a/pypy/config/test/test_support.py
+++ b/pypy/config/test/test_support.py
@@ -40,7 +40,7 @@
         return self._value
 
 def test_cpuinfo_linux():
-    if sys.platform != 'linux2':
+    if not sys.platform.startswith('linux'):
         py.test.skip("linux only")
     saved = os.environ
     try:
diff --git a/pypy/config/translationoption.py b/pypy/config/translationoption.py
--- a/pypy/config/translationoption.py
+++ b/pypy/config/translationoption.py
@@ -13,6 +13,10 @@
 DEFL_LOW_INLINE_THRESHOLD = DEFL_INLINE_THRESHOLD / 2.0
 
 DEFL_GC = "minimark"
+if sys.platform.startswith("linux"):
+    DEFL_ROOTFINDER_WITHJIT = "asmgcc"
+else:
+    DEFL_ROOTFINDER_WITHJIT = "shadowstack"
 
 IS_64_BITS = sys.maxint > 2147483647
 
@@ -109,7 +113,7 @@
     BoolOption("jit", "generate a JIT",
                default=False,
                suggests=[("translation.gc", DEFL_GC),
-                         ("translation.gcrootfinder", "asmgcc"),
+                         ("translation.gcrootfinder", DEFL_ROOTFINDER_WITHJIT),
                          ("translation.list_comprehension_operations", True)]),
     ChoiceOption("jit_backend", "choose the backend for the JIT",
                  ["auto", "x86", "x86-without-sse2", "llvm"],
@@ -140,7 +144,10 @@
                  ["annotate", "rtype", "backendopt", "database", "source",
                   "pyjitpl"],
                  default=None, cmdline="--fork-before"),
-
+    BoolOption("dont_write_c_files",
+               "Make the C backend write everyting to /dev/null. " +
+               "Useful for benchmarking, so you don't actually involve the disk",
+               default=False, cmdline="--dont-write-c-files"),
     ArbitraryOption("instrumentctl", "internal",
                default=None),
     StrOption("output", "Output file name", cmdline="--output"),
diff --git a/pypy/doc/coding-guide.rst b/pypy/doc/coding-guide.rst
--- a/pypy/doc/coding-guide.rst
+++ b/pypy/doc/coding-guide.rst
@@ -929,6 +929,19 @@
 located in the ``py/bin/`` directory.  For switches to
 modify test execution pass the ``-h`` option.
 
+Coverage reports
+----------------
+
+In order to get coverage reports the `pytest-cov`_ plugin is included.
+it adds some extra requirements ( coverage_ and `cov-core`_ )
+and can once they are installed coverage testing can be invoked via::
+
+  python test_all.py --cov file_or_direcory_to_cover file_or_directory
+
+.. _`pytest-cov`: http://pypi.python.org/pypi/pytest-cov
+.. _`coverage`: http://pypi.python.org/pypi/coverage
+.. _`cov-core`: http://pypi.python.org/pypi/cov-core
+
 Test conventions
 ----------------
 
diff --git a/pypy/doc/config/objspace.std.withidentitydict.txt b/pypy/doc/config/objspace.std.withidentitydict.txt
new file mode 100644
--- /dev/null
+++ b/pypy/doc/config/objspace.std.withidentitydict.txt
@@ -0,0 +1,21 @@
+=============================
+objspace.std.withidentitydict
+=============================
+
+* **name:** withidentitydict
+
+* **description:** enable a dictionary strategy for "by identity" comparisons
+
+* **command-line:** --objspace-std-withidentitydict
+
+* **command-line for negation:** --no-objspace-std-withidentitydict
+
+* **option type:** boolean option
+
+* **default:** True
+
+
+Enable a dictionary strategy specialized for instances of classes which
+compares "by identity", which is the default unless you override ``__hash__``,
+``__eq__`` or ``__cmp__``.  This strategy will be used only with new-style
+classes.
diff --git a/pypy/doc/config/translation.dont_write_c_files.txt b/pypy/doc/config/translation.dont_write_c_files.txt
new file mode 100644
--- /dev/null
+++ b/pypy/doc/config/translation.dont_write_c_files.txt
@@ -0,0 +1,4 @@
+write the generated C files to ``/dev/null`` instead of to the disk. Useful if
+you want to use translate.py as a benchmark and don't want to access the disk.
+
+.. _`translation documentation`: ../translation.html
diff --git a/pypy/doc/config/translation.gc.txt b/pypy/doc/config/translation.gc.txt
--- a/pypy/doc/config/translation.gc.txt
+++ b/pypy/doc/config/translation.gc.txt
@@ -1,4 +1,6 @@
-Choose the Garbage Collector used by the translated program:
+Choose the Garbage Collector used by the translated program.
+The good performing collectors are "hybrid" and "minimark".
+The default is "minimark".
 
   - "ref": reference counting. Takes very long to translate and the result is
     slow.
@@ -11,3 +13,12 @@
     older generation.
 
   - "boehm": use the Boehm conservative GC.
+
+  - "hybrid": a hybrid collector of "generation" together with a
+    mark-n-sweep old space
+
+  - "markcompact": a slow, but memory-efficient collector,
+    influenced e.g. by Smalltalk systems.
+
+  - "minimark": a generational mark-n-sweep collector with good
+    performance.  Includes page marking for large arrays.
diff --git a/pypy/doc/contributor.rst b/pypy/doc/contributor.rst
--- a/pypy/doc/contributor.rst
+++ b/pypy/doc/contributor.rst
@@ -9,22 +9,22 @@
     Armin Rigo
     Maciej Fijalkowski
     Carl Friedrich Bolz
+    Antonio Cuni
     Amaury Forgeot d'Arc
-    Antonio Cuni
     Samuele Pedroni
     Michael Hudson
     Holger Krekel
+    Benjamin Peterson
     Christian Tismer
-    Benjamin Peterson
+    Hakan Ardo
+    Alex Gaynor
     Eric van Riet Paap
-    Anders Chrigstr&#246;m
-    H&#229;kan Ard&#246;
+    Anders Chrigstrom
+    David Schneider
     Richard Emslie
     Dan Villiom Podlaski Christiansen
     Alexander Schremmer
-    Alex Gaynor
-    David Schneider
-    Aureli&#233;n Campeas
+    Aurelien Campeas
     Anders Lehmann
     Camillo Bruni
     Niklaus Haldimann
@@ -35,16 +35,17 @@
     Bartosz Skowron
     Jakub Gustak
     Guido Wesdorp
+    Daniel Roberts
     Adrien Di Mascio
     Laura Creighton
     Ludovic Aubry
     Niko Matsakis
-    Daniel Roberts
     Jason Creighton
-    Jacob Hall&#233;n
+    Jacob Hallen
     Alex Martelli
     Anders Hammarquist
     Jan de Mooij
+    Wim Lavrijsen
     Stephan Diehl
     Michael Foord
     Stefan Schwarzer
@@ -55,9 +56,13 @@
     Alexandre Fayolle
     Marius Gedminas
     Simon Burton
+    Justin Peel
     Jean-Paul Calderone
     John Witulski
+    Lukas Diekmann
+    holger krekel
     Wim Lavrijsen
+    Dario Bertini
     Andreas St&#252;hrk
     Jean-Philippe St. Pierre
     Guido van Rossum
@@ -69,15 +74,16 @@
     Georg Brandl
     Gerald Klix
     Wanja Saatkamp
+    Ronny Pfannschmidt
     Boris Feigin
     Oscar Nierstrasz
-    Dario Bertini
     David Malcolm
     Eugene Oden
     Henry Mason
+    Sven Hager
     Lukas Renggli
+    Ilya Osadchiy
     Guenter Jantzen
-    Ronny Pfannschmidt
     Bert Freudenberg
     Amit Regmi
     Ben Young
@@ -94,8 +100,8 @@
     Jared Grubb
     Karl Bartel
     Gabriel Lavoie
+    Victor Stinner
     Brian Dorsey
-    Victor Stinner
     Stuart Williams
     Toby Watson
     Antoine Pitrou
@@ -106,19 +112,23 @@
     Jonathan David Riehl
     Elmo M&#228;ntynen
     Anders Qvist
-    Beatrice D&#252;ring
+    Beatrice During
     Alexander Sedov
+    Timo Paulssen
+    Corbin Simpson
     Vincent Legoll
+    Romain Guillebert
     Alan McIntyre
-    Romain Guillebert
     Alex Perry
     Jens-Uwe Mager
+    Simon Cross
     Dan Stromberg
-    Lukas Diekmann
+    Guillebert Romain
     Carl Meyer
     Pieter Zieschang
     Alejandro J. Cura
     Sylvain Thenault
+    Christoph Gerum
     Travis Francis Athougies
     Henrik Vendelbo
     Lutz Paelike
@@ -129,6 +139,7 @@
     Miguel de Val Borro
     Ignas Mikalajunas
     Artur Lisiecki
+    Philip Jenvey
     Joshua Gilbert
     Godefroid Chappelle
     Yusei Tahara
@@ -137,24 +148,29 @@
     Gustavo Niemeyer
     William Leslie
     Akira Li
-    Kristj&#225;n Valur J&#243;nsson
+    Kristjan Valur Jonsson
     Bobby Impollonia
+    Michael Hudson-Doyle
     Andrew Thompson
     Anders Sigfridsson
+    Floris Bruynooghe
     Jacek Generowicz
     Dan Colish
-    Sven Hager
     Zooko Wilcox-O Hearn
+    Dan Villiom Podlaski Christiansen
     Anders Hammarquist
+    Chris Lambacher
     Dinu Gherman
     Dan Colish
+    Brett Cannon
     Daniel Neuh&#228;user
     Michael Chermside
     Konrad Delong
     Anna Ravencroft
     Greg Price
     Armin Ronacher
+    Christian Muirhead
     Jim Baker
-    Philip Jenvey
     Rodrigo Ara&#250;jo
+    Romain Guillebert
 
diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst
--- a/pypy/doc/cpython_differences.rst
+++ b/pypy/doc/cpython_differences.rst
@@ -211,6 +211,38 @@
     >>>> print d1['a']
     42
 
+Mutating classes of objects which are already used as dictionary keys
+---------------------------------------------------------------------
+
+Consider the following snippet of code::
+
+    class X(object):
+        pass
+
+    def __evil_eq__(self, other):
+        print 'hello world'
+        return False
+
+    def evil(y):
+        d = {x(): 1}
+        X.__eq__ = __evil_eq__
+        d[y] # might trigger a call to __eq__?
+
+In CPython, __evil_eq__ **might** be called, although there is no way to write
+a test which reliably calls it.  It happens if ``y is not x`` and ``hash(y) ==
+hash(x)``, where ``hash(x)`` is computed when ``x`` is inserted into the
+dictionary.  If **by chance** the condition is satisfied, then ``__evil_eq__``
+is called.
+
+PyPy uses a special strategy to optimize dictionaries whose keys are instances
+of user-defined classes which do not override the default ``__hash__``,
+``__eq__`` and ``__cmp__``: when using this strategy, ``__eq__`` and
+``__cmp__`` are never called, but instead the lookup is done by identity, so
+in the case above it is guaranteed that ``__eq__`` won't be called.
+
+Note that in all other cases (e.g., if you have a custom ``__hash__`` and
+``__eq__`` in ``y``) the behavior is exactly the same as CPython.
+
 
 Ignored exceptions
 -----------------------
diff --git a/pypy/doc/extending.rst b/pypy/doc/extending.rst
--- a/pypy/doc/extending.rst
+++ b/pypy/doc/extending.rst
@@ -19,12 +19,12 @@
   section
 
 * Write them in pure python and use direct libffi low-level bindings, See
-  \_rawffi_ module description.
+  \_ffi_ module description.
 
 * Write them in RPython as mixedmodule_, using *rffi* as bindings.
 
 .. _ctypes: #CTypes
-.. _\_rawffi: #LibFFI
+.. _\_ffi: #LibFFI
 .. _mixedmodule: #Mixed Modules
 
 CTypes
@@ -42,41 +42,50 @@
 platform-dependent details (compiling small snippets of C code and running
 them), so it'll benefit not pypy-related ctypes-based modules as well.
 
+ctypes call are optimized by the JIT and the resulting machine code contains a
+direct call to the target C function.  However, due to the very dynamic nature
+of ctypes, some overhead over a bare C call is still present, in particular to
+check/convert the types of the parameters.  Moreover, even if most calls are
+optimized, some cannot and thus need to follow the slow path, not optimized by
+the JIT.
+
 .. _`ctypes-configure`: ctypes-implementation.html#ctypes-configure
+.. _`CPython ctypes`: http://docs.python.org/library/ctypes.html
 
 Pros
 ----
 
-Stable, CPython-compatible API
+Stable, CPython-compatible API.  Most calls are fast, optimized by JIT.
 
 Cons
 ----
 
-Only pure-python code (slow), problems with platform-dependency (although
-we partially solve those). PyPy implementation is now very slow.
+Problems with platform-dependency (although we partially solve
+those). Although the JIT optimizes ctypes calls, some overhead is still
+present.  The slow-path is very slow.
 
-_`CPython ctypes`: http://python.net/crew/theller/ctypes/
 
 LibFFI
 ======
 
 Mostly in order to be able to write a ctypes module, we developed a very
-low-level libffi bindings. (libffi is a C-level library for dynamic calling,
+low-level libffi bindings called ``_ffi``. (libffi is a C-level library for dynamic calling,
 which is used by CPython ctypes). This library provides stable and usable API,
 although it's API is a very low-level one. It does not contain any
-magic.
+magic.  It is also optimized by the JIT, but has much less overhead than ctypes.
 
 Pros
 ----
 
-Works. Combines disadvantages of using ctypes with disadvantages of
-using mixed modules. Probably more suitable for a delicate code
-where ctypes magic goes in a way.
+It Works. Probably more suitable for a delicate code where ctypes magic goes
+in a way.  All calls are optimized by the JIT, there is no slow path as in
+ctypes.
 
 Cons
 ----
 
-Slow. CPython-incompatible API, very rough and low-level
+It combines disadvantages of using ctypes with disadvantages of using mixed
+modules. CPython-incompatible API, very rough and low-level.
 
 Mixed Modules
 =============
@@ -87,15 +96,15 @@
 * a mixed module needs to be written in RPython, which is far more
   complicated than Python (XXX link)
 
-* due to lack of separate compilation (as of April 2008), each
+* due to lack of separate compilation (as of July 2011), each
   compilation-check requires to recompile whole PyPy python interpreter,
   which takes 0.5-1h. We plan to solve this at some point in near future.
 
 * although rpython is a garbage-collected language, the border between
   C and RPython needs to be managed by hand (each object that goes into the
-  C level must be explicitly freed) XXX we try to solve this
+  C level must be explicitly freed).
 
-Some document is available `here`_
+Some documentation is available `here`_
 
 .. _`here`: rffi.html
 
diff --git a/pypy/doc/how-to-release.rst b/pypy/doc/how-to-release.rst
--- a/pypy/doc/how-to-release.rst
+++ b/pypy/doc/how-to-release.rst
@@ -21,8 +21,8 @@
 Release Steps
 ----------------
 
-* at code freeze make a release branch under
-  http://codepeak.net/svn/pypy/release/x.y(.z). IMPORTANT: bump the
+* at code freeze make a release branch using release-x.x.x in mercurial
+  IMPORTANT: bump the
   pypy version number in module/sys/version.py and in
   module/cpyext/include/patchlevel.h, notice that the branch
   will capture the revision number of this change for the release;
@@ -48,12 +48,6 @@
   the release announcement should contain a direct link to the download page
 * update pypy.org (under extradoc/pypy.org), rebuild and commit
 
-* update http://codespeak.net/pypy/trunk:
-   code0> + chmod -R yourname:users /www/codespeak.net/htdocs/pypy/trunk
-   local> cd ..../pypy/doc && py.test
-   local> cd ..../pypy
-   local> rsync -az doc codespeak.net:/www/codespeak.net/htdocs/pypy/trunk/pypy/
-
 * post announcement on morepypy.blogspot.com
 * send announcements to pypy-dev, python-list,
   python-announce, python-dev ...
diff --git a/pypy/doc/release-1.6.0.rst b/pypy/doc/release-1.6.0.rst
new file mode 100644
--- /dev/null
+++ b/pypy/doc/release-1.6.0.rst
@@ -0,0 +1,5 @@
+===========================
+PyPy 1.6 - faster than ever
+===========================
+
+We're pleased to announce 1.6 release of PyPy.
diff --git a/pypy/doc/windows.rst b/pypy/doc/windows.rst
--- a/pypy/doc/windows.rst
+++ b/pypy/doc/windows.rst
@@ -32,6 +32,24 @@
 modules that relies on third-party libraries.  See below how to get
 and build them.
 
+Preping Windows for the Large Build
+-----------------------------------
+
+Normally 32bit programs are limited to 2GB of memory on Windows. It is
+possible to raise this limit, to 3GB on Windows 32bit, and almost 4GB
+on Windows 64bit.
+
+On Windows 32bit, it is necessary to modify the system: follow
+http://usa.autodesk.com/adsk/servlet/ps/dl/item?siteID=123112&id=9583842&linkID=9240617
+to enable the "3GB" feature, and reboot. This step is not necessary on
+Windows 64bit.
+
+Then you need to execute::
+
+    editbin /largeaddressaware pypy.exe
+
+on the pypy.exe file you compiled.
+
 Installing external packages
 ----------------------------
 
diff --git a/pypy/interpreter/astcompiler/ast.py b/pypy/interpreter/astcompiler/ast.py
--- a/pypy/interpreter/astcompiler/ast.py
+++ b/pypy/interpreter/astcompiler/ast.py
@@ -2541,8 +2541,9 @@
 class ASTVisitor(object):
 
     def visit_sequence(self, seq):
-        for node in seq:
-            node.walkabout(self)
+        if seq is not None:
+            for node in seq:
+                node.walkabout(self)
 
     def default_visitor(self, node):
         raise NodeVisitorNotImplemented
@@ -2673,46 +2674,36 @@
 class GenericASTVisitor(ASTVisitor):
 
     def visit_Module(self, node):
-        if node.body:
-            self.visit_sequence(node.body)
+        self.visit_sequence(node.body)
 
     def visit_Interactive(self, node):
-        if node.body:
-            self.visit_sequence(node.body)
+        self.visit_sequence(node.body)
 
     def visit_Expression(self, node):
         node.body.walkabout(self)
 
     def visit_Suite(self, node):
-        if node.body:
-            self.visit_sequence(node.body)
+        self.visit_sequence(node.body)
 
     def visit_FunctionDef(self, node):
         node.args.walkabout(self)
-        if node.body:
-            self.visit_sequence(node.body)
-        if node.decorator_list:
-            self.visit_sequence(node.decorator_list)
+        self.visit_sequence(node.body)
+        self.visit_sequence(node.decorator_list)
 
     def visit_ClassDef(self, node):
-        if node.bases:
-            self.visit_sequence(node.bases)
-        if node.body:
-            self.visit_sequence(node.body)
-        if node.decorator_list:
-            self.visit_sequence(node.decorator_list)
+        self.visit_sequence(node.bases)
+        self.visit_sequence(node.body)
+        self.visit_sequence(node.decorator_list)
 
     def visit_Return(self, node):
         if node.value:
             node.value.walkabout(self)
 
     def visit_Delete(self, node):
-        if node.targets:
-            self.visit_sequence(node.targets)
+        self.visit_sequence(node.targets)
 
     def visit_Assign(self, node):
-        if node.targets:
-            self.visit_sequence(node.targets)
+        self.visit_sequence(node.targets)
         node.value.walkabout(self)
 
     def visit_AugAssign(self, node):
@@ -2722,37 +2713,29 @@
     def visit_Print(self, node):
         if node.dest:
             node.dest.walkabout(self)
-        if node.values:
-            self.visit_sequence(node.values)
+        self.visit_sequence(node.values)
 
     def visit_For(self, node):
         node.target.walkabout(self)
         node.iter.walkabout(self)
-        if node.body:
-            self.visit_sequence(node.body)
-        if node.orelse:
-            self.visit_sequence(node.orelse)
+        self.visit_sequence(node.body)
+        self.visit_sequence(node.orelse)
 
     def visit_While(self, node):
         node.test.walkabout(self)
-        if node.body:
-            self.visit_sequence(node.body)
-        if node.orelse:
-            self.visit_sequence(node.orelse)
+        self.visit_sequence(node.body)
+        self.visit_sequence(node.orelse)
 
     def visit_If(self, node):
         node.test.walkabout(self)
-        if node.body:
-            self.visit_sequence(node.body)
-        if node.orelse:
-            self.visit_sequence(node.orelse)
+        self.visit_sequence(node.body)
+        self.visit_sequence(node.orelse)
 
     def visit_With(self, node):
         node.context_expr.walkabout(self)
         if node.optional_vars:
             node.optional_vars.walkabout(self)
-        if node.body:
-            self.visit_sequence(node.body)
+        self.visit_sequence(node.body)
 
     def visit_Raise(self, node):
         if node.type:
@@ -2763,18 +2746,13 @@
             node.tback.walkabout(self)
 
     def visit_TryExcept(self, node):
-        if node.body:
-            self.visit_sequence(node.body)
-        if node.handlers:
-            self.visit_sequence(node.handlers)
-        if node.orelse:
-            self.visit_sequence(node.orelse)
+        self.visit_sequence(node.body)
+        self.visit_sequence(node.handlers)
+        self.visit_sequence(node.orelse)
 
     def visit_TryFinally(self, node):
-        if node.body:
-            self.visit_sequence(node.body)
-        if node.finalbody:
-            self.visit_sequence(node.finalbody)
+        self.visit_sequence(node.body)
+        self.visit_sequence(node.finalbody)
 
     def visit_Assert(self, node):
         node.test.walkabout(self)
@@ -2782,12 +2760,10 @@
             node.msg.walkabout(self)
 
     def visit_Import(self, node):
-        if node.names:
-            self.visit_sequence(node.names)
+        self.visit_sequence(node.names)
 
     def visit_ImportFrom(self, node):
-        if node.names:
-            self.visit_sequence(node.names)
+        self.visit_sequence(node.names)
 
     def visit_Exec(self, node):
         node.body.walkabout(self)
@@ -2812,8 +2788,7 @@
         pass
 
     def visit_BoolOp(self, node):
-        if node.values:
-            self.visit_sequence(node.values)
+        self.visit_sequence(node.values)
 
     def visit_BinOp(self, node):
         node.left.walkabout(self)
@@ -2832,35 +2807,28 @@
         node.orelse.walkabout(self)
 
     def visit_Dict(self, node):
-        if node.keys:
-            self.visit_sequence(node.keys)
-        if node.values:
-            self.visit_sequence(node.values)
+        self.visit_sequence(node.keys)
+        self.visit_sequence(node.values)
 
     def visit_Set(self, node):
-        if node.elts:
-            self.visit_sequence(node.elts)
+        self.visit_sequence(node.elts)
 
     def visit_ListComp(self, node):
         node.elt.walkabout(self)
-        if node.generators:
-            self.visit_sequence(node.generators)
+        self.visit_sequence(node.generators)
 
     def visit_SetComp(self, node):
         node.elt.walkabout(self)
-        if node.generators:
-            self.visit_sequence(node.generators)
+        self.visit_sequence(node.generators)
 
     def visit_DictComp(self, node):
         node.key.walkabout(self)
         node.value.walkabout(self)
-        if node.generators:
-            self.visit_sequence(node.generators)
+        self.visit_sequence(node.generators)
 
     def visit_GeneratorExp(self, node):
         node.elt.walkabout(self)
-        if node.generators:
-            self.visit_sequence(node.generators)
+        self.visit_sequence(node.generators)
 
     def visit_Yield(self, node):
         if node.value:
@@ -2868,15 +2836,12 @@
 
     def visit_Compare(self, node):
         node.left.walkabout(self)
-        if node.comparators:
-            self.visit_sequence(node.comparators)
+        self.visit_sequence(node.comparators)
 
     def visit_Call(self, node):
         node.func.walkabout(self)
-        if node.args:
-            self.visit_sequence(node.args)
-        if node.keywords:
-            self.visit_sequence(node.keywords)
+        self.visit_sequence(node.args)
+        self.visit_sequence(node.keywords)
         if node.starargs:
             node.starargs.walkabout(self)
         if node.kwargs:
@@ -2902,12 +2867,10 @@
         pass
 
     def visit_List(self, node):
-        if node.elts:
-            self.visit_sequence(node.elts)
+        self.visit_sequence(node.elts)
 
     def visit_Tuple(self, node):
-        if node.elts:
-            self.visit_sequence(node.elts)
+        self.visit_sequence(node.elts)
 
     def visit_Const(self, node):
         pass
@@ -2924,8 +2887,7 @@
             node.step.walkabout(self)
 
     def visit_ExtSlice(self, node):
-        if node.dims:
-            self.visit_sequence(node.dims)
+        self.visit_sequence(node.dims)
 
     def visit_Index(self, node):
         node.value.walkabout(self)
@@ -2933,22 +2895,18 @@
     def visit_comprehension(self, node):
         node.target.walkabout(self)
         node.iter.walkabout(self)
-        if node.ifs:
-            self.visit_sequence(node.ifs)
+        self.visit_sequence(node.ifs)
 
     def visit_ExceptHandler(self, node):
         if node.type:
             node.type.walkabout(self)
         if node.name:
             node.name.walkabout(self)
-        if node.body:
-            self.visit_sequence(node.body)
+        self.visit_sequence(node.body)
 
     def visit_arguments(self, node):
-        if node.args:
-            self.visit_sequence(node.args)
-        if node.defaults:
-            self.visit_sequence(node.defaults)
+        self.visit_sequence(node.args)
+        self.visit_sequence(node.defaults)
 
     def visit_keyword(self, node):
         node.value.walkabout(self)
@@ -3069,6 +3027,7 @@
             raise
         w_self.setdictvalue(space, 'body', w_new_value)
         return
+    w_self.deldictvalue(space, 'body')
     w_self.initialization_state |= 1
 
 _Expression_field_unroller = unrolling_iterable(['body'])
@@ -3157,6 +3116,7 @@
             raise
         w_self.setdictvalue(space, 'lineno', w_new_value)
         return
+    w_self.deldictvalue(space, 'lineno')
     w_self.initialization_state |= w_self._lineno_mask
 
 def stmt_get_col_offset(space, w_self):
@@ -3178,6 +3138,7 @@
             raise
         w_self.setdictvalue(space, 'col_offset', w_new_value)
         return
+    w_self.deldictvalue(space, 'col_offset')
     w_self.initialization_state |= w_self._col_offset_mask
 
 stmt.typedef = typedef.TypeDef("stmt",
@@ -3208,6 +3169,7 @@
             raise
         w_self.setdictvalue(space, 'name', w_new_value)
         return
+    w_self.deldictvalue(space, 'name')
     w_self.initialization_state |= 1
 
 def FunctionDef_get_args(space, w_self):
@@ -3229,6 +3191,7 @@
             raise
         w_self.setdictvalue(space, 'args', w_new_value)
         return
+    w_self.deldictvalue(space, 'args')
     w_self.initialization_state |= 2
 
 def FunctionDef_get_body(space, w_self):
@@ -3315,6 +3278,7 @@
             raise
         w_self.setdictvalue(space, 'name', w_new_value)
         return
+    w_self.deldictvalue(space, 'name')
     w_self.initialization_state |= 1
 
 def ClassDef_get_bases(space, w_self):
@@ -3420,6 +3384,7 @@
             raise
         w_self.setdictvalue(space, 'value', w_new_value)
         return
+    w_self.deldictvalue(space, 'value')
     w_self.initialization_state |= 1
 
 _Return_field_unroller = unrolling_iterable(['value'])
@@ -3526,6 +3491,7 @@
             raise
         w_self.setdictvalue(space, 'value', w_new_value)
         return
+    w_self.deldictvalue(space, 'value')
     w_self.initialization_state |= 2
 
 _Assign_field_unroller = unrolling_iterable(['targets', 'value'])
@@ -3573,6 +3539,7 @@
             raise
         w_self.setdictvalue(space, 'target', w_new_value)
         return
+    w_self.deldictvalue(space, 'target')
     w_self.initialization_state |= 1
 
 def AugAssign_get_op(space, w_self):
@@ -3590,13 +3557,13 @@
     try:
         obj = space.interp_w(operator, w_new_value)
         w_self.op = obj.to_simple_int(space)
-        # need to save the original object too
-        w_self.setdictvalue(space, 'op', w_new_value)
     except OperationError, e:
         if not e.match(space, space.w_TypeError):
             raise
         w_self.setdictvalue(space, 'op', w_new_value)
         return
+    # need to save the original object too
+    w_self.setdictvalue(space, 'op', w_new_value)
     w_self.initialization_state |= 2
 
 def AugAssign_get_value(space, w_self):
@@ -3618,6 +3585,7 @@
             raise
         w_self.setdictvalue(space, 'value', w_new_value)
         return
+    w_self.deldictvalue(space, 'value')
     w_self.initialization_state |= 4
 
 _AugAssign_field_unroller = unrolling_iterable(['target', 'op', 'value'])
@@ -3665,6 +3633,7 @@
             raise
         w_self.setdictvalue(space, 'dest', w_new_value)
         return
+    w_self.deldictvalue(space, 'dest')
     w_self.initialization_state |= 1
 
 def Print_get_values(space, w_self):
@@ -3704,6 +3673,7 @@
             raise
         w_self.setdictvalue(space, 'nl', w_new_value)
         return
+    w_self.deldictvalue(space, 'nl')
     w_self.initialization_state |= 4
 
 _Print_field_unroller = unrolling_iterable(['dest', 'values', 'nl'])
@@ -3752,6 +3722,7 @@
             raise
         w_self.setdictvalue(space, 'target', w_new_value)
         return
+    w_self.deldictvalue(space, 'target')
     w_self.initialization_state |= 1
 
 def For_get_iter(space, w_self):
@@ -3773,6 +3744,7 @@
             raise
         w_self.setdictvalue(space, 'iter', w_new_value)
         return
+    w_self.deldictvalue(space, 'iter')
     w_self.initialization_state |= 2
 
 def For_get_body(space, w_self):
@@ -3859,6 +3831,7 @@
             raise
         w_self.setdictvalue(space, 'test', w_new_value)
         return
+    w_self.deldictvalue(space, 'test')
     w_self.initialization_state |= 1
 
 def While_get_body(space, w_self):
@@ -3944,6 +3917,7 @@
             raise
         w_self.setdictvalue(space, 'test', w_new_value)
         return
+    w_self.deldictvalue(space, 'test')
     w_self.initialization_state |= 1
 
 def If_get_body(space, w_self):
@@ -4029,6 +4003,7 @@
             raise
         w_self.setdictvalue(space, 'context_expr', w_new_value)
         return
+    w_self.deldictvalue(space, 'context_expr')
     w_self.initialization_state |= 1
 
 def With_get_optional_vars(space, w_self):
@@ -4050,6 +4025,7 @@
             raise
         w_self.setdictvalue(space, 'optional_vars', w_new_value)
         return
+    w_self.deldictvalue(space, 'optional_vars')
     w_self.initialization_state |= 2
 
 def With_get_body(space, w_self):
@@ -4116,6 +4092,7 @@
             raise
         w_self.setdictvalue(space, 'type', w_new_value)
         return
+    w_self.deldictvalue(space, 'type')
     w_self.initialization_state |= 1
 
 def Raise_get_inst(space, w_self):
@@ -4137,6 +4114,7 @@
             raise
         w_self.setdictvalue(space, 'inst', w_new_value)
         return
+    w_self.deldictvalue(space, 'inst')
     w_self.initialization_state |= 2
 
 def Raise_get_tback(space, w_self):
@@ -4158,6 +4136,7 @@
             raise
         w_self.setdictvalue(space, 'tback', w_new_value)
         return
+    w_self.deldictvalue(space, 'tback')
     w_self.initialization_state |= 4
 
 _Raise_field_unroller = unrolling_iterable(['type', 'inst', 'tback'])
@@ -4351,6 +4330,7 @@
             raise
         w_self.setdictvalue(space, 'test', w_new_value)
         return
+    w_self.deldictvalue(space, 'test')
     w_self.initialization_state |= 1
 
 def Assert_get_msg(space, w_self):
@@ -4372,6 +4352,7 @@
             raise
         w_self.setdictvalue(space, 'msg', w_new_value)
         return
+    w_self.deldictvalue(space, 'msg')
     w_self.initialization_state |= 2
 
 _Assert_field_unroller = unrolling_iterable(['test', 'msg'])
@@ -4464,6 +4445,7 @@
             raise
         w_self.setdictvalue(space, 'module', w_new_value)
         return
+    w_self.deldictvalue(space, 'module')
     w_self.initialization_state |= 1
 
 def ImportFrom_get_names(space, w_self):
@@ -4503,6 +4485,7 @@
             raise
         w_self.setdictvalue(space, 'level', w_new_value)
         return
+    w_self.deldictvalue(space, 'level')
     w_self.initialization_state |= 4
 
 _ImportFrom_field_unroller = unrolling_iterable(['module', 'names', 'level'])
@@ -4551,6 +4534,7 @@
             raise
         w_self.setdictvalue(space, 'body', w_new_value)
         return
+    w_self.deldictvalue(space, 'body')
     w_self.initialization_state |= 1
 
 def Exec_get_globals(space, w_self):
@@ -4572,6 +4556,7 @@
             raise
         w_self.setdictvalue(space, 'globals', w_new_value)
         return
+    w_self.deldictvalue(space, 'globals')
     w_self.initialization_state |= 2
 
 def Exec_get_locals(space, w_self):
@@ -4593,6 +4578,7 @@
             raise
         w_self.setdictvalue(space, 'locals', w_new_value)
         return
+    w_self.deldictvalue(space, 'locals')
     w_self.initialization_state |= 4
 
 _Exec_field_unroller = unrolling_iterable(['body', 'globals', 'locals'])
@@ -4683,6 +4669,7 @@
             raise
         w_self.setdictvalue(space, 'value', w_new_value)
         return
+    w_self.deldictvalue(space, 'value')
     w_self.initialization_state |= 1
 
 _Expr_field_unroller = unrolling_iterable(['value'])
@@ -4779,6 +4766,7 @@
             raise
         w_self.setdictvalue(space, 'lineno', w_new_value)
         return
+    w_self.deldictvalue(space, 'lineno')
     w_self.initialization_state |= w_self._lineno_mask
 
 def expr_get_col_offset(space, w_self):
@@ -4800,6 +4788,7 @@
             raise
         w_self.setdictvalue(space, 'col_offset', w_new_value)
         return
+    w_self.deldictvalue(space, 'col_offset')
     w_self.initialization_state |= w_self._col_offset_mask
 
 expr.typedef = typedef.TypeDef("expr",
@@ -4826,13 +4815,13 @@
     try:
         obj = space.interp_w(boolop, w_new_value)
         w_self.op = obj.to_simple_int(space)
-        # need to save the original object too
-        w_self.setdictvalue(space, 'op', w_new_value)
     except OperationError, e:
         if not e.match(space, space.w_TypeError):
             raise
         w_self.setdictvalue(space, 'op', w_new_value)
         return
+    # need to save the original object too
+    w_self.setdictvalue(space, 'op', w_new_value)
     w_self.initialization_state |= 1
 
 def BoolOp_get_values(space, w_self):
@@ -4898,6 +4887,7 @@
             raise
         w_self.setdictvalue(space, 'left', w_new_value)
         return
+    w_self.deldictvalue(space, 'left')
     w_self.initialization_state |= 1
 
 def BinOp_get_op(space, w_self):
@@ -4915,13 +4905,13 @@
     try:
         obj = space.interp_w(operator, w_new_value)
         w_self.op = obj.to_simple_int(space)
-        # need to save the original object too
-        w_self.setdictvalue(space, 'op', w_new_value)
     except OperationError, e:
         if not e.match(space, space.w_TypeError):
             raise
         w_self.setdictvalue(space, 'op', w_new_value)
         return
+    # need to save the original object too
+    w_self.setdictvalue(space, 'op', w_new_value)
     w_self.initialization_state |= 2
 
 def BinOp_get_right(space, w_self):
@@ -4943,6 +4933,7 @@
             raise
         w_self.setdictvalue(space, 'right', w_new_value)
         return
+    w_self.deldictvalue(space, 'right')
     w_self.initialization_state |= 4
 
 _BinOp_field_unroller = unrolling_iterable(['left', 'op', 'right'])
@@ -4986,13 +4977,13 @@
     try:
         obj = space.interp_w(unaryop, w_new_value)
         w_self.op = obj.to_simple_int(space)
-        # need to save the original object too
-        w_self.setdictvalue(space, 'op', w_new_value)
     except OperationError, e:
         if not e.match(space, space.w_TypeError):
             raise
         w_self.setdictvalue(space, 'op', w_new_value)
         return
+    # need to save the original object too
+    w_self.setdictvalue(space, 'op', w_new_value)
     w_self.initialization_state |= 1
 
 def UnaryOp_get_operand(space, w_self):
@@ -5014,6 +5005,7 @@
             raise
         w_self.setdictvalue(space, 'operand', w_new_value)
         return
+    w_self.deldictvalue(space, 'operand')
     w_self.initialization_state |= 2
 
 _UnaryOp_field_unroller = unrolling_iterable(['op', 'operand'])
@@ -5060,6 +5052,7 @@
             raise
         w_self.setdictvalue(space, 'args', w_new_value)
         return
+    w_self.deldictvalue(space, 'args')
     w_self.initialization_state |= 1
 
 def Lambda_get_body(space, w_self):
@@ -5081,6 +5074,7 @@
             raise
         w_self.setdictvalue(space, 'body', w_new_value)
         return
+    w_self.deldictvalue(space, 'body')
     w_self.initialization_state |= 2
 
 _Lambda_field_unroller = unrolling_iterable(['args', 'body'])
@@ -5127,6 +5121,7 @@
             raise
         w_self.setdictvalue(space, 'test', w_new_value)
         return
+    w_self.deldictvalue(space, 'test')
     w_self.initialization_state |= 1
 
 def IfExp_get_body(space, w_self):
@@ -5148,6 +5143,7 @@
             raise
         w_self.setdictvalue(space, 'body', w_new_value)
         return
+    w_self.deldictvalue(space, 'body')
     w_self.initialization_state |= 2
 
 def IfExp_get_orelse(space, w_self):
@@ -5169,6 +5165,7 @@
             raise
         w_self.setdictvalue(space, 'orelse', w_new_value)
         return
+    w_self.deldictvalue(space, 'orelse')
     w_self.initialization_state |= 4
 
 _IfExp_field_unroller = unrolling_iterable(['test', 'body', 'orelse'])
@@ -5322,6 +5319,7 @@
             raise
         w_self.setdictvalue(space, 'elt', w_new_value)
         return
+    w_self.deldictvalue(space, 'elt')
     w_self.initialization_state |= 1
 
 def ListComp_get_generators(space, w_self):
@@ -5387,6 +5385,7 @@
             raise
         w_self.setdictvalue(space, 'elt', w_new_value)
         return
+    w_self.deldictvalue(space, 'elt')
     w_self.initialization_state |= 1
 
 def SetComp_get_generators(space, w_self):
@@ -5452,6 +5451,7 @@
             raise
         w_self.setdictvalue(space, 'key', w_new_value)
         return
+    w_self.deldictvalue(space, 'key')
     w_self.initialization_state |= 1
 
 def DictComp_get_value(space, w_self):
@@ -5473,6 +5473,7 @@
             raise
         w_self.setdictvalue(space, 'value', w_new_value)
         return
+    w_self.deldictvalue(space, 'value')
     w_self.initialization_state |= 2
 
 def DictComp_get_generators(space, w_self):
@@ -5539,6 +5540,7 @@
             raise
         w_self.setdictvalue(space, 'elt', w_new_value)
         return
+    w_self.deldictvalue(space, 'elt')
     w_self.initialization_state |= 1
 
 def GeneratorExp_get_generators(space, w_self):
@@ -5604,6 +5606,7 @@
             raise
         w_self.setdictvalue(space, 'value', w_new_value)
         return
+    w_self.deldictvalue(space, 'value')
     w_self.initialization_state |= 1
 
 _Yield_field_unroller = unrolling_iterable(['value'])
@@ -5649,6 +5652,7 @@
             raise
         w_self.setdictvalue(space, 'left', w_new_value)
         return
+    w_self.deldictvalue(space, 'left')
     w_self.initialization_state |= 1
 
 def Compare_get_ops(space, w_self):
@@ -5734,6 +5738,7 @@
             raise
         w_self.setdictvalue(space, 'func', w_new_value)
         return
+    w_self.deldictvalue(space, 'func')
     w_self.initialization_state |= 1
 
 def Call_get_args(space, w_self):
@@ -5791,6 +5796,7 @@
             raise
         w_self.setdictvalue(space, 'starargs', w_new_value)
         return
+    w_self.deldictvalue(space, 'starargs')
     w_self.initialization_state |= 8
 
 def Call_get_kwargs(space, w_self):
@@ -5812,6 +5818,7 @@
             raise
         w_self.setdictvalue(space, 'kwargs', w_new_value)
         return
+    w_self.deldictvalue(space, 'kwargs')
     w_self.initialization_state |= 16
 
 _Call_field_unroller = unrolling_iterable(['func', 'args', 'keywords', 'starargs', 'kwargs'])
@@ -5863,6 +5870,7 @@
             raise
         w_self.setdictvalue(space, 'value', w_new_value)
         return
+    w_self.deldictvalue(space, 'value')
     w_self.initialization_state |= 1
 
 _Repr_field_unroller = unrolling_iterable(['value'])
@@ -5908,6 +5916,7 @@
             raise
         w_self.setdictvalue(space, 'n', w_new_value)
         return
+    w_self.deldictvalue(space, 'n')
     w_self.initialization_state |= 1
 
 _Num_field_unroller = unrolling_iterable(['n'])
@@ -5953,6 +5962,7 @@
             raise
         w_self.setdictvalue(space, 's', w_new_value)
         return
+    w_self.deldictvalue(space, 's')
     w_self.initialization_state |= 1
 
 _Str_field_unroller = unrolling_iterable(['s'])
@@ -5998,6 +6008,7 @@
             raise
         w_self.setdictvalue(space, 'value', w_new_value)
         return
+    w_self.deldictvalue(space, 'value')
     w_self.initialization_state |= 1
 
 def Attribute_get_attr(space, w_self):
@@ -6019,6 +6030,7 @@
             raise
         w_self.setdictvalue(space, 'attr', w_new_value)
         return
+    w_self.deldictvalue(space, 'attr')
     w_self.initialization_state |= 2
 
 def Attribute_get_ctx(space, w_self):
@@ -6036,13 +6048,13 @@
     try:
         obj = space.interp_w(expr_context, w_new_value)
         w_self.ctx = obj.to_simple_int(space)
-        # need to save the original object too
-        w_self.setdictvalue(space, 'ctx', w_new_value)
     except OperationError, e:
         if not e.match(space, space.w_TypeError):
             raise
         w_self.setdictvalue(space, 'ctx', w_new_value)
         return
+    # need to save the original object too
+    w_self.setdictvalue(space, 'ctx', w_new_value)
     w_self.initialization_state |= 4
 
 _Attribute_field_unroller = unrolling_iterable(['value', 'attr', 'ctx'])
@@ -6090,6 +6102,7 @@
             raise
         w_self.setdictvalue(space, 'value', w_new_value)
         return
+    w_self.deldictvalue(space, 'value')
     w_self.initialization_state |= 1
 
 def Subscript_get_slice(space, w_self):
@@ -6111,6 +6124,7 @@
             raise
         w_self.setdictvalue(space, 'slice', w_new_value)
         return
+    w_self.deldictvalue(space, 'slice')
     w_self.initialization_state |= 2
 
 def Subscript_get_ctx(space, w_self):
@@ -6128,13 +6142,13 @@
     try:
         obj = space.interp_w(expr_context, w_new_value)
         w_self.ctx = obj.to_simple_int(space)
-        # need to save the original object too
-        w_self.setdictvalue(space, 'ctx', w_new_value)
     except OperationError, e:
         if not e.match(space, space.w_TypeError):
             raise
         w_self.setdictvalue(space, 'ctx', w_new_value)
         return
+    # need to save the original object too
+    w_self.setdictvalue(space, 'ctx', w_new_value)
     w_self.initialization_state |= 4
 
 _Subscript_field_unroller = unrolling_iterable(['value', 'slice', 'ctx'])
@@ -6182,6 +6196,7 @@
             raise
         w_self.setdictvalue(space, 'id', w_new_value)
         return
+    w_self.deldictvalue(space, 'id')
     w_self.initialization_state |= 1
 
 def Name_get_ctx(space, w_self):
@@ -6199,13 +6214,13 @@
     try:
         obj = space.interp_w(expr_context, w_new_value)
         w_self.ctx = obj.to_simple_int(space)
-        # need to save the original object too
-        w_self.setdictvalue(space, 'ctx', w_new_value)
     except OperationError, e:
         if not e.match(space, space.w_TypeError):
             raise
         w_self.setdictvalue(space, 'ctx', w_new_value)
         return
+    # need to save the original object too
+    w_self.setdictvalue(space, 'ctx', w_new_value)
     w_self.initialization_state |= 2
 
 _Name_field_unroller = unrolling_iterable(['id', 'ctx'])
@@ -6266,13 +6281,13 @@
     try:
         obj = space.interp_w(expr_context, w_new_value)
         w_self.ctx = obj.to_simple_int(space)
-        # need to save the original object too
-        w_self.setdictvalue(space, 'ctx', w_new_value)
     except OperationError, e:
         if not e.match(space, space.w_TypeError):
             raise
         w_self.setdictvalue(space, 'ctx', w_new_value)
         return
+    # need to save the original object too
+    w_self.setdictvalue(space, 'ctx', w_new_value)
     w_self.initialization_state |= 2
 
 _List_field_unroller = unrolling_iterable(['elts', 'ctx'])
@@ -6334,13 +6349,13 @@
     try:
         obj = space.interp_w(expr_context, w_new_value)
         w_self.ctx = obj.to_simple_int(space)
-        # need to save the original object too
-        w_self.setdictvalue(space, 'ctx', w_new_value)
     except OperationError, e:
         if not e.match(space, space.w_TypeError):
             raise
         w_self.setdictvalue(space, 'ctx', w_new_value)
         return
+    # need to save the original object too
+    w_self.setdictvalue(space, 'ctx', w_new_value)
     w_self.initialization_state |= 2
 
 _Tuple_field_unroller = unrolling_iterable(['elts', 'ctx'])
@@ -6388,6 +6403,7 @@
             raise
         w_self.setdictvalue(space, 'value', w_new_value)
         return
+    w_self.deldictvalue(space, 'value')
     w_self.initialization_state |= 1
 
 _Const_field_unroller = unrolling_iterable(['value'])
@@ -6506,6 +6522,7 @@
             raise
         w_self.setdictvalue(space, 'lower', w_new_value)
         return
+    w_self.deldictvalue(space, 'lower')
     w_self.initialization_state |= 1
 
 def Slice_get_upper(space, w_self):
@@ -6527,6 +6544,7 @@
             raise
         w_self.setdictvalue(space, 'upper', w_new_value)
         return
+    w_self.deldictvalue(space, 'upper')
     w_self.initialization_state |= 2
 
 def Slice_get_step(space, w_self):
@@ -6548,6 +6566,7 @@
             raise
         w_self.setdictvalue(space, 'step', w_new_value)
         return
+    w_self.deldictvalue(space, 'step')
     w_self.initialization_state |= 4
 
 _Slice_field_unroller = unrolling_iterable(['lower', 'upper', 'step'])
@@ -6638,6 +6657,7 @@
             raise
         w_self.setdictvalue(space, 'value', w_new_value)
         return
+    w_self.deldictvalue(space, 'value')
     w_self.initialization_state |= 1
 
 _Index_field_unroller = unrolling_iterable(['value'])
@@ -6907,6 +6927,7 @@
             raise
         w_self.setdictvalue(space, 'target', w_new_value)
         return
+    w_self.deldictvalue(space, 'target')
     w_self.initialization_state |= 1
 
 def comprehension_get_iter(space, w_self):
@@ -6928,6 +6949,7 @@
             raise
         w_self.setdictvalue(space, 'iter', w_new_value)
         return
+    w_self.deldictvalue(space, 'iter')
     w_self.initialization_state |= 2
 
 def comprehension_get_ifs(space, w_self):
@@ -6994,6 +7016,7 @@
             raise
         w_self.setdictvalue(space, 'lineno', w_new_value)
         return
+    w_self.deldictvalue(space, 'lineno')
     w_self.initialization_state |= w_self._lineno_mask
 
 def excepthandler_get_col_offset(space, w_self):
@@ -7015,6 +7038,7 @@
             raise
         w_self.setdictvalue(space, 'col_offset', w_new_value)
         return
+    w_self.deldictvalue(space, 'col_offset')
     w_self.initialization_state |= w_self._col_offset_mask
 
 excepthandler.typedef = typedef.TypeDef("excepthandler",
@@ -7045,6 +7069,7 @@
             raise
         w_self.setdictvalue(space, 'type', w_new_value)
         return
+    w_self.deldictvalue(space, 'type')
     w_self.initialization_state |= 1
 
 def ExceptHandler_get_name(space, w_self):
@@ -7066,6 +7091,7 @@
             raise
         w_self.setdictvalue(space, 'name', w_new_value)
         return
+    w_self.deldictvalue(space, 'name')
     w_self.initialization_state |= 2
 
 def ExceptHandler_get_body(space, w_self):
@@ -7153,6 +7179,7 @@
             raise
         w_self.setdictvalue(space, 'vararg', w_new_value)
         return
+    w_self.deldictvalue(space, 'vararg')
     w_self.initialization_state |= 2
 
 def arguments_get_kwarg(space, w_self):
@@ -7177,6 +7204,7 @@
             raise
         w_self.setdictvalue(space, 'kwarg', w_new_value)
         return
+    w_self.deldictvalue(space, 'kwarg')
     w_self.initialization_state |= 4
 
 def arguments_get_defaults(space, w_self):
@@ -7245,6 +7273,7 @@
             raise
         w_self.setdictvalue(space, 'arg', w_new_value)
         return
+    w_self.deldictvalue(space, 'arg')
     w_self.initialization_state |= 1
 
 def keyword_get_value(space, w_self):
@@ -7266,6 +7295,7 @@
             raise
         w_self.setdictvalue(space, 'value', w_new_value)
         return
+    w_self.deldictvalue(space, 'value')
     w_self.initialization_state |= 2
 
 _keyword_field_unroller = unrolling_iterable(['arg', 'value'])
@@ -7312,6 +7342,7 @@
             raise
         w_self.setdictvalue(space, 'name', w_new_value)
         return
+    w_self.deldictvalue(space, 'name')
     w_self.initialization_state |= 1
 
 def alias_get_asname(space, w_self):
@@ -7336,6 +7367,7 @@
             raise
         w_self.setdictvalue(space, 'asname', w_new_value)
         return
+    w_self.deldictvalue(space, 'asname')
     w_self.initialization_state |= 2
 
 _alias_field_unroller = unrolling_iterable(['name', 'asname'])
diff --git a/pypy/interpreter/astcompiler/codegen.py b/pypy/interpreter/astcompiler/codegen.py
--- a/pypy/interpreter/astcompiler/codegen.py
+++ b/pypy/interpreter/astcompiler/codegen.py
@@ -295,15 +295,11 @@
     def visit_FunctionDef(self, func):
         self.update_position(func.lineno, True)
         # Load decorators first, but apply them after the function is created.
-        if func.decorator_list:
-            self.visit_sequence(func.decorator_list)
+        self.visit_sequence(func.decorator_list)
         args = func.args
         assert isinstance(args, ast.arguments)
-        if args.defaults:
-            self.visit_sequence(args.defaults)
-            num_defaults = len(args.defaults)
-        else:
-            num_defaults = 0
+        self.visit_sequence(args.defaults)
+        num_defaults = len(args.defaults) if args.defaults is not None else 0
         code = self.sub_scope(FunctionCodeGenerator, func.name, func,
                               func.lineno)
         self._make_function(code, num_defaults)
@@ -317,24 +313,17 @@
         self.update_position(lam.lineno)
         args = lam.args
         assert isinstance(args, ast.arguments)
-        if args.defaults:
-            self.visit_sequence(args.defaults)
-            default_count = len(args.defaults)
-        else:
-            default_count = 0
+        self.visit_sequence(args.defaults)
+        default_count = len(args.defaults) if args.defaults is not None else 0
         code = self.sub_scope(LambdaCodeGenerator, "<lambda>", lam, lam.lineno)
         self._make_function(code, default_count)
 
     def visit_ClassDef(self, cls):
         self.update_position(cls.lineno, True)
-        if cls.decorator_list:
-            self.visit_sequence(cls.decorator_list)
+        self.visit_sequence(cls.decorator_list)
         self.load_const(self.space.wrap(cls.name))
-        if cls.bases:
-            bases_count = len(cls.bases)
-            self.visit_sequence(cls.bases)
-        else:
-            bases_count = 0
+        self.visit_sequence(cls.bases)
+        bases_count = len(cls.bases) if cls.bases is not None else 0
         self.emit_op_arg(ops.BUILD_TUPLE, bases_count)
         code = self.sub_scope(ClassCodeGenerator, cls.name, cls, cls.lineno)
         self._make_function(code, 0)
@@ -446,8 +435,7 @@
         end = self.new_block()
         test_constant = if_.test.as_constant_truth(self.space)
         if test_constant == optimize.CONST_FALSE:
-            if if_.orelse:
-                self.visit_sequence(if_.orelse)
+            self.visit_sequence(if_.orelse)
         elif test_constant == optimize.CONST_TRUE:
             self.visit_sequence(if_.body)
         else:
@@ -515,16 +503,14 @@
         self.use_next_block(cleanup)
         self.emit_op(ops.POP_BLOCK)
         self.pop_frame_block(F_BLOCK_LOOP, start)
-        if fr.orelse:
-            self.visit_sequence(fr.orelse)
+        self.visit_sequence(fr.orelse)
         self.use_next_block(end)
 
     def visit_While(self, wh):
         self.update_position(wh.lineno, True)
         test_constant = wh.test.as_constant_truth(self.space)
         if test_constant == optimize.CONST_FALSE:
-            if wh.orelse:
-                self.visit_sequence(wh.orelse)
+            self.visit_sequence(wh.orelse)
         else:
             end = self.new_block()
             anchor = None
@@ -544,8 +530,7 @@
                 self.use_next_block(anchor)
                 self.emit_op(ops.POP_BLOCK)
             self.pop_frame_block(F_BLOCK_LOOP, loop)
-            if wh.orelse:
-                self.visit_sequence(wh.orelse)
+            self.visit_sequence(wh.orelse)
             self.use_next_block(end)
 
     def visit_TryExcept(self, te):
@@ -581,8 +566,7 @@
             self.use_next_block(next_except)
         self.emit_op(ops.END_FINALLY)
         self.use_next_block(otherwise)
-        if te.orelse:
-            self.visit_sequence(te.orelse)
+        self.visit_sequence(te.orelse)
         self.use_next_block(end)
 
     def visit_TryFinally(self, tf):
@@ -893,27 +877,19 @@
 
     def visit_Tuple(self, tup):
         self.update_position(tup.lineno)
-        if tup.elts:
-            elt_count = len(tup.elts)
-        else:
-            elt_count = 0
+        elt_count = len(tup.elts) if tup.elts is not None else 0
         if tup.ctx == ast.Store:
             self.emit_op_arg(ops.UNPACK_SEQUENCE, elt_count)
-        if elt_count:
-            self.visit_sequence(tup.elts)
+        self.visit_sequence(tup.elts)
         if tup.ctx == ast.Load:
             self.emit_op_arg(ops.BUILD_TUPLE, elt_count)
 
     def visit_List(self, l):
         self.update_position(l.lineno)
-        if l.elts:
-            elt_count = len(l.elts)
-        else:
-            elt_count = 0
+        elt_count = len(l.elts) if l.elts is not None else 0
         if l.ctx == ast.Store:
             self.emit_op_arg(ops.UNPACK_SEQUENCE, elt_count)
-        if elt_count:
-            self.visit_sequence(l.elts)
+        self.visit_sequence(l.elts)
         if l.ctx == ast.Load:
             self.emit_op_arg(ops.BUILD_LIST, elt_count)
 
@@ -944,11 +920,9 @@
         if self._optimize_method_call(call):
             return
         call.func.walkabout(self)
-        arg = 0
+        arg = len(call.args) if call.args is not None else 0
         call_type = 0
-        if call.args:
-            arg = len(call.args)
-            self.visit_sequence(call.args)
+        self.visit_sequence(call.args)
         if call.keywords:
             self.visit_sequence(call.keywords)
             arg |= len(call.keywords) << 8
@@ -984,16 +958,10 @@
         assert isinstance(attr_lookup, ast.Attribute)
         attr_lookup.value.walkabout(self)
         self.emit_op_name(ops.LOOKUP_METHOD, self.names, attr_lookup.attr)
-        if call.args:
-            self.visit_sequence(call.args)
-            arg_count = len(call.args)
-        else:
-            arg_count = 0
-        if call.keywords:
-            self.visit_sequence(call.keywords)
-            kwarg_count = len(call.keywords)
-        else:
-            kwarg_count = 0
+        self.visit_sequence(call.args)
+        arg_count = len(call.args) if call.args is not None else 0
+        self.visit_sequence(call.keywords)
+        kwarg_count = len(call.keywords) if call.keywords is not None else 0
         self.emit_op_arg(ops.CALL_METHOD, (kwarg_count << 8) | arg_count)
         return True
 
@@ -1251,7 +1219,10 @@
     def _compile(self, func):
         assert isinstance(func, ast.FunctionDef)
         # If there's a docstring, store it as the first constant.
-        doc_expr = self.possible_docstring(func.body[0])
+        if func.body:
+            doc_expr = self.possible_docstring(func.body[0])
+        else:
+            doc_expr = None
         if doc_expr is not None:
             self.add_const(doc_expr.s)
             start = 1
@@ -1263,8 +1234,9 @@
         if args.args:
             self._handle_nested_args(args.args)
             self.argcount = len(args.args)
-        for i in range(start, len(func.body)):
-            func.body[i].walkabout(self)
+        if func.body:
+            for i in range(start, len(func.body)):
+                func.body[i].walkabout(self)
 
 
 class LambdaCodeGenerator(AbstractFunctionCodeGenerator):
diff --git a/pypy/interpreter/astcompiler/misc.py b/pypy/interpreter/astcompiler/misc.py
--- a/pypy/interpreter/astcompiler/misc.py
+++ b/pypy/interpreter/astcompiler/misc.py
@@ -27,9 +27,10 @@
     _emit_syntax_warning(space, w_msg, w_filename, w_lineno, w_offset)
 
 
-def parse_future(tree):
+def parse_future(tree, feature_flags):
     future_lineno = 0
     future_column = 0
+    flags = 0
     have_docstring = False
     body = None
     if isinstance(tree, ast.Module):
@@ -37,7 +38,7 @@
     elif isinstance(tree, ast.Interactive):
         body = tree.body
     if body is None:
-        return 0, 0
+        return 0, 0, 0
     for stmt in body:
         if isinstance(stmt, ast.Expr) and isinstance(stmt.value, ast.Str):
             if have_docstring:
@@ -48,11 +49,16 @@
             if stmt.module == "__future__":
                 future_lineno = stmt.lineno
                 future_column = stmt.col_offset
+                for alias in stmt.names:
+                    assert isinstance(alias, ast.alias)
+                    # If this is an invalid flag, it will be caught later in
+                    # codegen.py.
+                    flags |= feature_flags.get(alias.name, 0)
             else:
                 break
         else:
             break
-    return future_lineno, future_column
+    return flags, future_lineno, future_column
 
 
 class ForbiddenNameAssignment(Exception):
diff --git a/pypy/interpreter/astcompiler/symtable.py b/pypy/interpreter/astcompiler/symtable.py
--- a/pypy/interpreter/astcompiler/symtable.py
+++ b/pypy/interpreter/astcompiler/symtable.py
@@ -356,10 +356,8 @@
         # Function defaults and decorators happen in the outer scope.
         args = func.args
         assert isinstance(args, ast.arguments)
-        if args.defaults:
-            self.visit_sequence(args.defaults)
-        if func.decorator_list:
-            self.visit_sequence(func.decorator_list)
+        self.visit_sequence(args.defaults)
+        self.visit_sequence(func.decorator_list)
         new_scope = FunctionScope(func.name, func.lineno, func.col_offset)
         self.push_scope(new_scope, func)
         func.args.walkabout(self)
@@ -372,10 +370,8 @@
 
     def visit_ClassDef(self, clsdef):
         self.note_symbol(clsdef.name, SYM_ASSIGNED)
-        if clsdef.bases:
-            self.visit_sequence(clsdef.bases)
-        if clsdef.decorator_list:
-            self.visit_sequence(clsdef.decorator_list)
+        self.visit_sequence(clsdef.bases)
+        self.visit_sequence(clsdef.decorator_list)
         self.push_scope(ClassScope(clsdef), clsdef)
         self.visit_sequence(clsdef.body)
         self.pop_scope()
@@ -431,8 +427,7 @@
     def visit_Lambda(self, lamb):
         args = lamb.args
         assert isinstance(args, ast.arguments)
-        if args.defaults:
-            self.visit_sequence(args.defaults)
+        self.visit_sequence(args.defaults)
         new_scope = FunctionScope("lambda", lamb.lineno, lamb.col_offset)
         self.push_scope(new_scope, lamb)
         lamb.args.walkabout(self)
@@ -447,8 +442,7 @@
         self.push_scope(new_scope, node)
         self.implicit_arg(0)
         outer.target.walkabout(self)
-        if outer.ifs:
-            self.visit_sequence(outer.ifs)
+        self.visit_sequence(outer.ifs)
         self.visit_sequence(comps[1:])
         for item in list(consider):
             item.walkabout(self)
diff --git a/pypy/interpreter/astcompiler/tools/asdl_py.py b/pypy/interpreter/astcompiler/tools/asdl_py.py
--- a/pypy/interpreter/astcompiler/tools/asdl_py.py
+++ b/pypy/interpreter/astcompiler/tools/asdl_py.py
@@ -221,8 +221,9 @@
         self.emit("class ASTVisitor(object):")
         self.emit("")
         self.emit("def visit_sequence(self, seq):", 1)
-        self.emit("for node in seq:", 2)
-        self.emit("node.walkabout(self)", 3)
+        self.emit("if seq is not None:", 2)
+        self.emit("for node in seq:", 3)
+        self.emit("node.walkabout(self)", 4)
         self.emit("")
         self.emit("def default_visitor(self, node):", 1)
         self.emit("raise NodeVisitorNotImplemented", 2)
@@ -280,15 +281,13 @@
     def visitField(self, field):
         if field.type.value not in asdl.builtin_types and \
                 field.type.value not in self.data.simple_types:
-            if field.seq or field.opt:
+            level = 2
+            template = "node.%s.walkabout(self)"
+            if field.seq:
+                template = "self.visit_sequence(node.%s)"
+            elif field.opt:
                 self.emit("if node.%s:" % (field.name,), 2)
                 level = 3
-            else:
-                level = 2
-            if field.seq:
-                template = "self.visit_sequence(node.%s)"
-            else:
-                template = "node.%s.walkabout(self)"
             self.emit(template % (field.name,), level)
             return True
         return False
@@ -446,6 +445,7 @@
         if field.seq:
             self.emit("w_self.w_%s = w_new_value" % (field.name,), 1)
         else:
+            save_original_object = False
             self.emit("try:", 1)
             if field.type.value not in asdl.builtin_types:
                 # These are always other AST nodes.
@@ -454,9 +454,7 @@
                                   (field.type,), 2)
                     self.emit("w_self.%s = obj.to_simple_int(space)" %
                               (field.name,), 2)
-                    self.emit("# need to save the original object too", 2)
-                    self.emit("w_self.setdictvalue(space, '%s', w_new_value)"
-                              % (field.name,), 2)
+                    save_original_object = True
                 else:
                     config = (field.name, field.type, repr(field.opt))
                     self.emit("w_self.%s = space.interp_w(%s, w_new_value, %s)" %
@@ -480,6 +478,12 @@
             self.emit("    w_self.setdictvalue(space, '%s', w_new_value)"
                       % (field.name,), 1)
             self.emit("    return", 1)
+            if save_original_object:
+                self.emit("# need to save the original object too", 1)
+                self.emit("w_self.setdictvalue(space, '%s', w_new_value)"
+                          % (field.name,), 1)
+            else:
+                self.emit("w_self.deldictvalue(space, '%s')" %(field.name,), 1)
         self.emit("w_self.initialization_state |= %s" % (flag,), 1)
         self.emit("")
 
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -44,11 +44,11 @@
             return True
         return False
 
-    def deldictvalue(self, space, w_name):
+    def deldictvalue(self, space, attr):
         w_dict = self.getdict(space)
         if w_dict is not None:
             try:
-                space.delitem(w_dict, w_name)
+                space.delitem(w_dict, space.wrap(attr))
                 return True
             except OperationError, ex:
                 if not ex.match(space, space.w_KeyError):
@@ -111,6 +111,9 @@
     def setslotvalue(self, index, w_val):
         raise NotImplementedError
 
+    def delslotvalue(self, index):
+        raise NotImplementedError
+
     def descr_call_mismatch(self, space, opname, RequiredClass, args):
         if RequiredClass is None:
             classname = '?'
@@ -1284,6 +1287,17 @@
                                  self.wrap("expected a 32-bit integer"))
         return value
 
+    def truncatedint(self, w_obj):
+        # Like space.gateway_int_w(), but return the integer truncated
+        # instead of raising OverflowError.  For obscure cases only.
+        try:
+            return self.int_w(w_obj)
+        except OperationError, e:
+            if not e.match(self, self.w_OverflowError):
+                raise
+            from pypy.rlib.rarithmetic import intmask
+            return intmask(self.bigint_w(w_obj).uintmask())
+
     def c_filedescriptor_w(self, w_fd):
         # This is only used sometimes in CPython, e.g. for os.fsync() but
         # not os.close().  It's likely designed for 'select'.  It's irregular
diff --git a/pypy/interpreter/error.py b/pypy/interpreter/error.py
--- a/pypy/interpreter/error.py
+++ b/pypy/interpreter/error.py
@@ -189,7 +189,7 @@
             if space.is_w(w_value, space.w_None):
                 # raise Type: we assume we have to instantiate Type
                 w_value = space.call_function(w_type)
-                w_type = space.exception_getclass(w_value)
+                w_type = self._exception_getclass(space, w_value)
             else:
                 w_valuetype = space.exception_getclass(w_value)
                 if space.exception_issubclass_w(w_valuetype, w_type):
@@ -204,18 +204,12 @@
                     else:
                         # raise Type, X: assume X is the constructor argument
                         w_value = space.call_function(w_type, w_value)
-                    w_type = space.exception_getclass(w_value)
+                    w_type = self._exception_getclass(space, w_value)
 
         else:
             # the only case left here is (inst, None), from a 'raise inst'.
             w_inst = w_type
-            w_instclass = space.exception_getclass(w_inst)
-            if not space.exception_is_valid_class_w(w_instclass):
-                instclassname = w_instclass.getname(space)
-                msg = ("exceptions must be old-style classes or derived "
-                       "from BaseException, not %s")
-                raise operationerrfmt(space.w_TypeError, msg, instclassname)
-
+            w_instclass = self._exception_getclass(space, w_inst)
             if not space.is_w(w_value, space.w_None):
                 raise OperationError(space.w_TypeError,
                                      space.wrap("instance exception may not "
@@ -226,6 +220,15 @@
         self.w_type   = w_type
         self._w_value = w_value
 
+    def _exception_getclass(self, space, w_inst):
+        w_type = space.exception_getclass(w_inst)
+        if not space.exception_is_valid_class_w(w_type):
+            typename = w_type.getname(space)
+            msg = ("exceptions must be old-style classes or derived "
+                   "from BaseException, not %s")
+            raise operationerrfmt(space.w_TypeError, msg, typename)
+        return w_type
+
     def write_unraisable(self, space, where, w_object=None):
         if w_object is None:
             objrepr = ''
diff --git a/pypy/interpreter/function.py b/pypy/interpreter/function.py
--- a/pypy/interpreter/function.py
+++ b/pypy/interpreter/function.py
@@ -31,7 +31,8 @@
     _immutable_fields_ = ['code?',
                           'w_func_globals?',
                           'closure?',
-                          'defs_w?[*]']
+                          'defs_w?[*]',
+                          'name?']
 
     def __init__(self, space, code, w_globals=None, defs_w=[], closure=None,
                  forcename=None):
diff --git a/pypy/interpreter/gateway.py b/pypy/interpreter/gateway.py
--- a/pypy/interpreter/gateway.py
+++ b/pypy/interpreter/gateway.py
@@ -64,7 +64,7 @@
                 self.visit_self(el[1], *args)
             else:
                 self.visit_function(el, *args)
-        else:
+        elif isinstance(el, type):
             for typ in self.bases_order:
                 if issubclass(el, typ):
                     visit = getattr(self, "visit__%s" % (typ.__name__,))
@@ -73,6 +73,8 @@
             else:
                 raise Exception("%s: no match for unwrap_spec element %s" % (
                     self.__class__.__name__, el))
+        else:
+            raise Exception("unable to dispatch, %s, perhaps your parameter should have started with w_?" % el)
 
     def apply_over(self, unwrap_spec, *extra):
         dispatch = self.dispatch
@@ -140,6 +142,9 @@
     def visit_c_nonnegint(self, el, app_sig):
         self.checked_space_method(el, app_sig)
 
+    def visit_truncatedint(self, el, app_sig):
+        self.checked_space_method(el, app_sig)
+
     def visit__Wrappable(self, el, app_sig):
         name = el.__name__
         argname = self.orig_arg()
@@ -257,6 +262,9 @@
     def visit_c_nonnegint(self, typ):
         self.run_args.append("space.c_nonnegint_w(%s)" % (self.scopenext(),))
 
+    def visit_truncatedint(self, typ):
+        self.run_args.append("space.truncatedint(%s)" % (self.scopenext(),))
+
     def _make_unwrap_activation_class(self, unwrap_spec, cache={}):
         try:
             key = tuple(unwrap_spec)
@@ -387,6 +395,9 @@
     def visit_c_nonnegint(self, typ):
         self.unwrap.append("space.c_nonnegint_w(%s)" % (self.nextarg(),))
 
+    def visit_truncatedint(self, typ):
+        self.unwrap.append("space.truncatedint(%s)" % (self.nextarg(),))
+
     def make_fastfunc(unwrap_spec, func):
         unwrap_info = UnwrapSpec_FastFunc_Unwrap()
         unwrap_info.apply_over(unwrap_spec)
diff --git a/pypy/interpreter/generator.py b/pypy/interpreter/generator.py
--- a/pypy/interpreter/generator.py
+++ b/pypy/interpreter/generator.py
@@ -145,11 +145,11 @@
     def __del__(self):
         # Only bother enqueuing self to raise an exception if the frame is
         # still not finished and finally or except blocks are present.
+        self.clear_all_weakrefs()
         if self.frame is not None:
             block = self.frame.lastblock
             while block is not None:
                 if not isinstance(block, LoopBlock):
-                    self.clear_all_weakrefs()
                     self.enqueue_for_destruction(self.space,
                                                  GeneratorIterator.descr_close,
                                                  "interrupting generator of ")
diff --git a/pypy/interpreter/pycompiler.py b/pypy/interpreter/pycompiler.py
--- a/pypy/interpreter/pycompiler.py
+++ b/pypy/interpreter/pycompiler.py
@@ -119,7 +119,10 @@
             raise OperationError(self.space.w_TypeError, self.space.wrap(
                 "invalid node type"))
 
-        future_pos = misc.parse_future(node)
+        fut = misc.parse_future(node, self.future_flags.compiler_features)
+        f_flags, f_lineno, f_col = fut
+        future_pos = f_lineno, f_col
+        flags |= f_flags
         info = pyparse.CompileInfo(filename, mode, flags, future_pos)
         return self._compile_ast(node, info)
 
diff --git a/pypy/interpreter/pyopcode.py b/pypy/interpreter/pyopcode.py
--- a/pypy/interpreter/pyopcode.py
+++ b/pypy/interpreter/pyopcode.py
@@ -905,16 +905,15 @@
 
     def SETUP_WITH(self, offsettoend, next_instr):
         w_manager = self.peekvalue()
+        w_enter = self.space.lookup(w_manager, "__enter__")
         w_descr = self.space.lookup(w_manager, "__exit__")
-        if w_descr is None:
-            raise OperationError(self.space.w_AttributeError,
-                                 self.space.wrap("__exit__"))
+        if w_enter is None or w_descr is None:
+            typename = self.space.type(w_manager).getname(self.space)
+            raise operationerrfmt(self.space.w_AttributeError,
+                "'%s' object is not a context manager"
+                " (no __enter__/__exit__ method)", typename)
         w_exit = self.space.get(w_descr, w_manager)
         self.settopvalue(w_exit)
-        w_enter = self.space.lookup(w_manager, "__enter__")
-        if w_enter is None:
-            raise OperationError(self.space.w_AttributeError,
-                                 self.space.wrap("__enter__"))
         w_result = self.space.get_and_call_function(w_enter, w_manager)
         block = WithBlock(self, next_instr + offsettoend)
         self.append_block(block)
diff --git a/pypy/interpreter/test/test_raise.py b/pypy/interpreter/test/test_raise.py
--- a/pypy/interpreter/test/test_raise.py
+++ b/pypy/interpreter/test/test_raise.py
@@ -274,3 +274,9 @@
             pass
         except A:
             pass
+
+    def test_new_returns_bad_instance(self):
+        class MyException(Exception):
+            def __new__(cls, *args):
+                return object()
+        raises(TypeError, "raise MyException")
diff --git a/pypy/interpreter/typedef.py b/pypy/interpreter/typedef.py
--- a/pypy/interpreter/typedef.py
+++ b/pypy/interpreter/typedef.py
@@ -23,7 +23,7 @@
             self.hasdict     |= __base.hasdict
             self.weakrefable |= __base.weakrefable
         self.rawdict = {}
-        self.acceptable_as_base_class = True
+        self.acceptable_as_base_class = '__new__' in rawdict
         self.applevel_subclasses_base = None
         # xxx used by faking
         self.fakedcpytype = None
@@ -258,6 +258,11 @@
                     self.slots_w = [None] * nslots
             def setslotvalue(self, index, w_value):
                 self.slots_w[index] = w_value
+            def delslotvalue(self, index):
+                if self.slots_w[index] is None:
+                    return False
+                self.slots_w[index] = None
+                return True
             def getslotvalue(self, index):
                 return self.slots_w[index]
         add(Proto)
@@ -530,11 +535,10 @@
         """member.__delete__(obj)
         Delete the value of the slot 'member' from the given 'obj'."""
         self.typecheck(space, w_obj)
-        w_oldresult = w_obj.getslotvalue(self.index)
-        if w_oldresult is None:
+        success = w_obj.delslotvalue(self.index)
+        if not success:
             raise OperationError(space.w_AttributeError,
                                  space.wrap(self.name)) # XXX better message
-        w_obj.setslotvalue(self.index, None)
 
 Member.typedef = TypeDef(
     "member_descriptor",
diff --git a/pypy/jit/backend/llgraph/llimpl.py b/pypy/jit/backend/llgraph/llimpl.py
--- a/pypy/jit/backend/llgraph/llimpl.py
+++ b/pypy/jit/backend/llgraph/llimpl.py
@@ -1071,6 +1071,8 @@
         return heaptracker.adr2int(llmemory.cast_ptr_to_adr(x))
     if TP == llmemory.Address:
         return heaptracker.adr2int(x)
+    if TP is lltype.SingleFloat:
+        return longlong.singlefloat2int(x)
     return lltype.cast_primitive(lltype.Signed, x)
 
 def cast_from_int(TYPE, x):
@@ -1086,6 +1088,9 @@
             x = llmemory.cast_int_to_adr(x)
         assert lltype.typeOf(x) == llmemory.Address
         return x
+    elif TYPE is lltype.SingleFloat:
+        assert lltype.typeOf(x) is lltype.Signed
+        return longlong.int2singlefloat(x)
     else:
         if lltype.typeOf(x) == llmemory.Address:
             x = heaptracker.adr2int(x)
@@ -1140,6 +1145,7 @@
     del _future_values[:]
 
 def set_future_value_int(index, value):
+    assert lltype.typeOf(value) is lltype.Signed
     set_future_value_ref(index, value)
 
 def set_future_value_float(index, value):
@@ -1488,6 +1494,7 @@
     'i': lltype.Signed,
     'f': lltype.Float,
     'L': lltype.SignedLongLong,
+    'S': lltype.SingleFloat,
     'v': lltype.Void,
     }
 
diff --git a/pypy/jit/backend/llgraph/runner.py b/pypy/jit/backend/llgraph/runner.py
--- a/pypy/jit/backend/llgraph/runner.py
+++ b/pypy/jit/backend/llgraph/runner.py
@@ -91,6 +91,7 @@
 class BaseCPU(model.AbstractCPU):
     supports_floats = True
     supports_longlong = llimpl.IS_32_BIT
+    supports_singlefloats = True
 
     def __init__(self, rtyper, stats=None, opts=None,
                  translate_support_code=False,
@@ -327,12 +328,16 @@
 
     def calldescrof_dynamic(self, ffi_args, ffi_result, extrainfo=None):
         from pypy.jit.backend.llsupport.ffisupport import get_ffi_type_kind
+        from pypy.jit.backend.llsupport.ffisupport import UnsupportedKind
         arg_types = []
-        for arg in ffi_args:
-            kind = get_ffi_type_kind(arg)
-            if kind != history.VOID:
-                arg_types.append(kind)
-        reskind = get_ffi_type_kind(ffi_result)
+        try:
+            for arg in ffi_args:
+                kind = get_ffi_type_kind(self, arg)
+                if kind != history.VOID:
+                    arg_types.append(kind)
+            reskind = get_ffi_type_kind(self, ffi_result)
+        except UnsupportedKind:
+            return None
         return self.getdescr(0, reskind, extrainfo=extrainfo,
                              arg_types=''.join(arg_types))
 
diff --git a/pypy/jit/backend/llgraph/test/test_llgraph.py b/pypy/jit/backend/llgraph/test/test_llgraph.py
--- a/pypy/jit/backend/llgraph/test/test_llgraph.py
+++ b/pypy/jit/backend/llgraph/test/test_llgraph.py
@@ -19,6 +19,9 @@
     def setup_method(self, _):
         self.cpu = self.cpu_type(None)
 
+    def test_memoryerror(self):
+        py.test.skip("does not make much sense on the llgraph backend")
+
 
 def test_cast_adr_to_int_and_back():
     X = lltype.Struct('X', ('foo', lltype.Signed))
diff --git a/pypy/jit/backend/llsupport/descr.py b/pypy/jit/backend/llsupport/descr.py
--- a/pypy/jit/backend/llsupport/descr.py
+++ b/pypy/jit/backend/llsupport/descr.py
@@ -303,6 +303,8 @@
                 c = 'f'
             elif c == 'f' and longlong.supports_longlong:
                 return 'longlong.getrealfloat(%s)' % (process('L'),)
+            elif c == 'S':
+                return 'longlong.int2singlefloat(%s)' % (process('i'),)
             arg = 'args_%s[%d]' % (c, seen[c])
             seen[c] += 1
             return arg
@@ -318,6 +320,8 @@
                 return lltype.Void
             elif arg == 'L':
                 return lltype.SignedLongLong
+            elif arg == 'S':
+                return lltype.SingleFloat
             else:
                 raise AssertionError(arg)
 
@@ -334,6 +338,8 @@
             result = 'rffi.cast(lltype.SignedLongLong, res)'
         elif self.get_return_type() == history.VOID:
             result = 'None'
+        elif self.get_return_type() == 'S':
+            result = 'longlong.singlefloat2int(res)'
         else:
             assert 0
         source = py.code.Source("""
@@ -344,14 +350,15 @@
         """ % locals())
         ARGS = [TYPE(arg) for arg in self.arg_classes]
         FUNC = lltype.FuncType(ARGS, RESULT)
-        d = locals().copy()
-        d.update(globals())
+        d = globals().copy()
+        d.update(locals())
         exec source.compile() in d
         self.call_stub = d['call_stub']
 
     def verify_types(self, args_i, args_r, args_f, return_type):
         assert self._return_type in return_type
-        assert self.arg_classes.count('i') == len(args_i or ())
+        assert (self.arg_classes.count('i') +
+                self.arg_classes.count('S')) == len(args_i or ())
         assert self.arg_classes.count('r') == len(args_r or ())
         assert (self.arg_classes.count('f') +
                 self.arg_classes.count('L')) == len(args_f or ())
@@ -428,23 +435,39 @@
     def get_result_size(self, translate_support_code):
         return 0
 
+_SingleFloatCallDescr = None   # built lazily
+
 def getCallDescrClass(RESULT):
     if RESULT is lltype.Void:
         return VoidCallDescr
     if RESULT is lltype.Float:
         return FloatCallDescr
+    if RESULT is lltype.SingleFloat:
+        global _SingleFloatCallDescr
+        if _SingleFloatCallDescr is None:
+            assert rffi.sizeof(rffi.UINT) == rffi.sizeof(RESULT)
+            class SingleFloatCallDescr(getCallDescrClass(rffi.UINT)):
+                _clsname = 'SingleFloatCallDescr'
+                _return_type = 'S'
+            _SingleFloatCallDescr = SingleFloatCallDescr
+        return _SingleFloatCallDescr
     if is_longlong(RESULT):
         return LongLongCallDescr
     return getDescrClass(RESULT, BaseIntCallDescr, GcPtrCallDescr,
                          NonGcPtrCallDescr, 'Call', 'get_result_size',
                          Ellipsis,  # <= floatattrname should not be used here
                          '_is_result_signed')
+getCallDescrClass._annspecialcase_ = 'specialize:memo'
 
 def get_call_descr(gccache, ARGS, RESULT, extrainfo=None):
     arg_classes = []
     for ARG in ARGS:
         kind = getkind(ARG)
-        if   kind == 'int': arg_classes.append('i')
+        if   kind == 'int':
+            if ARG is lltype.SingleFloat:
+                arg_classes.append('S')
+            else:
+                arg_classes.append('i')
         elif kind == 'ref': arg_classes.append('r')
         elif kind == 'float':
             if is_longlong(ARG):
@@ -476,6 +499,9 @@
             return GcPtrDescr
         else:
             return NonGcPtrDescr
+    if TYPE is lltype.SingleFloat:
+        assert rffi.sizeof(rffi.UINT) == rffi.sizeof(TYPE)
+        TYPE = rffi.UINT
     try:
         return _cache[nameprefix, TYPE]
     except KeyError:
diff --git a/pypy/jit/backend/llsupport/ffisupport.py b/pypy/jit/backend/llsupport/ffisupport.py
--- a/pypy/jit/backend/llsupport/ffisupport.py
+++ b/pypy/jit/backend/llsupport/ffisupport.py
@@ -1,19 +1,21 @@
 from pypy.rlib.rarithmetic import intmask
 from pypy.jit.metainterp import history
-from pypy.jit.backend.llsupport.descr import DynamicIntCallDescr, NonGcPtrCallDescr,\
-    FloatCallDescr, VoidCallDescr
+from pypy.rpython.lltypesystem import rffi
+from pypy.jit.backend.llsupport.descr import (
+    DynamicIntCallDescr, NonGcPtrCallDescr, FloatCallDescr, VoidCallDescr,
+    LongLongCallDescr, getCallDescrClass)
 
 class UnsupportedKind(Exception):
     pass
 
-def get_call_descr_dynamic(ffi_args, ffi_result, extrainfo=None):
+def get_call_descr_dynamic(cpu, ffi_args, ffi_result, extrainfo=None):
     """Get a call descr: the types of result and args are represented by
     rlib.libffi.types.*"""
     try:
-        reskind = get_ffi_type_kind(ffi_result)
-        argkinds = [get_ffi_type_kind(arg) for arg in ffi_args]
+        reskind = get_ffi_type_kind(cpu, ffi_result)
+        argkinds = [get_ffi_type_kind(cpu, arg) for arg in ffi_args]
     except UnsupportedKind:
-        return None # ??
+        return None
     arg_classes = ''.join(argkinds)
     if reskind == history.INT:
         size = intmask(ffi_result.c_size)
@@ -25,17 +27,26 @@
         return FloatCallDescr(arg_classes, extrainfo)
     elif reskind == history.VOID:
         return VoidCallDescr(arg_classes, extrainfo)
+    elif reskind == 'L':
+        return LongLongCallDescr(arg_classes, extrainfo)
+    elif reskind == 'S':
+        SingleFloatCallDescr = getCallDescrClass(rffi.FLOAT)
+        return SingleFloatCallDescr(arg_classes, extrainfo)
     assert False
 
-def get_ffi_type_kind(ffi_type):
+def get_ffi_type_kind(cpu, ffi_type):
     from pypy.rlib.libffi import types
     kind = types.getkind(ffi_type)
     if kind == 'i' or kind == 'u':
         return history.INT
-    elif kind == 'f':
+    elif cpu.supports_floats and kind == 'f':
         return history.FLOAT
     elif kind == 'v':
         return history.VOID
+    elif cpu.supports_longlong and (kind == 'I' or kind == 'U'):     # longlong
+        return 'L'
+    elif cpu.supports_singlefloats and kind == 's':    # singlefloat
+        return 'S'
     raise UnsupportedKind("Unsupported kind '%s'" % kind)
 
 def is_ffi_type_signed(ffi_type):
diff --git a/pypy/jit/backend/llsupport/gc.py b/pypy/jit/backend/llsupport/gc.py
--- a/pypy/jit/backend/llsupport/gc.py
+++ b/pypy/jit/backend/llsupport/gc.py
@@ -453,21 +453,33 @@
 
 class WriteBarrierDescr(AbstractDescr):
     def __init__(self, gc_ll_descr):
+        GCClass = gc_ll_descr.GCClass
         self.llop1 = gc_ll_descr.llop1
         self.WB_FUNCPTR = gc_ll_descr.WB_FUNCPTR
         self.WB_ARRAY_FUNCPTR = gc_ll_descr.WB_ARRAY_FUNCPTR
-        self.fielddescr_tid = get_field_descr(gc_ll_descr,
-                                              gc_ll_descr.GCClass.HDR, 'tid')
-        self.jit_wb_if_flag = gc_ll_descr.GCClass.JIT_WB_IF_FLAG
-        # if convenient for the backend, we also compute the info about
+        self.fielddescr_tid = get_field_descr(gc_ll_descr, GCClass.HDR, 'tid')
+        #
+        self.jit_wb_if_flag = GCClass.JIT_WB_IF_FLAG
+        self.jit_wb_if_flag_byteofs, self.jit_wb_if_flag_singlebyte = (
+            self.extract_flag_byte(self.jit_wb_if_flag))
+        #
+        if hasattr(GCClass, 'JIT_WB_CARDS_SET'):
+            self.jit_wb_cards_set = GCClass.JIT_WB_CARDS_SET
+            self.jit_wb_card_page_shift = GCClass.JIT_WB_CARD_PAGE_SHIFT
+            self.jit_wb_cards_set_byteofs, self.jit_wb_cards_set_singlebyte = (
+                self.extract_flag_byte(self.jit_wb_cards_set))
+        else:
+            self.jit_wb_cards_set = 0
+
+    def extract_flag_byte(self, flag_word):
+        # if convenient for the backend, we compute the info about
         # the flag as (byte-offset, single-byte-flag).
         import struct
-        value = struct.pack("l", self.jit_wb_if_flag)
+        value = struct.pack("l", flag_word)
         assert value.count('\x00') == len(value) - 1    # only one byte is != 0
         i = 0
         while value[i] == '\x00': i += 1
-        self.jit_wb_if_flag_byteofs = i
-        self.jit_wb_if_flag_singlebyte = struct.unpack('b', value[i])[0]
+        return (i, struct.unpack('b', value[i])[0])
 
     def get_write_barrier_fn(self, cpu):
         llop1 = self.llop1
@@ -532,18 +544,19 @@
         assert self.GCClass.inline_simple_malloc
         assert self.GCClass.inline_simple_malloc_varsize
 
-        # make a malloc function, with three arguments
+        # make a malloc function, with two arguments
         def malloc_basic(size, tid):
             type_id = llop.extract_ushort(llgroup.HALFWORD, tid)
             has_finalizer = bool(tid & (1<<llgroup.HALFSHIFT))
             check_typeid(type_id)
-            try:
-                res = llop1.do_malloc_fixedsize_clear(llmemory.GCREF,
-                                                      type_id, size, True,
-                                                      has_finalizer, False)
-            except MemoryError:
-                fatalerror("out of memory (from JITted code)")
-                res = lltype.nullptr(llmemory.GCREF.TO)
+            res = llop1.do_malloc_fixedsize_clear(llmemory.GCREF,
+                                                  type_id, size, True,
+                                                  has_finalizer, False)
+            # In case the operation above failed, we are returning NULL
+            # from this function to assembler.  There is also an RPython
+            # exception set, typically MemoryError; but it's easier and
+            # faster to check for the NULL return value, as done by
+            # translator/exceptiontransform.py.
             #llop.debug_print(lltype.Void, "\tmalloc_basic", size, type_id,
             #                 "-->", res)
             return res
@@ -559,14 +572,10 @@
         def malloc_array(itemsize, tid, num_elem):
             type_id = llop.extract_ushort(llgroup.HALFWORD, tid)
             check_typeid(type_id)
-            try:
-                return llop1.do_malloc_varsize_clear(
-                    llmemory.GCREF,
-                    type_id, num_elem, self.array_basesize, itemsize,
-                    self.array_length_ofs, True)
-            except MemoryError:
-                fatalerror("out of memory (from JITted code)")
-                return lltype.nullptr(llmemory.GCREF.TO)
+            return llop1.do_malloc_varsize_clear(
+                llmemory.GCREF,
+                type_id, num_elem, self.array_basesize, itemsize,
+                self.array_length_ofs, True)
         self.malloc_array = malloc_array
         self.GC_MALLOC_ARRAY = lltype.Ptr(lltype.FuncType(
             [lltype.Signed] * 3, llmemory.GCREF))
@@ -579,23 +588,15 @@
         unicode_type_id = self.layoutbuilder.get_type_id(rstr.UNICODE)
         #
         def malloc_str(length):
-            try:
-                return llop1.do_malloc_varsize_clear(
-                    llmemory.GCREF,
-                    str_type_id, length, str_basesize, str_itemsize,
-                    str_ofs_length, True)
-            except MemoryError:
-                fatalerror("out of memory (from JITted code)")
-                return lltype.nullptr(llmemory.GCREF.TO)
+            return llop1.do_malloc_varsize_clear(
+                llmemory.GCREF,
+                str_type_id, length, str_basesize, str_itemsize,
+                str_ofs_length, True)
         def malloc_unicode(length):
-            try:
-                return llop1.do_malloc_varsize_clear(
-                    llmemory.GCREF,
-                    unicode_type_id, length, unicode_basesize,unicode_itemsize,
-                    unicode_ofs_length, True)
-            except MemoryError:
-                fatalerror("out of memory (from JITted code)")
-                return lltype.nullptr(llmemory.GCREF.TO)
+            return llop1.do_malloc_varsize_clear(
+                llmemory.GCREF,
+                unicode_type_id, length, unicode_basesize,unicode_itemsize,
+                unicode_ofs_length, True)
         self.malloc_str = malloc_str
         self.malloc_unicode = malloc_unicode
         self.GC_MALLOC_STR_UNICODE = lltype.Ptr(lltype.FuncType(
@@ -616,16 +617,12 @@
             if self.DEBUG:
                 random_usage_of_xmm_registers()
             assert size >= self.minimal_size_in_nursery
-            try:
-                # NB. although we call do_malloc_fixedsize_clear() here,
-                # it's a bit of a hack because we set tid to 0 and may
-                # also use it to allocate varsized objects.  The tid
-                # and possibly the length are both set afterward.
-                gcref = llop1.do_malloc_fixedsize_clear(llmemory.GCREF,
-                                            0, size, True, False, False)
-            except MemoryError:
-                fatalerror("out of memory (from JITted code)")
-                return 0
+            # NB. although we call do_malloc_fixedsize_clear() here,
+            # it's a bit of a hack because we set tid to 0 and may
+            # also use it to allocate varsized objects.  The tid
+            # and possibly the length are both set afterward.
+            gcref = llop1.do_malloc_fixedsize_clear(llmemory.GCREF,
+                                        0, size, True, False, False)
             return rffi.cast(lltype.Signed, gcref)
         self.malloc_slowpath = malloc_slowpath
         self.MALLOC_SLOWPATH = lltype.FuncType([lltype.Signed], lltype.Signed)
diff --git a/pypy/jit/backend/llsupport/llmodel.py b/pypy/jit/backend/llsupport/llmodel.py
--- a/pypy/jit/backend/llsupport/llmodel.py
+++ b/pypy/jit/backend/llsupport/llmodel.py
@@ -259,7 +259,7 @@
 
     def calldescrof_dynamic(self, ffi_args, ffi_result, extrainfo=None):
         from pypy.jit.backend.llsupport import ffisupport
-        return ffisupport.get_call_descr_dynamic(ffi_args, ffi_result,
+        return ffisupport.get_call_descr_dynamic(self, ffi_args, ffi_result,
                                                  extrainfo)
 
     def get_overflow_error(self):
@@ -499,7 +499,7 @@
     def bh_call_i(self, func, calldescr, args_i, args_r, args_f):
         assert isinstance(calldescr, BaseIntCallDescr)
         if not we_are_translated():
-            calldescr.verify_types(args_i, args_r, args_f, history.INT)
+            calldescr.verify_types(args_i, args_r, args_f, history.INT + 'S')
         return calldescr.call_stub(func, args_i, args_r, args_f)
 
     def bh_call_r(self, func, calldescr, args_i, args_r, args_f):
diff --git a/pypy/jit/backend/llsupport/test/test_descr.py b/pypy/jit/backend/llsupport/test/test_descr.py
--- a/pypy/jit/backend/llsupport/test/test_descr.py
+++ b/pypy/jit/backend/llsupport/test/test_descr.py
@@ -52,7 +52,8 @@
     S = lltype.GcStruct('S', ('x', lltype.Char),
                              ('y', lltype.Ptr(T)),
                              ('z', lltype.Ptr(U)),
-                             ('f', lltype.Float))
+                             ('f', lltype.Float),
+                             ('s', lltype.SingleFloat))
     assert getFieldDescrClass(lltype.Ptr(T)) is GcPtrFieldDescr
     assert getFieldDescrClass(lltype.Ptr(U)) is NonGcPtrFieldDescr
     cls = getFieldDescrClass(lltype.Char)
@@ -61,6 +62,10 @@
     clsf = getFieldDescrClass(lltype.Float)
     assert clsf != cls
     assert clsf == getFieldDescrClass(lltype.Float)
+    clss = getFieldDescrClass(lltype.SingleFloat)
+    assert clss not in (cls, clsf)
+    assert clss == getFieldDescrClass(lltype.SingleFloat)
+    assert clss == getFieldDescrClass(rffi.UINT)    # for now
     #
     c0 = GcCache(False)
     c1 = GcCache(True)
@@ -72,14 +77,17 @@
         descr_y = get_field_descr(c2, S, 'y')
         descr_z = get_field_descr(c2, S, 'z')
         descr_f = get_field_descr(c2, S, 'f')
+        descr_s = get_field_descr(c2, S, 's')
         assert descr_x.__class__ is cls
         assert descr_y.__class__ is GcPtrFieldDescr
         assert descr_z.__class__ is NonGcPtrFieldDescr
         assert descr_f.__class__ is clsf
+        assert descr_s.__class__ is clss
         assert descr_x.name == 'S.x'
         assert descr_y.name == 'S.y'
         assert descr_z.name == 'S.z'
         assert descr_f.name == 'S.f'
+        assert descr_s.name == 'S.s'
         if not tsc:
             assert descr_x.offset < descr_y.offset < descr_z.offset
             assert descr_x.sort_key() < descr_y.sort_key() < descr_z.sort_key()
@@ -87,23 +95,29 @@
             assert descr_y.get_field_size(False) == rffi.sizeof(lltype.Ptr(T))
             assert descr_z.get_field_size(False) == rffi.sizeof(lltype.Ptr(U))
             assert descr_f.get_field_size(False) == rffi.sizeof(lltype.Float)
+            assert descr_s.get_field_size(False) == rffi.sizeof(
+                                                            lltype.SingleFloat)
         else:
             assert isinstance(descr_x.offset, Symbolic)
             assert isinstance(descr_y.offset, Symbolic)
             assert isinstance(descr_z.offset, Symbolic)
             assert isinstance(descr_f.offset, Symbolic)
+            assert isinstance(descr_s.offset, Symbolic)
             assert isinstance(descr_x.get_field_size(True), Symbolic)
             assert isinstance(descr_y.get_field_size(True), Symbolic)
             assert isinstance(descr_z.get_field_size(True), Symbolic)
             assert isinstance(descr_f.get_field_size(True), Symbolic)
+            assert isinstance(descr_s.get_field_size(True), Symbolic)
         assert not descr_x.is_pointer_field()
         assert     descr_y.is_pointer_field()
         assert not descr_z.is_pointer_field()
         assert not descr_f.is_pointer_field()
+        assert not descr_s.is_pointer_field()
         assert not descr_x.is_float_field()
         assert not descr_y.is_float_field()
         assert not descr_z.is_float_field()
         assert     descr_f.is_float_field()
+        assert not descr_s.is_float_field()
 
 
 def test_get_field_descr_sign():
@@ -135,6 +149,7 @@
     A2 = lltype.GcArray(lltype.Ptr(T))
     A3 = lltype.GcArray(lltype.Ptr(U))
     A4 = lltype.GcArray(lltype.Float)
+    A5 = lltype.GcArray(lltype.SingleFloat)
     assert getArrayDescrClass(A2) is GcPtrArrayDescr
     assert getArrayDescrClass(A3) is NonGcPtrArrayDescr
     cls = getArrayDescrClass(A1)
@@ -143,25 +158,32 @@
     clsf = getArrayDescrClass(A4)
     assert clsf != cls
     assert clsf == getArrayDescrClass(lltype.GcArray(lltype.Float))
+    clss = getArrayDescrClass(A5)
+    assert clss not in (clsf, cls)
+    assert clss == getArrayDescrClass(lltype.GcArray(rffi.UINT))
     #
     c0 = GcCache(False)
     descr1 = get_array_descr(c0, A1)
     descr2 = get_array_descr(c0, A2)
     descr3 = get_array_descr(c0, A3)
     descr4 = get_array_descr(c0, A4)
+    descr5 = get_array_descr(c0, A5)
     assert descr1.__class__ is cls
     assert descr2.__class__ is GcPtrArrayDescr
     assert descr3.__class__ is NonGcPtrArrayDescr
     assert descr4.__class__ is clsf
+    assert descr5.__class__ is clss
     assert descr1 == get_array_descr(c0, lltype.GcArray(lltype.Char))
     assert not descr1.is_array_of_pointers()
     assert     descr2.is_array_of_pointers()
     assert not descr3.is_array_of_pointers()
     assert not descr4.is_array_of_pointers()
+    assert not descr5.is_array_of_pointers()
     assert not descr1.is_array_of_floats()
     assert not descr2.is_array_of_floats()
     assert not descr3.is_array_of_floats()
     assert     descr4.is_array_of_floats()
+    assert not descr5.is_array_of_floats()
     #
     def get_alignment(code):
         # Retrieve default alignment for the compiler/platform
@@ -170,27 +192,33 @@
     assert descr2.get_base_size(False) == get_alignment('p')
     assert descr3.get_base_size(False) == get_alignment('p')
     assert descr4.get_base_size(False) == get_alignment('d')
+    assert descr5.get_base_size(False) == get_alignment('f')
     assert descr1.get_ofs_length(False) == 0
     assert descr2.get_ofs_length(False) == 0
     assert descr3.get_ofs_length(False) == 0
     assert descr4.get_ofs_length(False) == 0
+    assert descr5.get_ofs_length(False) == 0
     assert descr1.get_item_size(False) == rffi.sizeof(lltype.Char)
     assert descr2.get_item_size(False) == rffi.sizeof(lltype.Ptr(T))
     assert descr3.get_item_size(False) == rffi.sizeof(lltype.Ptr(U))
     assert descr4.get_item_size(False) == rffi.sizeof(lltype.Float)
+    assert descr5.get_item_size(False) == rffi.sizeof(lltype.SingleFloat)
     #
     assert isinstance(descr1.get_base_size(True), Symbolic)
     assert isinstance(descr2.get_base_size(True), Symbolic)
     assert isinstance(descr3.get_base_size(True), Symbolic)
     assert isinstance(descr4.get_base_size(True), Symbolic)
+    assert isinstance(descr5.get_base_size(True), Symbolic)
     assert isinstance(descr1.get_ofs_length(True), Symbolic)
     assert isinstance(descr2.get_ofs_length(True), Symbolic)
     assert isinstance(descr3.get_ofs_length(True), Symbolic)
     assert isinstance(descr4.get_ofs_length(True), Symbolic)
+    assert isinstance(descr5.get_ofs_length(True), Symbolic)
     assert isinstance(descr1.get_item_size(True), Symbolic)
     assert isinstance(descr2.get_item_size(True), Symbolic)
     assert isinstance(descr3.get_item_size(True), Symbolic)
     assert isinstance(descr4.get_item_size(True), Symbolic)
+    assert isinstance(descr5.get_item_size(True), Symbolic)
     CA = rffi.CArray(lltype.Signed)
     descr = get_array_descr(c0, CA)
     assert not descr.is_array_of_floats()
@@ -210,6 +238,11 @@
     assert descr.is_array_of_floats()
     assert descr.get_base_size(False) == 0
     assert descr.get_ofs_length(False) == -1
+    CA = rffi.CArray(rffi.FLOAT)
+    descr = get_array_descr(c0, CA)
+    assert not descr.is_array_of_floats()
+    assert descr.get_base_size(False) == 0
+    assert descr.get_ofs_length(False) == -1
 
 
 def test_get_array_descr_sign():
@@ -257,6 +290,11 @@
     assert descr4.get_result_size(False) == rffi.sizeof(lltype.Float)
     assert descr4.get_return_type() == history.FLOAT
     assert descr4.arg_classes == "ff"
+    #
+    descr5 = get_call_descr(c0, [lltype.SingleFloat], lltype.SingleFloat)
+    assert descr5.get_result_size(False) == rffi.sizeof(lltype.SingleFloat)
+    assert descr5.get_return_type() == "S"
+    assert descr5.arg_classes == "S"
 
 def test_get_call_descr_not_translated_longlong():
     if sys.maxint > 2147483647:
@@ -286,6 +324,11 @@
     assert isinstance(descr4.get_result_size(True), Symbolic)
     assert descr4.get_return_type() == history.FLOAT
     assert descr4.arg_classes == "ff"
+    #
+    descr5 = get_call_descr(c1, [lltype.SingleFloat], lltype.SingleFloat)
+    assert isinstance(descr5.get_result_size(True), Symbolic)
+    assert descr5.get_return_type() == "S"
+    assert descr5.arg_classes == "S"
 
 def test_call_descr_extra_info():
     c1 = GcCache(True)
@@ -345,8 +388,11 @@
     #
     descr4f = get_call_descr(c0, [lltype.Char, lltype.Ptr(S)], lltype.Float)
     assert 'FloatCallDescr' in descr4f.repr_of_descr()
+    #
+    descr5f = get_call_descr(c0, [lltype.Char], lltype.SingleFloat)
+    assert 'SingleFloatCallDescr' in descr5f.repr_of_descr()
 
-def test_call_stubs():
+def test_call_stubs_1():
     c0 = GcCache(False)
     ARGS = [lltype.Char, lltype.Signed]
     RES = lltype.Char
@@ -360,6 +406,8 @@
     res = call_stub(rffi.cast(lltype.Signed, fnptr), [1, 2], None, None)
     assert res == ord('c')
 
+def test_call_stubs_2():
+    c0 = GcCache(False)
     ARRAY = lltype.GcArray(lltype.Signed)
     ARGS = [lltype.Float, lltype.Ptr(ARRAY)]
     RES = lltype.Float
@@ -375,3 +423,27 @@
     res = descr2.call_stub(rffi.cast(lltype.Signed, fnptr),
                            [], [opaquea], [longlong.getfloatstorage(3.5)])
     assert longlong.getrealfloat(res) == 4.5
+
+def test_call_stubs_single_float():
+    from pypy.rlib.longlong2float import uint2singlefloat, singlefloat2uint
+    from pypy.rlib.rarithmetic import r_singlefloat, intmask
+    #
+    c0 = GcCache(False)
+    ARGS = [lltype.SingleFloat, lltype.SingleFloat, lltype.SingleFloat]
+    RES = lltype.SingleFloat
+
+    def f(a, b, c):
+        a = float(a)
+        b = float(b)
+        c = float(c)
+        x = a - (b / c)
+        return r_singlefloat(x)
+
+    fnptr = llhelper(lltype.Ptr(lltype.FuncType(ARGS, RES)), f)
+    descr2 = get_call_descr(c0, ARGS, RES)
+    a = intmask(singlefloat2uint(r_singlefloat(-10.0)))
+    b = intmask(singlefloat2uint(r_singlefloat(3.0)))
+    c = intmask(singlefloat2uint(r_singlefloat(2.0)))
+    res = descr2.call_stub(rffi.cast(lltype.Signed, fnptr),
+                           [a, b, c], [], [])
+    assert float(uint2singlefloat(rffi.r_uint(res))) == -11.5
diff --git a/pypy/jit/backend/llsupport/test/test_ffisupport.py b/pypy/jit/backend/llsupport/test/test_ffisupport.py
--- a/pypy/jit/backend/llsupport/test/test_ffisupport.py
+++ b/pypy/jit/backend/llsupport/test/test_ffisupport.py
@@ -1,24 +1,52 @@
 from pypy.rlib.libffi import types
-from pypy.jit.backend.llsupport.ffisupport import get_call_descr_dynamic, \
-    VoidCallDescr, DynamicIntCallDescr
-    
+from pypy.jit.codewriter.longlong import is_64_bit
+from pypy.jit.backend.llsupport.ffisupport import *
+
+
+class FakeCPU:
+    def __init__(self, supports_floats=False, supports_longlong=False,
+                 supports_singlefloats=False):
+        self.supports_floats = supports_floats
+        self.supports_longlong = supports_longlong
+        self.supports_singlefloats = supports_singlefloats
+
+
 def test_call_descr_dynamic():
+    args = [types.sint, types.pointer]
+    descr = get_call_descr_dynamic(FakeCPU(), args, types.sint)
+    assert isinstance(descr, DynamicIntCallDescr)
+    assert descr.arg_classes == 'ii'
 
     args = [types.sint, types.double, types.pointer]
-    descr = get_call_descr_dynamic(args, types.void)
+    descr = get_call_descr_dynamic(FakeCPU(), args, types.void)
+    assert descr is None    # missing floats
+    descr = get_call_descr_dynamic(FakeCPU(supports_floats=True),
+                                   args, types.void)
     assert isinstance(descr, VoidCallDescr)
     assert descr.arg_classes == 'ifi'
 
-    descr = get_call_descr_dynamic([], types.sint8)
+    descr = get_call_descr_dynamic(FakeCPU(), [], types.sint8)
     assert isinstance(descr, DynamicIntCallDescr)
     assert descr.get_result_size(False) == 1
     assert descr.is_result_signed() == True
 
-    descr = get_call_descr_dynamic([], types.uint8)
+    descr = get_call_descr_dynamic(FakeCPU(), [], types.uint8)
     assert isinstance(descr, DynamicIntCallDescr)
     assert descr.get_result_size(False) == 1
     assert descr.is_result_signed() == False
 
-    descr = get_call_descr_dynamic([], types.float)
-    assert descr is None # single floats are not supported so far
-    
+    if not is_64_bit:
+        descr = get_call_descr_dynamic(FakeCPU(), [], types.slonglong)
+        assert descr is None   # missing longlongs
+        descr = get_call_descr_dynamic(FakeCPU(supports_longlong=True),
+                                       [], types.slonglong)
+        assert isinstance(descr, LongLongCallDescr)
+    else:
+        assert types.slonglong is types.slong
+
+    descr = get_call_descr_dynamic(FakeCPU(), [], types.float)
+    assert descr is None   # missing singlefloats
+    descr = get_call_descr_dynamic(FakeCPU(supports_singlefloats=True),
+                                   [], types.float)
+    SingleFloatCallDescr = getCallDescrClass(rffi.FLOAT)
+    assert isinstance(descr, SingleFloatCallDescr)
diff --git a/pypy/jit/backend/llvm/llvm_rffi.py b/pypy/jit/backend/llvm/llvm_rffi.py
--- a/pypy/jit/backend/llvm/llvm_rffi.py
+++ b/pypy/jit/backend/llvm/llvm_rffi.py
@@ -3,7 +3,7 @@
 from pypy.rpython.lltypesystem import lltype, rffi
 from pypy.translator.tool.cbuild import ExternalCompilationInfo, log
 
-if sys.platform != 'linux2':
+if not sys.platform.startswith('linux'):
     py.test.skip("Linux only for now")
 
 # ____________________________________________________________
diff --git a/pypy/jit/backend/model.py b/pypy/jit/backend/model.py
--- a/pypy/jit/backend/model.py
+++ b/pypy/jit/backend/model.py
@@ -8,12 +8,13 @@
     # ^^^ This is only useful on 32-bit platforms.  If True,
     # longlongs are supported by the JIT, but stored as doubles.
     # Boxes and Consts are BoxFloats and ConstFloats.
+    supports_singlefloats = False
 
     done_with_this_frame_void_v = -1
     done_with_this_frame_int_v = -1
     done_with_this_frame_ref_v = -1
     done_with_this_frame_float_v = -1
-    exit_frame_with_exception_v = -1
+    propagate_exception_v = -1
     total_compiled_loops = 0
     total_compiled_bridges = 0
     total_freed_loops = 0
diff --git a/pypy/jit/backend/test/calling_convention_test.py b/pypy/jit/backend/test/calling_convention_test.py
--- a/pypy/jit/backend/test/calling_convention_test.py
+++ b/pypy/jit/backend/test/calling_convention_test.py
@@ -290,3 +290,58 @@
                 assert abs(x - expected_result) < 0.0001
             finally:
                 del self.cpu.done_with_this_frame_float_v
+
+    def test_call_with_singlefloats(self):
+        cpu = self.cpu
+        if not cpu.supports_floats or not cpu.supports_singlefloats:
+            py.test.skip('requires floats and singlefloats')
+
+        import random
+        from pypy.rlib.libffi import types
+        from pypy.rlib.rarithmetic import r_singlefloat
+
+        def func(*args):
+            res = 0.0
+            for i, x in enumerate(args):
+                res += (i + 1.1) * float(x)
+            return res
+
+        F = lltype.Float
+        S = lltype.SingleFloat
+        I = lltype.Signed
+        floats = [random.random() - 0.5 for i in range(8)]
+        singlefloats = [r_singlefloat(random.random() - 0.5) for i in range(8)]
+        ints = [random.randrange(-99, 99) for i in range(8)]
+        for repeat in range(100):
+            args = []
+            argvalues = []
+            argslist = []
+            local_floats = list(floats)
+            local_singlefloats = list(singlefloats)
+            local_ints = list(ints)
+            for i in range(8):
+                case = random.randrange(0, 3)
+                if case == 0:
+                    args.append(F)
+                    arg = local_floats.pop()
+                    argslist.append(boxfloat(arg))
+                elif case == 1:
+                    args.append(S)
+                    arg = local_singlefloats.pop()
+                    argslist.append(BoxInt(longlong.singlefloat2int(arg)))
+                else:
+                    args.append(I)
+                    arg = local_ints.pop()
+                    argslist.append(BoxInt(arg))
+                argvalues.append(arg)
+            FUNC = self.FuncType(args, F)
+            FPTR = self.Ptr(FUNC)
+            func_ptr = llhelper(FPTR, func)
+            calldescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
+            funcbox = self.get_funcbox(cpu, func_ptr)
+
+            res = self.execute_operation(rop.CALL,
+                                         [funcbox] + argslist,
+                                         'float', descr=calldescr)
+            expected = func(*argvalues)
+            assert abs(res.getfloat() - expected) < 0.0001
diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py
--- a/pypy/jit/backend/test/runner_test.py
+++ b/pypy/jit/backend/test/runner_test.py
@@ -1707,6 +1707,7 @@
             jit_wb_if_flag = 4096
             jit_wb_if_flag_byteofs = struct.pack("i", 4096).index('\x10')
             jit_wb_if_flag_singlebyte = 0x10
+            jit_wb_cards_set = 0
             def get_write_barrier_from_array_fn(self, cpu):
                 return funcbox.getint()
         #
@@ -1728,6 +1729,72 @@
             else:
                 assert record == []
 
+    def test_cond_call_gc_wb_array_card_marking_fast_path(self):
+        def func_void(a, b, c):
+            record.append((a, b, c))
+        record = []
+        #
+        S = lltype.Struct('S', ('tid', lltype.Signed))
+        S_WITH_CARDS = lltype.Struct('S_WITH_CARDS',
+                                     ('card0', lltype.Char),
+                                     ('card1', lltype.Char),
+                                     ('card2', lltype.Char),
+                                     ('card3', lltype.Char),
+                                     ('card4', lltype.Char),
+                                     ('card5', lltype.Char),
+                                     ('card6', lltype.Char),
+                                     ('card7', lltype.Char),
+                                     ('data',  S))
+        FUNC = self.FuncType([lltype.Ptr(S), lltype.Signed, lltype.Ptr(S)],
+                             lltype.Void)
+        func_ptr = llhelper(lltype.Ptr(FUNC), func_void)
+        funcbox = self.get_funcbox(self.cpu, func_ptr)
+        class WriteBarrierDescr(AbstractDescr):
+            jit_wb_if_flag = 4096
+            jit_wb_if_flag_byteofs = struct.pack("i", 4096).index('\x10')
+            jit_wb_if_flag_singlebyte = 0x10
+            jit_wb_cards_set = 8192
+            jit_wb_cards_set_byteofs = struct.pack("i", 8192).index('\x20')
+            jit_wb_cards_set_singlebyte = 0x20
+            jit_wb_card_page_shift = 7
+            def get_write_barrier_from_array_fn(self, cpu):
+                return funcbox.getint()
+        #
+        for BoxIndexCls in [BoxInt, ConstInt]:
+            for cond in [False, True]:
+                print
+                print '_'*79
+                print 'BoxIndexCls =', BoxIndexCls
+                print 'JIT_WB_CARDS_SET =', cond
+                print
+                value = random.randrange(-sys.maxint, sys.maxint)
+                value |= 4096
+                if cond:
+                    value |= 8192
+                else:
+                    value &= ~8192
+                s = lltype.malloc(S_WITH_CARDS, immortal=True, zero=True)
+                s.data.tid = value
+                sgcref = rffi.cast(llmemory.GCREF, s.data)
+                del record[:]
+                box_index = BoxIndexCls((9<<7) + 17)
+                self.execute_operation(rop.COND_CALL_GC_WB_ARRAY,
+                           [BoxPtr(sgcref), box_index, BoxPtr(sgcref)],
+                           'void', descr=WriteBarrierDescr())
+                if cond:
+                    assert record == []
+                    assert s.card6 == '\x02'
+                else:
+                    assert record == [(s.data, (9<<7) + 17, s.data)]
+                    assert s.card6 == '\x00'
+                assert s.card0 == '\x00'
+                assert s.card1 == '\x00'
+                assert s.card2 == '\x00'
+                assert s.card3 == '\x00'
+                assert s.card4 == '\x00'
+                assert s.card5 == '\x00'
+                assert s.card7 == '\x00'
+
     def test_force_operations_returning_void(self):
         values = []
         def maybe_force(token, flag):
@@ -2667,6 +2734,65 @@
                                      'float', descr=calldescr)
         assert res.getfloatstorage() == expected
 
+    def test_singlefloat_result_of_call_direct(self):
+        if not self.cpu.supports_singlefloats:
+            py.test.skip("singlefloat test")
+        from pypy.translator.tool.cbuild import ExternalCompilationInfo
+        from pypy.rlib.rarithmetic import r_singlefloat
+        eci = ExternalCompilationInfo(
+            separate_module_sources=["""
+            float fn_test_result_of_call(float x)
+            {
+                return x / 2.0f;
+            }
+            """],
+            export_symbols=['fn_test_result_of_call'])
+        f = rffi.llexternal('fn_test_result_of_call', [lltype.SingleFloat],
+                            lltype.SingleFloat,
+                            compilation_info=eci, _nowrapper=True)
+        value = r_singlefloat(-42.5)
+        expected = r_singlefloat(-21.25)
+        assert f(value) == expected
+        #
+        FUNC = self.FuncType([lltype.SingleFloat], lltype.SingleFloat)
+        FPTR = self.Ptr(FUNC)
+        calldescr = self.cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
+        ivalue = longlong.singlefloat2int(value)
+        iexpected = longlong.singlefloat2int(expected)
+        x = self.cpu.bh_call_i(self.get_funcbox(self.cpu, f).value,
+                               calldescr, [ivalue], None, None)
+        assert x == iexpected
+
+    def test_singlefloat_result_of_call_compiled(self):
+        if not self.cpu.supports_singlefloats:
+            py.test.skip("test of singlefloat result")
+        from pypy.translator.tool.cbuild import ExternalCompilationInfo
+        from pypy.rlib.rarithmetic import r_singlefloat
+        eci = ExternalCompilationInfo(
+            separate_module_sources=["""
+            float fn_test_result_of_call(float x)
+            {
+                return x / 2.0f;
+            }
+            """],
+            export_symbols=['fn_test_result_of_call'])
+        f = rffi.llexternal('fn_test_result_of_call', [lltype.SingleFloat],
+                            lltype.SingleFloat,
+                            compilation_info=eci, _nowrapper=True)
+        value = r_singlefloat(-42.5)
+        expected = r_singlefloat(-21.25)
+        assert f(value) == expected
+        #
+        FUNC = self.FuncType([lltype.SingleFloat], lltype.SingleFloat)
+        FPTR = self.Ptr(FUNC)
+        calldescr = self.cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
+        funcbox = self.get_funcbox(self.cpu, f)
+        ivalue = longlong.singlefloat2int(value)
+        iexpected = longlong.singlefloat2int(expected)
+        res = self.execute_operation(rop.CALL, [funcbox, BoxInt(ivalue)],
+                                     'int', descr=calldescr)
+        assert res.value == iexpected
+
     def test_free_loop_and_bridges(self):
         from pypy.jit.backend.llsupport.llmodel import AbstractLLCPU
         if not isinstance(self.cpu, AbstractLLCPU):
@@ -2681,6 +2807,26 @@
         assert mem2 < mem1
         assert mem2 == mem0
 
+    def test_memoryerror(self):
+        excdescr = BasicFailDescr(666)
+        self.cpu.propagate_exception_v = self.cpu.get_fail_descr_number(
+            excdescr)
+        self.cpu.setup_once()    # xxx redo it, because we added
+                                 # propagate_exception_v
+        i0 = BoxInt()
+        p0 = BoxPtr()
+        operations = [
+            ResOperation(rop.NEWUNICODE, [i0], p0),
+            ResOperation(rop.FINISH, [p0], None, descr=BasicFailDescr(1))
+            ]
+        inputargs = [i0]
+        looptoken = LoopToken()
+        self.cpu.compile_loop(inputargs, operations, looptoken)
+        # overflowing value:
+        self.cpu.set_future_value_int(0, sys.maxint // 4 + 1)
+        fail = self.cpu.execute_token(looptoken)
+        assert fail.identifier == excdescr.identifier
+
 
 class OOtypeBackendTest(BaseBackendTest):
 
diff --git a/pypy/jit/backend/x86/arch.py b/pypy/jit/backend/x86/arch.py
--- a/pypy/jit/backend/x86/arch.py
+++ b/pypy/jit/backend/x86/arch.py
@@ -27,3 +27,6 @@
 # which are used in the malloc itself.  They are:
 #   ecx, ebx, esi, edi               [32 and 64 bits]
 #   r8, r9, r10, r12, r13, r14, r15    [64 bits only]
+#
+# Note that with asmgcc, the locations corresponding to callee-save registers
+# are never used.
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -56,7 +56,9 @@
         self.exc = exc
         self.is_guard_not_invalidated = is_guard_not_invalidated
 
-DEBUG_COUNTER = lltype.Struct('DEBUG_COUNTER', ('i', lltype.Signed))
+DEBUG_COUNTER = lltype.Struct('DEBUG_COUNTER', ('i', lltype.Signed),
+                              ('bridge', lltype.Signed), # 0 or 1
+                              ('number', lltype.Signed))
 
 class Assembler386(object):
     _regalloc = None
@@ -89,6 +91,7 @@
         self._current_depths_cache = (0, 0)
         self.datablockwrapper = None
         self.stack_check_slowpath = 0
+        self.propagate_exception_path = 0
         self.teardown()
 
     def leave_jitted_hook(self):
@@ -125,6 +128,7 @@
             self._build_failure_recovery(True, withfloats=True)
             support.ensure_sse2_floats()
             self._build_float_constants()
+        self._build_propagate_exception_path()
         if gc_ll_descr.get_malloc_slowpath_addr is not None:
             self._build_malloc_slowpath()
         self._build_stack_check_slowpath()
@@ -138,6 +142,9 @@
         assert self.memcpy_addr != 0, "setup_once() not called?"
         self.current_clt = looptoken.compiled_loop_token
         self.pending_guard_tokens = []
+        if WORD == 8:
+            self.pending_memoryerror_trampoline_from = []
+            self.error_trampoline_64 = 0
         self.mc = codebuf.MachineCodeBlockWrapper()
         #assert self.datablockwrapper is None --- but obscure case
         # possible, e.g. getting MemoryError and continuing
@@ -147,6 +154,8 @@
 
     def teardown(self):
         self.pending_guard_tokens = None
+        if WORD == 8:
+            self.pending_memoryerror_trampoline_from = None
         self.mc = None
         self.looppos = -1
         self.currently_compiling_loop = None
@@ -155,9 +164,12 @@
     def finish_once(self):
         if self._debug:
             debug_start('jit-backend-counts')
-            for i in range(len(self.loop_run_counters)):
-                struct = self.loop_run_counters[i]
-                debug_print(str(i) + ':' + str(struct.i))
+            for struct in self.loop_run_counters:
+                if struct.bridge:
+                    prefix = 'bridge '
+                else:
+                    prefix = 'loop '
+                debug_print(prefix + str(struct.number) + ':' + str(struct.i))
             debug_stop('jit-backend-counts')
 
     def _build_float_constants(self):
@@ -181,6 +193,7 @@
         # instructions in assembler, with a mark_gc_roots in between.
         # With shadowstack, this is not needed, so we produce a single helper.
         gcrootmap = self.cpu.gc_ll_descr.gcrootmap
+        shadow_stack = (gcrootmap is not None and gcrootmap.is_shadow_stack)
         #
         # ---------- first helper for the slow path of malloc ----------
         mc = codebuf.MachineCodeBlockWrapper()
@@ -190,10 +203,19 @@
         mc.SUB_rr(edx.value, eax.value)       # compute the size we want
         addr = self.cpu.gc_ll_descr.get_malloc_slowpath_addr()
         #
-        if gcrootmap is not None and gcrootmap.is_shadow_stack:
+        # The registers to save in the copy area: with shadowstack, most
+        # registers need to be saved.  With asmgcc, the callee-saved registers
+        # don't need to.
+        save_in_copy_area = gpr_reg_mgr_cls.REGLOC_TO_COPY_AREA_OFS.items()
+        if not shadow_stack:
+            save_in_copy_area = [(reg, ofs) for (reg, ofs) in save_in_copy_area
+                   if reg not in gpr_reg_mgr_cls.REGLOC_TO_GCROOTMAP_REG_INDEX]
+        #
+        for reg, ofs in save_in_copy_area:
+            mc.MOV_br(ofs, reg.value)
+        #
+        if shadow_stack:
             # ---- shadowstack ----
-            for reg, ofs in gpr_reg_mgr_cls.REGLOC_TO_COPY_AREA_OFS.items():
-                mc.MOV_br(ofs, reg.value)
             mc.SUB_ri(esp.value, 16 - WORD)      # stack alignment of 16 bytes
             if IS_X86_32:
                 mc.MOV_sr(0, edx.value)          # push argument
@@ -201,15 +223,13 @@
                 mc.MOV_rr(edi.value, edx.value)
             mc.CALL(imm(addr))
             mc.ADD_ri(esp.value, 16 - WORD)
-            for reg, ofs in gpr_reg_mgr_cls.REGLOC_TO_COPY_AREA_OFS.items():
-                mc.MOV_rb(reg.value, ofs)
         else:
             # ---- asmgcc ----
             if IS_X86_32:
                 mc.MOV_sr(WORD, edx.value)       # save it as the new argument
             elif IS_X86_64:
-                # rdi can be clobbered: its content was forced to the stack
-                # by _fastpath_malloc(), like all other save_around_call_regs.
+                # rdi can be clobbered: its content was saved in the
+                # copy area of the stack
                 mc.MOV_rr(edi.value, edx.value)
             mc.JMP(imm(addr))                    # tail call to the real malloc
             rawstart = mc.materialize(self.cpu.asmmemmgr, [])
@@ -217,18 +237,54 @@
             # ---------- second helper for the slow path of malloc ----------
             mc = codebuf.MachineCodeBlockWrapper()
         #
+        for reg, ofs in save_in_copy_area:
+            mc.MOV_rb(reg.value, ofs)
+            assert reg is not eax and reg is not edx
+        #
         if self.cpu.supports_floats:          # restore the XMM registers
             for i in range(self.cpu.NUM_REGS):# from where they were saved
                 mc.MOVSD_xs(i, (WORD*2)+8*i)
+        #
+        # Note: we check this after the code above, just because the code
+        # above is more than 127 bytes on 64-bits...
+        mc.TEST_rr(eax.value, eax.value)
+        mc.J_il8(rx86.Conditions['Z'], 0) # patched later
+        jz_location = mc.get_relative_pos()
+        #
         nursery_free_adr = self.cpu.gc_ll_descr.get_nursery_free_addr()
         mc.MOV(edx, heap(nursery_free_adr))   # load this in EDX
         mc.RET()
+        #
+        # If the slowpath malloc failed, we raise a MemoryError that
+        # always interrupts the current loop, as a "good enough"
+        # approximation.  Also note that we didn't RET from this helper;
+        # but the code we jump to will actually restore the stack
+        # position based on EBP, which will get us out of here for free.
+        offset = mc.get_relative_pos() - jz_location
+        assert 0 < offset <= 127
+        mc.overwrite(jz_location-1, chr(offset))
+        mc.JMP(imm(self.propagate_exception_path))
+        #
         rawstart = mc.materialize(self.cpu.asmmemmgr, [])
         self.malloc_slowpath2 = rawstart
 
+    def _build_propagate_exception_path(self):
+        if self.cpu.propagate_exception_v < 0:
+            return      # not supported (for tests, or non-translated)
+        #
+        self.mc = codebuf.MachineCodeBlockWrapper()
+        # call on_leave_jitted_save_exc()
+        addr = self.cpu.get_on_leave_jitted_int(save_exception=True)
+        self.mc.CALL(imm(addr))
+        self.mc.MOV_ri(eax.value, self.cpu.propagate_exception_v)
+        self._call_footer()
+        rawstart = self.mc.materialize(self.cpu.asmmemmgr, [])
+        self.propagate_exception_path = rawstart
+        self.mc = None
+
     def _build_stack_check_slowpath(self):
         _, _, slowpathaddr = self.cpu.insert_stack_check()
-        if slowpathaddr == 0 or self.cpu.exit_frame_with_exception_v < 0:
+        if slowpathaddr == 0 or self.cpu.propagate_exception_v < 0:
             return      # no stack check (for tests, or non-translated)
         #
         # make a "function" that is called immediately at the start of
@@ -284,19 +340,11 @@
         offset = mc.get_relative_pos() - jnz_location
         assert 0 < offset <= 127
         mc.overwrite(jnz_location-1, chr(offset))
-        # clear the exception from the global position
-        mc.MOV(eax, heap(self.cpu.pos_exc_value()))
-        mc.MOV(heap(self.cpu.pos_exception()), imm0)
-        mc.MOV(heap(self.cpu.pos_exc_value()), imm0)
-        # save the current exception instance into fail_boxes_ptr[0]
-        adr = self.fail_boxes_ptr.get_addr_for_num(0)
-        mc.MOV(heap(adr), eax)
-        # call the helper function to set the GC flag on the fail_boxes_ptr
-        # array (note that there is no exception any more here)
-        addr = self.cpu.get_on_leave_jitted_int(save_exception=False)
+        # call on_leave_jitted_save_exc()
+        addr = self.cpu.get_on_leave_jitted_int(save_exception=True)
         mc.CALL(imm(addr))
         #
-        mc.MOV_ri(eax.value, self.cpu.exit_frame_with_exception_v)
+        mc.MOV_ri(eax.value, self.cpu.propagate_exception_v)
         #
         # footer -- note the ADD, which skips the return address of this
         # function, and will instead return to the caller's caller.  Note
@@ -392,7 +440,7 @@
         self.setup(looptoken)
         self.currently_compiling_loop = looptoken
         if log:
-            self._register_counter()
+            self._register_counter(False, looptoken.number)
             operations = self._inject_debugging_code(looptoken, operations)
 
         regalloc = RegAlloc(self, self.cpu.translate_support_code)
@@ -461,7 +509,7 @@
 
         self.setup(original_loop_token)
         if log:
-            self._register_counter()
+            self._register_counter(True, descr_number)
             operations = self._inject_debugging_code(faildescr, operations)
 
         arglocs = self.rebuild_faillocs_from_descr(failure_recovery)
@@ -482,7 +530,7 @@
         #
         rawstart = self.materialize_loop(original_loop_token)
         debug_start("jit-backend-addr")
-        debug_print("Bridge out of Guard %d has address %x to %x" %
+        debug_print("bridge out of Guard %d has address %x to %x" %
                     (descr_number, rawstart, rawstart + codeendpos))
         debug_stop("jit-backend-addr")
         self._patch_stackadjust(rawstart + stackadjustpos,
@@ -508,6 +556,8 @@
         # at the end of self.mc.
         for tok in self.pending_guard_tokens:
             tok.pos_recovery_stub = self.generate_quick_failure(tok)
+        if WORD == 8 and len(self.pending_memoryerror_trampoline_from) > 0:
+            self.error_trampoline_64 = self.generate_propagate_error_64()
 
     def patch_pending_failure_recoveries(self, rawstart):
         # after we wrote the assembler to raw memory, set up
@@ -544,6 +594,12 @@
                 # less, we would run into the issue that overwriting the
                 # 5 bytes here might get a few nonsense bytes at the
                 # return address of the following CALL.
+        if WORD == 8:
+            for pos_after_jz in self.pending_memoryerror_trampoline_from:
+                assert self.error_trampoline_64 != 0     # only if non-empty
+                mc = codebuf.MachineCodeBlockWrapper()
+                mc.writeimm32(self.error_trampoline_64 - pos_after_jz)
+                mc.copy_to_raw_memory(rawstart + pos_after_jz - 4)
 
     def get_asmmemmgr_blocks(self, looptoken):
         clt = looptoken.compiled_loop_token
@@ -558,7 +614,7 @@
         return self.mc.materialize(self.cpu.asmmemmgr, allblocks,
                                    self.cpu.gc_ll_descr.gcrootmap)
 
-    def _register_counter(self):
+    def _register_counter(self, bridge, number):
         if self._debug:
             # YYY very minor leak -- we need the counters to stay alive
             # forever, just because we want to report them at the end
@@ -566,6 +622,8 @@
             struct = lltype.malloc(DEBUG_COUNTER, flavor='raw',
                                    track_allocation=False)
             struct.i = 0
+            struct.bridge = int(bridge)
+            struct.number = number
             self.loop_run_counters.append(struct)
 
     def _find_failure_recovery_bytecode(self, faildescr):
@@ -1056,9 +1114,10 @@
                     self.implement_guard(guard_token, checkfalsecond)
         return genop_cmp_guard_float
 
-    def _emit_call(self, force_index, x, arglocs, start=0, tmp=eax):
+    def _emit_call(self, force_index, x, arglocs, start=0, tmp=eax,
+                   argtypes=None):
         if IS_X86_64:
-            return self._emit_call_64(force_index, x, arglocs, start)
+            return self._emit_call_64(force_index, x, arglocs, start, argtypes)
 
         p = 0
         n = len(arglocs)
@@ -1086,12 +1145,13 @@
         self.mc.CALL(x)
         self.mark_gc_roots(force_index)
 
-    def _emit_call_64(self, force_index, x, arglocs, start):
+    def _emit_call_64(self, force_index, x, arglocs, start, argtypes):
         src_locs = []
         dst_locs = []
         xmm_src_locs = []
         xmm_dst_locs = []
         pass_on_stack = []
+        singlefloats = None
 
         # In reverse order for use with pop()
         unused_gpr = [r9, r8, ecx, edx, esi, edi]
@@ -1111,6 +1171,11 @@
                     xmm_dst_locs.append(unused_xmm.pop())
                 else:
                     pass_on_stack.append(loc)
+            elif (argtypes is not None and argtypes[i-start] == 'S' and
+                  len(unused_xmm) > 0):
+                # Singlefloat argument
+                if singlefloats is None: singlefloats = []
+                singlefloats.append((loc, unused_xmm.pop()))
             else:
                 if len(unused_gpr) > 0:
                     src_locs.append(loc)
@@ -1138,9 +1203,15 @@
                 else:
                     self.mc.MOV_sr(i*WORD, loc.value)
 
-        # Handle register arguments
+        # Handle register arguments: first remap the xmm arguments
+        remap_frame_layout(self, xmm_src_locs, xmm_dst_locs,
+                           X86_64_XMM_SCRATCH_REG)
+        # Load the singlefloat arguments from main regs or stack to xmm regs
+        if singlefloats is not None:
+            for src, dst in singlefloats:
+                self.mc.MOVD(dst, src)
+        # Finally remap the arguments in the main regs
         remap_frame_layout(self, src_locs, dst_locs, X86_64_SCRATCH_REG)
-        remap_frame_layout(self, xmm_src_locs, xmm_dst_locs, X86_64_XMM_SCRATCH_REG)
 
         self._regalloc.reserve_param(len(pass_on_stack))
         self.mc.CALL(x)
@@ -1255,6 +1326,20 @@
     def genop_cast_int_to_float(self, op, arglocs, resloc):
         self.mc.CVTSI2SD(resloc, arglocs[0])
 
+    def genop_cast_float_to_singlefloat(self, op, arglocs, resloc):
+        loc0, loctmp = arglocs
+        self.mc.CVTSD2SS(loctmp, loc0)
+        assert isinstance(resloc, RegLoc)
+        assert isinstance(loctmp, RegLoc)
+        self.mc.MOVD_rx(resloc.value, loctmp.value)
+
+    def genop_cast_singlefloat_to_float(self, op, arglocs, resloc):
+        loc0, = arglocs
+        assert isinstance(resloc, RegLoc)
+        assert isinstance(loc0, RegLoc)
+        self.mc.MOVD_xr(resloc.value, loc0.value)
+        self.mc.CVTSS2SD_xx(resloc.value, resloc.value)
+
     def genop_guard_int_is_true(self, op, guard_op, guard_token, arglocs, resloc):
         guard_opnum = guard_op.getopnum()
         self.mc.CMP(arglocs[0], imm0)
@@ -1376,7 +1461,7 @@
         assert isinstance(loc_vtable, ImmedLoc)
         arglocs = arglocs[:-1]
         self.call(self.malloc_func_addr, arglocs, eax)
-        # xxx ignore NULL returns for now
+        self.propagate_memoryerror_if_eax_is_null()
         self.set_vtable(eax, loc_vtable)
 
     def set_vtable(self, loc, loc_vtable):
@@ -1395,18 +1480,35 @@
     def genop_new(self, op, arglocs, result_loc):
         assert result_loc is eax
         self.call(self.malloc_func_addr, arglocs, eax)
+        self.propagate_memoryerror_if_eax_is_null()
 
     def genop_new_array(self, op, arglocs, result_loc):
         assert result_loc is eax
         self.call(self.malloc_array_func_addr, arglocs, eax)
+        self.propagate_memoryerror_if_eax_is_null()
 
     def genop_newstr(self, op, arglocs, result_loc):
         assert result_loc is eax
         self.call(self.malloc_str_func_addr, arglocs, eax)
+        self.propagate_memoryerror_if_eax_is_null()
 
     def genop_newunicode(self, op, arglocs, result_loc):
         assert result_loc is eax
         self.call(self.malloc_unicode_func_addr, arglocs, eax)
+        self.propagate_memoryerror_if_eax_is_null()
+
+    def propagate_memoryerror_if_eax_is_null(self):
+        # if self.propagate_exception_path == 0 (tests), this may jump to 0
+        # and segfaults.  too bad.  the alternative is to continue anyway
+        # with eax==0, but that will segfault too.
+        self.mc.TEST_rr(eax.value, eax.value)
+        if WORD == 4:
+            self.mc.J_il(rx86.Conditions['Z'], self.propagate_exception_path)
+            self.mc.add_pending_relocation()
+        elif WORD == 8:
+            self.mc.J_il(rx86.Conditions['Z'], 0)
+            pos = self.mc.get_relative_pos()
+            self.pending_memoryerror_trampoline_from.append(pos)
 
     # ----------
 
@@ -1678,6 +1780,12 @@
         return GuardToken(faildescr, failargs, fail_locs, exc,
                           is_guard_not_invalidated)
 
+    def generate_propagate_error_64(self):
+        assert WORD == 8
+        startpos = self.mc.get_relative_pos()
+        self.mc.JMP(imm(self.propagate_exception_path))
+        return startpos
+
     def generate_quick_failure(self, guardtok):
         """Generate the initial code for handling a failure.  We try to
         keep it as compact as possible.
@@ -2013,7 +2121,8 @@
         else:
             tmp = eax
 
-        self._emit_call(force_index, x, arglocs, 3, tmp=tmp)
+        self._emit_call(force_index, x, arglocs, 3, tmp=tmp,
+                        argtypes=op.getdescr().get_arg_types())
 
         if IS_X86_32 and isinstance(resloc, StackLoc) and resloc.width == 8:
             # a float or a long long return
@@ -2025,7 +2134,19 @@
                 #     and this way is simpler also because the result loc
                 #     can just be always a stack location
             else:
-                self.mc.FSTP_b(resloc.value)   # float return
+                self.mc.FSTPL_b(resloc.value)   # float return
+        elif op.getdescr().get_return_type() == 'S':
+            # singlefloat return
+            assert resloc is eax
+            if IS_X86_32:
+                # must convert ST(0) to a 32-bit singlefloat and load it into EAX
+                # mess mess mess
+                self.mc.SUB_ri(esp.value, 4)
+                self.mc.FSTPS_s(0)
+                self.mc.POP_r(eax.value)
+            elif IS_X86_64:
+                # must copy from the lower 32 bits of XMM0 into eax
+                self.mc.MOVD_rx(eax.value, xmm0.value)
         elif size == WORD:
             assert resloc is eax or resloc is xmm0    # a full word
         elif size == 0:
@@ -2183,7 +2304,7 @@
         self._emit_call(fail_index, imm(asm_helper_adr), [eax, arglocs[1]], 0,
                         tmp=ecx)
         if IS_X86_32 and isinstance(result_loc, StackLoc) and result_loc.type == FLOAT:
-            self.mc.FSTP_b(result_loc.value)
+            self.mc.FSTPL_b(result_loc.value)
         #else: result_loc is already either eax or None, checked below
         self.mc.JMP_l8(0) # jump to done, patched later
         jmp_location = self.mc.get_relative_pos()
@@ -2246,10 +2367,12 @@
         if opnum == rop.COND_CALL_GC_WB:
             N = 2
             func = descr.get_write_barrier_fn(self.cpu)
+            card_marking = False
         elif opnum == rop.COND_CALL_GC_WB_ARRAY:
             N = 3
             func = descr.get_write_barrier_from_array_fn(self.cpu)
             assert func != 0
+            card_marking = descr.jit_wb_cards_set != 0
         else:
             raise AssertionError(opnum)
         #
@@ -2258,6 +2381,18 @@
                       imm(descr.jit_wb_if_flag_singlebyte))
         self.mc.J_il8(rx86.Conditions['Z'], 0) # patched later
         jz_location = self.mc.get_relative_pos()
+
+        # for cond_call_gc_wb_array, also add another fast path:
+        # if GCFLAG_CARDS_SET, then we can just set one bit and be done
+        if card_marking:
+            self.mc.TEST8(addr_add_const(loc_base,
+                                         descr.jit_wb_cards_set_byteofs),
+                          imm(descr.jit_wb_cards_set_singlebyte))
+            self.mc.J_il8(rx86.Conditions['NZ'], 0) # patched later
+            jnz_location = self.mc.get_relative_pos()
+        else:
+            jnz_location = 0
+
         # the following is supposed to be the slow path, so whenever possible
         # we choose the most compact encoding over the most efficient one.
         if IS_X86_32:
@@ -2297,6 +2432,43 @@
             loc = arglocs[i]
             assert isinstance(loc, RegLoc)
             self.mc.POP_r(loc.value)
+
+        # if GCFLAG_CARDS_SET, then we can do the whole thing that would
+        # be done in the CALL above with just four instructions, so here
+        # is an inline copy of them
+        if card_marking:
+            self.mc.JMP_l8(0) # jump to the exit, patched later
+            jmp_location = self.mc.get_relative_pos()
+            # patch the JNZ above
+            offset = self.mc.get_relative_pos() - jnz_location
+            assert 0 < offset <= 127
+            self.mc.overwrite(jnz_location-1, chr(offset))
+            #
+            loc_index = arglocs[1]
+            if isinstance(loc_index, RegLoc):
+                # choose a scratch register
+                tmp1 = loc_index
+                self.mc.PUSH_r(tmp1.value)
+                # SHR tmp, card_page_shift
+                self.mc.SHR_ri(tmp1.value, descr.jit_wb_card_page_shift)
+                # XOR tmp, -8
+                self.mc.XOR_ri(tmp1.value, -8)
+                # BTS [loc_base], tmp
+                self.mc.BTS(addr_add_const(loc_base, 0), tmp1)
+                # done
+                self.mc.POP_r(tmp1.value)
+            elif isinstance(loc_index, ImmedLoc):
+                byte_index = loc_index.value >> descr.jit_wb_card_page_shift
+                byte_ofs = ~(byte_index >> 3)
+                byte_val = 1 << (byte_index & 7)
+                self.mc.OR8(addr_add_const(loc_base, byte_ofs), imm(byte_val))
+            else:
+                raise AssertionError("index is neither RegLoc nor ImmedLoc")
+            # patch the JMP above
+            offset = self.mc.get_relative_pos() - jmp_location
+            assert 0 < offset <= 127
+            self.mc.overwrite(jmp_location-1, chr(offset))
+        #
         # patch the JZ above
         offset = self.mc.get_relative_pos() - jz_location
         assert 0 < offset <= 127
@@ -2373,8 +2545,7 @@
             # there are two helpers to call only with asmgcc
             slowpath_addr1 = self.malloc_slowpath1
             self.mc.CALL(imm(slowpath_addr1))
-        self.mark_gc_roots(self.write_new_force_index(),
-                           use_copy_area=shadow_stack)
+        self.mark_gc_roots(self.write_new_force_index(), use_copy_area=True)
         slowpath_addr2 = self.malloc_slowpath2
         self.mc.CALL(imm(slowpath_addr2))
 
diff --git a/pypy/jit/backend/x86/codebuf.py b/pypy/jit/backend/x86/codebuf.py
--- a/pypy/jit/backend/x86/codebuf.py
+++ b/pypy/jit/backend/x86/codebuf.py
@@ -25,8 +25,11 @@
         self.init_block_builder()
         # a list of relative positions; for each position p, the bytes
         # at [p-4:p] encode an absolute address that will need to be
-        # made relative.
-        self.relocations = []
+        # made relative.  Only works on 32-bit!
+        if WORD == 4:
+            self.relocations = []
+        else:
+            self.relocations = None
         #
         # ResOperation --> offset in the assembly.
         # ops_offset[None] represents the beginning of the code after the last op
@@ -42,9 +45,10 @@
 
     def copy_to_raw_memory(self, addr):
         self._copy_to_raw_memory(addr)
-        for reloc in self.relocations:
-            p = addr + reloc
-            adr = rffi.cast(rffi.LONGP, p - WORD)
-            adr[0] = intmask(adr[0] - p)
+        if self.relocations is not None:
+            for reloc in self.relocations:
+                p = addr + reloc
+                adr = rffi.cast(rffi.LONGP, p - WORD)
+                adr[0] = intmask(adr[0] - p)
         valgrind.discard_translations(addr, self.get_relative_pos())
         self._dump(addr, "jit-backend-dump", backend_name)
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -705,6 +705,17 @@
         self.Perform(op, [loc0], loc1)
         self.rm.possibly_free_var(op.getarg(0))
 
+    def consider_cast_float_to_singlefloat(self, op):
+        loc0 = self.xrm.make_sure_var_in_reg(op.getarg(0))
+        loc1 = self.rm.force_allocate_reg(op.result)
+        self.xrm.possibly_free_var(op.getarg(0))
+        tmpxvar = TempBox()
+        loctmp = self.xrm.force_allocate_reg(tmpxvar)   # may be equal to loc0
+        self.xrm.possibly_free_var(tmpxvar)
+        self.Perform(op, [loc0, loctmp], loc1)
+
+    consider_cast_singlefloat_to_float = consider_cast_int_to_float
+
     def _consider_llong_binop_xx(self, op):
         # must force both arguments into xmm registers, because we don't
         # know if they will be suitably aligned.  Exception: if the second
@@ -921,27 +932,13 @@
     def _do_fastpath_malloc(self, op, size, tid):
         gc_ll_descr = self.assembler.cpu.gc_ll_descr
         self.rm.force_allocate_reg(op.result, selected_reg=eax)
-
-        if gc_ll_descr.gcrootmap and gc_ll_descr.gcrootmap.is_shadow_stack:
-            # ---- shadowstack ----
-            # We need edx as a temporary, but otherwise don't save any more
-            # register.  See comments in _build_malloc_slowpath().
-            tmp_box = TempBox()
-            self.rm.force_allocate_reg(tmp_box, selected_reg=edx)
-            self.rm.possibly_free_var(tmp_box)
-        else:
-            # ---- asmgcc ----
-            # We need to force-allocate each of save_around_call_regs now.
-            # The alternative would be to save and restore them around the
-            # actual call to malloc(), in the rare case where we need to do
-            # it; however, mark_gc_roots() would need to be adapted to know
-            # where the variables end up being saved.  Messy.
-            for reg in self.rm.save_around_call_regs:
-                if reg is not eax:
-                    tmp_box = TempBox()
-                    self.rm.force_allocate_reg(tmp_box, selected_reg=reg)
-                    self.rm.possibly_free_var(tmp_box)
-
+        #
+        # We need edx as a temporary, but otherwise don't save any more
+        # register.  See comments in _build_malloc_slowpath().
+        tmp_box = TempBox()
+        self.rm.force_allocate_reg(tmp_box, selected_reg=edx)
+        self.rm.possibly_free_var(tmp_box)
+        #
         self.assembler.malloc_cond(
             gc_ll_descr.get_nursery_free_addr(),
             gc_ll_descr.get_nursery_top_addr(),
@@ -1337,14 +1334,26 @@
             if reg is eax:
                 continue      # ok to ignore this one
             if (isinstance(v, BoxPtr) and self.rm.stays_alive(v)):
-                if use_copy_area:
-                    assert reg in self.rm.REGLOC_TO_COPY_AREA_OFS
-                    area_offset = self.rm.REGLOC_TO_COPY_AREA_OFS[reg]
-                    gcrootmap.add_frame_offset(shape, area_offset)
-                else:
-                    assert reg in self.rm.REGLOC_TO_GCROOTMAP_REG_INDEX
-                    gcrootmap.add_callee_save_reg(
-                        shape, self.rm.REGLOC_TO_GCROOTMAP_REG_INDEX[reg])
+                #
+                # The register 'reg' is alive across this call.
+                gcrootmap = self.assembler.cpu.gc_ll_descr.gcrootmap
+                if gcrootmap is None or not gcrootmap.is_shadow_stack:
+                    #
+                    # Asmgcc: if reg is a callee-save register, we can
+                    # explicitly mark it as containing a BoxPtr.
+                    if reg in self.rm.REGLOC_TO_GCROOTMAP_REG_INDEX:
+                        gcrootmap.add_callee_save_reg(
+                            shape, self.rm.REGLOC_TO_GCROOTMAP_REG_INDEX[reg])
+                        continue
+                #
+                # Else, 'use_copy_area' must be True (otherwise this BoxPtr
+                # should not be in a register).  The copy area contains the
+                # real value of the register.
+                assert use_copy_area
+                assert reg in self.rm.REGLOC_TO_COPY_AREA_OFS
+                area_offset = self.rm.REGLOC_TO_COPY_AREA_OFS[reg]
+                gcrootmap.add_frame_offset(shape, area_offset)
+        #
         return gcrootmap.compress_callshape(shape,
                                             self.assembler.datablockwrapper)
 
diff --git a/pypy/jit/backend/x86/regloc.py b/pypy/jit/backend/x86/regloc.py
--- a/pypy/jit/backend/x86/regloc.py
+++ b/pypy/jit/backend/x86/regloc.py
@@ -476,6 +476,7 @@
 
     AND = _binaryop('AND')
     OR  = _binaryop('OR')
+    OR8 = _binaryop('OR8')
     XOR = _binaryop('XOR')
     NOT = _unaryop('NOT')
     SHL = _binaryop('SHL')
@@ -483,6 +484,7 @@
     SAR = _binaryop('SAR')
     TEST = _binaryop('TEST')
     TEST8 = _binaryop('TEST8')
+    BTS = _binaryop('BTS')
 
     ADD = _binaryop('ADD')
     SUB = _binaryop('SUB')
@@ -519,6 +521,8 @@
     UCOMISD = _binaryop('UCOMISD')
     CVTSI2SD = _binaryop('CVTSI2SD')
     CVTTSD2SI = _binaryop('CVTTSD2SI')
+    CVTSD2SS = _binaryop('CVTSD2SS')
+    CVTSS2SD = _binaryop('CVTSS2SD')
     
     SQRTSD = _binaryop('SQRTSD')
 
@@ -532,6 +536,8 @@
     PXOR  = _binaryop('PXOR')
     PCMPEQD = _binaryop('PCMPEQD')
 
+    MOVD = _binaryop('MOVD')
+
     CALL = _relative_unaryop('CALL')
     JMP = _relative_unaryop('JMP')
 
diff --git a/pypy/jit/backend/x86/runner.py b/pypy/jit/backend/x86/runner.py
--- a/pypy/jit/backend/x86/runner.py
+++ b/pypy/jit/backend/x86/runner.py
@@ -19,6 +19,7 @@
 class AbstractX86CPU(AbstractLLCPU):
     debug = True
     supports_floats = True
+    supports_singlefloats = True
 
     BOOTSTRAP_TP = lltype.FuncType([], lltype.Signed)
     dont_keepalive_stuff = False # for tests
diff --git a/pypy/jit/backend/x86/rx86.py b/pypy/jit/backend/x86/rx86.py
--- a/pypy/jit/backend/x86/rx86.py
+++ b/pypy/jit/backend/x86/rx86.py
@@ -496,6 +496,10 @@
     AND8_rr = insn(rex_fw, '\x20', byte_register(1), byte_register(2,8), '\xC0')
 
     OR8_rr = insn(rex_fw, '\x08', byte_register(1), byte_register(2,8), '\xC0')
+    OR8_mi = insn(rex_fw, '\x80', orbyte(1<<3), mem_reg_plus_const(1),
+                  immediate(2, 'b'))
+    OR8_ji = insn(rex_fw, '\x80', orbyte(1<<3), abs_, immediate(1),
+                  immediate(2, 'b'))
 
     NEG_r = insn(rex_w, '\xF7', register(1), '\xD8')
 
@@ -565,8 +569,12 @@
     TEST8_ji = insn(rex_nw, '\xF6', orbyte(0<<3), abs_, immediate(1), immediate(2, 'b'))
     TEST_rr = insn(rex_w, '\x85', register(2,8), register(1), '\xC0')
 
+    BTS_mr = insn(rex_w, '\x0F\xAB', register(2,8), mem_reg_plus_const(1))
+    BTS_jr = insn(rex_w, '\x0F\xAB', register(2,8), abs_, immediate(1))
+
     # x87 instructions
-    FSTP_b = insn('\xDD', orbyte(3<<3), stack_bp(1))
+    FSTPL_b = insn('\xDD', orbyte(3<<3), stack_bp(1)) # rffi.DOUBLE ('as' wants L??)
+    FSTPS_s = insn('\xD9', orbyte(3<<3), stack_sp(1)) # lltype.SingleFloat
 
     # ------------------------------ Random mess -----------------------
     RDTSC = insn('\x0F\x31')
@@ -583,8 +591,18 @@
     CVTTSD2SI_rx = xmminsn('\xF2', rex_w, '\x0F\x2C', register(1, 8), register(2), '\xC0')
     CVTTSD2SI_rb = xmminsn('\xF2', rex_w, '\x0F\x2C', register(1, 8), stack_bp(2))
 
-    MOVD_rx = xmminsn('\x66', rex_w, '\x0F\x7E', register(2, 8), register(1), '\xC0')
-    MOVD_xr = xmminsn('\x66', rex_w, '\x0F\x6E', register(1, 8), register(2), '\xC0')
+    CVTSD2SS_xx = xmminsn('\xF2', rex_nw, '\x0F\x5A',
+                          register(1, 8), register(2), '\xC0')
+    CVTSD2SS_xb = xmminsn('\xF2', rex_nw, '\x0F\x5A',
+                          register(1, 8), stack_bp(2))
+    CVTSS2SD_xx = xmminsn('\xF3', rex_nw, '\x0F\x5A',
+                          register(1, 8), register(2), '\xC0')
+    CVTSS2SD_xb = xmminsn('\xF3', rex_nw, '\x0F\x5A',
+                          register(1, 8), stack_bp(2))
+
+    MOVD_rx = xmminsn('\x66', rex_nw, '\x0F\x7E', register(2, 8), register(1), '\xC0')
+    MOVD_xr = xmminsn('\x66', rex_nw, '\x0F\x6E', register(1, 8), register(2), '\xC0')
+    MOVD_xb = xmminsn('\x66', rex_nw, '\x0F\x6E', register(1, 8), stack_bp(2))
 
     PSRAD_xi = xmminsn('\x66', rex_nw, '\x0F\x72', register(1), '\xE0', immediate(2, 'b'))
 
diff --git a/pypy/jit/backend/x86/test/test_regloc.py b/pypy/jit/backend/x86/test/test_regloc.py
--- a/pypy/jit/backend/x86/test/test_regloc.py
+++ b/pypy/jit/backend/x86/test/test_regloc.py
@@ -62,7 +62,7 @@
             assert mc.relocations == [5]
             expected = "\xE8" + struct.pack('<i', target - (rawstart + 5))
         elif IS_X86_64:
-            assert mc.relocations == []
+            assert mc.relocations is None
             if 0 <= target <= 0xffffffff:
                 assert length == 9
                 expected = (
diff --git a/pypy/jit/backend/x86/test/test_runner.py b/pypy/jit/backend/x86/test/test_runner.py
--- a/pypy/jit/backend/x86/test/test_runner.py
+++ b/pypy/jit/backend/x86/test/test_runner.py
@@ -463,7 +463,7 @@
             self.cpu.finish_once()
         finally:
             debug._log = None
-        assert ('jit-backend-counts', [('debug_print', '0:10')]) in dlog
+        assert ('jit-backend-counts', [('debug_print', 'loop -1:10')]) in dlog
 
     def test_debugger_checksum(self):
         loop = """
diff --git a/pypy/jit/backend/x86/test/test_rx86_32_auto_encoding.py b/pypy/jit/backend/x86/test/test_rx86_32_auto_encoding.py
--- a/pypy/jit/backend/x86/test/test_rx86_32_auto_encoding.py
+++ b/pypy/jit/backend/x86/test/test_rx86_32_auto_encoding.py
@@ -36,6 +36,14 @@
 def hexdump(s):
     return ' '.join(["%02X" % ord(c) for c in s])
 
+def reduce_to_32bit(s):
+    if s[:2] != '%r':
+        return s
+    if s[2:].isdigit():
+        return s + 'd'
+    else:
+        return '%e' + s[2:]
+
 # ____________________________________________________________
 
 COUNT1 = 15
@@ -180,12 +188,14 @@
     ##        for m, extra in args:
     ##            if m in (i386.MODRM, i386.MODRM8) or all:
     ##                suffix = suffixes[sizes[m]] + suffix
-            if argmodes and not self.is_xmm_insn:
+            if (argmodes and not self.is_xmm_insn
+                         and not instrname.startswith('FSTP')):
                 suffix = suffixes[self.WORD]
             # Special case: On 64-bit CPUs, rx86 assumes 64-bit integer
             # operands when converting to/from floating point, so we need to
             # indicate that with a suffix
-            if (self.WORD == 8) and instrname.startswith('CVT'):
+            if (self.WORD == 8) and (instrname.startswith('CVT') and
+                                     'SI' in instrname):
                 suffix = suffixes[self.WORD]
 
             if instr_suffix is not None:
@@ -218,10 +228,10 @@
                 and ops[1].startswith('%r')):
                 # movq $xxx, %rax => movl $xxx, %eax
                 suffix = 'l'
-                if ops[1][2:].isdigit():
-                    ops[1] += 'd'
-                else:
-                    ops[1] = '%e' + ops[1][2:]
+                ops[1] = reduce_to_32bit(ops[1])
+            if instrname.lower() == 'movd':
+                ops[0] = reduce_to_32bit(ops[0])
+                ops[1] = reduce_to_32bit(ops[1])
             #
             op = '\t%s%s %s%s' % (instrname.lower(), suffix,
                                   ', '.join(ops), following)
diff --git a/pypy/jit/backend/x86/test/test_zrpy_gc.py b/pypy/jit/backend/x86/test/test_zrpy_gc.py
--- a/pypy/jit/backend/x86/test/test_zrpy_gc.py
+++ b/pypy/jit/backend/x86/test/test_zrpy_gc.py
@@ -524,6 +524,76 @@
     def test_compile_framework_8(self):
         self.run('compile_framework_8')
 
+    def define_compile_framework_9(cls):
+        # Like compile_framework_8, but with variable indexes and large
+        # arrays, testing the card_marking case
+        def before(n, x):
+            return n, x, None, None, None, None, None, None, None, None, [X(123)], None
+        def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
+            if n < 1900:
+                check(l[0].x == 123)
+                num = 512 + (n & 7)
+                l = [None] * num
+                l[0] = X(123)
+                l[1] = X(n)
+                l[2] = X(n+10)
+                l[3] = X(n+20)
+                l[4] = X(n+30)
+                l[5] = X(n+40)
+                l[6] = X(n+50)
+                l[7] = X(n+60)
+                l[num-8] = X(n+70)
+                l[num-9] = X(n+80)
+                l[num-10] = X(n+90)
+                l[num-11] = X(n+100)
+                l[-12] = X(n+110)
+                l[-13] = X(n+120)
+                l[-14] = X(n+130)
+                l[-15] = X(n+140)
+            if n < 1800:
+                num = 512 + (n & 7)
+                check(len(l) == num)
+                check(l[0].x == 123)
+                check(l[1].x == n)
+                check(l[2].x == n+10)
+                check(l[3].x == n+20)
+                check(l[4].x == n+30)
+                check(l[5].x == n+40)
+                check(l[6].x == n+50)
+                check(l[7].x == n+60)
+                check(l[num-8].x == n+70)
+                check(l[num-9].x == n+80)
+                check(l[num-10].x == n+90)
+                check(l[num-11].x == n+100)
+                check(l[-12].x == n+110)
+                check(l[-13].x == n+120)
+                check(l[-14].x == n+130)
+                check(l[-15].x == n+140)
+            n -= x.foo
+            return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
+        def after(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
+            check(len(l) >= 512)
+            check(l[0].x == 123)
+            check(l[1].x == 2)
+            check(l[2].x == 12)
+            check(l[3].x == 22)
+            check(l[4].x == 32)
+            check(l[5].x == 42)
+            check(l[6].x == 52)
+            check(l[7].x == 62)
+            check(l[-8].x == 72)
+            check(l[-9].x == 82)
+            check(l[-10].x == 92)
+            check(l[-11].x == 102)
+            check(l[-12].x == 112)
+            check(l[-13].x == 122)
+            check(l[-14].x == 132)
+            check(l[-15].x == 142)
+        return before, f, after
+
+    def test_compile_framework_9(self):
+        self.run('compile_framework_9')
+
     def define_compile_framework_external_exception_handling(cls):
         def before(n, x):
             x = X(0)
diff --git a/pypy/jit/codewriter/assembler.py b/pypy/jit/codewriter/assembler.py
--- a/pypy/jit/codewriter/assembler.py
+++ b/pypy/jit/codewriter/assembler.py
@@ -76,6 +76,8 @@
                 TYPE = llmemory.Address
             if TYPE == llmemory.Address:
                 value = heaptracker.adr2int(value)
+            if TYPE is lltype.SingleFloat:
+                value = longlong.singlefloat2int(value)
             if not isinstance(value, (llmemory.AddressAsInt,
                                       ComputedIntSymbolic)):
                 value = lltype.cast_primitive(lltype.Signed, value)
diff --git a/pypy/jit/codewriter/call.py b/pypy/jit/codewriter/call.py
--- a/pypy/jit/codewriter/call.py
+++ b/pypy/jit/codewriter/call.py
@@ -5,10 +5,9 @@
 
 from pypy.jit.codewriter import support
 from pypy.jit.codewriter.jitcode import JitCode
-from pypy.jit.codewriter.effectinfo import VirtualizableAnalyzer
-from pypy.jit.codewriter.effectinfo import QuasiImmutAnalyzer
-from pypy.jit.codewriter.effectinfo import effectinfo_from_writeanalyze
-from pypy.jit.codewriter.effectinfo import EffectInfo, CallInfoCollection
+from pypy.jit.codewriter.effectinfo import (VirtualizableAnalyzer,
+    QuasiImmutAnalyzer, CanReleaseGILAnalyzer, effectinfo_from_writeanalyze,
+    EffectInfo, CallInfoCollection)
 from pypy.translator.simplify import get_funcobj, get_functype
 from pypy.rpython.lltypesystem import lltype, llmemory
 from pypy.translator.backendopt.canraise import RaiseAnalyzer
@@ -32,6 +31,7 @@
             self.readwrite_analyzer = ReadWriteAnalyzer(translator)
             self.virtualizable_analyzer = VirtualizableAnalyzer(translator)
             self.quasiimmut_analyzer = QuasiImmutAnalyzer(translator)
+            self.canreleasegil_analyzer = CanReleaseGILAnalyzer(translator)
         #
         for index, jd in enumerate(jitdrivers_sd):
             jd.index = index
@@ -219,15 +219,19 @@
                 assert not NON_VOID_ARGS, ("arguments not supported for "
                                            "loop-invariant function!")
         # build the extraeffect
-        can_invalidate = self.quasiimmut_analyzer.analyze(op)
+        can_release_gil = self.canreleasegil_analyzer.analyze(op)
+        # can_release_gil implies can_invalidate
+        can_invalidate = can_release_gil or self.quasiimmut_analyzer.analyze(op)
         if extraeffect is None:
             if self.virtualizable_analyzer.analyze(op):
                 extraeffect = EffectInfo.EF_FORCES_VIRTUAL_OR_VIRTUALIZABLE
             elif loopinvariant:
                 extraeffect = EffectInfo.EF_LOOPINVARIANT
             elif elidable:
-                # XXX check what to do about exceptions (also MemoryError?)
-                extraeffect = EffectInfo.EF_ELIDABLE
+                if self._canraise(op):
+                    extraeffect = EffectInfo.EF_ELIDABLE_CAN_RAISE
+                else:
+                    extraeffect = EffectInfo.EF_ELIDABLE_CANNOT_RAISE
             elif self._canraise(op):
                 extraeffect = EffectInfo.EF_CAN_RAISE
             else:
@@ -235,7 +239,7 @@
         #
         effectinfo = effectinfo_from_writeanalyze(
             self.readwrite_analyzer.analyze(op), self.cpu, extraeffect,
-            oopspecindex, can_invalidate)
+            oopspecindex, can_invalidate, can_release_gil)
         #
         if oopspecindex != EffectInfo.OS_NONE:
             assert effectinfo is not None
@@ -261,7 +265,7 @@
     def calldescr_canraise(self, calldescr):
         effectinfo = calldescr.get_extra_info()
         return (effectinfo is None or
-                effectinfo.extraeffect >= EffectInfo.EF_CAN_RAISE)
+                effectinfo.extraeffect > EffectInfo.EF_CANNOT_RAISE)
 
     def jitdriver_sd_from_portal_graph(self, graph):
         for jd in self.jitdrivers_sd:
diff --git a/pypy/jit/codewriter/effectinfo.py b/pypy/jit/codewriter/effectinfo.py
--- a/pypy/jit/codewriter/effectinfo.py
+++ b/pypy/jit/codewriter/effectinfo.py
@@ -9,10 +9,11 @@
     _cache = {}
 
     # the 'extraeffect' field is one of the following values:
-    EF_ELIDABLE                        = 0 #elidable function (and cannot raise)
+    EF_ELIDABLE_CANNOT_RAISE           = 0 #elidable function (and cannot raise)
     EF_LOOPINVARIANT                   = 1 #special: call it only once per loop
     EF_CANNOT_RAISE                    = 2 #a function which cannot raise
-    EF_CAN_RAISE                       = 3 #normal function (can raise)
+    EF_ELIDABLE_CAN_RAISE              = 3 #elidable function (but can raise)
+    EF_CAN_RAISE                       = 4 #normal function (can raise)
     EF_FORCES_VIRTUAL_OR_VIRTUALIZABLE = 5 #can raise and force virtualizables
 
     # the 'oopspecindex' field is one of the following values:
@@ -79,20 +80,23 @@
                 write_descrs_fields, write_descrs_arrays,
                 extraeffect=EF_CAN_RAISE,
                 oopspecindex=OS_NONE,
-                can_invalidate=False):
+                can_invalidate=False, can_release_gil=False):
         key = (frozenset(readonly_descrs_fields),
                frozenset(readonly_descrs_arrays),
                frozenset(write_descrs_fields),
                frozenset(write_descrs_arrays),
                extraeffect,
-               oopspecindex)
+               oopspecindex,
+               can_invalidate,
+               can_release_gil)
         if key in cls._cache:
             return cls._cache[key]
         result = object.__new__(cls)
         result.readonly_descrs_fields = readonly_descrs_fields
         result.readonly_descrs_arrays = readonly_descrs_arrays
         if extraeffect == EffectInfo.EF_LOOPINVARIANT or \
-           extraeffect == EffectInfo.EF_ELIDABLE:
+           extraeffect == EffectInfo.EF_ELIDABLE_CANNOT_RAISE or \
+           extraeffect == EffectInfo.EF_ELIDABLE_CAN_RAISE:
             result.write_descrs_fields = []
             result.write_descrs_arrays = []
         else:
@@ -100,6 +104,7 @@
             result.write_descrs_arrays = write_descrs_arrays
         result.extraeffect = extraeffect
         result.can_invalidate = can_invalidate
+        result.can_release_gil = can_release_gil
         result.oopspecindex = oopspecindex
         cls._cache[key] = result
         return result
@@ -111,12 +116,13 @@
         return self.extraeffect >= self.EF_FORCES_VIRTUAL_OR_VIRTUALIZABLE
 
     def has_random_effects(self):
-        return self.oopspecindex == self.OS_LIBFFI_CALL
+        return self.oopspecindex == self.OS_LIBFFI_CALL or self.can_release_gil
 
 def effectinfo_from_writeanalyze(effects, cpu,
                                  extraeffect=EffectInfo.EF_CAN_RAISE,
                                  oopspecindex=EffectInfo.OS_NONE,
-                                 can_invalidate=False):
+                                 can_invalidate=False,
+                                 can_release_gil=False):
     from pypy.translator.backendopt.writeanalyze import top_set
     if effects is top_set:
         return None
@@ -158,7 +164,8 @@
                       write_descrs_arrays,
                       extraeffect,
                       oopspecindex,
-                      can_invalidate)
+                      can_invalidate,
+                      can_release_gil)
 
 def consider_struct(TYPE, fieldname):
     if fieldType(TYPE, fieldname) is lltype.Void:
@@ -194,6 +201,16 @@
     def analyze_simple_operation(self, op, graphinfo):
         return op.opname == 'jit_force_quasi_immutable'
 
+class CanReleaseGILAnalyzer(BoolGraphAnalyzer):
+    def analyze_direct_call(self, graph, seen=None):
+        releases_gil = False
+        if hasattr(graph, "func") and hasattr(graph.func, "_ptr"):
+            releases_gil = graph.func._ptr._obj.releases_gil
+        return releases_gil or super(CanReleaseGILAnalyzer, self).analyze_direct_call(graph, seen)
+
+    def analyze_simple_operation(self, op, graphinfo):
+        return False
+
 # ____________________________________________________________
 
 class CallInfoCollection(object):
diff --git a/pypy/jit/codewriter/jitcode.py b/pypy/jit/codewriter/jitcode.py
--- a/pypy/jit/codewriter/jitcode.py
+++ b/pypy/jit/codewriter/jitcode.py
@@ -1,7 +1,6 @@
 from pypy.jit.metainterp.history import AbstractDescr
 from pypy.jit.codewriter import heaptracker
 from pypy.rlib.objectmodel import we_are_translated
-from pypy.rpython.lltypesystem import llmemory
 
 
 class JitCode(AbstractDescr):
@@ -102,7 +101,7 @@
 
     def _clone_if_mutable(self):
         raise NotImplementedError
-    
+
 class MissingLiveness(Exception):
     pass
 
diff --git a/pypy/jit/codewriter/jtransform.py b/pypy/jit/codewriter/jtransform.py
--- a/pypy/jit/codewriter/jtransform.py
+++ b/pypy/jit/codewriter/jtransform.py
@@ -1,18 +1,16 @@
-import py, sys
-from pypy.rpython.lltypesystem import lltype, llmemory, rstr, rclass
-from pypy.rpython import rlist
-from pypy.jit.metainterp.history import getkind
-from pypy.objspace.flow.model import SpaceOperation, Variable, Constant
-from pypy.objspace.flow.model import Block, Link, c_last_exception
-from pypy.jit.codewriter.flatten import ListOfKind, IndirectCallTargets
+import py
 from pypy.jit.codewriter import support, heaptracker, longlong
 from pypy.jit.codewriter.effectinfo import EffectInfo
+from pypy.jit.codewriter.flatten import ListOfKind, IndirectCallTargets
 from pypy.jit.codewriter.policy import log
+from pypy.jit.metainterp import quasiimmut
+from pypy.jit.metainterp.history import getkind
 from pypy.jit.metainterp.typesystem import deref, arrayItem
-from pypy.jit.metainterp import quasiimmut
-from pypy.rpython.rclass import IR_QUASIIMMUTABLE, IR_QUASIIMMUTABLE_ARRAY
+from pypy.objspace.flow.model import SpaceOperation, Variable, Constant, c_last_exception
 from pypy.rlib import objectmodel
 from pypy.rlib.jit import _we_are_jitted
+from pypy.rpython.lltypesystem import lltype, llmemory, rstr, rclass, rffi
+from pypy.rpython.rclass import IR_QUASIIMMUTABLE, IR_QUASIIMMUTABLE_ARRAY
 from pypy.translator.simplify import get_funcobj
 from pypy.translator.unsimplify import varoftype
 
@@ -787,7 +785,6 @@
             op2.result = op.result
             return op2
         elif toll:
-            from pypy.rpython.lltypesystem import rffi
             size, unsigned = rffi.size_and_sign(op.args[0].concretetype)
             if unsigned:
                 INTERMEDIATE = lltype.Unsigned
@@ -809,21 +806,27 @@
             return self.force_cast_without_longlong(op.args[0], op.result)
 
     def force_cast_without_longlong(self, v_arg, v_result):
-        from pypy.rpython.lltypesystem.rffi import size_and_sign, sizeof, FLOAT
-        from pypy.rlib.rarithmetic import intmask
-        #
-        if (v_result.concretetype in (FLOAT, lltype.Float) or
-            v_arg.concretetype in (FLOAT, lltype.Float)):
-            assert (v_result.concretetype == lltype.Float and
-                    v_arg.concretetype == lltype.Float), "xxx unsupported cast"
+        if v_result.concretetype == v_arg.concretetype:
             return
-        #
-        size2, unsigned2 = size_and_sign(v_result.concretetype)
-        assert size2 <= sizeof(lltype.Signed)
-        if size2 == sizeof(lltype.Signed):
+        if v_arg.concretetype == rffi.FLOAT:
+            assert v_result.concretetype == lltype.Float, "cast %s -> %s" % (
+                v_arg.concretetype, v_result.concretetype)
+            return SpaceOperation('cast_singlefloat_to_float', [v_arg],
+                                  v_result)
+        if v_result.concretetype == rffi.FLOAT:
+            assert v_arg.concretetype == lltype.Float, "cast %s -> %s" % (
+                v_arg.concretetype, v_result.concretetype)
+            return SpaceOperation('cast_float_to_singlefloat', [v_arg],
+                                  v_result)
+        return self.force_cast_without_singlefloat(v_arg, v_result)
+
+    def force_cast_without_singlefloat(self, v_arg, v_result):
+        size2, unsigned2 = rffi.size_and_sign(v_result.concretetype)
+        assert size2 <= rffi.sizeof(lltype.Signed)
+        if size2 == rffi.sizeof(lltype.Signed):
             return     # the target type is LONG or ULONG
-        size1, unsigned1 = size_and_sign(v_arg.concretetype)
-        assert size1 <= sizeof(lltype.Signed)
+        size1, unsigned1 = rffi.size_and_sign(v_arg.concretetype)
+        assert size1 <= rffi.sizeof(lltype.Signed)
         #
         def bounds(size, unsigned):
             if unsigned:
@@ -852,7 +855,6 @@
         return result
 
     def rewrite_op_direct_ptradd(self, op):
-        from pypy.rpython.lltypesystem import rffi
         # xxx otherwise, not implemented:
         assert op.args[0].concretetype == rffi.CCHARP
         #
@@ -905,7 +907,7 @@
                 op1 = self.prepare_builtin_call(op, "llong_%s", args)
                 op2 = self._handle_oopspec_call(op1, args,
                                                 EffectInfo.OS_LLONG_%s,
-                                                EffectInfo.EF_ELIDABLE)
+                                           EffectInfo.EF_ELIDABLE_CANNOT_RAISE)
                 if %r == "TO_INT":
                     assert op2.result.concretetype == lltype.Signed
                 return op2
@@ -1118,6 +1120,9 @@
         return meth(op, args, *descrs)
 
     def _get_list_nonneg_canraise_flags(self, op):
+        # XXX as far as I can see, this function will always return True
+        # because functions that are neither nonneg nor fast don't have an
+        # oopspec any more
         # xxx break of abstraction:
         func = get_funcobj(op.args[0].value)._callable
         # base hints on the name of the ll function, which is a bit xxx-ish
@@ -1363,15 +1368,15 @@
                     otherindex += EffectInfo._OS_offset_uni
                 self._register_extra_helper(otherindex, othername,
                                             argtypes, resulttype,
-                                            EffectInfo.EF_ELIDABLE)
+                                           EffectInfo.EF_ELIDABLE_CANNOT_RAISE)
         #
         return self._handle_oopspec_call(op, args, dict[oopspec_name],
-                                         EffectInfo.EF_ELIDABLE)
+                                         EffectInfo.EF_ELIDABLE_CANNOT_RAISE)
 
     def _handle_str2unicode_call(self, op, oopspec_name, args):
-        # ll_str2unicode is not EF_ELIDABLE, because it can raise
-        # UnicodeDecodeError...
-        return self._handle_oopspec_call(op, args, EffectInfo.OS_STR2UNICODE)
+        # ll_str2unicode can raise UnicodeDecodeError
+        return self._handle_oopspec_call(op, args, EffectInfo.OS_STR2UNICODE,
+                                         EffectInfo.EF_ELIDABLE_CAN_RAISE)
 
     # ----------
     # VirtualRefs.
@@ -1409,13 +1414,13 @@
         assert vinfo is not None
         self.vable_flags[op.args[0]] = op.args[2].value
         return []
-        
+
     # ---------
     # ll_math.sqrt_nonneg()
-    
+
     def _handle_math_sqrt_call(self, op, oopspec_name, args):
         return self._handle_oopspec_call(op, args, EffectInfo.OS_MATH_SQRT,
-                                         EffectInfo.EF_ELIDABLE)
+                                         EffectInfo.EF_ELIDABLE_CANNOT_RAISE)
 
     def rewrite_op_jit_force_quasi_immutable(self, op):
         v_inst, c_fieldname = op.args
diff --git a/pypy/jit/codewriter/longlong.py b/pypy/jit/codewriter/longlong.py
--- a/pypy/jit/codewriter/longlong.py
+++ b/pypy/jit/codewriter/longlong.py
@@ -7,7 +7,8 @@
 """
 
 import sys
-from pypy.rpython.lltypesystem import lltype
+from pypy.rpython.lltypesystem import lltype, rffi
+from pypy.rlib import rarithmetic, longlong2float
 
 
 if sys.maxint > 2147483647:
@@ -31,8 +32,6 @@
     # ---------- 32-bit platform ----------
     # the type FloatStorage is r_longlong, and conversion is needed
 
-    from pypy.rlib import rarithmetic, longlong2float
-
     is_64_bit = False
     supports_longlong = True
     r_float_storage = rarithmetic.r_longlong
@@ -41,9 +40,19 @@
     getfloatstorage = longlong2float.float2longlong
     getrealfloat    = longlong2float.longlong2float
     gethash         = lambda xll: rarithmetic.intmask(xll - (xll >> 32))
-    is_longlong     = lambda TYPE: (TYPE == lltype.SignedLongLong or
-                                    TYPE == lltype.UnsignedLongLong)
+    is_longlong     = lambda TYPE: (TYPE is lltype.SignedLongLong or
+                                    TYPE is lltype.UnsignedLongLong)
 
     # -------------------------------------
 
 ZEROF = getfloatstorage(0.0)
+
+# ____________________________________________________________
+
+def int2singlefloat(x):
+    x = rffi.r_uint(x)
+    return longlong2float.uint2singlefloat(x)
+
+def singlefloat2int(x):
+    x = longlong2float.singlefloat2uint(x)
+    return rffi.cast(lltype.Signed, x)
diff --git a/pypy/jit/codewriter/policy.py b/pypy/jit/codewriter/policy.py
--- a/pypy/jit/codewriter/policy.py
+++ b/pypy/jit/codewriter/policy.py
@@ -1,9 +1,7 @@
-from pypy.translator.simplify import get_funcobj
 from pypy.jit.metainterp import history
-from pypy.rpython.lltypesystem import lltype, rclass
 from pypy.tool.udir import udir
 
-import py, sys
+import py
 from pypy.tool.ansi_print import ansi_log
 log = py.log.Producer('jitcodewriter')
 py.log.setconsumer('jitcodewriter', ansi_log)
@@ -14,6 +12,7 @@
         self.unsafe_loopy_graphs = set()
         self.supports_floats = False
         self.supports_longlong = False
+        self.supports_singlefloats = False
 
     def set_supports_floats(self, flag):
         self.supports_floats = flag
@@ -21,6 +20,9 @@
     def set_supports_longlong(self, flag):
         self.supports_longlong = flag
 
+    def set_supports_singlefloats(self, flag):
+        self.supports_singlefloats = flag
+
     def dump_unsafe_loops(self):
         f = udir.join("unsafe-loops.txt").open('w')
         strs = [str(graph) for graph in self.unsafe_loopy_graphs]
@@ -60,8 +62,9 @@
                     func, '_jit_unroll_safe_', False)
 
         unsupported = contains_unsupported_variable_type(graph,
-                                                         self.supports_floats,
-                                                         self.supports_longlong)
+                            self.supports_floats,
+                            self.supports_longlong,
+                            self.supports_singlefloats)
         res = see_function and not unsupported
         if res and contains_loop:
             self.unsafe_loopy_graphs.add(graph)
@@ -82,17 +85,24 @@
         return res
 
 def contains_unsupported_variable_type(graph, supports_floats,
-                                       supports_longlong):
+                                              supports_longlong,
+                                              supports_singlefloats):
     getkind = history.getkind
     try:
         for block in graph.iterblocks():
             for v in block.inputargs:
-                getkind(v.concretetype, supports_floats, supports_longlong)
+                getkind(v.concretetype, supports_floats,
+                                        supports_longlong,
+                                        supports_singlefloats)
             for op in block.operations:
                 for v in op.args:
-                    getkind(v.concretetype, supports_floats, supports_longlong)
+                    getkind(v.concretetype, supports_floats,
+                                            supports_longlong,
+                                            supports_singlefloats)
                 v = op.result
-                getkind(v.concretetype, supports_floats, supports_longlong)
+                getkind(v.concretetype, supports_floats,
+                                        supports_longlong,
+                                        supports_singlefloats)
     except NotImplementedError, e:
         log.WARNING('%s, ignoring graph' % (e,))
         log.WARNING('  %s' % (graph,))
diff --git a/pypy/jit/codewriter/regalloc.py b/pypy/jit/codewriter/regalloc.py
--- a/pypy/jit/codewriter/regalloc.py
+++ b/pypy/jit/codewriter/regalloc.py
@@ -1,128 +1,8 @@
-import sys
-from pypy.objspace.flow.model import Variable
-from pypy.tool.algo.color import DependencyGraph
-from pypy.tool.algo.unionfind import UnionFind
+from pypy.tool.algo import regalloc
 from pypy.jit.metainterp.history import getkind
 from pypy.jit.codewriter.flatten import ListOfKind
 
+
 def perform_register_allocation(graph, kind):
-    """Perform register allocation for the Variables of the given 'kind'
-    in the 'graph'."""
-    regalloc = RegAllocator(graph, kind)
-    regalloc.make_dependencies()
-    regalloc.coalesce_variables()
-    regalloc.find_node_coloring()
-    return regalloc
-
-
-class RegAllocator(object):
-    DEBUG_REGALLOC = False
-
-    def __init__(self, graph, kind):
-        self.graph = graph
-        self.kind = kind
-
-    def make_dependencies(self):
-        dg = DependencyGraph()
-        for block in self.graph.iterblocks():
-            # Compute die_at = {Variable: index_of_operation_with_last_usage}
-            die_at = dict.fromkeys(block.inputargs, 0)
-            for i, op in enumerate(block.operations):
-                for v in op.args:
-                    if isinstance(v, Variable):
-                        die_at[v] = i
-                    elif isinstance(v, ListOfKind):
-                        for v1 in v:
-                            if isinstance(v1, Variable):
-                                die_at[v1] = i
-                if op.result is not None:
-                    die_at[op.result] = i + 1
-            if isinstance(block.exitswitch, tuple):
-                for x in block.exitswitch:
-                    die_at.pop(x, None)
-            else:
-                die_at.pop(block.exitswitch, None)
-            for link in block.exits:
-                for v in link.args:
-                    die_at.pop(v, None)
-            die_at = [(value, key) for (key, value) in die_at.items()]
-            die_at.sort()
-            die_at.append((sys.maxint,))
-            # Done.  XXX the code above this line runs 3 times
-            # (for kind in KINDS) to produce the same result...
-            livevars = [v for v in block.inputargs
-                          if getkind(v.concretetype) == self.kind]
-            # Add the variables of this block to the dependency graph
-            for i, v in enumerate(livevars):
-                dg.add_node(v)
-                for j in range(i):
-                    dg.add_edge(livevars[j], v)
-            livevars = set(livevars)
-            die_index = 0
-            for i, op in enumerate(block.operations):
-                while die_at[die_index][0] == i:
-                    try:
-                        livevars.remove(die_at[die_index][1])
-                    except KeyError:
-                        pass
-                    die_index += 1
-                if (op.result is not None and
-                    getkind(op.result.concretetype) == self.kind):
-                    dg.add_node(op.result)
-                    for v in livevars:
-                        if getkind(v.concretetype) == self.kind:
-                            dg.add_edge(v, op.result)
-                    livevars.add(op.result)
-        self._depgraph = dg
-
-    def coalesce_variables(self):
-        self._unionfind = UnionFind()
-        pendingblocks = list(self.graph.iterblocks())
-        while pendingblocks:
-            block = pendingblocks.pop()
-            # Aggressively try to coalesce each source variable with its
-            # target.  We start from the end of the graph instead of
-            # from the beginning.  This is a bit arbitrary, but the idea
-            # is that the end of the graph runs typically more often
-            # than the start, given that we resume execution from the
-            # middle during blackholing.
-            for link in block.exits:
-                if link.last_exception is not None:
-                    self._depgraph.add_node(link.last_exception)
-                if link.last_exc_value is not None:
-                    self._depgraph.add_node(link.last_exc_value)
-                for i, v in enumerate(link.args):
-                    self._try_coalesce(v, link.target.inputargs[i])
-
-    def _try_coalesce(self, v, w):
-        if isinstance(v, Variable) and getkind(v.concretetype) == self.kind:
-            dg = self._depgraph
-            uf = self._unionfind
-            v0 = uf.find_rep(v)
-            w0 = uf.find_rep(w)
-            if v0 is not w0 and v0 not in dg.neighbours[w0]:
-                _, rep, _ = uf.union(v0, w0)
-                assert uf.find_rep(v0) is uf.find_rep(w0) is rep
-                if rep is v0:
-                    dg.coalesce(w0, v0)
-                else:
-                    assert rep is w0
-                    dg.coalesce(v0, w0)
-
-    def find_node_coloring(self):
-        self._coloring = self._depgraph.find_node_coloring()
-        if self.DEBUG_REGALLOC:
-            for block in self.graph.iterblocks():
-                print block
-                for v in block.getvariables():
-                    print '\t', v, '\t', self.getcolor(v)
-
-    def getcolor(self, v):
-        return self._coloring[self._unionfind.find_rep(v)]
-
-    def swapcolors(self, col1, col2):
-        for key, value in self._coloring.items():
-            if value == col1:
-                self._coloring[key] = col2
-            elif value == col2:
-                self._coloring[key] = col1
+    checkkind = lambda v: getkind(v.concretetype) == kind
+    return regalloc.perform_register_allocation(graph, checkkind, ListOfKind)
diff --git a/pypy/jit/codewriter/support.py b/pypy/jit/codewriter/support.py
--- a/pypy/jit/codewriter/support.py
+++ b/pypy/jit/codewriter/support.py
@@ -20,6 +20,7 @@
 from pypy.rpython.annlowlevel import MixLevelHelperAnnotator
 from pypy.jit.metainterp.typesystem import deref
 from pypy.rlib import rgc
+from pypy.rlib.jit import elidable
 from pypy.rlib.rarithmetic import r_longlong, r_ulonglong, r_uint, intmask
 
 def getargtypes(annotator, values):
@@ -167,9 +168,14 @@
 
 _ll_5_list_ll_arraycopy = rgc.ll_arraycopy
 
+ at elidable
 def _ll_1_gc_identityhash(x):
     return lltype.identityhash(x)
 
+# the following function should not be "@elidable": I can think of
+# a corner case in which id(const) is constant-folded, and then 'const'
+# disappears and is collected too early (possibly causing another object
+# with the same id() to appear).
 def _ll_1_gc_id(ptr):
     return llop.gc_id(lltype.Signed, ptr)
 
@@ -185,7 +191,7 @@
     return llop.int_floordiv(lltype.Signed, x, y)
 
 def _ll_2_int_floordiv_ovf(x, y):
-    if x == -sys.maxint - 1 and y == -1:        
+    if x == -sys.maxint - 1 and y == -1:
         raise OverflowError
     return llop.int_floordiv(lltype.Signed, x, y)
 
@@ -222,7 +228,7 @@
         return -x
     else:
         return x
-        
+
 # math support
 # ------------
 
@@ -395,7 +401,7 @@
     ('int_lshift_ovf',       [lltype.Signed, lltype.Signed], lltype.Signed),
     ('int_abs',              [lltype.Signed],                lltype.Signed),
     ('ll_math.ll_math_sqrt', [lltype.Float],                 lltype.Float),
-    ]
+]
 
 
 class LLtypeHelpers:
@@ -420,10 +426,6 @@
     _ll_1_dict_values.need_result_type = True
     _ll_1_dict_items .need_result_type = True
 
-    def _ll_1_newdictiter(ITER, d):
-        return ll_rdict.ll_dictiter(lltype.Ptr(ITER), d)
-    _ll_1_newdictiter.need_result_type = True
-
     _dictnext_keys   = staticmethod(ll_rdict.ll_dictnext_group['keys'])
     _dictnext_values = staticmethod(ll_rdict.ll_dictnext_group['values'])
     _dictnext_items  = staticmethod(ll_rdict.ll_dictnext_group['items'])
@@ -574,10 +576,6 @@
     _ll_1_dict_values.need_result_type = True
     _ll_1_dict_items .need_result_type = True
 
-    def _ll_1_newdictiter(ITER, d):
-        return oo_rdict.ll_dictiter(ITER, d)
-    _ll_1_newdictiter.need_result_type = True
-
     _dictnext_keys   = staticmethod(oo_rdict.ll_dictnext_group['keys'])
     _dictnext_values = staticmethod(oo_rdict.ll_dictnext_group['values'])
     _dictnext_items  = staticmethod(oo_rdict.ll_dictnext_group['items'])
diff --git a/pypy/jit/codewriter/test/test_call.py b/pypy/jit/codewriter/test/test_call.py
--- a/pypy/jit/codewriter/test/test_call.py
+++ b/pypy/jit/codewriter/test/test_call.py
@@ -1,6 +1,6 @@
 import py
 from pypy.objspace.flow.model import SpaceOperation, Constant, Variable
-from pypy.rpython.lltypesystem import lltype
+from pypy.rpython.lltypesystem import lltype, rffi
 from pypy.translator.unsimplify import varoftype
 from pypy.rlib import jit
 from pypy.jit.codewriter.call import CallControl
@@ -103,7 +103,7 @@
 
     op = SpaceOperation('direct_call', [Constant(object())],
                         Variable())
-    assert cc.guess_call_kind(op) == 'residual'        
+    assert cc.guess_call_kind(op) == 'residual'
 
     class funcptr:
         class graph:
@@ -118,7 +118,7 @@
     op = SpaceOperation('direct_call', [Constant(funcptr)],
                         Variable())
     res = cc.graphs_from(op)
-    assert res == [g]        
+    assert res == [g]
     assert cc.guess_call_kind(op) == 'regular'
 
     class funcptr:
@@ -126,7 +126,7 @@
     op = SpaceOperation('direct_call', [Constant(funcptr)],
                         Variable())
     res = cc.graphs_from(op)
-    assert res is None        
+    assert res is None
     assert cc.guess_call_kind(op) == 'residual'
 
     h = object()
@@ -142,7 +142,7 @@
                         Variable())
     res = cc.graphs_from(op)
     assert res is None
-    assert cc.guess_call_kind(op) == 'residual'        
+    assert cc.guess_call_kind(op) == 'residual'
 
 # ____________________________________________________________
 
@@ -171,3 +171,24 @@
 
 def test_jit_force_virtualizable_effectinfo():
     py.test.skip("XXX add a test for CallControl.getcalldescr() -> EF_xxx")
+
+def test_releases_gil_analyzer():
+    from pypy.jit.backend.llgraph.runner import LLtypeCPU
+
+    T = rffi.CArrayPtr(rffi.TIME_T)
+    external = rffi.llexternal("time", [T], rffi.TIME_T, threadsafe=True)
+
+    @jit.dont_look_inside
+    def f():
+        return external(lltype.nullptr(T.TO))
+
+    rtyper = support.annotate(f, [])
+    jitdriver_sd = FakeJitDriverSD(rtyper.annotator.translator.graphs[0])
+    cc = CallControl(LLtypeCPU(rtyper), jitdrivers_sd=[jitdriver_sd])
+    res = cc.find_all_graphs(FakePolicy())
+
+    [f_graph] = [x for x in res if x.func is f]
+    [block, _] = list(f_graph.iterblocks())
+    [op] = block.operations
+    call_descr = cc.getcalldescr(op)
+    assert call_descr.extrainfo.can_release_gil
\ No newline at end of file
diff --git a/pypy/jit/codewriter/test/test_jtransform.py b/pypy/jit/codewriter/test/test_jtransform.py
--- a/pypy/jit/codewriter/test/test_jtransform.py
+++ b/pypy/jit/codewriter/test/test_jtransform.py
@@ -120,9 +120,9 @@
             assert argtypes[0] == [v.concretetype for v in op.args[1:]]
             assert argtypes[1] == op.result.concretetype
             if oopspecindex == EI.OS_STR2UNICODE:
-                assert extraeffect == None    # not pure, can raise!
+                assert extraeffect == EI.EF_ELIDABLE_CAN_RAISE
             else:
-                assert extraeffect == EI.EF_ELIDABLE
+                assert extraeffect == EI.EF_ELIDABLE_CANNOT_RAISE
         return 'calldescr-%d' % oopspecindex
     def calldescr_canraise(self, calldescr):
         return False
@@ -769,7 +769,7 @@
         def get_vinfo(self, v):
             return None
         def could_be_green_field(self, S1, name1):
-            assert S1 is S
+            assert S1 == S
             assert name1 == 'x'
             return True
     S = lltype.GcStruct('S', ('x', lltype.Char),
diff --git a/pypy/jit/codewriter/test/test_longlong.py b/pypy/jit/codewriter/test/test_longlong.py
--- a/pypy/jit/codewriter/test/test_longlong.py
+++ b/pypy/jit/codewriter/test/test_longlong.py
@@ -230,3 +230,18 @@
             assert list(op1.args[3]) == []
             assert list(op1.args[4]) == vlist
             assert op1.result == v_result
+
+
+##def test_singlefloat_constants():
+##    v_x = varoftype(TYPE)
+##    vlist = [v_x, const(rffi.cast(TYPE, 7))]
+##    v_result = varoftype(TYPE)
+##    op = SpaceOperation('llong_add', vlist, v_result)
+##    tr = Transformer(FakeCPU(), FakeBuiltinCallControl())
+##    op1 = tr.rewrite_operation(op)
+##    #
+##    assert op1.opname == 'residual_call_irf_f'
+##    assert list(op1.args[2]) == []
+##    assert list(op1.args[3]) == []
+##    assert list(op1.args[4]) == vlist
+##    assert op1.result == v_result
diff --git a/pypy/jit/codewriter/test/test_policy.py b/pypy/jit/codewriter/test/test_policy.py
--- a/pypy/jit/codewriter/test/test_policy.py
+++ b/pypy/jit/codewriter/test/test_policy.py
@@ -12,24 +12,30 @@
     graph = support.getgraph(f, [5])
     for sf in [False, True]:
         for sll in [False, True]:
-            assert not contains_unsupported_variable_type(graph, sf, sll)
+            for ssf in [False, True]:
+                assert not contains_unsupported_variable_type(graph, sf,
+                                                              sll, ssf)
     #
     graph = support.getgraph(f, [5.5])
     for sf in [False, True]:
         for sll in [False, True]:
-            res = contains_unsupported_variable_type(graph, sf, sll)
-            assert res is not sf
+            for ssf in [False, True]:
+                res = contains_unsupported_variable_type(graph, sf, sll, ssf)
+                assert res is not sf
     #
     graph = support.getgraph(f, [r_singlefloat(5.5)])
     for sf in [False, True]:
         for sll in [False, True]:
-            assert contains_unsupported_variable_type(graph, sf, sll)
+            for ssf in [False, True]:
+                res = contains_unsupported_variable_type(graph, sf, sll, ssf)
+                assert res == (not ssf)
     #
     graph = support.getgraph(f, [r_longlong(5)])
     for sf in [False, True]:
         for sll in [False, True]:
-            res = contains_unsupported_variable_type(graph, sf, sll)
-            assert res == (sys.maxint == 2147483647 and not sll)
+            for ssf in [False, True]:
+                res = contains_unsupported_variable_type(graph, sf, sll, ssf)
+                assert res == (sys.maxint == 2147483647 and not sll)
 
 
 def test_regular_function():
diff --git a/pypy/jit/metainterp/blackhole.py b/pypy/jit/metainterp/blackhole.py
--- a/pypy/jit/metainterp/blackhole.py
+++ b/pypy/jit/metainterp/blackhole.py
@@ -623,6 +623,19 @@
         x = float(a)
         return longlong.getfloatstorage(x)
 
+    @arguments("f", returns="i")
+    def bhimpl_cast_float_to_singlefloat(a):
+        from pypy.rlib.rarithmetic import r_singlefloat
+        a = longlong.getrealfloat(a)
+        a = r_singlefloat(a)
+        return longlong.singlefloat2int(a)
+
+    @arguments("i", returns="f")
+    def bhimpl_cast_singlefloat_to_float(a):
+        a = longlong.int2singlefloat(a)
+        a = float(a)
+        return longlong.getfloatstorage(a)
+
     # ----------
     # control flow operations
 
diff --git a/pypy/jit/metainterp/compile.py b/pypy/jit/metainterp/compile.py
--- a/pypy/jit/metainterp/compile.py
+++ b/pypy/jit/metainterp/compile.py
@@ -668,10 +668,9 @@
     def handle_fail(self, metainterp_sd, jitdriver_sd):
         cpu = metainterp_sd.cpu
         exception = cpu.grab_exc_value()
+        assert exception, "PropagateExceptionDescr: no exception??"
         raise metainterp_sd.ExitFrameWithExceptionRef(cpu, exception)
 
-propagate_exception_descr = PropagateExceptionDescr()
-
 def compile_tmp_callback(cpu, jitdriver_sd, greenboxes, redboxes,
                          memory_manager=None):
     """Make a LoopToken that corresponds to assembler code that just
@@ -705,7 +704,7 @@
         finishargs = []
     #
     jd = jitdriver_sd
-    faildescr = propagate_exception_descr
+    faildescr = PropagateExceptionDescr()
     operations = [
         ResOperation(rop.CALL, callargs, result, descr=jd.portal_calldescr),
         ResOperation(rop.GUARD_NO_EXCEPTION, [], None, descr=faildescr),
diff --git a/pypy/jit/metainterp/executor.py b/pypy/jit/metainterp/executor.py
--- a/pypy/jit/metainterp/executor.py
+++ b/pypy/jit/metainterp/executor.py
@@ -50,7 +50,7 @@
     func = argboxes[0].getint()
     # do the call using the correct function from the cpu
     rettype = descr.get_return_type()
-    if rettype == INT:
+    if rettype == INT or rettype == 'S':       # *S*ingle float
         try:
             result = cpu.bh_call_i(func, descr, args_i, args_r, args_f)
         except Exception, e:
@@ -64,7 +64,7 @@
             metainterp.execute_raised(e)
             result = NULL
         return BoxPtr(result)
-    if rettype == FLOAT or rettype == 'L':
+    if rettype == FLOAT or rettype == 'L':     # *L*ong long
         try:
             result = cpu.bh_call_f(func, descr, args_i, args_r, args_f)
         except Exception, e:
diff --git a/pypy/jit/metainterp/history.py b/pypy/jit/metainterp/history.py
--- a/pypy/jit/metainterp/history.py
+++ b/pypy/jit/metainterp/history.py
@@ -20,12 +20,16 @@
 
 FAILARGS_LIMIT = 1000
 
-def getkind(TYPE, supports_floats=True, supports_longlong=True):
+def getkind(TYPE, supports_floats=True,
+                  supports_longlong=True,
+                  supports_singlefloats=True):
     if TYPE is lltype.Void:
         return "void"
     elif isinstance(TYPE, lltype.Primitive):
         if TYPE is lltype.Float and supports_floats:
             return 'float'
+        if TYPE is lltype.SingleFloat and supports_singlefloats:
+            return 'int'     # singlefloats are stored in an int
         if TYPE in (lltype.Float, lltype.SingleFloat):
             raise NotImplementedError("type %s not supported" % TYPE)
         # XXX fix this for oo...
@@ -145,6 +149,7 @@
         """ Implement in call descr.
         Must return INT, REF, FLOAT, or 'v' for void.
         On 32-bit (hack) it can also be 'L' for longlongs.
+        Additionally it can be 'S' for singlefloats.
         """
         raise NotImplementedError
 
diff --git a/pypy/jit/metainterp/optimizeopt/__init__.py b/pypy/jit/metainterp/optimizeopt/__init__.py
--- a/pypy/jit/metainterp/optimizeopt/__init__.py
+++ b/pypy/jit/metainterp/optimizeopt/__init__.py
@@ -55,17 +55,16 @@
 
 
 def optimize_loop_1(metainterp_sd, loop, enable_opts,
-                    inline_short_preamble=True, retraced=False):
+                    inline_short_preamble=True, retraced=False, bridge=False):
     """Optimize loop.operations to remove internal overheadish operations.
     """
 
     optimizations, unroll = build_opt_chain(metainterp_sd, enable_opts,
                                             inline_short_preamble, retraced)
-
     if unroll:
         optimize_unroll(metainterp_sd, loop, optimizations)
     else:
-        optimizer = Optimizer(metainterp_sd, loop, optimizations)
+        optimizer = Optimizer(metainterp_sd, loop, optimizations, bridge)
         optimizer.propagate_all_forward()
 
 def optimize_bridge_1(metainterp_sd, bridge, enable_opts,
@@ -77,7 +76,7 @@
     except KeyError:
         pass
     optimize_loop_1(metainterp_sd, bridge, enable_opts,
-                    inline_short_preamble, retraced)
+                    inline_short_preamble, retraced, bridge=True)
 
 if __name__ == '__main__':
     print ALL_OPTS_NAMES
diff --git a/pypy/jit/metainterp/optimizeopt/fficall.py b/pypy/jit/metainterp/optimizeopt/fficall.py
--- a/pypy/jit/metainterp/optimizeopt/fficall.py
+++ b/pypy/jit/metainterp/optimizeopt/fficall.py
@@ -1,12 +1,11 @@
 from pypy.rpython.annlowlevel import cast_base_ptr_to_instance
 from pypy.rlib.objectmodel import we_are_translated
 from pypy.rlib.libffi import Func
-from pypy.rlib.debug import debug_start, debug_stop, debug_print, have_debug_prints
+from pypy.rlib.debug import debug_print
 from pypy.jit.codewriter.effectinfo import EffectInfo
 from pypy.jit.metainterp.resoperation import rop, ResOperation
 from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method
 from pypy.jit.metainterp.optimizeopt.optimizer import Optimization
-from pypy.jit.backend.llsupport.ffisupport import UnsupportedKind
 
 
 class FuncInfo(object):
@@ -20,11 +19,8 @@
         self.funcval = funcval
         self.opargs = []
         argtypes, restype = self._get_signature(funcval)
-        try:
-            self.descr = cpu.calldescrof_dynamic(argtypes, restype)
-        except UnsupportedKind:
-            # e.g., I or U for long longs
-            self.descr = None
+        self.descr = cpu.calldescrof_dynamic(argtypes, restype)
+        # ^^^ may be None if unsupported
         self.prepare_op = prepare_op
         self.delayed_ops = []
 
@@ -48,7 +44,7 @@
           inst_argtypes is actually a low-level array, but we can use it
           directly since the only thing we do with it is to read its items
         """
-        
+
         llfunc = funcval.box.getref_base()
         if we_are_translated():
             func = cast_base_ptr_to_instance(Func, llfunc)
@@ -78,14 +74,6 @@
         else:
             self.logops = None
 
-    def propagate_begin_forward(self):
-        debug_start('jit-log-ffiopt')
-        Optimization.propagate_begin_forward(self)
-
-    def propagate_end_forward(self):
-        debug_stop('jit-log-ffiopt')
-        Optimization.propagate_end_forward(self)
-
     def reconstruct_for_next_iteration(self, optimizer, valuemap):
         return OptFfiCall()
         # FIXME: Should any status be saved for next iteration?
@@ -184,7 +172,8 @@
     def do_call(self, op):
         funcval = self._get_funcval(op)
         funcinfo = self.funcinfo
-        if not funcinfo or funcinfo.funcval is not funcval:
+        if (not funcinfo or funcinfo.funcval is not funcval or
+            funcinfo.descr is None):
             return [op] # cannot optimize
         funcsymval = self.getvalue(op.getarg(2))
         arglist = [funcsymval.force_box()]
diff --git a/pypy/jit/metainterp/optimizeopt/heap.py b/pypy/jit/metainterp/optimizeopt/heap.py
--- a/pypy/jit/metainterp/optimizeopt/heap.py
+++ b/pypy/jit/metainterp/optimizeopt/heap.py
@@ -1,9 +1,10 @@
 import os
-from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method
-from pypy.jit.metainterp.resoperation import rop, ResOperation
-from pypy.rlib.objectmodel import we_are_translated
+
 from pypy.jit.metainterp.jitexc import JitException
 from pypy.jit.metainterp.optimizeopt.optimizer import Optimization
+from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method
+from pypy.jit.metainterp.resoperation import rop
+from pypy.rlib.objectmodel import we_are_translated
 
 
 class CachedField(object):
@@ -73,7 +74,7 @@
         assert self._lazy_setfield is None
         self._cached_fields[structvalue] = fieldvalue
 
-    def force_lazy_setfield(self, optheap):
+    def force_lazy_setfield(self, optheap, can_cache=True):
         op = self._lazy_setfield
         if op is not None:
             # This is the way _lazy_setfield is usually reset to None.
@@ -83,12 +84,16 @@
             self._cached_fields.clear()
             self._lazy_setfield = None
             optheap.next_optimization.propagate_forward(op)
+            if not can_cache:
+                return
             # Once it is done, we can put at least one piece of information
             # back in the cache: the value of this particular structure's
             # field.
             structvalue = optheap.getvalue(op.getarg(0))
             fieldvalue  = optheap.getvalue(op.getarglist()[-1])
             self.remember_field_value(structvalue, fieldvalue)
+        elif not can_cache:
+            self._cached_fields.clear()
 
     def get_reconstructed(self, optimizer, valuemap):
         assert self._lazy_setfield is None
@@ -202,20 +207,9 @@
                 for arraydescr in effectinfo.readonly_descrs_arrays:
                     self.force_lazy_setarrayitem(arraydescr)
                 for fielddescr in effectinfo.write_descrs_fields:
-                    self.force_lazy_setfield(fielddescr)
-                    try:
-                        cf = self.cached_fields[fielddescr]
-                        cf._cached_fields.clear()
-                    except KeyError:
-                        pass
+                    self.force_lazy_setfield(fielddescr, can_cache=False)
                 for arraydescr in effectinfo.write_descrs_arrays:
-                    self.force_lazy_setarrayitem(arraydescr)
-                    try:
-                        submap = self.cached_arrayitems[arraydescr]
-                        for cf in submap.itervalues():
-                            cf._cached_fields.clear()
-                    except KeyError:
-                        pass
+                    self.force_lazy_setarrayitem(arraydescr, can_cache=False)
                 if effectinfo.check_forces_virtual_or_virtualizable():
                     vrefinfo = self.optimizer.metainterp_sd.virtualref_info
                     self.force_lazy_setfield(vrefinfo.descr_forced)
@@ -238,20 +232,20 @@
                     if value in cf._cached_fields:
                         cf._cached_fields[newvalue] = cf._cached_fields[value]
 
-    def force_lazy_setfield(self, descr):
+    def force_lazy_setfield(self, descr, can_cache=True):
         try:
             cf = self.cached_fields[descr]
         except KeyError:
             return
-        cf.force_lazy_setfield(self)
+        cf.force_lazy_setfield(self, can_cache)
 
-    def force_lazy_setarrayitem(self, arraydescr):
+    def force_lazy_setarrayitem(self, arraydescr, can_cache=True):
         try:
             submap = self.cached_arrayitems[arraydescr]
         except KeyError:
             return
         for cf in submap.values():
-            cf.force_lazy_setfield(self)
+            cf.force_lazy_setfield(self, can_cache)
 
     def fixup_guard_situation(self):
         # hackish: reverse the order of the last two operations if it makes
@@ -387,7 +381,7 @@
             cf.do_setfield(self, op)
         else:
             # variable index, so make sure the lazy setarrayitems are done
-            self.force_lazy_setarrayitem(op.getdescr())
+            self.force_lazy_setarrayitem(op.getdescr(), can_cache=False)
             # and then emit the operation
             self.emit_operation(op)
 
diff --git a/pypy/jit/metainterp/optimizeopt/intbounds.py b/pypy/jit/metainterp/optimizeopt/intbounds.py
--- a/pypy/jit/metainterp/optimizeopt/intbounds.py
+++ b/pypy/jit/metainterp/optimizeopt/intbounds.py
@@ -1,9 +1,10 @@
+from pypy.jit.metainterp.history import ConstInt
+from pypy.jit.metainterp.optimizeopt.intutils import (IntBound, IntLowerBound,
+    IntUpperBound)
 from pypy.jit.metainterp.optimizeopt.optimizer import Optimization, CONST_1, CONST_0
 from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method
-from pypy.jit.metainterp.optimizeopt.intutils import (IntBound, IntUnbounded,
-    IntLowerBound, IntUpperBound)
-from pypy.jit.metainterp.history import Const, ConstInt
-from pypy.jit.metainterp.resoperation import rop, ResOperation
+from pypy.jit.metainterp.resoperation import rop
+
 
 class OptIntBounds(Optimization):
     """Keeps track of the bounds placed on integers by guards and remove
diff --git a/pypy/jit/metainterp/optimizeopt/optimizer.py b/pypy/jit/metainterp/optimizeopt/optimizer.py
--- a/pypy/jit/metainterp/optimizeopt/optimizer.py
+++ b/pypy/jit/metainterp/optimizeopt/optimizer.py
@@ -1,17 +1,11 @@
-from pypy.jit.metainterp.history import Box, BoxInt, LoopToken, BoxFloat,\
-     ConstFloat
-from pypy.jit.metainterp.history import Const, ConstInt, ConstPtr, ConstObj, REF
+from pypy.jit.metainterp import jitprof, resume, compile
+from pypy.jit.metainterp.executor import execute_nonspec
+from pypy.jit.metainterp.history import BoxInt, BoxFloat, Const, ConstInt, REF
+from pypy.jit.metainterp.optimizeopt.intutils import IntBound, IntUnbounded
+from pypy.jit.metainterp.optimizeopt.util import (make_dispatcher_method,
+    args_dict)
 from pypy.jit.metainterp.resoperation import rop, ResOperation
-from pypy.jit.metainterp import jitprof
-from pypy.jit.metainterp.executor import execute_nonspec
-from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method, sort_descrs
-from pypy.jit.metainterp.optimizeopt.util import descrlist_dict, args_dict
-from pypy.jit.metainterp.optimize import InvalidLoop
-from pypy.jit.metainterp import resume, compile
 from pypy.jit.metainterp.typesystem import llhelper, oohelper
-from pypy.rpython.lltypesystem import lltype
-from pypy.jit.metainterp.history import AbstractDescr, make_hashable_int
-from pypy.jit.metainterp.optimizeopt.intutils import IntBound, IntUnbounded
 from pypy.tool.pairtype import extendabletype
 
 LEVEL_UNKNOWN    = '\x00'
@@ -254,10 +248,11 @@
 
 class Optimizer(Optimization):
 
-    def __init__(self, metainterp_sd, loop, optimizations=None):
+    def __init__(self, metainterp_sd, loop, optimizations=None, bridge=False):
         self.metainterp_sd = metainterp_sd
         self.cpu = metainterp_sd.cpu
         self.loop = loop
+        self.bridge = bridge
         self.values = {}
         self.interned_refs = self.cpu.ts.new_ref_dict()
         self.resumedata_memo = resume.ResumeDataLoopMemo(metainterp_sd)
@@ -413,9 +408,7 @@
             return CVAL_ZERO
 
     def propagate_all_forward(self):
-        self.exception_might_have_happened = True
-        # ^^^ at least at the start of bridges.  For loops, we could set
-        # it to False, but we probably don't care
+        self.exception_might_have_happened = self.bridge
         self.newoperations = []
         self.first_optimization.propagate_begin_forward()
         self.i = 0
diff --git a/pypy/jit/metainterp/optimizeopt/rewrite.py b/pypy/jit/metainterp/optimizeopt/rewrite.py
--- a/pypy/jit/metainterp/optimizeopt/rewrite.py
+++ b/pypy/jit/metainterp/optimizeopt/rewrite.py
@@ -1,10 +1,11 @@
+from pypy.jit.codewriter.effectinfo import EffectInfo
+from pypy.jit.metainterp.history import ConstInt, make_hashable_int
+from pypy.jit.metainterp.optimize import InvalidLoop
+from pypy.jit.metainterp.optimizeopt.intutils import IntBound
 from pypy.jit.metainterp.optimizeopt.optimizer import *
-from pypy.jit.metainterp.resoperation import opboolinvers, opboolreflex
-from pypy.jit.metainterp.history import ConstInt
 from pypy.jit.metainterp.optimizeopt.util import _findall, make_dispatcher_method
-from pypy.jit.metainterp.resoperation import rop, ResOperation
-from pypy.jit.codewriter.effectinfo import EffectInfo
-from pypy.jit.metainterp.optimizeopt.intutils import IntBound
+from pypy.jit.metainterp.resoperation import (opboolinvers, opboolreflex, rop,
+    ResOperation)
 from pypy.rlib.rarithmetic import highest_bit
 
 
@@ -199,6 +200,7 @@
                     ))
                     return
         self.emit_operation(op)
+        self.pure(rop.FLOAT_MUL, [arg2, arg1], op.result)
 
     def optimize_FLOAT_NEG(self, op):
         v1 = op.getarg(0)
diff --git a/pypy/jit/metainterp/optimizeopt/simplify.py b/pypy/jit/metainterp/optimizeopt/simplify.py
--- a/pypy/jit/metainterp/optimizeopt/simplify.py
+++ b/pypy/jit/metainterp/optimizeopt/simplify.py
@@ -1,7 +1,7 @@
-
-from pypy.jit.metainterp.resoperation import ResOperation, rop
 from pypy.jit.metainterp.optimizeopt.optimizer import Optimization
 from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method
+from pypy.jit.metainterp.resoperation import ResOperation, rop
+
 
 class OptSimplify(Optimization):
     def optimize_CALL_PURE(self, op):
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py b/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
--- a/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
@@ -693,7 +693,6 @@
         """
         expected = """
         [i]
-        guard_no_exception() []
         i1 = int_add(i, 3)
         i2 = call(i1, descr=nonwritedescr)
         guard_no_exception() [i1, i2]
@@ -1755,6 +1754,48 @@
         """
         self.optimize_loop(ops, expected)
 
+    def test_duplicate_getarrayitem_after_setarrayitem_bug(self):
+        ops = """
+        [p0, i0, i1]
+        setarrayitem_gc(p0, 0, i0, descr=arraydescr)
+        i6 = int_add(i0, 1)
+        setarrayitem_gc(p0, i1, i6, descr=arraydescr)
+        i10 = getarrayitem_gc(p0, 0, descr=arraydescr)
+        i11 = int_add(i10, i0)
+        jump(p0, i11, i1)
+        """
+        expected = """
+        [p0, i0, i1]
+        i6 = int_add(i0, 1)
+        setarrayitem_gc(p0, 0, i0, descr=arraydescr)
+        setarrayitem_gc(p0, i1, i6, descr=arraydescr)
+        i10 = getarrayitem_gc(p0, 0, descr=arraydescr)
+        i11 = int_add(i10, i0)
+        jump(p0, i11, i1)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_duplicate_getarrayitem_after_setarrayitem_bug2(self):
+        ops = """
+        [p0, i0, i1]
+        i2 = getarrayitem_gc(p0, 0, descr=arraydescr)
+        i6 = int_add(i0, 1)
+        setarrayitem_gc(p0, i1, i6, descr=arraydescr)
+        i10 = getarrayitem_gc(p0, 0, descr=arraydescr)
+        i11 = int_add(i10, i2)
+        jump(p0, i11, i1)
+        """
+        expected = """
+        [p0, i0, i1]
+        i2 = getarrayitem_gc(p0, 0, descr=arraydescr)
+        i6 = int_add(i0, 1)
+        setarrayitem_gc(p0, i1, i6, descr=arraydescr)
+        i10 = getarrayitem_gc(p0, 0, descr=arraydescr)
+        i11 = int_add(i10, i2)
+        jump(p0, i11, i1)
+        """
+        self.optimize_loop(ops, expected)
+
     def test_bug_1(self):
         ops = """
         [i0, p1]
@@ -4490,7 +4531,7 @@
         escape(i1)
         jump(p0, i0)
         """
-        self.optimize_loop(ops, expected)
+        self.optimize_strunicode_loop(ops, expected)
 
     def test_int_is_true_bounds(self):
         ops = """
@@ -4509,9 +4550,9 @@
         guard_true(i1) []
         jump(p0)
         """
-        self.optimize_loop(ops, expected)
-
-    def test_strslice_with_other_stuff(self):
+        self.optimize_strunicode_loop(ops, expected)
+
+    def test_strslice_subtraction_folds(self):
         ops = """
         [p0, i0]
         i1 = int_add(i0, 1)
@@ -4530,6 +4571,56 @@
         """
         self.optimize_strunicode_loop(ops, expected)
 
+    def test_float_mul_reversed(self):
+        ops = """
+        [f0, f1]
+        f2 = float_mul(f0, f1)
+        f3 = float_mul(f1, f0)
+        jump(f2, f3)
+        """
+        expected = """
+        [f0, f1]
+        f2 = float_mul(f0, f1)
+        jump(f2, f2)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_null_char_str(self):
+        ops = """
+        [p0]
+        p1 = newstr(4)
+        setfield_gc(p0, p1, descr=valuedescr)
+        jump(p0)
+        """
+        # It used to be the case that this would have a series of
+        # strsetitem(p1, idx, 0), which was silly because memory is 0 filled
+        # when allocated.
+        expected = """
+        [p0]
+        p1 = newstr(4)
+        setfield_gc(p0, p1, descr=valuedescr)
+        jump(p0)
+        """
+        self.optimize_strunicode_loop(ops, expected)
+
+    def test_newstr_strlen(self):
+        ops = """
+        [i0]
+        p0 = newstr(i0)
+        escape(p0)
+        i1 = strlen(p0)
+        i2 = int_add(i1, 1)
+        jump(i2)
+        """
+        expected = """
+        [i0]
+        p0 = newstr(i0)
+        escape(p0)
+        i1 = int_add(i0, 1)
+        jump(i1)
+        """
+        self.optimize_strunicode_loop(ops, expected)
+
 
 class TestLLtype(BaseTestOptimizeBasic, LLtypeMixin):
     pass
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py b/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
--- a/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
@@ -889,12 +889,10 @@
         i3 = call(i2, descr=nonwritedescr)
         jump(i1)       # the exception is considered lost when we loop back
         """
-        # note that 'guard_no_exception' at the very start must be kept
-        # around: bridges may start with one.  (In case of loops we could
-        # remove it, but we probably don't care.)
+        # note that 'guard_no_exception' at the very start is kept around
+        # for bridges, but not for loops
         preamble = """
         [i]
-        guard_no_exception() []
         i1 = int_add(i, 3)
         i2 = call(i1, descr=nonwritedescr)
         guard_no_exception() [i1, i2]
@@ -2820,11 +2818,11 @@
     def test_residual_call_invalidate_some_arrays(self):
         ops = """
         [p1, p2, i1]
-        p3 = getarrayitem_gc(p1, 0, descr=arraydescr2)
+        p3 = getarrayitem_gc(p2, 0, descr=arraydescr2)
         p4 = getarrayitem_gc(p2, 1, descr=arraydescr2)
         i2 = getarrayitem_gc(p1, 1, descr=arraydescr)
         i3 = call(i1, descr=writearraydescr)
-        p5 = getarrayitem_gc(p1, 0, descr=arraydescr2)
+        p5 = getarrayitem_gc(p2, 0, descr=arraydescr2)
         p6 = getarrayitem_gc(p2, 1, descr=arraydescr2)
         i4 = getarrayitem_gc(p1, 1, descr=arraydescr)
         escape(p3)
@@ -2837,7 +2835,7 @@
         """
         expected = """
         [p1, p2, i1]
-        p3 = getarrayitem_gc(p1, 0, descr=arraydescr2)
+        p3 = getarrayitem_gc(p2, 0, descr=arraydescr2)
         p4 = getarrayitem_gc(p2, 1, descr=arraydescr2)
         i2 = getarrayitem_gc(p1, 1, descr=arraydescr)
         i3 = call(i1, descr=writearraydescr)
@@ -2993,6 +2991,38 @@
         '''
         self.optimize_loop(ops, expected, preamble, call_pure_results)
 
+    def test_call_pure_constant_folding_exc(self):
+        # CALL_PURE may be followed by GUARD_NO_EXCEPTION
+        arg_consts = [ConstInt(i) for i in (123456, 4, 5, 6)]
+        call_pure_results = {tuple(arg_consts): ConstInt(42)}
+        ops = '''
+        [i0, i1, i2]
+        escape(i1)
+        escape(i2)
+        i3 = call_pure(123456, 4, 5, 6, descr=plaincalldescr)
+        guard_no_exception() []
+        i4 = call_pure(123456, 4, i0, 6, descr=plaincalldescr)
+        guard_no_exception() []
+        jump(i0, i3, i4)
+        '''
+        preamble = '''
+        [i0, i1, i2]
+        escape(i1)
+        escape(i2)
+        i4 = call(123456, 4, i0, 6, descr=plaincalldescr)
+        guard_no_exception() []
+        jump(i0, i4)
+        '''
+        expected = '''
+        [i0, i2]
+        escape(42)
+        escape(i2)
+        i4 = call(123456, 4, i0, 6, descr=plaincalldescr)
+        guard_no_exception() []
+        jump(i0, i4)
+        '''
+        self.optimize_loop(ops, expected, preamble, call_pure_results)
+
     # ----------
 
     def test_vref_nonvirtual_nonescape(self):
diff --git a/pypy/jit/metainterp/optimizeopt/unroll.py b/pypy/jit/metainterp/optimizeopt/unroll.py
--- a/pypy/jit/metainterp/optimizeopt/unroll.py
+++ b/pypy/jit/metainterp/optimizeopt/unroll.py
@@ -1,14 +1,12 @@
+from pypy.jit.codewriter.effectinfo import EffectInfo
+from pypy.jit.metainterp.compile import ResumeGuardDescr
+from pypy.jit.metainterp.history import TreeLoop, LoopToken
+from pypy.jit.metainterp.jitexc import JitException
+from pypy.jit.metainterp.optimize import InvalidLoop, RetraceLoop
 from pypy.jit.metainterp.optimizeopt.optimizer import *
-from pypy.jit.metainterp.optimizeopt.virtualize import AbstractVirtualValue
 from pypy.jit.metainterp.resoperation import rop, ResOperation
-from pypy.jit.metainterp.compile import ResumeGuardDescr
 from pypy.jit.metainterp.resume import Snapshot
-from pypy.jit.metainterp.history import TreeLoop, LoopToken
-from pypy.rlib.debug import debug_start, debug_stop, debug_print
-from pypy.jit.metainterp.optimize import InvalidLoop, RetraceLoop
-from pypy.jit.metainterp.jitexc import JitException
-from pypy.jit.metainterp.history import make_hashable_int
-from pypy.jit.codewriter.effectinfo import EffectInfo
+from pypy.rlib.debug import debug_print
 
 # Assumptions
 # ===========
@@ -22,7 +20,7 @@
 # are also recreated to allow virtuals not supported to be forced.
 #
 # First of all, the optimizations are not allowed to introduce new
-# boxes. It is the unoptimized version of the trace that is inlined to 
+# boxes. It is the unoptimized version of the trace that is inlined to
 # form the second iteration of the loop. Otherwise the
 # state of the virtuals would not be updated correctly. Whenever some
 # box from the first iteration is reused in the second iteration, it
@@ -57,7 +55,7 @@
 # be absorbed into the virtual p2 and never seen by the heap
 # optimizer. At the end of the loop both p2 and p3 are virtuals, but
 # the loop needs p2 to be a pointer to be able to call itself. So it
-# is forced producing the operations 
+# is forced producing the operations
 #
 #         p2 = new_with_vtable(ConstClass(node_vtable))
 #         setfield_gc(p2, i2, descr=nextdescr)
@@ -68,7 +66,7 @@
 # the trace were optimized under the wrong assumption that the
 # setfield_gc was store sinked which could lead to errors. In this
 # case what would happen is that it would be inserted once more in
-# front of the guard. 
+# front of the guard.
 
 
 
@@ -112,7 +110,7 @@
     def inline_descr_inplace(self, descr):
         if isinstance(descr, ResumeGuardDescr):
             descr.rd_snapshot = self.inline_snapshot(descr.rd_snapshot)
-            
+
     def inline_arg(self, arg):
         if arg is None:
             return None
@@ -139,7 +137,7 @@
                 return False
         return True
 
-    def generate_guards(self, other, args, cpu, extra_guards):        
+    def generate_guards(self, other, args, cpu, extra_guards):
         assert len(self.state) == len(other.state) == len(args)
         for i in range(len(self.state)):
             self.state[i].generate_guards(other.state[i], args[i],
@@ -153,7 +151,7 @@
 
     def register_virtual_fields(self, keybox, fieldboxes):
         self.fieldboxes[keybox] = fieldboxes
-        
+
     def already_seen_virtual(self, keybox):
         return keybox in self.fieldboxes
 
@@ -233,20 +231,20 @@
         if self.level == LEVEL_CONSTANT:
             import pdb; pdb.set_trace()
             raise NotImplementedError
-        
+
 
 class UnrollOptimizer(Optimization):
     """Unroll the loop into two iterations. The first one will
     become the preamble or entry bridge (don't think there is a
     distinction anymore)"""
-    
+
     def __init__(self, metainterp_sd, loop, optimizations):
         self.optimizer = Optimizer(metainterp_sd, loop, optimizations)
         self.cloned_operations = []
         for op in self.optimizer.loop.operations:
             newop = op.clone()
             self.cloned_operations.append(newop)
-            
+
     def propagate_all_forward(self):
         loop = self.optimizer.loop
         jumpop = loop.operations[-1]
@@ -284,7 +282,7 @@
             assert isinstance(start_resumedescr, ResumeGuardDescr)
             snapshot = start_resumedescr.rd_snapshot
             while snapshot is not None:
-                snapshot_args = snapshot.boxes 
+                snapshot_args = snapshot.boxes
                 new_snapshot_args = []
                 for a in snapshot_args:
                     if not isinstance(a, Const):
@@ -313,7 +311,7 @@
                 short_loop.inputargs = loop.preamble.inputargs[:]
                 short_loop.operations = short
 
-                # Clone ops and boxes to get private versions and 
+                # Clone ops and boxes to get private versions and
                 newargs = [a.clonebox() for a in short_loop.inputargs]
                 inliner = Inliner(short_loop.inputargs, newargs)
                 short_loop.inputargs = newargs
@@ -336,10 +334,10 @@
                 for op in short_loop.operations:
                     if op.result:
                         op.result.forget_value()
-                
+
     def inline(self, loop_operations, loop_args, jump_args):
         self.inliner = inliner = Inliner(loop_args, jump_args)
-           
+
         for v in self.optimizer.values.values():
             v.last_guard_index = -1 # FIXME: Are there any more indexes stored?
 
@@ -371,12 +369,12 @@
         jumpargs = jmp.getarglist()
 
         # FIXME: Should also loop over operations added by forcing things in this loop
-        for op in newoperations: 
+        for op in newoperations:
             boxes_created_this_iteration[op.result] = True
             args = op.getarglist()
             if op.is_guard():
                 args = args + op.getfailargs()
-            
+
             for a in args:
                 if not isinstance(a, Const) and not a in boxes_created_this_iteration:
                     if a not in inputargs:
@@ -439,7 +437,7 @@
                             "at preamble position: ", preamble_i,
                             "loop position: ", loop_i)
                 return None
-                
+
             if self.sameop(newop, loop_ops[loop_i]) \
                and loop_i < len(loop_ops):
                 try:
@@ -460,7 +458,7 @@
                                 "loop position: ", loop_i)
                     return None
                 short_preamble.append(op)
-                
+
             state.update(op)
             preamble_i += 1
 
@@ -470,7 +468,7 @@
                         "at position", loop_i)
             return None
 
-        
+
         jumpargs = []
         for i in range(len(loop.inputargs)):
             try:
@@ -498,7 +496,7 @@
                     return None
             if op.result:
                 seen[op.result] = True
-        
+
         return short_preamble
 
 class ExeState(object):
@@ -508,7 +506,7 @@
         self.unsafe_getitem = {}
         self.unsafe_getarrayitem = {}
         self.unsafe_getarrayitem_indexes = {}
-        
+
     # Make sure it is safe to move the instrucions in short_preamble
     # to the top making short_preamble followed by loop equvivalent
     # to preamble
@@ -545,15 +543,17 @@
         elif opnum == rop.CALL:
             effectinfo = descr.get_extra_info()
             if effectinfo is not None:
-                if effectinfo.extraeffect == EffectInfo.EF_LOOPINVARIANT or \
-                   effectinfo.extraeffect == EffectInfo.EF_ELIDABLE:
+                ef = effectinfo.extraeffect
+                if ef == EffectInfo.EF_LOOPINVARIANT or \
+                   ef == EffectInfo.EF_ELIDABLE_CANNOT_RAISE or \
+                   ef == EffectInfo.EF_ELIDABLE_CAN_RAISE:
                     return True
         return False
-    
+
     def update(self, op):
         if (op.has_no_side_effect() or
             op.is_ovf() or
-            op.is_guard()): 
+            op.is_guard()):
             return
         opnum = op.getopnum()
         descr = op.getdescr()
@@ -566,7 +566,7 @@
         if (opnum == rop.SETARRAYITEM_GC or
             opnum == rop.SETARRAYITEM_RAW):
             index = op.getarg(1)
-            if isinstance(index, Const):                
+            if isinstance(index, Const):
                 d = self.unsafe_getarrayitem_indexes.get(descr, None)
                 if d is None:
                     d = self.unsafe_getarrayitem_indexes[descr] = {}
@@ -592,7 +592,7 @@
     def __init__(self):
         self.map = {}
 
-    
+
     def link_ops(self, preambleop, loopop):
         pargs = preambleop.getarglist()
         largs = loopop.getarglist()
@@ -606,7 +606,7 @@
             if not loopop.result:
                 raise ImpossibleLink
             self.link_boxes(preambleop.result, loopop.result)
-        
+
 
     def link_boxes(self, pbox, lbox):
         if lbox in self.map:
@@ -627,11 +627,11 @@
     def __init__(self, retraced):
         self.retraced = retraced
         self.inliner = None
-        
-    
+
+
     def reconstruct_for_next_iteration(self, optimizer, valuemap):
         return self
-    
+
     def propagate_forward(self, op):
         if op.getopnum() == rop.JUMP:
             descr = op.getdescr()
@@ -657,7 +657,7 @@
                             sh.virtual_state.generate_guards(virtual_state,
                                                              args, cpu,
                                                              extra_guards)
-                            
+
                             ok = True
                         except InvalidLoop:
                             pass
@@ -697,7 +697,7 @@
                     else:
                         debug_print("Retracing (%d of %d)" % (retraced_count,
                                                               limit))
-                                                              
+
                         raise RetraceLoop
                 else:
                     if not descr.failed_states:
@@ -705,21 +705,21 @@
                     else:
                         descr.failed_states.append(virtual_state)
         self.emit_operation(op)
-                
-        
-        
+
+
+
     def inline(self, loop_operations, loop_args, jump_args, dryrun=False):
         self.inliner = inliner = Inliner(loop_args, jump_args)
 
         for op in loop_operations:
             newop = inliner.inline_op(op)
-            
+
             if not dryrun:
                 self.emit_operation(newop)
             else:
                 if not self.is_emittable(newop):
                     return False
-        
+
         return True
 
     #def inline_arg(self, arg):
diff --git a/pypy/jit/metainterp/optimizeopt/util.py b/pypy/jit/metainterp/optimizeopt/util.py
--- a/pypy/jit/metainterp/optimizeopt/util.py
+++ b/pypy/jit/metainterp/optimizeopt/util.py
@@ -21,7 +21,7 @@
             continue
         if hasattr(Class, name_prefix + name):
             opclass = resoperation.opclasses[getattr(rop, name)]
-            print value, name, opclass
+            assert name in opclass.__name__
             result.append((value, opclass, getattr(Class, name_prefix + name)))
     return unrolling_iterable(result)
 
diff --git a/pypy/jit/metainterp/optimizeopt/virtualize.py b/pypy/jit/metainterp/optimizeopt/virtualize.py
--- a/pypy/jit/metainterp/optimizeopt/virtualize.py
+++ b/pypy/jit/metainterp/optimizeopt/virtualize.py
@@ -1,11 +1,11 @@
+from pypy.jit.codewriter.heaptracker import vtable2descr
+from pypy.jit.metainterp.executor import execute
 from pypy.jit.metainterp.history import Const, ConstInt, BoxInt
+from pypy.jit.metainterp.optimizeopt import optimizer
+from pypy.jit.metainterp.optimizeopt.util import (make_dispatcher_method,
+    descrlist_dict, sort_descrs)
 from pypy.jit.metainterp.resoperation import rop, ResOperation
-from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method
-from pypy.jit.metainterp.optimizeopt.util import descrlist_dict, sort_descrs
 from pypy.rlib.objectmodel import we_are_translated
-from pypy.jit.metainterp.optimizeopt import optimizer
-from pypy.jit.metainterp.executor import execute
-from pypy.jit.codewriter.heaptracker import vtable2descr
 
 
 class AbstractVirtualValue(optimizer.OptValue):
diff --git a/pypy/jit/metainterp/optimizeopt/vstring.py b/pypy/jit/metainterp/optimizeopt/vstring.py
--- a/pypy/jit/metainterp/optimizeopt/vstring.py
+++ b/pypy/jit/metainterp/optimizeopt/vstring.py
@@ -1,18 +1,14 @@
-from pypy.rpython.lltypesystem import lltype, rstr, llmemory
+from pypy.jit.codewriter.effectinfo import EffectInfo
+from pypy.jit.metainterp.history import (BoxInt, Const, ConstInt, ConstPtr,
+    get_const_ptr_for_string, get_const_ptr_for_unicode)
+from pypy.jit.metainterp.optimizeopt import optimizer, virtualize
+from pypy.jit.metainterp.optimizeopt.optimizer import CONST_0, CONST_1, llhelper
+from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method
+from pypy.jit.metainterp.resoperation import rop, ResOperation
+from pypy.rlib.objectmodel import specialize, we_are_translated
+from pypy.rlib.unroll import unrolling_iterable
 from pypy.rpython import annlowlevel
-from pypy.jit.metainterp.history import Box, BoxInt, BoxPtr
-from pypy.jit.metainterp.history import Const, ConstInt, ConstPtr
-from pypy.jit.metainterp.history import get_const_ptr_for_string
-from pypy.jit.metainterp.history import get_const_ptr_for_unicode
-from pypy.jit.metainterp.resoperation import rop, ResOperation
-from pypy.jit.metainterp.optimizeopt import optimizer, virtualize
-from pypy.jit.metainterp.optimizeopt.optimizer import CONST_0, CONST_1
-from pypy.jit.metainterp.optimizeopt.optimizer import llhelper
-from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method
-from pypy.jit.codewriter.effectinfo import EffectInfo
-from pypy.jit.codewriter import heaptracker
-from pypy.rlib.unroll import unrolling_iterable
-from pypy.rlib.objectmodel import specialize, we_are_translated
+from pypy.rpython.lltypesystem import lltype, rstr
 
 
 class StrOrUnicode(object):
@@ -147,10 +143,11 @@
     def string_copy_parts(self, optimizer, targetbox, offsetbox, mode):
         for i in range(len(self._chars)):
             charbox = self._chars[i].force_box()
-            optimizer.emit_operation(ResOperation(mode.STRSETITEM, [targetbox,
-                                                                offsetbox,
-                                                                charbox],
-                                              None))
+            if not (isinstance(charbox, Const) and charbox.same_constant(CONST_0)):
+                optimizer.emit_operation(ResOperation(mode.STRSETITEM, [targetbox,
+                                                                    offsetbox,
+                                                                    charbox],
+                                                  None))
             offsetbox = _int_add(optimizer, offsetbox, CONST_1)
         return offsetbox
 
@@ -402,6 +399,7 @@
         else:
             self.getvalue(op.result).ensure_nonnull()
             self.emit_operation(op)
+            self.pure(mode.STRLEN, [op.result], op.getarg(0))
 
     def optimize_STRSETITEM(self, op):
         value = self.getvalue(op.getarg(0))
diff --git a/pypy/jit/metainterp/pyjitpl.py b/pypy/jit/metainterp/pyjitpl.py
--- a/pypy/jit/metainterp/pyjitpl.py
+++ b/pypy/jit/metainterp/pyjitpl.py
@@ -56,6 +56,8 @@
         # for resume.py operation
         self.parent_resumedata_snapshot = None
         self.parent_resumedata_frame_info_list = None
+        # counter for unrolling inlined loops
+        self.unroll_iterations = 1
 
     @specialize.arg(3)
     def copy_constants(self, registers, constants, ConstClass):
@@ -213,6 +215,7 @@
 
     for _opimpl in ['int_is_true', 'int_is_zero', 'int_neg', 'int_invert',
                     'cast_float_to_int', 'cast_int_to_float',
+                    'cast_float_to_singlefloat', 'cast_singlefloat_to_float',
                     'float_neg', 'float_abs',
                     ]:
         exec py.code.Source('''
@@ -390,8 +393,21 @@
 
     @arguments("box", "descr", "box")
     def _opimpl_getarrayitem_gc_any(self, arraybox, arraydescr, indexbox):
-        return self.execute_with_descr(rop.GETARRAYITEM_GC,
-                                       arraydescr, arraybox, indexbox)
+        cache = self.metainterp.heap_array_cache.get(arraydescr, None)
+        if cache and isinstance(indexbox, ConstInt):
+            index = indexbox.getint()
+            frombox, tobox = cache.get(index, (None, None))
+            if frombox is arraybox:
+                return tobox
+        resbox = self.execute_with_descr(rop.GETARRAYITEM_GC,
+                                         arraydescr, arraybox, indexbox)
+        if isinstance(indexbox, ConstInt):
+            if not cache:
+                cache = self.metainterp.heap_array_cache[arraydescr] = {}
+            index = indexbox.getint()
+            cache[index] = arraybox, resbox
+        return resbox
+
 
     opimpl_getarrayitem_gc_i = _opimpl_getarrayitem_gc_any
     opimpl_getarrayitem_gc_r = _opimpl_getarrayitem_gc_any
@@ -419,6 +435,13 @@
                                     indexbox, itembox):
         self.execute_with_descr(rop.SETARRAYITEM_GC, arraydescr, arraybox,
                                 indexbox, itembox)
+        if isinstance(indexbox, ConstInt):
+            cache = self.metainterp.heap_array_cache.setdefault(arraydescr, {})
+            cache[indexbox.getint()] = arraybox, itembox
+        else:
+            cache = self.metainterp.heap_array_cache.get(arraydescr, None)
+            if cache:
+                cache.clear()
 
     opimpl_setarrayitem_gc_i = _opimpl_setarrayitem_gc_any
     opimpl_setarrayitem_gc_r = _opimpl_setarrayitem_gc_any
@@ -454,21 +477,17 @@
     def opimpl_newlist(self, structdescr, lengthdescr, itemsdescr, arraydescr,
                        sizebox):
         sbox = self.metainterp.execute_and_record(rop.NEW, structdescr)
-        self.metainterp.execute_and_record(rop.SETFIELD_GC, lengthdescr,
-                                           sbox, sizebox)
+        self._opimpl_setfield_gc_any(sbox, lengthdescr, sizebox)
         abox = self.metainterp.execute_and_record(rop.NEW_ARRAY, arraydescr,
                                                   sizebox)
-        self.metainterp.execute_and_record(rop.SETFIELD_GC, itemsdescr,
-                                           sbox, abox)
+        self._opimpl_setfield_gc_any(sbox, itemsdescr, abox)
         return sbox
 
     @arguments("box", "descr", "descr", "box")
     def _opimpl_getlistitem_gc_any(self, listbox, itemsdescr, arraydescr,
                                    indexbox):
-        arraybox = self.metainterp.execute_and_record(rop.GETFIELD_GC,
-                                                      itemsdescr, listbox)
-        return self.execute_with_descr(rop.GETARRAYITEM_GC,
-                                       arraydescr, arraybox, indexbox)
+        arraybox = self._opimpl_getfield_gc_any(listbox, itemsdescr)
+        return self._opimpl_getarrayitem_gc_any(arraybox, arraydescr, indexbox)
 
     opimpl_getlistitem_gc_i = _opimpl_getlistitem_gc_any
     opimpl_getlistitem_gc_r = _opimpl_getlistitem_gc_any
@@ -477,10 +496,9 @@
     @arguments("box", "descr", "descr", "box", "box")
     def _opimpl_setlistitem_gc_any(self, listbox, itemsdescr, arraydescr,
                                    indexbox, valuebox):
-        arraybox = self.metainterp.execute_and_record(rop.GETFIELD_GC,
-                                                      itemsdescr, listbox)
-        self.execute_with_descr(rop.SETARRAYITEM_GC, arraydescr, arraybox,
-                                indexbox, valuebox)
+        arraybox = self._opimpl_getfield_gc_any(listbox, itemsdescr)
+        self._opimpl_setarrayitem_gc_any(arraybox, arraydescr, indexbox,
+                                         valuebox)
 
     opimpl_setlistitem_gc_i = _opimpl_setlistitem_gc_any
     opimpl_setlistitem_gc_r = _opimpl_setlistitem_gc_any
@@ -502,18 +520,29 @@
 
     @arguments("box", "descr")
     def _opimpl_getfield_gc_any(self, box, fielddescr):
-        return self.execute_with_descr(rop.GETFIELD_GC, fielddescr, box)
+        return self._opimpl_getfield_gc_any_pureornot(
+                rop.GETFIELD_GC, box, fielddescr)
     opimpl_getfield_gc_i = _opimpl_getfield_gc_any
     opimpl_getfield_gc_r = _opimpl_getfield_gc_any
     opimpl_getfield_gc_f = _opimpl_getfield_gc_any
 
     @arguments("box", "descr")
     def _opimpl_getfield_gc_pure_any(self, box, fielddescr):
-        return self.execute_with_descr(rop.GETFIELD_GC_PURE, fielddescr, box)
+        return self._opimpl_getfield_gc_any_pureornot(
+                rop.GETFIELD_GC_PURE, box, fielddescr)
     opimpl_getfield_gc_i_pure = _opimpl_getfield_gc_pure_any
     opimpl_getfield_gc_r_pure = _opimpl_getfield_gc_pure_any
     opimpl_getfield_gc_f_pure = _opimpl_getfield_gc_pure_any
 
+    @specialize.arg(1)
+    def _opimpl_getfield_gc_any_pureornot(self, opnum, box, fielddescr):
+        frombox, tobox = self.metainterp.heap_cache.get(fielddescr, (None, None))
+        if frombox is box:
+            return tobox
+        resbox = self.execute_with_descr(opnum, fielddescr, box)
+        self.metainterp.heap_cache[fielddescr] = (box, resbox)
+        return resbox
+
     @arguments("orgpc", "box", "descr")
     def _opimpl_getfield_gc_greenfield_any(self, pc, box, fielddescr):
         ginfo = self.metainterp.jitdriver_sd.greenfield_info
@@ -532,7 +561,11 @@
 
     @arguments("box", "descr", "box")
     def _opimpl_setfield_gc_any(self, box, fielddescr, valuebox):
+        frombox, tobox = self.metainterp.heap_cache.get(fielddescr, (None, None))
+        if frombox is box and tobox is valuebox:
+            return
         self.execute_with_descr(rop.SETFIELD_GC, fielddescr, box, valuebox)
+        self.metainterp.heap_cache[fielddescr] = (box, valuebox)
     opimpl_setfield_gc_i = _opimpl_setfield_gc_any
     opimpl_setfield_gc_r = _opimpl_setfield_gc_any
     opimpl_setfield_gc_f = _opimpl_setfield_gc_any
@@ -596,12 +629,16 @@
         standard_box = self.metainterp.virtualizable_boxes[-1]
         if standard_box is box:
             return False
+        if box in self.metainterp.nonstandard_virtualizables:
+            return True
         eqbox = self.metainterp.execute_and_record(rop.PTR_EQ, None,
                                                    box, standard_box)
         eqbox = self.implement_guard_value(pc, eqbox)
         isstandard = eqbox.getint()
         if isstandard:
             self.metainterp.replace_box(box, standard_box)
+        else:
+            self.metainterp.nonstandard_virtualizables[box] = None
         return not isstandard
 
     def _get_virtualizable_field_index(self, fielddescr):
@@ -613,7 +650,7 @@
     @arguments("orgpc", "box", "descr")
     def _opimpl_getfield_vable(self, pc, box, fielddescr):
         if self._nonstandard_virtualizable(pc, box):
-            return self.execute_with_descr(rop.GETFIELD_GC, fielddescr, box)
+            return self._opimpl_getfield_gc_any(box, fielddescr)
         self.metainterp.check_synchronized_virtualizable()
         index = self._get_virtualizable_field_index(fielddescr)
         return self.metainterp.virtualizable_boxes[index]
@@ -625,8 +662,7 @@
     @arguments("orgpc", "box", "descr", "box")
     def _opimpl_setfield_vable(self, pc, box, fielddescr, valuebox):
         if self._nonstandard_virtualizable(pc, box):
-            self.execute_with_descr(rop.SETFIELD_GC, fielddescr, box, valuebox)
-            return
+            return self._opimpl_setfield_gc_any(box, fielddescr, valuebox)
         index = self._get_virtualizable_field_index(fielddescr)
         self.metainterp.virtualizable_boxes[index] = valuebox
         self.metainterp.synchronize_virtualizable()
@@ -656,10 +692,8 @@
     @arguments("orgpc", "box", "descr", "descr", "box")
     def _opimpl_getarrayitem_vable(self, pc, box, fdescr, adescr, indexbox):
         if self._nonstandard_virtualizable(pc, box):
-            arraybox = self.metainterp.execute_and_record(rop.GETFIELD_GC,
-                                                          fdescr, box)
-            return self.execute_with_descr(rop.GETARRAYITEM_GC, adescr,
-                                           arraybox, indexbox)
+            arraybox = self._opimpl_getfield_gc_any(box, fdescr)
+            return self._opimpl_getarrayitem_gc_any(arraybox, adescr, indexbox)
         self.metainterp.check_synchronized_virtualizable()
         index = self._get_arrayitem_vable_index(pc, fdescr, indexbox)
         return self.metainterp.virtualizable_boxes[index]
@@ -672,10 +706,9 @@
     def _opimpl_setarrayitem_vable(self, pc, box, fdescr, adescr, indexbox,
                                   valuebox):
         if self._nonstandard_virtualizable(pc, box):
-            arraybox = self.metainterp.execute_and_record(rop.GETFIELD_GC,
-                                                          fdescr, box)
-            self.execute_with_descr(rop.SETARRAYITEM_GC, adescr,
-                                    arraybox, indexbox, valuebox)
+            arraybox = self._opimpl_getfield_gc_any(box, fdescr)
+            self._opimpl_setarrayitem_gc_any(arraybox, adescr,
+                                             indexbox, valuebox)
             return
         index = self._get_arrayitem_vable_index(pc, fdescr, indexbox)
         self.metainterp.virtualizable_boxes[index] = valuebox
@@ -689,8 +722,7 @@
     @arguments("orgpc", "box", "descr", "descr")
     def opimpl_arraylen_vable(self, pc, box, fdescr, adescr):
         if self._nonstandard_virtualizable(pc, box):
-            arraybox = self.metainterp.execute_and_record(rop.GETFIELD_GC,
-                                                          fdescr, box)
+            arraybox = self._opimpl_getfield_gc_any(box, fdescr)
             return self.execute_with_descr(rop.ARRAYLEN_GC, adescr, arraybox)
         vinfo = self.metainterp.jitdriver_sd.virtualizable_info
         virtualizable_box = self.metainterp.virtualizable_boxes[-1]
@@ -902,6 +934,10 @@
             # 'redboxes' back into the registers where it comes from.
             put_back_list_of_boxes3(self, jcposition, redboxes)
         else:
+            if jitdriver_sd.warmstate.should_unroll_one_iteration(greenboxes):
+                if self.unroll_iterations > 0:
+                    self.unroll_iterations -= 1
+                    return
             # warning! careful here.  We have to return from the current
             # frame containing the jit_merge_point, and then use
             # do_recursive_call() to follow the recursive call.  This is
@@ -1133,13 +1169,11 @@
             metainterp.jitdriver_sd.greenfield_info is not None):
             virtualizable_boxes = metainterp.virtualizable_boxes
         saved_pc = self.pc
-        try:
-            if resumepc >= 0:
-                self.pc = resumepc
-            resume.capture_resumedata(metainterp.framestack, virtualizable_boxes,
-                                      metainterp.virtualref_boxes, resumedescr)
-        finally:
-            self.pc = saved_pc
+        if resumepc >= 0:
+            self.pc = resumepc
+        resume.capture_resumedata(metainterp.framestack, virtualizable_boxes,
+                                  metainterp.virtualref_boxes, resumedescr)
+        self.pc = saved_pc
 
     def implement_guard_value(self, orgpc, box):
         """Promote the given Box into a Const.  Note: be careful, it's a
@@ -1166,7 +1200,7 @@
         return self.metainterp.execute_and_record(opnum, descr, *argboxes)
 
     @specialize.arg(1)
-    def execute_varargs(self, opnum, argboxes, descr, exc):
+    def execute_varargs(self, opnum, argboxes, descr, exc, pure):
         self.metainterp.clear_exception()
         resbox = self.metainterp.execute_and_record_varargs(opnum, argboxes,
                                                             descr=descr)
@@ -1174,6 +1208,9 @@
             self.make_result_of_lastop(resbox)
             # ^^^ this is done before handle_possible_exception() because we
             # need the box to show up in get_list_of_active_boxes()
+        if pure and self.metainterp.last_exc_value_box is None:
+            resbox = self.metainterp.record_result_of_call_pure(resbox)
+            exc = exc and not isinstance(resbox, Const)
         if exc:
             self.metainterp.handle_possible_exception()
         else:
@@ -1191,7 +1228,7 @@
         src_i = src_r = src_f = 0
         i = 1
         for kind in descr.get_arg_types():
-            if kind == history.INT:
+            if kind == history.INT or kind == 'S':        # single float
                 while True:
                     box = argboxes[src_i]
                     src_i += 1
@@ -1236,16 +1273,14 @@
             return resbox
         else:
             effect = effectinfo.extraeffect
-            if effect == effectinfo.EF_CANNOT_RAISE:
-                return self.execute_varargs(rop.CALL, allboxes, descr, False)
-            elif effect == effectinfo.EF_ELIDABLE:
-                return self.metainterp.record_result_of_call_pure(
-                    self.execute_varargs(rop.CALL, allboxes, descr, False))
-            elif effect == effectinfo.EF_LOOPINVARIANT:
+            if effect == effectinfo.EF_LOOPINVARIANT:
                 return self.execute_varargs(rop.CALL_LOOPINVARIANT, allboxes,
-                                            descr, False)
-            else:
-                return self.execute_varargs(rop.CALL, allboxes, descr, True)
+                                            descr, False, False)
+            exc = (effect != effectinfo.EF_CANNOT_RAISE and
+                   effect != effectinfo.EF_ELIDABLE_CANNOT_RAISE)
+            pure = (effect == effectinfo.EF_ELIDABLE_CAN_RAISE or
+                    effect == effectinfo.EF_ELIDABLE_CANNOT_RAISE)
+            return self.execute_varargs(rop.CALL, allboxes, descr, exc, pure)
 
     def do_residual_or_indirect_call(self, funcbox, calldescr, argboxes):
         """The 'residual_call' operation is emitted in two cases:
@@ -1344,9 +1379,9 @@
             num = self.cpu.get_fail_descr_number(tokens[0].finishdescr)
             setattr(self.cpu, 'done_with_this_frame_%s_v' % name, num)
         #
-        tokens = self.loop_tokens_exit_frame_with_exception_ref
-        num = self.cpu.get_fail_descr_number(tokens[0].finishdescr)
-        self.cpu.exit_frame_with_exception_v = num
+        exc_descr = compile.PropagateExceptionDescr()
+        num = self.cpu.get_fail_descr_number(exc_descr)
+        self.cpu.propagate_exception_v = num
         #
         self.globaldata = MetaInterpGlobalData(self)
 
@@ -1456,6 +1491,14 @@
         self.call_pure_results = args_dict_box()
         # contains boxes where the class is already known
         self.known_class_boxes = {}
+        # contains frame boxes that are not virtualizables
+        self.nonstandard_virtualizables = {}
+        # heap cache
+        # maps descrs to (from_box, to_box) tuples
+        self.heap_cache = {}
+        # heap array cache
+        # maps descrs to {index: (from_box, to_box)} dicts
+        self.heap_array_cache = {}
 
     def perform_call(self, jitcode, boxes, greenkey=None):
         # causes the metainterp to enter the given subfunction
@@ -1631,10 +1674,31 @@
         # record the operation
         profiler = self.staticdata.profiler
         profiler.count_ops(opnum, RECORDED_OPS)
+        self._invalidate_caches(opnum, descr)
         op = self.history.record(opnum, argboxes, resbox, descr)
         self.attach_debug_info(op)
         return resbox
 
+    def _invalidate_caches(self, opnum, descr):
+        if opnum == rop.SETFIELD_GC:
+            return
+        if opnum == rop.SETARRAYITEM_GC:
+            return
+        if rop._NOSIDEEFFECT_FIRST <= opnum <= rop._NOSIDEEFFECT_LAST:
+            return
+        if opnum == rop.CALL:
+            effectinfo = descr.get_extra_info()
+            if effectinfo is not None:
+                ef = effectinfo.extraeffect
+                if ef == effectinfo.EF_LOOPINVARIANT or \
+                   ef == effectinfo.EF_ELIDABLE_CANNOT_RAISE or \
+                   ef == effectinfo.EF_ELIDABLE_CAN_RAISE:
+                    return
+        if self.heap_cache:
+            self.heap_cache.clear()
+        if self.heap_array_cache:
+            self.heap_array_cache.clear()
+
     def attach_debug_info(self, op):
         if (not we_are_translated() and op is not None
             and getattr(self, 'framestack', None)):
@@ -1797,6 +1861,9 @@
 
     def reached_loop_header(self, greenboxes, redboxes, resumedescr):
         self.known_class_boxes = {}
+        self.nonstandard_virtualizables = {} # XXX maybe not needed?
+        self.heap_cache = {}
+        self.heap_array_cache = {}
 
         duplicates = {}
         self.remove_consts_and_duplicates(redboxes, len(redboxes),
@@ -2304,6 +2371,17 @@
             for i in range(len(boxes)):
                 if boxes[i] is oldbox:
                     boxes[i] = newbox
+        for descr, (frombox, tobox) in self.heap_cache.iteritems():
+            change = False
+            if frombox is oldbox:
+                change = True
+                frombox = newbox
+            if tobox is oldbox:
+                change = True
+                tobox = newbox
+            if change:
+                self.heap_cache[descr] = frombox, tobox
+        # XXX what about self.heap_array_cache?
 
     def find_biggest_function(self):
         start_stack = []
diff --git a/pypy/jit/metainterp/resoperation.py b/pypy/jit/metainterp/resoperation.py
--- a/pypy/jit/metainterp/resoperation.py
+++ b/pypy/jit/metainterp/resoperation.py
@@ -408,6 +408,8 @@
     'FLOAT_ABS/1',
     'CAST_FLOAT_TO_INT/1',
     'CAST_INT_TO_FLOAT/1',
+    'CAST_FLOAT_TO_SINGLEFLOAT/1',
+    'CAST_SINGLEFLOAT_TO_FLOAT/1',
     #
     'INT_LT/2b',
     'INT_LE/2b',
diff --git a/pypy/jit/metainterp/test/support.py b/pypy/jit/metainterp/test/support.py
--- a/pypy/jit/metainterp/test/support.py
+++ b/pypy/jit/metainterp/test/support.py
@@ -277,3 +277,15 @@
         NODE._add_fields({'value': ootype.Signed,
                           'next': NODE})
         return NODE
+
+# ____________________________________________________________
+
+class _Foo:
+    pass
+
+def noConst(x):
+    """Helper function for tests, returning 'x' as a BoxInt/BoxPtr
+    even if it is a ConstInt/ConstPtr."""
+    f1 = _Foo(); f2 = _Foo()
+    f1.x = x; f2.x = 0
+    return f1.x
diff --git a/pypy/jit/metainterp/test/test_ajit.py b/pypy/jit/metainterp/test/test_ajit.py
--- a/pypy/jit/metainterp/test/test_ajit.py
+++ b/pypy/jit/metainterp/test/test_ajit.py
@@ -11,10 +11,10 @@
 from pypy import conftest
 from pypy.rlib.rarithmetic import ovfcheck
 from pypy.jit.metainterp.typesystem import LLTypeHelper, OOTypeHelper
-from pypy.rpython.lltypesystem import lltype, llmemory
+from pypy.rpython.lltypesystem import lltype, llmemory, rffi
 from pypy.rpython.ootypesystem import ootype
 from pypy.jit.metainterp.optimizeopt import ALL_OPTS_DICT
-from pypy.jit.metainterp.test.support import LLJitMixin, OOJitMixin
+from pypy.jit.metainterp.test.support import LLJitMixin, OOJitMixin, noConst
 
 class BasicTests:
 
@@ -407,6 +407,58 @@
         # the CALL_PURE is constant-folded away by optimizeopt.py
         self.check_loops(int_sub=1, call=0, call_pure=0, getfield_gc=0)
 
+    def test_elidable_raising(self):
+        myjitdriver = JitDriver(greens = ['m'], reds = ['n'])
+        @elidable
+        def externfn(x):
+            if x <= 0:
+                raise ValueError
+            return x - 1
+        def f(n, m):
+            while n > 0:
+                myjitdriver.can_enter_jit(n=n, m=m)
+                myjitdriver.jit_merge_point(n=n, m=m)
+                try:
+                    n -= externfn(m)
+                except ValueError:
+                    n -= 1
+            return n
+        res = self.meta_interp(f, [22, 6])
+        assert res == -3
+        # the CALL_PURE is constant-folded away during tracing
+        self.check_loops(int_sub=1, call=0, call_pure=0)
+        #
+        res = self.meta_interp(f, [22, -5])
+        assert res == 0
+        # raises: becomes CALL and is not constant-folded away
+        self.check_loops(int_sub=1, call=1, call_pure=0)
+
+    def test_elidable_raising_2(self):
+        myjitdriver = JitDriver(greens = ['m'], reds = ['n'])
+        @elidable
+        def externfn(x):
+            if x <= 0:
+                raise ValueError
+            return x - 1
+        def f(n, m):
+            while n > 0:
+                myjitdriver.can_enter_jit(n=n, m=m)
+                myjitdriver.jit_merge_point(n=n, m=m)
+                try:
+                    n -= externfn(noConst(m))
+                except ValueError:
+                    n -= 1
+            return n
+        res = self.meta_interp(f, [22, 6])
+        assert res == -3
+        # the CALL_PURE is constant-folded away by optimizeopt.py
+        self.check_loops(int_sub=1, call=0, call_pure=0)
+        #
+        res = self.meta_interp(f, [22, -5])
+        assert res == 0
+        # raises: becomes CALL and is not constant-folded away
+        self.check_loops(int_sub=1, call=1, call_pure=0)
+
     def test_constant_across_mp(self):
         myjitdriver = JitDriver(greens = [], reds = ['n'])
         class X(object):
@@ -508,6 +560,32 @@
         assert res == 84 - 61 - 62
         self.check_history(call=1)   # because the trace starts immediately
 
+    def test_unroll_one_loop_iteration(self):
+        def unroll(code):
+            return code == 0
+        myjitdriver = JitDriver(greens = ['code'],
+                                reds = ['loops', 'inner_loops', 's'],
+                                should_unroll_one_iteration=unroll)
+
+        def f(code, loops, inner_loops):
+            s = 0
+            while loops > 0:
+                myjitdriver.jit_merge_point(code=code, loops=loops,
+                                            inner_loops=inner_loops, s=s)
+                if code == 1:
+                    s += f(0, inner_loops, 0)
+                loops -= 1
+                s += 1
+            return s
+
+        res = self.meta_interp(f, [1, 4, 1], enable_opts="", inline=True)
+        assert res == f(1, 4, 1)
+        self.check_history(call_assembler=0)
+
+        res = self.meta_interp(f, [1, 4, 2], enable_opts="", inline=True)
+        assert res == f(1, 4, 2)
+        self.check_history(call_assembler=1)
+
     def test_format(self):
         def f(n):
             return len("<%d>" % n)
@@ -1024,69 +1102,6 @@
         res = self.meta_interp(main, [])
         assert res == 55
 
-    def test_dont_record_repeated_guard_class(self):
-        class A:
-            pass
-        class B(A):
-            pass
-        @dont_look_inside
-        def extern(n):
-            if n == -7:
-                return None
-            elif n:
-                return A()
-            else:
-                return B()
-        def fn(n):
-            obj = extern(n)
-            return isinstance(obj, B) + isinstance(obj, B) + isinstance(obj, B) + isinstance(obj, B)
-        res = self.interp_operations(fn, [0])
-        assert res == 4
-        self.check_operations_history(guard_class=1, guard_nonnull=1)
-        res = self.interp_operations(fn, [1])
-        assert not res
-
-    def test_dont_record_guard_class_after_new(self):
-        class A:
-            pass
-        class B(A):
-            pass
-        def fn(n):
-            if n == -7:
-                obj = None
-            elif n:
-                obj = A()
-            else:
-                obj = B()
-            return isinstance(obj, B) + isinstance(obj, B) + isinstance(obj, B) + isinstance(obj, B)
-        res = self.interp_operations(fn, [0])
-        assert res == 4
-        self.check_operations_history(guard_class=0, guard_nonnull=0)
-        res = self.interp_operations(fn, [1])
-        assert not res
-
-    def test_guard_isnull_nullifies(self):
-        class A:
-            pass
-        a = A()
-        a.x = None
-        def fn(n):
-            if n == -7:
-                a.x = ""
-            obj = a.x
-            res = 0
-            if not obj:
-                res += 1
-            if obj:
-                res += 1
-            if obj is None:
-                res += 1
-            if obj is not None:
-                res += 1
-            return res
-        res = self.interp_operations(fn, [0])
-        assert res == 2
-        self.check_operations_history(guard_isnull=1)
 
     def test_assert_isinstance(self):
         class A:
@@ -1248,7 +1263,7 @@
             return tup[1]
         res = self.interp_operations(f, [3, 5])
         assert res == 5
-        self.check_operations_history(setfield_gc=2, getfield_gc_pure=1)
+        self.check_operations_history(setfield_gc=2, getfield_gc_pure=0)
 
     def test_oosend_look_inside_only_one(self):
         class A:
@@ -1616,8 +1631,6 @@
         assert res == 1
 
     def test_raw_malloc_and_access(self):
-        from pypy.rpython.lltypesystem import rffi
-
         TP = rffi.CArray(lltype.Signed)
 
         def f(n):
@@ -1631,8 +1644,6 @@
         assert res == 10
 
     def test_raw_malloc_and_access_float(self):
-        from pypy.rpython.lltypesystem import rffi
-
         TP = rffi.CArray(lltype.Float)
 
         def f(n, f):
@@ -2061,7 +2072,7 @@
                 myjitdriver.jit_merge_point(a=a, b=b, n=n, sa=sa)
                 if 0 < a <= 5: pass
                 if 0 < b <= 5: pass
-                sa += (((((a << b) << b) << b) >> b) >> b) >> b                
+                sa += (((((a << b) << b) << b) >> b) >> b) >> b
                 n += 1
             return sa
 
@@ -2071,10 +2082,10 @@
                 myjitdriver.jit_merge_point(a=a, b=b, n=n, sa=sa)
                 if 0 < a < promote(sys.maxint/2): pass
                 if 0 < b < 100: pass
-                sa += (((((a << b) << b) << b) >> b) >> b) >> b                
+                sa += (((((a << b) << b) << b) >> b) >> b) >> b
                 n += 1
             return sa
-        
+
         assert self.meta_interp(f1, [5, 5]) == 50
         self.check_loops(int_rshift=0, everywhere=True)
 
@@ -2106,7 +2117,7 @@
                 myjitdriver.jit_merge_point(a=a, b=b, n=n, sa=sa)
                 if -5 <= a < 0: pass
                 if 0 < b <= 5: pass
-                sa += (((((a << b) << b) << b) >> b) >> b) >> b                
+                sa += (((((a << b) << b) << b) >> b) >> b) >> b
                 n += 1
             return sa
 
@@ -2116,10 +2127,10 @@
                 myjitdriver.jit_merge_point(a=a, b=b, n=n, sa=sa)
                 if -promote(sys.maxint/2) < a < 0: pass
                 if 0 < b < 100: pass
-                sa += (((((a << b) << b) << b) >> b) >> b) >> b                
+                sa += (((((a << b) << b) << b) >> b) >> b) >> b
                 n += 1
             return sa
-        
+
         assert self.meta_interp(f1, [-5, 5]) == -50
         self.check_loops(int_rshift=0, everywhere=True)
 
@@ -2190,7 +2201,7 @@
 
         def get_printable_location(i):
             return str(i)
-        
+
         myjitdriver = JitDriver(greens = ['i'], reds = ['j', 'c', 'a'],
                                 get_printable_location=get_printable_location)
         bytecode = "0j10jc20a3"
@@ -2299,7 +2310,7 @@
         assert self.meta_interp(build, []) == 7
         self.check_loops(getfield_gc_pure=0)
         self.check_loops(getfield_gc_pure=2, everywhere=True)
-        
+
     def test_frame_finished_during_retrace(self):
         class Base(object):
             pass
@@ -2330,7 +2341,7 @@
             return sa
         res = self.meta_interp(f, [])
         assert res == f()
-        
+
     def test_frame_finished_during_continued_retrace(self):
         class Base(object):
             pass
@@ -2381,7 +2392,7 @@
         assert res == -2
         #self.check_loops(getarrayitem_gc=0, setarrayitem_gc=0) -- xxx?
 
-    def test_retrace_ending_up_retrazing_another_loop(self):
+    def test_retrace_ending_up_retracing_another_loop(self):
 
         myjitdriver = JitDriver(greens = ['pc'], reds = ['n', 'i', 'sa'])
         bytecode = "0+sI0+SI"
@@ -2414,12 +2425,12 @@
         def g(n1, n2):
             for i in range(10):
                 f(n1)
-            for i in range(10):                
+            for i in range(10):
                 f(n2)
 
         nn = [10, 3]
         assert self.meta_interp(g, nn) == g(*nn)
-        
+
         # The attempts of retracing first loop will end up retracing the
         # second and thus fail 5 times, saturating the retrace_count. Instead a
         # bridge back to the preamble of the first loop is produced. A guard in
@@ -2430,7 +2441,7 @@
         self.check_tree_loop_count(2 + 3)
 
         # FIXME: Add a gloabl retrace counter and test that we are not trying more than 5 times.
-        
+
         def g(n):
             for i in range(n):
                 for j in range(10):
@@ -2619,5 +2630,57 @@
         self.meta_interp(f, [], enable_opts='')
         self.check_loops(new_with_vtable=1)
 
+    def test_release_gil_flush_heap_cache(self):
+        T = rffi.CArrayPtr(rffi.TIME_T)
+
+        external = rffi.llexternal("time", [T], rffi.TIME_T, threadsafe=True)
+        # Not a real lock, has all the same properties with respect to GIL
+        # release though, so good for this test.
+        class Lock(object):
+            @dont_look_inside
+            def acquire(self):
+                external(lltype.nullptr(T.TO))
+            @dont_look_inside
+            def release(self):
+                external(lltype.nullptr(T.TO))
+        class X(object):
+            def __init__(self, idx):
+                self.field = idx
+        @dont_look_inside
+        def get_obj(z):
+            return X(z)
+        myjitdriver = JitDriver(greens=[], reds=["n", "l", "z", "lock"])
+        def f(n, z):
+            lock = Lock()
+            l = 0
+            while n > 0:
+                myjitdriver.jit_merge_point(lock=lock, l=l, n=n, z=z)
+                x = get_obj(z)
+                l += x.field
+                lock.acquire()
+                # This must not reuse the previous one.
+                n -= x.field
+                lock.release()
+            return n
+        res = self.meta_interp(f, [10, 1])
+        self.check_loops(getfield_gc=2)
+        assert res == f(10, 1)
+
+    def test_jit_merge_point_with_raw_pointer(self):
+        driver = JitDriver(greens = [], reds = ['n', 'x'])
+
+        TP = lltype.Array(lltype.Signed, hints={'nolength': True})
+
+        def f(n):
+            x = lltype.malloc(TP, 10, flavor='raw')
+            x[0] = 1
+            while n > 0:
+                driver.jit_merge_point(n=n, x=x)
+                n -= x[0]
+            lltype.free(x, flavor='raw')
+            return n
+
+        self.meta_interp(f, [10], repeat=3)
+
 class TestLLtype(BaseLLtypeTests, LLJitMixin):
     pass
diff --git a/pypy/jit/metainterp/test/test_dict.py b/pypy/jit/metainterp/test/test_dict.py
--- a/pypy/jit/metainterp/test/test_dict.py
+++ b/pypy/jit/metainterp/test/test_dict.py
@@ -157,7 +157,7 @@
         # the same arguments are not folded, because we have conflicting
         # definitions of pure, once strhash can be appropriately folded
         # this should be decreased to seven.
-        self.check_loops({"call": 8, "guard_false": 1, "guard_no_exception": 5,
+        self.check_loops({"call": 8, "guard_false": 1, "guard_no_exception": 6,
                           "guard_true": 1, "int_and": 1, "int_gt": 1,
                           "int_is_true": 1, "int_sub": 1, "jump": 1,
                           "new_with_vtable": 1, "setfield_gc": 1})
diff --git a/pypy/jit/metainterp/test/test_fficall.py b/pypy/jit/metainterp/test/test_fficall.py
--- a/pypy/jit/metainterp/test/test_fficall.py
+++ b/pypy/jit/metainterp/test/test_fficall.py
@@ -3,7 +3,7 @@
 from pypy.rlib.rarithmetic import r_singlefloat, r_longlong, r_ulonglong
 from pypy.rlib.jit import JitDriver, promote, dont_look_inside
 from pypy.rlib.unroll import unrolling_iterable
-from pypy.rlib.libffi import ArgChain, longlong2float, float2longlong
+from pypy.rlib.libffi import ArgChain
 from pypy.rlib.libffi import IS_32_BIT
 from pypy.rlib.test.test_libffi import TestLibffiCall as _TestLibffiCall
 from pypy.rpython.lltypesystem import lltype, rffi
@@ -12,10 +12,11 @@
 from pypy.jit.metainterp.test.support import LLJitMixin
 
 class TestFfiCall(LLJitMixin, _TestLibffiCall):
+    supports_all = False     # supports_{floats,longlong,singlefloats}
 
     # ===> ../../../rlib/test/test_libffi.py
 
-    def call(self, funcspec, args, RESULT, init_result=0, is_struct=False):
+    def call(self, funcspec, args, RESULT, is_struct=False, jitif=[]):
         """
         Call the function specified by funcspec in a loop, and let the jit to
         see and optimize it.
@@ -24,14 +25,7 @@
         lib, name, argtypes, restype = funcspec
         method_and_args = []
         for argval in args:
-            if type(argval) is r_singlefloat:
-                method_name = 'arg_singlefloat'
-                argval = float(argval)
-            elif IS_32_BIT and type(argval) in [r_longlong, r_ulonglong]:
-                method_name = 'arg_longlong'
-                argval = rffi.cast(rffi.LONGLONG, argval)
-                argval = longlong2float(argval)
-            elif isinstance(argval, tuple):
+            if isinstance(argval, tuple):
                 method_name, argval = argval
             else:
                 method_name = 'arg'
@@ -39,10 +33,20 @@
         method_and_args = unrolling_iterable(method_and_args)
         #
         reds = ['n', 'res', 'func']
-        if (RESULT in [rffi.FLOAT, rffi.DOUBLE] or
+        if (RESULT is rffi.DOUBLE or
             IS_32_BIT and RESULT in [rffi.LONGLONG, rffi.ULONGLONG]):
-            reds = ['n', 'func', 'res'] # floats must be *after* refs
+            reds = ['n', 'func', 'res'] # 'double' floats must be *after* refs
         driver = JitDriver(reds=reds, greens=[])
+        init_result = rffi.cast(RESULT, 0)
+        #
+        def g(func):
+            # a different function, which is marked as "dont_look_inside"
+            # in case it uses an unsupported argument
+            argchain = ArgChain()
+            # this loop is unrolled
+            for method_name, argval in method_and_args:
+                getattr(argchain, method_name)(argval)
+            return func.call(argchain, RESULT, is_struct=is_struct)
         #
         def f(n):
             func = lib.getpointer(name, argtypes, restype)
@@ -50,18 +54,44 @@
             while n < 10:
                 driver.jit_merge_point(n=n, res=res, func=func)
                 promote(func)
-                argchain = ArgChain()
-                # this loop is unrolled
-                for method_name, argval in method_and_args:
-                    getattr(argchain, method_name)(argval)
-                res = func.call(argchain, RESULT, is_struct=is_struct)
+                res = g(func)
                 n += 1
             return res
         #
-        res = self.meta_interp(f, [0], backendopt=True)
+        res = self.meta_interp(f, [0], backendopt=True,
+                               supports_floats       = self.supports_all,
+                               supports_longlong     = self.supports_all,
+                               supports_singlefloats = self.supports_all)
+        d = {'floats': self.supports_all,
+             'longlong': self.supports_all or not IS_32_BIT,
+             'singlefloats': self.supports_all,
+             'byval': False}
+        supported = all(d[check] for check in jitif)
+        if supported:
+            self.check_loops(
+                call_release_gil=1,   # a CALL_RELEASE_GIL, and no other CALLs
+                call=0,
+                call_may_force=0,
+                guard_no_exception=1,
+                guard_not_forced=1,
+                int_add=1,
+                int_lt=1,
+                guard_true=1,
+                jump=1)
+        else:
+            self.check_loops(
+                call_release_gil=0,   # no CALL_RELEASE_GIL
+                int_add=1,
+                int_lt=1,
+                guard_true=1,
+                jump=1)
         return res
 
     def test_byval_result(self):
         _TestLibffiCall.test_byval_result(self)
     test_byval_result.__doc__ = _TestLibffiCall.test_byval_result.__doc__
     test_byval_result.dont_track_allocations = True
+
+
+class TestFfiCallSupportAll(TestFfiCall):
+    supports_all = True     # supports_{floats,longlong,singlefloats}
diff --git a/pypy/jit/metainterp/test/test_float.py b/pypy/jit/metainterp/test/test_float.py
--- a/pypy/jit/metainterp/test/test_float.py
+++ b/pypy/jit/metainterp/test/test_float.py
@@ -36,6 +36,15 @@
         res = self.interp_operations(f, [x])
         assert res == -x
 
+    def test_singlefloat(self):
+        from pypy.rlib.rarithmetic import r_singlefloat
+        def f(a):
+            a = float(r_singlefloat(a))
+            a *= 4.25
+            return float(r_singlefloat(a))
+        res = self.interp_operations(f, [-2.0])
+        assert res == -8.5
+
 
 class TestOOtype(FloatTests, OOJitMixin):
     pass
diff --git a/pypy/jit/metainterp/test/test_immutable.py b/pypy/jit/metainterp/test/test_immutable.py
--- a/pypy/jit/metainterp/test/test_immutable.py
+++ b/pypy/jit/metainterp/test/test_immutable.py
@@ -1,5 +1,9 @@
+from pypy.rlib import jit
 from pypy.jit.metainterp.test.support import LLJitMixin, OOJitMixin
 
+ at jit.dont_look_inside
+def escape(x):
+    return x
 
 class ImmutableFieldsTests:
 
@@ -11,7 +15,7 @@
                 self.x = x
 
         def f(x):
-            y = X(x)
+            y = escape(X(x))
             return y.x + 5
         res = self.interp_operations(f, [23])
         assert res == 28
@@ -33,7 +37,7 @@
 
         def f(x, y):
             X(x)     # force the field 'x' to be on class 'X'
-            z = Y(x, y)
+            z = escape(Y(x, y))
             return z.x + z.y + 5
         res = self.interp_operations(f, [23, 11])
         assert res == 39
@@ -42,7 +46,7 @@
 
         def f(x, y):
             # this time, the field 'x' only shows up on subclass 'Y'
-            z = Y(x, y)
+            z = escape(Y(x, y))
             return z.x + z.y + 5
         res = self.interp_operations(f, [23, 11])
         assert res == 39
@@ -58,7 +62,7 @@
         def f(index):
             l = [1, 2, 3, 4]
             l[2] = 30
-            a = X(l)
+            a = escape(X(l))
             return a.y[index]
         res = self.interp_operations(f, [2], listops=True)
         assert res == 30
@@ -76,7 +80,7 @@
                 self.y = y
 
         def f(x, index):
-            y = X([x], x+1)
+            y = escape(X([x], x+1))
             return y.lst[index] + y.y + 5
         res = self.interp_operations(f, [23, 0], listops=True)
         assert res == 23 + 24 + 5
diff --git a/pypy/jit/metainterp/test/test_string.py b/pypy/jit/metainterp/test/test_string.py
--- a/pypy/jit/metainterp/test/test_string.py
+++ b/pypy/jit/metainterp/test/test_string.py
@@ -358,3 +358,22 @@
         self.check_loops(call=3,    # str(), _str(), escape()
                          newunicode=1, unicodegetitem=0,
                          unicodesetitem=1, copyunicodecontent=1)
+
+    def test_str2unicode_fold(self):
+        _str = self._str
+        jitdriver = JitDriver(greens = ['g'], reds = ['m'])
+        @dont_look_inside
+        def escape(x):
+            print str(x)
+        def f(g, m):
+            g = str(g)
+            while m >= 0:
+                jitdriver.can_enter_jit(g=g, m=m)
+                jitdriver.jit_merge_point(g=g, m=m)
+                escape(_str(g))
+                m -= 1
+            return 42
+        self.meta_interp(f, [6, 7])
+        self.check_loops(call_pure=0, call=1,
+                         newunicode=0, unicodegetitem=0,
+                         unicodesetitem=0, copyunicodecontent=0)
diff --git a/pypy/jit/metainterp/test/test_tracingopts.py b/pypy/jit/metainterp/test/test_tracingopts.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/metainterp/test/test_tracingopts.py
@@ -0,0 +1,407 @@
+import py
+import sys
+from pypy.rlib import jit
+from pypy.jit.metainterp.test.support import LLJitMixin
+
+
+class TestLLtype(LLJitMixin):
+    def test_dont_record_repeated_guard_class(self):
+        class A:
+            pass
+        class B(A):
+            pass
+        @jit.dont_look_inside
+        def extern(n):
+            if n == -7:
+                return None
+            elif n:
+                return A()
+            else:
+                return B()
+        def fn(n):
+            obj = extern(n)
+            return isinstance(obj, B) + isinstance(obj, B) + isinstance(obj, B) + isinstance(obj, B)
+        res = self.interp_operations(fn, [0])
+        assert res == 4
+        self.check_operations_history(guard_class=1, guard_nonnull=1)
+        res = self.interp_operations(fn, [1])
+        assert not res
+
+    def test_dont_record_guard_class_after_new(self):
+        class A:
+            pass
+        class B(A):
+            pass
+        def fn(n):
+            if n == -7:
+                obj = None
+            elif n:
+                obj = A()
+            else:
+                obj = B()
+            return isinstance(obj, B) + isinstance(obj, B) + isinstance(obj, B) + isinstance(obj, B)
+        res = self.interp_operations(fn, [0])
+        assert res == 4
+        self.check_operations_history(guard_class=0, guard_nonnull=0)
+        res = self.interp_operations(fn, [1])
+        assert not res
+
+    def test_guard_isnull_nullifies(self):
+        class A:
+            pass
+        a = A()
+        a.x = None
+        def fn(n):
+            if n == -7:
+                a.x = ""
+            obj = a.x
+            res = 0
+            if not obj:
+                res += 1
+            if obj:
+                res += 1
+            if obj is None:
+                res += 1
+            if obj is not None:
+                res += 1
+            return res
+        res = self.interp_operations(fn, [0])
+        assert res == 2
+        self.check_operations_history(guard_isnull=1)
+
+    def test_heap_caching_while_tracing(self):
+        class A:
+            pass
+        a1 = A()
+        a2 = A()
+        def fn(n):
+            if n > 0:
+                a = a1
+            else:
+                a = a2
+            a.x = n
+            return a.x
+        res = self.interp_operations(fn, [7])
+        assert res == 7
+        self.check_operations_history(getfield_gc=0)
+        res = self.interp_operations(fn, [-7])
+        assert res == -7
+        self.check_operations_history(getfield_gc=0)
+
+        def fn(n, ca, cb):
+            a1.x = n
+            a2.x = n
+            a = a1
+            if ca:
+                a = a2
+            b = a1
+            if cb:
+                b = a
+            return a.x + b.x
+        res = self.interp_operations(fn, [7, 0, 1])
+        assert res == 7 * 2
+        self.check_operations_history(getfield_gc=1)
+        res = self.interp_operations(fn, [-7, 1, 1])
+        assert res == -7 * 2
+        self.check_operations_history(getfield_gc=1)
+
+    def test_heap_caching_while_tracing_invalidation(self):
+        class A:
+            pass
+        a1 = A()
+        a2 = A()
+        @jit.dont_look_inside
+        def f(a):
+            a.x = 5
+        l = [1]
+        def fn(n):
+            if n > 0:
+                a = a1
+            else:
+                a = a2
+            a.x = n
+            x1 = a.x
+            f(a)
+            x2 = a.x
+            l[0] = x2
+            return a.x + x1 + x2
+        res = self.interp_operations(fn, [7])
+        assert res == 5 * 2 + 7
+        self.check_operations_history(getfield_gc=1)
+
+    def test_heap_caching_dont_store_same(self):
+        class A:
+            pass
+        a1 = A()
+        a2 = A()
+        def fn(n):
+            if n > 0:
+                a = a1
+            else:
+                a = a2
+            a.x = n
+            a.x = n
+            return a.x
+        res = self.interp_operations(fn, [7])
+        assert res == 7
+        self.check_operations_history(getfield_gc=0, setfield_gc=1)
+        res = self.interp_operations(fn, [-7])
+        assert res == -7
+        self.check_operations_history(getfield_gc=0)
+
+    def test_array_caching(self):
+        a1 = [0, 0]
+        a2 = [0, 0]
+        def fn(n):
+            if n > 0:
+                a = a1
+            else:
+                a = a2
+            a[0] = n
+            x1 = a[0]
+            a[n - n] = n + 1
+            return a[0] + x1
+        res = self.interp_operations(fn, [7])
+        assert res == 7 + 7 + 1
+        self.check_operations_history(getarrayitem_gc=1)
+        res = self.interp_operations(fn, [-7])
+        assert res == -7 - 7 + 1
+        self.check_operations_history(getarrayitem_gc=1)
+
+        def fn(n, ca, cb):
+            a1[0] = n
+            a2[0] = n
+            a = a1
+            if ca:
+                a = a2
+            b = a1
+            if cb:
+                b = a
+            return a[0] + b[0]
+        res = self.interp_operations(fn, [7, 0, 1])
+        assert res == 7 * 2
+        self.check_operations_history(getarrayitem_gc=1)
+        res = self.interp_operations(fn, [-7, 1, 1])
+        assert res == -7 * 2
+        self.check_operations_history(getarrayitem_gc=1)
+
+    def test_array_caching_while_tracing_invalidation(self):
+        a1 = [0, 0]
+        a2 = [0, 0]
+        @jit.dont_look_inside
+        def f(a):
+            a[0] = 5
+        class A: pass
+        l = A()
+        def fn(n):
+            if n > 0:
+                a = a1
+            else:
+                a = a2
+            a[0] = n
+            x1 = a[0]
+            f(a)
+            x2 = a[0]
+            l.x = x2
+            return a[0] + x1 + x2
+        res = self.interp_operations(fn, [7])
+        assert res == 5 * 2 + 7
+        self.check_operations_history(getarrayitem_gc=1)
+
+    def test_array_and_getfield_interaction(self):
+        class A: pass
+        a1 = A()
+        a2 = A()
+        a1.l = a2.l = [0, 0]
+        def fn(n):
+            if n > 0:
+                a = a1
+            else:
+                a = a2
+                a.l = [0, 0]
+            a.x = 0
+            a.l[a.x] = n
+            a.x += 1
+            a.l[a.x] = n + 1
+            x1 = a.l[a.x]
+            a.x -= 1
+            x2 = a.l[a.x]
+            return x1 + x2
+        res = self.interp_operations(fn, [7])
+        assert res == 7 * 2 + 1
+        self.check_operations_history(setarrayitem_gc=2, setfield_gc=3,
+                                      getarrayitem_gc=0, getfield_gc=1)
+
+    def test_promote_changes_heap_cache(self):
+        class A: pass
+        a1 = A()
+        a2 = A()
+        a1.l = a2.l = [0, 0]
+        a1.x = a2.x = 0
+        def fn(n):
+            if n > 0:
+                a = a1
+            else:
+                a = a2
+                a.l = [0, 0]
+            jit.promote(a.x)
+            a.l[a.x] = n
+            a.x += 1
+            a.l[a.x] = n + 1
+            x1 = a.l[a.x]
+            a.x -= 1
+            x2 = a.l[a.x]
+            return x1 + x2
+        res = self.interp_operations(fn, [7])
+        assert res == 7 * 2 + 1
+        self.check_operations_history(setarrayitem_gc=2, setfield_gc=2,
+                                      getarrayitem_gc=0, getfield_gc=2)
+
+    def test_list_caching(self):
+        a1 = [0, 0]
+        a2 = [0, 0]
+        def fn(n):
+            if n > 0:
+                a = a1
+            else:
+                a = a2
+                if n < -1000:
+                    a.append(5)
+            a[0] = n
+            x1 = a[0]
+            a[n - n] = n + 1
+            return a[0] + x1
+        res = self.interp_operations(fn, [7])
+        assert res == 7 + 7 + 1
+        self.check_operations_history(getarrayitem_gc=1,
+                getfield_gc=1)
+        res = self.interp_operations(fn, [-7])
+        assert res == -7 - 7 + 1
+        self.check_operations_history(getarrayitem_gc=1,
+                getfield_gc=1)
+
+        def fn(n, ca, cb):
+            a1[0] = n
+            a2[0] = n
+            a = a1
+            if ca:
+                a = a2
+                if n < -100:
+                    a.append(5)
+            b = a1
+            if cb:
+                b = a
+            return a[0] + b[0]
+        res = self.interp_operations(fn, [7, 0, 1])
+        assert res == 7 * 2
+        self.check_operations_history(getarrayitem_gc=1,
+                getfield_gc=3)
+        res = self.interp_operations(fn, [-7, 1, 1])
+        assert res == -7 * 2
+        self.check_operations_history(getarrayitem_gc=1,
+                getfield_gc=3)
+
+    def test_list_caching_negative(self):
+        def fn(n):
+            a = [0] * n
+            if n > 1000:
+                a.append(0)
+            a[-1] = n
+            x1 = a[-1]
+            a[n - n - 1] = n + 1
+            return a[-1] + x1
+        res = self.interp_operations(fn, [7])
+        assert res == 7 + 7 + 1
+        self.check_operations_history(setarrayitem_gc=2,
+                setfield_gc=2)
+
+    def test_virtualizable_with_array_heap_cache(self):
+        myjitdriver = jit.JitDriver(greens = [], reds = ['n', 'x', 'i', 'frame'],
+                                    virtualizables = ['frame'])
+
+        class Frame(object):
+            _virtualizable2_ = ['l[*]', 's']
+
+            def __init__(self, a, s):
+                self = jit.hint(self, access_directly=True, fresh_virtualizable=True)
+                self.l = [0] * (4 + a)
+                self.s = s
+
+        def f(n, a, i):
+            frame = Frame(a, 0)
+            frame.l[0] = a
+            frame.l[1] = a + 1
+            frame.l[2] = a + 2
+            frame.l[3] = a + 3
+            if not i:
+                return frame.l[0] + len(frame.l)
+            x = 0
+            while n > 0:
+                myjitdriver.can_enter_jit(frame=frame, n=n, x=x, i=i)
+                myjitdriver.jit_merge_point(frame=frame, n=n, x=x, i=i)
+                frame.s = jit.promote(frame.s)
+                n -= 1
+                s = frame.s
+                assert s >= 0
+                x += frame.l[s]
+                frame.s += 1
+                s = frame.s
+                assert s >= 0
+                x += frame.l[s]
+                x += len(frame.l)
+                x += f(n, n, 0)
+                frame.s -= 1
+            return x
+
+        res = self.meta_interp(f, [10, 1, 1], listops=True)
+        assert res == f(10, 1, 1)
+        self.check_history(getarrayitem_gc=0, getfield_gc=0)
+
+    def test_heap_caching_pure(self):
+        class A(object):
+            pass
+        p1 = A()
+        p2 = A()
+        def fn(n):
+            if n >= 0:
+                a = (n, n + 1)
+                p = p1
+            else:
+                a = (n + 1, n)
+                p = p2
+            p.x = a
+
+            return p.x[0] + p.x[1]
+        res = self.interp_operations(fn, [7])
+        assert res == 7 + 7 + 1
+        self.check_operations_history(getfield_gc=0, getfield_gc_pure=0)
+        res = self.interp_operations(fn, [-7])
+        assert res == -7 - 7 + 1
+        self.check_operations_history(getfield_gc=0, getfield_gc_pure=0)
+
+    def test_heap_caching_and_elidable_function(self):
+        class A:
+            pass
+        class B: pass
+        a1 = A()
+        a1.y = 6
+        a2 = A()
+        a2.y = 13
+        @jit.elidable
+        def f(b):
+            return b + 1
+        def fn(n):
+            if n > 0:
+                a = a1
+            else:
+                a = A()
+            a.x = n
+            z = f(6)
+            return z + a.x
+        res = self.interp_operations(fn, [7])
+        assert res == 7 + 7
+        self.check_operations_history(getfield_gc=0)
+        res = self.interp_operations(fn, [-7])
+        assert res == -7 + 7
+        self.check_operations_history(getfield_gc=0)
+        return
diff --git a/pypy/jit/metainterp/test/test_virtualizable.py b/pypy/jit/metainterp/test/test_virtualizable.py
--- a/pypy/jit/metainterp/test/test_virtualizable.py
+++ b/pypy/jit/metainterp/test/test_virtualizable.py
@@ -377,7 +377,7 @@
         expected = f(20)
         res = self.meta_interp(f, [20], enable_opts='')
         assert res == expected
-        self.check_loops(getfield_gc=3, setfield_gc=0,
+        self.check_loops(getfield_gc=1, setfield_gc=0,
                          arraylen_gc=1, getarrayitem_gc=1, setarrayitem_gc=1)
 
     # ------------------------------
@@ -1133,6 +1133,7 @@
          res = self.meta_interp(f, [10])
          assert res == 55
          self.check_loops(new_with_vtable=0, ptr_eq=1, everywhere=True)
+         self.check_history(ptr_eq=2)
 
     def test_virtual_child_frame_with_arrays(self):
         myjitdriver = JitDriver(greens = [], reds = ['frame'],
diff --git a/pypy/jit/metainterp/test/test_warmspot.py b/pypy/jit/metainterp/test/test_warmspot.py
--- a/pypy/jit/metainterp/test/test_warmspot.py
+++ b/pypy/jit/metainterp/test/test_warmspot.py
@@ -303,6 +303,7 @@
         class FakeCPU(object):
             supports_floats = False
             supports_longlong = False
+            supports_singlefloats = False
             ts = llhelper
             translate_support_code = False
             stats = "stats"
diff --git a/pypy/jit/metainterp/test/test_warmstate.py b/pypy/jit/metainterp/test/test_warmstate.py
--- a/pypy/jit/metainterp/test/test_warmstate.py
+++ b/pypy/jit/metainterp/test/test_warmstate.py
@@ -1,13 +1,14 @@
 from pypy.rpython.test.test_llinterp import interpret
-from pypy.rpython.lltypesystem import lltype, llmemory, rstr
+from pypy.rpython.lltypesystem import lltype, llmemory, rstr, rffi
 from pypy.rpython.ootypesystem import ootype
 from pypy.rpython.annlowlevel import llhelper
-from pypy.jit.metainterp.warmstate import wrap, unwrap
+from pypy.jit.metainterp.warmstate import wrap, unwrap, specialize_value
 from pypy.jit.metainterp.warmstate import equal_whatever, hash_whatever
 from pypy.jit.metainterp.warmstate import WarmEnterState, JitCell
 from pypy.jit.metainterp.history import BoxInt, BoxFloat, BoxPtr
 from pypy.jit.metainterp.history import ConstInt, ConstFloat, ConstPtr
 from pypy.jit.codewriter import longlong
+from pypy.rlib.rarithmetic import r_singlefloat
 
 def boxfloat(x):
     return BoxFloat(longlong.getfloatstorage(x))
@@ -40,6 +41,28 @@
     assert _is(wrap(None, 42, in_const_box=True), ConstInt(42))
     assert _is(wrap(None, 42.5, in_const_box=True), constfloat(42.5))
     assert _is(wrap(None, p, in_const_box=True), ConstPtr(po))
+    if longlong.supports_longlong:
+        import sys
+        from pypy.rlib.rarithmetic import r_longlong, r_ulonglong
+        value = r_longlong(-sys.maxint*17)
+        assert _is(wrap(None, value), BoxFloat(value))
+        assert _is(wrap(None, value, in_const_box=True), ConstFloat(value))
+        value_unsigned = r_ulonglong(-sys.maxint*17)
+        assert _is(wrap(None, value_unsigned), BoxFloat(value))
+    sfval = r_singlefloat(42.5)
+    ival = longlong.singlefloat2int(sfval)
+    assert _is(wrap(None, sfval), BoxInt(ival))
+    assert _is(wrap(None, sfval, in_const_box=True), ConstInt(ival))
+
+def test_specialize_value():
+    assert specialize_value(lltype.Char, 0x41) == '\x41'
+    if longlong.supports_longlong:
+        import sys
+        value = longlong.r_float_storage(sys.maxint*17)
+        assert specialize_value(lltype.SignedLongLong, value) == sys.maxint*17
+    sfval = r_singlefloat(42.5)
+    ival = longlong.singlefloat2int(sfval)
+    assert specialize_value(rffi.FLOAT, ival) == sfval
 
 def test_hash_equal_whatever_lltype():
     s1 = rstr.mallocstr(2)
@@ -186,6 +209,7 @@
         _get_printable_location_ptr = None
         _confirm_enter_jit_ptr = None
         _can_never_inline_ptr = None
+        _should_unroll_one_iteration_ptr = None
     class FakeCell:
         dont_trace_here = False
     state = WarmEnterState(FakeWarmRunnerDesc(), FakeJitDriverSD())
@@ -214,6 +238,7 @@
         _confirm_enter_jit_ptr = None
         _can_never_inline_ptr = None
         _get_jitcell_at_ptr = None
+        _should_unroll_one_iteration_ptr = None
     state = WarmEnterState(FakeWarmRunnerDesc(), FakeJitDriverSD())
     state.make_jitdriver_callbacks()
     res = state.get_location_str([ConstInt(5), constfloat(42.5)])
@@ -238,6 +263,7 @@
         _confirm_enter_jit_ptr = llhelper(ENTER_JIT, confirm_enter_jit)
         _can_never_inline_ptr = None
         _get_jitcell_at_ptr = None
+        _should_unroll_one_iteration_ptr = None
 
     state = WarmEnterState(FakeWarmRunnerDesc(), FakeJitDriverSD())
     state.make_jitdriver_callbacks()
@@ -262,6 +288,7 @@
         _confirm_enter_jit_ptr = None
         _can_never_inline_ptr = llhelper(CAN_NEVER_INLINE, can_never_inline)
         _get_jitcell_at_ptr = None
+        _should_unroll_one_iteration_ptr = None
 
     state = WarmEnterState(FakeWarmRunnerDesc(), FakeJitDriverSD())
     state.make_jitdriver_callbacks()
diff --git a/pypy/jit/metainterp/warmspot.py b/pypy/jit/metainterp/warmspot.py
--- a/pypy/jit/metainterp/warmspot.py
+++ b/pypy/jit/metainterp/warmspot.py
@@ -10,6 +10,7 @@
 from pypy.rlib.objectmodel import we_are_translated
 from pypy.rlib.unroll import unrolling_iterable
 from pypy.rlib.debug import fatalerror
+from pypy.rlib.rstackovf import StackOverflow
 from pypy.translator.simplify import get_functype
 from pypy.translator.unsimplify import call_final_function
 
@@ -172,6 +173,7 @@
             policy = JitPolicy()
         policy.set_supports_floats(self.cpu.supports_floats)
         policy.set_supports_longlong(self.cpu.supports_longlong)
+        policy.set_supports_singlefloats(self.cpu.supports_singlefloats)
         graphs = self.codewriter.find_all_graphs(policy)
         policy.dump_unsafe_loops()
         self.check_access_directly_sanity(graphs)
@@ -282,7 +284,9 @@
         auto_inline_graphs(self.translator, graphs, 0.01)
 
     def build_cpu(self, CPUClass, translate_support_code=False,
-                  no_stats=False, **kwds):
+                  no_stats=False, supports_floats=True,
+                  supports_longlong=True, supports_singlefloats=True,
+                  **kwds):
         assert CPUClass is not None
         self.opt = history.Options(**kwds)
         if no_stats:
@@ -294,6 +298,9 @@
             self.annhelper = MixLevelHelperAnnotator(self.translator.rtyper)
         cpu = CPUClass(self.translator.rtyper, self.stats, self.opt,
                        translate_support_code, gcdescr=self.gcdescr)
+        if not supports_floats:       cpu.supports_floats       = False
+        if not supports_longlong:     cpu.supports_longlong     = False
+        if not supports_singlefloats: cpu.supports_singlefloats = False
         self.cpu = cpu
 
     def build_meta_interp(self, ProfilerClass):
@@ -408,21 +415,29 @@
         jd.warmstate = state
 
         def crash_in_jit(e):
-            if not we_are_translated():
-                print "~~~ Crash in JIT!"
-                print '~~~ %s: %s' % (e.__class__, e)
-                if sys.stdout == sys.__stdout__:
-                    import pdb; pdb.post_mortem(sys.exc_info()[2])
-                raise
-            fatalerror('~~~ Crash in JIT! %s' % (e,), traceback=True)
+            tb = not we_are_translated() and sys.exc_info()[2]
+            try:
+                raise e
+            except JitException:
+                raise     # go through
+            except MemoryError:
+                raise     # go through
+            except StackOverflow:
+                raise     # go through
+            except Exception, e:
+                if not we_are_translated():
+                    print "~~~ Crash in JIT!"
+                    print '~~~ %s: %s' % (e.__class__, e)
+                    if sys.stdout == sys.__stdout__:
+                        import pdb; pdb.post_mortem(tb)
+                    raise e.__class__, e, tb
+                fatalerror('~~~ Crash in JIT! %s' % (e,), traceback=True)
         crash_in_jit._dont_inline_ = True
 
         if self.translator.rtyper.type_system.name == 'lltypesystem':
             def maybe_enter_jit(*args):
                 try:
                     maybe_compile_and_run(state.increment_threshold, *args)
-                except JitException:
-                    raise     # go through
                 except Exception, e:
                     crash_in_jit(e)
             maybe_enter_jit._always_inline_ = True
@@ -460,6 +475,9 @@
                 onlygreens=False)
             jd._can_never_inline_ptr = self._make_hook_graph(jd,
                 annhelper, jd.jitdriver.can_never_inline, annmodel.s_Bool)
+            jd._should_unroll_one_iteration_ptr = self._make_hook_graph(jd,
+                annhelper, jd.jitdriver.should_unroll_one_iteration,
+                annmodel.s_Bool)
         annhelper.finish()
 
     def _make_hook_graph(self, jitdriver_sd, annhelper, func,
diff --git a/pypy/jit/metainterp/warmstate.py b/pypy/jit/metainterp/warmstate.py
--- a/pypy/jit/metainterp/warmstate.py
+++ b/pypy/jit/metainterp/warmstate.py
@@ -25,9 +25,13 @@
         if isinstance(TYPE, lltype.Ptr) and TYPE.TO._gckind == 'raw':
             # non-gc pointer
             return rffi.cast(TYPE, x)
+        elif TYPE is lltype.SingleFloat:
+            return longlong.int2singlefloat(x)
         else:
             return lltype.cast_primitive(TYPE, x)
     elif INPUT is longlong.FLOATSTORAGE:
+        if longlong.is_longlong(TYPE):
+            return rffi.cast(TYPE, x)
         assert TYPE is lltype.Float
         return longlong.getrealfloat(x)
     else:
@@ -84,8 +88,12 @@
             return history.ConstObj(value)
         else:
             return history.BoxObj(value)
-    elif isinstance(value, float):
-        value = longlong.getfloatstorage(value)
+    elif (isinstance(value, float) or
+          longlong.is_longlong(lltype.typeOf(value))):
+        if isinstance(value, float):
+            value = longlong.getfloatstorage(value)
+        else:
+            value = rffi.cast(lltype.SignedLongLong, value)
         if in_const_box:
             return history.ConstFloat(value)
         else:
@@ -93,6 +101,8 @@
     elif isinstance(value, str) or isinstance(value, unicode):
         assert len(value) == 1     # must be a character
         value = ord(value)
+    elif lltype.typeOf(value) is lltype.SingleFloat:
+        value = longlong.singlefloat2int(value)
     else:
         value = intmask(value)
     if in_const_box:
@@ -138,7 +148,10 @@
         refvalue = cpu.ts.cast_to_ref(value)
         cpu.set_future_value_ref(j, refvalue)
     elif typecode == 'int':
-        intvalue = lltype.cast_primitive(lltype.Signed, value)
+        if isinstance(lltype.typeOf(value), lltype.Ptr):
+            intvalue = llmemory.AddressAsInt(llmemory.cast_ptr_to_adr(value))
+        else:
+            intvalue = lltype.cast_primitive(lltype.Signed, value)
         cpu.set_future_value_int(j, intvalue)
     elif typecode == 'float':
         if lltype.typeOf(value) is lltype.Float:
@@ -569,6 +582,19 @@
             return can_inline_greenargs(*greenargs)
         self.can_inline_greenargs = can_inline_greenargs
         self.can_inline_callable = can_inline_callable
+
+        if jd._should_unroll_one_iteration_ptr is None:
+            def should_unroll_one_iteration(greenkey):
+                return False
+        else:
+            rtyper = self.warmrunnerdesc.rtyper
+            inline_ptr = jd._should_unroll_one_iteration_ptr
+            def should_unroll_one_iteration(greenkey):
+                greenargs = unwrap_greenkey(greenkey)
+                fn = support.maybe_on_top_of_llinterp(rtyper, inline_ptr)
+                return fn(*greenargs)
+        self.should_unroll_one_iteration = should_unroll_one_iteration
+        
         if hasattr(jd.jitdriver, 'on_compile'):
             def on_compile(logger, token, operations, type, greenkey):
                 greenargs = unwrap_greenkey(greenkey)
diff --git a/pypy/module/__builtin__/abstractinst.py b/pypy/module/__builtin__/abstractinst.py
--- a/pypy/module/__builtin__/abstractinst.py
+++ b/pypy/module/__builtin__/abstractinst.py
@@ -58,7 +58,10 @@
 
     # -- case (anything, type)
     try:
-        w_result = space.isinstance(w_obj, w_klass_or_tuple, allow_override)
+        if allow_override:
+            w_result = space.isinstance_allow_override(w_obj, w_klass_or_tuple)
+        else:
+            w_result = space.isinstance(w_obj, w_klass_or_tuple)
     except OperationError, e:   # if w_klass_or_tuple was not a type, ignore it
         if not e.match(space, space.w_TypeError):
             raise       # propagate other errors
@@ -72,8 +75,11 @@
             w_pretendtype = space.getattr(w_obj, space.wrap('__class__'))
             if space.is_w(w_pretendtype, space.type(w_obj)):
                 return False     # common case: obj.__class__ is type(obj)
-            w_result = space.issubtype(w_pretendtype, w_klass_or_tuple,
-                                       allow_override)
+            if allow_override:
+                w_result = space.issubtype_allow_override(w_pretendtype,
+                                                          w_klass_or_tuple)
+            else:
+                w_result = space.issubtype(w_pretendtype, w_klass_or_tuple)
         except OperationError, e:
             if e.async(space):
                 raise
@@ -134,7 +140,11 @@
 
     # -- case (type, type)
     try:
-        w_result = space.issubtype(w_derived, w_klass_or_tuple, allow_override)
+        if allow_override:
+            w_result = space.issubtype_allow_override(w_derived,
+                                                      w_klass_or_tuple)
+        else:
+            w_result = space.issubtype(w_derived, w_klass_or_tuple)
     except OperationError, e:   # if one of the args was not a type, ignore it
         if not e.match(space, space.w_TypeError):
             raise       # propagate other errors
diff --git a/pypy/module/__builtin__/compiling.py b/pypy/module/__builtin__/compiling.py
--- a/pypy/module/__builtin__/compiling.py
+++ b/pypy/module/__builtin__/compiling.py
@@ -5,7 +5,7 @@
 from pypy.interpreter.pycode import PyCode
 from pypy.interpreter.error import OperationError
 from pypy.interpreter.astcompiler import consts, ast
-from pypy.interpreter.gateway import NoneNotWrapped, unwrap_spec
+from pypy.interpreter.gateway import unwrap_spec
 
 @unwrap_spec(filename=str, mode=str, flags=int, dont_inherit=int)
 def compile(space, w_source, filename, mode, flags=0, dont_inherit=0):
diff --git a/pypy/module/__builtin__/descriptor.py b/pypy/module/__builtin__/descriptor.py
--- a/pypy/module/__builtin__/descriptor.py
+++ b/pypy/module/__builtin__/descriptor.py
@@ -1,12 +1,10 @@
-
-from pypy.interpreter.typedef import TypeDef, GetSetProperty
 from pypy.interpreter.baseobjspace import Wrappable
+from pypy.interpreter.error import OperationError
+from pypy.interpreter.function import StaticMethod, ClassMethod
 from pypy.interpreter.gateway import interp2app, unwrap_spec
-from pypy.interpreter.error import OperationError, operationerrfmt
-from pypy.objspace.descroperation import object_getattribute, object_setattr
-from pypy.interpreter.function import StaticMethod, ClassMethod
-from pypy.interpreter.typedef import GetSetProperty, descr_get_dict, \
-     descr_set_dict, interp_attrproperty_w, generic_new_descr
+from pypy.interpreter.typedef import (TypeDef, interp_attrproperty_w,
+    generic_new_descr)
+from pypy.objspace.descroperation import object_getattribute
 
 class W_Super(Wrappable):
     def __init__(self, space, w_starttype, w_objtype, w_self):
diff --git a/pypy/module/__builtin__/functional.py b/pypy/module/__builtin__/functional.py
--- a/pypy/module/__builtin__/functional.py
+++ b/pypy/module/__builtin__/functional.py
@@ -4,13 +4,12 @@
 """
 
 from pypy.interpreter.error import OperationError
-from pypy.interpreter.gateway import NoneNotWrapped, applevel
+from pypy.interpreter.gateway import NoneNotWrapped
 from pypy.interpreter.gateway import interp2app, unwrap_spec
 from pypy.interpreter.typedef import TypeDef
 from pypy.interpreter.baseobjspace import Wrappable
 from pypy.rlib.rarithmetic import r_uint, intmask
 from pypy.rlib.objectmodel import specialize
-from inspect import getsource, getfile
 from pypy.rlib.rbigint import rbigint
 
 
@@ -662,7 +661,6 @@
 
     def descr_reduce(self):
         from pypy.interpreter.mixedmodule import MixedModule
-        from pypy.module._pickle_support import maker # helper fns
         space    = self.space
         w_mod    = space.getbuiltinmodule('_pickle_support')
         mod      = space.interp_w(MixedModule, w_mod)
diff --git a/pypy/module/__builtin__/interp_classobj.py b/pypy/module/__builtin__/interp_classobj.py
--- a/pypy/module/__builtin__/interp_classobj.py
+++ b/pypy/module/__builtin__/interp_classobj.py
@@ -1,11 +1,9 @@
 import new
 from pypy.interpreter.error import OperationError, operationerrfmt
-from pypy.interpreter.gateway import NoneNotWrapped, applevel, interp2app
+from pypy.interpreter.gateway import interp2app
 from pypy.interpreter.typedef import TypeDef, make_weakref_descr
 from pypy.interpreter.baseobjspace import Wrappable
-from pypy.interpreter.typedef import GetSetProperty, descr_get_dict
-from pypy.interpreter.typedef import descr_set_dict
-from pypy.rlib.rarithmetic import r_uint, intmask
+from pypy.interpreter.typedef import GetSetProperty, descr_get_dict, descr_set_dict
 from pypy.rlib.objectmodel import compute_identity_hash
 from pypy.rlib.debug import make_sure_not_resized
 from pypy.rlib import jit
@@ -420,7 +418,7 @@
         if w_meth is not None:
             space.call_function(w_meth, w_name)
         else:
-            if not self.deldictvalue(space, w_name):
+            if not self.deldictvalue(space, name):
                 raise operationerrfmt(
                     space.w_AttributeError,
                     "%s instance has no attribute '%s'",
diff --git a/pypy/module/__builtin__/interp_memoryview.py b/pypy/module/__builtin__/interp_memoryview.py
--- a/pypy/module/__builtin__/interp_memoryview.py
+++ b/pypy/module/__builtin__/interp_memoryview.py
@@ -2,7 +2,7 @@
 Implementation of the 'buffer' and 'memoryview' types.
 """
 from pypy.interpreter.baseobjspace import Wrappable
-from pypy.interpreter import gateway, buffer
+from pypy.interpreter import buffer
 from pypy.interpreter.gateway import interp2app, unwrap_spec
 from pypy.interpreter.typedef import TypeDef, GetSetProperty
 from pypy.interpreter.error import OperationError
diff --git a/pypy/module/__builtin__/operation.py b/pypy/module/__builtin__/operation.py
--- a/pypy/module/__builtin__/operation.py
+++ b/pypy/module/__builtin__/operation.py
@@ -4,12 +4,10 @@
 
 from pypy.interpreter import gateway
 from pypy.interpreter.error import OperationError
-from pypy.interpreter.gateway import interp2app, unwrap_spec
-from pypy.interpreter.typedef import TypeDef
+from pypy.interpreter.gateway import unwrap_spec
 from pypy.rlib.runicode import UNICHR
 from pypy.rlib.rfloat import isnan, isinf, round_double
 from pypy.rlib import rfloat
-import math
 import __builtin__
 NoneNotWrapped = gateway.NoneNotWrapped
 
diff --git a/pypy/module/__pypy__/__init__.py b/pypy/module/__pypy__/__init__.py
--- a/pypy/module/__pypy__/__init__.py
+++ b/pypy/module/__pypy__/__init__.py
@@ -25,6 +25,7 @@
         'debug_print_once'          : 'interp_debug.debug_print_once',
         'builtinify'                : 'interp_magic.builtinify',
         'lookup_special'            : 'interp_magic.lookup_special',
+        'do_what_I_mean'            : 'interp_magic.do_what_I_mean',
     }
 
     submodules = {
diff --git a/pypy/module/__pypy__/interp_debug.py b/pypy/module/__pypy__/interp_debug.py
--- a/pypy/module/__pypy__/interp_debug.py
+++ b/pypy/module/__pypy__/interp_debug.py
@@ -1,5 +1,4 @@
-from pypy.interpreter.gateway import interp2app, NoneNotWrapped, unwrap_spec
-from pypy.interpreter.error import OperationError
+from pypy.interpreter.gateway import unwrap_spec
 from pypy.rlib import debug, jit
 
 
diff --git a/pypy/module/__pypy__/interp_identitydict.py b/pypy/module/__pypy__/interp_identitydict.py
--- a/pypy/module/__pypy__/interp_identitydict.py
+++ b/pypy/module/__pypy__/interp_identitydict.py
@@ -1,6 +1,6 @@
 from pypy.interpreter.error import OperationError
 from pypy.interpreter.typedef import TypeDef
-from pypy.interpreter.gateway import NoneNotWrapped, interp2app, unwrap_spec
+from pypy.interpreter.gateway import interp2app
 from pypy.interpreter.baseobjspace import Wrappable
 
 class W_IdentityDict(Wrappable):
diff --git a/pypy/module/__pypy__/interp_magic.py b/pypy/module/__pypy__/interp_magic.py
--- a/pypy/module/__pypy__/interp_magic.py
+++ b/pypy/module/__pypy__/interp_magic.py
@@ -70,3 +70,6 @@
     if w_descr is None:
         return space.w_None
     return space.get(w_descr, w_obj)
+
+def do_what_I_mean(space):
+    return space.wrap(42)
diff --git a/pypy/module/__pypy__/test/test_special.py b/pypy/module/__pypy__/test/test_special.py
--- a/pypy/module/__pypy__/test/test_special.py
+++ b/pypy/module/__pypy__/test/test_special.py
@@ -49,3 +49,8 @@
         class X:
             pass
         raises(TypeError, lookup_special, X(), "foo")
+
+    def test_do_what_I_mean(self):
+        from __pypy__ import do_what_I_mean
+        x = do_what_I_mean()
+        assert x == 42
diff --git a/pypy/module/_ast/test/test_ast.py b/pypy/module/_ast/test/test_ast.py
--- a/pypy/module/_ast/test/test_ast.py
+++ b/pypy/module/_ast/test/test_ast.py
@@ -186,6 +186,11 @@
         mod = self.get_ast("from __future__ import with_statement; import y; " \
                                "from __future__ import nested_scopes")
         raises(SyntaxError, compile, mod, "<test>", "exec")
+        mod = self.get_ast("from __future__ import division\nx = 1/2")
+        co = compile(mod, "<test>", "exec")
+        ns = {}
+        exec co in ns
+        assert ns["x"] == .5
 
     def test_field_attr_writable(self):
         import _ast as ast
@@ -245,3 +250,38 @@
         assert x.left == n1
         assert x.op == addop
         assert x.right == n3
+
+    def test_functiondef(self):
+        import _ast as ast
+        fAst = ast.FunctionDef(
+            name="foo",
+            args=ast.arguments(
+                args=[], vararg=None, kwarg=None, defaults=[],
+                kwonlyargs=[], kw_defaults=[]),
+            body=[], decorator_list=[], lineno=5, col_offset=0)
+        exprAst = ast.Interactive(body=[fAst])
+        compiled = compile(exprAst, "<foo>", "single")
+        #
+        d = {}
+        eval(compiled, d, d)
+        assert type(d['foo']) is type(lambda: 42)
+        assert d['foo']() is None
+
+    def test_missing_name(self):
+        import _ast as ast
+        n = ast.FunctionDef(name=None)
+        n.name = "foo"
+        n.name = "foo"
+        n.name = "foo"
+        assert n.name == "foo"
+
+    def test_issue793(self):
+        import _ast as ast
+        body = ast.Module([
+            ast.TryExcept([ast.Pass(lineno=2, col_offset=4)],
+                [ast.ExceptHandler(ast.Name('Exception', ast.Load(),
+                                            lineno=3, col_offset=0),
+                                   None, [], lineno=4, col_offset=0)],
+                [], lineno=1, col_offset=0)
+        ])
+        exec compile(body, '<string>', 'exec')
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -1,6 +1,6 @@
 from pypy.interpreter.error import OperationError, operationerrfmt
 from pypy.interpreter.gateway import NoneNotWrapped, interp2app, unwrap_spec
-from pypy.rlib.rstring import StringBuilder, UnicodeBuilder
+from pypy.rlib.rstring import UnicodeBuilder
 from pypy.rlib.objectmodel import we_are_translated
 
 class CodecState(object):
diff --git a/pypy/module/_ffi/interp_ffi.py b/pypy/module/_ffi/interp_ffi.py
--- a/pypy/module/_ffi/interp_ffi.py
+++ b/pypy/module/_ffi/interp_ffi.py
@@ -1,9 +1,8 @@
-import sys
-from pypy.interpreter.baseobjspace import Wrappable, Arguments
+from pypy.interpreter.baseobjspace import Wrappable
 from pypy.interpreter.error import OperationError, wrap_oserror, \
     operationerrfmt
-from pypy.interpreter.gateway import interp2app, NoneNotWrapped, unwrap_spec
-from pypy.interpreter.typedef import TypeDef, GetSetProperty
+from pypy.interpreter.gateway import interp2app, unwrap_spec
+from pypy.interpreter.typedef import TypeDef
 from pypy.module._rawffi.structure import W_StructureInstance, W_Structure
 #
 from pypy.rpython.lltypesystem import lltype, rffi
@@ -12,11 +11,12 @@
 from pypy.rlib import libffi
 from pypy.rlib.rdynload import DLOpenError
 from pypy.rlib.rarithmetic import intmask, r_uint
+from pypy.rlib.objectmodel import we_are_translated
 
 class W_FFIType(Wrappable):
 
     _immutable_fields_ = ['name', 'ffitype', 'w_datashape', 'w_pointer_to']
-    
+
     def __init__(self, name, ffitype, w_datashape=None, w_pointer_to=None):
         self.name = name
         self.ffitype = ffitype
@@ -75,6 +75,13 @@
     def is_struct(self):
         return libffi.types.is_struct(self.ffitype)
 
+    def is_char_p(self):
+        return self is app_types.char_p
+
+    def is_unichar_p(self):
+        return self is app_types.unichar_p
+
+
 W_FFIType.typedef = TypeDef(
     'FFIType',
     __repr__ = interp2app(W_FFIType.repr),
@@ -83,7 +90,6 @@
 
 
 def build_ffi_types():
-    from pypy.rlib.clibffi import FFI_TYPE_P
     types = [
         # note: most of the type name directly come from the C equivalent,
         # with the exception of bytes: in C, ubyte and char are equivalent,
@@ -117,7 +123,12 @@
         ## 'Z' : ffi_type_pointer,
 
         ]
-    return dict([(t.name, t) for t in types])
+    d = dict([(t.name, t) for t in types])
+    w_char = d['char']
+    w_unichar = d['unichar']
+    d['char_p'] = W_FFIType('char_p', libffi.types.pointer, w_pointer_to = w_char)
+    d['unichar_p'] = W_FFIType('unichar_p', libffi.types.pointer, w_pointer_to = w_unichar)
+    return d
 
 class app_types:
     pass
@@ -127,9 +138,14 @@
     try:
         return descr_new_pointer.cache[w_pointer_to]
     except KeyError:
-        w_pointer_to = space.interp_w(W_FFIType, w_pointer_to)
-        name = '(pointer to %s)' % w_pointer_to.name
-        w_result = W_FFIType(name, libffi.types.pointer, w_pointer_to = w_pointer_to)
+        if w_pointer_to is app_types.char:
+            w_result = app_types.char_p
+        elif w_pointer_to is app_types.unichar:
+            w_result = app_types.unichar_p
+        else:
+            w_pointer_to = space.interp_w(W_FFIType, w_pointer_to)
+            name = '(pointer to %s)' % w_pointer_to.name
+            w_result = W_FFIType(name, libffi.types.pointer, w_pointer_to = w_pointer_to)
         descr_new_pointer.cache[w_pointer_to] = w_result
         return w_result
 descr_new_pointer.cache = {}
@@ -161,11 +177,12 @@
 class W_FuncPtr(Wrappable):
 
     _immutable_fields_ = ['func', 'argtypes_w[*]', 'w_restype']
-    
+
     def __init__(self, func, argtypes_w, w_restype):
         self.func = func
         self.argtypes_w = argtypes_w
         self.w_restype = w_restype
+        self.to_free = []
 
     @jit.unroll_safe
     def build_argchain(self, space, args_w):
@@ -190,6 +207,9 @@
                 self.arg_longlong(space, argchain, w_arg)
             elif w_argtype.is_signed():
                 argchain.arg(unwrap_truncate_int(rffi.LONG, space, w_arg))
+            elif self.add_char_p_maybe(space, argchain, w_arg, w_argtype):
+                # the argument is added to the argchain direcly by the method above
+                pass
             elif w_argtype.is_pointer():
                 w_arg = self.convert_pointer_arg_maybe(space, w_arg, w_argtype)
                 argchain.arg(intmask(space.uint_w(w_arg)))
@@ -202,18 +222,41 @@
                 w_arg = space.ord(w_arg)
                 argchain.arg(space.int_w(w_arg))
             elif w_argtype.is_double():
-                argchain.arg(space.float_w(w_arg))
+                self.arg_float(space, argchain, w_arg)
             elif w_argtype.is_singlefloat():
-                argchain.arg_singlefloat(space.float_w(w_arg))
+                self.arg_singlefloat(space, argchain, w_arg)
             elif w_argtype.is_struct():
                 # arg_raw directly takes value to put inside ll_args
-                w_arg = space.interp_w(W_StructureInstance, w_arg)                
+                w_arg = space.interp_w(W_StructureInstance, w_arg)
                 ptrval = w_arg.ll_buffer
                 argchain.arg_raw(ptrval)
             else:
                 assert False, "Argument shape '%s' not supported" % w_argtype
         return argchain
 
+    def add_char_p_maybe(self, space, argchain, w_arg, w_argtype):
+        """
+        Automatic conversion from string to char_p. The allocated buffer will
+        be automatically freed after the call.
+        """
+        w_type = jit.promote(space.type(w_arg))
+        if w_argtype.is_char_p() and w_type is space.w_str:
+            strval = space.str_w(w_arg)
+            buf = rffi.str2charp(strval)
+            self.to_free.append(rffi.cast(rffi.VOIDP, buf))
+            addr = rffi.cast(rffi.ULONG, buf)
+            argchain.arg(addr)
+            return True
+        elif w_argtype.is_unichar_p() and (w_type is space.w_str or
+                                           w_type is space.w_unicode):
+            unicodeval = space.unicode_w(w_arg)
+            buf = rffi.unicode2wcharp(unicodeval)
+            self.to_free.append(rffi.cast(rffi.VOIDP, buf))
+            addr = rffi.cast(rffi.ULONG, buf)
+            argchain.arg(addr)
+            return True
+        return False
+
     def convert_pointer_arg_maybe(self, space, w_arg, w_argtype):
         """
         Try to convert the argument by calling _as_ffi_pointer_()
@@ -224,26 +267,47 @@
         else:
             return w_arg
 
-    @jit.dont_look_inside
+    def arg_float(self, space, argchain, w_arg):
+        # a separate function, which can be seen by the jit or not,
+        # depending on whether floats are supported
+        argchain.arg(space.float_w(w_arg))
+
     def arg_longlong(self, space, argchain, w_arg):
+        # a separate function, which can be seen by the jit or not,
+        # depending on whether longlongs are supported
         bigarg = space.bigint_w(w_arg)
         ullval = bigarg.ulonglongmask()
         llval = rffi.cast(rffi.LONGLONG, ullval)
-        # this is a hack: we store the 64 bits of the long long into the
-        # 64 bits of a float (i.e., a C double)
-        floatval = libffi.longlong2float(llval)
-        argchain.arg_longlong(floatval)
+        argchain.arg(llval)
+
+    def arg_singlefloat(self, space, argchain, w_arg):
+        # a separate function, which can be seen by the jit or not,
+        # depending on whether singlefloats are supported
+        from pypy.rlib.rarithmetic import r_singlefloat
+        fval = space.float_w(w_arg)
+        sfval = r_singlefloat(fval)
+        argchain.arg(sfval)
 
     def call(self, space, args_w):
         self = jit.promote(self)
         argchain = self.build_argchain(space, args_w)
+        return self._do_call(space, argchain)
+
+    def free_temp_buffers(self, space):
+        for buf in self.to_free:
+            if not we_are_translated():
+                buf[0] = '\00' # invalidate the buffer, so that
+                               # test_keepalive_temp_buffer can fail
+            lltype.free(buf, flavor='raw')
+        self.to_free = []
+
+    def _do_call(self, space, argchain):
         w_restype = self.w_restype
         if w_restype.is_longlong():
             # note that we must check for longlong first, because either
             # is_signed or is_unsigned returns true anyway
             assert libffi.IS_32_BIT
-            reskind = libffi.types.getkind(self.func.restype) # XXX: remove the kind
-            return self._call_longlong(space, argchain, reskind)
+            return self._call_longlong(space, argchain)
         elif w_restype.is_signed():
             return self._call_int(space, argchain)
         elif w_restype.is_unsigned() or w_restype.is_pointer():
@@ -255,12 +319,9 @@
             intres = self.func.call(argchain, rffi.WCHAR_T)
             return space.wrap(unichr(intres))
         elif w_restype.is_double():
-            floatres = self.func.call(argchain, rffi.DOUBLE)
-            return space.wrap(floatres)
+            return self._call_float(space, argchain)
         elif w_restype.is_singlefloat():
-            # the result is a float, but widened to be inside a double
-            floatres = self.func.call(argchain, rffi.FLOAT)
-            return space.wrap(floatres)
+            return self._call_singlefloat(space, argchain)
         elif w_restype.is_struct():
             w_datashape = w_restype.w_datashape
             assert isinstance(w_datashape, W_Structure)
@@ -329,19 +390,32 @@
                                  space.wrap('Unsupported restype'))
         return space.wrap(intres)
 
-    @jit.dont_look_inside
-    def _call_longlong(self, space, argchain, reskind):
-        # this is a hack: we store the 64 bits of the long long into the 64
-        # bits of a float (i.e., a C double)
-        floatres = self.func.call(argchain, rffi.LONGLONG)
-        llres = libffi.float2longlong(floatres)
-        if reskind == 'I':
+    def _call_float(self, space, argchain):
+        # a separate function, which can be seen by the jit or not,
+        # depending on whether floats are supported
+        floatres = self.func.call(argchain, rffi.DOUBLE)
+        return space.wrap(floatres)
+
+    def _call_longlong(self, space, argchain):
+        # a separate function, which can be seen by the jit or not,
+        # depending on whether longlongs are supported
+        restype = self.func.restype
+        call = self.func.call
+        if restype is libffi.types.slonglong:
+            llres = call(argchain, rffi.LONGLONG)
             return space.wrap(llres)
-        elif reskind == 'U':
-            ullres = rffi.cast(rffi.ULONGLONG, llres)
+        elif restype is libffi.types.ulonglong:
+            ullres = call(argchain, rffi.ULONGLONG)
             return space.wrap(ullres)
         else:
-            assert False
+            raise OperationError(space.w_ValueError,
+                                 space.wrap('Unsupported longlong restype'))
+
+    def _call_singlefloat(self, space, argchain):
+        # a separate function, which can be seen by the jit or not,
+        # depending on whether singlefloats are supported
+        sfres = self.func.call(argchain, rffi.FLOAT)
+        return space.wrap(float(sfres))
 
     def getaddr(self, space):
         """
@@ -374,6 +448,7 @@
     '_ffi.FuncPtr',
     __call__ = interp2app(W_FuncPtr.call),
     getaddr = interp2app(W_FuncPtr.getaddr),
+    free_temp_buffers = interp2app(W_FuncPtr.free_temp_buffers),
     fromaddr = interp2app(descr_fromaddr, as_classmethod=True)
     )
 
@@ -404,7 +479,7 @@
         except KeyError:
             raise operationerrfmt(space.w_AttributeError,
                                   "No symbol %s found in library %s", name, self.name)
-            
+
         return W_FuncPtr(func, argtypes_w, w_restype)
 
     @unwrap_spec(name=str)
diff --git a/pypy/module/_ffi/test/test__ffi.py b/pypy/module/_ffi/test/test__ffi.py
--- a/pypy/module/_ffi/test/test__ffi.py
+++ b/pypy/module/_ffi/test/test__ffi.py
@@ -188,6 +188,75 @@
         assert get_dummy() == 123
         set_val_to_ptr(ptr2, 0)
 
+    def test_convert_strings_to_char_p(self):
+        """
+            long mystrlen(char* s)
+            {
+                long len = 0;
+                while(*s++)
+                    len++;
+                return len;
+            }
+        """
+        from _ffi import CDLL, types
+        import _rawffi
+        libfoo = CDLL(self.libfoo_name)
+        mystrlen = libfoo.getfunc('mystrlen', [types.char_p], types.slong)
+        #
+        # first, try automatic conversion from a string
+        assert mystrlen('foobar') == 6
+        # then, try to pass an explicit pointer
+        CharArray = _rawffi.Array('c')
+        mystr = CharArray(7, 'foobar')
+        assert mystrlen(mystr.buffer) == 6
+        mystr.free()
+        mystrlen.free_temp_buffers()
+
+    def test_convert_unicode_to_unichar_p(self):
+        """
+            #include <wchar.h>
+            long mystrlen_u(wchar_t* s)
+            {
+                long len = 0;
+                while(*s++)
+                    len++;
+                return len;
+            }
+        """
+        from _ffi import CDLL, types
+        import _rawffi
+        libfoo = CDLL(self.libfoo_name)
+        mystrlen = libfoo.getfunc('mystrlen_u', [types.unichar_p], types.slong)
+        #
+        # first, try automatic conversion from strings and unicode
+        assert mystrlen('foobar') == 6
+        assert mystrlen(u'foobar') == 6
+        assert mystrlen(u'ab\u2070') == 3
+        # then, try to pass an explicit pointer
+        UniCharArray = _rawffi.Array('u')
+        mystr = UniCharArray(7, u'foobar')
+        assert mystrlen(mystr.buffer) == 6
+        mystr.free()
+        mystrlen.free_temp_buffers()
+
+    def test_keepalive_temp_buffer(self):
+        """
+            char* do_nothing(char* s)
+            {
+                return s;
+            }
+        """
+        from _ffi import CDLL, types
+        import _rawffi
+        libfoo = CDLL(self.libfoo_name)
+        do_nothing = libfoo.getfunc('do_nothing', [types.char_p], types.char_p)
+        CharArray = _rawffi.Array('c')
+        #
+        ptr = do_nothing('foobar')
+        array = CharArray.fromaddress(ptr, 7)
+        assert list(array) == list('foobar\00')
+        do_nothing.free_temp_buffers()
+
     def test_typed_pointer(self):
         from _ffi import types
         intptr = types.Pointer(types.sint) # create a typed pointer to sint
@@ -204,6 +273,13 @@
         assert x is y
         assert x is not z
 
+    def test_char_p_cached(self):
+        from _ffi import types
+        x = types.Pointer(types.char)
+        assert x is types.char_p
+        x = types.Pointer(types.unichar)
+        assert x is types.unichar_p
+
     def test_typed_pointer_args(self):
         """
             extern int dummy; // defined in test_void_result 
diff --git a/pypy/module/_file/interp_stream.py b/pypy/module/_file/interp_stream.py
--- a/pypy/module/_file/interp_stream.py
+++ b/pypy/module/_file/interp_stream.py
@@ -3,12 +3,10 @@
 from pypy.rlib.streamio import StreamErrors
 
 from pypy.interpreter.error import OperationError, wrap_oserror2
-from pypy.interpreter.gateway import ObjSpace
-from pypy.interpreter.baseobjspace import Wrappable
+from pypy.interpreter.baseobjspace import ObjSpace, Wrappable
 from pypy.interpreter.typedef import TypeDef
 from pypy.interpreter.gateway import interp2app
 
-import os
 
 def wrap_streamerror(space, e, w_filename=None):
     if isinstance(e, streamio.StreamError):
diff --git a/pypy/module/_io/interp_bufferedio.py b/pypy/module/_io/interp_bufferedio.py
--- a/pypy/module/_io/interp_bufferedio.py
+++ b/pypy/module/_io/interp_bufferedio.py
@@ -4,7 +4,6 @@
 from pypy.interpreter.gateway import interp2app, unwrap_spec
 from pypy.interpreter.error import OperationError, operationerrfmt
 from pypy.interpreter.buffer import RWBuffer
-from pypy.rpython.lltypesystem import lltype, rffi
 from pypy.rlib.rstring import StringBuilder
 from pypy.rlib.rarithmetic import r_longlong, intmask
 from pypy.tool.sourcetools import func_renamer
diff --git a/pypy/module/_io/interp_fileio.py b/pypy/module/_io/interp_fileio.py
--- a/pypy/module/_io/interp_fileio.py
+++ b/pypy/module/_io/interp_fileio.py
@@ -1,5 +1,4 @@
-from pypy.interpreter.typedef import (
-    TypeDef, interp_attrproperty, interp_attrproperty_w, GetSetProperty)
+from pypy.interpreter.typedef import TypeDef, interp_attrproperty, GetSetProperty
 from pypy.interpreter.gateway import interp2app, unwrap_spec
 from pypy.interpreter.error import OperationError, wrap_oserror, wrap_oserror2
 from pypy.rlib.rarithmetic import r_longlong
diff --git a/pypy/module/_io/interp_io.py b/pypy/module/_io/interp_io.py
--- a/pypy/module/_io/interp_io.py
+++ b/pypy/module/_io/interp_io.py
@@ -6,7 +6,6 @@
     TypeDef, interp_attrproperty, generic_new_descr)
 from pypy.module.exceptions.interp_exceptions import W_IOError
 from pypy.module._io.interp_fileio import W_FileIO
-from pypy.module._io.interp_iobase import W_IOBase
 from pypy.module._io.interp_textio import W_TextIOWrapper
 from pypy.rpython.module.ll_os_stat import STAT_FIELD_TYPES
 
diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py
--- a/pypy/module/_io/interp_textio.py
+++ b/pypy/module/_io/interp_textio.py
@@ -4,7 +4,7 @@
     generic_new_descr)
 from pypy.interpreter.gateway import interp2app, unwrap_spec
 from pypy.interpreter.baseobjspace import Wrappable
-from pypy.interpreter.error import OperationError, operationerrfmt
+from pypy.interpreter.error import OperationError
 from pypy.rlib.rarithmetic import intmask, r_ulonglong, r_uint
 from pypy.rlib.rbigint import rbigint
 from pypy.rlib.rstring import UnicodeBuilder
diff --git a/pypy/module/_locale/interp_locale.py b/pypy/module/_locale/interp_locale.py
--- a/pypy/module/_locale/interp_locale.py
+++ b/pypy/module/_locale/interp_locale.py
@@ -1,4 +1,3 @@
-from pypy.rpython.tool import rffi_platform as platform
 from pypy.rlib import rposix
 from pypy.rlib.rarithmetic import intmask
 
diff --git a/pypy/module/_minimal_curses/fficurses.py b/pypy/module/_minimal_curses/fficurses.py
--- a/pypy/module/_minimal_curses/fficurses.py
+++ b/pypy/module/_minimal_curses/fficurses.py
@@ -2,12 +2,10 @@
 """ The ffi for rpython, need to be imported for side effects
 """
 
-import sys
 from pypy.rpython.lltypesystem import rffi
 from pypy.rpython.lltypesystem import lltype
 from pypy.rpython.tool import rffi_platform
 from pypy.rpython.extfunc import register_external
-from pypy.rpython.extregistry import ExtRegistryEntry
 from pypy.module._minimal_curses import interp_curses
 from pypy.translator.tool.cbuild import ExternalCompilationInfo
 
@@ -82,7 +80,7 @@
         return res
     finally:
         rffi.free_charp(ll_cap)
-    
+
 register_external(interp_curses._curses_tigetstr, [str], str,
                   export_name='_curses.tigetstr', llimpl=tigetstr_llimpl)
 
diff --git a/pypy/module/_multibytecodec/__init__.py b/pypy/module/_multibytecodec/__init__.py
--- a/pypy/module/_multibytecodec/__init__.py
+++ b/pypy/module/_multibytecodec/__init__.py
@@ -7,13 +7,14 @@
         # for compatibility this name is obscured, and should be called
         # via the _codecs_*.py modules written in lib_pypy.
         '__getcodec': 'interp_multibytecodec.getcodec',
+
+        'MultibyteIncrementalDecoder':
+            'interp_incremental.MultibyteIncrementalDecoder',
+        'MultibyteIncrementalEncoder':
+            'interp_incremental.MultibyteIncrementalEncoder',
     }
 
     appleveldefs = {
-        'MultibyteIncrementalEncoder':
-            'app_multibytecodec.MultibyteIncrementalEncoder',
-        'MultibyteIncrementalDecoder':
-            'app_multibytecodec.MultibyteIncrementalDecoder',
         'MultibyteStreamReader':
             'app_multibytecodec.MultibyteStreamReader',
         'MultibyteStreamWriter':
diff --git a/pypy/module/_multibytecodec/app_multibytecodec.py b/pypy/module/_multibytecodec/app_multibytecodec.py
--- a/pypy/module/_multibytecodec/app_multibytecodec.py
+++ b/pypy/module/_multibytecodec/app_multibytecodec.py
@@ -1,34 +1,47 @@
 # NOT_RPYTHON
 #
-# These classes are not supported so far.
-#
-# My theory is that they are not widely used on CPython either, because
-# I found two bugs just by looking at their .c source: they always call
-# encreset() after a piece of data, even though I think it's wrong ---
-# it should be called only once at the end; and mbiencoder_reset() calls
-# decreset() instead of encreset().
-#
+# The interface here may be a little bit on the lightweight side.
 
-class MultibyteIncrementalEncoder(object):
-    def __init__(self, *args, **kwds):
-        raise LookupError(
-            "MultibyteIncrementalEncoder not implemented; "
-            "see pypy/module/_multibytecodec/app_multibytecodec.py")
+from _multibytecodec import MultibyteIncrementalDecoder
+from _multibytecodec import MultibyteIncrementalEncoder
 
-class MultibyteIncrementalDecoder(object):
-    def __init__(self, *args, **kwds):
-        raise LookupError(
-            "MultibyteIncrementalDecoder not implemented; "
-            "see pypy/module/_multibytecodec/app_multibytecodec.py")
 
-class MultibyteStreamReader(object):
-    def __init__(self, *args, **kwds):
-        raise LookupError(
-            "MultibyteStreamReader not implemented; "
-            "see pypy/module/_multibytecodec/app_multibytecodec.py")
+class MultibyteStreamReader(MultibyteIncrementalDecoder):
+    def __new__(cls, stream, errors=None):
+        self = MultibyteIncrementalDecoder.__new__(cls, errors)
+        self.stream = stream
+        return self
 
-class MultibyteStreamWriter(object):
-    def __init__(self, *args, **kwds):
-        raise LookupError(
-            "MultibyteStreamWriter not implemented; "
-            "see pypy/module/_multibytecodec/app_multibytecodec.py")
+    def __read(self, read, size):
+        if size is None or size < 0:
+            return MultibyteIncrementalDecoder.decode(self, read(), True)
+        while True:
+            data = read(size)
+            final = not data
+            output = MultibyteIncrementalDecoder.decode(self, data, final)
+            if output or final:
+                return output
+            size = 1   # read 1 more byte and retry
+
+    def read(self, size=None):
+        return self.__read(self.stream.read, size)
+
+    def readline(self, size=None):
+        return self.__read(self.stream.readline, size)
+
+    def readlines(self, sizehint=None):
+        return self.__read(self.stream.read, sizehint).splitlines(True)
+
+
+class MultibyteStreamWriter(MultibyteIncrementalEncoder):
+    def __new__(cls, stream, errors=None):
+        self = MultibyteIncrementalEncoder.__new__(cls, errors)
+        self.stream = stream
+        return self
+
+    def write(self, data):
+        self.stream.write(MultibyteIncrementalEncoder.encode(self, data))
+
+    def writelines(self, lines):
+        for data in lines:
+            self.write(data)
diff --git a/pypy/module/_multibytecodec/c_codecs.py b/pypy/module/_multibytecodec/c_codecs.py
--- a/pypy/module/_multibytecodec/c_codecs.py
+++ b/pypy/module/_multibytecodec/c_codecs.py
@@ -1,4 +1,4 @@
-import py, sys
+import py
 from pypy.rpython.lltypesystem import lltype, rffi
 from pypy.translator.tool.cbuild import ExternalCompilationInfo
 from pypy.tool.autopath import pypydir
@@ -52,13 +52,17 @@
     includes = ['src/cjkcodecs/multibytecodec.h'],
     include_dirs = [str(srcdir)],
     export_symbols = [
+        "pypy_cjk_dec_new",
         "pypy_cjk_dec_init", "pypy_cjk_dec_free", "pypy_cjk_dec_chunk",
         "pypy_cjk_dec_outbuf", "pypy_cjk_dec_outlen",
         "pypy_cjk_dec_inbuf_remaining", "pypy_cjk_dec_inbuf_consumed",
+        "pypy_cjk_dec_replace_on_error",
 
+        "pypy_cjk_enc_new",
         "pypy_cjk_enc_init", "pypy_cjk_enc_free", "pypy_cjk_enc_chunk",
         "pypy_cjk_enc_reset", "pypy_cjk_enc_outbuf", "pypy_cjk_enc_outlen",
         "pypy_cjk_enc_inbuf_remaining", "pypy_cjk_enc_inbuf_consumed",
+        "pypy_cjk_enc_replace_on_error",
     ] + ["pypy_cjkcodec_%s" % codec for codec in codecs],
 )
 
@@ -90,9 +94,11 @@
 # Decoding
 
 DECODEBUF_P = rffi.COpaquePtr('struct pypy_cjk_dec_s', compilation_info=eci)
+pypy_cjk_dec_new = llexternal('pypy_cjk_dec_new',
+                              [MULTIBYTECODEC_P], DECODEBUF_P)
 pypy_cjk_dec_init = llexternal('pypy_cjk_dec_init',
-                               [MULTIBYTECODEC_P, rffi.CCHARP, rffi.SSIZE_T],
-                               DECODEBUF_P)
+                               [DECODEBUF_P, rffi.CCHARP, rffi.SSIZE_T],
+                               rffi.SSIZE_T)
 pypy_cjk_dec_free = llexternal('pypy_cjk_dec_free', [DECODEBUF_P],
                                lltype.Void)
 pypy_cjk_dec_chunk = llexternal('pypy_cjk_dec_chunk', [DECODEBUF_P],
@@ -111,25 +117,30 @@
                                            rffi.SSIZE_T)
 
 def decode(codec, stringdata, errors="strict", errorcb=None, namecb=None):
+    decodebuf = pypy_cjk_dec_new(codec)
+    if not decodebuf:
+        raise MemoryError
+    try:
+        return decodeex(decodebuf, stringdata, errors, errorcb, namecb)
+    finally:
+        pypy_cjk_dec_free(decodebuf)
+
+def decodeex(decodebuf, stringdata, errors="strict", errorcb=None, namecb=None,
+             ignore_error=0):
     inleft = len(stringdata)
     inbuf = rffi.get_nonmovingbuffer(stringdata)
     try:
-        decodebuf = pypy_cjk_dec_init(codec, inbuf, inleft)
-        if not decodebuf:
+        if pypy_cjk_dec_init(decodebuf, inbuf, inleft) < 0:
             raise MemoryError
-        try:
-            while True:
-                r = pypy_cjk_dec_chunk(decodebuf)
-                if r == 0:
-                    break
-                multibytecodec_decerror(decodebuf, r, errors,
-                                        errorcb, namecb, stringdata)
-            src = pypy_cjk_dec_outbuf(decodebuf)
-            length = pypy_cjk_dec_outlen(decodebuf)
-            return rffi.wcharpsize2unicode(src, length)
-        #
-        finally:
-            pypy_cjk_dec_free(decodebuf)
+        while True:
+            r = pypy_cjk_dec_chunk(decodebuf)
+            if r == 0 or r == ignore_error:
+                break
+            multibytecodec_decerror(decodebuf, r, errors,
+                                    errorcb, namecb, stringdata)
+        src = pypy_cjk_dec_outbuf(decodebuf)
+        length = pypy_cjk_dec_outlen(decodebuf)
+        return rffi.wcharpsize2unicode(src, length)
     #
     finally:
         rffi.free_nonmovingbuffer(stringdata, inbuf)
@@ -172,13 +183,15 @@
 # ____________________________________________________________
 # Encoding
 ENCODEBUF_P = rffi.COpaquePtr('struct pypy_cjk_enc_s', compilation_info=eci)
+pypy_cjk_enc_new = llexternal('pypy_cjk_enc_new',
+                               [MULTIBYTECODEC_P], ENCODEBUF_P)
 pypy_cjk_enc_init = llexternal('pypy_cjk_enc_init',
-                               [MULTIBYTECODEC_P, rffi.CWCHARP, rffi.SSIZE_T],
-                               ENCODEBUF_P)
+                               [ENCODEBUF_P, rffi.CWCHARP, rffi.SSIZE_T],
+                               rffi.SSIZE_T)
 pypy_cjk_enc_free = llexternal('pypy_cjk_enc_free', [ENCODEBUF_P],
                                lltype.Void)
-pypy_cjk_enc_chunk = llexternal('pypy_cjk_enc_chunk', [ENCODEBUF_P],
-                                rffi.SSIZE_T)
+pypy_cjk_enc_chunk = llexternal('pypy_cjk_enc_chunk',
+                                [ENCODEBUF_P, rffi.SSIZE_T], rffi.SSIZE_T)
 pypy_cjk_enc_reset = llexternal('pypy_cjk_enc_reset', [ENCODEBUF_P],
                                 rffi.SSIZE_T)
 pypy_cjk_enc_outbuf = llexternal('pypy_cjk_enc_outbuf', [ENCODEBUF_P],
@@ -193,39 +206,52 @@
                                            [ENCODEBUF_P, rffi.CCHARP,
                                             rffi.SSIZE_T, rffi.SSIZE_T],
                                            rffi.SSIZE_T)
+pypy_cjk_enc_getcodec = llexternal('pypy_cjk_enc_getcodec',
+                                   [ENCODEBUF_P], MULTIBYTECODEC_P)
+MBENC_FLUSH = 1
+MBENC_RESET = 2
 
 def encode(codec, unicodedata, errors="strict", errorcb=None, namecb=None):
+    encodebuf = pypy_cjk_enc_new(codec)
+    if not encodebuf:
+        raise MemoryError
+    try:
+        return encodeex(encodebuf, unicodedata, errors, errorcb, namecb)
+    finally:
+        pypy_cjk_enc_free(encodebuf)
+
+def encodeex(encodebuf, unicodedata, errors="strict", errorcb=None,
+             namecb=None, ignore_error=0):
     inleft = len(unicodedata)
     inbuf = rffi.get_nonmoving_unicodebuffer(unicodedata)
     try:
-        encodebuf = pypy_cjk_enc_init(codec, inbuf, inleft)
-        if not encodebuf:
+        if pypy_cjk_enc_init(encodebuf, inbuf, inleft) < 0:
             raise MemoryError
-        try:
-            while True:
-                r = pypy_cjk_enc_chunk(encodebuf)
-                if r == 0:
-                    break
-                multibytecodec_encerror(encodebuf, r, errors,
-                                        codec, errorcb, namecb, unicodedata)
-            while True:
-                r = pypy_cjk_enc_reset(encodebuf)
-                if r == 0:
-                    break
-                multibytecodec_encerror(encodebuf, r, errors,
-                                        codec, errorcb, namecb, unicodedata)
-            src = pypy_cjk_enc_outbuf(encodebuf)
-            length = pypy_cjk_enc_outlen(encodebuf)
-            return rffi.charpsize2str(src, length)
-        #
-        finally:
-            pypy_cjk_enc_free(encodebuf)
+        if ignore_error == 0:
+            flags = MBENC_FLUSH | MBENC_RESET
+        else:
+            flags = MBENC_RESET
+        while True:
+            r = pypy_cjk_enc_chunk(encodebuf, flags)
+            if r == 0 or r == ignore_error:
+                break
+            multibytecodec_encerror(encodebuf, r, errors,
+                                    errorcb, namecb, unicodedata)
+        while True:
+            r = pypy_cjk_enc_reset(encodebuf)
+            if r == 0:
+                break
+            multibytecodec_encerror(encodebuf, r, errors,
+                                    errorcb, namecb, unicodedata)
+        src = pypy_cjk_enc_outbuf(encodebuf)
+        length = pypy_cjk_enc_outlen(encodebuf)
+        return rffi.charpsize2str(src, length)
     #
     finally:
         rffi.free_nonmoving_unicodebuffer(unicodedata, inbuf)
 
 def multibytecodec_encerror(encodebuf, e, errors,
-                            codec, errorcb, namecb, unicodedata):
+                            errorcb, namecb, unicodedata):
     if e > 0:
         reason = "illegal multibyte sequence"
         esize = e
@@ -246,6 +272,7 @@
     elif errors == "ignore":
         replace = ""
     elif errors == "replace":
+        codec = pypy_cjk_enc_getcodec(encodebuf)
         try:
             replace = encode(codec, u"?")
         except EncodeDecodeError:
diff --git a/pypy/module/_multibytecodec/interp_incremental.py b/pypy/module/_multibytecodec/interp_incremental.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_multibytecodec/interp_incremental.py
@@ -0,0 +1,141 @@
+from pypy.rpython.lltypesystem import lltype
+from pypy.module._multibytecodec import c_codecs
+from pypy.module._multibytecodec.interp_multibytecodec import (
+    MultibyteCodec, wrap_unicodedecodeerror, wrap_runtimeerror,
+    wrap_unicodeencodeerror)
+from pypy.interpreter.baseobjspace import Wrappable
+from pypy.interpreter.gateway import interp2app, unwrap_spec
+from pypy.interpreter.typedef import TypeDef, GetSetProperty
+from pypy.module._codecs.interp_codecs import CodecState
+
+
+class MultibyteIncrementalBase(Wrappable):
+
+    def __init__(self, space, errors):
+        if errors is None:
+            errors = 'strict'
+        self.space = space
+        self.errors = errors
+        w_codec = space.getattr(space.wrap(self), space.wrap("codec"))
+        codec = space.interp_w(MultibyteCodec, w_codec)
+        self.codec = codec.codec
+        self.name = codec.name
+        self._initialize()
+
+    def __del__(self):
+        self._free()
+
+    def reset_w(self):
+        self._free()
+        self._initialize()
+
+    def fget_errors(self, space):
+        return space.wrap(self.errors)
+
+    def fset_errors(self, space, w_errors):
+        self.errors = space.str_w(w_errors)
+
+
+class MultibyteIncrementalDecoder(MultibyteIncrementalBase):
+
+    def _initialize(self):
+        self.decodebuf = c_codecs.pypy_cjk_dec_new(self.codec)
+        self.pending = ""
+
+    def _free(self):
+        self.pending = None
+        if self.decodebuf:
+            c_codecs.pypy_cjk_dec_free(self.decodebuf)
+            self.decodebuf = lltype.nullptr(c_codecs.DECODEBUF_P.TO)
+
+    @unwrap_spec(object=str, final=bool)
+    def decode_w(self, object, final=False):
+        space = self.space
+        state = space.fromcache(CodecState)
+        if len(self.pending) > 0:
+            object = self.pending + object
+        try:
+            output = c_codecs.decodeex(self.decodebuf, object, self.errors,
+                                       state.decode_error_handler, self.name,
+                                       get_ignore_error(final))
+        except c_codecs.EncodeDecodeError, e:
+            raise wrap_unicodedecodeerror(space, e, object, self.name)
+        except RuntimeError:
+            raise wrap_runtimeerror(space)
+        pos = c_codecs.pypy_cjk_dec_inbuf_consumed(self.decodebuf)
+        assert 0 <= pos <= len(object)
+        self.pending = object[pos:]
+        return space.wrap(output)
+
+
+ at unwrap_spec(errors="str_or_None")
+def mbidecoder_new(space, w_subtype, errors=None):
+    r = space.allocate_instance(MultibyteIncrementalDecoder, w_subtype)
+    r.__init__(space, errors)
+    return space.wrap(r)
+
+MultibyteIncrementalDecoder.typedef = TypeDef(
+    'MultibyteIncrementalDecoder',
+    __module__ = '_multibytecodec',
+    __new__ = interp2app(mbidecoder_new),
+    decode  = interp2app(MultibyteIncrementalDecoder.decode_w),
+    reset   = interp2app(MultibyteIncrementalDecoder.reset_w),
+    errors  = GetSetProperty(MultibyteIncrementalDecoder.fget_errors,
+                             MultibyteIncrementalDecoder.fset_errors),
+    )
+
+
+class MultibyteIncrementalEncoder(MultibyteIncrementalBase):
+
+    def _initialize(self):
+        self.encodebuf = c_codecs.pypy_cjk_enc_new(self.codec)
+        self.pending = u""
+
+    def _free(self):
+        self.pending = None
+        if self.encodebuf:
+            c_codecs.pypy_cjk_enc_free(self.encodebuf)
+            self.encodebuf = lltype.nullptr(c_codecs.ENCODEBUF_P.TO)
+
+    @unwrap_spec(object=unicode, final=bool)
+    def encode_w(self, object, final=False):
+        space = self.space
+        state = space.fromcache(CodecState)
+        if len(self.pending) > 0:
+            object = self.pending + object
+        try:
+            output = c_codecs.encodeex(self.encodebuf, object, self.errors,
+                                       state.encode_error_handler, self.name,
+                                       get_ignore_error(final))
+        except c_codecs.EncodeDecodeError, e:
+            raise wrap_unicodeencodeerror(space, e, object, self.name)
+        except RuntimeError:
+            raise wrap_runtimeerror(space)
+        pos = c_codecs.pypy_cjk_enc_inbuf_consumed(self.encodebuf)
+        assert 0 <= pos <= len(object)
+        self.pending = object[pos:]
+        return space.wrap(output)
+
+
+ at unwrap_spec(errors="str_or_None")
+def mbiencoder_new(space, w_subtype, errors=None):
+    r = space.allocate_instance(MultibyteIncrementalEncoder, w_subtype)
+    r.__init__(space, errors)
+    return space.wrap(r)
+
+MultibyteIncrementalEncoder.typedef = TypeDef(
+    'MultibyteIncrementalEncoder',
+    __module__ = '_multibytecodec',
+    __new__ = interp2app(mbiencoder_new),
+    encode  = interp2app(MultibyteIncrementalEncoder.encode_w),
+    reset   = interp2app(MultibyteIncrementalEncoder.reset_w),
+    errors  = GetSetProperty(MultibyteIncrementalEncoder.fget_errors,
+                             MultibyteIncrementalEncoder.fset_errors),
+    )
+
+
+def get_ignore_error(final):
+    if final:
+        return 0    # don't ignore any error
+    else:
+        return c_codecs.MBERR_TOOFEW
diff --git a/pypy/module/_multibytecodec/interp_multibytecodec.py b/pypy/module/_multibytecodec/interp_multibytecodec.py
--- a/pypy/module/_multibytecodec/interp_multibytecodec.py
+++ b/pypy/module/_multibytecodec/interp_multibytecodec.py
@@ -1,5 +1,5 @@
 from pypy.interpreter.baseobjspace import Wrappable
-from pypy.interpreter.gateway import ObjSpace, interp2app, unwrap_spec
+from pypy.interpreter.gateway import interp2app, unwrap_spec
 from pypy.interpreter.typedef import TypeDef
 from pypy.interpreter.error import OperationError
 from pypy.module._multibytecodec import c_codecs
@@ -22,17 +22,9 @@
             output = c_codecs.decode(self.codec, input, errors,
                                      state.decode_error_handler, self.name)
         except c_codecs.EncodeDecodeError, e:
-            raise OperationError(
-                space.w_UnicodeDecodeError,
-                space.newtuple([
-                    space.wrap(self.name),
-                    space.wrap(input),
-                    space.wrap(e.start),
-                    space.wrap(e.end),
-                    space.wrap(e.reason)]))
+            raise wrap_unicodedecodeerror(space, e, input, self.name)
         except RuntimeError:
-            raise OperationError(space.w_RuntimeError,
-                                 space.wrap("internal codec error"))
+            raise wrap_runtimeerror(space)
         return space.newtuple([space.wrap(output),
                                space.wrap(len(input))])
 
@@ -46,17 +38,9 @@
             output = c_codecs.encode(self.codec, input, errors,
                                      state.encode_error_handler, self.name)
         except c_codecs.EncodeDecodeError, e:
-            raise OperationError(
-                space.w_UnicodeEncodeError,
-                space.newtuple([
-                    space.wrap(self.name),
-                    space.wrap(input),
-                    space.wrap(e.start),
-                    space.wrap(e.end),
-                    space.wrap(e.reason)]))
+            raise wrap_unicodeencodeerror(space, e, input, self.name)
         except RuntimeError:
-            raise OperationError(space.w_RuntimeError,
-                                 space.wrap("internal codec error"))
+            raise wrap_runtimeerror(space)
         return space.newtuple([space.wrap(output),
                                space.wrap(len(input))])
 
@@ -78,3 +62,28 @@
         raise OperationError(space.w_LookupError,
                              space.wrap("no such codec is supported."))
     return space.wrap(MultibyteCodec(name, codec))
+
+
+def wrap_unicodedecodeerror(space, e, input, name):
+    return OperationError(
+        space.w_UnicodeDecodeError,
+        space.newtuple([
+            space.wrap(name),
+            space.wrap(input),
+            space.wrap(e.start),
+            space.wrap(e.end),
+            space.wrap(e.reason)]))
+
+def wrap_unicodeencodeerror(space, e, input, name):
+    raise OperationError(
+        space.w_UnicodeEncodeError,
+        space.newtuple([
+            space.wrap(name),
+            space.wrap(input),
+            space.wrap(e.start),
+            space.wrap(e.end),
+            space.wrap(e.reason)]))
+
+def wrap_runtimeerror(space):
+    raise OperationError(space.w_RuntimeError,
+                         space.wrap("internal codec error"))
diff --git a/pypy/module/_multibytecodec/test/test_app_incremental.py b/pypy/module/_multibytecodec/test/test_app_incremental.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_multibytecodec/test/test_app_incremental.py
@@ -0,0 +1,163 @@
+from pypy.conftest import gettestobjspace
+
+
+class AppTestClasses:
+    def setup_class(cls):
+        cls.space = gettestobjspace(usemodules=['_multibytecodec'])
+        cls.w_IncrementalHzDecoder = cls.space.appexec([], """():
+            import _codecs_cn
+            from _multibytecodec import MultibyteIncrementalDecoder
+
+            class IncrementalHzDecoder(MultibyteIncrementalDecoder):
+                codec = _codecs_cn.getcodec('hz')
+
+            return IncrementalHzDecoder
+        """)
+        cls.w_IncrementalHzEncoder = cls.space.appexec([], """():
+            import _codecs_cn
+            from _multibytecodec import MultibyteIncrementalEncoder
+
+            class IncrementalHzEncoder(MultibyteIncrementalEncoder):
+                codec = _codecs_cn.getcodec('hz')
+
+            return IncrementalHzEncoder
+        """)
+        cls.w_IncrementalBig5hkscsEncoder = cls.space.appexec([], """():
+            import _codecs_cn
+            from _multibytecodec import MultibyteIncrementalEncoder
+
+            class IncrementalBig5hkscsEncoder(MultibyteIncrementalEncoder):
+                codec = _codecs_cn.getcodec('big5hkscs')
+
+            return IncrementalBig5hkscsEncoder
+        """)
+
+    def test_decode_hz(self):
+        d = self.IncrementalHzDecoder()
+        r = d.decode("~{abcd~}")
+        assert r == u'\u5f95\u6c85'
+        r = d.decode("~{efgh~}")
+        assert r == u'\u5f50\u73b7'
+        for c, output in zip("!~{abcd~}xyz~{efgh",
+              [u'!',  # !
+               u'',   # ~
+               u'',   # {
+               u'',   # a
+               u'\u5f95',   # b
+               u'',   # c
+               u'\u6c85',   # d
+               u'',   # ~
+               u'',   # }
+               u'x',  # x
+               u'y',  # y
+               u'z',  # z
+               u'',   # ~
+               u'',   # {
+               u'',   # e
+               u'\u5f50',   # f
+               u'',   # g
+               u'\u73b7',   # h
+               ]):
+            r = d.decode(c)
+            assert r == output
+
+    def test_decode_hz_final(self):
+        d = self.IncrementalHzDecoder()
+        r = d.decode("~{", True)
+        assert r == u''
+        raises(UnicodeDecodeError, d.decode, "~", True)
+        raises(UnicodeDecodeError, d.decode, "~{a", True)
+
+    def test_decode_hz_reset(self):
+        d = self.IncrementalHzDecoder()
+        r = d.decode("ab")
+        assert r == u'ab'
+        r = d.decode("~{")
+        assert r == u''
+        r = d.decode("ab")
+        assert r == u'\u5f95'
+        r = d.decode("ab")
+        assert r == u'\u5f95'
+        d.reset()
+        r = d.decode("ab")
+        assert r == u'ab'
+
+    def test_decode_hz_error(self):
+        d = self.IncrementalHzDecoder()
+        raises(UnicodeDecodeError, d.decode, "~{abc", True)
+        d = self.IncrementalHzDecoder("ignore")
+        r = d.decode("~{abc", True)
+        assert r == u'\u5f95'
+        d = self.IncrementalHzDecoder()
+        d.errors = "replace"
+        r = d.decode("~{abc", True)
+        assert r == u'\u5f95\ufffd'
+
+    def test_decode_hz_buffer_grow(self):
+        d = self.IncrementalHzDecoder()
+        for i in range(13):
+            r = d.decode("a" * (2**i))
+            assert r == u"a" * (2**i)
+
+    def test_encode_hz(self):
+        e = self.IncrementalHzEncoder()
+        r = e.encode("abcd")
+        assert r == 'abcd'
+        r = e.encode(u"\u5f95\u6c85")
+        assert r == '~{abcd~}'
+        r = e.encode(u"\u5f50")
+        assert r == '~{ef~}'
+        r = e.encode(u"\u73b7")
+        assert r == '~{gh~}'
+
+    def test_encode_hz_final(self):
+        e = self.IncrementalHzEncoder()
+        r = e.encode(u"xyz\u5f95\u6c85", True)
+        assert r == 'xyz~{abcd~}'
+        # This is a bit hard to test, because the only way I can see that
+        # encoders can return MBERR_TOOFEW is with surrogates, which only
+        # occur with 2-byte unicode characters...  We will just have to
+        # trust that the logic works, because it is exactly the same one
+        # as in the decode case :-/
+
+    def test_encode_hz_reset(self):
+        # Same issue as with test_encode_hz_final
+        e = self.IncrementalHzEncoder()
+        r = e.encode(u"xyz\u5f95\u6c85", True)
+        assert r == 'xyz~{abcd~}'
+        e.reset()
+        r = e.encode(u"xyz\u5f95\u6c85")
+        assert r == 'xyz~{abcd~}'
+
+    def test_encode_hz_error(self):
+        e = self.IncrementalHzEncoder()
+        raises(UnicodeEncodeError, e.encode, u"\u4321", True)
+        e = self.IncrementalHzEncoder("ignore")
+        r = e.encode(u"xy\u4321z", True)
+        assert r == 'xyz'
+        e = self.IncrementalHzEncoder()
+        e.errors = "replace"
+        r = e.encode(u"xy\u4321z", True)
+        assert r == 'xy?z'
+
+    def test_encode_hz_buffer_grow(self):
+        e = self.IncrementalHzEncoder()
+        for i in range(13):
+            r = e.encode(u"a" * (2**i))
+            assert r == "a" * (2**i)
+
+    def test_encode_big5hkscs(self):
+        #e = self.IncrementalBig5hkscsEncoder()
+        #r = e.encode(u'\xca', True)
+        #assert r == '\x88f'
+        #r = e.encode(u'\xca', True)
+        #assert r == '\x88f'
+        #raises(UnicodeEncodeError, e.encode, u'\u0304', True)
+        #
+        e = self.IncrementalBig5hkscsEncoder()
+        r = e.encode(u'\xca')
+        assert r == ''
+        r = e.encode(u'\xca')
+        assert r == '\x88f'
+        r = e.encode(u'\u0304')
+        assert r == '\x88b'
diff --git a/pypy/module/_multibytecodec/test/test_app_stream.py b/pypy/module/_multibytecodec/test/test_app_stream.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_multibytecodec/test/test_app_stream.py
@@ -0,0 +1,93 @@
+from pypy.conftest import gettestobjspace
+
+
+class AppTestStreams:
+    def setup_class(cls):
+        cls.space = gettestobjspace(usemodules=['_multibytecodec'])
+        cls.w_HzStreamReader = cls.space.appexec([], """():
+            import _codecs_cn
+            from _multibytecodec import MultibyteStreamReader
+
+            class HzStreamReader(MultibyteStreamReader):
+                codec = _codecs_cn.getcodec('hz')
+
+            return HzStreamReader
+        """)
+        cls.w_HzStreamWriter = cls.space.appexec([], """():
+            import _codecs_cn
+            from _multibytecodec import MultibyteStreamWriter
+
+            class HzStreamWriter(MultibyteStreamWriter):
+                codec = _codecs_cn.getcodec('hz')
+
+            return HzStreamWriter
+        """)
+        cls.w_ShiftJisx0213StreamWriter = cls.space.appexec([], """():
+            import _codecs_jp
+            from _multibytecodec import MultibyteStreamWriter
+
+            class ShiftJisx0213StreamWriter(MultibyteStreamWriter):
+                codec = _codecs_jp.getcodec('shift_jisx0213')
+
+            return ShiftJisx0213StreamWriter
+        """)
+
+    def test_reader(self):
+        class FakeFile:
+            def __init__(self, data):
+                self.data = data
+                self.pos = 0
+            def read(self, size):
+                res = self.data[self.pos : self.pos + size]
+                self.pos += size
+                return res
+        #
+        r = self.HzStreamReader(FakeFile("!~{abcd~}xyz~{efgh"))
+        for expected in u'!\u5f95\u6c85xyz\u5f50\u73b7':
+            c = r.read(1)
+            assert c == expected
+        c = r.read(1)
+        assert c == ''
+
+    def test_reader_replace(self):
+        class FakeFile:
+            def __init__(self, data):
+                self.data = data
+            def read(self):
+                return self.data
+        #
+        r = self.HzStreamReader(FakeFile("!~{a"), "replace")
+        c = r.read()
+        assert c == u'!\ufffd'
+        #
+        r = self.HzStreamReader(FakeFile("!~{a"))
+        r.errors = "replace"
+        assert r.errors == "replace"
+        c = r.read()
+        assert c == u'!\ufffd'
+
+    def test_writer(self):
+        class FakeFile:
+            def __init__(self):
+                self.output = []
+            def write(self, data):
+                self.output.append(data)
+        #
+        w = self.HzStreamWriter(FakeFile())
+        for input in u'!\u5f95\u6c85xyz\u5f50\u73b7':
+            w.write(input)
+        assert w.stream.output == ['!', '~{ab~}', '~{cd~}', 'x', 'y', 'z',
+                                   '~{ef~}', '~{gh~}']
+
+    def test_no_flush(self):
+        class FakeFile:
+            def __init__(self):
+                self.output = []
+            def write(self, data):
+                self.output.append(data)
+        #
+        w = self.ShiftJisx0213StreamWriter(FakeFile())
+        w.write(u'\u30ce')
+        w.write(u'\u304b')
+        w.write(u'\u309a')
+        assert w.stream.output == ['\x83m', '', '\x82\xf5']
diff --git a/pypy/module/_multibytecodec/test/test_c_codecs.py b/pypy/module/_multibytecodec/test/test_c_codecs.py
--- a/pypy/module/_multibytecodec/test/test_c_codecs.py
+++ b/pypy/module/_multibytecodec/test/test_c_codecs.py
@@ -2,6 +2,7 @@
 from pypy.module._multibytecodec.c_codecs import getcodec, codecs
 from pypy.module._multibytecodec.c_codecs import decode, encode
 from pypy.module._multibytecodec.c_codecs import EncodeDecodeError
+from pypy.module._multibytecodec import c_codecs
 
 
 def test_codecs_existence():
@@ -22,6 +23,52 @@
     c = getcodec("hz")
     u = decode(c, "~{abc}")
     assert u == u'\u5f95\u6cef'
+    u = decode(c, "~{")
+    assert u == u''
+
+def test_decodeex_hz():
+    c = getcodec("hz")
+    decodebuf = c_codecs.pypy_cjk_dec_new(c)
+    u = c_codecs.decodeex(decodebuf, "~{abcd~}")
+    assert u == u'\u5f95\u6c85'
+    u = c_codecs.decodeex(decodebuf, "~{efgh~}")
+    assert u == u'\u5f50\u73b7'
+    u = c_codecs.decodeex(decodebuf, "!~{abcd~}xyz~{efgh")
+    assert u == u'!\u5f95\u6c85xyz\u5f50\u73b7'
+    c_codecs.pypy_cjk_dec_free(decodebuf)
+
+def test_decodeex_hz_incomplete():
+    c = getcodec("hz")
+    decodebuf = c_codecs.pypy_cjk_dec_new(c)
+    buf = ''
+    for c, output in zip("!~{abcd~}xyz~{efgh",
+          [u'!',  # !
+           u'',   # ~
+           u'',   # {
+           u'',   # a
+           u'\u5f95',   # b
+           u'',   # c
+           u'\u6c85',   # d
+           u'',   # ~
+           u'',   # }
+           u'x',  # x
+           u'y',  # y
+           u'z',  # z
+           u'',   # ~
+           u'',   # {
+           u'',   # e
+           u'\u5f50',   # f
+           u'',   # g
+           u'\u73b7',   # h
+           ]):
+        buf += c
+        u = c_codecs.decodeex(decodebuf, buf,
+                              ignore_error = c_codecs.MBERR_TOOFEW)
+        assert u == output
+        incompletepos = c_codecs.pypy_cjk_dec_inbuf_consumed(decodebuf)
+        buf = buf[incompletepos:]
+    assert buf == ''
+    c_codecs.pypy_cjk_dec_free(decodebuf)
 
 def test_decode_hz_error():
     # error
diff --git a/pypy/module/_multiprocessing/interp_connection.py b/pypy/module/_multiprocessing/interp_connection.py
--- a/pypy/module/_multiprocessing/interp_connection.py
+++ b/pypy/module/_multiprocessing/interp_connection.py
@@ -4,7 +4,7 @@
 from pypy.interpreter.gateway import interp2app, unwrap_spec
 from pypy.interpreter.error import (
     OperationError, wrap_oserror, operationerrfmt)
-from pypy.rpython.lltypesystem import rffi, lltype, llmemory
+from pypy.rpython.lltypesystem import rffi, lltype
 from pypy.rlib.rarithmetic import intmask
 from pypy.rlib import rpoll
 import sys
diff --git a/pypy/module/_multiprocessing/interp_semaphore.py b/pypy/module/_multiprocessing/interp_semaphore.py
--- a/pypy/module/_multiprocessing/interp_semaphore.py
+++ b/pypy/module/_multiprocessing/interp_semaphore.py
@@ -15,7 +15,6 @@
 
 if sys.platform == 'win32':
     from pypy.rlib import rwin32
-    from pypy.interpreter.error import wrap_windowserror
     from pypy.module._multiprocessing.interp_win32 import (
         handle_w, _GetTickCount)
 
diff --git a/pypy/module/_pickle_support/maker.py b/pypy/module/_pickle_support/maker.py
--- a/pypy/module/_pickle_support/maker.py
+++ b/pypy/module/_pickle_support/maker.py
@@ -1,4 +1,4 @@
-from pypy.interpreter.error import OperationError 
+from pypy.interpreter.error import OperationError
 from pypy.interpreter.nestedscope import Cell
 from pypy.interpreter.pycode import PyCode
 from pypy.interpreter.function import Function, Method
@@ -8,7 +8,6 @@
 from pypy.interpreter.generator import GeneratorIterator
 from pypy.rlib.objectmodel import instantiate
 from pypy.interpreter.gateway import unwrap_spec
-from pypy.objspace.std.dicttype import dictiter_typedef
 from pypy.objspace.std.iterobject import W_SeqIterObject, W_ReverseSeqIterObject
 
 
@@ -79,7 +78,7 @@
     try:
         return gateway.BuiltinCode.find(identifier)
     except KeyError:
-        raise OperationError(space.w_RuntimeError, 
+        raise OperationError(space.w_RuntimeError,
                              space.wrap("cannot unpickle builtin code: "+
                                         identifier))
 
@@ -89,7 +88,7 @@
     try:
         return function.Function.find(identifier)
     except KeyError:
-        raise OperationError(space.w_RuntimeError, 
+        raise OperationError(space.w_RuntimeError,
                              space.wrap("cannot unpickle builtin function: "+
                                         identifier))
 
diff --git a/pypy/module/_rawffi/callback.py b/pypy/module/_rawffi/callback.py
--- a/pypy/module/_rawffi/callback.py
+++ b/pypy/module/_rawffi/callback.py
@@ -2,10 +2,10 @@
 from pypy.interpreter.gateway import interp2app, unwrap_spec
 from pypy.interpreter.typedef import TypeDef, GetSetProperty
 from pypy.rpython.lltypesystem import lltype, rffi
-from pypy.module._rawffi.array import get_elem, push_elem
+from pypy.module._rawffi.array import push_elem
 from pypy.module._rawffi.structure import W_Structure
-from pypy.module._rawffi.interp_rawffi import W_DataInstance, letter2tp, \
-     wrap_value, unwrap_value, unwrap_truncate_int, unpack_argshapes
+from pypy.module._rawffi.interp_rawffi import (W_DataInstance, letter2tp,
+     unwrap_value, unpack_argshapes)
 from pypy.rlib.clibffi import USERDATA_P, CallbackFuncPtr, FUNCFLAG_CDECL
 from pypy.rlib.clibffi import ffi_type_void
 from pypy.rlib import rweakref
diff --git a/pypy/module/_rawffi/interp_rawffi.py b/pypy/module/_rawffi/interp_rawffi.py
--- a/pypy/module/_rawffi/interp_rawffi.py
+++ b/pypy/module/_rawffi/interp_rawffi.py
@@ -1,7 +1,6 @@
-import sys
 from pypy.interpreter.baseobjspace import Wrappable
 from pypy.interpreter.error import OperationError, wrap_oserror, operationerrfmt
-from pypy.interpreter.gateway import interp2app, NoneNotWrapped, unwrap_spec
+from pypy.interpreter.gateway import interp2app, unwrap_spec
 from pypy.interpreter.typedef import TypeDef, GetSetProperty


More information about the pypy-commit mailing list