[pypy-commit] pypy dict-strategies: merge default

cfbolz noreply at buildbot.pypy.org
Wed Jun 8 11:11:15 CEST 2011


Author: Carl Friedrich Bolz <cfbolz at gmx.de>
Branch: dict-strategies
Changeset: r44831:e9bc9725a35f
Date: 2011-06-08 11:12 +0200
http://bitbucket.org/pypy/pypy/changeset/e9bc9725a35f/

Log:	merge default

diff --git a/.hgignore b/.hgignore
--- a/.hgignore
+++ b/.hgignore
@@ -64,6 +64,7 @@
 ^pypy/doc/image/lattice3\.png$
 ^pypy/doc/image/stackless_informal\.png$
 ^pypy/doc/image/parsing_example.+\.png$
+^pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test\.o$
 ^compiled
 ^.git/
 ^release/
diff --git a/lib-python/TODO b/lib-python/TODO
deleted file mode 100644
--- a/lib-python/TODO
+++ /dev/null
@@ -1,100 +0,0 @@
-TODO list for 2.7.0
-===================
-
-You can find the results of the most recent buildbot run at:
-http://buildbot.pypy.org/
-
-
-Probably easy tasks
--------------------
-
-- (unicode|bytearray).(index|find) should accept None as indices (see
-  test_unicode.py)
-
-- missing posix.confstr and posix.confstr_names
-
-- remove code duplication: bit_length() and _count_bits() in rlib/rbigint.py,
-  objspace/std/longobject.py and objspace/std/longtype.py.
-
-- missing module pyexpat.errors
-
-- support for PYTHONIOENCODING, this needs a way to update file.encoding
-
-- implement format__Complex_ANY() in pypy/objspace/std/complexobject.py
-
-- Code like this does not work, for two reasons::
-
-  \
-  from __future__ import (with_statement,
-                          unicode_literals)
-  assert type("") is unicode
-
-- Code like::
-
-  assert(x is not None, "error message")
-
-  should emit a SyntaxWarning when compiled (the tuple is always true)
-
-
-Medium tasks
-------------
-
-- socket module has a couple of changes (including AF_TIPC packet range)
-
-Longer tasks
-------------
-
-- Fix usage of __cmp__ in subclasses::
-
-    class badint(int):
-        def __cmp__(self, other):
-            raise RuntimeError
-    raises(RuntimeError, cmp, 0, badint(1))
-
-- Fix comparison of objects layout: if two classes have the same __slots__, it
-  should be possible to change the instances __class__::
-
-      class A(object): __slots__ = ('a', 'b')
-      class B(object): __slots__ = ('b', 'a')
-      a = A()
-      a.__class__ = B
-
-- Show a ResourceWarning when a file/socket is not explicitely closed, like
-  CPython did for 3.2: http://svn.python.org/view?view=rev&revision=85920
-  in PyPy this should be enabled by default
-
-Won't do for this release
--------------------------
-
-Note: when you give up with a missing feature, please mention it here, as well
-as the various skips added to the test suite.
-
-- py3k warnings
-
-  * the -3 flag is accepted on the command line, but displays a warning (see
-    `translator/goal/app_main.py`)
-
-- CJK codecs.
-
-  * In `./conftest.py`, skipped all `test_codecencodings_*.py` and
-    `test_codecmaps_*.py`.
-
-  * In test_codecs, commented out various items in `all_unicode_encodings`.
-
-- Error messages about ill-formed calls (like "argument after ** must be a
-  mapping") don't always show the function name.  That's hard to fix for
-  the case of errors raised when the Argument object is created (as opposed
-  to when parsing for a given target function, which occurs later).
-
-  * Some "..." were added to doctests in test_extcall.py
-
-- CPython's builtin methods are both functions and unbound methods (for
-  example, `str.upper is dict(str.__dict__)['upper']`). This is not the case
-  in pypy, and assertions like `object.__str__ is object.__str__` are False
-  with pypy.  Use the `==` operator instead.
-
-  * pprint.py, _threading_local.py
-
-- When importing a nested module fails, the ImportError message mentions the
-  name of the package up to the component that could not be imported (CPython
-  prefers to display the names starting with the failing part).
diff --git a/lib-python/conftest.py b/lib-python/conftest.py
--- a/lib-python/conftest.py
+++ b/lib-python/conftest.py
@@ -569,7 +569,6 @@
 #
 import os
 import time
-import socket
 import getpass
 
 class ReallyRunFileExternal(py.test.collect.Item): 
diff --git a/lib-python/modified-2.7/ctypes/__init__.py b/lib-python/modified-2.7/ctypes/__init__.py
--- a/lib-python/modified-2.7/ctypes/__init__.py
+++ b/lib-python/modified-2.7/ctypes/__init__.py
@@ -7,6 +7,7 @@
 
 __version__ = "1.1.0"
 
+import _ffi
 from _ctypes import Union, Structure, Array
 from _ctypes import _Pointer
 from _ctypes import CFuncPtr as _CFuncPtr
@@ -350,7 +351,7 @@
         self._FuncPtr = _FuncPtr
 
         if handle is None:
-            self._handle = _dlopen(self._name, mode)
+            self._handle = _ffi.CDLL(name)
         else:
             self._handle = handle
 
diff --git a/lib-python/modified-2.7/ctypes/test/test_cfuncs.py b/lib-python/modified-2.7/ctypes/test/test_cfuncs.py
--- a/lib-python/modified-2.7/ctypes/test/test_cfuncs.py
+++ b/lib-python/modified-2.7/ctypes/test/test_cfuncs.py
@@ -3,8 +3,8 @@
 
 import unittest
 from ctypes import *
-
 import _ctypes_test
+from test.test_support import impl_detail
 
 class CFunctions(unittest.TestCase):
     _dll = CDLL(_ctypes_test.__file__)
@@ -158,12 +158,14 @@
         self.assertEqual(self._dll.tf_bd(0, 42.), 14.)
         self.assertEqual(self.S(), 42)
 
+    @impl_detail('long double not supported by PyPy', pypy=False)
     def test_longdouble(self):
         self._dll.tf_D.restype = c_longdouble
         self._dll.tf_D.argtypes = (c_longdouble,)
         self.assertEqual(self._dll.tf_D(42.), 14.)
         self.assertEqual(self.S(), 42)
-
+        
+    @impl_detail('long double not supported by PyPy', pypy=False)
     def test_longdouble_plus(self):
         self._dll.tf_bD.restype = c_longdouble
         self._dll.tf_bD.argtypes = (c_byte, c_longdouble)
diff --git a/lib-python/modified-2.7/ctypes/test/test_functions.py b/lib-python/modified-2.7/ctypes/test/test_functions.py
--- a/lib-python/modified-2.7/ctypes/test/test_functions.py
+++ b/lib-python/modified-2.7/ctypes/test/test_functions.py
@@ -8,6 +8,7 @@
 from ctypes import *
 import sys, unittest
 from ctypes.test import xfail
+from test.test_support import impl_detail
 
 try:
     WINFUNCTYPE
@@ -144,6 +145,7 @@
         self.assertEqual(result, -21)
         self.assertEqual(type(result), float)
 
+    @impl_detail('long double not supported by PyPy', pypy=False)
     def test_longdoubleresult(self):
         f = dll._testfunc_D_bhilfD
         f.argtypes = [c_byte, c_short, c_int, c_long, c_float, c_longdouble]
diff --git a/lib-python/modified-2.7/ctypes/test/test_libc.py b/lib-python/modified-2.7/ctypes/test/test_libc.py
--- a/lib-python/modified-2.7/ctypes/test/test_libc.py
+++ b/lib-python/modified-2.7/ctypes/test/test_libc.py
@@ -26,6 +26,7 @@
         self.assertEqual(chars.raw, "   ,,aaaadmmmnpppsss\x00")
 
     def test_no_more_xfail(self):
+        import socket
         import ctypes.test
         self.assertTrue(not hasattr(ctypes.test, 'xfail'),
                         "You should incrementally grep for '@xfail' and remove them, they are real failures")
diff --git a/lib-python/modified-2.7/distutils/sysconfig.py b/lib-python/modified-2.7/distutils/sysconfig.py
--- a/lib-python/modified-2.7/distutils/sysconfig.py
+++ b/lib-python/modified-2.7/distutils/sysconfig.py
@@ -20,8 +20,10 @@
 if '__pypy__' in sys.builtin_module_names:
     from distutils.sysconfig_pypy import *
     from distutils.sysconfig_pypy import _config_vars # needed by setuptools
+    from distutils.sysconfig_pypy import _variable_rx # read_setup_file()
 else:
     from distutils.sysconfig_cpython import *
     from distutils.sysconfig_cpython import _config_vars # needed by setuptools
+    from distutils.sysconfig_cpython import _variable_rx # read_setup_file()
 
 
diff --git a/lib-python/modified-2.7/distutils/sysconfig_pypy.py b/lib-python/modified-2.7/distutils/sysconfig_pypy.py
--- a/lib-python/modified-2.7/distutils/sysconfig_pypy.py
+++ b/lib-python/modified-2.7/distutils/sysconfig_pypy.py
@@ -116,3 +116,7 @@
     if compiler.compiler_type == "unix":
         compiler.compiler_so.extend(['-fPIC', '-Wimplicit'])
         compiler.shared_lib_extension = get_config_var('SO')
+
+from sysconfig_cpython import (
+    parse_makefile, _variable_rx, expand_makefile_vars)
+
diff --git a/lib-python/2.7/test/test_multibytecodec.py b/lib-python/modified-2.7/test/test_multibytecodec.py
copy from lib-python/2.7/test/test_multibytecodec.py
copy to lib-python/modified-2.7/test/test_multibytecodec.py
--- a/lib-python/2.7/test/test_multibytecodec.py
+++ b/lib-python/modified-2.7/test/test_multibytecodec.py
@@ -42,7 +42,7 @@
         dec = codecs.getdecoder('euc-kr')
         myreplace  = lambda exc: (u'', sys.maxint+1)
         codecs.register_error('test.cjktest', myreplace)
-        self.assertRaises(IndexError, dec,
+        self.assertRaises((IndexError, OverflowError), dec,
                           'apple\x92ham\x93spam', 'test.cjktest')
 
     def test_codingspec(self):
diff --git a/lib-python/2.7/test/test_multibytecodec_support.py b/lib-python/modified-2.7/test/test_multibytecodec_support.py
copy from lib-python/2.7/test/test_multibytecodec_support.py
copy to lib-python/modified-2.7/test/test_multibytecodec_support.py
--- a/lib-python/2.7/test/test_multibytecodec_support.py
+++ b/lib-python/modified-2.7/test/test_multibytecodec_support.py
@@ -107,8 +107,8 @@
         def myreplace(exc):
             return (u'x', sys.maxint + 1)
         codecs.register_error("test.cjktest", myreplace)
-        self.assertRaises(IndexError, self.encode, self.unmappedunicode,
-                          'test.cjktest')
+        self.assertRaises((IndexError, OverflowError), self.encode,
+                          self.unmappedunicode, 'test.cjktest')
 
     def test_callback_None_index(self):
         def myreplace(exc):
diff --git a/lib-python/modified-2.7/test/test_support.py b/lib-python/modified-2.7/test/test_support.py
--- a/lib-python/modified-2.7/test/test_support.py
+++ b/lib-python/modified-2.7/test/test_support.py
@@ -1066,7 +1066,7 @@
         if '--pdb' in sys.argv:
             import pdb, traceback
             traceback.print_tb(exc_info[2])
-            pdb.post_mortem(exc_info[2], pdb.Pdb)
+            pdb.post_mortem(exc_info[2])
 
 # ----------------------------------
 
diff --git a/lib_pypy/_ctypes/array.py b/lib_pypy/_ctypes/array.py
--- a/lib_pypy/_ctypes/array.py
+++ b/lib_pypy/_ctypes/array.py
@@ -208,6 +208,9 @@
     def _get_buffer_value(self):
         return self._buffer.buffer
 
+    def _to_ffi_param(self):
+        return self._get_buffer_value()
+
 ARRAY_CACHE = {}
 
 def create_array_type(base, length):
diff --git a/lib_pypy/_ctypes/basics.py b/lib_pypy/_ctypes/basics.py
--- a/lib_pypy/_ctypes/basics.py
+++ b/lib_pypy/_ctypes/basics.py
@@ -1,5 +1,6 @@
 
 import _rawffi
+import _ffi
 import sys
 
 keepalive_key = str # XXX fix this when provided with test
@@ -46,6 +47,14 @@
         else:
             return self.from_param(as_parameter)
 
+    def get_ffi_param(self, value):
+        return self.from_param(value)._to_ffi_param()
+
+    def get_ffi_argtype(self):
+        if self._ffiargtype:
+            return self._ffiargtype
+        return _shape_to_ffi_type(self._ffiargshape)
+
     def _CData_output(self, resbuffer, base=None, index=-1):
         #assert isinstance(resbuffer, _rawffi.ArrayInstance)
         """Used when data exits ctypes and goes into user code.
@@ -99,6 +108,7 @@
     """
     __metaclass__ = _CDataMeta
     _objects = None
+    _ffiargtype = None
 
     def __init__(self, *args, **kwds):
         raise TypeError("%s has no type" % (type(self),))
@@ -119,6 +129,12 @@
     def _get_buffer_value(self):
         return self._buffer[0]
 
+    def _to_ffi_param(self):
+        if self.__class__._is_pointer_like():
+            return self._get_buffer_value()
+        else:
+            return self.value
+
     def __buffer__(self):
         return buffer(self._buffer)
 
@@ -150,7 +166,7 @@
     return pointer(cdata)
 
 def cdata_from_address(self, address):
-    # fix the address, in case it's unsigned
+    # fix the address: turn it into an unsigned value, in case it's a negative number
     address = address & (sys.maxint * 2 + 1)
     instance = self.__new__(self)
     lgt = getattr(self, '_length_', 1)
@@ -159,3 +175,48 @@
 
 def addressof(tp):
     return tp._buffer.buffer
+
+
+# ----------------------------------------------------------------------
+
+def is_struct_shape(shape):
+    # see the corresponding code to set the shape in
+    # _ctypes.structure._set_shape
+    return (isinstance(shape, tuple) and
+            len(shape) == 2 and
+            isinstance(shape[0], _rawffi.Structure) and
+            shape[1] == 1)
+
+def _shape_to_ffi_type(shape):
+    try:
+        return _shape_to_ffi_type.typemap[shape]
+    except KeyError:
+        pass
+    if is_struct_shape(shape):
+        return shape[0].get_ffi_type()
+    #
+    assert False, 'unknown shape %s' % (shape,)
+
+
+_shape_to_ffi_type.typemap =  {
+    'c' : _ffi.types.char,
+    'b' : _ffi.types.sbyte,
+    'B' : _ffi.types.ubyte,
+    'h' : _ffi.types.sshort,
+    'u' : _ffi.types.unichar,
+    'H' : _ffi.types.ushort,
+    'i' : _ffi.types.sint,
+    'I' : _ffi.types.uint,
+    'l' : _ffi.types.slong,
+    'L' : _ffi.types.ulong,
+    'q' : _ffi.types.slonglong,
+    'Q' : _ffi.types.ulonglong,
+    'f' : _ffi.types.float,
+    'd' : _ffi.types.double,
+    's' : _ffi.types.void_p,
+    'P' : _ffi.types.void_p,
+    'z' : _ffi.types.void_p,
+    'O' : _ffi.types.void_p,
+    'Z' : _ffi.types.void_p,
+    }
+
diff --git a/lib_pypy/_ctypes/function.py b/lib_pypy/_ctypes/function.py
--- a/lib_pypy/_ctypes/function.py
+++ b/lib_pypy/_ctypes/function.py
@@ -1,12 +1,15 @@
+
+from _ctypes.basics import _CData, _CDataMeta, cdata_from_address
+from _ctypes.primitive import SimpleType, _SimpleCData
+from _ctypes.basics import ArgumentError, keepalive_key
+from _ctypes.basics import is_struct_shape
+from _ctypes.builtin import set_errno, set_last_error
 import _rawffi
+import _ffi
 import sys
 import traceback
 import warnings
 
-from _ctypes.basics import ArgumentError, keepalive_key
-from _ctypes.basics import _CData, _CDataMeta, cdata_from_address
-from _ctypes.builtin import set_errno, set_last_error
-from _ctypes.primitive import SimpleType
 
 # XXX this file needs huge refactoring I fear
 
@@ -24,6 +27,7 @@
 
 WIN64 = sys.platform == 'win32' and sys.maxint == 2**63 - 1
 
+
 def get_com_error(errcode, riid, pIunk):
     "Win32 specific: build a COM Error exception"
     # XXX need C support code
@@ -36,6 +40,7 @@
     funcptr.restype = int
     return funcptr(*args)
 
+
 class CFuncPtrType(_CDataMeta):
     # XXX write down here defaults and such things
 
@@ -50,6 +55,7 @@
 
     from_address = cdata_from_address
 
+
 class CFuncPtr(_CData):
     __metaclass__ = CFuncPtrType
 
@@ -65,10 +71,12 @@
     callable = None
     _ptr = None
     _buffer = None
+    _address = None
     # win32 COM properties
     _paramflags = None
     _com_index = None
     _com_iid = None
+    _is_fastpath = False
 
     __restype_set = False
 
@@ -85,8 +93,11 @@
                     raise TypeError(
                         "item %d in _argtypes_ has no from_param method" % (
                             i + 1,))
-            self._argtypes_ = argtypes
-
+            #
+            if all([hasattr(argtype, '_ffiargshape') for argtype in argtypes]):
+                fastpath_cls = make_fastpath_subclass(self.__class__)
+                fastpath_cls.enable_fastpath_maybe(self)
+            self._argtypes_ = list(argtypes)
     argtypes = property(_getargtypes, _setargtypes)
 
     def _getparamflags(self):
@@ -133,6 +144,7 @@
 
     paramflags = property(_getparamflags, _setparamflags)
 
+
     def _getrestype(self):
         return self._restype_
 
@@ -146,27 +158,24 @@
                 callable(restype)):
             raise TypeError("restype must be a type, a callable, or None")
         self._restype_ = restype
-
+        
     def _delrestype(self):
         self._ptr = None
         del self._restype_
-
+        
     restype = property(_getrestype, _setrestype, _delrestype)
 
     def _geterrcheck(self):
         return getattr(self, '_errcheck_', None)
-
     def _seterrcheck(self, errcheck):
         if not callable(errcheck):
             raise TypeError("The errcheck attribute must be callable")
         self._errcheck_ = errcheck
-
     def _delerrcheck(self):
         try:
             del self._errcheck_
         except AttributeError:
             pass
-
     errcheck = property(_geterrcheck, _seterrcheck, _delerrcheck)
 
     def _ffishapes(self, args, restype):
@@ -181,6 +190,14 @@
             restype = 'O' # void
         return argtypes, restype
 
+    def _set_address(self, address):
+        if not self._buffer:
+            self._buffer = _rawffi.Array('P')(1)
+        self._buffer[0] = address
+
+    def _get_address(self):
+        return self._buffer[0]
+
     def __init__(self, *args):
         self.name = None
         self._objects = {keepalive_key(0):self}
@@ -188,7 +205,7 @@
 
         # Empty function object -- this is needed for casts
         if not args:
-            self._buffer = _rawffi.Array('P')(1)
+            self._set_address(0)
             return
 
         argsl = list(args)
@@ -196,20 +213,24 @@
 
         # Direct construction from raw address
         if isinstance(argument, (int, long)) and not argsl:
-            ffiargs, ffires = self._ffishapes(self._argtypes_, self._restype_)
-            self._ptr = _rawffi.FuncPtr(argument, ffiargs, ffires, self._flags_)
-            self._buffer = self._ptr.byptr()
+            self._set_address(argument)
+            restype = self._restype_
+            if restype is None:
+                import ctypes
+                restype = ctypes.c_int
+            self._ptr = self._getfuncptr_fromaddress(self._argtypes_, restype)
             return
 
-        # A callback into Python
+        
+        # A callback into python
         if callable(argument) and not argsl:
             self.callable = argument
             ffiargs, ffires = self._ffishapes(self._argtypes_, self._restype_)
             if self._restype_ is None:
                 ffires = None
-            self._ptr = _rawffi.CallbackPtr(self._wrap_callable(
-                argument, self.argtypes
-                ), ffiargs, ffires, self._flags_)
+            self._ptr = _rawffi.CallbackPtr(self._wrap_callable(argument,
+                                                                self.argtypes),
+                                            ffiargs, ffires, self._flags_)
             self._buffer = self._ptr.byptr()
             return
 
@@ -218,7 +239,7 @@
             import ctypes
             self.name, dll = argument
             if isinstance(dll, str):
-                self.dll = ctypes.CDLL(dll)
+                self.dll = ctypes.CDLL(self.dll)
             else:
                 self.dll = dll
             if argsl:
@@ -227,7 +248,7 @@
                     raise TypeError("Unknown constructor %s" % (args,))
             # We need to check dll anyway
             ptr = self._getfuncptr([], ctypes.c_int)
-            self._buffer = ptr.byptr()
+            self._set_address(ptr.getaddr())
             return
 
         # A COM function call, by index
@@ -270,15 +291,15 @@
                     # than the length of the argtypes tuple.
                     args = args[:len(self._argtypes_)]
             else:
-                plural = len(argtypes) > 1 and "s" or ""
+                plural = len(self._argtypes_) > 1 and "s" or ""
                 raise TypeError(
                     "This function takes %d argument%s (%s given)"
-                    % (len(argtypes), plural, len(args)))
+                    % (len(self._argtypes_), plural, len(args)))
 
             # check that arguments are convertible
             ## XXX Not as long as ctypes.cast is a callback function with
             ## py_object arguments...
-            ## self._convert_args(argtypes, args, {})
+            ## self._convert_args(self._argtypes_, args, {})
 
             try:
                 res = self.callable(*args)
@@ -301,6 +322,7 @@
                           RuntimeWarning, stacklevel=2)
 
         if self._com_index:
+            assert False, 'TODO2'
             from ctypes import cast, c_void_p, POINTER
             if not args:
                 raise ValueError(
@@ -312,77 +334,63 @@
             args[0] = args[0].value
         else:
             thisarg = None
+            
+        newargs, argtypes, outargs = self._convert_args(argtypes, args, kwargs)
 
-        args, outargs = self._convert_args(argtypes, args, kwargs)
-        argtypes = [type(arg) for arg in args]
+        funcptr = self._getfuncptr(argtypes, self._restype_, thisarg)
+        result = self._call_funcptr(funcptr, *newargs)
+        result = self._do_errcheck(result, args)
 
-        restype = self._restype_
-        funcptr = self._getfuncptr(argtypes, restype, thisarg)
+        if not outargs:
+            return result
+        if len(outargs) == 1:
+            return outargs[0]
+        return tuple(outargs)
+
+    def _call_funcptr(self, funcptr, *newargs):
+
         if self._flags_ & _rawffi.FUNCFLAG_USE_ERRNO:
             set_errno(_rawffi.get_errno())
         if self._flags_ & _rawffi.FUNCFLAG_USE_LASTERROR:
             set_last_error(_rawffi.get_last_error())
         try:
-            resbuffer = funcptr(*[arg._get_buffer_for_param()._buffer
-                                  for arg in args])
+            result = funcptr(*newargs)
         finally:
             if self._flags_ & _rawffi.FUNCFLAG_USE_ERRNO:
                 set_errno(_rawffi.get_errno())
             if self._flags_ & _rawffi.FUNCFLAG_USE_LASTERROR:
                 set_last_error(_rawffi.get_last_error())
+        #
+        return self._build_result(self._restype_, result, newargs)
 
-        result = None
-        if self._com_index:
-            if resbuffer[0] & 0x80000000:
-                raise get_com_error(resbuffer[0],
-                                    self._com_iid, args[0])
-            else:
-                result = int(resbuffer[0])
-        elif restype is not None:
-            checker = getattr(self.restype, '_check_retval_', None)
-            if checker:
-                val = restype(resbuffer[0])
-                # the original ctypes seems to make the distinction between
-                # classes defining a new type, and their subclasses
-                if '_type_' in restype.__dict__:
-                    val = val.value
-                result = checker(val)
-            elif not isinstance(restype, _CDataMeta):
-                result = restype(resbuffer[0])
-            else:
-                result = restype._CData_retval(resbuffer)
-
+    def _do_errcheck(self, result, args):
         # The 'errcheck' protocol
         if self._errcheck_:
             v = self._errcheck_(result, self, args)
             # If the errcheck funtion failed, let it throw
-            # If the errcheck function returned callargs unchanged,
+            # If the errcheck function returned 'args' unchanged,
             # continue normal processing.
             # If the errcheck function returned something else,
             # use that as result.
             if v is not args:
-                result = v
+                return v
+        return result
 
-        if not outargs:
-            return result
-
-        if len(outargs) == 1:
-            return outargs[0]
-
-        return tuple(outargs)
+    def _getfuncptr_fromaddress(self, argtypes, restype):
+        address = self._get_address()
+        ffiargs = [argtype.get_ffi_argtype() for argtype in argtypes]
+        ffires = restype.get_ffi_argtype()
+        return _ffi.FuncPtr.fromaddr(address, '', ffiargs, ffires)
 
     def _getfuncptr(self, argtypes, restype, thisarg=None):
-        if self._ptr is not None and argtypes is self._argtypes_:
+        if self._ptr is not None and (argtypes is self._argtypes_ or argtypes == self._argtypes_):
             return self._ptr
         if restype is None or not isinstance(restype, _CDataMeta):
             import ctypes
             restype = ctypes.c_int
-        argshapes = [arg._ffiargshape for arg in argtypes]
-        resshape = restype._ffiargshape
         if self._buffer is not None:
-            ptr = _rawffi.FuncPtr(self._buffer[0], argshapes, resshape,
-                                  self._flags_)
-            if argtypes is self._argtypes_:
+            ptr = self._getfuncptr_fromaddress(argtypes, restype)
+            if argtypes == self._argtypes_:
                 self._ptr = ptr
             return ptr
 
@@ -391,14 +399,20 @@
             if not thisarg:
                 raise ValueError("COM method call without VTable")
             ptr = thisarg[self._com_index - 0x1000]
+            argshapes = [arg._ffiargshape for arg in argtypes]
+            resshape = restype._ffiargshape
             return _rawffi.FuncPtr(ptr, argshapes, resshape, self._flags_)
-
+        
         cdll = self.dll._handle
         try:
-            return cdll.ptr(self.name, argshapes, resshape, self._flags_)
+            ffi_argtypes = [argtype.get_ffi_argtype() for argtype in argtypes]
+            ffi_restype = restype.get_ffi_argtype()
+            self._ptr = cdll.getfunc(self.name, ffi_argtypes, ffi_restype)
+            return self._ptr
         except AttributeError:
             if self._flags_ & _rawffi.FUNCFLAG_CDECL:
                 raise
+
             # Win64 has no stdcall calling conv, so it should also not have the
             # name mangling of it.
             if WIN64:
@@ -409,23 +423,33 @@
             for i in range(33):
                 mangled_name = "_%s@%d" % (self.name, i*4)
                 try:
-                    return cdll.ptr(mangled_name, argshapes, resshape,
-                                    self._flags_)
+                    return cdll.getfunc(mangled_name,
+                                        ffi_argtypes, ffi_restype,
+                                        # XXX self._flags_
+                                        )
                 except AttributeError:
                     pass
             raise
 
-    @staticmethod
-    def _conv_param(argtype, arg):
-        from ctypes import c_char_p, c_wchar_p, c_void_p, c_int
+    @classmethod
+    def _conv_param(cls, argtype, arg):
+        if isinstance(argtype, _CDataMeta):
+            #arg = argtype.from_param(arg)
+            arg = argtype.get_ffi_param(arg)
+            return arg, argtype
+        
         if argtype is not None:
             arg = argtype.from_param(arg)
         if hasattr(arg, '_as_parameter_'):
             arg = arg._as_parameter_
         if isinstance(arg, _CData):
-            # The usual case when argtype is defined
-            cobj = arg
-        elif isinstance(arg, str):
+            return arg._to_ffi_param(), type(arg)
+        #
+        # non-usual case: we do the import here to save a lot of code in the
+        # jit trace of the normal case
+        from ctypes import c_char_p, c_wchar_p, c_void_p, c_int
+        #
+        if isinstance(arg, str):
             cobj = c_char_p(arg)
         elif isinstance(arg, unicode):
             cobj = c_wchar_p(arg)
@@ -435,11 +459,13 @@
             cobj = c_int(arg)
         else:
             raise TypeError("Don't know how to handle %s" % (arg,))
-        return cobj
+
+        return cobj._to_ffi_param(), type(cobj)
 
     def _convert_args(self, argtypes, args, kwargs, marker=object()):
-        callargs = []
+        newargs = []
         outargs = []
+        newargtypes = []
         total = len(args)
         paramflags = self._paramflags
 
@@ -470,8 +496,9 @@
                     val = defval
                     if val is marker:
                         val = 0
-                    wrapped = self._conv_param(argtype, val)
-                    callargs.append(wrapped)
+                    newarg, newargtype = self._conv_param(argtype, val)
+                    newargs.append(newarg)
+                    newargtypes.append(newargtype)
                 elif flag in (0, PARAMFLAG_FIN):
                     if inargs_idx < total:
                         val = args[inargs_idx]
@@ -485,38 +512,102 @@
                         raise TypeError("required argument '%s' missing" % name)
                     else:
                         raise TypeError("not enough arguments")
-                    wrapped = self._conv_param(argtype, val)
-                    callargs.append(wrapped)
+                    newarg, newargtype = self._conv_param(argtype, val)
+                    newargs.append(newarg)
+                    newargtypes.append(newargtype)
                 elif flag == PARAMFLAG_FOUT:
                     if defval is not marker:
                         outargs.append(defval)
-                        wrapped = self._conv_param(argtype, defval)
+                        newarg, newargtype = self._conv_param(argtype, defval)
                     else:
                         import ctypes
                         val = argtype._type_()
                         outargs.append(val)
-                        wrapped = ctypes.byref(val)
-                    callargs.append(wrapped)
+                        newarg = ctypes.byref(val)
+                        newargtype = type(newarg)
+                    newargs.append(newarg)
+                    newargtypes.append(newargtype)
                 else:
                     raise ValueError("paramflag %d not yet implemented" % flag)
             else:
                 try:
-                    wrapped = self._conv_param(argtype, args[i])
+                    newarg, newargtype = self._conv_param(argtype, args[i])
                 except (UnicodeError, TypeError, ValueError), e:
                     raise ArgumentError(str(e))
-                callargs.append(wrapped)
+                newargs.append(newarg)
+                newargtypes.append(newargtype)
                 inargs_idx += 1
 
-        if len(callargs) < total:
-            extra = args[len(callargs):]
+        if len(newargs) < len(args):
+            extra = args[len(newargs):]
             for i, arg in enumerate(extra):
                 try:
-                    wrapped = self._conv_param(None, arg)
+                    newarg, newargtype = self._conv_param(None, arg)
                 except (UnicodeError, TypeError, ValueError), e:
                     raise ArgumentError(str(e))
-                callargs.append(wrapped)
+                newargs.append(newarg)
+                newargtypes.append(newargtype)
+        return newargs, newargtypes, outargs
 
-        return callargs, outargs
+    
+    def _wrap_result(self, restype, result):
+        """
+        Convert from low-level repr of the result to the high-level python
+        one.
+        """
+        # hack for performance: if restype is a "simple" primitive type, don't
+        # allocate the buffer because it's going to be thrown away immediately
+        if restype.__bases__[0] is _SimpleCData and not restype._is_pointer_like():
+            return result
+        #
+        shape = restype._ffishape
+        if is_struct_shape(shape):
+            buf = result
+        else:
+            buf = _rawffi.Array(shape)(1, autofree=True)
+            buf[0] = result
+        retval = restype._CData_retval(buf)
+        return retval
+
+    def _build_result(self, restype, result, argsandobjs):
+        """Build the function result:
+           If there is no OUT parameter, return the actual function result
+           If there is one OUT parameter, return it
+           If there are many OUT parameters, return a tuple"""
+
+        # XXX: note for the future: the function used to take a "resbuffer",
+        # i.e. an array of ints. Now it takes a result, which is already a
+        # python object. All places that do "resbuffer[0]" should check that
+        # result is actually an int and just use it.
+        #
+        # Also, argsandobjs used to be "args" in __call__, now it's "newargs"
+        # (i.e., the already unwrapped objects). It's used only when we have a
+        # PARAMFLAG_FOUT and it's probably wrong, I'll fix it when I find a
+        # failing test
+
+        retval = None
+
+        if self._com_index:
+            if resbuffer[0] & 0x80000000:
+                raise get_com_error(resbuffer[0],
+                                    self._com_iid, argsandobjs[0])
+            else:
+                retval = int(resbuffer[0])
+        elif restype is not None:
+            checker = getattr(self.restype, '_check_retval_', None)
+            if checker:
+                val = restype(result)
+                # the original ctypes seems to make the distinction between
+                # classes defining a new type, and their subclasses
+                if '_type_' in restype.__dict__:
+                    val = val.value
+                retval = checker(val)
+            elif not isinstance(restype, _CDataMeta):
+                retval = restype(result)
+            else:
+                retval = self._wrap_result(restype, result)
+
+        return retval
 
     def __nonzero__(self):
         return self._com_index is not None or bool(self._buffer[0])
@@ -532,3 +623,61 @@
                 self._ptr.free()
                 self._ptr = None
             self._needs_free = False
+
+
+def make_fastpath_subclass(CFuncPtr):
+    if CFuncPtr._is_fastpath:
+        return CFuncPtr
+    #
+    try:
+        return make_fastpath_subclass.memo[CFuncPtr]
+    except KeyError:
+        pass
+
+    class CFuncPtrFast(CFuncPtr):
+
+        _is_fastpath = True
+        _slowpath_allowed = True # set to False by tests
+
+        @classmethod
+        def enable_fastpath_maybe(cls, obj):
+            if (obj.callable is None and
+                obj._com_index is None):
+                obj.__class__ = cls
+
+        def __rollback(self):
+            assert self._slowpath_allowed
+            self.__class__ = CFuncPtr
+
+        # disable the fast path if we reset argtypes
+        def _setargtypes(self, argtypes):
+            self.__rollback()
+            self._setargtypes(argtypes)
+        argtypes = property(CFuncPtr._getargtypes, _setargtypes)
+
+        def _setcallable(self, func):
+            self.__rollback()
+            self.callable = func
+        callable = property(lambda x: None, _setcallable)
+
+        def _setcom_index(self, idx):
+            self.__rollback()
+            self._com_index = idx
+        _com_index = property(lambda x: None, _setcom_index)
+
+        def __call__(self, *args):
+            thisarg = None
+            argtypes = self._argtypes_
+            restype = self._restype_
+            funcptr = self._getfuncptr(argtypes, restype, thisarg)
+            try:
+                result = self._call_funcptr(funcptr, *args)
+                result = self._do_errcheck(result, args)
+            except (TypeError, ArgumentError): # XXX, should be FFITypeError
+                assert self._slowpath_allowed
+                return CFuncPtr.__call__(self, *args)
+            return result
+
+    make_fastpath_subclass.memo[CFuncPtr] = CFuncPtrFast
+    return CFuncPtrFast
+make_fastpath_subclass.memo = {}
diff --git a/lib_pypy/_ctypes/pointer.py b/lib_pypy/_ctypes/pointer.py
--- a/lib_pypy/_ctypes/pointer.py
+++ b/lib_pypy/_ctypes/pointer.py
@@ -1,6 +1,7 @@
 
 import _rawffi
-from _ctypes.basics import _CData, _CDataMeta, cdata_from_address
+import _ffi
+from _ctypes.basics import _CData, _CDataMeta, cdata_from_address, ArgumentError
 from _ctypes.basics import keepalive_key, store_reference, ensure_objects
 from _ctypes.basics import sizeof, byref
 from _ctypes.array import Array, array_get_slice_params, array_slice_getitem,\
@@ -19,7 +20,7 @@
             length     = 1,
             _ffiargshape = 'P',
             _ffishape  = 'P',
-            _fficompositesize = None
+            _fficompositesize = None,
         )
         # XXX check if typedict['_type_'] is any sane
         # XXX remember about paramfunc
@@ -66,6 +67,7 @@
         self._ffiarray = ffiarray
         self.__init__ = __init__
         self._type_ = TP
+        self._ffiargtype = _ffi.types.Pointer(TP.get_ffi_argtype())
 
     from_address = cdata_from_address
 
@@ -114,6 +116,17 @@
 
     contents = property(getcontents, setcontents)
 
+    def _as_ffi_pointer_(self, ffitype):
+        return as_ffi_pointer(self, ffitype)
+
+def as_ffi_pointer(value, ffitype):
+    my_ffitype = type(value).get_ffi_argtype()
+    # for now, we always allow types.pointer, else a lot of tests
+    # break. We need to rethink how pointers are represented, though
+    if my_ffitype is not ffitype and ffitype is not _ffi.types.void_p:
+        raise ArgumentError, "expected %s instance, got %s" % (type(value), ffitype)
+    return value._get_buffer_value()
+
 def _cast_addr(obj, _, tp):
     if not (isinstance(tp, _CDataMeta) and tp._is_pointer_like()):
         raise TypeError("cast() argument 2 must be a pointer type, not %s"
diff --git a/lib_pypy/_ctypes/primitive.py b/lib_pypy/_ctypes/primitive.py
--- a/lib_pypy/_ctypes/primitive.py
+++ b/lib_pypy/_ctypes/primitive.py
@@ -1,3 +1,4 @@
+import _ffi
 import _rawffi
 import weakref
 import sys
@@ -8,7 +9,7 @@
      CArgObject
 from _ctypes.builtin import ConvMode
 from _ctypes.array import Array
-from _ctypes.pointer import _Pointer
+from _ctypes.pointer import _Pointer, as_ffi_pointer
 
 class NULL(object):
     pass
@@ -140,6 +141,8 @@
                     value = 0
                 self._buffer[0] = value
             result.value = property(_getvalue, _setvalue)
+            result._ffiargtype = _ffi.types.Pointer(_ffi.types.char)
+
         elif tp == 'Z':
             # c_wchar_p
             def _getvalue(self):
@@ -162,6 +165,7 @@
                     value = 0
                 self._buffer[0] = value
             result.value = property(_getvalue, _setvalue)
+            result._ffiargtype = _ffi.types.Pointer(_ffi.types.unichar)
 
         elif tp == 'P':
             # c_void_p
@@ -248,6 +252,12 @@
                     self._buffer[0] = 0  # VARIANT_FALSE
             result.value = property(_getvalue, _setvalue)
 
+        # make pointer-types compatible with the _ffi fast path
+        if result._is_pointer_like():
+            def _as_ffi_pointer_(self, ffitype):
+                return as_ffi_pointer(self, ffitype)
+            result._as_ffi_pointer_ = _as_ffi_pointer_
+            
         return result
 
     from_address = cdata_from_address
diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py
--- a/lib_pypy/_ctypes/structure.py
+++ b/lib_pypy/_ctypes/structure.py
@@ -240,6 +240,9 @@
     def _get_buffer_value(self):
         return self._buffer.buffer
 
+    def _to_ffi_param(self):
+        return self._buffer
+
 
 class StructureMeta(StructOrUnionMeta):
     _is_union = False
diff --git a/lib_pypy/ctypes_support.py b/lib_pypy/ctypes_support.py
--- a/lib_pypy/ctypes_support.py
+++ b/lib_pypy/ctypes_support.py
@@ -10,8 +10,8 @@
 # __________ the standard C library __________
 
 if sys.platform == 'win32':
-    import _rawffi
-    standard_c_lib = ctypes.CDLL('msvcrt', handle=_rawffi.get_libc())
+    import _ffi
+    standard_c_lib = ctypes.CDLL('msvcrt', handle=_ffi.get_libc())
 else:
     standard_c_lib = ctypes.CDLL(ctypes.util.find_library('c'))
 
diff --git a/lib_pypy/stackless.py b/lib_pypy/stackless.py
--- a/lib_pypy/stackless.py
+++ b/lib_pypy/stackless.py
@@ -200,14 +200,15 @@
     # I can't think of a better solution without a real transform.
 
 def rewrite_stackless_primitive(coro_state, alive, tempval):
-    flags, state, thunk, parent = coro_state
-    for i, frame in enumerate(state):
+    flags, frame, thunk, parent = coro_state
+    while frame is not None:
         retval_expr = _stackless_primitive_registry.get(frame.f_code)
         if retval_expr:
             # this tasklet needs to stop pickling here and return its value.
             tempval = eval(retval_expr, globals(), frame.f_locals)
-            state = state[:i]
-            coro_state = flags, state, thunk, parent
+            coro_state = flags, frame, thunk, parent
+            break
+        frame = frame.f_back
     return coro_state, alive, tempval
 
 #
@@ -492,23 +493,22 @@
         assert two == ()
         # we want to get rid of the parent thing.
         # for now, we just drop it
-        a, b, c, d = coro_state
-        
+        a, frame, c, d = coro_state
+
         # Removing all frames related to stackless.py.
         # They point to stuff we don't want to be pickled.
-        frame_list = list(b)
-        new_frame_list = []
-        for frame in frame_list:
+
+        pickleframe = frame
+        while frame is not None:
             if frame.f_code == schedule.func_code:
                 # Removing everything including and after the
                 # call to stackless.schedule()
+                pickleframe = frame.f_back
                 break
-            new_frame_list.append(frame)
-        b = tuple(new_frame_list)
-        
+            frame = frame.f_back
         if d:
             assert isinstance(d, coroutine)
-        coro_state = a, b, c, None
+        coro_state = a, pickleframe, c, None
         coro_state, alive, tempval = rewrite_stackless_primitive(coro_state, self.alive, self.tempval)
         inst_dict = self.__dict__.copy()
         inst_dict.pop('tempval', None)
diff --git a/pypy/annotation/annrpython.py b/pypy/annotation/annrpython.py
--- a/pypy/annotation/annrpython.py
+++ b/pypy/annotation/annrpython.py
@@ -228,7 +228,7 @@
             # graph -- it's already low-level operations!
             for a, s_newarg in zip(graph.getargs(), cells):
                 s_oldarg = self.binding(a)
-                assert s_oldarg.contains(s_newarg)
+                assert annmodel.unionof(s_oldarg, s_newarg) == s_oldarg
         else:
             assert not self.frozen
             for a in cells:
diff --git a/pypy/annotation/bookkeeper.py b/pypy/annotation/bookkeeper.py
--- a/pypy/annotation/bookkeeper.py
+++ b/pypy/annotation/bookkeeper.py
@@ -279,13 +279,13 @@
         desc = self.getdesc(cls)
         return desc.getuniqueclassdef()
 
-    def getlistdef(self, **flags):
+    def getlistdef(self, **flags_if_new):
         """Get the ListDef associated with the current position."""
         try:
             listdef = self.listdefs[self.position_key]
         except KeyError:
             listdef = self.listdefs[self.position_key] = ListDef(self)
-            listdef.listitem.__dict__.update(flags)
+            listdef.listitem.__dict__.update(flags_if_new)
         return listdef
 
     def newlist(self, *s_values, **flags):
@@ -294,6 +294,9 @@
         listdef = self.getlistdef(**flags)
         for s_value in s_values:
             listdef.generalize(s_value)
+        if flags:
+            assert flags.keys() == ['range_step']
+            listdef.generalize_range_step(flags['range_step'])
         return SomeList(listdef)
 
     def getdictdef(self, is_r_dict=False):
diff --git a/pypy/annotation/description.py b/pypy/annotation/description.py
--- a/pypy/annotation/description.py
+++ b/pypy/annotation/description.py
@@ -565,7 +565,7 @@
         if self.is_exception_class():
             if self.pyobj.__module__ == 'exceptions':
                 return True
-            if self.pyobj is py.code._AssertionError:
+            if issubclass(self.pyobj, AssertionError):
                 return True
         return False
 
diff --git a/pypy/annotation/listdef.py b/pypy/annotation/listdef.py
--- a/pypy/annotation/listdef.py
+++ b/pypy/annotation/listdef.py
@@ -184,6 +184,11 @@
     def generalize(self, s_value):
         self.listitem.generalize(s_value)
 
+    def generalize_range_step(self, range_step):
+        newlistitem = ListItem(self.listitem.bookkeeper, s_ImpossibleValue)
+        newlistitem.range_step = range_step
+        self.listitem.merge(newlistitem)
+
     def __repr__(self):
         return '<[%r]%s%s%s%s>' % (self.listitem.s_value,
                                self.listitem.mutated and 'm' or '',
diff --git a/pypy/annotation/model.py b/pypy/annotation/model.py
--- a/pypy/annotation/model.py
+++ b/pypy/annotation/model.py
@@ -32,13 +32,15 @@
 import pypy
 from pypy.tool import descriptor
 from pypy.tool.pairtype import pair, extendabletype
-from pypy.tool.tls import tlsobject
 from pypy.rlib.rarithmetic import r_uint, r_ulonglong, base_int
 from pypy.rlib.rarithmetic import r_singlefloat, r_longfloat
 import inspect, weakref
 
 DEBUG = False    # set to False to disable recording of debugging information
-TLS = tlsobject()
+
+class State(object):
+    pass
+TLS = State()
 
 class SomeObject(object):
     """The set of all objects.  Each instance stands
diff --git a/pypy/annotation/test/test_annrpython.py b/pypy/annotation/test/test_annrpython.py
--- a/pypy/annotation/test/test_annrpython.py
+++ b/pypy/annotation/test/test_annrpython.py
@@ -3483,6 +3483,17 @@
         a = self.RPythonAnnotator()
         raises(Exception, a.build_types, f, [int])
 
+    def test_range_variable_step(self):
+        def g(n):
+            return range(0, 10, n)
+        def f(n):
+            r = g(1)    # constant step, at first
+            s = g(n)    # but it becomes a variable step
+            return r
+        a = self.RPythonAnnotator()
+        s = a.build_types(f, [int])
+        assert s.listdef.listitem.range_step == 0
+
 
 def g(n):
     return [0,1,2,n]
diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py
--- a/pypy/config/pypyoption.py
+++ b/pypy/config/pypyoption.py
@@ -33,13 +33,17 @@
      "struct", "_hashlib", "_md5", "_sha", "_minimal_curses", "cStringIO",
      "thread", "itertools", "pyexpat", "_ssl", "cpyext", "array",
      "_bisect", "binascii", "_multiprocessing", '_warnings',
-     "_collections", "_multibytecodec", "micronumpy"]
+     "_collections", "_multibytecodec", "micronumpy", "_ffi"]
 ))
 
 translation_modules = default_modules.copy()
 translation_modules.update(dict.fromkeys(
     ["fcntl", "rctime", "select", "signal", "_rawffi", "zlib",
-     "struct", "_md5", "cStringIO", "array"]))
+     "struct", "_md5", "cStringIO", "array", "_ffi",
+     # the following are needed for pyrepl (and hence for the
+     # interactive prompt/pdb)
+     "termios", "_minimal_curses",
+     ]))
 
 working_oo_modules = default_modules.copy()
 working_oo_modules.update(dict.fromkeys(
@@ -243,6 +247,10 @@
                    "use small tuples",
                    default=False),
 
+        BoolOption("withsmalltuple",
+                   "use small tuples",
+                   default=False),
+
         BoolOption("withrope", "use ropes as the string implementation",
                    default=False,
                    requires=[("objspace.std.withstrslice", False),
diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst
--- a/pypy/doc/cpython_differences.rst
+++ b/pypy/doc/cpython_differences.rst
@@ -136,6 +136,11 @@
 next access.  Any code that uses weak proxies must carefully catch such
 ``ReferenceError`` at any place that uses them.
 
+As a side effect, the ``finally`` clause inside a generator will be executed
+only when the generator object is garbage collected (see `issue 736`__).
+
+.. __: http://bugs.pypy.org/issue736
+
 There are a few extra implications for the difference in the GC.  Most
 notably, if an object has a ``__del__``, the ``__del__`` is never called more
 than once in PyPy; but CPython will call the same ``__del__`` several times
@@ -168,6 +173,11 @@
     >>>> A.__del__ = lambda self: None
     __main__:1: RuntimeWarning: a __del__ method added to an existing type will not be called
 
+Even more obscure: the same is true, for old-style classes, if you attach
+the ``__del__`` to an instance (even in CPython this does not work with
+new-style classes).  You get a RuntimeWarning in PyPy.  To fix these cases
+just make sure there is a ``__del__`` method in the class to start with.
+
 
 Subclasses of built-in types
 ----------------------------
diff --git a/pypy/doc/image/jitviewer.png b/pypy/doc/image/jitviewer.png
new file mode 100644
index 0000000000000000000000000000000000000000..ad2abca5c88125061fa519dcf3f9fada577573ee
GIT binary patch

[cut]

diff --git a/pypy/doc/index.rst b/pypy/doc/index.rst
--- a/pypy/doc/index.rst
+++ b/pypy/doc/index.rst
@@ -21,6 +21,8 @@
 
 * `speed.pypy.org`_: Daily benchmarks of how fast PyPy is
 
+* `potential project ideas`_: In case you want to get your feet wet...
+
 Documentation for the PyPy Python Interpreter
 ===============================================
 
@@ -59,8 +61,6 @@
   (if they are not already developed in the FAQ_).
   You can find logs of the channel here_.
 
-.. XXX play1? 
-
 Meeting PyPy developers
 =======================
 
@@ -83,7 +83,7 @@
 .. _`Release 1.5`: http://pypy.org/download.html
 .. _`speed.pypy.org`: http://speed.pypy.org
 .. _`RPython toolchain`: translation.html
-
+.. _`potential project ideas`: project-ideas.html
 
 Project Documentation
 =====================================
diff --git a/pypy/doc/project-ideas.rst b/pypy/doc/project-ideas.rst
new file mode 100644
--- /dev/null
+++ b/pypy/doc/project-ideas.rst
@@ -0,0 +1,130 @@
+
+Potential project list
+======================
+
+This is a list of projects that are interesting for potential contributors
+who are seriously interested in the PyPy project. They mostly share common
+patterns - they're mid-to-large in size, they're usually well defined as
+standalone projects and they're not being actively worked on. For small
+projects that you might want to work on, it's much better to either look
+at the `issue tracker`_, pop up on #pypy on irc.freenode.net or write to the
+`mailing list`_. This is simply for the reason that small possible projects
+tend to change very rapidly.
+
+This list is mostly for having an overview of potential projects. This list is
+by definition not exhaustive and we're pleased if people come up with their
+own improvement ideas. In any case, if you feel like working on some of those
+projects, or anything else in PyPy, pop up on IRC or write to us on the
+`mailing list`_.
+
+Numpy improvements
+------------------
+
+This is more of a project-container than a single project. Possible ideas:
+
+* experiment with auto-vectorization using SSE or implement vectorization
+  without automatically detecting it for array operations.
+
+* improve numpy, for example implement memory views.
+
+* interface with fortran/C libraries.
+
+Improving the jitviewer
+------------------------
+
+Analyzing performance of applications is always tricky. We have various
+tools, for example a `jitviewer`_ that help us analyze performance.
+
+The jitviewer shows the code generated by the PyPy JIT in a hierarchical way,
+as shown by the screenshot below:
+
+  - at the bottom level, it shows the Python source code of the compiled loops
+
+  - for each source code line, it shows the corresponding Python bytecode
+
+  - for each opcode, it shows the corresponding jit operations, which are the
+    ones actually sent to the backend for compiling (such as ``i15 = i10 <
+    2000`` in the example)
+
+.. image:: image/jitviewer.png
+
+We would like to add one level to this hierarchy, by showing the generated
+machine code for each jit operation.  The necessary information is already in
+the log file produced by the JIT, so it is "only" a matter of teaching the
+jitviewer to display it.  Ideally, the machine code should be hidden by
+default and viewable on request.
+
+The jitviewer is a web application based on flask and jinja2 (and jQuery on
+the client): if you have great web developing skills and want to help PyPy,
+this is an ideal task to get started, because it does not require any deep
+knowledge of the internals.
+
+Translation Toolchain
+---------------------
+
+* Incremental or distributed translation.
+
+* Allow separate compilation of extension modules.
+
+Work on some of the other languages
+-----------------------------------
+
+There are various languages implemented using the RPython translation toolchain.
+One of the most interesting is the `JavaScript implementation`_, but there
+are others like scheme or prolog. An interesting project would be to improve
+the jittability of those or to experiment with various optimizations.
+
+Various GCs
+-----------
+
+PyPy has a pluggable garbage collection policy. This means that various garbage
+collectors can be written for specialized purposes, or even various
+experiments can be done for the general purpose. Examples:
+
+* An incremental garbage collector that has specified maximal pause times,
+  crucial for games
+
+* A garbage collector that compacts memory better for mobile devices
+
+* A concurrent garbage collector (a lot of work)
+
+Remove the GIL
+--------------
+
+This is a major task that requires lots of thinking. However, a few subprojects
+can be potentially specified, unless a better plan can be thought out:
+
+* A thread-aware garbage collector
+
+* Better RPython primitives for dealing with concurrency
+
+* JIT passes to remove locks on objects
+
+* (maybe) implement locking in Python interpreter
+
+* alternatively, look at Software Transactional Memory
+
+Introduce new benchmarks
+------------------------
+
+We're usually happy to introduce new benchmarks. Please consult us
+before, but in general something that's real-world python code
+and is not already represented is welcome. We need at least a standalone
+script that can run without parameters. Example ideas (benchmarks need
+to be got from them!):
+
+* `hg`
+
+* `sympy`
+
+Experiment (again) with LLVM backend for RPython compilation
+------------------------------------------------------------
+
+We already tried working with LLVM and at the time, LLVM was not mature enough
+for our needs. It's possible that this has changed; reviving the LLVM backend
+(or writing a new one from scratch) for static compilation would be a good project.
+
+.. _`issue tracker`: http://bugs.pypy.org
+.. _`mailing list`: http://mail.python.org/mailman/listinfo/pypy-dev
+.. _`jitviewer`: http://bitbucket.org/pypy/jitviewer
+.. _`JavaScript implementation`: https://bitbucket.org/pypy/lang-js/overview
diff --git a/pypy/interpreter/astcompiler/misc.py b/pypy/interpreter/astcompiler/misc.py
--- a/pypy/interpreter/astcompiler/misc.py
+++ b/pypy/interpreter/astcompiler/misc.py
@@ -31,11 +31,12 @@
     future_lineno = 0
     future_column = 0
     have_docstring = False
+    body = None
     if isinstance(tree, ast.Module):
         body = tree.body
     elif isinstance(tree, ast.Interactive):
         body = tree.body
-    else:
+    if body is None:
         return 0, 0
     for stmt in body:
         if isinstance(stmt, ast.Expr) and isinstance(stmt.value, ast.Str):
diff --git a/pypy/interpreter/error.py b/pypy/interpreter/error.py
--- a/pypy/interpreter/error.py
+++ b/pypy/interpreter/error.py
@@ -11,14 +11,14 @@
     """Interpreter-level exception that signals an exception that should be
     sent to the application level.
 
-    OperationError instances have three public attributes (and no .args),
-    w_type, w_value and application_traceback, which contain the wrapped
+    OperationError instances have three attributes (and no .args),
+    w_type, _w_value and _application_traceback, which contain the wrapped
     type and value describing the exception, and a chained list of
     PyTraceback objects making the application-level traceback.
     """
 
     _w_value = None
-    application_traceback = None
+    _application_traceback = None
 
     def __init__(self, w_type, w_value, tb=None):
         if not we_are_translated() and w_type is None:
@@ -26,7 +26,7 @@
             raise FlowingError(w_value)
         self.setup(w_type)
         self._w_value = w_value
-        self.application_traceback = tb
+        self._application_traceback = tb
 
     def setup(self, w_type):
         self.w_type = w_type
@@ -37,7 +37,7 @@
         # for sys.exc_clear()
         self.w_type = space.w_None
         self._w_value = space.w_None
-        self.application_traceback = None
+        self._application_traceback = None
         if not we_are_translated():
             del self.debug_excs[:]
 
@@ -103,7 +103,7 @@
 
     def print_app_tb_only(self, file):
         "NOT_RPYTHON"
-        tb = self.application_traceback
+        tb = self._application_traceback
         if tb:
             import linecache
             print >> file, "Traceback (application-level):"
@@ -251,6 +251,30 @@
     def _compute_value(self):
         raise NotImplementedError
 
+    def get_traceback(self):
+        """Calling this marks the PyTraceback as escaped, i.e. it becomes
+        accessible and inspectable by app-level Python code.  For the JIT.
+        Note that this has no effect if there are already several traceback
+        frames recorded, because in this case they are already marked as
+        escaping by executioncontext.leave() being called with
+        got_exception=True.
+        """
+        from pypy.interpreter.pytraceback import PyTraceback
+        tb = self._application_traceback
+        if tb is not None and isinstance(tb, PyTraceback):
+            tb.frame.mark_as_escaped()
+        return tb
+
+    def set_traceback(self, traceback):
+        """Set the current traceback.  It should either be a traceback
+        pointing to some already-escaped frame, or a traceback for the
+        current frame.  To support the latter case we do not mark the
+        frame as escaped.  The idea is that it will be marked as escaping
+        only if the exception really propagates out of this frame, by
+        executioncontext.leave() being called with got_exception=True.
+        """
+        self._application_traceback = traceback
+
 # ____________________________________________________________
 # optimization only: avoid the slowest operation -- the string
 # formatting with '%' -- in the common case were we don't
diff --git a/pypy/interpreter/eval.py b/pypy/interpreter/eval.py
--- a/pypy/interpreter/eval.py
+++ b/pypy/interpreter/eval.py
@@ -2,6 +2,7 @@
 This module defines the abstract base classes that support execution:
 Code and Frame.
 """
+from pypy.rlib import jit
 from pypy.interpreter.error import OperationError
 from pypy.interpreter.baseobjspace import Wrappable
 
@@ -97,6 +98,7 @@
         "Abstract. Get the expected number of locals."
         raise TypeError, "abstract"
 
+    @jit.dont_look_inside
     def fast2locals(self):
         # Copy values from self.fastlocals_w to self.w_locals
         if self.w_locals is None:
@@ -110,6 +112,7 @@
                 w_name = self.space.wrap(name)
                 self.space.setitem(self.w_locals, w_name, w_value)
 
+    @jit.dont_look_inside
     def locals2fast(self):
         # Copy values from self.w_locals to self.fastlocals_w
         assert self.w_locals is not None
diff --git a/pypy/interpreter/executioncontext.py b/pypy/interpreter/executioncontext.py
--- a/pypy/interpreter/executioncontext.py
+++ b/pypy/interpreter/executioncontext.py
@@ -58,13 +58,23 @@
         frame.f_backref = self.topframeref
         self.topframeref = jit.virtual_ref(frame)
 
-    def leave(self, frame, w_exitvalue):
+    def leave(self, frame, w_exitvalue, got_exception):
         try:
             if self.profilefunc:
                 self._trace(frame, 'leaveframe', w_exitvalue)
         finally:
+            frame_vref = self.topframeref
             self.topframeref = frame.f_backref
-            jit.virtual_ref_finish(frame)
+            if frame.escaped or got_exception:
+                # if this frame escaped to applevel, we must ensure that also
+                # f_back does
+                f_back = frame.f_backref()
+                if f_back:
+                    f_back.mark_as_escaped()
+                # force the frame (from the JIT point of view), so that it can
+                # be accessed also later
+                frame_vref()
+            jit.virtual_ref_finish(frame_vref, frame)
 
         if self.w_tracefunc is not None and not frame.hide():
             self.space.frame_trace_action.fire()
@@ -102,18 +112,16 @@
 
         # the following interface is for pickling and unpickling
         def getstate(self, space):
-            # XXX we could just save the top frame, which brings
-            # the whole frame stack, but right now we get the whole stack
-            items = [space.wrap(f) for f in self.getframestack()]
-            return space.newtuple(items)
+            if self.topframe is None:
+                return space.w_None
+            return self.topframe
 
         def setstate(self, space, w_state):
             from pypy.interpreter.pyframe import PyFrame
-            frames_w = space.unpackiterable(w_state)
-            if len(frames_w) > 0:
-                self.topframe = space.interp_w(PyFrame, frames_w[-1])
+            if space.is_w(w_state, space.w_None):
+                self.topframe = None
             else:
-                self.topframe = None
+                self.topframe = space.interp_w(PyFrame, w_state)
 
         def getframestack(self):
             lst = []
@@ -278,7 +286,7 @@
             if operr is not None:
                 w_value = operr.get_w_value(space)
                 w_arg = space.newtuple([operr.w_type, w_value,
-                                     space.wrap(operr.application_traceback)])
+                                     space.wrap(operr.get_traceback())])
 
             frame.fast2locals()
             self.is_tracing += 1
diff --git a/pypy/interpreter/main.py b/pypy/interpreter/main.py
--- a/pypy/interpreter/main.py
+++ b/pypy/interpreter/main.py
@@ -118,7 +118,7 @@
         operationerr.normalize_exception(space)
         w_type = operationerr.w_type
         w_value = operationerr.get_w_value(space)
-        w_traceback = space.wrap(operationerr.application_traceback)
+        w_traceback = space.wrap(operationerr.get_traceback())
 
         # for debugging convenience we also insert the exception into
         # the interpreter-level sys.last_xxx
diff --git a/pypy/interpreter/nestedscope.py b/pypy/interpreter/nestedscope.py
--- a/pypy/interpreter/nestedscope.py
+++ b/pypy/interpreter/nestedscope.py
@@ -127,6 +127,7 @@
         if self.cells is not None:
             self.cells[:ncellvars] = cellvars
 
+    @jit.dont_look_inside
     def fast2locals(self):
         super_fast2locals(self)
         # cellvars are values exported to inner scopes
@@ -145,6 +146,7 @@
                 w_name = self.space.wrap(name)
                 self.space.setitem(self.w_locals, w_name, w_value)
 
+    @jit.dont_look_inside
     def locals2fast(self):
         super_locals2fast(self)
         freevarnames = self.pycode.co_cellvars + self.pycode.co_freevars
diff --git a/pypy/interpreter/pycompiler.py b/pypy/interpreter/pycompiler.py
--- a/pypy/interpreter/pycompiler.py
+++ b/pypy/interpreter/pycompiler.py
@@ -101,9 +101,9 @@
     """
     def __init__(self, space, override_version=None):
         PyCodeCompiler.__init__(self, space)
-        self.parser = pyparse.PythonParser(space)
+        self.future_flags = future.futureFlags_2_7
+        self.parser = pyparse.PythonParser(space, self.future_flags)
         self.additional_rules = {}
-        self.future_flags = future.futureFlags_2_7
         self.compiler_flags = self.future_flags.allowed_flags
 
     def compile_ast(self, node, filename, mode, flags):
@@ -140,9 +140,6 @@
     def _compile_to_ast(self, source, info):
         space = self.space
         try:
-            f_flags, future_info = future.get_futures(self.future_flags, source)
-            info.last_future_import = future_info
-            info.flags |= f_flags
             parse_tree = self.parser.parse_source(source, info)
             mod = astbuilder.ast_from_node(space, parse_tree, info)
         except parseerror.IndentationError, e:
diff --git a/pypy/interpreter/pyframe.py b/pypy/interpreter/pyframe.py
--- a/pypy/interpreter/pyframe.py
+++ b/pypy/interpreter/pyframe.py
@@ -11,7 +11,7 @@
 from pypy.rlib.jit import hint
 from pypy.rlib.debug import make_sure_not_resized
 from pypy.rlib.rarithmetic import intmask
-from pypy.rlib import jit, rstack
+from pypy.rlib import jit
 from pypy.tool import stdlib_opcode
 from pypy.tool.stdlib_opcode import host_bytecode_spec
 
@@ -49,6 +49,7 @@
     instr_ub                 = 0
     instr_prev_plus_one      = 0
     is_being_profiled        = False
+    escaped                  = False  # see mark_as_escaped()
 
     def __init__(self, space, code, w_globals, closure):
         self = hint(self, access_directly=True, fresh_virtualizable=True)
@@ -67,6 +68,15 @@
         make_sure_not_resized(self.fastlocals_w)
         self.f_lineno = code.co_firstlineno
 
+    def mark_as_escaped(self):
+        """
+        Must be called on frames that are exposed to applevel, e.g. by
+        sys._getframe().  This ensures that the virtualref holding the frame
+        is properly forced by ec.leave(), and thus the frame will be still
+        accessible even after the corresponding C stack died.
+        """
+        self.escaped = True
+
     def append_block(self, block):
         block.previous = self.lastblock
         self.lastblock = block
@@ -138,6 +148,7 @@
                 not self.space.config.translating)
         executioncontext = self.space.getexecutioncontext()
         executioncontext.enter(self)
+        got_exception = True
         w_exitvalue = self.space.w_None
         try:
             executioncontext.call_trace(self)
@@ -157,8 +168,6 @@
             try:
                 w_exitvalue = self.dispatch(self.pycode, next_instr,
                                             executioncontext)
-                rstack.resume_point("execute_frame", self, executioncontext,
-                                    returns=w_exitvalue)
             except Exception:
                 executioncontext.return_trace(self, self.space.w_None)
                 raise
@@ -166,8 +175,9 @@
             # clean up the exception, might be useful for not
             # allocating exception objects in some cases
             self.last_exception = None
+            got_exception = False
         finally:
-            executioncontext.leave(self, w_exitvalue)
+            executioncontext.leave(self, w_exitvalue, got_exception)
         return w_exitvalue
     execute_frame.insert_stack_check_here = True
 
@@ -314,7 +324,7 @@
             w_tb = space.w_None
         else:
             w_exc_value = self.last_exception.get_w_value(space)
-            w_tb = w(self.last_exception.application_traceback)
+            w_tb = w(self.last_exception.get_traceback())
         
         tup_state = [
             w(self.f_backref()),
@@ -415,6 +425,7 @@
         "Get the fast locals as a list."
         return self.fastlocals_w
 
+    @jit.dont_look_inside
     def setfastscope(self, scope_w):
         """Initialize the fast locals from a list of values,
         where the order is according to self.pycode.signature()."""
@@ -634,7 +645,7 @@
             while f is not None and f.last_exception is None:
                 f = f.f_backref()
             if f is not None:
-                return space.wrap(f.last_exception.application_traceback)
+                return space.wrap(f.last_exception.get_traceback())
         return space.w_None
          
     def fget_f_restricted(self, space):
diff --git a/pypy/interpreter/pyopcode.py b/pypy/interpreter/pyopcode.py
--- a/pypy/interpreter/pyopcode.py
+++ b/pypy/interpreter/pyopcode.py
@@ -11,7 +11,7 @@
 from pypy.interpreter.pycode import PyCode
 from pypy.tool.sourcetools import func_with_new_name
 from pypy.rlib.objectmodel import we_are_translated
-from pypy.rlib import jit, rstackovf, rstack
+from pypy.rlib import jit, rstackovf
 from pypy.rlib.rarithmetic import r_uint, intmask
 from pypy.rlib.unroll import unrolling_iterable
 from pypy.rlib.debug import check_nonneg
@@ -83,16 +83,12 @@
         try:
             while True:
                 next_instr = self.handle_bytecode(co_code, next_instr, ec)
-                rstack.resume_point("dispatch", self, co_code, ec,
-                                    returns=next_instr)
         except ExitFrame:
             return self.popvalue()
 
     def handle_bytecode(self, co_code, next_instr, ec):
         try:
             next_instr = self.dispatch_bytecode(co_code, next_instr, ec)
-            rstack.resume_point("handle_bytecode", self, co_code, ec,
-                                returns=next_instr)
         except OperationError, operr:
             next_instr = self.handle_operation_error(ec, operr)
         except Reraise:
@@ -248,9 +244,6 @@
                         # dispatch to the opcode method
                         meth = getattr(self, opdesc.methodname)
                         res = meth(oparg, next_instr)
-                        if opdesc.index == self.opcodedesc.CALL_FUNCTION.index:
-                            rstack.resume_point("dispatch_call", self, co_code,
-                                                next_instr, ec)
                         # !! warning, for the annotator the next line is not
                         # comparing an int and None - you can't do that.
                         # Instead, it's constant-folded to either True or False
@@ -573,7 +566,7 @@
         else:
             msg = "raise: arg 3 must be a traceback or None"
             tb = pytraceback.check_traceback(space, w_traceback, msg)
-            operror.application_traceback = tb
+            operror.set_traceback(tb)
             # special 3-arguments raise, no new traceback obj will be attached
             raise RaiseWithExplicitTraceback(operror)
 
@@ -953,7 +946,7 @@
                       isinstance(unroller, SApplicationException))
         if is_app_exc:
             operr = unroller.operr
-            w_traceback = self.space.wrap(operr.application_traceback)
+            w_traceback = self.space.wrap(operr.get_traceback())
             w_suppress = self.call_contextmanager_exit_function(
                 w_exitfunc,
                 operr.w_type,
@@ -997,7 +990,6 @@
                                                           args)
         else:
             w_result = self.space.call_args(w_function, args)
-        rstack.resume_point("call_function", self, returns=w_result)
         self.pushvalue(w_result)
 
     def CALL_FUNCTION(self, oparg, next_instr):
@@ -1008,8 +1000,6 @@
             w_function = self.peekvalue(nargs)
             try:
                 w_result = self.space.call_valuestack(w_function, nargs, self)
-                rstack.resume_point("CALL_FUNCTION", self, nargs,
-                                    returns=w_result)
             finally:
                 self.dropvalues(nargs + 1)
             self.pushvalue(w_result)
@@ -1087,6 +1077,7 @@
         w_dict = self.space.newdict()
         self.pushvalue(w_dict)
 
+    @jit.unroll_safe
     def BUILD_SET(self, itemcount, next_instr):
         w_set = self.space.call_function(self.space.w_set)
         if itemcount:
diff --git a/pypy/interpreter/pyparser/pyparse.py b/pypy/interpreter/pyparser/pyparse.py
--- a/pypy/interpreter/pyparser/pyparse.py
+++ b/pypy/interpreter/pyparser/pyparse.py
@@ -1,6 +1,6 @@
 from pypy.interpreter import gateway
 from pypy.interpreter.error import OperationError
-from pypy.interpreter.pyparser import parser, pytokenizer, pygram, error
+from pypy.interpreter.pyparser import future, parser, pytokenizer, pygram, error
 from pypy.interpreter.astcompiler import consts
 
 
@@ -88,9 +88,11 @@
 
 class PythonParser(parser.Parser):
 
-    def __init__(self, space, grammar=pygram.python_grammar):
+    def __init__(self, space, future_flags=future.futureFlags_2_7,
+                 grammar=pygram.python_grammar):
         parser.Parser.__init__(self, grammar)
         self.space = space
+        self.future_flags = future_flags
 
     def parse_source(self, textsrc, compile_info):
         """Main entry point for parsing Python source.
@@ -133,6 +135,10 @@
                         raise error.SyntaxError(space.str_w(w_message))
                     raise
 
+        f_flags, future_info = future.get_futures(self.future_flags, textsrc)
+        compile_info.last_future_import = future_info
+        compile_info.flags |= f_flags
+
         flags = compile_info.flags
 
         if flags & consts.CO_FUTURE_PRINT_FUNCTION:
diff --git a/pypy/interpreter/pytraceback.py b/pypy/interpreter/pytraceback.py
--- a/pypy/interpreter/pytraceback.py
+++ b/pypy/interpreter/pytraceback.py
@@ -51,9 +51,9 @@
 def record_application_traceback(space, operror, frame, last_instruction):
     if frame.pycode.hidden_applevel:
         return
-    tb = operror.application_traceback
+    tb = operror.get_traceback()
     tb = PyTraceback(space, frame, last_instruction, tb)
-    operror.application_traceback = tb
+    operror.set_traceback(tb)
 
 def offset2lineno(c, stopat):
     tab = c.co_lnotab
diff --git a/pypy/interpreter/test/test_compiler.py b/pypy/interpreter/test/test_compiler.py
--- a/pypy/interpreter/test/test_compiler.py
+++ b/pypy/interpreter/test/test_compiler.py
@@ -714,6 +714,12 @@
 
 class AppTestCompiler:
 
+    def test_bom_with_future(self):
+        s = '\xef\xbb\xbffrom __future__ import division\nx = 1/2'
+        ns = {}
+        exec s in ns
+        assert ns["x"] == .5
+
     def test_values_of_different_types(self):
         exec "a = 0; b = 0L; c = 0.0; d = 0j"
         assert type(a) is int
diff --git a/pypy/interpreter/test/test_pyframe.py b/pypy/interpreter/test/test_pyframe.py
--- a/pypy/interpreter/test/test_pyframe.py
+++ b/pypy/interpreter/test/test_pyframe.py
@@ -98,6 +98,15 @@
             return sys._getframe().f_back.f_code.co_name 
         f()
 
+    def test_f_back_virtualref(self):
+        import sys
+        def f():
+            return g()
+        def g():
+            return sys._getframe()
+        frame = f()
+        assert frame.f_back.f_code.co_name == 'f'
+
     def test_f_exc_xxx(self):
         import sys
 
@@ -122,6 +131,21 @@
         except:
             g(sys.exc_info())
 
+    def test_virtualref_through_traceback(self):
+        import sys
+        def g():
+            try:
+                raise ValueError
+            except:
+                _, _, tb = sys.exc_info()
+            return tb
+        def f():
+            return g()
+        #
+        tb = f()
+        assert tb.tb_frame.f_code.co_name == 'g'
+        assert tb.tb_frame.f_back.f_code.co_name == 'f'
+
     def test_trace_basic(self):
         import sys
         l = []
diff --git a/pypy/jit/backend/llgraph/llimpl.py b/pypy/jit/backend/llgraph/llimpl.py
--- a/pypy/jit/backend/llgraph/llimpl.py
+++ b/pypy/jit/backend/llgraph/llimpl.py
@@ -600,15 +600,15 @@
         #
         return _op_default_implementation
 
-    def op_debug_merge_point(self, _, value, recdepth):
+    def op_debug_merge_point(self, _, *args):
         from pypy.jit.metainterp.warmspot import get_stats
-        loc = ConstPtr(value)._get_str()
         try:
             stats = get_stats()
         except AttributeError:
             pass
         else:
-            stats.add_merge_point_location(loc)
+            stats.add_merge_point_location(args[1:])
+        pass
 
     def op_guard_true(self, _, value):
         if not value:
@@ -820,6 +820,12 @@
             raise NotImplementedError
 
     def op_call(self, calldescr, func, *args):
+        return self._do_call(calldescr, func, args, call_with_llptr=False)
+
+    def op_call_release_gil(self, calldescr, func, *args):
+        return self._do_call(calldescr, func, args, call_with_llptr=True)
+
+    def _do_call(self, calldescr, func, args, call_with_llptr):
         global _last_exception
         assert _last_exception is None, "exception left behind"
         assert _call_args_i == _call_args_r == _call_args_f == []
@@ -838,7 +844,8 @@
             else:
                 raise TypeError(x)
         try:
-            return _do_call_common(func, args_in_order, calldescr)
+            return _do_call_common(func, args_in_order, calldescr,
+                                   call_with_llptr)
         except LLException, lle:
             _last_exception = lle
             d = {'v': None,
@@ -1480,17 +1487,20 @@
     'v': lltype.Void,
     }
 
-def _do_call_common(f, args_in_order=None, calldescr=None):
+def _do_call_common(f, args_in_order=None, calldescr=None,
+                    call_with_llptr=False):
     ptr = llmemory.cast_int_to_adr(f).ptr
     PTR = lltype.typeOf(ptr)
     if PTR == rffi.VOIDP:
         # it's a pointer to a C function, so we don't have a precise
         # signature: create one from the descr
+        assert call_with_llptr is True
         ARGS = map(kind2TYPE.get, calldescr.arg_types)
         RESULT = kind2TYPE[calldescr.typeinfo]
         FUNC = lltype.FuncType(ARGS, RESULT)
         func_to_call = rffi.cast(lltype.Ptr(FUNC), ptr)
     else:
+        assert call_with_llptr is False
         FUNC = PTR.TO
         ARGS = FUNC.ARGS
         func_to_call = ptr._obj._callable
diff --git a/pypy/jit/backend/llgraph/runner.py b/pypy/jit/backend/llgraph/runner.py
--- a/pypy/jit/backend/llgraph/runner.py
+++ b/pypy/jit/backend/llgraph/runner.py
@@ -134,7 +134,7 @@
         old, oldindex = faildescr._compiled_fail
         llimpl.compile_redirect_fail(old, oldindex, c)
 
-    def compile_loop(self, inputargs, operations, looptoken, log=True):
+    def compile_loop(self, inputargs, operations, looptoken, log=True, name=''):
         """In a real assembler backend, this should assemble the given
         list of operations.  Here we just generate a similar CompiledLoop
         instance.  The code here is RPython, whereas the code in llimpl
diff --git a/pypy/jit/backend/llsupport/ffisupport.py b/pypy/jit/backend/llsupport/ffisupport.py
--- a/pypy/jit/backend/llsupport/ffisupport.py
+++ b/pypy/jit/backend/llsupport/ffisupport.py
@@ -3,13 +3,16 @@
 from pypy.jit.backend.llsupport.descr import DynamicIntCallDescr, NonGcPtrCallDescr,\
     FloatCallDescr, VoidCallDescr
 
+class UnsupportedKind(Exception):
+    pass
+
 def get_call_descr_dynamic(ffi_args, ffi_result, extrainfo=None):
     """Get a call descr: the types of result and args are represented by
     rlib.libffi.types.*"""
     try:
         reskind = get_ffi_type_kind(ffi_result)
         argkinds = [get_ffi_type_kind(arg) for arg in ffi_args]
-    except KeyError:
+    except UnsupportedKind:
         return None # ??
     arg_classes = ''.join(argkinds)
     if reskind == history.INT:
@@ -33,7 +36,7 @@
         return history.FLOAT
     elif kind == 'v':
         return history.VOID
-    assert False, "Unsupported kind '%s'" % kind
+    raise UnsupportedKind("Unsupported kind '%s'" % kind)
 
 def is_ffi_type_signed(ffi_type):
     from pypy.rlib.libffi import types
diff --git a/pypy/jit/backend/llsupport/gc.py b/pypy/jit/backend/llsupport/gc.py
--- a/pypy/jit/backend/llsupport/gc.py
+++ b/pypy/jit/backend/llsupport/gc.py
@@ -34,7 +34,7 @@
         pass
     def do_write_barrier(self, gcref_struct, gcref_newptr):
         pass
-    def rewrite_assembler(self, cpu, operations):
+    def rewrite_assembler(self, cpu, operations, gcrefs_output_list):
         return operations
     def can_inline_malloc(self, descr):
         return False
@@ -146,78 +146,6 @@
 # All code below is for the hybrid or minimark GC
 
 
-class GcRefList:
-    """Handles all references from the generated assembler to GC objects.
-    This is implemented as a nonmovable, but GC, list; the assembler contains
-    code that will (for now) always read from this list."""
-
-    GCREF_LIST = lltype.GcArray(llmemory.GCREF)     # followed by the GC
-
-    HASHTABLE = rffi.CArray(llmemory.Address)      # ignored by the GC
-    HASHTABLE_BITS = 10
-    HASHTABLE_SIZE = 1 << HASHTABLE_BITS
-
-    def initialize(self):
-        if we_are_translated(): n = 2000
-        else:                   n = 10    # tests only
-        self.list = self.alloc_gcref_list(n)
-        self.nextindex = 0
-        self.oldlists = []
-        # A pseudo dictionary: it is fixed size, and it may contain
-        # random nonsense after a collection moved the objects.  It is only
-        # used to avoid too many duplications in the GCREF_LISTs.
-        self.hashtable = lltype.malloc(self.HASHTABLE,
-                                       self.HASHTABLE_SIZE+1,
-                                       flavor='raw', track_allocation=False)
-        dummy = lltype.direct_ptradd(lltype.direct_arrayitems(self.hashtable),
-                                     self.HASHTABLE_SIZE)
-        dummy = llmemory.cast_ptr_to_adr(dummy)
-        for i in range(self.HASHTABLE_SIZE+1):
-            self.hashtable[i] = dummy
-
-    def alloc_gcref_list(self, n):
-        # Important: the GRREF_LISTs allocated are *non-movable*.  This
-        # requires support in the gc (hybrid GC or minimark GC so far).
-        if we_are_translated():
-            list = rgc.malloc_nonmovable(self.GCREF_LIST, n)
-            assert list, "malloc_nonmovable failed!"
-        else:
-            list = lltype.malloc(self.GCREF_LIST, n)     # for tests only
-        return list
-
-    def get_address_of_gcref(self, gcref):
-        assert lltype.typeOf(gcref) == llmemory.GCREF
-        # first look in the hashtable, using an inexact hash (fails after
-        # the object moves)
-        addr = llmemory.cast_ptr_to_adr(gcref)
-        hash = llmemory.cast_adr_to_int(addr, "forced")
-        hash -= hash >> self.HASHTABLE_BITS
-        hash &= self.HASHTABLE_SIZE - 1
-        addr_ref = self.hashtable[hash]
-        # the following test is safe anyway, because the addresses found
-        # in the hashtable are always the addresses of nonmovable stuff
-        # ('addr_ref' is an address inside self.list, not directly the
-        # address of a real moving GC object -- that's 'addr_ref.address[0]'.)
-        if addr_ref.address[0] == addr:
-            return addr_ref
-        # if it fails, add an entry to the list
-        if self.nextindex == len(self.list):
-            # reallocate first, increasing a bit the size every time
-            self.oldlists.append(self.list)
-            self.list = self.alloc_gcref_list(len(self.list) // 4 * 5)
-            self.nextindex = 0
-        # add it
-        index = self.nextindex
-        self.list[index] = gcref
-        addr_ref = lltype.direct_ptradd(lltype.direct_arrayitems(self.list),
-                                        index)
-        addr_ref = llmemory.cast_ptr_to_adr(addr_ref)
-        self.nextindex = index + 1
-        # record it in the hashtable
-        self.hashtable[hash] = addr_ref
-        return addr_ref
-
-
 class GcRootMap_asmgcc(object):
     """Handles locating the stack roots in the assembler.
     This is the class supporting --gcrootfinder=asmgcc.
@@ -527,6 +455,7 @@
     def __init__(self, gc_ll_descr):
         self.llop1 = gc_ll_descr.llop1
         self.WB_FUNCPTR = gc_ll_descr.WB_FUNCPTR
+        self.WB_ARRAY_FUNCPTR = gc_ll_descr.WB_ARRAY_FUNCPTR
         self.fielddescr_tid = get_field_descr(gc_ll_descr,
                                               gc_ll_descr.GCClass.HDR, 'tid')
         self.jit_wb_if_flag = gc_ll_descr.GCClass.JIT_WB_IF_FLAG
@@ -546,6 +475,13 @@
         funcaddr = llmemory.cast_ptr_to_adr(funcptr)
         return cpu.cast_adr_to_int(funcaddr)
 
+    def get_write_barrier_from_array_fn(self, cpu):
+        llop1 = self.llop1
+        funcptr = llop1.get_write_barrier_from_array_failing_case(
+            self.WB_ARRAY_FUNCPTR)
+        funcaddr = llmemory.cast_ptr_to_adr(funcptr)
+        return cpu.cast_adr_to_int(funcaddr)    # this may return 0
+
 
 class GcLLDescr_framework(GcLLDescription):
     DEBUG = False    # forced to True by x86/test/test_zrpy_gc.py
@@ -559,7 +495,7 @@
         self.translator = translator
         self.llop1 = llop1
 
-        # we need the hybrid or minimark GC for GcRefList.alloc_gcref_list()
+        # we need the hybrid or minimark GC for rgc._make_sure_does_not_move()
         # to work
         if gcdescr.config.translation.gc not in ('hybrid', 'minimark'):
             raise NotImplementedError("--gc=%s not implemented with the JIT" %
@@ -574,8 +510,6 @@
                                       " with the JIT" % (name,))
         gcrootmap = cls(gcdescr)
         self.gcrootmap = gcrootmap
-        self.gcrefs = GcRefList()
-        self.single_gcref_descr = GcPtrFieldDescr('', 0)
 
         # make a TransformerLayoutBuilder and save it on the translator
         # where it can be fished and reused by the FrameworkGCTransformer
@@ -617,6 +551,8 @@
             [lltype.Signed, lltype.Signed], llmemory.GCREF))
         self.WB_FUNCPTR = lltype.Ptr(lltype.FuncType(
             [llmemory.Address, llmemory.Address], lltype.Void))
+        self.WB_ARRAY_FUNCPTR = lltype.Ptr(lltype.FuncType(
+            [llmemory.Address, lltype.Signed], lltype.Void))
         self.write_barrier_descr = WriteBarrierDescr(self)
         #
         def malloc_array(itemsize, tid, num_elem):
@@ -706,7 +642,6 @@
         return rffi.cast(lltype.Signed, fptr)
 
     def initialize(self):
-        self.gcrefs.initialize()
         self.gcrootmap.initialize()
 
     def init_size_descr(self, S, descr):
@@ -768,54 +703,32 @@
             funcptr(llmemory.cast_ptr_to_adr(gcref_struct),
                     llmemory.cast_ptr_to_adr(gcref_newptr))
 
-    def replace_constptrs_with_getfield_raw(self, cpu, newops, op):
-        # xxx some performance issue here
-        newargs = [None] * op.numargs()
-        needs_copy = False
+    def record_constptrs(self, op, gcrefs_output_list):
         for i in range(op.numargs()):
             v = op.getarg(i)
-            newargs[i] = v
             if isinstance(v, ConstPtr) and bool(v.value):
-                addr = self.gcrefs.get_address_of_gcref(v.value)
-                # ^^^even for non-movable objects, to record their presence
-                if rgc.can_move(v.value):
-                    box = BoxPtr(v.value)
-                    addr = cpu.cast_adr_to_int(addr)
-                    newops.append(ResOperation(rop.GETFIELD_RAW,
-                                               [ConstInt(addr)], box,
-                                               self.single_gcref_descr))
-                    newargs[i] = box
-                    needs_copy = True
-        #
-        if needs_copy:
-            return op.copy_and_change(op.getopnum(), args=newargs)
-        else:
-            return op
+                p = v.value
+                rgc._make_sure_does_not_move(p)
+                gcrefs_output_list.append(p)
 
-
-    def rewrite_assembler(self, cpu, operations):
+    def rewrite_assembler(self, cpu, operations, gcrefs_output_list):
         # Perform two kinds of rewrites in parallel:
         #
         # - Add COND_CALLs to the write barrier before SETFIELD_GC and
         #   SETARRAYITEM_GC operations.
         #
-        # - Remove all uses of ConstPtrs away from the assembler.
-        #   Idea: when running on a moving GC, we can't (easily) encode
-        #   the ConstPtrs in the assembler, because they can move at any
-        #   point in time.  Instead, we store them in 'gcrefs.list', a GC
-        #   but nonmovable list; and here, we modify 'operations' to
-        #   replace direct usage of ConstPtr with a BoxPtr loaded by a
-        #   GETFIELD_RAW from the array 'gcrefs.list'.
+        # - Record the ConstPtrs from the assembler.
         #
         newops = []
+        known_lengths = {}
         # we can only remember one malloc since the next malloc can possibly
         # collect
         last_malloc = None
         for op in operations:
             if op.getopnum() == rop.DEBUG_MERGE_POINT:
                 continue
-            # ---------- replace ConstPtrs with GETFIELD_RAW ----------
-            op = self.replace_constptrs_with_getfield_raw(cpu, newops, op)
+            # ---------- record the ConstPtrs ----------
+            self.record_constptrs(op, gcrefs_output_list)
             if op.is_malloc():
                 last_malloc = op.result
             elif op.can_malloc():
@@ -838,19 +751,40 @@
                     v = op.getarg(2)
                     if isinstance(v, BoxPtr) or (isinstance(v, ConstPtr) and
                                             bool(v.value)): # store a non-NULL
-                        # XXX detect when we should produce a
-                        # write_barrier_from_array
-                        self._gen_write_barrier(newops, op.getarg(0), v)
+                        self._gen_write_barrier_array(newops, op.getarg(0),
+                                                      op.getarg(1), v,
+                                                      cpu, known_lengths)
                         op = op.copy_and_change(rop.SETARRAYITEM_RAW)
+            elif op.getopnum() == rop.NEW_ARRAY:
+                v_length = op.getarg(0)
+                if isinstance(v_length, ConstInt):
+                    known_lengths[op.result] = v_length.getint()
             # ----------
             newops.append(op)
         return newops
 
-    def _gen_write_barrier(self, newops, v_base, v_value):
-        args = [v_base, v_value]
+    def _gen_write_barrier(self, newops, v_base, v_value_or_index):
+        # NB. the 2nd argument of COND_CALL_GC_WB is either a pointer
+        # (regular case), or an index (case of write_barrier_from_array)
+        args = [v_base, v_value_or_index]
         newops.append(ResOperation(rop.COND_CALL_GC_WB, args, None,
                                    descr=self.write_barrier_descr))
 
+    def _gen_write_barrier_array(self, newops, v_base, v_index, v_value,
+                                 cpu, known_lengths):
+        if self.write_barrier_descr.get_write_barrier_from_array_fn(cpu) != 0:
+            # If we know statically the length of 'v', and it is not too
+            # big, then produce a regular write_barrier.  If it's unknown or
+            # too big, produce instead a write_barrier_from_array.
+            LARGE = 130
+            length = known_lengths.get(v_base, LARGE)
+            if length >= LARGE:
+                # unknown or too big: produce a write_barrier_from_array
+                self._gen_write_barrier(newops, v_base, v_index)
+                return
+        # fall-back case: produce a write_barrier
+        self._gen_write_barrier(newops, v_base, v_value)
+
     def can_inline_malloc(self, descr):
         assert isinstance(descr, BaseSizeDescr)
         if descr.size < self.max_size_of_young_obj:
diff --git a/pypy/jit/backend/llsupport/llmodel.py b/pypy/jit/backend/llsupport/llmodel.py
--- a/pypy/jit/backend/llsupport/llmodel.py
+++ b/pypy/jit/backend/llsupport/llmodel.py
@@ -143,11 +143,11 @@
         STACK_CHECK_SLOWPATH = lltype.Ptr(lltype.FuncType([lltype.Signed],
                                                           lltype.Void))
         def insert_stack_check():
-            startaddr = rstack._stack_get_start_adr()
-            length = rstack._stack_get_length()
+            endaddr = rstack._stack_get_end_adr()
+            lengthaddr = rstack._stack_get_length_adr()
             f = llhelper(STACK_CHECK_SLOWPATH, rstack.stack_check_slowpath)
             slowpathaddr = rffi.cast(lltype.Signed, f)
-            return startaddr, length, slowpathaddr
+            return endaddr, lengthaddr, slowpathaddr
 
         self.pos_exception = pos_exception
         self.pos_exc_value = pos_exc_value
diff --git a/pypy/jit/backend/llsupport/regalloc.py b/pypy/jit/backend/llsupport/regalloc.py
--- a/pypy/jit/backend/llsupport/regalloc.py
+++ b/pypy/jit/backend/llsupport/regalloc.py
@@ -37,6 +37,11 @@
         self.frame_depth += size
         return newloc
 
+    def reserve_location_in_frame(self, size):
+        frame_depth = self.frame_depth
+        self.frame_depth += size
+        return frame_depth
+
     # abstract methods that need to be overwritten for specific assemblers
     @staticmethod
     def frame_pos(loc, type):
@@ -213,6 +218,15 @@
         self.reg_bindings[v] = loc
         return loc
 
+    def force_spill_var(self, var):
+        self._sync_var(var)
+        try:
+            loc = self.reg_bindings[var]
+            del self.reg_bindings[var]
+            self.free_regs.append(loc)
+        except KeyError:
+            pass   # 'var' is already not in a register
+
     def loc(self, box):
         """ Return the location of 'box'.
         """
diff --git a/pypy/jit/backend/llsupport/test/test_gc.py b/pypy/jit/backend/llsupport/test/test_gc.py
--- a/pypy/jit/backend/llsupport/test/test_gc.py
+++ b/pypy/jit/backend/llsupport/test/test_gc.py
@@ -49,19 +49,6 @@
 
 # ____________________________________________________________
 
-def test_GcRefList():
-    S = lltype.GcStruct('S')
-    order = range(50) * 4
-    random.shuffle(order)
-    allocs = [lltype.cast_opaque_ptr(llmemory.GCREF, lltype.malloc(S))
-              for i in range(50)]
-    allocs = [allocs[i] for i in order]
-    #
-    gcrefs = GcRefList()
-    gcrefs.initialize()
-    addrs = [gcrefs.get_address_of_gcref(ptr) for ptr in allocs]
-    for i in range(len(allocs)):
-        assert addrs[i].address[0] == llmemory.cast_ptr_to_adr(allocs[i])
 
 class TestGcRootMapAsmGcc:
 
@@ -288,6 +275,18 @@
     def get_write_barrier_failing_case(self, FPTRTYPE):
         return llhelper(FPTRTYPE, self._write_barrier_failing_case)
 
+    _have_wb_from_array = False
+
+    def _write_barrier_from_array_failing_case(self, adr_struct, v_index):
+        self.record.append(('barrier_from_array', adr_struct, v_index))
+
+    def get_write_barrier_from_array_failing_case(self, FPTRTYPE):
+        if self._have_wb_from_array:
+            return llhelper(FPTRTYPE,
+                            self._write_barrier_from_array_failing_case)
+        else:
+            return lltype.nullptr(FPTRTYPE.TO)
+
 
 class TestFramework(object):
     gc = 'hybrid'
@@ -303,9 +302,20 @@
             config = config_
         class FakeCPU(object):
             def cast_adr_to_int(self, adr):
-                ptr = llmemory.cast_adr_to_ptr(adr, gc_ll_descr.WB_FUNCPTR)
-                assert ptr._obj._callable == llop1._write_barrier_failing_case
-                return 42
+                if not adr:
+                    return 0
+                try:
+                    ptr = llmemory.cast_adr_to_ptr(adr, gc_ll_descr.WB_FUNCPTR)
+                    assert ptr._obj._callable == \
+                           llop1._write_barrier_failing_case
+                    return 42
+                except lltype.InvalidCast:
+                    ptr = llmemory.cast_adr_to_ptr(
+                        adr, gc_ll_descr.WB_ARRAY_FUNCPTR)
+                    assert ptr._obj._callable == \
+                           llop1._write_barrier_from_array_failing_case
+                    return 43
+
         gcdescr = get_description(config_)
         translator = FakeTranslator()
         llop1 = FakeLLOp()
@@ -414,11 +424,11 @@
             ResOperation(rop.DEBUG_MERGE_POINT, ['dummy', 2], None),
             ]
         gc_ll_descr = self.gc_ll_descr
-        operations = gc_ll_descr.rewrite_assembler(None, operations)
+        operations = gc_ll_descr.rewrite_assembler(None, operations, [])
         assert len(operations) == 0
 
     def test_rewrite_assembler_1(self):
-        # check rewriting of ConstPtrs
+        # check recording of ConstPtrs
         class MyFakeCPU(object):
             def cast_adr_to_int(self, adr):
                 assert adr == "some fake address"
@@ -438,56 +448,12 @@
             ]
         gc_ll_descr = self.gc_ll_descr
         gc_ll_descr.gcrefs = MyFakeGCRefList()
+        gcrefs = []
         operations = get_deep_immutable_oplist(operations)
-        operations = gc_ll_descr.rewrite_assembler(MyFakeCPU(), operations)
-        assert len(operations) == 2
-        assert operations[0].getopnum() == rop.GETFIELD_RAW
-        assert operations[0].getarg(0) == ConstInt(43)
-        assert operations[0].getdescr() == gc_ll_descr.single_gcref_descr
-        v_box = operations[0].result
-        assert isinstance(v_box, BoxPtr)
-        assert operations[1].getopnum() == rop.PTR_EQ
-        assert operations[1].getarg(0) == v_random_box
-        assert operations[1].getarg(1) == v_box
-        assert operations[1].result == v_result
-
-    def test_rewrite_assembler_1_cannot_move(self):
-        # check rewriting of ConstPtrs
-        class MyFakeCPU(object):
-            def cast_adr_to_int(self, adr):
-                xxx    # should not be called
-        class MyFakeGCRefList(object):
-            def get_address_of_gcref(self, s_gcref1):
-                seen.append(s_gcref1)
-                assert s_gcref1 == s_gcref
-                return "some fake address"
-        seen = []
-        S = lltype.GcStruct('S')
-        s = lltype.malloc(S)
-        s_gcref = lltype.cast_opaque_ptr(llmemory.GCREF, s)
-        v_random_box = BoxPtr()
-        v_result = BoxInt()
-        operations = [
-            ResOperation(rop.PTR_EQ, [v_random_box, ConstPtr(s_gcref)],
-                         v_result),
-            ]
-        gc_ll_descr = self.gc_ll_descr
-        gc_ll_descr.gcrefs = MyFakeGCRefList()
-        old_can_move = rgc.can_move
-        operations = get_deep_immutable_oplist(operations)
-        try:
-            rgc.can_move = lambda s: False
-            operations = gc_ll_descr.rewrite_assembler(MyFakeCPU(), operations)
-        finally:
-            rgc.can_move = old_can_move
-        assert len(operations) == 1
-        assert operations[0].getopnum() == rop.PTR_EQ
-        assert operations[0].getarg(0) == v_random_box
-        assert operations[0].getarg(1) == ConstPtr(s_gcref)
-        assert operations[0].result == v_result
-        # check that s_gcref gets added to the list anyway, to make sure
-        # that the GC sees it
-        assert seen == [s_gcref]
+        operations2 = gc_ll_descr.rewrite_assembler(MyFakeCPU(), operations,
+                                                   gcrefs)
+        assert operations2 == operations
+        assert gcrefs == [s_gcref]
 
     def test_rewrite_assembler_2(self):
         # check write barriers before SETFIELD_GC
@@ -500,7 +466,8 @@
             ]
         gc_ll_descr = self.gc_ll_descr
         operations = get_deep_immutable_oplist(operations)
-        operations = gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
+        operations = gc_ll_descr.rewrite_assembler(self.fake_cpu, operations,
+                                                   [])
         assert len(operations) == 2
         #
         assert operations[0].getopnum() == rop.COND_CALL_GC_WB
@@ -515,29 +482,90 @@
 
     def test_rewrite_assembler_3(self):
         # check write barriers before SETARRAYITEM_GC
-        v_base = BoxPtr()
-        v_index = BoxInt()
-        v_value = BoxPtr()
-        array_descr = AbstractDescr()
-        operations = [
-            ResOperation(rop.SETARRAYITEM_GC, [v_base, v_index, v_value], None,
-                         descr=array_descr),
-            ]
-        gc_ll_descr = self.gc_ll_descr
-        operations = get_deep_immutable_oplist(operations)
-        operations = gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
-        assert len(operations) == 2
-        #
-        assert operations[0].getopnum() == rop.COND_CALL_GC_WB
-        assert operations[0].getarg(0) == v_base
-        assert operations[0].getarg(1) == v_value
-        assert operations[0].result is None
-        #
-        assert operations[1].getopnum() == rop.SETARRAYITEM_RAW
-        assert operations[1].getarg(0) == v_base
-        assert operations[1].getarg(1) == v_index
-        assert operations[1].getarg(2) == v_value
-        assert operations[1].getdescr() == array_descr
+        for v_new_length in (None, ConstInt(5), ConstInt(5000), BoxInt()):
+            v_base = BoxPtr()
+            v_index = BoxInt()
+            v_value = BoxPtr()
+            array_descr = AbstractDescr()
+            operations = [
+                ResOperation(rop.SETARRAYITEM_GC, [v_base, v_index, v_value],
+                             None, descr=array_descr),
+                ]
+            if v_new_length is not None:
+                operations.insert(0, ResOperation(rop.NEW_ARRAY,
+                                                  [v_new_length], v_base,
+                                                  descr=array_descr))
+                # we need to insert another, unrelated NEW_ARRAY here
+                # to prevent the initialization_store optimization
+                operations.insert(1, ResOperation(rop.NEW_ARRAY,
+                                                  [ConstInt(12)], BoxPtr(),
+                                                  descr=array_descr))
+            gc_ll_descr = self.gc_ll_descr
+            operations = get_deep_immutable_oplist(operations)
+            operations = gc_ll_descr.rewrite_assembler(self.fake_cpu,
+                                                       operations, [])
+            if v_new_length is not None:
+                assert operations[0].getopnum() == rop.NEW_ARRAY
+                assert operations[1].getopnum() == rop.NEW_ARRAY
+                del operations[:2]
+            assert len(operations) == 2
+            #
+            assert operations[0].getopnum() == rop.COND_CALL_GC_WB
+            assert operations[0].getarg(0) == v_base
+            assert operations[0].getarg(1) == v_value
+            assert operations[0].result is None
+            #
+            assert operations[1].getopnum() == rop.SETARRAYITEM_RAW
+            assert operations[1].getarg(0) == v_base
+            assert operations[1].getarg(1) == v_index
+            assert operations[1].getarg(2) == v_value
+            assert operations[1].getdescr() == array_descr
+
+    def test_rewrite_assembler_4(self):
+        # check write barriers before SETARRAYITEM_GC,
+        # if we have actually a write_barrier_from_array.
+        self.llop1._have_wb_from_array = True
+        for v_new_length in (None, ConstInt(5), ConstInt(5000), BoxInt()):
+            v_base = BoxPtr()
+            v_index = BoxInt()
+            v_value = BoxPtr()
+            array_descr = AbstractDescr()
+            operations = [
+                ResOperation(rop.SETARRAYITEM_GC, [v_base, v_index, v_value],
+                             None, descr=array_descr),
+                ]
+            if v_new_length is not None:
+                operations.insert(0, ResOperation(rop.NEW_ARRAY,
+                                                  [v_new_length], v_base,
+                                                  descr=array_descr))
+                # we need to insert another, unrelated NEW_ARRAY here
+                # to prevent the initialization_store optimization
+                operations.insert(1, ResOperation(rop.NEW_ARRAY,
+                                                  [ConstInt(12)], BoxPtr(),
+                                                  descr=array_descr))
+            gc_ll_descr = self.gc_ll_descr
+            operations = get_deep_immutable_oplist(operations)
+            operations = gc_ll_descr.rewrite_assembler(self.fake_cpu,
+                                                       operations, [])
+            if v_new_length is not None:
+                assert operations[0].getopnum() == rop.NEW_ARRAY
+                assert operations[1].getopnum() == rop.NEW_ARRAY
+                del operations[:2]
+            assert len(operations) == 2
+            #
+            assert operations[0].getopnum() == rop.COND_CALL_GC_WB
+            assert operations[0].getarg(0) == v_base
+            if isinstance(v_new_length, ConstInt) and v_new_length.value < 130:
+                assert operations[0].getarg(1) == v_value
+            else:
+                assert operations[0].getarg(1) == v_index
+            assert operations[0].result is None
+            #
+            assert operations[1].getopnum() == rop.SETARRAYITEM_RAW
+            assert operations[1].getarg(0) == v_base
+            assert operations[1].getarg(1) == v_index
+            assert operations[1].getarg(2) == v_value
+            assert operations[1].getdescr() == array_descr
 
     def test_rewrite_assembler_initialization_store(self):
         S = lltype.GcStruct('S', ('parent', OBJECT),
@@ -558,7 +586,8 @@
         jump()
         """, namespace=locals())
         operations = get_deep_immutable_oplist(ops.operations)
-        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
+        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu,
+                                                        operations, [])
         equaloplists(operations, expected.operations)
 
     def test_rewrite_assembler_initialization_store_2(self):
@@ -583,7 +612,8 @@
         jump()
         """, namespace=locals())
         operations = get_deep_immutable_oplist(ops.operations)
-        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
+        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu,
+                                                        operations, [])
         equaloplists(operations, expected.operations)
 
     def test_rewrite_assembler_initialization_store_3(self):
@@ -602,7 +632,8 @@
         jump()
         """, namespace=locals())
         operations = get_deep_immutable_oplist(ops.operations)
-        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
+        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu,
+                                                        operations, [])
         equaloplists(operations, expected.operations)
 
 class TestFrameworkMiniMark(TestFramework):
diff --git a/pypy/jit/backend/model.py b/pypy/jit/backend/model.py
--- a/pypy/jit/backend/model.py
+++ b/pypy/jit/backend/model.py
@@ -53,7 +53,7 @@
         """Called once by the front-end when the program stops."""
         pass
 
-    def compile_loop(self, inputargs, operations, looptoken, log=True):
+    def compile_loop(self, inputargs, operations, looptoken, log=True, name=''):
         """Assemble the given loop.
         Should create and attach a fresh CompiledLoopToken to
         looptoken.compiled_loop_token and stick extra attributes
diff --git a/pypy/jit/backend/test/calling_convention_test.py b/pypy/jit/backend/test/calling_convention_test.py
--- a/pypy/jit/backend/test/calling_convention_test.py
+++ b/pypy/jit/backend/test/calling_convention_test.py
@@ -23,6 +23,7 @@
 
 def constfloat(x):
     return ConstFloat(longlong.getfloatstorage(x))
+
 class FakeStats(object):
     pass
 class TestCallingConv(Runner):
@@ -30,56 +31,172 @@
     Ptr = lltype.Ptr
     FuncType = lltype.FuncType
 
-    def __init__(self):
-        self.cpu = getcpuclass()(rtyper=None, stats=FakeStats())
-        self.cpu.setup_once()
+    def setup_class(cls):
+        cls.cpu = getcpuclass()(rtyper=None, stats=FakeStats())
+        cls.cpu.setup_once()
+
+    def _prepare_args(self, args, floats, ints):
+        local_floats = list(floats)
+        local_ints = list(ints)
+        expected_result = 0.0
+        for i in range(len(args)):
+            x = args[i]
+            if x[0] == 'f':
+                x = local_floats.pop()
+                t = longlong.getfloatstorage(x)
+                self.cpu.set_future_value_float(i, t)
+            else:
+                x = local_ints.pop()
+                self.cpu.set_future_value_int(i, x)
+            expected_result += x
+        return expected_result
 
     @classmethod
     def get_funcbox(cls, cpu, func_ptr):
         addr = llmemory.cast_ptr_to_adr(func_ptr)
         return ConstInt(heaptracker.adr2int(addr))
 
+    def test_call_aligned_with_spilled_values(self):
+        from pypy.rlib.libffi import types
+        cpu = self.cpu
+        if not cpu.supports_floats:
+            py.test.skip('requires floats')
+
+
+        def func(*args):
+            return float(sum(args))
+
+        F = lltype.Float
+        I = lltype.Signed
+        floats = [0.7, 5.8, 0.1, 0.3, 0.9, -2.34, -3.45, -4.56]
+        ints = [7, 11, 23, 13, -42, 1111, 95, 1]
+        for case in range(256):
+            local_floats = list(floats)
+            local_ints = list(ints)
+            args = []
+            spills = []
+            funcargs = []
+            float_count = 0
+            int_count = 0
+            for i in range(8):
+                if case & (1<<i):
+                    args.append('f%d' % float_count)
+                    spills.append('force_spill(f%d)' % float_count)
+                    float_count += 1
+                    funcargs.append(F)
+                else:
+                    args.append('i%d' % int_count)
+                    spills.append('force_spill(i%d)' % int_count)
+                    int_count += 1
+                    funcargs.append(I)
+
+            arguments = ', '.join(args)
+            spill_ops = '\n'.join(spills)
+
+            FUNC = self.FuncType(funcargs, F)
+            FPTR = self.Ptr(FUNC)
+            func_ptr = llhelper(FPTR, func)
+            calldescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
+            funcbox = self.get_funcbox(cpu, func_ptr)
+
+            ops = '[%s]\n' % arguments
+            ops += '%s\n' % spill_ops
+            ops += 'f99 = call(ConstClass(func_ptr), %s, descr=calldescr)\n' % arguments
+            ops += 'finish(f99, %s)\n' % arguments
+
+            loop = parse(ops, namespace=locals())
+            looptoken = LoopToken()
+            done_number = self.cpu.get_fail_descr_number(loop.operations[-1].getdescr())
+            self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
+            expected_result = self._prepare_args(args, floats, ints)
+
+            res = self.cpu.execute_token(looptoken)
+            x = longlong.getrealfloat(cpu.get_latest_value_float(0))
+            assert abs(x - expected_result) < 0.0001
+
+    def test_call_aligned_with_imm_values(self):
+        from pypy.rlib.libffi import types
+        cpu = self.cpu
+        if not cpu.supports_floats:
+            py.test.skip('requires floats')
+
+
+        def func(*args):
+            return float(sum(args))
+
+        F = lltype.Float
+        I = lltype.Signed
+        floats = [0.7, 5.8, 0.1, 0.3, 0.9, -2.34, -3.45, -4.56]
+        ints = [7, 11, 23, 13, -42, 1111, 95, 1]
+        for case in range(256):
+            result = 0.0
+            args = []
+            argslist = []
+            local_floats = list(floats)
+            local_ints = list(ints)
+            for i in range(8):
+                if case & (1<<i):
+                    args.append(F)
+                    arg = local_floats.pop()
+                    result += arg
+                    argslist.append(constfloat(arg))
+                else:
+                    args.append(I)
+                    arg = local_ints.pop()
+                    result += arg
+                    argslist.append(ConstInt(arg))
+            FUNC = self.FuncType(args, F)
+            FPTR = self.Ptr(FUNC)
+            func_ptr = llhelper(FPTR, func)
+            calldescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
+            funcbox = self.get_funcbox(cpu, func_ptr)
+
+            res = self.execute_operation(rop.CALL,
+                                         [funcbox] + argslist,
+                                         'float', descr=calldescr)
+            assert abs(res.getfloat() - result) < 0.0001
+
     def test_call_aligned_with_args_on_the_stack(self):
-            from pypy.rlib.libffi import types
-            cpu = self.cpu
-            if not cpu.supports_floats:
-                py.test.skip('requires floats')
+        from pypy.rlib.libffi import types
+        cpu = self.cpu
+        if not cpu.supports_floats:
+            py.test.skip('requires floats')
 
 
-            def func(*args):
-                return float(sum(args))
+        def func(*args):
+            return float(sum(args))
 
-            F = lltype.Float
-            I = lltype.Signed
-            floats = [0.7, 5.8, 0.1, 0.3, 0.9, -2.34, -3.45, -4.56]
-            ints = [7, 11, 23, 13, -42, 1111, 95, 1]
-            for case in range(256):
-                result = 0.0
-                args = []
-                argslist = []
-                local_floats = list(floats)
-                local_ints = list(ints)
-                for i in range(8):
-                    if case & (1<<i):
-                        args.append(F)
-                        arg = local_floats.pop()
-                        result += arg
-                        argslist.append(boxfloat(arg))
-                    else:
-                        args.append(I)
-                        arg = local_ints.pop()
-                        result += arg
-                        argslist.append(BoxInt(arg))
-                FUNC = self.FuncType(args, F)
-                FPTR = self.Ptr(FUNC)
-                func_ptr = llhelper(FPTR, func)
-                calldescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
-                funcbox = self.get_funcbox(cpu, func_ptr)
+        F = lltype.Float
+        I = lltype.Signed
+        floats = [0.7, 5.8, 0.1, 0.3, 0.9, -2.34, -3.45, -4.56]
+        ints = [7, 11, 23, 13, -42, 1111, 95, 1]
+        for case in range(256):
+            result = 0.0
+            args = []
+            argslist = []
+            local_floats = list(floats)
+            local_ints = list(ints)
+            for i in range(8):
+                if case & (1<<i):
+                    args.append(F)
+                    arg = local_floats.pop()
+                    result += arg
+                    argslist.append(boxfloat(arg))
+                else:
+                    args.append(I)
+                    arg = local_ints.pop()
+                    result += arg
+                    argslist.append(BoxInt(arg))
+            FUNC = self.FuncType(args, F)
+            FPTR = self.Ptr(FUNC)
+            func_ptr = llhelper(FPTR, func)
+            calldescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
+            funcbox = self.get_funcbox(cpu, func_ptr)
 
-                res = self.execute_operation(rop.CALL,
-                                             [funcbox] + argslist,
-                                             'float', descr=calldescr)
-                assert abs(res.getfloat() - result) < 0.0001
+            res = self.execute_operation(rop.CALL,
+                                         [funcbox] + argslist,
+                                         'float', descr=calldescr)
+            assert abs(res.getfloat() - result) < 0.0001
 
     def test_call_alignment_call_assembler(self):
         from pypy.rlib.libffi import types
@@ -104,21 +221,6 @@
 
         floats = [0.7, 5.8, 0.1, 0.3, 0.9, -2.34, -3.45, -4.56]
         ints = [7, 11, 23, 42, -42, 1111, 95, 1]
-        def _prepare_args(args):
-            local_floats = list(floats)
-            local_ints = list(ints)
-            expected_result = 0.0
-            for i in range(len(args)):
-                x = args[i]
-                if x[0] == 'f':
-                    x = local_floats.pop()
-                    t = longlong.getfloatstorage(x)
-                    cpu.set_future_value_float(i, t)
-                else:
-                    x = local_ints.pop()
-                    cpu.set_future_value_int(i, x)
-                expected_result += x
-            return expected_result
 
         for case in range(256):
             float_count = 0
@@ -152,7 +254,7 @@
             done_number = self.cpu.get_fail_descr_number(called_loop.operations[-1].getdescr())
             self.cpu.compile_loop(called_loop.inputargs, called_loop.operations, called_looptoken)
 
-            expected_result = _prepare_args(args)
+            expected_result = self._prepare_args(args, floats, ints)
             res = cpu.execute_token(called_looptoken)
             assert res.identifier == 3
             t = longlong.getrealfloat(cpu.get_latest_value_float(0))
@@ -181,7 +283,7 @@
                 self.cpu.compile_loop(loop.inputargs, loop.operations, othertoken)
 
                 # prepare call to called_loop
-                _prepare_args(args)
+                self._prepare_args(args, floats, ints)
                 res = cpu.execute_token(othertoken)
                 x = longlong.getrealfloat(cpu.get_latest_value_float(0))
                 assert res.identifier == 4
diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py
--- a/pypy/jit/backend/test/runner_test.py
+++ b/pypy/jit/backend/test/runner_test.py
@@ -560,23 +560,6 @@
                                          'int', descr=calldescr)
             assert res.value == func_ints(*args)
 
-    def test_call_to_c_function(self):
-        from pypy.rlib.libffi import CDLL, types, ArgChain
-        from pypy.rpython.lltypesystem.ll2ctypes import libc_name
-        libc = CDLL(libc_name)
-        c_tolower = libc.getpointer('tolower', [types.uchar], types.sint)
-        argchain = ArgChain().arg(ord('A'))
-        assert c_tolower.call(argchain, rffi.INT) == ord('a')
-
-        func_adr = llmemory.cast_ptr_to_adr(c_tolower.funcsym)
-        funcbox = ConstInt(heaptracker.adr2int(func_adr))
-        calldescr = self.cpu.calldescrof_dynamic([types.uchar], types.sint)
-        res = self.execute_operation(rop.CALL,
-                                     [funcbox, BoxInt(ord('A'))],
-                                     'int',
-                                     descr=calldescr)
-        assert res.value == ord('a')
-
     def test_call_with_const_floats(self):
         def func(f1, f2):
             return f1 + f2
@@ -1680,7 +1663,7 @@
         record = []
         #
         S = lltype.GcStruct('S', ('tid', lltype.Signed))
-        FUNC = self.FuncType([lltype.Ptr(S), lltype.Signed], lltype.Void)
+        FUNC = self.FuncType([lltype.Ptr(S), lltype.Ptr(S)], lltype.Void)
         func_ptr = llhelper(lltype.Ptr(FUNC), func_void)
         funcbox = self.get_funcbox(self.cpu, func_ptr)
         class WriteBarrierDescr(AbstractDescr):
@@ -1699,12 +1682,48 @@
             s = lltype.malloc(S)
             s.tid = value
             sgcref = lltype.cast_opaque_ptr(llmemory.GCREF, s)
+            t = lltype.malloc(S)
+            tgcref = lltype.cast_opaque_ptr(llmemory.GCREF, t)
             del record[:]
             self.execute_operation(rop.COND_CALL_GC_WB,
-                                   [BoxPtr(sgcref), ConstInt(-2121)],
+                                   [BoxPtr(sgcref), ConstPtr(tgcref)],
                                    'void', descr=WriteBarrierDescr())
             if cond:
-                assert record == [(s, -2121)]
+                assert record == [(s, t)]
+            else:
+                assert record == []
+
+    def test_cond_call_gc_wb_array(self):
+        def func_void(a, b):
+            record.append((a, b))
+        record = []
+        #
+        S = lltype.GcStruct('S', ('tid', lltype.Signed))
+        FUNC = self.FuncType([lltype.Ptr(S), lltype.Signed], lltype.Void)
+        func_ptr = llhelper(lltype.Ptr(FUNC), func_void)
+        funcbox = self.get_funcbox(self.cpu, func_ptr)
+        class WriteBarrierDescr(AbstractDescr):
+            jit_wb_if_flag = 4096
+            jit_wb_if_flag_byteofs = struct.pack("i", 4096).index('\x10')
+            jit_wb_if_flag_singlebyte = 0x10
+            def get_write_barrier_from_array_fn(self, cpu):
+                return funcbox.getint()
+        #
+        for cond in [False, True]:
+            value = random.randrange(-sys.maxint, sys.maxint)
+            if cond:
+                value |= 4096
+            else:
+                value &= ~4096
+            s = lltype.malloc(S)
+            s.tid = value
+            sgcref = lltype.cast_opaque_ptr(llmemory.GCREF, s)
+            del record[:]
+            self.execute_operation(rop.COND_CALL_GC_WB,
+                                   [BoxPtr(sgcref), ConstInt(123)],
+                                   'void', descr=WriteBarrierDescr())
+            if cond:
+                assert record == [(s, 123)]
             else:
                 assert record == []
 
@@ -1843,6 +1862,99 @@
         assert self.cpu.get_latest_value_int(2) == 10
         assert values == [1, 10]
 
+    def test_call_to_c_function(self):
+        from pypy.rlib.libffi import CDLL, types, ArgChain
+        from pypy.rpython.lltypesystem.ll2ctypes import libc_name
+        libc = CDLL(libc_name)
+        c_tolower = libc.getpointer('tolower', [types.uchar], types.sint)
+        argchain = ArgChain().arg(ord('A'))
+        assert c_tolower.call(argchain, rffi.INT) == ord('a')
+
+        cpu = self.cpu
+        func_adr = llmemory.cast_ptr_to_adr(c_tolower.funcsym)
+        funcbox = ConstInt(heaptracker.adr2int(func_adr))
+        calldescr = cpu.calldescrof_dynamic([types.uchar], types.sint)
+        i1 = BoxInt()
+        i2 = BoxInt()
+        tok = BoxInt()
+        faildescr = BasicFailDescr(1)
+        ops = [
+        ResOperation(rop.CALL_RELEASE_GIL, [funcbox, i1], i2,
+                     descr=calldescr),
+        ResOperation(rop.GUARD_NOT_FORCED, [], None, descr=faildescr),
+        ResOperation(rop.FINISH, [i2], None, descr=BasicFailDescr(0))
+        ]
+        ops[1].setfailargs([i1, i2])
+        looptoken = LoopToken()
+        self.cpu.compile_loop([i1], ops, looptoken)
+        self.cpu.set_future_value_int(0, ord('G'))
+        fail = self.cpu.execute_token(looptoken)
+        assert fail.identifier == 0
+        assert self.cpu.get_latest_value_int(0) == ord('g')
+
+    def test_call_to_c_function_with_callback(self):
+        from pypy.rlib.libffi import CDLL, types, ArgChain, clibffi
+        from pypy.rpython.lltypesystem.ll2ctypes import libc_name
+        libc = CDLL(libc_name)
+        types_size_t = clibffi.cast_type_to_ffitype(rffi.SIZE_T)
+        c_qsort = libc.getpointer('qsort', [types.pointer, types_size_t,
+                                            types_size_t, types.pointer],
+                                  types.void)
+        class Glob(object):
+            pass
+        glob = Glob()
+        class X(object):
+            pass
+        #
+        def callback(p1, p2):
+            glob.lst.append(X())
+            return rffi.cast(rffi.INT, 1)
+        CALLBACK = lltype.Ptr(lltype.FuncType([lltype.Signed,
+                                               lltype.Signed], rffi.INT))
+        fn = llhelper(CALLBACK, callback)
+        S = lltype.Struct('S', ('x', rffi.INT), ('y', rffi.INT))
+        raw = lltype.malloc(S, flavor='raw')
+        argchain = ArgChain()
+        argchain = argchain.arg(rffi.cast(lltype.Signed, raw))
+        argchain = argchain.arg(rffi.cast(rffi.SIZE_T, 2))
+        argchain = argchain.arg(rffi.cast(rffi.SIZE_T, 4))
+        argchain = argchain.arg(rffi.cast(lltype.Signed, fn))
+        glob.lst = []
+        c_qsort.call(argchain, lltype.Void)
+        assert len(glob.lst) > 0
+        del glob.lst[:]
+
+        cpu = self.cpu
+        func_adr = llmemory.cast_ptr_to_adr(c_qsort.funcsym)
+        funcbox = ConstInt(heaptracker.adr2int(func_adr))
+        calldescr = cpu.calldescrof_dynamic([types.pointer, types_size_t,
+                                             types_size_t, types.pointer],
+                                            types.void)
+        i0 = BoxInt()
+        i1 = BoxInt()
+        i2 = BoxInt()
+        i3 = BoxInt()
+        tok = BoxInt()
+        faildescr = BasicFailDescr(1)
+        ops = [
+        ResOperation(rop.CALL_RELEASE_GIL, [funcbox, i0, i1, i2, i3], None,
+                     descr=calldescr),
+        ResOperation(rop.GUARD_NOT_FORCED, [], None, descr=faildescr),
+        ResOperation(rop.FINISH, [], None, descr=BasicFailDescr(0))
+        ]
+        ops[1].setfailargs([])
+        looptoken = LoopToken()
+        self.cpu.compile_loop([i0, i1, i2, i3], ops, looptoken)
+        self.cpu.set_future_value_int(0, rffi.cast(lltype.Signed, raw))
+        self.cpu.set_future_value_int(1, 2)
+        self.cpu.set_future_value_int(2, 4)
+        self.cpu.set_future_value_int(3, rffi.cast(lltype.Signed, fn))
+        assert glob.lst == []
+        fail = self.cpu.execute_token(looptoken)
+        assert fail.identifier == 0
+        assert len(glob.lst) > 0
+        lltype.free(raw, flavor='raw')
+
     def test_guard_not_invalidated(self):
         cpu = self.cpu
         i0 = BoxInt()
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -128,6 +128,8 @@
         if gc_ll_descr.get_malloc_slowpath_addr is not None:
             self._build_malloc_slowpath()
         self._build_stack_check_slowpath()
+        if gc_ll_descr.gcrootmap:
+            self._build_release_gil(gc_ll_descr.gcrootmap)
         debug_start('jit-backend-counts')
         self.set_debug(have_debug_prints())
         debug_stop('jit-backend-counts')
@@ -137,10 +139,11 @@
         self.current_clt = looptoken.compiled_loop_token
         self.pending_guard_tokens = []
         self.mc = codebuf.MachineCodeBlockWrapper()
-        if self.datablockwrapper is None:
-            allblocks = self.get_asmmemmgr_blocks(looptoken)
-            self.datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr,
-                                                            allblocks)
+        #assert self.datablockwrapper is None --- but obscure case
+        # possible, e.g. getting MemoryError and continuing
+        allblocks = self.get_asmmemmgr_blocks(looptoken)
+        self.datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr,
+                                                        allblocks)
 
     def teardown(self):
         self.pending_guard_tokens = None
@@ -305,7 +308,66 @@
         rawstart = mc.materialize(self.cpu.asmmemmgr, [])
         self.stack_check_slowpath = rawstart
 
-    def assemble_loop(self, inputargs, operations, looptoken, log):
+    @staticmethod
+    def _release_gil_asmgcc(css):
+        # similar to trackgcroot.py:pypy_asm_stackwalk, first part
+        from pypy.rpython.memory.gctransform import asmgcroot
+        new = rffi.cast(asmgcroot.ASM_FRAMEDATA_HEAD_PTR, css)
+        next = asmgcroot.gcrootanchor.next
+        new.next = next
+        new.prev = asmgcroot.gcrootanchor
+        asmgcroot.gcrootanchor.next = new
+        next.prev = new
+        # and now release the GIL
+        before = rffi.aroundstate.before
+        if before:
+            before()
+
+    @staticmethod
+    def _reacquire_gil_asmgcc(css):
+        # first reacquire the GIL
+        after = rffi.aroundstate.after
+        if after:
+            after()
+        # similar to trackgcroot.py:pypy_asm_stackwalk, second part
+        from pypy.rpython.memory.gctransform import asmgcroot
+        old = rffi.cast(asmgcroot.ASM_FRAMEDATA_HEAD_PTR, css)
+        prev = old.prev
+        next = old.next
+        prev.next = next
+        next.prev = prev
+
+    @staticmethod
+    def _release_gil_shadowstack():
+        before = rffi.aroundstate.before
+        if before:
+            before()
+
+    @staticmethod
+    def _reacquire_gil_shadowstack():
+        after = rffi.aroundstate.after
+        if after:
+            after()
+
+    _NOARG_FUNC = lltype.Ptr(lltype.FuncType([], lltype.Void))
+    _CLOSESTACK_FUNC = lltype.Ptr(lltype.FuncType([rffi.LONGP],
+                                                  lltype.Void))
+
+    def _build_release_gil(self, gcrootmap):
+        if gcrootmap.is_shadow_stack:
+            releasegil_func = llhelper(self._NOARG_FUNC,
+                                       self._release_gil_shadowstack)
+            reacqgil_func = llhelper(self._NOARG_FUNC,
+                                     self._reacquire_gil_shadowstack)
+        else:
+            releasegil_func = llhelper(self._CLOSESTACK_FUNC,
+                                       self._release_gil_asmgcc)
+            reacqgil_func = llhelper(self._CLOSESTACK_FUNC,
+                                     self._reacquire_gil_asmgcc)
+        self.releasegil_addr  = self.cpu.cast_ptr_to_int(releasegil_func)
+        self.reacqgil_addr = self.cpu.cast_ptr_to_int(reacqgil_func)
+
+    def assemble_loop(self, loopname, inputargs, operations, looptoken, log):
         '''adds the following attributes to looptoken:
                _x86_loop_code       (an integer giving an address)
                _x86_bootstrap_code  (an integer giving an address)
@@ -321,6 +383,7 @@
         # for the duration of compiling one loop or a one bridge.
 
         clt = CompiledLoopToken(self.cpu, looptoken.number)
+        clt.allgcrefs = []
         looptoken.compiled_loop_token = clt
         if not we_are_translated():
             # Arguments should be unique
@@ -328,13 +391,13 @@
 
         self.setup(looptoken)
         self.currently_compiling_loop = looptoken
-        funcname = self._find_debug_merge_point(operations)
         if log:
             self._register_counter()
             operations = self._inject_debugging_code(looptoken, operations)
 
         regalloc = RegAlloc(self, self.cpu.translate_support_code)
-        arglocs, operations = regalloc.prepare_loop(inputargs, operations, looptoken)
+        arglocs, operations = regalloc.prepare_loop(inputargs, operations,
+                                                    looptoken, clt.allgcrefs)
         looptoken._x86_arglocs = arglocs
 
         bootstrappos = self.mc.get_relative_pos()
@@ -354,7 +417,7 @@
         #
         rawstart = self.materialize_loop(looptoken)
         debug_print("Loop #%d (%s) has address %x to %x" % (
-            looptoken.number, funcname,
+            looptoken.number, loopname,
             rawstart + self.looppos,
             rawstart + directbootstrappos))
         self._patch_stackadjust(rawstart + stackadjustpos,
@@ -374,7 +437,7 @@
         self.teardown()
         # oprofile support
         if self.cpu.profile_agent is not None:
-            name = "Loop # %s: %s" % (looptoken.number, funcname)
+            name = "Loop # %s: %s" % (looptoken.number, loopname)
             self.cpu.profile_agent.native_code_written(name,
                                                        rawstart, fullsize)
         return ops_offset
@@ -394,7 +457,6 @@
             return
 
         self.setup(original_loop_token)
-        funcname = self._find_debug_merge_point(operations)
         if log:
             self._register_counter()
             operations = self._inject_debugging_code(faildescr, operations)
@@ -406,7 +468,8 @@
         regalloc = RegAlloc(self, self.cpu.translate_support_code)
         fail_depths = faildescr._x86_current_depths
         operations = regalloc.prepare_bridge(fail_depths, inputargs, arglocs,
-                                             operations)
+                                             operations,
+                                             self.current_clt.allgcrefs)
 
         stackadjustpos = self._patchable_stackadjust()
         frame_depth, param_depth = self._assemble(regalloc, operations)
@@ -416,8 +479,8 @@
         #
         rawstart = self.materialize_loop(original_loop_token)
 
-        debug_print("Bridge out of guard %d (%s) has address %x to %x" %
-                    (descr_number, funcname, rawstart, rawstart + codeendpos))
+        debug_print("Bridge out of guard %d has address %x to %x" %
+                    (descr_number, rawstart, rawstart + codeendpos))
         self._patch_stackadjust(rawstart + stackadjustpos,
                                 frame_depth + param_depth)
         self.patch_pending_failure_recoveries(rawstart)
@@ -431,7 +494,7 @@
         self.teardown()
         # oprofile support
         if self.cpu.profile_agent is not None:
-            name = "Bridge # %s: %s" % (descr_number, funcname)
+            name = "Bridge # %s" % (descr_number,)
             self.cpu.profile_agent.native_code_written(name,
                                                        rawstart, fullsize)
         return ops_offset
@@ -491,17 +554,6 @@
         return self.mc.materialize(self.cpu.asmmemmgr, allblocks,
                                    self.cpu.gc_ll_descr.gcrootmap)
 
-    def _find_debug_merge_point(self, operations):
-
-        for op in operations:
-            if op.getopnum() == rop.DEBUG_MERGE_POINT:
-                funcname = op.getarg(0)._get_str()
-                break
-        else:
-            funcname = "<loop %d>" % len(self.loop_run_counters)
-        # invent the counter, so we don't get too confused
-        return funcname
-
     def _register_counter(self):
         if self._debug:
             # YYY very minor leak -- we need the counters to stay alive
@@ -620,11 +672,11 @@
         if self.stack_check_slowpath == 0:
             pass                # no stack check (e.g. not translated)
         else:
-            startaddr, length, _ = self.cpu.insert_stack_check()
-            self.mc.MOV(eax, esp)                       # MOV eax, current
-            self.mc.SUB(eax, heap(startaddr))           # SUB eax, [startaddr]
-            self.mc.CMP(eax, imm(length))               # CMP eax, length
-            self.mc.J_il8(rx86.Conditions['B'], 0)      # JB .skip
+            endaddr, lengthaddr, _ = self.cpu.insert_stack_check()
+            self.mc.MOV(eax, heap(endaddr))             # MOV eax, [start]
+            self.mc.SUB(eax, esp)                       # SUB eax, current
+            self.mc.CMP(eax, heap(lengthaddr))          # CMP eax, [length]
+            self.mc.J_il8(rx86.Conditions['BE'], 0)     # JBE .skip
             jb_location = self.mc.get_relative_pos()
             self.mc.CALL(imm(self.stack_check_slowpath))# CALL slowpath
             # patch the JB above                        # .skip:
@@ -1101,6 +1153,8 @@
             self.mc.MOV_bi(FORCE_INDEX_OFS, force_index)
             return force_index
         else:
+            # the return value is ignored, apart from the fact that it
+            # is not negative.
             return 0
 
     genop_int_neg = _unaryop("NEG")
@@ -1984,6 +2038,102 @@
         self.mc.CMP_bi(FORCE_INDEX_OFS, 0)
         self.implement_guard(guard_token, 'L')
 
+    def genop_guard_call_release_gil(self, op, guard_op, guard_token,
+                                     arglocs, result_loc):
+        # first, close the stack in the sense of the asmgcc GC root tracker
+        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
+        if gcrootmap:
+            self.call_release_gil(gcrootmap, arglocs)
+        # do the call
+        faildescr = guard_op.getdescr()
+        fail_index = self.cpu.get_fail_descr_number(faildescr)
+        self.mc.MOV_bi(FORCE_INDEX_OFS, fail_index)
+        self._genop_call(op, arglocs, result_loc, fail_index)
+        # then reopen the stack
+        if gcrootmap:
+            self.call_reacquire_gil(gcrootmap, result_loc)
+        # finally, the guard_not_forced
+        self.mc.CMP_bi(FORCE_INDEX_OFS, 0)
+        self.implement_guard(guard_token, 'L')
+
+    def call_release_gil(self, gcrootmap, save_registers):
+        # First, we need to save away the registers listed in
+        # 'save_registers' that are not callee-save.  XXX We assume that
+        # the XMM registers won't be modified.  We store them in
+        # [ESP+4], [ESP+8], etc., leaving enough room in [ESP] for the
+        # single argument to the releasegil_addr call below.
+        p = WORD
+        for reg in self._regalloc.rm.save_around_call_regs:
+            if reg in save_registers:
+                self.mc.MOV_sr(p, reg.value)
+                p += WORD
+        self._regalloc.reserve_param(p//WORD)
+        #
+        if gcrootmap.is_shadow_stack:
+            args = []
+        else:
+            # note that regalloc.py used save_all_regs=True to save all
+            # registers, so we don't have to care about saving them (other
+            # than ebp) in the close_stack_struct.  But if they are registers
+            # like %eax that would be destroyed by this call, *and* they are
+            # used by arglocs for the *next* call, then trouble; for now we
+            # will just push/pop them.
+            from pypy.rpython.memory.gctransform import asmgcroot
+            css = self._regalloc.close_stack_struct
+            if css == 0:
+                use_words = (2 + max(asmgcroot.INDEX_OF_EBP,
+                                     asmgcroot.FRAME_PTR) + 1)
+                pos = self._regalloc.fm.reserve_location_in_frame(use_words)
+                css = get_ebp_ofs(pos + use_words - 1)
+                self._regalloc.close_stack_struct = css
+            # The location where the future CALL will put its return address
+            # will be [ESP-WORD], so save that as the next frame's top address
+            self.mc.LEA_rs(eax.value, -WORD)        # LEA EAX, [ESP-4]
+            frame_ptr = css + WORD * (2+asmgcroot.FRAME_PTR)
+            self.mc.MOV_br(frame_ptr, eax.value)    # MOV [css.frame], EAX
+            # Save ebp
+            index_of_ebp = css + WORD * (2+asmgcroot.INDEX_OF_EBP)
+            self.mc.MOV_br(index_of_ebp, ebp.value) # MOV [css.ebp], EBP
+            # Call the closestack() function (also releasing the GIL)
+            if IS_X86_32:
+                reg = eax
+            elif IS_X86_64:
+                reg = edi
+            self.mc.LEA_rb(reg.value, css)
+            args = [reg]
+        #
+        self._emit_call(-1, imm(self.releasegil_addr), args)
+        # Finally, restore the registers saved above.
+        p = WORD
+        for reg in self._regalloc.rm.save_around_call_regs:
+            if reg in save_registers:
+                self.mc.MOV_rs(reg.value, p)
+                p += WORD
+
+    def call_reacquire_gil(self, gcrootmap, save_loc):
+        # save the previous result (eax/xmm0) into the stack temporarily.
+        # XXX like with call_release_gil(), we assume that we don't need
+        # to save xmm0 in this case.
+        if isinstance(save_loc, RegLoc) and not save_loc.is_xmm:
+            self.mc.MOV_sr(WORD, save_loc.value)
+            self._regalloc.reserve_param(2)
+        # call the reopenstack() function (also reacquiring the GIL)
+        if gcrootmap.is_shadow_stack:
+            args = []
+        else:
+            css = self._regalloc.close_stack_struct
+            assert css != 0
+            if IS_X86_32:
+                reg = eax
+            elif IS_X86_64:
+                reg = edi
+            self.mc.LEA_rb(reg.value, css)
+            args = [reg]
+        self._emit_call(-1, imm(self.reacqgil_addr), args)
+        # restore the result from the stack
+        if isinstance(save_loc, RegLoc) and not save_loc.is_xmm:
+            self.mc.MOV_rs(save_loc.value, WORD)
+
     def genop_guard_call_assembler(self, op, guard_op, guard_token,
                                    arglocs, result_loc):
         faildescr = guard_op.getdescr()
@@ -2076,6 +2226,8 @@
         # function remember_young_pointer() from the GC.  The two arguments
         # to the call are in arglocs[:2].  The rest, arglocs[2:], contains
         # registers that need to be saved and restored across the call.
+        # If op.getarg(1) is an int, it is an array index and we must
+        # instead call remember_young_pointer_from_array().
         descr = op.getdescr()
         if we_are_translated():
             cls = self.cpu.gc_ll_descr.has_write_barrier_class()
@@ -2107,13 +2259,19 @@
             remap_frame_layout(self, arglocs[:2], [edi, esi],
                                X86_64_SCRATCH_REG)
 
+        if op.getarg(1).type == INT:
+            func = descr.get_write_barrier_from_array_fn(self.cpu)
+            assert func != 0
+        else:
+            func = descr.get_write_barrier_fn(self.cpu)
+
         # misaligned stack in the call, but it's ok because the write barrier
         # is not going to call anything more.  Also, this assumes that the
         # write barrier does not touch the xmm registers.  (Slightly delicate
         # assumption, given that the write barrier can end up calling the
         # platform's malloc() from AddressStack.append().  XXX may need to
         # be done properly)
-        self.mc.CALL(imm(descr.get_write_barrier_fn(self.cpu)))
+        self.mc.CALL(imm(func))
         if IS_X86_32:
             self.mc.ADD_ri(esp.value, 2*WORD)
         for i in range(2, len(arglocs)):
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -156,12 +156,14 @@
         self.translate_support_code = translate_support_code
         # to be read/used by the assembler too
         self.jump_target_descr = None
+        self.close_stack_struct = 0
 
-    def _prepare(self, inputargs, operations):
+    def _prepare(self, inputargs, operations, allgcrefs):
         self.fm = X86FrameManager()
         self.param_depth = 0
         cpu = self.assembler.cpu
-        operations = cpu.gc_ll_descr.rewrite_assembler(cpu, operations)
+        operations = cpu.gc_ll_descr.rewrite_assembler(cpu, operations,
+                                                       allgcrefs)
         # compute longevity of variables
         longevity = self._compute_vars_longevity(inputargs, operations)
         self.longevity = longevity
@@ -172,15 +174,16 @@
                                    assembler = self.assembler)
         return operations
 
-    def prepare_loop(self, inputargs, operations, looptoken):
-        operations = self._prepare(inputargs, operations)
+    def prepare_loop(self, inputargs, operations, looptoken, allgcrefs):
+        operations = self._prepare(inputargs, operations, allgcrefs)
         jump = operations[-1]
         loop_consts = self._compute_loop_consts(inputargs, jump, looptoken)
         self.loop_consts = loop_consts
         return self._process_inputargs(inputargs), operations
 
-    def prepare_bridge(self, prev_depths, inputargs, arglocs, operations):
-        operations = self._prepare(inputargs, operations)
+    def prepare_bridge(self, prev_depths, inputargs, arglocs, operations,
+                       allgcrefs):
+        operations = self._prepare(inputargs, operations, allgcrefs)
         self.loop_consts = {}
         self._update_bindings(arglocs, inputargs)
         self.fm.frame_depth = prev_depths[0]
@@ -268,6 +271,12 @@
             return self.rm.force_allocate_reg(var, forbidden_vars,
                                               selected_reg, need_lower_byte)
 
+    def force_spill_var(self, var):
+        if var.type == FLOAT:
+            return self.xrm.force_spill_var(var)
+        else:
+            return self.rm.force_spill_var(var)
+
     def load_xmm_aligned_16_bytes(self, var, forbidden_vars=[]):
         # Load 'var' in a register; but if it is a constant, we can return
         # a 16-bytes-aligned ConstFloatLoc.
@@ -382,7 +391,9 @@
         self.assembler.regalloc_perform_discard(op, arglocs)
 
     def can_merge_with_next_guard(self, op, i, operations):
-        if op.getopnum() == rop.CALL_MAY_FORCE or op.getopnum() == rop.CALL_ASSEMBLER:
+        if (op.getopnum() == rop.CALL_MAY_FORCE or
+            op.getopnum() == rop.CALL_ASSEMBLER or
+            op.getopnum() == rop.CALL_RELEASE_GIL):
             assert operations[i + 1].getopnum() == rop.GUARD_NOT_FORCED
             return True
         if not op.is_comparison():
@@ -418,6 +429,8 @@
             if self.can_merge_with_next_guard(op, i, operations):
                 oplist_with_guard[op.getopnum()](self, op, operations[i + 1])
                 i += 1
+            elif not we_are_translated() and op.getopnum() == -124: 
+                self._consider_force_spill(op)
             else:
                 oplist[op.getopnum()](self, op)
             if op.result is not None:
@@ -771,6 +784,19 @@
         self.xrm.possibly_free_var(op.getarg(1))
 
     def _call(self, op, arglocs, force_store=[], guard_not_forced_op=None):
+        # we need to save registers on the stack:
+        #
+        #  - at least the non-callee-saved registers
+        #
+        #  - for shadowstack, we assume that any call can collect, and we
+        #    save also the callee-saved registers that contain GC pointers,
+        #    so that they can be found by follow_stack_frame_of_assembler()
+        #
+        #  - for CALL_MAY_FORCE or CALL_ASSEMBLER, we have to save all regs
+        #    anyway, in case we need to do cpu.force().  The issue is that
+        #    grab_frame_values() would not be able to locate values in
+        #    callee-saved registers.
+        #
         save_all_regs = guard_not_forced_op is not None
         self.xrm.before_call(force_store, save_all_regs=save_all_regs)
         if not save_all_regs:
@@ -837,6 +863,8 @@
         assert guard_op is not None
         self._consider_call(op, guard_op)
 
+    consider_call_release_gil = consider_call_may_force
+
     def consider_call_assembler(self, op, guard_op):
         descr = op.getdescr()
         assert isinstance(descr, LoopToken)
@@ -856,12 +884,12 @@
     def consider_cond_call_gc_wb(self, op):
         assert op.result is None
         args = op.getarglist()
-        loc_newvalue = self.rm.make_sure_var_in_reg(op.getarg(1), args)
-        # ^^^ we force loc_newvalue in a reg (unless it's a Const),
+        loc_newvalue_or_index= self.rm.make_sure_var_in_reg(op.getarg(1), args)
+        # ^^^ we force loc_newvalue_or_index in a reg (unless it's a Const),
         # because it will be needed anyway by the following setfield_gc.
         # It avoids loading it twice from the memory.
         loc_base = self.rm.make_sure_var_in_reg(op.getarg(0), args)
-        arglocs = [loc_base, loc_newvalue]
+        arglocs = [loc_base, loc_newvalue_or_index]
         # add eax, ecx and edx as extra "arguments" to ensure they are
         # saved and restored.  Fish in self.rm to know which of these
         # registers really need to be saved (a bit of a hack).  Moreover,
@@ -1293,6 +1321,10 @@
     def consider_jit_debug(self, op):
         pass
 
+    def _consider_force_spill(self, op):
+        # This operation is used only for testing
+        self.force_spill_var(op.getarg(0))
+
     def get_mark_gc_roots(self, gcrootmap, use_copy_area=False):
         shape = gcrootmap.get_basic_shape(IS_X86_64)
         for v, val in self.fm.frame_bindings.items():
@@ -1346,7 +1378,9 @@
         name = name[len('consider_'):]
         num = getattr(rop, name.upper())
         if (is_comparison_or_ovf_op(num)
-            or num == rop.CALL_MAY_FORCE or num == rop.CALL_ASSEMBLER):
+            or num == rop.CALL_MAY_FORCE
+            or num == rop.CALL_ASSEMBLER
+            or num == rop.CALL_RELEASE_GIL):
             oplist_with_guard[num] = value
             oplist[num] = add_none_argument(value)
         else:
diff --git a/pypy/jit/backend/x86/runner.py b/pypy/jit/backend/x86/runner.py
--- a/pypy/jit/backend/x86/runner.py
+++ b/pypy/jit/backend/x86/runner.py
@@ -22,6 +22,7 @@
 
     BOOTSTRAP_TP = lltype.FuncType([], lltype.Signed)
     dont_keepalive_stuff = False # for tests
+    with_threads = False
 
     def __init__(self, rtyper, stats, opts=None, translate_support_code=False,
                  gcdescr=None):
@@ -38,6 +39,7 @@
                 if not oprofile.OPROFILE_AVAILABLE:
                     log.WARNING('oprofile support was explicitly enabled, but oprofile headers seem not to be available')
                 profile_agent = oprofile.OProfileAgent()
+            self.with_threads = config.translation.thread
 
         self.profile_agent = profile_agent
 
@@ -77,9 +79,9 @@
         lines = machine_code_dump(data, addr, self.backend_name, label_list)
         print ''.join(lines)
 
-    def compile_loop(self, inputargs, operations, looptoken, log=True):
-        return self.assembler.assemble_loop(inputargs, operations, looptoken,
-                                            log=log)
+    def compile_loop(self, inputargs, operations, looptoken, log=True, name=''):
+        return self.assembler.assemble_loop(name, inputargs, operations,
+                                            looptoken, log=log)
 
     def compile_bridge(self, faildescr, inputargs, operations,
                        original_loop_token, log=True):
@@ -122,8 +124,8 @@
         addr = executable_token._x86_bootstrap_code
         #llop.debug_print(lltype.Void, ">>>> Entering", addr)
         func = rffi.cast(lltype.Ptr(self.BOOTSTRAP_TP), addr)
+        fail_index = self._execute_call(func)
         #llop.debug_print(lltype.Void, "<<<< Back")
-        fail_index = self._execute_call(func)
         return self.get_fail_descr_from_number(fail_index)
 
     def _execute_call(self, func):
@@ -140,10 +142,11 @@
                 LLInterpreter.current_interpreter = prev_interpreter
         return res
 
-    @staticmethod
     def cast_ptr_to_int(x):
         adr = llmemory.cast_ptr_to_adr(x)
         return CPU386.cast_adr_to_int(adr)
+    cast_ptr_to_int._annspecialcase_ = 'specialize:arglltype(0)'
+    cast_ptr_to_int = staticmethod(cast_ptr_to_int)
 
     all_null_registers = lltype.malloc(rffi.LONGP.TO, 24,
                                        flavor='raw', zero=True,
diff --git a/pypy/jit/backend/x86/rx86.py b/pypy/jit/backend/x86/rx86.py
--- a/pypy/jit/backend/x86/rx86.py
+++ b/pypy/jit/backend/x86/rx86.py
@@ -530,6 +530,7 @@
     POP_b = insn(rex_nw, '\x8F', orbyte(0<<3), stack_bp(1))
 
     LEA_rb = insn(rex_w, '\x8D', register(1,8), stack_bp(2))
+    LEA_rs = insn(rex_w, '\x8D', register(1,8), stack_sp(2))
     LEA32_rb = insn(rex_w, '\x8D', register(1,8),stack_bp(2,force_32bits=True))
     LEA_ra = insn(rex_w, '\x8D', register(1, 8), mem_reg_plus_scaled_reg_plus_const(2))
     LEA_rm = insn(rex_w, '\x8D', register(1, 8), mem_reg_plus_const(2))
diff --git a/pypy/jit/backend/x86/test/test_gc_integration.py b/pypy/jit/backend/x86/test/test_gc_integration.py
--- a/pypy/jit/backend/x86/test/test_gc_integration.py
+++ b/pypy/jit/backend/x86/test/test_gc_integration.py
@@ -16,7 +16,7 @@
 from pypy.rpython.lltypesystem import lltype, llmemory, rffi
 from pypy.rpython.annlowlevel import llhelper
 from pypy.rpython.lltypesystem import rclass, rstr
-from pypy.jit.backend.llsupport.gc import GcLLDescr_framework, GcRefList, GcPtrFieldDescr
+from pypy.jit.backend.llsupport.gc import GcLLDescr_framework, GcPtrFieldDescr
 
 from pypy.jit.backend.x86.test.test_regalloc import MockAssembler
 from pypy.jit.backend.x86.test.test_regalloc import BaseTestRegalloc
@@ -51,11 +51,9 @@
     gcrootmap = MockGcRootMap()
 
     def initialize(self):
-        self.gcrefs = GcRefList()
-        self.gcrefs.initialize()
-        self.single_gcref_descr = GcPtrFieldDescr('', 0)
+        pass
 
-    replace_constptrs_with_getfield_raw = GcLLDescr_framework.replace_constptrs_with_getfield_raw.im_func
+    record_constptrs = GcLLDescr_framework.record_constptrs.im_func
     rewrite_assembler = GcLLDescr_framework.rewrite_assembler.im_func
 
 class TestRegallocDirectGcIntegration(object):
diff --git a/pypy/jit/backend/x86/test/test_runner.py b/pypy/jit/backend/x86/test/test_runner.py
--- a/pypy/jit/backend/x86/test/test_runner.py
+++ b/pypy/jit/backend/x86/test/test_runner.py
@@ -330,6 +330,7 @@
                         assert result != expected
 
     def test_compile_bridge_check_profile_info(self):
+        py.test.skip("does not work, reinvestigate")
         class FakeProfileAgent(object):
             def __init__(self):
                 self.functions = []
@@ -362,7 +363,7 @@
         operations[3].setfailargs([i1])
         self.cpu.compile_loop(inputargs, operations, looptoken)
         name, loopaddress, loopsize = agent.functions[0]
-        assert name == "Loop # 17: hello"
+        assert name == "Loop # 17: hello (loop counter 0)"
         assert loopaddress <= looptoken._x86_loop_code
         assert loopsize >= 40 # randomish number
 
@@ -378,7 +379,7 @@
 
         self.cpu.compile_bridge(faildescr1, [i1b], bridge, looptoken)
         name, address, size = agent.functions[1]
-        assert name == "Bridge # 0: bye"
+        assert name == "Bridge # 0: bye (loop counter 1)"
         # Would be exactly ==, but there are some guard failure recovery
         # stubs in-between
         assert address >= loopaddress + loopsize
diff --git a/pypy/jit/backend/x86/test/test_zrpy_gc.py b/pypy/jit/backend/x86/test/test_zrpy_gc.py
--- a/pypy/jit/backend/x86/test/test_zrpy_gc.py
+++ b/pypy/jit/backend/x86/test/test_zrpy_gc.py
@@ -1,8 +1,7 @@
 """
-This is a test that translates a complete JIT to C and runs it.  It is
-not testing much, expect that it basically works.  What it *is* testing,
-however, is the correct handling of GC, i.e. if objects are freed as
-soon as possible (at least in a simple case).
+This is a test that translates a complete JIT together with a GC and runs it.
+It is testing that the GC-dependent aspects basically work, mostly the mallocs
+and the various cases of write barrier.
 """
 
 import weakref
@@ -10,16 +9,11 @@
 from pypy.annotation import policy as annpolicy
 from pypy.rlib import rgc
 from pypy.rpython.lltypesystem import lltype, llmemory, rffi
-from pypy.rpython.lltypesystem.lloperation import llop
 from pypy.rlib.jit import JitDriver, dont_look_inside
 from pypy.rlib.jit import purefunction, unroll_safe
-from pypy.jit.backend.x86.runner import CPU386
-from pypy.jit.backend.llsupport.gc import GcRefList, GcRootMap_asmgcc
 from pypy.jit.backend.llsupport.gc import GcLLDescr_framework
 from pypy.tool.udir import udir
-from pypy.jit.backend.x86.arch import IS_X86_64
 from pypy.config.translationoption import DEFL_GC
-import py.test
 
 class X(object):
     def __init__(self, x=0):
@@ -86,7 +80,7 @@
     #
     return {(gc.GcLLDescr_framework, 'can_inline_malloc'): can_inline_malloc2}
 
-def compile(f, gc, **kwds):
+def compile(f, gc, enable_opts='', **kwds):
     from pypy.annotation.listdef import s_list_of_strings
     from pypy.translator.translator import TranslationContext
     from pypy.jit.metainterp.warmspot import apply_jit
@@ -110,14 +104,14 @@
                 old_value[obj, attr] = getattr(obj, attr)
                 setattr(obj, attr, value)
             #
-            apply_jit(t, enable_opts='')
+            apply_jit(t, enable_opts=enable_opts)
             #
         finally:
             for (obj, attr), oldvalue in old_value.items():
                 setattr(obj, attr, oldvalue)
 
     cbuilder = genc.CStandaloneBuilder(t, f, t.config)
-    cbuilder.generate_source()
+    cbuilder.generate_source(defines=cbuilder.DEBUG_DEFINES)
     cbuilder.compile()
     return cbuilder
 
@@ -154,8 +148,10 @@
 
 # ______________________________________________________________________
 
-class CompileFrameworkTests(object):
-    # Test suite using (so far) the minimark GC.
+
+class BaseFrameworkTests(object):
+    compile_kwds = {}
+
     def setup_class(cls):
         funcs = []
         name_to_func = {}
@@ -205,7 +201,8 @@
         try:
             GcLLDescr_framework.DEBUG = True
             cls.cbuilder = compile(get_entry(allfuncs), DEFL_GC,
-                                   gcrootfinder=cls.gcrootfinder, jit=True)
+                                   gcrootfinder=cls.gcrootfinder, jit=True,
+                                   **cls.compile_kwds)
         finally:
             GcLLDescr_framework.DEBUG = OLD_DEBUG
 
@@ -224,32 +221,36 @@
     def run_orig(self, name, n, x):
         self.main_allfuncs(name, n, x)
 
-    def define_libffi_workaround(cls):
-        # XXX: this is a workaround for a bug in database.py.  It seems that
-        # the problem is triggered by optimizeopt/fficall.py, and in
-        # particular by the ``cast_base_ptr_to_instance(Func, llfunc)``: in
-        # these tests, that line is the only place where libffi.Func is
-        # referenced.
-        #
-        # The problem occurs because the gctransformer tries to annotate a
-        # low-level helper to call the __del__ of libffi.Func when it's too
-        # late.
-        #
-        # This workaround works by forcing the annotator (and all the rest of
-        # the toolchain) to see libffi.Func in a "proper" context, not just as
-        # the target of cast_base_ptr_to_instance.  Note that the function
-        # below is *never* called by any actual test, it's just annotated.
-        #
-        from pypy.rlib.libffi import get_libc_name, CDLL, types, ArgChain
-        libc_name = get_libc_name()
-        def f(n, x, *args):
-            libc = CDLL(libc_name)
-            ptr = libc.getpointer('labs', [types.slong], types.slong)
-            chain = ArgChain()
-            chain.arg(n)
-            n = ptr.call(chain, lltype.Signed)
-            return (n, x) + args
-        return None, f, None
+
+class CompileFrameworkTests(BaseFrameworkTests):
+    # Test suite using (so far) the minimark GC.
+
+##    def define_libffi_workaround(cls):
+##        # XXX: this is a workaround for a bug in database.py.  It seems that
+##        # the problem is triggered by optimizeopt/fficall.py, and in
+##        # particular by the ``cast_base_ptr_to_instance(Func, llfunc)``: in
+##        # these tests, that line is the only place where libffi.Func is
+##        # referenced.
+##        #
+##        # The problem occurs because the gctransformer tries to annotate a
+##        # low-level helper to call the __del__ of libffi.Func when it's too
+##        # late.
+##        #
+##        # This workaround works by forcing the annotator (and all the rest of
+##        # the toolchain) to see libffi.Func in a "proper" context, not just as
+##        # the target of cast_base_ptr_to_instance.  Note that the function
+##        # below is *never* called by any actual test, it's just annotated.
+##        #
+##        from pypy.rlib.libffi import get_libc_name, CDLL, types, ArgChain
+##        libc_name = get_libc_name()
+##        def f(n, x, *args):
+##            libc = CDLL(libc_name)
+##            ptr = libc.getpointer('labs', [types.slong], types.slong)
+##            chain = ArgChain()
+##            chain.arg(n)
+##            n = ptr.call(chain, lltype.Signed)
+##            return (n, x) + args
+##        return None, f, None
 
     def define_compile_framework_1(cls):
         # a moving GC.  Supports malloc_varsize_nonmovable.  Simple test, works
@@ -456,6 +457,73 @@
     def test_compile_framework_7(self):
         self.run('compile_framework_7')
 
+    def define_compile_framework_8(cls):
+        # Array of pointers, of unknown length (test write_barrier_from_array)
+        def before(n, x):
+            return n, x, None, None, None, None, None, None, None, None, [X(123)], None
+        def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
+            if n < 1900:
+                check(l[0].x == 123)
+                l = [None] * (16 + (n & 7))
+                l[0] = X(123)
+                l[1] = X(n)
+                l[2] = X(n+10)
+                l[3] = X(n+20)
+                l[4] = X(n+30)
+                l[5] = X(n+40)
+                l[6] = X(n+50)
+                l[7] = X(n+60)
+                l[8] = X(n+70)
+                l[9] = X(n+80)
+                l[10] = X(n+90)
+                l[11] = X(n+100)
+                l[12] = X(n+110)
+                l[13] = X(n+120)
+                l[14] = X(n+130)
+                l[15] = X(n+140)
+            if n < 1800:
+                check(len(l) == 16 + (n & 7))
+                check(l[0].x == 123)
+                check(l[1].x == n)
+                check(l[2].x == n+10)
+                check(l[3].x == n+20)
+                check(l[4].x == n+30)
+                check(l[5].x == n+40)
+                check(l[6].x == n+50)
+                check(l[7].x == n+60)
+                check(l[8].x == n+70)
+                check(l[9].x == n+80)
+                check(l[10].x == n+90)
+                check(l[11].x == n+100)
+                check(l[12].x == n+110)
+                check(l[13].x == n+120)
+                check(l[14].x == n+130)
+                check(l[15].x == n+140)
+            n -= x.foo
+            return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
+        def after(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
+            check(len(l) >= 16)
+            check(l[0].x == 123)
+            check(l[1].x == 2)
+            check(l[2].x == 12)
+            check(l[3].x == 22)
+            check(l[4].x == 32)
+            check(l[5].x == 42)
+            check(l[6].x == 52)
+            check(l[7].x == 62)
+            check(l[8].x == 72)
+            check(l[9].x == 82)
+            check(l[10].x == 92)
+            check(l[11].x == 102)
+            check(l[12].x == 112)
+            check(l[13].x == 122)
+            check(l[14].x == 132)
+            check(l[15].x == 142)
+        return before, f, after
+
+    def test_compile_framework_8(self):
+        self.run('compile_framework_8')
+
     def define_compile_framework_external_exception_handling(cls):
         def before(n, x):
             x = X(0)
@@ -525,8 +593,8 @@
         glob = A()
         def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
             a = A()
-            glob.v = virtual_ref(a)
-            virtual_ref_finish(a)
+            glob.v = vref = virtual_ref(a)
+            virtual_ref_finish(vref, a)
             n -= 1
             return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
         return None, f, None
diff --git a/pypy/jit/backend/x86/test/test_zrpy_gc.py b/pypy/jit/backend/x86/test/test_zrpy_releasegil.py
copy from pypy/jit/backend/x86/test/test_zrpy_gc.py
copy to pypy/jit/backend/x86/test/test_zrpy_releasegil.py
--- a/pypy/jit/backend/x86/test/test_zrpy_gc.py
+++ b/pypy/jit/backend/x86/test/test_zrpy_releasegil.py
@@ -1,618 +1,110 @@
-"""
-This is a test that translates a complete JIT to C and runs it.  It is
-not testing much, expect that it basically works.  What it *is* testing,
-however, is the correct handling of GC, i.e. if objects are freed as
-soon as possible (at least in a simple case).
-"""
+from pypy.rpython.lltypesystem import lltype, llmemory, rffi
+from pypy.rlib.jit import dont_look_inside
+from pypy.jit.metainterp.optimizeopt import ALL_OPTS_NAMES
 
-import weakref
-import py, os
-from pypy.annotation import policy as annpolicy
-from pypy.rlib import rgc
-from pypy.rpython.lltypesystem import lltype, llmemory, rffi
-from pypy.rpython.lltypesystem.lloperation import llop
-from pypy.rlib.jit import JitDriver, dont_look_inside
-from pypy.rlib.jit import purefunction, unroll_safe
-from pypy.jit.backend.x86.runner import CPU386
-from pypy.jit.backend.llsupport.gc import GcRefList, GcRootMap_asmgcc
-from pypy.jit.backend.llsupport.gc import GcLLDescr_framework
-from pypy.tool.udir import udir
-from pypy.jit.backend.x86.arch import IS_X86_64
-from pypy.config.translationoption import DEFL_GC
-import py.test
+from pypy.rlib.libffi import CDLL, types, ArgChain, clibffi
+from pypy.rpython.lltypesystem.ll2ctypes import libc_name
+from pypy.rpython.annlowlevel import llhelper
 
-class X(object):
-    def __init__(self, x=0):
-        self.x = x
+from pypy.jit.backend.x86.test.test_zrpy_gc import BaseFrameworkTests
+from pypy.jit.backend.x86.test.test_zrpy_gc import check
 
-    next = None
 
-class CheckError(Exception):
-    pass
+class ReleaseGILTests(BaseFrameworkTests):
+    compile_kwds = dict(enable_opts=ALL_OPTS_NAMES, thread=True)
 
-def check(flag):
-    if not flag:
-        raise CheckError
-
-def get_g(main):
-    main._dont_inline_ = True
-    def g(name, n):
-        x = X()
-        x.foo = 2
-        main(n, x)
-        x.foo = 5
-        return weakref.ref(x)
-    g._dont_inline_ = True
-    return g
-
-
-def get_entry(g):
-
-    def entrypoint(args):
-        name = ''
-        n = 2000
-        argc = len(args)
-        if argc > 1:
-            name = args[1]
-        if argc > 2:
-            n = int(args[2])
-        r_list = []
-        for i in range(20):
-            r = g(name, n)
-            r_list.append(r)
-            rgc.collect()
-        rgc.collect(); rgc.collect()
-        freed = 0
-        for r in r_list:
-            if r() is None:
-                freed += 1
-        print freed
-        return 0
-
-    return entrypoint
-
-
-def get_functions_to_patch():
-    from pypy.jit.backend.llsupport import gc
-    #
-    can_inline_malloc1 = gc.GcLLDescr_framework.can_inline_malloc
-    def can_inline_malloc2(*args):
-        try:
-            if os.environ['PYPY_NO_INLINE_MALLOC']:
-                return False
-        except KeyError:
+    def define_simple(self):
+        class Glob:
             pass
-        return can_inline_malloc1(*args)
-    #
-    return {(gc.GcLLDescr_framework, 'can_inline_malloc'): can_inline_malloc2}
-
-def compile(f, gc, **kwds):
-    from pypy.annotation.listdef import s_list_of_strings
-    from pypy.translator.translator import TranslationContext
-    from pypy.jit.metainterp.warmspot import apply_jit
-    from pypy.translator.c import genc
-    #
-    t = TranslationContext()
-    t.config.translation.gc = gc
-    if gc != 'boehm':
-        t.config.translation.gcremovetypeptr = True
-    for name, value in kwds.items():
-        setattr(t.config.translation, name, value)
-    ann = t.buildannotator(policy=annpolicy.StrictAnnotatorPolicy())
-    ann.build_types(f, [s_list_of_strings], main_entry_point=True)
-    t.buildrtyper().specialize()
-
-    if kwds['jit']:
-        patch = get_functions_to_patch()
-        old_value = {}
-        try:
-            for (obj, attr), value in patch.items():
-                old_value[obj, attr] = getattr(obj, attr)
-                setattr(obj, attr, value)
-            #
-            apply_jit(t, enable_opts='')
-            #
-        finally:
-            for (obj, attr), oldvalue in old_value.items():
-                setattr(obj, attr, oldvalue)
-
-    cbuilder = genc.CStandaloneBuilder(t, f, t.config)
-    cbuilder.generate_source()
-    cbuilder.compile()
-    return cbuilder
-
-def run(cbuilder, args=''):
-    #
-    pypylog = udir.join('test_zrpy_gc.log')
-    data = cbuilder.cmdexec(args, env={'PYPYLOG': ':%s' % pypylog})
-    return data.strip()
-
-def compile_and_run(f, gc, **kwds):
-    cbuilder = compile(f, gc, **kwds)
-    return run(cbuilder)
-
-
-
-def test_compile_boehm():
-    myjitdriver = JitDriver(greens = [], reds = ['n', 'x'])
-    @dont_look_inside
-    def see(lst, n):
-        assert len(lst) == 3
-        assert lst[0] == n+10
-        assert lst[1] == n+20
-        assert lst[2] == n+30
-    def main(n, x):
-        while n > 0:
-            myjitdriver.can_enter_jit(n=n, x=x)
-            myjitdriver.jit_merge_point(n=n, x=x)
-            y = X()
-            y.foo = x.foo
-            n -= y.foo
-            see([n+10, n+20, n+30], n)
-    res = compile_and_run(get_entry(get_g(main)), "boehm", jit=True)
-    assert int(res) >= 16
-
-# ______________________________________________________________________
-
-class CompileFrameworkTests(object):
-    # Test suite using (so far) the minimark GC.
-    def setup_class(cls):
-        funcs = []
-        name_to_func = {}
-        for fullname in dir(cls):
-            if not fullname.startswith('define'):
-                continue
-            definefunc = getattr(cls, fullname)
-            _, name = fullname.split('_', 1)
-            beforefunc, loopfunc, afterfunc = definefunc.im_func(cls)
-            if beforefunc is None:
-                def beforefunc(n, x):
-                    return n, x, None, None, None, None, None, None, None, None, None, ''
-            if afterfunc is None:
-                def afterfunc(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-                    pass
-            beforefunc.func_name = 'before_'+name
-            loopfunc.func_name = 'loop_'+name
-            afterfunc.func_name = 'after_'+name
-            funcs.append((beforefunc, loopfunc, afterfunc))
-            assert name not in name_to_func
-            name_to_func[name] = len(name_to_func)
-        print name_to_func
-        def allfuncs(name, n):
-            x = X()
-            x.foo = 2
-            main_allfuncs(name, n, x)
-            x.foo = 5
-            return weakref.ref(x)
-        def main_allfuncs(name, n, x):
-            num = name_to_func[name]
-            n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s = funcs[num][0](n, x)
-            while n > 0:
-                myjitdriver.can_enter_jit(num=num, n=n, x=x, x0=x0, x1=x1,
-                        x2=x2, x3=x3, x4=x4, x5=x5, x6=x6, x7=x7, l=l, s=s)
-                myjitdriver.jit_merge_point(num=num, n=n, x=x, x0=x0, x1=x1,
-                        x2=x2, x3=x3, x4=x4, x5=x5, x6=x6, x7=x7, l=l, s=s)
-
-                n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s = funcs[num][1](
-                        n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s)
-            funcs[num][2](n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s)
-        myjitdriver = JitDriver(greens = ['num'],
-                                reds = ['n', 'x', 'x0', 'x1', 'x2', 'x3', 'x4',
-                                        'x5', 'x6', 'x7', 'l', 's'])
-        cls.main_allfuncs = staticmethod(main_allfuncs)
-        cls.name_to_func = name_to_func
-        OLD_DEBUG = GcLLDescr_framework.DEBUG
-        try:
-            GcLLDescr_framework.DEBUG = True
-            cls.cbuilder = compile(get_entry(allfuncs), DEFL_GC,
-                                   gcrootfinder=cls.gcrootfinder, jit=True)
-        finally:
-            GcLLDescr_framework.DEBUG = OLD_DEBUG
-
-    def _run(self, name, n, env):
-        res = self.cbuilder.cmdexec("%s %d" %(name, n), env=env)
-        assert int(res) == 20
-
-    def run(self, name, n=2000):
-        pypylog = udir.join('TestCompileFramework.log')
-        env = {'PYPYLOG': ':%s' % pypylog,
-               'PYPY_NO_INLINE_MALLOC': '1'}
-        self._run(name, n, env)
-        env['PYPY_NO_INLINE_MALLOC'] = ''
-        self._run(name, n, env)
-
-    def run_orig(self, name, n, x):
-        self.main_allfuncs(name, n, x)
-
-    def define_libffi_workaround(cls):
-        # XXX: this is a workaround for a bug in database.py.  It seems that
-        # the problem is triggered by optimizeopt/fficall.py, and in
-        # particular by the ``cast_base_ptr_to_instance(Func, llfunc)``: in
-        # these tests, that line is the only place where libffi.Func is
-        # referenced.
+        glob = Glob()
         #
-        # The problem occurs because the gctransformer tries to annotate a
-        # low-level helper to call the __del__ of libffi.Func when it's too
-        # late.
-        #
-        # This workaround works by forcing the annotator (and all the rest of
-        # the toolchain) to see libffi.Func in a "proper" context, not just as
-        # the target of cast_base_ptr_to_instance.  Note that the function
-        # below is *never* called by any actual test, it's just annotated.
-        #
-        from pypy.rlib.libffi import get_libc_name, CDLL, types, ArgChain
-        libc_name = get_libc_name()
-        def f(n, x, *args):
-            libc = CDLL(libc_name)
-            ptr = libc.getpointer('labs', [types.slong], types.slong)
-            chain = ArgChain()
-            chain.arg(n)
-            n = ptr.call(chain, lltype.Signed)
-            return (n, x) + args
-        return None, f, None
-
-    def define_compile_framework_1(cls):
-        # a moving GC.  Supports malloc_varsize_nonmovable.  Simple test, works
-        # without write_barriers and root stack enumeration.
-        def f(n, x, *args):
-            y = X()
-            y.foo = x.foo
-            n -= y.foo
-            return (n, x) + args
-        return None, f, None
-
-    def test_compile_framework_1(self):
-        self.run('compile_framework_1')
-
-    def define_compile_framework_2(cls):
-        # More complex test, requires root stack enumeration but
-        # not write_barriers.
-        def f(n, x, *args):
-            prev = x
-            for j in range(101):    # f() runs 20'000 times, thus allocates
-                y = X()             # a total of 2'020'000 objects
-                y.foo = prev.foo
-                prev = y
-            n -= prev.foo
-            return (n, x) + args
-        return None, f, None
-
-    def test_compile_framework_2(self):
-        self.run('compile_framework_2')
-
-    def define_compile_framework_3(cls):
-        # Third version of the test.  Really requires write_barriers.
-        def f(n, x, *args):
-            x.next = None
-            for j in range(101):    # f() runs 20'000 times, thus allocates
-                y = X()             # a total of 2'020'000 objects
-                y.foo = j+1
-                y.next = x.next
-                x.next = y
-            check(x.next.foo == 101)
-            total = 0
-            y = x
-            for j in range(101):
-                y = y.next
-                total += y.foo
-            check(not y.next)
-            check(total == 101*102/2)
-            n -= x.foo
-            return (n, x) + args
-        return None, f, None
-
-
-
-    def test_compile_framework_3(self):
-        x_test = X()
-        x_test.foo = 5
-        self.run_orig('compile_framework_3', 6, x_test)     # check that it does not raise CheckError
-        self.run('compile_framework_3')
-
-    def define_compile_framework_3_extra(cls):
-        # Extra version of the test, with tons of live vars around the residual
-        # call that all contain a GC pointer.
-        @dont_look_inside
-        def residual(n=26):
-            x = X()
-            x.next = X()
-            x.next.foo = n
-            return x
+        def f42(n):
+            c_strchr = glob.c_strchr
+            raw = rffi.str2charp("foobar" + chr((n & 63) + 32))
+            argchain = ArgChain()
+            argchain = argchain.arg(rffi.cast(lltype.Signed, raw))
+            argchain = argchain.arg(rffi.cast(rffi.INT, ord('b')))
+            res = c_strchr.call(argchain, rffi.CCHARP)
+            check(rffi.charp2str(res) == "bar" + chr((n & 63) + 32))
+            rffi.free_charp(raw)
         #
         def before(n, x):
-            residual(5)
-            x0 = residual()
-            x1 = residual()
-            x2 = residual()
-            x3 = residual()
-            x4 = residual()
-            x5 = residual()
-            x6 = residual()
-            x7 = residual()
-            n *= 19
-            return n, None, x0, x1, x2, x3, x4, x5, x6, x7, None, None
-        def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            x8 = residual()
-            x9 = residual()
-            check(x0.next.foo == 26)
-            check(x1.next.foo == 26)
-            check(x2.next.foo == 26)
-            check(x3.next.foo == 26)
-            check(x4.next.foo == 26)
-            check(x5.next.foo == 26)
-            check(x6.next.foo == 26)
-            check(x7.next.foo == 26)
-            check(x8.next.foo == 26)
-            check(x9.next.foo == 26)
-            x0, x1, x2, x3, x4, x5, x6, x7 = x7, x4, x6, x5, x3, x2, x9, x8
+            libc = CDLL(libc_name)
+            c_strchr = libc.getpointer('strchr', [types.pointer, types.sint],
+                                       types.pointer)
+            glob.c_strchr = c_strchr
+            return (n, None, None, None, None, None,
+                    None, None, None, None, None, None)
+        #
+        def f(n, x, *args):
+            f42(n)
             n -= 1
-            return n, None, x0, x1, x2, x3, x4, x5, x6, x7, None, None
-        return before, f, None
-
-    def test_compile_framework_3_extra(self):
-        self.run_orig('compile_framework_3_extra', 6, None)     # check that it does not raise CheckError
-        self.run('compile_framework_3_extra')
-
-    def define_compile_framework_4(cls):
-        # Fourth version of the test, with __del__.
-        from pypy.rlib.debug import debug_print
-        class Counter:
-            cnt = 0
-        counter = Counter()
-        class Z:
-            def __del__(self):
-                counter.cnt -= 1
-        def before(n, x):
-            debug_print('counter.cnt =', counter.cnt)
-            check(counter.cnt < 5)
-            counter.cnt = n // x.foo
-            return n, x, None, None, None, None, None, None, None, None, None, None
-        def f(n, x, *args):
-            Z()
-            n -= x.foo
             return (n, x) + args
         return before, f, None
 
-    def test_compile_framework_4(self):
-        self.run('compile_framework_4')
+    def test_simple(self):
+        self.run('simple')
 
-    def define_compile_framework_5(cls):
-        # Test string manipulation.
-        def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            n -= x.foo
-            s += str(n)
-            return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
-        def after(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            check(len(s) == 1*5 + 2*45 + 3*450 + 4*500)
-        return None, f, after
-
-    def test_compile_framework_5(self):
-        self.run('compile_framework_5')
-
-    def define_compile_framework_7(cls):
-        # Array of pointers (test the write barrier for setarrayitem_gc)
+    def define_close_stack(self):
+        #
+        class Glob(object):
+            pass
+        glob = Glob()
+        class X(object):
+            pass
+        #
+        def callback(p1, p2):
+            for i in range(100):
+                glob.lst.append(X())
+            return rffi.cast(rffi.INT, 1)
+        CALLBACK = lltype.Ptr(lltype.FuncType([lltype.Signed,
+                                               lltype.Signed], rffi.INT))
+        #
+        @dont_look_inside
+        def alloc1():
+            return llmemory.raw_malloc(16)
+        @dont_look_inside
+        def free1(p):
+            llmemory.raw_free(p)
+        #
+        def f42():
+            length = len(glob.lst)
+            c_qsort = glob.c_qsort
+            raw = alloc1()
+            fn = llhelper(CALLBACK, rffi._make_wrapper_for(CALLBACK, callback))
+            argchain = ArgChain()
+            argchain = argchain.arg(rffi.cast(lltype.Signed, raw))
+            argchain = argchain.arg(rffi.cast(rffi.SIZE_T, 2))
+            argchain = argchain.arg(rffi.cast(rffi.SIZE_T, 8))
+            argchain = argchain.arg(rffi.cast(lltype.Signed, fn))
+            c_qsort.call(argchain, lltype.Void)
+            free1(raw)
+            check(len(glob.lst) > length)
+            del glob.lst[:]
+        #
         def before(n, x):
-            return n, x, None, None, None, None, None, None, None, None, [X(123)], None
-        def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            if n < 1900:
-                check(l[0].x == 123)
-                l = [None] * 16
-                l[0] = X(123)
-                l[1] = X(n)
-                l[2] = X(n+10)
-                l[3] = X(n+20)
-                l[4] = X(n+30)
-                l[5] = X(n+40)
-                l[6] = X(n+50)
-                l[7] = X(n+60)
-                l[8] = X(n+70)
-                l[9] = X(n+80)
-                l[10] = X(n+90)
-                l[11] = X(n+100)
-                l[12] = X(n+110)
-                l[13] = X(n+120)
-                l[14] = X(n+130)
-                l[15] = X(n+140)
-            if n < 1800:
-                check(len(l) == 16)
-                check(l[0].x == 123)
-                check(l[1].x == n)
-                check(l[2].x == n+10)
-                check(l[3].x == n+20)
-                check(l[4].x == n+30)
-                check(l[5].x == n+40)
-                check(l[6].x == n+50)
-                check(l[7].x == n+60)
-                check(l[8].x == n+70)
-                check(l[9].x == n+80)
-                check(l[10].x == n+90)
-                check(l[11].x == n+100)
-                check(l[12].x == n+110)
-                check(l[13].x == n+120)
-                check(l[14].x == n+130)
-                check(l[15].x == n+140)
-            n -= x.foo
-            return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
-        def after(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            check(len(l) == 16)
-            check(l[0].x == 123)
-            check(l[1].x == 2)
-            check(l[2].x == 12)
-            check(l[3].x == 22)
-            check(l[4].x == 32)
-            check(l[5].x == 42)
-            check(l[6].x == 52)
-            check(l[7].x == 62)
-            check(l[8].x == 72)
-            check(l[9].x == 82)
-            check(l[10].x == 92)
-            check(l[11].x == 102)
-            check(l[12].x == 112)
-            check(l[13].x == 122)
-            check(l[14].x == 132)
-            check(l[15].x == 142)
-        return before, f, after
-
-    def test_compile_framework_7(self):
-        self.run('compile_framework_7')
-
-    def define_compile_framework_external_exception_handling(cls):
-        def before(n, x):
-            x = X(0)
-            return n, x, None, None, None, None, None, None, None, None, None, None
-
-        @dont_look_inside
-        def g(x):
-            if x > 200:
-                return 2
-            raise ValueError
-        @dont_look_inside
-        def h(x):
-            if x > 150:
-                raise ValueError
-            return 2
-
-        def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            try:
-                x.x += g(n)
-            except ValueError:
-                x.x += 1
-            try:
-                x.x += h(n)
-            except ValueError:
-                x.x -= 1
+            libc = CDLL(libc_name)
+            types_size_t = clibffi.cast_type_to_ffitype(rffi.SIZE_T)
+            c_qsort = libc.getpointer('qsort', [types.pointer, types_size_t,
+                                                types_size_t, types.pointer],
+                                      types.void)
+            glob.c_qsort = c_qsort
+            glob.lst = []
+            return (n, None, None, None, None, None,
+                    None, None, None, None, None, None)
+        #
+        def f(n, x, *args):
+            f42()
             n -= 1
-            return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
-
-        def after(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            check(x.x == 1800 * 2 + 1850 * 2 + 200 - 150)
-
+            return (n, x) + args
         return before, f, None
 
-    def test_compile_framework_external_exception_handling(self):
-        self.run('compile_framework_external_exception_handling')
+    def test_close_stack(self):
+        self.run('close_stack')
 
-    def define_compile_framework_bug1(self):
-        @purefunction
-        def nonmoving():
-            x = X(1)
-            for i in range(7):
-                rgc.collect()
-            return x
 
-        @dont_look_inside
-        def do_more_stuff():
-            x = X(5)
-            for i in range(7):
-                rgc.collect()
-            return x
-
-        def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            x0 = do_more_stuff()
-            check(nonmoving().x == 1)
-            n -= 1
-            return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
-
-        return None, f, None
-
-    def test_compile_framework_bug1(self):
-        self.run('compile_framework_bug1', 200)
-
-    def define_compile_framework_vref(self):
-        from pypy.rlib.jit import virtual_ref, virtual_ref_finish
-        class A:
-            pass
-        glob = A()
-        def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            a = A()
-            glob.v = virtual_ref(a)
-            virtual_ref_finish(a)
-            n -= 1
-            return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
-        return None, f, None
-
-    def test_compile_framework_vref(self):
-        self.run('compile_framework_vref', 200)
-
-    def define_compile_framework_float(self):
-        # test for a bug: the fastpath_malloc does not save and restore
-        # xmm registers around the actual call to the slow path
-        class A:
-            x0 = x1 = x2 = x3 = x4 = x5 = x6 = x7 = 0
-        @dont_look_inside
-        def escape1(a):
-            a.x0 += 0
-            a.x1 += 6
-            a.x2 += 12
-            a.x3 += 18
-            a.x4 += 24
-            a.x5 += 30
-            a.x6 += 36
-            a.x7 += 42
-        @dont_look_inside
-        def escape2(n, f0, f1, f2, f3, f4, f5, f6, f7):
-            check(f0 == n + 0.0)
-            check(f1 == n + 0.125)
-            check(f2 == n + 0.25)
-            check(f3 == n + 0.375)
-            check(f4 == n + 0.5)
-            check(f5 == n + 0.625)
-            check(f6 == n + 0.75)
-            check(f7 == n + 0.875)
-        @unroll_safe
-        def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            i = 0
-            while i < 42:
-                m = n + i
-                f0 = m + 0.0
-                f1 = m + 0.125
-                f2 = m + 0.25
-                f3 = m + 0.375
-                f4 = m + 0.5
-                f5 = m + 0.625
-                f6 = m + 0.75
-                f7 = m + 0.875
-                a1 = A()
-                # at this point, all or most f's are still in xmm registers
-                escape1(a1)
-                escape2(m, f0, f1, f2, f3, f4, f5, f6, f7)
-                i += 1
-            n -= 1
-            return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
-        return None, f, None
-
-    def test_compile_framework_float(self):
-        self.run('compile_framework_float')
-
-    def define_compile_framework_minimal_size_in_nursery(self):
-        S = lltype.GcStruct('S')    # no fields!
-        T = lltype.GcStruct('T', ('i', lltype.Signed))
-        @unroll_safe
-        def f42(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            lst1 = []
-            lst2 = []
-            i = 0
-            while i < 42:
-                s1 = lltype.malloc(S)
-                t1 = lltype.malloc(T)
-                t1.i = 10000 + i + n
-                lst1.append(s1)
-                lst2.append(t1)
-                i += 1
-            i = 0
-            while i < 42:
-                check(lst2[i].i == 10000 + i + n)
-                i += 1
-            n -= 1
-            return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
-        return None, f42, None
-
-    def test_compile_framework_minimal_size_in_nursery(self):
-        self.run('compile_framework_minimal_size_in_nursery')
-
-
-class TestShadowStack(CompileFrameworkTests):
+class TestShadowStack(ReleaseGILTests):
     gcrootfinder = "shadowstack"
 
-class TestAsmGcc(CompileFrameworkTests):
+class TestAsmGcc(ReleaseGILTests):
     gcrootfinder = "asmgcc"
diff --git a/pypy/jit/codewriter/assembler.py b/pypy/jit/codewriter/assembler.py
--- a/pypy/jit/codewriter/assembler.py
+++ b/pypy/jit/codewriter/assembler.py
@@ -76,7 +76,8 @@
                 TYPE = llmemory.Address
             if TYPE == llmemory.Address:
                 value = heaptracker.adr2int(value)
-            elif not isinstance(value, ComputedIntSymbolic):
+            if not isinstance(value, (llmemory.AddressAsInt,
+                                      ComputedIntSymbolic)):
                 value = lltype.cast_primitive(lltype.Signed, value)
                 if allow_short and -128 <= value <= 127:
                     # emit the constant as a small integer
diff --git a/pypy/jit/codewriter/call.py b/pypy/jit/codewriter/call.py
--- a/pypy/jit/codewriter/call.py
+++ b/pypy/jit/codewriter/call.py
@@ -237,6 +237,8 @@
             self.readwrite_analyzer.analyze(op), self.cpu, extraeffect,
             oopspecindex, can_invalidate)
         #
+        if oopspecindex != EffectInfo.OS_NONE:
+            assert effectinfo is not None
         if pure or loopinvariant:
             assert effectinfo is not None
             assert extraeffect != EffectInfo.EF_FORCES_VIRTUAL_OR_VIRTUALIZABLE
diff --git a/pypy/jit/codewriter/effectinfo.py b/pypy/jit/codewriter/effectinfo.py
--- a/pypy/jit/codewriter/effectinfo.py
+++ b/pypy/jit/codewriter/effectinfo.py
@@ -108,6 +108,9 @@
     def check_forces_virtual_or_virtualizable(self):
         return self.extraeffect >= self.EF_FORCES_VIRTUAL_OR_VIRTUALIZABLE
 
+    def has_random_effects(self):
+        return self.oopspecindex == self.OS_LIBFFI_CALL
+
 def effectinfo_from_writeanalyze(effects, cpu,
                                  extraeffect=EffectInfo.EF_CAN_RAISE,
                                  oopspecindex=EffectInfo.OS_NONE,
diff --git a/pypy/jit/codewriter/jtransform.py b/pypy/jit/codewriter/jtransform.py
--- a/pypy/jit/codewriter/jtransform.py
+++ b/pypy/jit/codewriter/jtransform.py
@@ -209,7 +209,6 @@
     def rewrite_op_cast_int_to_unichar(self, op): pass
     def rewrite_op_cast_int_to_uint(self, op): pass
     def rewrite_op_cast_uint_to_int(self, op): pass
-    def rewrite_op_resume_point(self, op): pass
 
     def _rewrite_symmetric(self, op):
         """Rewrite 'c1+v2' into 'v2+c1' in an attempt to avoid generating
@@ -769,10 +768,10 @@
         from pypy.rpython.lltypesystem.rffi import size_and_sign, sizeof
         from pypy.rlib.rarithmetic import intmask
         assert not self._is_gc(op.args[0])
-        size1, unsigned1 = size_and_sign(op.args[0].concretetype)
         size2, unsigned2 = size_and_sign(op.result.concretetype)
         if size2 >= sizeof(lltype.Signed):
             return     # the target type is LONG or ULONG
+        size1, unsigned1 = size_and_sign(op.args[0].concretetype)
         #
         def bounds(size, unsigned):
             if unsigned:
diff --git a/pypy/jit/codewriter/policy.py b/pypy/jit/codewriter/policy.py
--- a/pypy/jit/codewriter/policy.py
+++ b/pypy/jit/codewriter/policy.py
@@ -63,12 +63,27 @@
             contains_loop = contains_loop and not getattr(
                     func, '_jit_unroll_safe_', False)
 
-        res = see_function and not contains_unsupported_variable_type(graph,
-                                                        self.supports_floats,
-                                                        self.supports_longlong)
+        unsupported = contains_unsupported_variable_type(graph,
+                                                         self.supports_floats,
+                                                         self.supports_longlong)
+        res = see_function and not unsupported
         if res and contains_loop:
             self.unsafe_loopy_graphs.add(graph)
-        return res and not contains_loop
+        res = res and not contains_loop
+        if (see_function and not res and
+            getattr(graph, "access_directly", False)):
+            # This happens when we have a function which has an argument with
+            # the access_directly flag, and the annotator has determined we will
+            # see the function. (See
+            # pypy/annotation/specialize.py:default_specialize) However,
+            # look_inside_graph just decided that we will not see it. (It has a
+            # loop or unsupported variables.) If we return False, the call will
+            # be turned into a residual call, but the graph is access_directly!
+            # If such a function is called and accesses a virtualizable, the JIT
+            # will not notice, and the virtualizable will fall out of sync. So,
+            # we fail loudly now.
+            raise ValueError("access_directly on a function which we don't see %s" % graph)
+        return res
 
 def contains_unsupported_variable_type(graph, supports_floats,
                                        supports_longlong):
diff --git a/pypy/jit/codewriter/test/test_policy.py b/pypy/jit/codewriter/test/test_policy.py
--- a/pypy/jit/codewriter/test/test_policy.py
+++ b/pypy/jit/codewriter/test/test_policy.py
@@ -1,4 +1,5 @@
 import sys
+import py
 from pypy.jit.codewriter.policy import contains_unsupported_variable_type
 from pypy.jit.codewriter.policy import JitPolicy
 from pypy.jit.codewriter import support
@@ -107,3 +108,19 @@
                     mod = called_graph.func.__module__
                     assert (mod == 'pypy.rpython.rlist' or
                             mod == 'pypy.rpython.lltypesystem.rlist')
+
+def test_access_directly_but_not_seen():
+    class X:
+        _virtualizable2_ = ["a"]
+    def h(x, y):
+        w = 0
+        for i in range(y):
+            w += 4
+        return w
+    def f(y):
+        x = jit.hint(X(), access_directly=True)
+        h(x, y)
+    rtyper = support.annotate(f, [3])
+    h_graph = rtyper.annotator.translator.graphs[1]
+    assert h_graph.func is h
+    py.test.raises(ValueError, JitPolicy().look_inside_graph, h_graph)
diff --git a/pypy/jit/metainterp/compile.py b/pypy/jit/metainterp/compile.py
--- a/pypy/jit/metainterp/compile.py
+++ b/pypy/jit/metainterp/compile.py
@@ -4,6 +4,7 @@
 from pypy.objspace.flow.model import Constant, Variable
 from pypy.rlib.objectmodel import we_are_translated
 from pypy.rlib.debug import debug_start, debug_stop
+from pypy.rlib import rstack
 from pypy.conftest import option
 from pypy.tool.sourcetools import func_with_new_name
 
@@ -124,18 +125,21 @@
         return old_loop_token
 
     if loop.preamble.operations is not None:
-        send_loop_to_backend(metainterp_sd, loop, "loop")
+        send_loop_to_backend(greenkey, jitdriver_sd, metainterp_sd, loop,
+                             "loop")
         record_loop_or_bridge(metainterp_sd, loop)
         token = loop.preamble.token
         if full_preamble_needed:
-            send_loop_to_backend(metainterp_sd, loop.preamble, "entry bridge")
+            send_loop_to_backend(greenkey, jitdriver_sd, metainterp_sd,
+                                 loop.preamble, "entry bridge")
             insert_loop_token(old_loop_tokens, loop.preamble.token)
             jitdriver_sd.warmstate.attach_unoptimized_bridge_from_interp(
                 greenkey, loop.preamble.token)
             record_loop_or_bridge(metainterp_sd, loop.preamble)
         return token
     else:
-        send_loop_to_backend(metainterp_sd, loop, "loop")
+        send_loop_to_backend(greenkey, jitdriver_sd, metainterp_sd, loop,
+                             "loop")
         insert_loop_token(old_loop_tokens, loop_token)
         jitdriver_sd.warmstate.attach_unoptimized_bridge_from_interp(
             greenkey, loop.token)
@@ -150,7 +154,10 @@
     # XXX do we still need a list?
     old_loop_tokens.append(loop_token)
 
-def send_loop_to_backend(metainterp_sd, loop, type):
+def send_loop_to_backend(greenkey, jitdriver_sd, metainterp_sd, loop, type):
+    jitdriver_sd.on_compile(metainterp_sd.logger_ops, loop.token,
+                            loop.operations, type, greenkey)
+    loopname = jitdriver_sd.warmstate.get_location_str(greenkey)
     globaldata = metainterp_sd.globaldata
     loop_token = loop.token
     loop_token.number = n = globaldata.loopnumbering
@@ -165,7 +172,7 @@
     debug_start("jit-backend")
     try:
         ops_offset = metainterp_sd.cpu.compile_loop(loop.inputargs, operations,
-                                                    loop.token)
+                                                    loop.token, name=loopname)
     finally:
         debug_stop("jit-backend")
     metainterp_sd.profiler.end_backend()
@@ -186,8 +193,11 @@
     if metainterp_sd.warmrunnerdesc is not None:    # for tests
         metainterp_sd.warmrunnerdesc.memory_manager.keep_loop_alive(loop.token)
 
-def send_bridge_to_backend(metainterp_sd, faildescr, inputargs, operations,
-                           original_loop_token):
+def send_bridge_to_backend(jitdriver_sd, metainterp_sd, faildescr, inputargs,
+                           operations, original_loop_token):
+    n = metainterp_sd.cpu.get_fail_descr_number(faildescr)
+    jitdriver_sd.on_compile_bridge(metainterp_sd.logger_ops,
+                                   original_loop_token, operations, n)
     if not we_are_translated():
         show_loop(metainterp_sd)
         TreeLoop.check_consistency_of(inputargs, operations)
@@ -204,7 +214,6 @@
         metainterp_sd.stats.compiled()
     metainterp_sd.log("compiled new bridge")
     #
-    n = metainterp_sd.cpu.get_fail_descr_number(faildescr)
     metainterp_sd.logger_ops.log_bridge(inputargs, operations, n, ops_offset)
     #
     if metainterp_sd.warmrunnerdesc is not None:    # for tests
@@ -390,8 +399,9 @@
         inputargs = metainterp.history.inputargs
         if not we_are_translated():
             self._debug_suboperations = new_loop.operations
-        send_bridge_to_backend(metainterp.staticdata, self, inputargs,
-                               new_loop.operations, new_loop.token)
+        send_bridge_to_backend(metainterp.jitdriver_sd, metainterp.staticdata,
+                               self, inputargs, new_loop.operations,
+                               new_loop.token)
 
     def copy_all_attributes_into(self, res):
         # XXX a bit ugly to have to list them all here
@@ -444,9 +454,17 @@
         # Called during a residual call from the assembler, if the code
         # actually needs to force one of the virtualrefs or the virtualizable.
         # Implemented by forcing *all* virtualrefs and the virtualizable.
-        faildescr = cpu.force(token)
-        assert isinstance(faildescr, ResumeGuardForcedDescr)
-        faildescr.handle_async_forcing(token)
+
+        # don't interrupt me! If the stack runs out in force_from_resumedata()
+        # then we have seen cpu.force() but not self.save_data(), leaving us
+        # in an inconsistent state
+        rstack._stack_criticalcode_start()
+        try:
+            faildescr = cpu.force(token)
+            assert isinstance(faildescr, ResumeGuardForcedDescr)
+            faildescr.handle_async_forcing(token)
+        finally:
+            rstack._stack_criticalcode_stop()
 
     def handle_async_forcing(self, force_token):
         from pypy.jit.metainterp.resume import force_from_resumedata
@@ -570,7 +588,8 @@
         # to every guard in the loop.
         new_loop_token = make_loop_token(len(redargs), jitdriver_sd)
         new_loop.token = new_loop_token
-        send_loop_to_backend(metainterp_sd, new_loop, "entry bridge")
+        send_loop_to_backend(self.original_greenkey, metainterp.jitdriver_sd,
+                             metainterp_sd, new_loop, "entry bridge")
         # send the new_loop to warmspot.py, to be called directly the next time
         jitdriver_sd.warmstate.attach_unoptimized_bridge_from_interp(
             self.original_greenkey,
diff --git a/pypy/jit/metainterp/executor.py b/pypy/jit/metainterp/executor.py
--- a/pypy/jit/metainterp/executor.py
+++ b/pypy/jit/metainterp/executor.py
@@ -82,9 +82,6 @@
 do_call_loopinvariant = do_call
 do_call_may_force = do_call
 
-def do_call_c(cpu, metainterp, argboxes, descr):
-    raise NotImplementedError("Should never be called directly")
-
 def do_getarrayitem_gc(cpu, _, arraybox, indexbox, arraydescr):
     array = arraybox.getref_base()
     index = indexbox.getint()
@@ -322,6 +319,7 @@
                          rop.DEBUG_MERGE_POINT,
                          rop.JIT_DEBUG,
                          rop.SETARRAYITEM_RAW,
+                         rop.CALL_RELEASE_GIL,
                          rop.QUASIIMMUT_FIELD,
                          ):      # list of opcodes never executed by pyjitpl
                 continue
diff --git a/pypy/jit/metainterp/history.py b/pypy/jit/metainterp/history.py
--- a/pypy/jit/metainterp/history.py
+++ b/pypy/jit/metainterp/history.py
@@ -712,10 +712,14 @@
         return -2      # xxx risk of changing hash...
 
 def make_hashable_int(i):
+    from pypy.rpython.lltypesystem.ll2ctypes import NotCtypesAllocatedStructure
     if not we_are_translated() and isinstance(i, llmemory.AddressAsInt):
         # Warning: such a hash changes at the time of translation
         adr = heaptracker.int2adr(i)
-        return llmemory.cast_adr_to_int(adr, "emulated")
+        try:
+            return llmemory.cast_adr_to_int(adr, "emulated")
+        except NotCtypesAllocatedStructure:
+            return 12345 # use an arbitrary number for the hash
     return i
 
 def get_const_ptr_for_string(s):
@@ -792,6 +796,7 @@
     operations = None
     token = None
     call_pure_results = None
+    logops = None
     quasi_immutable_deps = None
 
     def __init__(self, name):
diff --git a/pypy/jit/metainterp/jitdriver.py b/pypy/jit/metainterp/jitdriver.py
--- a/pypy/jit/metainterp/jitdriver.py
+++ b/pypy/jit/metainterp/jitdriver.py
@@ -20,6 +20,7 @@
     #    self.portal_finishtoken... pypy.jit.metainterp.pyjitpl
     #    self.index             ... pypy.jit.codewriter.call
     #    self.mainjitcode       ... pypy.jit.codewriter.call
+    #    self.on_compile        ... pypy.jit.metainterp.warmstate
 
     # These attributes are read by the backend in CALL_ASSEMBLER:
     #    self.assembler_helper_adr
diff --git a/pypy/jit/metainterp/logger.py b/pypy/jit/metainterp/logger.py
--- a/pypy/jit/metainterp/logger.py
+++ b/pypy/jit/metainterp/logger.py
@@ -11,47 +11,71 @@
 
     def __init__(self, metainterp_sd, guard_number=False):
         self.metainterp_sd = metainterp_sd
-        self.ts = metainterp_sd.cpu.ts
         self.guard_number = guard_number
 
     def log_loop(self, inputargs, operations, number=0, type=None, ops_offset=None):
         if type is None:
             debug_start("jit-log-noopt-loop")
-            self._log_operations(inputargs, operations, ops_offset)
+            logops = self._log_operations(inputargs, operations, ops_offset)
             debug_stop("jit-log-noopt-loop")
         else:
             debug_start("jit-log-opt-loop")
             debug_print("# Loop", number, ":", type,
                         "with", len(operations), "ops")
-            self._log_operations(inputargs, operations, ops_offset)
+            logops = self._log_operations(inputargs, operations, ops_offset)
             debug_stop("jit-log-opt-loop")
+        return logops
 
     def log_bridge(self, inputargs, operations, number=-1, ops_offset=None):
         if number == -1:
             debug_start("jit-log-noopt-bridge")
-            self._log_operations(inputargs, operations, ops_offset)
+            logops = self._log_operations(inputargs, operations, ops_offset)
             debug_stop("jit-log-noopt-bridge")
         else:
             debug_start("jit-log-opt-bridge")
             debug_print("# bridge out of Guard", number,
                         "with", len(operations), "ops")
-            self._log_operations(inputargs, operations, ops_offset)
+            logops = self._log_operations(inputargs, operations, ops_offset)
             debug_stop("jit-log-opt-bridge")
+        return logops
 
     def log_short_preamble(self, inputargs, operations):
         debug_start("jit-log-short-preamble")
-        self._log_operations(inputargs, operations, ops_offset=None)
-        debug_stop("jit-log-short-preamble")            
+        logops = self._log_operations(inputargs, operations, ops_offset=None)
+        debug_stop("jit-log-short-preamble")
+        return logops
+
+    def _log_operations(self, inputargs, operations, ops_offset):
+        if not have_debug_prints():
+            return None
+        logops = self._make_log_operations()
+        logops._log_operations(inputargs, operations, ops_offset)
+        return logops
+
+    def _make_log_operations(self):
+        return LogOperations(self.metainterp_sd, self.guard_number)
+
+
+class LogOperations(object):
+    """
+    ResOperation logger.  Each instance contains a memo giving numbers
+    to boxes, and is typically used to log a single loop.
+    """
+    def __init__(self, metainterp_sd, guard_number):
+        self.metainterp_sd = metainterp_sd
+        self.ts = metainterp_sd.cpu.ts
+        self.guard_number = guard_number
+        self.memo = {}
 
     def repr_of_descr(self, descr):
         return descr.repr_of_descr()
 
-    def repr_of_arg(self, memo, arg):
+    def repr_of_arg(self, arg):
         try:
-            mv = memo[arg]
+            mv = self.memo[arg]
         except KeyError:
-            mv = len(memo)
-            memo[arg] = mv
+            mv = len(self.memo)
+            self.memo[arg] = mv
         if isinstance(arg, ConstInt):
             if int_could_be_an_address(arg.value):
                 addr = arg.getaddr()
@@ -75,48 +99,52 @@
         else:
             return '?'
 
+    def repr_of_resop(self, op, ops_offset=None):
+        if op.getopnum() == rop.DEBUG_MERGE_POINT:
+            jd_sd = self.metainterp_sd.jitdrivers_sd[op.getarg(0).getint()]
+            s = jd_sd.warmstate.get_location_str(op.getarglist()[2:])
+            return "debug_merge_point(%d, '%s')" % (op.getarg(1).getint(), s)
+        if ops_offset is None:
+            offset = -1
+        else:
+            offset = ops_offset.get(op, -1)
+        if offset == -1:
+            s_offset = ""
+        else:
+            s_offset = "+%d: " % offset
+        args = ", ".join([self.repr_of_arg(op.getarg(i)) for i in range(op.numargs())])
+
+        if op.result is not None:
+            res = self.repr_of_arg(op.result) + " = "
+        else:
+            res = ""
+        is_guard = op.is_guard()
+        if op.getdescr() is not None:
+            descr = op.getdescr()
+            if is_guard and self.guard_number:
+                index = self.metainterp_sd.cpu.get_fail_descr_number(descr)
+                r = "<Guard%d>" % index
+            else:
+                r = self.repr_of_descr(descr)
+            args += ', descr=' +  r
+        if is_guard and op.getfailargs() is not None:
+            fail_args = ' [' + ", ".join([self.repr_of_arg(arg)
+                                          for arg in op.getfailargs()]) + ']'
+        else:
+            fail_args = ''
+        return s_offset + res + op.getopname() + '(' + args + ')' + fail_args
+
     def _log_operations(self, inputargs, operations, ops_offset):
         if not have_debug_prints():
             return
         if ops_offset is None:
             ops_offset = {}
-        memo = {}
         if inputargs is not None:
-            args = ", ".join([self.repr_of_arg(memo, arg) for arg in inputargs])
+            args = ", ".join([self.repr_of_arg(arg) for arg in inputargs])
             debug_print('[' + args + ']')
         for i in range(len(operations)):
             op = operations[i]
-            if op.getopnum() == rop.DEBUG_MERGE_POINT:
-                loc = op.getarg(0)._get_str()
-                reclev = op.getarg(1).getint()
-                debug_print("debug_merge_point('%s', %s)" % (loc, reclev))
-                continue
-            offset = ops_offset.get(op, -1)
-            if offset == -1:
-                s_offset = ""
-            else:
-                s_offset = "+%d: " % offset
-            args = ", ".join([self.repr_of_arg(memo, op.getarg(i)) for i in range(op.numargs())])
-            if op.result is not None:
-                res = self.repr_of_arg(memo, op.result) + " = "
-            else:
-                res = ""
-            is_guard = op.is_guard()
-            if op.getdescr() is not None:
-                descr = op.getdescr()
-                if is_guard and self.guard_number:
-                    index = self.metainterp_sd.cpu.get_fail_descr_number(descr)
-                    r = "<Guard%d>" % index
-                else:
-                    r = self.repr_of_descr(descr)
-                args += ', descr=' +  r
-            if is_guard and op.getfailargs() is not None:
-                fail_args = ' [' + ", ".join([self.repr_of_arg(memo, arg)
-                                              for arg in op.getfailargs()]) + ']'
-            else:
-                fail_args = ''
-            debug_print(s_offset + res + op.getopname() +
-                        '(' + args + ')' + fail_args)
+            debug_print(self.repr_of_resop(operations[i], ops_offset))
         if ops_offset and None in ops_offset:
             offset = ops_offset[None]
             debug_print("+%d: --end of the loop--" % offset)
diff --git a/pypy/jit/metainterp/optimize.py b/pypy/jit/metainterp/optimize.py
--- a/pypy/jit/metainterp/optimize.py
+++ b/pypy/jit/metainterp/optimize.py
@@ -14,7 +14,8 @@
 
 def _optimize_loop(metainterp_sd, old_loop_tokens, loop, enable_opts):
     cpu = metainterp_sd.cpu
-    metainterp_sd.logger_noopt.log_loop(loop.inputargs, loop.operations)
+    loop.logops = metainterp_sd.logger_noopt.log_loop(loop.inputargs,
+                                                      loop.operations)
     # XXX do we really still need a list?
     if old_loop_tokens:
         return old_loop_tokens[0]
@@ -36,7 +37,8 @@
 def _optimize_bridge(metainterp_sd, old_loop_tokens, bridge, enable_opts,
                      inline_short_preamble, retraced=False):
     cpu = metainterp_sd.cpu
-    metainterp_sd.logger_noopt.log_loop(bridge.inputargs, bridge.operations)
+    bridge.logops = metainterp_sd.logger_noopt.log_loop(bridge.inputargs,
+                                                        bridge.operations)
     if old_loop_tokens:
         old_loop_token = old_loop_tokens[0]
         bridge.operations[-1].setdescr(old_loop_token)   # patch jump target
diff --git a/pypy/jit/metainterp/optimizeopt/fficall.py b/pypy/jit/metainterp/optimizeopt/fficall.py
--- a/pypy/jit/metainterp/optimizeopt/fficall.py
+++ b/pypy/jit/metainterp/optimizeopt/fficall.py
@@ -1,10 +1,13 @@
 from pypy.rpython.annlowlevel import cast_base_ptr_to_instance
 from pypy.rlib.objectmodel import we_are_translated
 from pypy.rlib.libffi import Func
+from pypy.rlib.debug import debug_start, debug_stop, debug_print, have_debug_prints
 from pypy.jit.codewriter.effectinfo import EffectInfo
 from pypy.jit.metainterp.resoperation import rop, ResOperation
 from pypy.jit.metainterp.optimizeutil import _findall
 from pypy.jit.metainterp.optimizeopt.optimizer import Optimization
+from pypy.jit.backend.llsupport.ffisupport import UnsupportedKind
+
 
 class FuncInfo(object):
 
@@ -12,14 +15,18 @@
     restype = None
     descr = None
     prepare_op = None
-    force_token_op = None
 
     def __init__(self, funcval, cpu, prepare_op):
         self.funcval = funcval
         self.opargs = []
         argtypes, restype = self._get_signature(funcval)
-        self.descr = cpu.calldescrof_dynamic(argtypes, restype)
+        try:
+            self.descr = cpu.calldescrof_dynamic(argtypes, restype)
+        except UnsupportedKind:
+            # e.g., I or U for long longs
+            self.descr = None
         self.prepare_op = prepare_op
+        self.delayed_ops = []
 
     def _get_signature(self, funcval):
         """
@@ -64,37 +71,51 @@
 
 class OptFfiCall(Optimization):
 
-    def __init__(self):
+    def setup(self):
         self.funcinfo = None
+        if self.optimizer.loop is not None:
+            self.logops = self.optimizer.loop.logops
+        else:
+            self.logops = None
+
+    def propagate_begin_forward(self):
+        debug_start('jit-log-ffiopt')
+        Optimization.propagate_begin_forward(self)
+
+    def propagate_end_forward(self):
+        debug_stop('jit-log-ffiopt')
+        Optimization.propagate_end_forward(self)
 
     def reconstruct_for_next_iteration(self, optimizer, valuemap):
         return OptFfiCall()
         # FIXME: Should any status be saved for next iteration?
 
     def begin_optimization(self, funcval, op):
-        self.rollback_maybe()
+        self.rollback_maybe('begin_optimization', op)
         self.funcinfo = FuncInfo(funcval, self.optimizer.cpu, op)
 
     def commit_optimization(self):
         self.funcinfo = None
 
-    def rollback_maybe(self):
+    def rollback_maybe(self, msg, op):
         if self.funcinfo is None:
             return # nothing to rollback
         #
         # we immediately set funcinfo to None to prevent recursion when
         # calling emit_op
+        if self.logops is not None:
+            debug_print('rollback: ' + msg + ': ', self.logops.repr_of_resop(op))
         funcinfo = self.funcinfo
         self.funcinfo = None
         self.emit_operation(funcinfo.prepare_op)
         for op in funcinfo.opargs:
             self.emit_operation(op)
-        if funcinfo.force_token_op:
-            self.emit_operation(funcinfo.force_token_op)
+        for delayed_op in funcinfo.delayed_ops:
+            self.emit_operation(delayed_op)
 
     def emit_operation(self, op):
         # we cannot emit any operation during the optimization
-        self.rollback_maybe()
+        self.rollback_maybe('invalid op', op)
         Optimization.emit_operation(self, op)
 
     def optimize_CALL(self, op):
@@ -135,13 +156,18 @@
         # call_may_force and the setfield_gc, so the final result we get is
         # again force_token/setfield_gc/call_may_force.
         #
+        # However, note that nowadays we also allow any setfield_gc between
+        # libffi_prepare and libffi_call, so while the comment above is a
+        # bit superfluous, it has been left there for future reference.
         if self.funcinfo is None:
             self.emit_operation(op)
         else:
-            self.funcinfo.force_token_op = op
+            self.funcinfo.delayed_ops.append(op)
+
+    optimize_SETFIELD_GC = optimize_FORCE_TOKEN
 
     def do_prepare_call(self, op):
-        self.rollback_maybe()
+        self.rollback_maybe('prepare call', op)
         funcval = self._get_funcval(op)
         if not funcval.is_constant():
             return [op] # cannot optimize
@@ -165,16 +191,18 @@
         for push_op in funcinfo.opargs:
             argval = self.getvalue(push_op.getarg(2))
             arglist.append(argval.force_box())
-        newop = ResOperation(rop.CALL_MAY_FORCE, arglist, op.result,
+        newop = ResOperation(rop.CALL_RELEASE_GIL, arglist, op.result,
                              descr=funcinfo.descr)
         self.commit_optimization()
         ops = []
-        if funcinfo.force_token_op:
-            ops.append(funcinfo.force_token_op)
+        for delayed_op in funcinfo.delayed_ops:
+            ops.append(delayed_op)
         ops.append(newop)
         return ops
 
     def propagate_forward(self, op):
+        if self.logops is not None:
+            debug_print(self.logops.repr_of_resop(op))
         opnum = op.getopnum()
         for value, func in optimize_ops:
             if opnum == value:
diff --git a/pypy/jit/metainterp/optimizeopt/heap.py b/pypy/jit/metainterp/optimizeopt/heap.py
--- a/pypy/jit/metainterp/optimizeopt/heap.py
+++ b/pypy/jit/metainterp/optimizeopt/heap.py
@@ -235,6 +235,7 @@
         assert opnum != rop.CALL_PURE
         if (opnum == rop.CALL or
             opnum == rop.CALL_MAY_FORCE or
+            opnum == rop.CALL_RELEASE_GIL or
             opnum == rop.CALL_ASSEMBLER):
             if opnum == rop.CALL_ASSEMBLER:
                 effectinfo = None
@@ -242,7 +243,7 @@
                 effectinfo = op.getdescr().get_extra_info()
             if effectinfo is None or effectinfo.check_can_invalidate():
                 self._seen_guard_not_invalidated = False
-            if effectinfo is not None:
+            if effectinfo is not None and not effectinfo.has_random_effects():
                 # XXX we can get the wrong complexity here, if the lists
                 # XXX stored on effectinfo are large
                 for fielddescr in effectinfo.readonly_descrs_fields:
diff --git a/pypy/jit/metainterp/optimizeopt/intbounds.py b/pypy/jit/metainterp/optimizeopt/intbounds.py
--- a/pypy/jit/metainterp/optimizeopt/intbounds.py
+++ b/pypy/jit/metainterp/optimizeopt/intbounds.py
@@ -17,6 +17,14 @@
         assert self.posponedop is None
         return self
 
+    def setup(self):
+        self.posponedop = None
+        self.nextop = None
+
+    def reconstruct_for_next_iteration(self, optimizer, valuemap):
+        assert self.posponedop is None
+        return self 
+
     def propagate_forward(self, op):
         if op.is_ovf():
             self.posponedop = op
diff --git a/pypy/jit/metainterp/optimizeopt/optimizer.py b/pypy/jit/metainterp/optimizeopt/optimizer.py
--- a/pypy/jit/metainterp/optimizeopt/optimizer.py
+++ b/pypy/jit/metainterp/optimizeopt/optimizer.py
@@ -175,6 +175,14 @@
     def __init__(self):
         pass # make rpython happy
 
+    def propagate_begin_forward(self):
+        if self.next_optimization:
+            self.next_optimization.propagate_begin_forward()
+
+    def propagate_end_forward(self):
+        if self.next_optimization:
+            self.next_optimization.propagate_end_forward()
+
     def propagate_forward(self, op):
         raise NotImplementedError
 
@@ -406,11 +414,13 @@
         # ^^^ at least at the start of bridges.  For loops, we could set
         # it to False, but we probably don't care
         self.newoperations = []
+        self.first_optimization.propagate_begin_forward()
         self.i = 0
         while self.i < len(self.loop.operations):
             op = self.loop.operations[self.i]
             self.first_optimization.propagate_forward(op)
             self.i += 1
+        self.first_optimization.propagate_end_forward()
         self.loop.operations = self.newoperations
         self.loop.quasi_immutable_deps = self.quasi_immutable_deps
         # accumulate counters
diff --git a/pypy/jit/metainterp/optimizeopt/rewrite.py b/pypy/jit/metainterp/optimizeopt/rewrite.py
--- a/pypy/jit/metainterp/optimizeopt/rewrite.py
+++ b/pypy/jit/metainterp/optimizeopt/rewrite.py
@@ -415,14 +415,22 @@
         dest_start_box = self.get_constant_box(op.getarg(4))
         length = self.get_constant_box(op.getarg(5))
         if (source_value.is_virtual() and source_start_box and dest_start_box
-            and length and dest_value.is_virtual()):
-            # XXX optimize the case where dest value is not virtual,
-            #     but we still can avoid a mess
+            and length and (dest_value.is_virtual() or length.getint() <= 8)):
+            from pypy.jit.metainterp.optimizeopt.virtualize import VArrayValue
+            assert isinstance(source_value, VArrayValue)
             source_start = source_start_box.getint()
             dest_start = dest_start_box.getint()
             for index in range(length.getint()):
                 val = source_value.getitem(index + source_start)
-                dest_value.setitem(index + dest_start, val)
+                if dest_value.is_virtual():
+                    dest_value.setitem(index + dest_start, val)
+                else:
+                    newop = ResOperation(rop.SETARRAYITEM_GC,
+                                         [op.getarg(2),
+                                          ConstInt(index + dest_start),
+                                          val.force_box()], None,
+                                         descr=source_value.arraydescr)
+                    self.emit_operation(newop)
             return True
         if length and length.getint() == 0:
             return True # 0-length arraycopy
@@ -432,6 +440,9 @@
         v1 = self.getvalue(op.getarg(0))
         v2 = self.getvalue(op.getarg(1))
 
+        if v2.is_constant() and v2.box.getint() == 1:
+            self.make_equal_to(op.result, v1)
+            return
         if v1.intbound.known_ge(IntBound(0, 0)) and v2.is_constant():
             val = v2.box.getint()
             if val & (val - 1) == 0 and val > 0: # val == 2**shift
diff --git a/pypy/jit/metainterp/optimizeopt/virtualize.py b/pypy/jit/metainterp/optimizeopt/virtualize.py
--- a/pypy/jit/metainterp/optimizeopt/virtualize.py
+++ b/pypy/jit/metainterp/optimizeopt/virtualize.py
@@ -330,18 +330,28 @@
         vrefvalue.setfield(descr_virtual_token, self.getvalue(tokenbox))
 
     def optimize_VIRTUAL_REF_FINISH(self, op):
-        # Set the 'forced' field of the virtual_ref.
-        # In good cases, this is all virtual, so has no effect.
-        # Otherwise, this forces the real object -- but only now, as
-        # opposed to much earlier.  This is important because the object is
-        # typically a PyPy PyFrame, and now is the end of its execution, so
-        # forcing it now does not have catastrophic effects.
+        # This operation is used in two cases.  In normal cases, it
+        # is the end of the frame, and op.getarg(1) is NULL.  In this
+        # case we just clear the vref.virtual_token, because it contains
+        # a stack frame address and we are about to leave the frame.
+        # In that case vref.forced should still be NULL, and remains
+        # NULL; and accessing the frame through the vref later is
+        # *forbidden* and will raise InvalidVirtualRef.
+        #
+        # In the other (uncommon) case, the operation is produced
+        # earlier, because the vref was forced during tracing already.
+        # In this case, op.getarg(1) is the virtual to force, and we
+        # have to store it in vref.forced.
+        #
         vrefinfo = self.optimizer.metainterp_sd.virtualref_info
-        # op.getarg(1) should really never point to null here
+        seo = self.optimizer.send_extra_operation
+
         # - set 'forced' to point to the real object
-        seo = self.optimizer.send_extra_operation
-        seo(ResOperation(rop.SETFIELD_GC, op.getarglist(), None,
-                         descr = vrefinfo.descr_forced))
+        objbox = op.getarg(1)
+        if not self.optimizer.cpu.ts.CONST_NULL.same_constant(objbox):
+            seo(ResOperation(rop.SETFIELD_GC, op.getarglist(), None,
+                             descr = vrefinfo.descr_forced))
+        
         # - set 'virtual_token' to TOKEN_NONE
         args = [op.getarg(0), ConstInt(vrefinfo.TOKEN_NONE)]
         seo(ResOperation(rop.SETFIELD_GC, args, None,
diff --git a/pypy/jit/metainterp/pyjitpl.py b/pypy/jit/metainterp/pyjitpl.py
--- a/pypy/jit/metainterp/pyjitpl.py
+++ b/pypy/jit/metainterp/pyjitpl.py
@@ -4,7 +4,7 @@
 from pypy.rlib.unroll import unrolling_iterable
 from pypy.rlib.debug import debug_start, debug_stop, debug_print
 from pypy.rlib.debug import make_sure_not_resized
-from pypy.rlib import nonconst
+from pypy.rlib import nonconst, rstack
 
 from pypy.jit.metainterp import history, compile, resume
 from pypy.jit.metainterp.history import Const, ConstInt, ConstPtr, ConstFloat
@@ -867,8 +867,7 @@
         any_operation = len(self.metainterp.history.operations) > 0
         jitdriver_sd = self.metainterp.staticdata.jitdrivers_sd[jdindex]
         self.verify_green_args(jitdriver_sd, greenboxes)
-        # xxx we may disable the following line in some context later
-        self.debug_merge_point(jitdriver_sd, self.metainterp.in_recursion,
+        self.debug_merge_point(jdindex, self.metainterp.in_recursion,
                                greenboxes)
 
         if self.metainterp.seen_loop_header_for_jdindex < 0:
@@ -915,13 +914,10 @@
                                     assembler_call=True)
             raise ChangeFrame
 
-    def debug_merge_point(self, jitdriver_sd, in_recursion, greenkey):
+    def debug_merge_point(self, jd_index, in_recursion, greenkey):
         # debugging: produce a DEBUG_MERGE_POINT operation
-        loc = jitdriver_sd.warmstate.get_location_str(greenkey)
-        debug_print(loc)
-        constloc = self.metainterp.cpu.ts.conststr(loc)
-        self.metainterp.history.record(rop.DEBUG_MERGE_POINT,
-                                       [constloc, ConstInt(in_recursion)], None)
+        args = [ConstInt(jd_index), ConstInt(in_recursion)] + greenkey
+        self.metainterp.history.record(rop.DEBUG_MERGE_POINT, args, None)
 
     @arguments("box", "label")
     def opimpl_goto_if_exception_mismatch(self, vtablebox, next_exc_target):
@@ -1049,8 +1045,10 @@
         vrefinfo = metainterp.staticdata.virtualref_info
         vref = vrefbox.getref_base()
         if vrefinfo.is_virtual_ref(vref):
+            # XXX write a comment about nullbox
+            nullbox = self.metainterp.cpu.ts.CONST_NULL
             metainterp.history.record(rop.VIRTUAL_REF_FINISH,
-                                      [vrefbox, lastbox], None)
+                                      [vrefbox, nullbox], None)
 
     @arguments()
     def opimpl_ll_read_timestamp(self):
@@ -2052,10 +2050,16 @@
 
     def initialize_state_from_guard_failure(self, resumedescr):
         # guard failure: rebuild a complete MIFrame stack
-        self.in_recursion = -1 # always one portal around
-        self.history = history.History()
-        inputargs_and_holes = self.rebuild_state_after_failure(resumedescr)
-        self.history.inputargs = [box for box in inputargs_and_holes if box]
+        # This is stack-critical code: it must not be interrupted by StackOverflow,
+        # otherwise the jit_virtual_refs are left in a dangling state.
+        rstack._stack_criticalcode_start()
+        try:
+            self.in_recursion = -1 # always one portal around
+            self.history = history.History()
+            inputargs_and_holes = self.rebuild_state_after_failure(resumedescr)
+            self.history.inputargs = [box for box in inputargs_and_holes if box]
+        finally:
+            rstack._stack_criticalcode_stop()
 
     def initialize_virtualizable(self, original_boxes):
         vinfo = self.jitdriver_sd.virtualizable_info
diff --git a/pypy/jit/metainterp/resoperation.py b/pypy/jit/metainterp/resoperation.py
--- a/pypy/jit/metainterp/resoperation.py
+++ b/pypy/jit/metainterp/resoperation.py
@@ -471,8 +471,9 @@
     'STRSETITEM/3',
     'UNICODESETITEM/3',
     #'RUNTIMENEW/1',     # ootype operation
-    'COND_CALL_GC_WB/2d', # [objptr, newvalue]   (for the write barrier)
-    'DEBUG_MERGE_POINT/2',      # debugging only
+    'COND_CALL_GC_WB/2d', # [objptr, newvalue] or [arrayptr, index]
+                          # (for the write barrier, latter is in an array)
+    'DEBUG_MERGE_POINT/*',      # debugging only
     'JIT_DEBUG/*',              # debugging only
     'VIRTUAL_REF_FINISH/2',   # removed before it's passed to the backend
     'COPYSTRCONTENT/5',       # src, dst, srcstart, dststart, length
@@ -485,6 +486,7 @@
     'CALL_ASSEMBLER/*d',  # call already compiled assembler
     'CALL_MAY_FORCE/*d',
     'CALL_LOOPINVARIANT/*d',
+    'CALL_RELEASE_GIL/*d',  # release the GIL and "close the stack" for asmgcc
     #'OOSEND',                     # ootype operation
     #'OOSEND_PURE',                # ootype operation
     'CALL_PURE/*d',             # removed before it's passed to the backend
diff --git a/pypy/jit/metainterp/resume.py b/pypy/jit/metainterp/resume.py
--- a/pypy/jit/metainterp/resume.py
+++ b/pypy/jit/metainterp/resume.py
@@ -6,7 +6,7 @@
 from pypy.jit.metainterp import jitprof
 from pypy.jit.codewriter.effectinfo import EffectInfo
 from pypy.rpython.lltypesystem import lltype, llmemory, rffi, rstr
-from pypy.rlib import rarithmetic
+from pypy.rlib import rarithmetic, rstack
 from pypy.rlib.objectmodel import we_are_translated, specialize
 from pypy.rlib.debug import have_debug_prints, ll_assert
 from pypy.rlib.debug import debug_start, debug_stop, debug_print
@@ -978,12 +978,18 @@
 
 def blackhole_from_resumedata(blackholeinterpbuilder, jitdriver_sd, storage,
                               all_virtuals=None):
-    resumereader = ResumeDataDirectReader(blackholeinterpbuilder.metainterp_sd,
-                                          storage, all_virtuals)
-    vinfo = jitdriver_sd.virtualizable_info
-    ginfo = jitdriver_sd.greenfield_info
-    vrefinfo = blackholeinterpbuilder.metainterp_sd.virtualref_info
-    resumereader.consume_vref_and_vable(vrefinfo, vinfo, ginfo)
+    # The initialization is stack-critical code: it must not be interrupted by
+    # StackOverflow, otherwise the jit_virtual_refs are left in a dangling state.
+    rstack._stack_criticalcode_start()
+    try:
+        resumereader = ResumeDataDirectReader(blackholeinterpbuilder.metainterp_sd,
+                                              storage, all_virtuals)
+        vinfo = jitdriver_sd.virtualizable_info
+        ginfo = jitdriver_sd.greenfield_info
+        vrefinfo = blackholeinterpbuilder.metainterp_sd.virtualref_info
+        resumereader.consume_vref_and_vable(vrefinfo, vinfo, ginfo)
+    finally:
+        rstack._stack_criticalcode_stop()
     #
     # First get a chain of blackhole interpreters whose length is given
     # by the depth of rd_frame_info_list.  The first one we get must be
diff --git a/pypy/jit/metainterp/test/support.py b/pypy/jit/metainterp/test/support.py
--- a/pypy/jit/metainterp/test/support.py
+++ b/pypy/jit/metainterp/test/support.py
@@ -15,17 +15,24 @@
                   supports_longlong=False, **kwds):
     from pypy.jit.codewriter import support
 
-    class FakeJitCell:
+    class FakeJitCell(object):
         __compiled_merge_points = []
         def get_compiled_merge_points(self):
             return self.__compiled_merge_points[:]
         def set_compiled_merge_points(self, lst):
             self.__compiled_merge_points = lst
 
-    class FakeWarmRunnerState:
+    class FakeWarmRunnerState(object):
         def attach_unoptimized_bridge_from_interp(self, greenkey, newloop):
             pass
 
+        def helper_func(self, FUNCPTR, func):
+            from pypy.rpython.annlowlevel import llhelper
+            return llhelper(FUNCPTR, func)
+
+        def get_location_str(self, args):
+            return 'location'
+
         def jit_cell_at_key(self, greenkey):
             assert greenkey == []
             return self._cell
@@ -37,6 +44,7 @@
     func._jit_unroll_safe_ = True
     rtyper = support.annotate(func, values, type_system=type_system)
     graphs = rtyper.annotator.translator.graphs
+    testself.all_graphs = graphs
     result_kind = history.getkind(graphs[0].getreturnvar().concretetype)[0]
 
     class FakeJitDriverSD:
@@ -46,6 +54,8 @@
         greenfield_info = None
         result_type = result_kind
         portal_runner_ptr = "???"
+        on_compile = lambda *args: None
+        on_compile_bridge = lambda *args: None
 
     stats = history.Stats()
     cpu = CPUClass(rtyper, stats, None, False)
diff --git a/pypy/jit/metainterp/test/test_compile.py b/pypy/jit/metainterp/test/test_compile.py
--- a/pypy/jit/metainterp/test/test_compile.py
+++ b/pypy/jit/metainterp/test/test_compile.py
@@ -30,13 +30,16 @@
     ts = typesystem.llhelper
     def __init__(self):
         self.seen = []
-    def compile_loop(self, inputargs, operations, token):
+    def compile_loop(self, inputargs, operations, token, name=''):
         self.seen.append((inputargs, operations, token))
 
 class FakeLogger(object):
     def log_loop(self, inputargs, operations, number=0, type=None, ops_offset=None):
         pass
 
+    def repr_of_resop(self, op):
+        return repr(op)
+
 class FakeState(object):
     enable_opts = ALL_OPTS_DICT.copy()
     enable_opts.pop('unroll')
@@ -44,6 +47,9 @@
     def attach_unoptimized_bridge_from_interp(*args):
         pass
 
+    def get_location_str(self, args):
+        return 'location'
+
 class FakeGlobalData(object):
     loopnumbering = 0
 
@@ -63,6 +69,8 @@
     call_pure_results = {}
     class jitdriver_sd:
         warmstate = FakeState()
+        on_compile = staticmethod(lambda *args: None)
+        on_compile_bridge = staticmethod(lambda *args: None)
 
 def test_compile_new_loop():
     cpu = FakeCPU()
diff --git a/pypy/jit/metainterp/test/test_fficall.py b/pypy/jit/metainterp/test/test_fficall.py
--- a/pypy/jit/metainterp/test/test_fficall.py
+++ b/pypy/jit/metainterp/test/test_fficall.py
@@ -1,28 +1,46 @@
 
 import py
-from pypy.rlib.jit import JitDriver, hint
+from pypy.rlib.rarithmetic import r_singlefloat, r_longlong, r_ulonglong
+from pypy.rlib.jit import JitDriver, hint, dont_look_inside
 from pypy.rlib.unroll import unrolling_iterable
-from pypy.rlib.libffi import ArgChain
+from pypy.rlib.libffi import ArgChain, longlong2float, float2longlong
+from pypy.rlib.libffi import IS_32_BIT
 from pypy.rlib.test.test_libffi import TestLibffiCall as _TestLibffiCall
 from pypy.rpython.lltypesystem import lltype, rffi
+from pypy.rlib.objectmodel import specialize
+from pypy.tool.sourcetools import func_with_new_name
 from pypy.jit.metainterp.test.support import LLJitMixin
 
-
 class TestFfiCall(LLJitMixin, _TestLibffiCall):
 
     # ===> ../../../rlib/test/test_libffi.py
 
-    def call(self, funcspec, args, RESULT, init_result=0):
+    def call(self, funcspec, args, RESULT, init_result=0, is_struct=False):
         """
         Call the function specified by funcspec in a loop, and let the jit to
         see and optimize it.
         """
         #
         lib, name, argtypes, restype = funcspec
-        args = unrolling_iterable(args)
+        method_and_args = []
+        for argval in args:
+            if type(argval) is r_singlefloat:
+                method_name = 'arg_singlefloat'
+                argval = float(argval)
+            elif IS_32_BIT and type(argval) in [r_longlong, r_ulonglong]:
+                method_name = 'arg_longlong'
+                argval = rffi.cast(rffi.LONGLONG, argval)
+                argval = longlong2float(argval)
+            elif isinstance(argval, tuple):
+                method_name, argval = argval
+            else:
+                method_name = 'arg'
+            method_and_args.append((method_name, argval))
+        method_and_args = unrolling_iterable(method_and_args)
         #
         reds = ['n', 'res', 'func']
-        if type(init_result) is float:
+        if (RESULT in [rffi.FLOAT, rffi.DOUBLE] or
+            IS_32_BIT and RESULT in [rffi.LONGLONG, rffi.ULONGLONG]):
             reds = ['n', 'func', 'res'] # floats must be *after* refs
         driver = JitDriver(reds=reds, greens=[])
         #
@@ -34,12 +52,17 @@
                 driver.can_enter_jit(n=n, res=res, func=func)
                 func = hint(func, promote=True)
                 argchain = ArgChain()
-                for argval in args: # this loop is unrolled
-                    argchain.arg(argval)
-                res = func.call(argchain, RESULT)
+                # this loop is unrolled
+                for method_name, argval in method_and_args:
+                    getattr(argchain, method_name)(argval)
+                res = func.call(argchain, RESULT, is_struct=is_struct)
                 n += 1
             return res
         #
-        res = self.meta_interp(f, [0])
+        res = self.meta_interp(f, [0], backendopt=True)
         return res
 
+    def test_byval_result(self):
+        _TestLibffiCall.test_byval_result(self)
+    test_byval_result.__doc__ = _TestLibffiCall.test_byval_result.__doc__
+    test_byval_result.dont_track_allocations = True
diff --git a/pypy/jit/metainterp/test/test_history.py b/pypy/jit/metainterp/test/test_history.py
--- a/pypy/jit/metainterp/test/test_history.py
+++ b/pypy/jit/metainterp/test/test_history.py
@@ -1,5 +1,5 @@
 from pypy.jit.metainterp.history import *
-from pypy.rpython.lltypesystem import lltype, llmemory
+from pypy.rpython.lltypesystem import lltype, llmemory, rffi
 
 
 def test_repr():
@@ -10,6 +10,18 @@
     const = ConstPtr(lltype.cast_opaque_ptr(llmemory.GCREF, s))
     assert const._getrepr_() == "*T"
 
+def test_repr_ll2ctypes():
+    ptr = lltype.malloc(rffi.VOIDPP.TO, 10, flavor='raw')
+    # force it to be a ll2ctypes object
+    ptr = rffi.cast(rffi.VOIDPP, rffi.cast(rffi.LONG, ptr))
+    adr = llmemory.cast_ptr_to_adr(ptr)
+    lltype.free(ptr, flavor='raw')
+    intval = llmemory.cast_adr_to_int(adr, 'symbolic')
+    box = BoxInt(intval)
+    s = box.repr_rpython()
+    assert s.startswith('12345/') # the arbitrary hash value used by
+                                  # make_hashable_int
+
 def test_same_constant():
     c1a = ConstInt(0)
     c1b = ConstInt(0)
diff --git a/pypy/jit/metainterp/test/test_jitdriver.py b/pypy/jit/metainterp/test/test_jitdriver.py
--- a/pypy/jit/metainterp/test/test_jitdriver.py
+++ b/pypy/jit/metainterp/test/test_jitdriver.py
@@ -10,8 +10,59 @@
 def getloc2(g):
     return "in jitdriver2, with g=%d" % g
 
+class JitDriverTests(object):
+    def test_on_compile(self):
+        called = {}
+        
+        class MyJitDriver(JitDriver):
+            def on_compile(self, logger, looptoken, operations, type, n, m):
+                called[(m, n, type)] = looptoken
 
-class MultipleJitDriversTests:
+        driver = MyJitDriver(greens = ['n', 'm'], reds = ['i'])
+
+        def loop(n, m):
+            i = 0
+            while i < n + m:
+                driver.can_enter_jit(n=n, m=m, i=i)
+                driver.jit_merge_point(n=n, m=m, i=i)
+                i += 1
+
+        self.meta_interp(loop, [1, 4])
+        assert sorted(called.keys()) == [(4, 1, "entry bridge"), (4, 1, "loop")]
+        self.meta_interp(loop, [2, 4])
+        assert sorted(called.keys()) == [(4, 1, "entry bridge"), (4, 1, "loop"),
+                                         (4, 2, "entry bridge"), (4, 2, "loop")]
+
+    def test_on_compile_bridge(self):
+        called = {}
+        
+        class MyJitDriver(JitDriver):
+            def on_compile(self, logger, looptoken, operations, type, n, m):
+                called[(m, n, type)] = loop
+            def on_compile_bridge(self, logger, orig_token, operations, n):
+                assert 'bridge' not in called
+                called['bridge'] = orig_token
+
+        driver = MyJitDriver(greens = ['n', 'm'], reds = ['i'])
+
+        def loop(n, m):
+            i = 0
+            while i < n + m:
+                driver.can_enter_jit(n=n, m=m, i=i)
+                driver.jit_merge_point(n=n, m=m, i=i)
+                if i >= 4:
+                    i += 2
+                i += 1
+
+        self.meta_interp(loop, [1, 10])
+        assert sorted(called.keys()) == ['bridge', (10, 1, "entry bridge"),
+                                         (10, 1, "loop")]
+
+
+class TestLLtypeSingle(JitDriverTests, LLJitMixin):
+    pass
+
+class MultipleJitDriversTests(object):
 
     def test_simple(self):
         myjitdriver1 = JitDriver(greens=[], reds=['n', 'm'],
diff --git a/pypy/jit/metainterp/test/test_list.py b/pypy/jit/metainterp/test/test_list.py
--- a/pypy/jit/metainterp/test/test_list.py
+++ b/pypy/jit/metainterp/test/test_list.py
@@ -236,4 +236,8 @@
             return a * b
         res = self.meta_interp(f, [37])
         assert res == f(37)
-        self.check_loops(getfield_gc=1, everywhere=True)
+        # There is the one actual field on a, plus 2 getfields from the list
+        # itself: one to get the length (which is then incremented and passed
+        # to the resize func), and one read of the items field to actually
+        # perform the setarrayitem on.
+        self.check_loops(getfield_gc=5, everywhere=True)
diff --git a/pypy/jit/metainterp/test/test_logger.py b/pypy/jit/metainterp/test/test_logger.py
--- a/pypy/jit/metainterp/test/test_logger.py
+++ b/pypy/jit/metainterp/test/test_logger.py
@@ -36,19 +36,29 @@
         return capturing(logger.Logger.log_loop, self,
                          loop.inputargs, loop.operations, ops_offset=ops_offset)
 
-    def repr_of_descr(self, descr):
-        for k, v in self.namespace.items():
-            if v == descr:
-                return k
-        return descr.repr_of_descr()
+    def _make_log_operations(self1):
+        class LogOperations(logger.LogOperations):
+            def repr_of_descr(self, descr):
+                for k, v in self1.namespace.items():
+                    if v == descr:
+                        return k
+                return descr.repr_of_descr()
+        logops = LogOperations(self1.metainterp_sd, self1.guard_number)
+        self1.logops = logops
+        return logops
 
 class TestLogger(object):
     ts = llhelper
 
     def make_metainterp_sd(self):
+        class FakeJitDriver(object):
+            class warmstate(object):
+                get_location_str = staticmethod(lambda args: args[0]._get_str())
+        
         class FakeMetaInterpSd:
             cpu = AbstractCPU()
             cpu.ts = self.ts
+            jitdrivers_sd = [FakeJitDriver()]
             def get_name_from_address(self, addr):
                 return 'Name'
         return FakeMetaInterpSd()
@@ -66,7 +76,7 @@
         if check_equal:
             equaloplists(loop.operations, oloop.operations)
             assert oloop.inputargs == loop.inputargs
-        return loop, oloop
+        return logger, loop, oloop
     
     def test_simple(self):
         inp = '''
@@ -106,18 +116,18 @@
     def test_debug_merge_point(self):
         inp = '''
         []
-        debug_merge_point("info", 0)
+        debug_merge_point(0, 0, "dupa")
         '''
-        loop, oloop = self.reparse(inp, check_equal=False)
-        assert loop.operations[0].getarg(0)._get_str() == 'info'
-        assert oloop.operations[0].getarg(0)._get_str() == 'info'
+        _, loop, oloop = self.reparse(inp, check_equal=False)
+        assert loop.operations[0].getarg(2)._get_str() == "dupa"
+        assert oloop.operations[0].getarg(1)._get_str() == "dupa"
         
     def test_floats(self):
         inp = '''
         [f0]
         f1 = float_add(3.5, f0)
         '''
-        loop, oloop = self.reparse(inp)
+        _, loop, oloop = self.reparse(inp)
         equaloplists(loop.operations, oloop.operations)
 
     def test_jump(self):
@@ -179,6 +189,17 @@
         assert output.splitlines()[0] == "# bridge out of Guard 3 with 0 ops"
         pure_parse(output)
 
+    def test_repr_single_op(self):
+        inp = '''
+        [i0, i1, i2, p3, p4, p5]
+        i6 = int_add(i1, i2)
+        i8 = int_add(i6, 3)
+        jump(i0, i8, i6, p3, p4, p5)
+        '''
+        logger, loop, _ = self.reparse(inp)
+        op = loop.operations[1]
+        assert logger.logops.repr_of_resop(op) == "i8 = int_add(i6, 3)"
+
     def test_ops_offset(self):
         inp = '''
         [i0]
diff --git a/pypy/jit/metainterp/test/test_optimizebasic.py b/pypy/jit/metainterp/test/test_optimizebasic.py
--- a/pypy/jit/metainterp/test/test_optimizebasic.py
+++ b/pypy/jit/metainterp/test/test_optimizebasic.py
@@ -3,6 +3,7 @@
 from pypy.jit.metainterp.test.test_optimizeutil import (LLtypeMixin,
                                                         #OOtypeMixin,
                                                         BaseTest)
+from pypy.jit.metainterp.test.test_compile import FakeLogger
 import pypy.jit.metainterp.optimizeopt.optimizer as optimizeopt
 import pypy.jit.metainterp.optimizeopt.virtualize as virtualize
 from pypy.jit.metainterp.optimizeutil import InvalidLoop
@@ -32,6 +33,8 @@
         self.profiler = EmptyProfiler()
         self.options = Fake()
         self.globaldata = Fake()
+        self.logger_ops = FakeLogger()
+        self.logger_noopt = FakeLogger()
 
 def test_store_final_boxes_in_guard():
     from pypy.jit.metainterp.compile import ResumeGuardDescr
diff --git a/pypy/jit/metainterp/test/test_optimizefficall.py b/pypy/jit/metainterp/test/test_optimizefficall.py
--- a/pypy/jit/metainterp/test/test_optimizefficall.py
+++ b/pypy/jit/metainterp/test/test_optimizefficall.py
@@ -38,6 +38,8 @@
         cpu = LLtypeMixin.cpu
         FUNC = LLtypeMixin.FUNC
         vable_token_descr = LLtypeMixin.valuedescr
+        valuedescr = LLtypeMixin.valuedescr
+
         int_float__int = MyCallDescr('if', 'i')
         funcptr = FakeLLObject()
         func = FakeLLObject(_fake_class=Func,
@@ -76,7 +78,7 @@
         """
         expected = """
         [i0, f1]
-        i3 = call_may_force(12345, i0, f1, descr=int_float__int)
+        i3 = call_release_gil(12345, i0, f1, descr=int_float__int)
         guard_not_forced() []
         guard_no_exception() []
         jump(i3, f1)
@@ -99,7 +101,7 @@
 
     def test_handle_virtualizables(self):
         # this test needs an explanation to understand what goes on: see the
-        # coment in optimize_FORCE_TOKEN
+        # comment in optimize_FORCE_TOKEN
         ops = """
         [i0, f1, p2]
         call(0, ConstPtr(func),                       descr=libffi_prepare)
@@ -116,7 +118,7 @@
         [i0, f1, p2]
         i4 = force_token()
         setfield_gc(p2, i4, descr=vable_token_descr)
-        i3 = call_may_force(12345, i0, f1, descr=int_float__int)
+        i3 = call_release_gil(12345, i0, f1, descr=int_float__int)
         guard_not_forced() [p2]
         guard_no_exception() [p2]
         jump(i3, f1, p2)
@@ -213,7 +215,7 @@
         call(0, ConstPtr(func),                        descr=libffi_prepare)
         #
         # this "nested" call is nicely optimized
-        i4 = call_may_force(67890, i0, f1, descr=int_float__int)
+        i4 = call_release_gil(67890, i0, f1, descr=int_float__int)
         guard_not_forced() []
         guard_no_exception() []
         #
@@ -242,3 +244,25 @@
         """
         expected = ops
         loop = self.optimize_loop(ops, expected)
+
+    def test_allow_setfields_in_between(self):
+        ops = """
+        [i0, f1, p2]
+        call(0, ConstPtr(func),                       descr=libffi_prepare)
+        call(0, ConstPtr(func), i0,                   descr=libffi_push_arg)
+        call(0, ConstPtr(func), f1,                   descr=libffi_push_arg)
+        setfield_gc(p2, i0,                           descr=valuedescr)
+        i3 = call_may_force(0, ConstPtr(func), 12345, descr=libffi_call)
+        guard_not_forced() []
+        guard_no_exception() []
+        jump(i3, f1, p2)
+        """
+        expected = """
+        [i0, f1, p2]
+        setfield_gc(p2, i0, descr=valuedescr)
+        i3 = call_release_gil(12345, i0, f1, descr=int_float__int)
+        guard_not_forced() []
+        guard_no_exception() []
+        jump(i3, f1, p2)
+        """
+        loop = self.optimize_loop(ops, expected)
diff --git a/pypy/jit/metainterp/test/test_optimizeopt.py b/pypy/jit/metainterp/test/test_optimizeopt.py
--- a/pypy/jit/metainterp/test/test_optimizeopt.py
+++ b/pypy/jit/metainterp/test/test_optimizeopt.py
@@ -3402,6 +3402,56 @@
         '''
         self.optimize_loop(ops, expected)
 
+    def test_arraycopy_dest_not_virtual(self):
+        ops = '''
+        []
+        p1 = new_array(3, descr=arraydescr)
+        p2 = new_array(3, descr=arraydescr)
+        setarrayitem_gc(p1, 2, 10, descr=arraydescr)
+        setarrayitem_gc(p2, 2, 13, descr=arraydescr)
+        escape(p2)
+        call(0, p1, p2, 0, 0, 3, descr=arraycopydescr)
+        escape(p2)
+        jump()
+        '''
+        expected = '''
+        []
+        p2 = new_array(3, descr=arraydescr)
+        setarrayitem_gc(p2, 2, 13, descr=arraydescr)
+        escape(p2)
+        setarrayitem_gc(p2, 0, 0, descr=arraydescr)
+        setarrayitem_gc(p2, 1, 0, descr=arraydescr)
+        setarrayitem_gc(p2, 2, 10, descr=arraydescr)
+        escape(p2)
+        jump()
+        '''
+        self.optimize_loop(ops, expected)
+
+    def test_arraycopy_dest_not_virtual_too_long(self):
+        ops = '''
+        []
+        p1 = new_array(10, descr=arraydescr)
+        p2 = new_array(10, descr=arraydescr)
+        setarrayitem_gc(p1, 2, 10, descr=arraydescr)
+        setarrayitem_gc(p2, 2, 13, descr=arraydescr)
+        escape(p2)
+        call(0, p1, p2, 0, 0, 10, descr=arraycopydescr)
+        escape(p2)
+        jump()
+        '''
+        expected = '''
+        []
+        p2 = new_array(10, descr=arraydescr)
+        setarrayitem_gc(p2, 2, 13, descr=arraydescr)
+        escape(p2)
+        p1 = new_array(10, descr=arraydescr)
+        setarrayitem_gc(p1, 2, 10, descr=arraydescr)
+        call(0, p1, p2, 0, 0, 10, descr=arraycopydescr)
+        escape(p2)
+        jump()
+        '''
+        self.optimize_loop(ops, expected)
+
     def test_bound_lt(self):
         ops = """
         [i0]
@@ -3899,7 +3949,7 @@
         jump(i4, i10)
         """
         self.optimize_loop(ops, expected)
-        
+
     def test_add_sub_ovf(self):
         ops = """
         [i1]
@@ -3939,7 +3989,7 @@
         [i0, i1]
         escape(i1)
         i2 = int_add_ovf(i0, 1)
-        guard_no_overflow() []        
+        guard_no_overflow() []
         jump(i2, i0)
         """
         self.optimize_loop(ops, expected)
@@ -4420,7 +4470,6 @@
         i8 = int_floordiv(4, i2)
         i9 = int_rshift(i1, 2)
         i10 = int_floordiv(i1, 0)
-        i11 = int_rshift(i1, 0)
         i12 = int_floordiv(i2, 2)
         i13 = int_floordiv(i2, 3)
         i14 = int_floordiv(i2, 4)
@@ -4497,6 +4546,18 @@
         """
         self.optimize_loop(ops, expected)
 
+    def test_int_div_1(self):
+        ops = """
+        [i0]
+        i1 = int_floordiv(i0, 1)
+        jump(i1)
+        """
+        expected = """
+        [i0]
+        jump(i0)
+        """
+        self.optimize_loop(ops, expected)
+
     def test_subsub_ovf(self):
         ops = """
         [i0]
diff --git a/pypy/jit/metainterp/test/test_tl.py b/pypy/jit/metainterp/test/test_tl.py
--- a/pypy/jit/metainterp/test/test_tl.py
+++ b/pypy/jit/metainterp/test/test_tl.py
@@ -58,7 +58,7 @@
             exit:
                 RETURN
         ''')
-        
+
         codes = [code, code2]
         def main(n, inputarg):
             code = codes[n]
@@ -116,7 +116,7 @@
         codes = [code, '']
         def main(num, arg):
             return interp(codes[num], inputarg=arg)
-        
+
         res = self.meta_interp(main, [0, 20], enable_opts='',
                                listops=listops, backendopt=True, policy=policy)
         assert res == 0
@@ -128,7 +128,6 @@
         from pypy.jit.tl.tl import Stack
         methods = [Stack.put,
                    Stack.pick,
-                   Stack.roll,
                    Stack.append,
                    Stack.pop]
         for meth in methods:
diff --git a/pypy/jit/metainterp/test/test_virtualref.py b/pypy/jit/metainterp/test/test_virtualref.py
--- a/pypy/jit/metainterp/test/test_virtualref.py
+++ b/pypy/jit/metainterp/test/test_virtualref.py
@@ -1,9 +1,10 @@
 import py
 from pypy.rpython.lltypesystem import lltype, llmemory, lloperation
+from pypy.rpython.llinterp import LLException
 from pypy.rlib.jit import JitDriver, dont_look_inside, vref_None
-from pypy.rlib.jit import virtual_ref, virtual_ref_finish
+from pypy.rlib.jit import virtual_ref, virtual_ref_finish, InvalidVirtualRef
 from pypy.rlib.objectmodel import compute_unique_id
-from pypy.jit.metainterp.test.support import LLJitMixin, OOJitMixin
+from pypy.jit.metainterp.test.support import LLJitMixin, OOJitMixin, _get_jitcodes
 from pypy.jit.metainterp.resoperation import rop
 from pypy.jit.metainterp.virtualref import VirtualRefInfo
 
@@ -16,6 +17,29 @@
         self.vrefinfo = VirtualRefInfo(self.warmrunnerstate)
         self.cw.setup_vrefinfo(self.vrefinfo)
 
+    def test_rewrite_graphs(self):
+        class X:
+            pass
+        def fn():
+            x = X()
+            vref = virtual_ref(x)
+            x1 = vref()                  # jit_force_virtual
+            virtual_ref_finish(vref, x)
+        #
+        _get_jitcodes(self, self.CPUClass, fn, [], self.type_system)
+        graph = self.all_graphs[0]
+        assert graph.name == 'fn'
+        self.vrefinfo.replace_force_virtual_with_call([graph])
+        #
+        def check_call(op, fname):
+            assert op.opname == 'direct_call'
+            assert op.args[0].value._obj._name == fname
+        #
+        ops = [op for block, op in graph.iterblockops()]
+        check_call(ops[-3], 'virtual_ref')
+        check_call(ops[-2], 'force_virtual_if_necessary')
+        check_call(ops[-1], 'virtual_ref_finish')
+
     def test_make_vref_simple(self):
         class X:
             pass
@@ -25,9 +49,9 @@
         #
         def f():
             x = X()
-            exctx.topframeref = virtual_ref(x)
+            exctx.topframeref = vref = virtual_ref(x)
             exctx.topframeref = vref_None
-            virtual_ref_finish(x)
+            virtual_ref_finish(vref, x)
             return 1
         #
         self.interp_operations(f, [])
@@ -60,8 +84,9 @@
             exctx._frame = x
             exctx.topframeref = virtual_ref(x)
         def leave():
+            vref = exctx.topframeref
             exctx.topframeref = vref_None
-            virtual_ref_finish(exctx._frame)
+            virtual_ref_finish(vref, exctx._frame)
         def f(n):
             enter(n)
             n = external(n)
@@ -125,7 +150,8 @@
         #
         @dont_look_inside
         def g(vref):
-            debug_print(lltype.Void, '-+-+-+-+- external read:', vref().n)
+            # we cannot do anything with the vref after the call to finish()
+            pass
         #
         def f(n):
             while n > 0:
@@ -136,7 +162,7 @@
                 exctx.topframeref = vref = virtual_ref(x)
                 # here, 'x' should be virtual
                 exctx.topframeref = vref_None
-                virtual_ref_finish(x)
+                virtual_ref_finish(vref, x)
                 # 'x' and 'vref' can randomly escape after the call to
                 # finish().
                 g(vref)
@@ -144,7 +170,7 @@
             return 1
         #
         self.meta_interp(f, [10])
-        self.check_loops(new_with_vtable=2)   # the vref and the X
+        self.check_loops(new_with_vtable=1)   # the vref
         self.check_aborted_count(0)
 
     def test_simple_all_removed(self):
@@ -169,13 +195,13 @@
                 xy.next1 = lltype.malloc(A, 0)
                 xy.next2 = lltype.malloc(A, 0)
                 xy.next3 = lltype.malloc(A, 0)
-                exctx.topframeref = virtual_ref(xy)
+                exctx.topframeref = vref = virtual_ref(xy)
                 n -= externalfn(n)
                 exctx.topframeref = vref_None
                 xy.next1 = lltype.nullptr(A)
                 xy.next2 = lltype.nullptr(A)
                 xy.next3 = lltype.nullptr(A)
-                virtual_ref_finish(xy)
+                virtual_ref_finish(vref, xy)
         #
         self.meta_interp(f, [15])
         self.check_loops(new_with_vtable=0,     # all virtualized
@@ -206,17 +232,17 @@
                 xy.next1 = lltype.malloc(A, 0)
                 xy.next2 = lltype.malloc(A, 0)
                 xy.next3 = lltype.malloc(A, 0)
-                exctx.topframeref = virtual_ref(xy)
+                exctx.topframeref = vref = virtual_ref(xy)
                 n -= externalfn(n)
                 exctx.topframeref = vref_None
                 xy.next1 = lltype.nullptr(A)
                 xy.next2 = lltype.nullptr(A)
                 xy.next3 = lltype.nullptr(A)
-                virtual_ref_finish(xy)
+                virtual_ref_finish(vref, xy)
         #
         self.meta_interp(f, [15])
-        self.check_loops(new_with_vtable=2,     # the vref, and xy so far,
-                         new_array=0)           # but not xy.next1/2/3
+        self.check_loops(new_with_vtable=1,     # the vref: xy doesn't need to be forced
+                         new_array=0)           # and neither do xy.next1/2/3
         self.check_aborted_count(0)
 
     def test_simple_force_always(self):
@@ -244,12 +270,12 @@
                 xy.next2 = lltype.malloc(A, 0)
                 xy.next3 = lltype.malloc(A, 0)
                 xy.n = n
-                exctx.topframeref = virtual_ref(xy)
+                exctx.topframeref = vref = virtual_ref(xy)
                 n -= externalfn(n)
                 xy.next1 = lltype.nullptr(A)
                 xy.next2 = lltype.nullptr(A)
                 xy.next3 = lltype.nullptr(A)
-                virtual_ref_finish(xy)
+                virtual_ref_finish(vref, xy)
                 exctx.topframeref = vref_None
         #
         self.meta_interp(f, [15])
@@ -282,19 +308,19 @@
                 xy.next2 = lltype.malloc(A, 0)
                 xy.next3 = lltype.malloc(A, 0)
                 xy.n = n
-                exctx.topframeref = virtual_ref(xy)
+                exctx.topframeref = vref = virtual_ref(xy)
                 n -= externalfn(n)
                 xy.next1 = lltype.nullptr(A)
                 xy.next2 = lltype.nullptr(A)
                 xy.next3 = lltype.nullptr(A)
-                virtual_ref_finish(xy)
+                virtual_ref_finish(vref, xy)
                 exctx.topframeref = vref_None
             return exctx.m
         #
         res = self.meta_interp(f, [30])
         assert res == 13
-        self.check_loops(new_with_vtable=2,   # the vref, XY() at the end
-                         new_array=0)         # but not next1/2/3
+        self.check_loops(new_with_vtable=1,   # the vref, but not XY()
+                         new_array=0)         # and neither next1/2/3
         self.check_loop_count(1)
         self.check_aborted_count(0)
 
@@ -322,7 +348,7 @@
                 xy.next2 = lltype.malloc(A, 0)
                 xy.next3 = lltype.malloc(A, 0)
                 xy.n = n
-                exctx.topframeref = virtual_ref(xy)
+                exctx.topframeref = vref = virtual_ref(xy)
                 if n == 13:
                     externalfn(n)
                 n -= 1
@@ -330,7 +356,7 @@
                 xy.next1 = lltype.nullptr(A)
                 xy.next2 = lltype.nullptr(A)
                 xy.next3 = lltype.nullptr(A)
-                virtual_ref_finish(xy)
+                virtual_ref_finish(vref, xy)
             return exctx.m
         #
         res = self.meta_interp(f, [30])
@@ -366,7 +392,7 @@
                 xy.next4 = lltype.malloc(A, 0)
                 xy.next5 = lltype.malloc(A, 0)
                 xy.n = n
-                exctx.topframeref = virtual_ref(xy)
+                exctx.topframeref = vref = virtual_ref(xy)
                 if n % 6 == 0:
                     xy.next1 = lltype.nullptr(A)
                     xy.next2 = lltype.nullptr(A)
@@ -379,7 +405,7 @@
                 xy.next3 = lltype.nullptr(A)
                 xy.next4 = lltype.nullptr(A)
                 xy.next5 = lltype.nullptr(A)
-                virtual_ref_finish(xy)
+                virtual_ref_finish(vref, xy)
             return exctx.m
         #
         res = self.meta_interp(f, [72])
@@ -389,36 +415,6 @@
                          new_array=2)        # bridge: next4, next5
         self.check_aborted_count(0)
 
-    def test_access_vref_later(self):
-        myjitdriver = JitDriver(greens = [], reds = ['n'])
-        #
-        class XY:
-            pass
-        class ExCtx:
-            pass
-        exctx = ExCtx()
-        #
-        @dont_look_inside
-        def g():
-            return exctx.later().n
-        #
-        def f(n):
-            while n > 0:
-                myjitdriver.can_enter_jit(n=n)
-                myjitdriver.jit_merge_point(n=n)
-                xy = XY()
-                xy.n = n
-                exctx.topframeref = virtual_ref(xy)
-                exctx.later = exctx.topframeref
-                n -= 1
-                exctx.topframeref = vref_None
-                virtual_ref_finish(xy)
-            return g()
-        #
-        res = self.meta_interp(f, [15])
-        assert res == 1
-        self.check_aborted_count(0)
-
     def test_jit_force_virtual_seen(self):
         myjitdriver = JitDriver(greens = [], reds = ['n'])
         #
@@ -435,12 +431,12 @@
                 myjitdriver.jit_merge_point(n=n)
                 xy = XY()
                 xy.n = n
-                exctx.topframeref = virtual_ref(xy)
+                exctx.topframeref = vref = virtual_ref(xy)
                 xy.next1 = lltype.malloc(A, 0)
                 n = exctx.topframeref().n - 1
                 xy.next1 = lltype.nullptr(A)
                 exctx.topframeref = vref_None
-                virtual_ref_finish(xy)
+                virtual_ref_finish(vref, xy)
             return 1
         #
         res = self.meta_interp(f, [15])
@@ -465,12 +461,12 @@
                 if reclevel == 0:
                     return n
                 xy = XY()
-                exctx.topframeref = virtual_ref(xy)
+                exctx.topframeref = vref = virtual_ref(xy)
                 m = f(xy, n, reclevel-1)
                 assert m == n
                 n -= 1
                 exctx.topframeref = vref_None
-                virtual_ref_finish(xy)
+                virtual_ref_finish(vref, xy)
             return 2
         def main(n, reclevel):
             return f(XY(), n, reclevel)
@@ -495,7 +491,7 @@
                 frame.n += 1
                 xy = XY()
                 xy.n = n
-                exctx.topframeref = virtual_ref(xy)
+                exctx.topframeref = vref = virtual_ref(xy)
                 if reclevel > 0:
                     m = f(xy, frame.n, reclevel-1)
                     assert xy.n == m
@@ -503,7 +499,7 @@
                 else:
                     n -= 2
                 exctx.topframeref = vref_None
-                virtual_ref_finish(xy)
+                virtual_ref_finish(vref, xy)
             return frame.n
         def main(n, reclevel):
             return f(XY(), n, reclevel)
@@ -540,7 +536,7 @@
                 escapexy(xy)
                 # clean up
                 exctx.vr = vref_None
-                virtual_ref_finish(xy)
+                virtual_ref_finish(vr, xy)
                 n -= 1
             return 1
         #
@@ -548,6 +544,57 @@
         assert res == 1
         self.check_loops(new_with_vtable=2)     # vref, xy
 
+    def test_cannot_use_invalid_virtualref(self):
+        myjitdriver = JitDriver(greens = [], reds = ['n'])
+        #
+        class XY:
+            n = 0
+        #
+        def fn(n):
+            res = False
+            while n > 0:
+                myjitdriver.can_enter_jit(n=n)
+                myjitdriver.jit_merge_point(n=n)
+                xy = XY()
+                xy.n = n
+                vref = virtual_ref(xy)
+                virtual_ref_finish(vref, xy)
+                vref() # raises InvalidVirtualRef when jitted
+                n -= 1
+            return res
+        #
+        py.test.raises(InvalidVirtualRef, "fn(10)")
+        py.test.raises(LLException, "self.meta_interp(fn, [10])")
+
+    def test_call_virtualref_already_forced(self):
+        myjitdriver = JitDriver(greens = [], reds = ['n', 'res'])
+        #
+        class XY:
+            n = 0
+        #
+        @dont_look_inside
+        def force_it(vref, n):
+            if n % 6 == 0:
+                return vref().n
+            return 0
+        def fn(n):
+            res = 0
+            while n > 0:
+                myjitdriver.can_enter_jit(n=n, res=res)
+                myjitdriver.jit_merge_point(n=n, res=res)
+                xy = XY()
+                xy.n = n
+                vref = virtual_ref(xy)
+                force_it(vref, n)
+                virtual_ref_finish(vref, xy)
+                res += force_it(vref, n) # doesn't raise, because it was already forced
+                n -= 1
+            return res
+        #
+        assert fn(10) == 6
+        res = self.meta_interp(fn, [10])
+        assert res == 6
+
 
 class TestLLtype(VRefTests, LLJitMixin):
     pass
diff --git a/pypy/jit/metainterp/test/test_warmspot.py b/pypy/jit/metainterp/test/test_warmspot.py
--- a/pypy/jit/metainterp/test/test_warmspot.py
+++ b/pypy/jit/metainterp/test/test_warmspot.py
@@ -80,7 +80,7 @@
         self.meta_interp(f, [123, 10])
         assert len(get_stats().locations) >= 4
         for loc in get_stats().locations:
-            assert loc == 'GREEN IS 123.'
+            assert loc == (0, 123)
 
     def test_set_param_enable_opts(self):
         from pypy.rpython.annlowlevel import llstr, hlstr
diff --git a/pypy/jit/metainterp/test/test_warmstate.py b/pypy/jit/metainterp/test/test_warmstate.py
--- a/pypy/jit/metainterp/test/test_warmstate.py
+++ b/pypy/jit/metainterp/test/test_warmstate.py
@@ -181,6 +181,7 @@
         cpu = None
         memory_manager = None
     class FakeJitDriverSD:
+        jitdriver = None
         _green_args_spec = [lltype.Signed, lltype.Float]
         _get_printable_location_ptr = None
         _confirm_enter_jit_ptr = None
@@ -207,6 +208,7 @@
         cpu = None
         memory_manager = None
     class FakeJitDriverSD:
+        jitdriver = None
         _green_args_spec = [lltype.Signed, lltype.Float]
         _get_printable_location_ptr = llhelper(GET_LOCATION, get_location)
         _confirm_enter_jit_ptr = None
@@ -230,6 +232,7 @@
         cpu = None
         memory_manager = None
     class FakeJitDriverSD:
+        jitdriver = None
         _green_args_spec = [lltype.Signed, lltype.Float]
         _get_printable_location_ptr = None
         _confirm_enter_jit_ptr = llhelper(ENTER_JIT, confirm_enter_jit)
@@ -253,6 +256,7 @@
         cpu = None
         memory_manager = None
     class FakeJitDriverSD:
+        jitdriver = None
         _green_args_spec = [lltype.Signed, lltype.Float]
         _get_printable_location_ptr = None
         _confirm_enter_jit_ptr = None
diff --git a/pypy/jit/metainterp/virtualref.py b/pypy/jit/metainterp/virtualref.py
--- a/pypy/jit/metainterp/virtualref.py
+++ b/pypy/jit/metainterp/virtualref.py
@@ -2,7 +2,7 @@
 from pypy.rpython.lltypesystem import lltype, llmemory, rffi, rclass
 from pypy.jit.metainterp import history
 from pypy.jit.codewriter import heaptracker
-
+from pypy.rlib.jit import InvalidVirtualRef
 
 class VirtualRefInfo:
 
@@ -38,23 +38,24 @@
 
     def replace_force_virtual_with_call(self, graphs):
         # similar to rvirtualizable2.replace_force_virtualizable_with_call().
-        c_funcptr = None
-        count = 0
+        c_force_virtual_ptr = None
+        force_virtual_count = 0
         for graph in graphs:
             for block in graph.iterblocks():
                 for op in block.operations:
                     if op.opname == 'jit_force_virtual':
                         # first compute c_funcptr, but only if there is any
                         # 'jit_force_virtual' around
-                        if c_funcptr is None:
-                            c_funcptr = self.get_force_virtual_fnptr()
+                        if c_force_virtual_ptr is None:
+                            c_force_virtual_ptr = self.get_force_virtual_fnptr()
                         #
                         op.opname = 'direct_call'
-                        op.args = [c_funcptr, op.args[0]]
-                        count += 1
-        if c_funcptr is not None:
-            log("replaced %d 'jit_force_virtual' with %r" % (count,
-                                                             c_funcptr.value))
+                        op.args = [c_force_virtual_ptr, op.args[0]]
+                        force_virtual_count += 1
+        #
+        if c_force_virtual_ptr is not None:
+            log("replaced %d 'jit_force_virtual' with %r" % (force_virtual_count,
+                                                             c_force_virtual_ptr.value))
 
     # ____________________________________________________________
 
@@ -145,7 +146,8 @@
                 ResumeGuardForcedDescr.force_now(self.cpu, token)
                 assert vref.virtual_token == self.TOKEN_NONE
                 assert vref.forced
-        else:
-            assert vref.forced
+        elif not vref.forced:
+            # token == TOKEN_NONE and the vref was not forced: it's invalid
+            raise InvalidVirtualRef
         return vref.forced
     force_virtual._dont_inline_ = True
diff --git a/pypy/jit/metainterp/warmstate.py b/pypy/jit/metainterp/warmstate.py
--- a/pypy/jit/metainterp/warmstate.py
+++ b/pypy/jit/metainterp/warmstate.py
@@ -566,6 +566,19 @@
             return can_inline_greenargs(*greenargs)
         self.can_inline_greenargs = can_inline_greenargs
         self.can_inline_callable = can_inline_callable
+        if hasattr(jd.jitdriver, 'on_compile'):
+            def on_compile(logger, token, operations, type, greenkey):
+                greenargs = unwrap_greenkey(greenkey)
+                return jd.jitdriver.on_compile(logger, token, operations, type,
+                                               *greenargs)
+            def on_compile_bridge(logger, orig_token, operations, n):
+                return jd.jitdriver.on_compile_bridge(logger, orig_token,
+                                                      operations, n)
+            jd.on_compile = on_compile
+            jd.on_compile_bridge = on_compile_bridge
+        else:
+            jd.on_compile = lambda *args: None
+            jd.on_compile_bridge = lambda *args: None
 
         def get_assembler_token(greenkey, redboxes):
             # 'redboxes' is only used to know the types of red arguments
@@ -586,12 +599,8 @@
         get_location_ptr = self.jitdriver_sd._get_printable_location_ptr
         if get_location_ptr is None:
             missing = '(no jitdriver.get_printable_location!)'
-            missingll = llstr(missing)
             def get_location_str(greenkey):
-                if we_are_translated():
-                    return missingll
-                else:
-                    return missing
+                return missing
         else:
             rtyper = self.warmrunnerdesc.rtyper
             unwrap_greenkey = self.make_unwrap_greenkey()
@@ -599,10 +608,10 @@
             def get_location_str(greenkey):
                 greenargs = unwrap_greenkey(greenkey)
                 fn = support.maybe_on_top_of_llinterp(rtyper, get_location_ptr)
-                res = fn(*greenargs)
-                if not we_are_translated() and not isinstance(res, str):
-                    res = hlstr(res)
-                return res
+                llres = fn(*greenargs)
+                if not we_are_translated() and isinstance(llres, str):
+                    return llres
+                return hlstr(llres)
         self.get_location_str = get_location_str
         #
         confirm_enter_jit_ptr = self.jitdriver_sd._confirm_enter_jit_ptr
diff --git a/pypy/jit/tl/pypyjit.py b/pypy/jit/tl/pypyjit.py
--- a/pypy/jit/tl/pypyjit.py
+++ b/pypy/jit/tl/pypyjit.py
@@ -30,6 +30,7 @@
     BACKEND = 'c'
 
 config = get_pypy_config(translating=True)
+config.translation.backendopt.inline_threshold = 0.1
 config.translation.gc = 'boehm'
 config.objspace.nofaking = True
 config.translating = True
diff --git a/pypy/jit/tl/tinyframe/test/test_tinyframe.py b/pypy/jit/tl/tinyframe/test/test_tinyframe.py
--- a/pypy/jit/tl/tinyframe/test/test_tinyframe.py
+++ b/pypy/jit/tl/tinyframe/test/test_tinyframe.py
@@ -96,11 +96,12 @@
         RETURN r1
         ''')
         s = StringIO()
+        prev = sys.stdout
         sys.stdout = s
         try:
             interpret(code)
         finally:
-            sys.stdout = sys.__stdout__
+            sys.stdout = prev
         lines = s.getvalue().splitlines()
         assert lines == [
             '0',
diff --git a/pypy/jit/tl/tl.py b/pypy/jit/tl/tl.py
--- a/pypy/jit/tl/tl.py
+++ b/pypy/jit/tl/tl.py
@@ -40,6 +40,7 @@
         assert n >= 0
         self.stack[n] = elem
 
+    @dont_look_inside
     def roll(self, r):
         if r < -1:
             i = self.stackpos + r
diff --git a/pypy/jit/tool/oparser.py b/pypy/jit/tool/oparser.py
--- a/pypy/jit/tool/oparser.py
+++ b/pypy/jit/tool/oparser.py
@@ -6,7 +6,9 @@
 from pypy.jit.metainterp.history import TreeLoop, BoxInt, ConstInt,\
      ConstObj, ConstPtr, Box, BasicFailDescr, BoxFloat, ConstFloat,\
      LoopToken, get_const_ptr_for_string, get_const_ptr_for_unicode
-from pypy.jit.metainterp.resoperation import rop, ResOperation, ResOpWithDescr, N_aryOp
+from pypy.jit.metainterp.resoperation import rop, ResOperation, \
+                                            ResOpWithDescr, N_aryOp, \
+                                            UnaryOp, PlainResOp
 from pypy.jit.metainterp.typesystem import llhelper
 from pypy.jit.codewriter.heaptracker import adr2int
 from pypy.jit.codewriter import longlong
@@ -35,6 +37,23 @@
     def clone(self):
         return ESCAPE_OP(self.OPNUM, self.getarglist()[:], self.result, self.getdescr())
 
+class FORCE_SPILL(UnaryOp, PlainResOp):
+
+    OPNUM = -124
+
+    def __init__(self, opnum, args, result=None, descr=None):
+        assert result is None
+        assert descr is None
+        assert opnum == self.OPNUM
+        self.result = result
+        self.initarglist(args)
+
+    def getopnum(self):
+        return self.OPNUM
+
+    def clone(self):
+        return FORCE_SPILL(self.OPNUM, self.getarglist()[:])
+
 class ExtendedTreeLoop(TreeLoop):
 
     def getboxes(self):
@@ -193,7 +212,7 @@
         descr = None
         if argspec.strip():
             if opname == 'debug_merge_point':
-                allargs = argspec.rsplit(', ', 1)
+                allargs = argspec.split(',', 2)
             else:
                 allargs = [arg for arg in argspec.split(",")
                            if arg != '']
@@ -220,6 +239,8 @@
         except AttributeError:
             if opname == 'escape':
                 opnum = ESCAPE_OP.OPNUM
+            elif opname == 'force_spill':
+                opnum = FORCE_SPILL.OPNUM
             else:
                 raise ParseError("unknown op: %s" % opname)
         endnum = line.rfind(')')
@@ -261,6 +282,8 @@
     def create_op(self, opnum, args, result, descr):
         if opnum == ESCAPE_OP.OPNUM:
             return ESCAPE_OP(opnum, args, result, descr)
+        if opnum == FORCE_SPILL.OPNUM:
+            return FORCE_SPILL(opnum, args, result, descr)
         else:
             return ResOperation(opnum, args, result, descr)
 
diff --git a/pypy/jit/tool/pypytrace-mode.el b/pypy/jit/tool/pypytrace-mode.el
--- a/pypy/jit/tool/pypytrace-mode.el
+++ b/pypy/jit/tool/pypytrace-mode.el
@@ -8,10 +8,16 @@
 (defun set-truncate-lines ()
   (setq truncate-lines t))
 
+;; to generate the list of keywords:
+;; from pypy.jit.metainterp import resoperation
+;; print ' '.join(sorted('"%s"' % op.lower() for op in resoperation.opname.values() if not op.startswith('GUARD')))
+
+
+
 (define-generic-mode 
   'pypytrace-mode                   ;; name of the mode to create
   nil
-  '("jump" "finish" "int_add" "int_sub" "int_mul" "int_floordiv" "uint_floordiv" "int_mod" "int_and" "int_or" "int_xor" "int_rshift" "int_lshift" "uint_rshift" "float_add" "float_sub" "float_mul" "float_truediv" "float_neg" "float_abs" "cast_float_to_int" "cast_int_to_float" "int_lt" "int_le" "int_eq" "int_ne" "int_gt" "int_ge" "uint_lt" "uint_le" "uint_gt" "uint_ge" "float_lt" "float_le" "float_eq" "float_ne" "float_gt" "float_ge" "int_is_zero" "int_is_true" "int_neg" "int_invert" "same_as" "ptr_eq" "ptr_ne" "arraylen_gc" "strlen" "strgetitem" "getfield_gc_pure" "getfield_raw_pure" "getarrayitem_gc_pure" "unicodelen" "unicodegetitem" "getarrayitem_gc" "getarrayitem_raw" "getfield_gc" "getfield_raw" "new" "new_with_vtable" "new_array" "force_token" "virtual_ref" "setarrayitem_gc" "setarrayitem_raw" "setfield_gc" "setfield_raw" "arraycopy" "newstr" "strsetitem" "unicodesetitem" "newunicode" "cond_call_gc_wb" "virtual_ref_finish" "call" "call_assembler" "call_may_force" "call_loopinvariant" "call_pure" "int_add_ovf" "int_sub_ovf" "int_mul_ovf") ;; keywords
+  '("arraylen_gc" "call" "call_assembler" "call_loopinvariant" "call_may_force" "call_pure" "call_release_gil" "cast_float_to_int" "cast_int_to_float" "cond_call_gc_wb" "copystrcontent" "copyunicodecontent" "debug_merge_point" "finish" "float_abs" "float_add" "float_eq" "float_ge" "float_gt" "float_le" "float_lt" "float_mul" "float_ne" "float_neg" "float_sub" "float_truediv" "force_token" "getarrayitem_gc" "getarrayitem_gc_pure" "getarrayitem_raw" "getfield_gc" "getfield_gc_pure" "getfield_raw" "getfield_raw_pure" "int_add" "int_add_ovf" "int_and" "int_eq" "int_floordiv" "int_ge" "int_gt" "int_invert" "int_is_true" "int_is_zero" "int_le" "int_lshift" "int_lt" "int_mod" "int_mul" "int_mul_ovf" "int_ne" "int_neg" "int_or" "int_rshift" "int_sub" "int_sub_ovf" "int_xor" "jit_debug" "jump" "new" "new_array" "new_with_vtable" "newstr" "newunicode" "ptr_eq" "ptr_ne" "quasiimmut_field" "read_timestamp" "same_as" "setarrayitem_gc" "setarrayitem_raw" "setfield_gc" "setfield_raw" "strgetitem" "strlen" "strsetitem" "uint_floordiv" "uint_ge" "uint_gt" "uint_le" "uint_lt" "uint_rshift" "unicodegetitem" "unicodelen" "unicodesetitem" "virtual_ref" "virtual_ref_finish") ;; keywords
   '( ;; additional regexps
     ("^# Loop.*" . 'hi-blue)
     ("\\[.*\\]" . 'font-lock-comment-face) ;; comment out argument lists
diff --git a/pypy/jit/tool/test/test_oparser.py b/pypy/jit/tool/test/test_oparser.py
--- a/pypy/jit/tool/test/test_oparser.py
+++ b/pypy/jit/tool/test/test_oparser.py
@@ -141,16 +141,16 @@
 def test_debug_merge_point():
     x = '''
     []
-    debug_merge_point("info", 0)
-    debug_merge_point('info', 1)
-    debug_merge_point('<some ('other,')> info', 1)
-    debug_merge_point('(stuff) #1', 1)
+    debug_merge_point(0, "info")
+    debug_merge_point(0, 'info')
+    debug_merge_point(1, '<some ('other,')> info')
+    debug_merge_point(0, '(stuff) #1')
     '''
     loop = parse(x)
-    assert loop.operations[0].getarg(0)._get_str() == 'info'
-    assert loop.operations[1].getarg(0)._get_str() == 'info'
-    assert loop.operations[2].getarg(0)._get_str() == "<some ('other,')> info"
-    assert loop.operations[3].getarg(0)._get_str() == "(stuff) #1"
+    assert loop.operations[0].getarg(1)._get_str() == 'info'
+    assert loop.operations[1].getarg(1)._get_str() == 'info'
+    assert loop.operations[2].getarg(1)._get_str() == "<some ('other,')> info"
+    assert loop.operations[3].getarg(1)._get_str() == "(stuff) #1"
     
 
 def test_descr_with_obj_print():
diff --git a/pypy/module/__builtin__/__init__.py b/pypy/module/__builtin__/__init__.py
--- a/pypy/module/__builtin__/__init__.py
+++ b/pypy/module/__builtin__/__init__.py
@@ -17,6 +17,8 @@
 
         'apply'         : 'app_functional.apply',
         'sorted'        : 'app_functional.sorted',
+        'any'           : 'app_functional.any',
+        'all'           : 'app_functional.all',
         'vars'          : 'app_inspect.vars',
         'dir'           : 'app_inspect.dir',
 
@@ -81,8 +83,6 @@
         'range'         : 'functional.range_int',
         'xrange'        : 'functional.W_XRange',
         'enumerate'     : 'functional.W_Enumerate',
-        'all'           : 'functional.all',
-        'any'           : 'functional.any',
         'min'           : 'functional.min',
         'max'           : 'functional.max',
         'sum'           : 'functional.sum',
diff --git a/pypy/module/__builtin__/app_functional.py b/pypy/module/__builtin__/app_functional.py
--- a/pypy/module/__builtin__/app_functional.py
+++ b/pypy/module/__builtin__/app_functional.py
@@ -16,3 +16,21 @@
     sorted_lst = list(lst)
     sorted_lst.sort(cmp, key, reverse)
     return sorted_lst
+
+def any(seq):
+    """any(iterable) -> bool
+
+Return True if bool(x) is True for any x in the iterable."""
+    for x in seq:
+        if x:
+            return True
+    return False
+
+def all(seq):
+    """all(iterable) -> bool
+
+Return True if bool(x) is True for all values x in the iterable."""
+    for x in seq:
+        if not x:
+            return False
+    return True
diff --git a/pypy/module/__builtin__/functional.py b/pypy/module/__builtin__/functional.py
--- a/pypy/module/__builtin__/functional.py
+++ b/pypy/module/__builtin__/functional.py
@@ -452,40 +452,6 @@
     w_empty = space.call_function(w_str_type)
     return space.call_method(w_empty, "join", space.newlist(result_w))
 
-def all(space, w_S):
-    """all(iterable) -> bool
-
-Return True if bool(x) is True for all values x in the iterable."""
-    w_iter = space.iter(w_S)
-    while True:
-        try:
-            w_next = space.next(w_iter)
-        except OperationError, e:
-            if not e.match(space, space.w_StopIteration):
-                raise       # re-raise other app-level exceptions
-            break
-        if not space.is_true(w_next):
-            return space.w_False
-    return space.w_True
-
-
-def any(space, w_S):
-    """any(iterable) -> bool
-
-Return True if bool(x) is True for any x in the iterable."""
-    w_iter = space.iter(w_S)
-    while True:
-        try:
-            w_next = space.next(w_iter)
-        except OperationError, e:
-            if not e.match(space, space.w_StopIteration):
-                raise       # re-raise other app-level exceptions
-            break
-        if space.is_true(w_next):
-            return space.w_True
-    return space.w_False
-
-
 class W_Enumerate(Wrappable):
 
     def __init__(self, w_iter, w_start):
diff --git a/pypy/module/_ast/test/test_ast.py b/pypy/module/_ast/test/test_ast.py
--- a/pypy/module/_ast/test/test_ast.py
+++ b/pypy/module/_ast/test/test_ast.py
@@ -128,6 +128,9 @@
         assert ns["x"] == ns["lemon"] == 3
         assert ns["apple"] == 4
 
+    def test_empty_module(self):
+        compile(self.ast.Module([]), "<test>", "exec")
+
     def test_ast_types(self):
         ast = self.ast
         expr = ast.Expr()
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -32,15 +32,22 @@
                 space.wrap(reason))
             w_res = space.call_function(w_errorhandler, w_exc)
             if (not space.is_true(space.isinstance(w_res, space.w_tuple))
-                or space.len_w(w_res) != 2):
+                or space.len_w(w_res) != 2
+                or not space.is_true(space.isinstance(
+                                 space.getitem(w_res, space.wrap(0)),
+                                 space.w_unicode))):
+                if decode:
+                    msg = ("decoding error handler must return "
+                           "(unicode, int) tuple, not %s")
+                else:
+                    msg = ("encoding error handler must return "
+                           "(unicode, int) tuple, not %s")
                 raise operationerrfmt(
-                    space.w_TypeError,
-                    "encoding error handler must return "
-                    "(unicode, int) tuple, not %s",
+                    space.w_TypeError, msg,
                     space.str_w(space.repr(w_res)))
             w_replace, w_newpos = space.fixedview(w_res, 2)
             newpos = space.int_w(w_newpos)
-            if (newpos < 0):
+            if newpos < 0:
                 newpos = len(input) + newpos
             if newpos < 0 or newpos > len(input):
                 raise operationerrfmt(
@@ -50,7 +57,9 @@
                 replace = space.unicode_w(w_replace)
                 return replace, newpos
             else:
-                replace = space.str_w(w_replace)
+                from pypy.objspace.std.unicodetype import encode_object
+                w_str = encode_object(space, w_replace, encoding, None)
+                replace = space.str_w(w_str)
                 return replace, newpos
         return unicode_call_errorhandler
 
@@ -160,15 +169,7 @@
 def ignore_errors(space, w_exc):
     check_exception(space, w_exc)
     w_end = space.getattr(w_exc, space.wrap('end'))
-    if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
-        return space.newtuple([space.wrap(''), w_end])
-    elif (space.isinstance_w(w_exc, space.w_UnicodeDecodeError) or
-          space.isinstance_w(w_exc, space.w_UnicodeTranslateError)):
-        return space.newtuple([space.wrap(u''), w_end])
-    else:
-        typename = space.type(w_exc).getname(space, '?')
-        raise operationerrfmt(space.w_TypeError,
-            "don't know how to handle %s in error callback", typename)
+    return space.newtuple([space.wrap(u''), w_end])
 
 def replace_errors(space, w_exc):
     check_exception(space, w_exc)
@@ -176,7 +177,7 @@
     w_end = space.getattr(w_exc, space.wrap('end'))
     size = space.int_w(w_end) - space.int_w(w_start)
     if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
-        text = '?' * size
+        text = u'?' * size
         return space.newtuple([space.wrap(text), w_end])
     elif space.isinstance_w(w_exc, space.w_UnicodeDecodeError):
         text = u'\ufffd'
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -540,6 +540,17 @@
         else:
             assert res == u"\x00\x00\x01\x00\x00" # UCS2 build
 
+    def test_encode_error_bad_handler(self):
+        import codecs
+        codecs.register_error("test.bad_handler", lambda e: (repl, 1))
+        assert u"xyz".encode("latin-1", "test.bad_handler") == "xyz"
+        repl = u"\u1234"
+        raises(UnicodeEncodeError, u"\u5678".encode, "latin-1",
+               "test.bad_handler")
+        repl = u"\u00E9"
+        s = u"\u5678".encode("latin-1", "test.bad_handler")
+        assert s == '\xe9'
+
     def test_charmap_encode(self):
         assert 'xxx'.encode('charmap') == 'xxx'
 
@@ -593,3 +604,11 @@
         assert u'caf\xe9'.encode('mbcs') == 'caf\xe9'
         assert u'\u040a'.encode('mbcs') == '?' # some cyrillic letter
         assert 'cafx\e9'.decode('mbcs') == u'cafx\e9'
+
+    def test_bad_handler_string_result(self):
+        import _codecs
+        def f(exc):
+            return ('foo', exc.end)
+        _codecs.register_error("test.test_codecs_not_a_string", f)
+        raises(TypeError, u'\u1234'.encode, 'ascii',
+               'test.test_codecs_not_a_string')
diff --git a/pypy/module/_ffi/__init__.py b/pypy/module/_ffi/__init__.py
--- a/pypy/module/_ffi/__init__.py
+++ b/pypy/module/_ffi/__init__.py
@@ -4,8 +4,10 @@
 class Module(MixedModule):
 
     interpleveldefs = {
-        'CDLL'               : 'interp_ffi.W_CDLL',
-        'types':             'interp_ffi.W_types',
+        'CDLL':    'interp_ffi.W_CDLL',
+        'types':   'interp_ffi.W_types',
+        'FuncPtr': 'interp_ffi.W_FuncPtr',
+        'get_libc':'interp_ffi.get_libc',
     }
 
     appleveldefs = {}
diff --git a/pypy/module/_ffi/interp_ffi.py b/pypy/module/_ffi/interp_ffi.py
--- a/pypy/module/_ffi/interp_ffi.py
+++ b/pypy/module/_ffi/interp_ffi.py
@@ -4,63 +4,170 @@
     operationerrfmt
 from pypy.interpreter.gateway import interp2app, NoneNotWrapped, unwrap_spec
 from pypy.interpreter.typedef import TypeDef, GetSetProperty
+from pypy.module._rawffi.structure import W_StructureInstance, W_Structure
 #
 from pypy.rpython.lltypesystem import lltype, rffi
 #
 from pypy.rlib import jit
 from pypy.rlib import libffi
 from pypy.rlib.rdynload import DLOpenError
-from pypy.rlib.rarithmetic import intmask
+from pypy.rlib.rarithmetic import intmask, r_uint
 
 class W_FFIType(Wrappable):
-    def __init__(self, name, ffitype):
+
+    _immutable_fields_ = ['name', 'ffitype', 'w_datashape', 'w_pointer_to']
+    
+    def __init__(self, name, ffitype, w_datashape=None, w_pointer_to=None):
         self.name = name
         self.ffitype = ffitype
+        self.w_datashape = w_datashape
+        self.w_pointer_to = w_pointer_to
+        if self.is_struct():
+            assert w_datashape is not None
 
-    def str(self, space):
-        return space.wrap('<ffi type %s>' % self.name)
+    def descr_deref_pointer(self, space):
+        if self.w_pointer_to is None:
+            return space.w_None
+        return self.w_pointer_to
 
+    def repr(self, space):
+        return space.wrap(self.__repr__())
 
+    def __repr__(self):
+        return "<ffi type %s>" % self.name
+
+    def is_signed(self):
+        return (self is app_types.slong or
+                self is app_types.sint or
+                self is app_types.sshort or
+                self is app_types.sbyte or
+                self is app_types.slonglong)
+
+    def is_unsigned(self):
+        return (self is app_types.ulong or
+                self is app_types.uint or
+                self is app_types.ushort or
+                self is app_types.ubyte or
+                self is app_types.ulonglong)
+
+    def is_pointer(self):
+        return self.ffitype is libffi.types.pointer
+
+    def is_char(self):
+        return self is app_types.char
+
+    def is_unichar(self):
+        return self is app_types.unichar
+
+    def is_longlong(self):
+        return libffi.IS_32_BIT and (self is app_types.slonglong or
+                                     self is app_types.ulonglong)
+
+    def is_double(self):
+        return self is app_types.double
+
+    def is_singlefloat(self):
+        return self is app_types.float
+
+    def is_void(self):
+        return self is app_types.void
+
+    def is_struct(self):
+        return libffi.types.is_struct(self.ffitype)
 
 W_FFIType.typedef = TypeDef(
     'FFIType',
-    __str__ = interp2app(W_FFIType.str),
+    __repr__ = interp2app(W_FFIType.repr),
+    deref_pointer = interp2app(W_FFIType.descr_deref_pointer),
     )
 
 
+def build_ffi_types():
+    from pypy.rlib.clibffi import FFI_TYPE_P
+    types = [
+        # note: most of the type names come directly from their C equivalents,
+        # with the exception of bytes: in C, ubyte and char are equivalent,
+        # but for _ffi the former expects a number while the latter expects a
+        # 1-length string
+        W_FFIType('slong',     libffi.types.slong),
+        W_FFIType('sint',      libffi.types.sint),
+        W_FFIType('sshort',    libffi.types.sshort),
+        W_FFIType('sbyte',     libffi.types.schar),
+        W_FFIType('slonglong', libffi.types.slonglong),
+        #
+        W_FFIType('ulong',     libffi.types.ulong),
+        W_FFIType('uint',      libffi.types.uint),
+        W_FFIType('ushort',    libffi.types.ushort),
+        W_FFIType('ubyte',     libffi.types.uchar),
+        W_FFIType('ulonglong', libffi.types.ulonglong),
+        #
+        W_FFIType('char',      libffi.types.uchar),
+        W_FFIType('unichar',   libffi.types.wchar_t),
+        #
+        W_FFIType('double',    libffi.types.double),
+        W_FFIType('float',     libffi.types.float),
+        W_FFIType('void',      libffi.types.void),
+        W_FFIType('void_p',    libffi.types.pointer),
+        #
+        # missing types:
+
+        ## 's' : ffi_type_pointer,
+        ## 'z' : ffi_type_pointer,
+        ## 'O' : ffi_type_pointer,
+        ## 'Z' : ffi_type_pointer,
+
+        ]
+    return dict([(t.name, t) for t in types])
+
+class app_types:
+    pass
+app_types.__dict__ = build_ffi_types()
+
+def descr_new_pointer(space, w_cls, w_pointer_to):
+    try:
+        return descr_new_pointer.cache[w_pointer_to]
+    except KeyError:
+        w_pointer_to = space.interp_w(W_FFIType, w_pointer_to)
+        name = '(pointer to %s)' % w_pointer_to.name
+        w_result = W_FFIType(name, libffi.types.pointer, w_pointer_to = w_pointer_to)
+        descr_new_pointer.cache[w_pointer_to] = w_result
+        return w_result
+descr_new_pointer.cache = {}
+
 class W_types(Wrappable):
     pass
-
-def build_ffi_types():
-    from pypy.rlib.clibffi import FFI_TYPE_P
-    tdict = {}
-    for key, value in libffi.types.__dict__.iteritems():
-        if key == 'getkind' or key.startswith('__'):
-            continue
-        assert lltype.typeOf(value) == FFI_TYPE_P
-        tdict[key] = W_FFIType(key, value)
-    return tdict
-    
 W_types.typedef = TypeDef(
     'types',
-    **build_ffi_types())
+    Pointer = interp2app(descr_new_pointer, as_classmethod=True),
+    **app_types.__dict__)
+
+
+def unwrap_ffitype(space, w_argtype, allow_void=False):
+    res = w_argtype.ffitype
+    if res is libffi.types.void and not allow_void:
+        msg = 'void is not a valid argument type'
+        raise OperationError(space.w_TypeError, space.wrap(msg))
+    return res
+
 
 # ========================================================================
 
 class W_FuncPtr(Wrappable):
 
-    _immutable_fields_ = ['func']
+    _immutable_fields_ = ['func', 'argtypes_w[*]', 'w_restype']
     
-    def __init__(self, func):
+    def __init__(self, func, argtypes_w, w_restype):
         self.func = func
+        self.argtypes_w = argtypes_w
+        self.w_restype = w_restype
 
     @jit.unroll_safe
-    def build_argchain(self, space, argtypes, args_w):
-        expected = len(argtypes)
+    def build_argchain(self, space, args_w):
+        expected = len(self.argtypes_w)
         given = len(args_w)
         if given != expected:
             arg = 'arguments'
-            if len(argtypes) == 1:
+            if len(self.argtypes_w) == 1:
                 arg = 'argument'
             raise operationerrfmt(space.w_TypeError,
                                   '%s() takes exactly %d %s (%d given)',
@@ -68,34 +175,103 @@
         #
         argchain = libffi.ArgChain()
         for i in range(expected):
-            argtype = argtypes[i]
+            w_argtype = self.argtypes_w[i]
             w_arg = args_w[i]
-            kind = libffi.types.getkind(argtype)
-            if kind == 'i':
+            if w_argtype.is_longlong():
+                # note that we must check for longlong first, because either
+                # is_signed or is_unsigned returns true anyway
+                assert libffi.IS_32_BIT
+                kind = libffi.types.getkind(w_argtype.ffitype) # XXX: remove the kind
+                self.arg_longlong(space, argchain, kind, w_arg)
+            elif w_argtype.is_signed():
                 argchain.arg(space.int_w(w_arg))
-            elif kind == 'u':
+            elif w_argtype.is_pointer():
+                w_arg = self.convert_pointer_arg_maybe(space, w_arg, w_argtype)
                 argchain.arg(intmask(space.uint_w(w_arg)))
-            elif kind == 'f':
+            elif w_argtype.is_unsigned():
+                argchain.arg(intmask(space.uint_w(w_arg)))
+            elif w_argtype.is_char():
+                w_arg = space.ord(w_arg)
+                argchain.arg(space.int_w(w_arg))
+            elif w_argtype.is_unichar():
+                w_arg = space.ord(w_arg)
+                argchain.arg(space.int_w(w_arg))
+            elif w_argtype.is_double():
                 argchain.arg(space.float_w(w_arg))
+            elif w_argtype.is_singlefloat():
+                argchain.arg_singlefloat(space.float_w(w_arg))
+            elif w_argtype.is_struct():
+                # arg_raw directly takes value to put inside ll_args
+                w_arg = space.interp_w(W_StructureInstance, w_arg)                
+                ptrval = w_arg.ll_buffer
+                argchain.arg_raw(ptrval)
             else:
-                assert False, "Argument kind '%s' not supported" % kind
+                assert False, "Argument shape '%s' not supported" % w_argtype
         return argchain
 
+    def convert_pointer_arg_maybe(self, space, w_arg, w_argtype):
+        """
+        Try to convert the argument by calling _as_ffi_pointer_()
+        """
+        meth = space.lookup(w_arg, '_as_ffi_pointer_') # this also promotes the type
+        if meth:
+            return space.call_function(meth, w_arg, w_argtype)
+        else:
+            return w_arg
+
+    @jit.dont_look_inside
+    def arg_longlong(self, space, argchain, kind, w_arg):
+        bigarg = space.bigint_w(w_arg)
+        if kind == 'I':
+            llval = bigarg.tolonglong()
+        elif kind == 'U':
+            ullval = bigarg.toulonglong()
+            llval = rffi.cast(rffi.LONGLONG, ullval)
+        else:
+            assert False
+        # this is a hack: we store the 64 bits of the long long into the
+        # 64 bits of a float (i.e., a C double)
+        floatval = libffi.longlong2float(llval)
+        argchain.arg_longlong(floatval)
+
     def call(self, space, args_w):
         self = jit.hint(self, promote=True)
-        argchain = self.build_argchain(space, self.func.argtypes, args_w)
-        reskind = libffi.types.getkind(self.func.restype)
-        if reskind == 'i':
+        argchain = self.build_argchain(space, args_w)
+        w_restype = self.w_restype
+        if w_restype.is_longlong():
+            # note that we must check for longlong first, because either
+            # is_signed or is_unsigned returns true anyway
+            assert libffi.IS_32_BIT
+            reskind = libffi.types.getkind(self.func.restype) # XXX: remove the kind
+            return self._call_longlong(space, argchain, reskind)
+        elif w_restype.is_signed():
             return self._call_int(space, argchain)
-        elif reskind == 'u':
+        elif w_restype.is_unsigned() or w_restype.is_pointer():
             return self._call_uint(space, argchain)
-        elif reskind == 'f':
+        elif w_restype.is_char():
+            intres = self.func.call(argchain, rffi.UCHAR)
+            return space.wrap(chr(intres))
+        elif w_restype.is_unichar():
+            intres = self.func.call(argchain, rffi.WCHAR_T)
+            return space.wrap(unichr(intres))
+        elif w_restype.is_double():
             floatres = self.func.call(argchain, rffi.DOUBLE)
             return space.wrap(floatres)
-        else:
+        elif w_restype.is_singlefloat():
+            # the result is a float, but widened to be inside a double
+            floatres = self.func.call(argchain, rffi.FLOAT)
+            return space.wrap(floatres)
+        elif w_restype.is_struct():
+            w_datashape = w_restype.w_datashape
+            assert isinstance(w_datashape, W_Structure)
+            ptrval = self.func.call(argchain, rffi.ULONG, is_struct=True)
+            return w_datashape.fromaddress(space, ptrval)
+        elif w_restype.is_void():
             voidres = self.func.call(argchain, lltype.Void)
             assert voidres is None
             return space.w_None
+        else:
+            assert False, "Return value shape '%s' not supported" % w_restype
 
     def _call_int(self, space, argchain):
         # if the declared return type of the function is smaller than LONG,
@@ -138,6 +314,10 @@
             # special case
             uintres = call(argchain, rffi.ULONG)
             return space.wrap(uintres)
+        elif restype is libffi.types.pointer:
+            ptrres = call(argchain, rffi.VOIDP)
+            uintres = rffi.cast(rffi.ULONG, ptrres)
+            return space.wrap(uintres)
         elif restype is libffi.types.uint:
             intres = rffi.cast(rffi.LONG, call(argchain, rffi.UINT))
         elif restype is libffi.types.ushort:
@@ -149,16 +329,52 @@
                                  space.wrap('Unsupported restype'))
         return space.wrap(intres)
 
+    @jit.dont_look_inside
+    def _call_longlong(self, space, argchain, reskind):
+        # this is a hack: we store the 64 bits of the long long into the 64
+        # bits of a float (i.e., a C double)
+        floatres = self.func.call(argchain, rffi.LONGLONG)
+        llres = libffi.float2longlong(floatres)
+        if reskind == 'I':
+            return space.wrap(llres)
+        elif reskind == 'U':
+            ullres = rffi.cast(rffi.ULONGLONG, llres)
+            return space.wrap(ullres)
+        else:
+            assert False
+
     def getaddr(self, space):
         """
         Return the physical address in memory of the function
         """
         return space.wrap(rffi.cast(rffi.LONG, self.func.funcsym))
 
+
+
+def unpack_argtypes(space, w_argtypes, w_restype):
+    argtypes_w = [space.interp_w(W_FFIType, w_argtype)
+                  for w_argtype in space.listview(w_argtypes)]
+    argtypes = [unwrap_ffitype(space, w_argtype) for w_argtype in
+                argtypes_w]
+    w_restype = space.interp_w(W_FFIType, w_restype)
+    restype = unwrap_ffitype(space, w_restype, allow_void=True)
+    return argtypes_w, argtypes, w_restype, restype
+
+@unwrap_spec(addr=r_uint, name=str)
+def descr_fromaddr(space, w_cls, addr, name, w_argtypes, w_restype):
+    argtypes_w, argtypes, w_restype, restype = unpack_argtypes(space,
+                                                               w_argtypes,
+                                                               w_restype)
+    addr = rffi.cast(rffi.VOIDP, addr)
+    func = libffi.Func(name, argtypes, restype, addr)
+    return W_FuncPtr(func, argtypes_w, w_restype)
+
+
 W_FuncPtr.typedef = TypeDef(
-    'FuncPtr',
+    '_ffi.FuncPtr',
     __call__ = interp2app(W_FuncPtr.call),
     getaddr = interp2app(W_FuncPtr.getaddr),
+    fromaddr = interp2app(descr_fromaddr, as_classmethod=True)
     )
 
 
@@ -167,40 +383,57 @@
 
 class W_CDLL(Wrappable):
     def __init__(self, space, name):
+        self.space = space
+        if name is None:
+            self.name = "<None>"
+        else:
+            self.name = name
         try:
             self.cdll = libffi.CDLL(name)
         except DLOpenError, e:
-            raise operationerrfmt(space.w_OSError, '%s: %s', name,
+            raise operationerrfmt(space.w_OSError, '%s: %s', self.name,
                                   e.msg or 'unspecified error')
-        self.name = name
-        self.space = space
-
-    def ffitype(self, w_argtype, allow_void=False):
-        res = self.space.interp_w(W_FFIType, w_argtype).ffitype
-        if res is libffi.types.void and not allow_void:
-            space = self.space
-            msg = 'void is not a valid argument type'
-            raise OperationError(space.w_TypeError, space.wrap(msg))
-        return res
 
     @unwrap_spec(name=str)
     def getfunc(self, space, name, w_argtypes, w_restype):
-        argtypes = [self.ffitype(w_argtype) for w_argtype in
-                    space.listview(w_argtypes)]
-        restype = self.ffitype(w_restype, allow_void=True)
-        func = self.cdll.getpointer(name, argtypes, restype)
-        return W_FuncPtr(func)
+        argtypes_w, argtypes, w_restype, restype = unpack_argtypes(space,
+                                                                   w_argtypes,
+                                                                   w_restype)
+        try:
+            func = self.cdll.getpointer(name, argtypes, restype)
+        except KeyError:
+            raise operationerrfmt(space.w_AttributeError,
+                                  "No symbol %s found in library %s", name, self.name)
+            
+        return W_FuncPtr(func, argtypes_w, w_restype)
 
+    @unwrap_spec(name=str)
+    def getaddressindll(self, space, name):
+        try:
+            address_as_uint = rffi.cast(lltype.Unsigned,
+                                        self.cdll.getaddressindll(name))
+        except KeyError:
+            raise operationerrfmt(space.w_ValueError,
+                                  "No symbol %s found in library %s", name, self.name)
+        return space.wrap(address_as_uint)
 
-@unwrap_spec(name=str)
+@unwrap_spec(name='str_or_None')
 def descr_new_cdll(space, w_type, name):
     return space.wrap(W_CDLL(space, name))
 
 
 W_CDLL.typedef = TypeDef(
-    'CDLL',
+    '_ffi.CDLL',
     __new__     = interp2app(descr_new_cdll),
     getfunc     = interp2app(W_CDLL.getfunc),
+    getaddressindll = interp2app(W_CDLL.getaddressindll),
     )
 
 # ========================================================================
+
+def get_libc(space):
+    from pypy.rlib.clibffi import get_libc_name
+    try:
+        return space.wrap(W_CDLL(space, get_libc_name()))
+    except OSError, e:
+        raise wrap_oserror(space, e)
diff --git a/pypy/module/_ffi/test/test__ffi.py b/pypy/module/_ffi/test/test__ffi.py
--- a/pypy/module/_ffi/test/test__ffi.py
+++ b/pypy/module/_ffi/test/test__ffi.py
@@ -17,7 +17,13 @@
 
         c_file = udir.ensure("test__ffi", dir=1).join("foolib.c")
         # automatically collect the C source from the docstrings of the tests
-        snippets = []
+        snippets = ["""
+        #ifdef _WIN32
+        #define DLLEXPORT __declspec(dllexport)
+        #else
+        #define DLLEXPORT
+        #endif
+        """]
         for name in dir(cls):
             if name.startswith('test_'):
                 meth = getattr(cls, name)
@@ -35,8 +41,9 @@
         from pypy.rpython.lltypesystem import rffi
         from pypy.rlib.libffi import get_libc_name, CDLL, types
         from pypy.rlib.test.test_libffi import get_libm_name
-        space = gettestobjspace(usemodules=('_ffi',))
+        space = gettestobjspace(usemodules=('_ffi', '_rawffi'))
         cls.space = space
+        cls.w_iswin32 = space.wrap(sys.platform == 'win32')
         cls.w_libfoo_name = space.wrap(cls.prepare_c_example())
         cls.w_libc_name = space.wrap(get_libc_name())
         libm_name = get_libm_name(sys.platform)
@@ -45,6 +52,13 @@
         pow = libm.getpointer('pow', [], types.void)
         pow_addr = rffi.cast(rffi.LONG, pow.funcsym)
         cls.w_pow_addr = space.wrap(pow_addr)
+        #
+        # these are needed for test_single_float_args
+        from ctypes import c_float
+        f_12_34 = c_float(12.34).value
+        f_56_78 = c_float(56.78).value
+        f_result = c_float(f_12_34 + f_56_78).value
+        cls.w_f_12_34_plus_56_78 = space.wrap(f_result)
 
     def test_libload(self):
         import _ffi
@@ -54,10 +68,20 @@
         import _ffi
         raises(OSError, _ffi.CDLL, "xxxxx_this_name_does_not_exist_xxxxx")
 
+    def test_libload_None(self):
+        if self.iswin32:
+            skip("unix specific")
+        from _ffi import CDLL, types
+        # this should return *all* loaded libs, dlopen(NULL)
+        dll = CDLL(None)
+        # Assume CPython, or PyPy compiled with cpyext
+        res = dll.getfunc('Py_IsInitialized', [], types.slong)()
+        assert res == 1
+
     def test_simple_types(self):
         from _ffi import types
-        assert str(types.sint) == '<ffi type sint>'
-        assert str(types.uint) == '<ffi type uint>'
+        assert str(types.sint) == "<ffi type sint>"
+        assert str(types.uint) == "<ffi type uint>"
         
     def test_callfunc(self):
         from _ffi import CDLL, types
@@ -70,10 +94,27 @@
         libm = CDLL(self.libm_name)
         pow = libm.getfunc('pow', [types.double, types.double], types.double)
         assert pow.getaddr() == self.pow_addr
-        
+
+    def test_getaddressindll(self):
+        import sys
+        from _ffi import CDLL, types
+        libm = CDLL(self.libm_name)
+        pow_addr = libm.getaddressindll('pow')
+        assert pow_addr == self.pow_addr & (sys.maxint*2-1)
+
+    def test_func_fromaddr(self):
+        import sys
+        from _ffi import CDLL, types, FuncPtr
+        libm = CDLL(self.libm_name)
+        pow_addr = libm.getaddressindll('pow')
+        pow = FuncPtr.fromaddr(pow_addr, 'pow', [types.double, types.double],
+                               types.double)
+        assert pow(2, 3) == 8
+
+
     def test_int_args(self):
         """
-            int sum_xy(int x, int y)
+            DLLEXPORT int sum_xy(int x, int y)
             {
                 return x+y;
             }
@@ -86,8 +127,8 @@
     def test_void_result(self):
         """
             int dummy = 0;
-            void set_dummy(int val) { dummy = val; }
-            int get_dummy() { return dummy; }
+            DLLEXPORT void set_dummy(int val) { dummy = val; }
+            DLLEXPORT int get_dummy() { return dummy; }
         """
         from _ffi import CDLL, types
         libfoo = CDLL(self.libfoo_name)
@@ -96,10 +137,105 @@
         assert get_dummy() == 0
         assert set_dummy(42) is None
         assert get_dummy() == 42
+        set_dummy(0)
+
+    def test_pointer_args(self):
+        """
+            extern int dummy; // defined in test_void_result 
+            DLLEXPORT int* get_dummy_ptr() { return &dummy; }
+            DLLEXPORT void set_val_to_ptr(int* ptr, int val) { *ptr = val; }
+        """
+        from _ffi import CDLL, types
+        libfoo = CDLL(self.libfoo_name)
+        get_dummy = libfoo.getfunc('get_dummy', [], types.sint)
+        get_dummy_ptr = libfoo.getfunc('get_dummy_ptr', [], types.void_p)
+        set_val_to_ptr = libfoo.getfunc('set_val_to_ptr',
+                                        [types.void_p, types.sint],
+                                        types.void)
+        assert get_dummy() == 0
+        ptr = get_dummy_ptr()
+        set_val_to_ptr(ptr, 123)
+        assert get_dummy() == 123
+        set_val_to_ptr(ptr, 0)
+
+    def test_convert_pointer_args(self):
+        """
+            extern int dummy; // defined in test_void_result 
+            DLLEXPORT int* get_dummy_ptr(); // defined in test_pointer_args
+            DLLEXPORT void set_val_to_ptr(int* ptr, int val); // ditto
+        """
+        from _ffi import CDLL, types
+
+        class MyPointerWrapper(object):
+            def __init__(self, value):
+                self.value = value
+            def _as_ffi_pointer_(self, ffitype):
+                assert ffitype is types.void_p
+                return self.value
+        
+        libfoo = CDLL(self.libfoo_name)
+        get_dummy = libfoo.getfunc('get_dummy', [], types.sint)
+        get_dummy_ptr = libfoo.getfunc('get_dummy_ptr', [], types.void_p)
+        set_val_to_ptr = libfoo.getfunc('set_val_to_ptr',
+                                        [types.void_p, types.sint],
+                                        types.void)
+        assert get_dummy() == 0
+        ptr = get_dummy_ptr()
+        assert type(ptr) in (int, long)
+        ptr2 = MyPointerWrapper(ptr)
+        set_val_to_ptr(ptr2, 123)
+        assert get_dummy() == 123
+        set_val_to_ptr(ptr2, 0)
+
+    def test_typed_pointer(self):
+        from _ffi import types
+        intptr = types.Pointer(types.sint) # create a typed pointer to sint
+        assert intptr.deref_pointer() is types.sint
+        assert str(intptr) == '<ffi type (pointer to sint)>'
+        assert types.sint.deref_pointer() is None
+        raises(TypeError, "types.Pointer(42)")
+
+    def test_pointer_identity(self):
+        from _ffi import types
+        x = types.Pointer(types.slong)
+        y = types.Pointer(types.slong)
+        z = types.Pointer(types.char)
+        assert x is y
+        assert x is not z
+
+    def test_typed_pointer_args(self):
+        """
+            extern int dummy; // defined in test_void_result 
+            DLLEXPORT int* get_dummy_ptr(); // defined in test_pointer_args
+            DLLEXPORT void set_val_to_ptr(int* ptr, int val); // ditto
+        """
+        from _ffi import CDLL, types
+
+        libfoo = CDLL(self.libfoo_name)
+        intptr = types.Pointer(types.sint)
+        get_dummy = libfoo.getfunc('get_dummy', [], types.sint)
+        get_dummy_ptr = libfoo.getfunc('get_dummy_ptr', [], intptr)
+        set_val_to_ptr = libfoo.getfunc('set_val_to_ptr', [intptr, types.sint], types.void)
+        assert get_dummy() == 0
+        ptr = get_dummy_ptr()
+        set_val_to_ptr(ptr, 123)
+        assert get_dummy() == 123
+        set_val_to_ptr(ptr, 0)
+
+    def test_huge_pointer_args(self):
+        """
+            #include <stdlib.h>
+            DLLEXPORT long is_null_ptr(void* ptr) { return ptr == NULL; }
+        """
+        import sys
+        from _ffi import CDLL, types
+        libfoo = CDLL(self.libfoo_name)
+        is_null_ptr = libfoo.getfunc('is_null_ptr', [types.void_p], types.ulong)
+        assert not is_null_ptr(sys.maxint+1)
 
     def test_unsigned_long_args(self):
         """
-            unsigned long sum_xy_ul(unsigned long x, unsigned long y)
+            DLLEXPORT unsigned long sum_xy_ul(unsigned long x, unsigned long y)
             {
                 return x+y;
             }
@@ -114,12 +250,11 @@
 
     def test_unsigned_short_args(self):
         """
-            unsigned short sum_xy_us(unsigned short x, unsigned short y)
+            DLLEXPORT unsigned short sum_xy_us(unsigned short x, unsigned short y)
             {
                 return x+y;
             }
         """
-        import sys
         from _ffi import CDLL, types
         libfoo = CDLL(self.libfoo_name)
         sum_xy = libfoo.getfunc('sum_xy_us', [types.ushort, types.ushort],
@@ -127,6 +262,166 @@
         assert sum_xy(32000, 8000) == 40000
         assert sum_xy(60000, 30000) == 90000 % 65536
 
+    def test_unsigned_byte_args(self):
+        """
+            DLLEXPORT unsigned char sum_xy_ub(unsigned char x, unsigned char y)
+            {
+                return x+y;
+            }
+        """
+        from _ffi import CDLL, types
+        libfoo = CDLL(self.libfoo_name)
+        sum_xy = libfoo.getfunc('sum_xy_ub', [types.ubyte, types.ubyte],
+                                types.ubyte)
+        assert sum_xy(100, 40) == 140
+        assert sum_xy(200, 60) == 260 % 256  # unsigned char result wraps at 8 bits
+
+    def test_signed_byte_args(self):
+        """
+            DLLEXPORT signed char sum_xy_sb(signed char x, signed char y)
+            {
+                return x+y;
+            }
+        """
+        from _ffi import CDLL, types
+        libfoo = CDLL(self.libfoo_name)
+        sum_xy = libfoo.getfunc('sum_xy_sb', [types.sbyte, types.sbyte],
+                                types.sbyte)
+        assert sum_xy(10, 20) == 30
+        assert sum_xy(100, 28) == -128
+
+    def test_char_args(self):
+        """
+            DLLEXPORT char my_toupper(char x)
+            {
+                return x - ('a'-'A');
+            }
+        """
+        from _ffi import CDLL, types
+        libfoo = CDLL(self.libfoo_name)
+        my_toupper = libfoo.getfunc('my_toupper', [types.char],
+                                    types.char)
+        assert my_toupper('c') == 'C'
+
+    def test_unichar_args(self):
+        """
+            #include <stddef.h>
+            DLLEXPORT wchar_t sum_xy_wc(wchar_t x, wchar_t y)
+            {
+                return x + y;
+            }
+        """
+        from _ffi import CDLL, types
+        libfoo = CDLL(self.libfoo_name)
+        sum_xy = libfoo.getfunc('sum_xy_wc', [types.unichar, types.unichar],
+                                types.unichar)
+        res = sum_xy(unichr(1000), unichr(2000))
+        assert type(res) is unicode
+        assert ord(res) == 3000
+
+    def test_single_float_args(self):
+        """
+            DLLEXPORT float sum_xy_float(float x, float y)
+            {
+                return x+y;
+            }
+        """
+        from _ffi import CDLL, types
+        libfoo = CDLL(self.libfoo_name)
+        sum_xy = libfoo.getfunc('sum_xy_float', [types.float, types.float],
+                                types.float)
+        res = sum_xy(12.34, 56.78)
+        assert res == self.f_12_34_plus_56_78
+
+
+    def test_slonglong_args(self):
+        """
+            DLLEXPORT long long sum_xy_longlong(long long x, long long y)
+            {
+                return x+y;
+            }
+        """
+        from _ffi import CDLL, types
+        maxint32 = 2147483647 # we cannot really go above maxint on 64 bits
+                              # (and we would not test anything, as there long
+                              # is the same as long long)
+
+        libfoo = CDLL(self.libfoo_name)
+        sum_xy = libfoo.getfunc('sum_xy_longlong', [types.slonglong, types.slonglong],
+                                types.slonglong)
+        x = maxint32+1
+        y = maxint32+2
+        res = sum_xy(x, y)
+        expected = maxint32*2 + 3
+        assert res == expected
+
+    def test_ulonglong_args(self):
+        """
+            DLLEXPORT unsigned long long sum_xy_ulonglong(unsigned long long x,
+                                                unsigned long long y)
+            {
+                return x+y;
+            }
+        """
+        from _ffi import CDLL, types
+        maxint64 = 9223372036854775807 # maxint64+1 does not fit into a
+                                       # longlong, but it does into a
+                                       # ulonglong
+        libfoo = CDLL(self.libfoo_name)
+        sum_xy = libfoo.getfunc('sum_xy_ulonglong', [types.ulonglong, types.ulonglong],
+                                types.ulonglong)
+        x = maxint64+1
+        y = 2
+        res = sum_xy(x, y)
+        expected = maxint64 + 3
+        assert res == expected
+
+    def test_byval_argument(self):
+        """
+            struct Point {
+                long x;
+                long y;
+            };
+
+            DLLEXPORT long sum_point(struct Point p) {
+                return p.x + p.y;
+            }
+        """
+        import _rawffi
+        from _ffi import CDLL, types
+        POINT = _rawffi.Structure([('x', 'l'), ('y', 'l')])
+        ffi_point = POINT.get_ffi_type()
+        libfoo = CDLL(self.libfoo_name)
+        sum_point = libfoo.getfunc('sum_point', [ffi_point], types.slong)
+        #
+        p = POINT()
+        p.x = 30
+        p.y = 12
+        res = sum_point(p)
+        assert res == 42
+        p.free()
+
+    def test_byval_result(self):
+        """
+            DLLEXPORT struct Point make_point(long x, long y) {
+                struct Point p;
+                p.x = x;
+                p.y = y;
+                return p;
+            }
+        """
+        import _rawffi
+        from _ffi import CDLL, types
+        POINT = _rawffi.Structure([('x', 'l'), ('y', 'l')])
+        ffi_point = POINT.get_ffi_type()
+        libfoo = CDLL(self.libfoo_name)
+        make_point = libfoo.getfunc('make_point', [types.slong, types.slong], ffi_point)
+        #
+        p = make_point(12, 34)
+        assert p.x == 12
+        assert p.y == 34
+        p.free()
+
     def test_TypeError_numargs(self):
         from _ffi import CDLL, types
         libfoo = CDLL(self.libfoo_name)
@@ -142,3 +437,10 @@
     def test_OSError_loading(self):
         from _ffi import CDLL, types
         raises(OSError, "CDLL('I do not exist')")
+
+    def test_AttributeError_missing_function(self):
+        from _ffi import CDLL, types
+        libfoo = CDLL(self.libfoo_name)
+        raises(AttributeError, "libfoo.getfunc('I_do_not_exist', [], types.void)")
+        libnone = CDLL(None)
+        raises(AttributeError, "libnone.getfunc('I_do_not_exist', [], types.void)")
diff --git a/pypy/module/_file/interp_file.py b/pypy/module/_file/interp_file.py
--- a/pypy/module/_file/interp_file.py
+++ b/pypy/module/_file/interp_file.py
@@ -4,13 +4,13 @@
 import errno
 from pypy.rlib import streamio
 from pypy.rlib.rarithmetic import r_longlong
-from pypy.module._file.interp_stream import W_AbstractStream
-from pypy.module._file.interp_stream import StreamErrors, wrap_streamerror, wrap_oserror_as_ioerror
+from pypy.rlib.rstring import StringBuilder
+from pypy.module._file.interp_stream import (W_AbstractStream, StreamErrors,
+    wrap_streamerror, wrap_oserror_as_ioerror)
 from pypy.module.posix.interp_posix import dispatch_filename
 from pypy.interpreter.error import OperationError, operationerrfmt
-from pypy.interpreter.typedef import TypeDef, GetSetProperty
-from pypy.interpreter.typedef import interp_attrproperty, make_weakref_descr
-from pypy.interpreter.typedef import interp_attrproperty_w
+from pypy.interpreter.typedef import (TypeDef, GetSetProperty,
+    interp_attrproperty, make_weakref_descr, interp_attrproperty_w)
 from pypy.interpreter.gateway import interp2app, unwrap_spec
 
 
@@ -43,7 +43,11 @@
         # assume that the file and stream objects are only visible in the
         # thread that runs __del__, so no race condition should be possible
         self.clear_all_weakrefs()
-        self.direct_close()
+        try:
+            self.direct_close()
+        except StreamErrors, e:
+            operr = wrap_streamerror(self.space, e, self.w_name)
+            operr.write_unraisable(self.space, '__del__ of ', self)
 
     def fdopenstream(self, stream, fd, mode, w_name=None):
         self.fd = fd
@@ -160,14 +164,14 @@
         if n < 0:
             return stream.readall()
         else:
-            result = []
+            result = StringBuilder(n)
             while n > 0:
                 data = stream.read(n)
                 if not data:
                     break
                 n -= len(data)
                 result.append(data)
-            return ''.join(result)
+            return result.build()
 
     @unwrap_spec(size=int)
     def direct_readline(self, size=-1):
@@ -345,11 +349,11 @@
 may be returned, even if no size parameter was given.""")
 
     _decl(locals(), "readline",
-        """readlines([size]) -> list of strings, each a line from the file.
+        """readline([size]) -> next line from the file, as a string.
 
-Call readline() repeatedly and return a list of the lines so read.
-The optional size argument, if given, is an approximate bound on the
-total number of bytes in the lines returned.""")
+Retain newline.  A non-negative size argument limits the maximum
+number of bytes to return (an incomplete line may be returned then).
+Return an empty string at EOF.""")
 
     _decl(locals(), "readlines",
         """readlines([size]) -> list of strings, each a line from the file.
@@ -553,4 +557,4 @@
 @unwrap_spec(file=W_File, encoding="str_or_None", errors="str_or_None")
 def set_file_encoding(space, file, encoding=None, errors=None):
     file.encoding = encoding
-    file.errors = errors
\ No newline at end of file
+    file.errors = errors
diff --git a/pypy/module/_file/test/test_file.py b/pypy/module/_file/test/test_file.py
--- a/pypy/module/_file/test/test_file.py
+++ b/pypy/module/_file/test/test_file.py
@@ -232,6 +232,29 @@
             data = f.read()
             assert data == "15"
 
+    def test_exception_from_close(self):
+        import os
+        f = self.file(self.temppath, 'w')
+        os.close(f.fileno())
+        raises(IOError, f.close)    # bad file descriptor
+
+    def test_exception_from_del(self):
+        import os, gc, sys, cStringIO
+        f = self.file(self.temppath, 'w')
+        g = cStringIO.StringIO()
+        preverr = sys.stderr
+        try:
+            sys.stderr = g
+            os.close(f.fileno())
+            del f
+            gc.collect()     # bad file descriptor in f.__del__()
+        finally:
+            sys.stderr = preverr
+        import errno
+        assert os.strerror(errno.EBADF) in g.getvalue()
+        # the following is a "nice to have" feature that CPython doesn't have
+        if '__pypy__' in sys.builtin_module_names:
+            assert self.temppath in g.getvalue()
 
 
 class AppTestConcurrency(object):
diff --git a/pypy/module/_multibytecodec/c_codecs.py b/pypy/module/_multibytecodec/c_codecs.py
--- a/pypy/module/_multibytecodec/c_codecs.py
+++ b/pypy/module/_multibytecodec/c_codecs.py
@@ -3,6 +3,8 @@
 from pypy.translator.tool.cbuild import ExternalCompilationInfo
 from pypy.tool.autopath import pypydir
 
+UNICODE_REPLACEMENT_CHARACTER = u'\uFFFD'
+
 
 class EncodeDecodeError(Exception):
     def __init__(self, start, end, reason):
@@ -103,8 +105,12 @@
                                           [DECODEBUF_P], rffi.SSIZE_T)
 pypy_cjk_dec_inbuf_consumed = llexternal('pypy_cjk_dec_inbuf_consumed',
                                          [DECODEBUF_P], rffi.SSIZE_T)
+pypy_cjk_dec_replace_on_error = llexternal('pypy_cjk_dec_replace_on_error',
+                                           [DECODEBUF_P, rffi.CWCHARP,
+                                            rffi.SSIZE_T, rffi.SSIZE_T],
+                                           rffi.SSIZE_T)
 
-def decode(codec, stringdata):
+def decode(codec, stringdata, errors="strict", errorcb=None, namecb=None):
     inleft = len(stringdata)
     inbuf = rffi.get_nonmovingbuffer(stringdata)
     try:
@@ -112,10 +118,12 @@
         if not decodebuf:
             raise MemoryError
         try:
-            r = pypy_cjk_dec_chunk(decodebuf)
-            if r != 0:
-                multibytecodec_decerror(decodebuf, r)
-                assert False
+            while True:
+                r = pypy_cjk_dec_chunk(decodebuf)
+                if r == 0:
+                    break
+                multibytecodec_decerror(decodebuf, r, errors,
+                                        errorcb, namecb, stringdata)
             src = pypy_cjk_dec_outbuf(decodebuf)
             length = pypy_cjk_dec_outlen(decodebuf)
             return rffi.wcharpsize2unicode(src, length)
@@ -126,7 +134,8 @@
     finally:
         rffi.free_nonmovingbuffer(stringdata, inbuf)
 
-def multibytecodec_decerror(decodebuf, e):
+def multibytecodec_decerror(decodebuf, e, errors,
+                            errorcb, namecb, stringdata):
     if e > 0:
         reason = "illegal multibyte sequence"
         esize = e
@@ -138,12 +147,27 @@
     else:
         raise RuntimeError
     #
-    # if errors == ERROR_REPLACE:...
-    # if errors == ERROR_IGNORE or errors == ERROR_REPLACE:...
+    # compute the unicode to use as a replacement -> 'replace', and
+    # the current position in the input 'stringdata' -> 'end'
     start = pypy_cjk_dec_inbuf_consumed(decodebuf)
     end = start + esize
-    if 1:  # errors == ERROR_STRICT:
+    if errors == "strict":
         raise EncodeDecodeError(start, end, reason)
+    elif errors == "ignore":
+        replace = u""
+    elif errors == "replace":
+        replace = UNICODE_REPLACEMENT_CHARACTER
+    else:
+        assert errorcb
+        replace, end = errorcb(errors, namecb, reason,
+                               stringdata, start, end)
+    inbuf = rffi.get_nonmoving_unicodebuffer(replace)
+    try:
+        r = pypy_cjk_dec_replace_on_error(decodebuf, inbuf, len(replace), end)
+    finally:
+        rffi.free_nonmoving_unicodebuffer(replace, inbuf)
+    if r == MBERR_NOMEMORY:
+        raise MemoryError
 
 # ____________________________________________________________
 # Encoding
@@ -165,8 +189,12 @@
                                           [ENCODEBUF_P], rffi.SSIZE_T)
 pypy_cjk_enc_inbuf_consumed = llexternal('pypy_cjk_enc_inbuf_consumed',
                                          [ENCODEBUF_P], rffi.SSIZE_T)
+pypy_cjk_enc_replace_on_error = llexternal('pypy_cjk_enc_replace_on_error',
+                                           [ENCODEBUF_P, rffi.CCHARP,
+                                            rffi.SSIZE_T, rffi.SSIZE_T],
+                                           rffi.SSIZE_T)
 
-def encode(codec, unicodedata):
+def encode(codec, unicodedata, errors="strict", errorcb=None, namecb=None):
     inleft = len(unicodedata)
     inbuf = rffi.get_nonmoving_unicodebuffer(unicodedata)
     try:
@@ -174,14 +202,18 @@
         if not encodebuf:
             raise MemoryError
         try:
-            r = pypy_cjk_enc_chunk(encodebuf)
-            if r != 0:
-                multibytecodec_encerror(encodebuf, r)
-                assert False
-            r = pypy_cjk_enc_reset(encodebuf)
-            if r != 0:
-                multibytecodec_encerror(encodebuf, r)
-                assert False
+            while True:
+                r = pypy_cjk_enc_chunk(encodebuf)
+                if r == 0:
+                    break
+                multibytecodec_encerror(encodebuf, r, errors,
+                                        codec, errorcb, namecb, unicodedata)
+            while True:
+                r = pypy_cjk_enc_reset(encodebuf)
+                if r == 0:
+                    break
+                multibytecodec_encerror(encodebuf, r, errors,
+                                        codec, errorcb, namecb, unicodedata)
             src = pypy_cjk_enc_outbuf(encodebuf)
             length = pypy_cjk_enc_outlen(encodebuf)
             return rffi.charpsize2str(src, length)
@@ -192,7 +224,8 @@
     finally:
         rffi.free_nonmoving_unicodebuffer(unicodedata, inbuf)
 
-def multibytecodec_encerror(encodebuf, e):
+def multibytecodec_encerror(encodebuf, e, errors,
+                            codec, errorcb, namecb, unicodedata):
     if e > 0:
         reason = "illegal multibyte sequence"
         esize = e
@@ -204,9 +237,27 @@
     else:
         raise RuntimeError
     #
-    # if errors == ERROR_REPLACE:...
-    # if errors == ERROR_IGNORE or errors == ERROR_REPLACE:...
+    # compute the string to use as a replacement -> 'replace', and
+    # the current position in the input 'unicodedata' -> 'end'
     start = pypy_cjk_enc_inbuf_consumed(encodebuf)
     end = start + esize
-    if 1:  # errors == ERROR_STRICT:
+    if errors == "strict":
         raise EncodeDecodeError(start, end, reason)
+    elif errors == "ignore":
+        replace = ""
+    elif errors == "replace":
+        try:
+            replace = encode(codec, u"?")
+        except EncodeDecodeError:
+            replace = "?"
+    else:
+        assert errorcb
+        replace, end = errorcb(errors, namecb, reason,
+                               unicodedata, start, end)
+    inbuf = rffi.get_nonmovingbuffer(replace)
+    try:
+        r = pypy_cjk_enc_replace_on_error(encodebuf, inbuf, len(replace), end)
+    finally:
+        rffi.free_nonmovingbuffer(replace, inbuf)
+    if r == MBERR_NOMEMORY:
+        raise MemoryError
diff --git a/pypy/module/_multibytecodec/interp_multibytecodec.py b/pypy/module/_multibytecodec/interp_multibytecodec.py
--- a/pypy/module/_multibytecodec/interp_multibytecodec.py
+++ b/pypy/module/_multibytecodec/interp_multibytecodec.py
@@ -3,6 +3,7 @@
 from pypy.interpreter.typedef import TypeDef
 from pypy.interpreter.error import OperationError
 from pypy.module._multibytecodec import c_codecs
+from pypy.module._codecs.interp_codecs import CodecState
 
 
 class MultibyteCodec(Wrappable):
@@ -13,13 +14,13 @@
 
     @unwrap_spec(input=str, errors="str_or_None")
     def decode(self, space, input, errors=None):
-        if errors is not None and errors != 'strict':
-            raise OperationError(space.w_NotImplementedError,    # XXX
-                                 space.wrap("errors='%s' in _multibytecodec"
-                                            % errors))
+        if errors is None:
+            errors = 'strict'
+        state = space.fromcache(CodecState)
         #
         try:
-            output = c_codecs.decode(self.codec, input)
+            output = c_codecs.decode(self.codec, input, errors,
+                                     state.decode_error_handler, self.name)
         except c_codecs.EncodeDecodeError, e:
             raise OperationError(
                 space.w_UnicodeDecodeError,
@@ -37,13 +38,13 @@
 
     @unwrap_spec(input=unicode, errors="str_or_None")
     def encode(self, space, input, errors=None):
-        if errors is not None and errors != 'strict':
-            raise OperationError(space.w_NotImplementedError,    # XXX
-                                 space.wrap("errors='%s' in _multibytecodec"
-                                            % errors))
+        if errors is None:
+            errors = 'strict'
+        state = space.fromcache(CodecState)
         #
         try:
-            output = c_codecs.encode(self.codec, input)
+            output = c_codecs.encode(self.codec, input, errors,
+                                     state.encode_error_handler, self.name)
         except c_codecs.EncodeDecodeError, e:
             raise OperationError(
                 space.w_UnicodeEncodeError,
diff --git a/pypy/module/_multibytecodec/test/test_app_codecs.py b/pypy/module/_multibytecodec/test/test_app_codecs.py
--- a/pypy/module/_multibytecodec/test/test_app_codecs.py
+++ b/pypy/module/_multibytecodec/test/test_app_codecs.py
@@ -36,6 +36,37 @@
         e = raises(UnicodeDecodeError, codec.decode, "~{xyz}").value
         assert e.args == ('hz', '~{xyz}', 2, 4, 'illegal multibyte sequence')
 
+    def test_decode_hz_ignore(self):
+        import _codecs_cn
+        codec = _codecs_cn.getcodec("hz")
+        r = codec.decode("def~{}abc", errors='ignore')
+        assert r == (u'def\u5fcf', 9)
+        r = codec.decode("def~{}abc", 'ignore')
+        assert r == (u'def\u5fcf', 9)
+
+    def test_decode_hz_replace(self):
+        import _codecs_cn
+        codec = _codecs_cn.getcodec("hz")
+        r = codec.decode("def~{}abc", errors='replace')
+        assert r == (u'def\ufffd\u5fcf', 9)
+        r = codec.decode("def~{}abc", 'replace')
+        assert r == (u'def\ufffd\u5fcf', 9)
+
+    def test_decode_custom_error_handler(self):
+        import codecs
+        codecs.register_error("test.decode_custom_error_handler",
+                              lambda e: (u'\u1234\u5678', e.end))
+        u = "abc\xDD".decode("hz", "test.decode_custom_error_handler")
+        assert u == u'abc\u1234\u5678'
+
+    def test_decode_custom_error_handler_overflow(self):
+        import codecs
+        import sys
+        codecs.register_error("test.test_decode_custom_error_handler_overflow",
+                              lambda e: (u'', sys.maxint + 1))
+        raises((IndexError, OverflowError), "abc\xDD".decode, "hz",
+               "test.test_decode_custom_error_handler_overflow")
+
     def test_encode_hz(self):
         import _codecs_cn
         codec = _codecs_cn.getcodec("hz")
@@ -54,3 +85,24 @@
         assert e.start == 3
         assert e.end == 4
         assert e.reason == 'illegal multibyte sequence'
+
+    def test_encode_hz_ignore(self):
+        import _codecs_cn
+        codec = _codecs_cn.getcodec("hz")
+        r = codec.encode(u'abc\u1234def', 'ignore')
+        assert r == ('abcdef', 7)
+        assert type(r[0]) is str
+
+    def test_encode_hz_replace(self):
+        import _codecs_cn
+        codec = _codecs_cn.getcodec("hz")
+        r = codec.encode(u'abc\u1234def', 'replace')
+        assert r == ('abc?def', 7)
+        assert type(r[0]) is str
+
+    def test_encode_custom_error_handler(self):
+        import codecs
+        codecs.register_error("test.multi_bad_handler", lambda e: (repl, 1))
+        repl = u"\u2014"
+        s = u"\uDDA1".encode("gbk", "test.multi_bad_handler")
+        assert s == '\xA1\xAA'
diff --git a/pypy/module/_multibytecodec/test/test_c_codecs.py b/pypy/module/_multibytecodec/test/test_c_codecs.py
--- a/pypy/module/_multibytecodec/test/test_c_codecs.py
+++ b/pypy/module/_multibytecodec/test/test_c_codecs.py
@@ -36,6 +36,16 @@
     assert e.end == 4
     assert e.reason == "illegal multibyte sequence"
 
+def test_decode_hz_ignore():
+    c = getcodec("hz")
+    u = decode(c, 'def~{}abc', 'ignore')
+    assert u == u'def\u5fcf'
+
+def test_decode_hz_replace():
+    c = getcodec("hz")
+    u = decode(c, 'def~{}abc', 'replace')
+    assert u == u'def\ufffd\u5fcf'
+
 def test_encode_hz():
     c = getcodec("hz")
     s = encode(c, u'foobar')
@@ -51,6 +61,16 @@
     assert e.end == 4
     assert e.reason == "illegal multibyte sequence"
 
+def test_encode_hz_ignore():
+    c = getcodec("hz")
+    s = encode(c, u'abc\u1234def', 'ignore')
+    assert s == 'abcdef'
+
+def test_encode_hz_replace():
+    c = getcodec("hz")
+    s = encode(c, u'abc\u1234def', 'replace')
+    assert s == 'abc?def'
+
 def test_encode_jisx0208():
     c = getcodec('iso2022_jp')
     s = encode(c, u'\u83ca\u5730\u6642\u592b')
diff --git a/pypy/module/_multiprocessing/test/test_memory.py b/pypy/module/_multiprocessing/test/test_memory.py
--- a/pypy/module/_multiprocessing/test/test_memory.py
+++ b/pypy/module/_multiprocessing/test/test_memory.py
@@ -3,7 +3,7 @@
 class AppTestMemory:
     def setup_class(cls):
         space = gettestobjspace(
-            usemodules=('_multiprocessing', 'mmap', '_rawffi'))
+            usemodules=('_multiprocessing', 'mmap', '_rawffi', '_ffi'))
         cls.space = space
 
     def test_address_of(self):
diff --git a/pypy/module/_rawffi/callback.py b/pypy/module/_rawffi/callback.py
--- a/pypy/module/_rawffi/callback.py
+++ b/pypy/module/_rawffi/callback.py
@@ -43,7 +43,7 @@
             unwrap_value(space, push_elem, ll_res, 0,
                          callback_ptr.result, w_res)
     except OperationError, e:
-        tbprint(space, space.wrap(e.application_traceback),
+        tbprint(space, space.wrap(e.get_traceback()),
                 space.wrap(e.errorstr(space)))
         # force the result to be zero
         if callback_ptr.result is not None:
diff --git a/pypy/module/_rawffi/interp_rawffi.py b/pypy/module/_rawffi/interp_rawffi.py
--- a/pypy/module/_rawffi/interp_rawffi.py
+++ b/pypy/module/_rawffi/interp_rawffi.py
@@ -250,6 +250,13 @@
     def get_basic_ffi_type(self):
         raise NotImplementedError
 
+    def descr_get_ffi_type(self, space):
+        # XXX: this assumes that you have the _ffi module enabled. In the long
+        # term, we will probably move the code for building structures and
+        # arrays from _rawffi to _ffi
+        from pypy.module._ffi.interp_ffi import W_FFIType
+        return W_FFIType('<unknown>', self.get_basic_ffi_type(), self)
+
     @unwrap_spec(n=int)
     def descr_size_alignment(self, space, n=1):
         return space.newtuple([space.wrap(self.size * n),
diff --git a/pypy/module/_rawffi/structure.py b/pypy/module/_rawffi/structure.py
--- a/pypy/module/_rawffi/structure.py
+++ b/pypy/module/_rawffi/structure.py
@@ -248,7 +248,8 @@
     alignment   = interp_attrproperty('alignment', W_Structure),
     fieldoffset = interp2app(W_Structure.descr_fieldoffset),
     fieldsize   = interp2app(W_Structure.descr_fieldsize),
-    size_alignment = interp2app(W_Structure.descr_size_alignment)
+    size_alignment = interp2app(W_Structure.descr_size_alignment),
+    get_ffi_type   = interp2app(W_Structure.descr_get_ffi_type),
 )
 W_Structure.typedef.acceptable_as_base_class = False
 
diff --git a/pypy/module/_stackless/interp_coroutine.py b/pypy/module/_stackless/interp_coroutine.py
--- a/pypy/module/_stackless/interp_coroutine.py
+++ b/pypy/module/_stackless/interp_coroutine.py
@@ -28,7 +28,7 @@
 
 from pypy.module.exceptions.interp_exceptions import W_SystemExit, _new_exception
 
-from pypy.rlib import rstack # for resume points
+from pypy.rlib import rstack, jit # for resume points
 from pypy.tool import stdlib_opcode as pythonopcode
 
 class _AppThunk(AbstractThunk):
@@ -47,9 +47,19 @@
     def call(self):
         costate = self.costate
         w_result = self.space.call_args(self.w_func, self.args)
-        rstack.resume_point("appthunk", costate, returns=w_result)
         costate.w_tempval = w_result
 
+class _ResumeThunk(AbstractThunk):
+    def __init__(self, space, costate, w_frame):
+        self.space = space
+        self.costate = costate
+        self.w_frame = w_frame
+
+    def call(self):
+        w_result = resume_frame(self.space, self.w_frame)
+        # costate.w_tempval = w_result #XXX?
+
+
 W_CoroutineExit = _new_exception('CoroutineExit', W_SystemExit,
                         """Coroutine killed manually.""")
 
@@ -97,7 +107,6 @@
                 "cannot switch to an unbound Coroutine"))
         state = self.costate
         self.switch()
-        rstack.resume_point("w_switch", state, space)
         w_ret, state.w_tempval = state.w_tempval, space.w_None
         return w_ret
 
@@ -116,7 +125,7 @@
         if isinstance(operror, OperationError):
             w_exctype = operror.w_type
             w_excvalue = operror.get_w_value(space)
-            w_exctraceback = operror.application_traceback
+            w_exctraceback = operror.get_traceback()
             w_excinfo = space.newtuple([w_exctype, w_excvalue, w_exctraceback])
             
             if w_exctype is self.costate.w_CoroutineExit:
@@ -151,7 +160,7 @@
                 space.gettypeobject(pytraceback.PyTraceback.typedef))):
                 raise OperationError(space.w_TypeError,
                       space.wrap("throw: arg 3 must be a traceback or None"))
-            operror.application_traceback = tb
+            operror.set_traceback(tb)
         
         self._kill(operror)
 
@@ -217,75 +226,17 @@
         self.parent = space.interp_w(AppCoroutine, w_parent)
         ec = self.space.getexecutioncontext()
         self.subctx.setstate(space, w_state)
-        self.reconstruct_framechain()
         if space.is_w(w_thunk, space.w_None):
-            self.thunk = None
+            if space.is_w(w_state, space.w_None):
+                self.thunk = None
+            else:
+                self.bind(_ResumeThunk(space, self.costate, self.subctx.topframe))
         else:
             w_func, w_args, w_kwds = space.unpackiterable(w_thunk,
                                                           expected_length=3)
             args = Arguments.frompacked(space, w_args, w_kwds)
             self.bind(_AppThunk(space, self.costate, w_func, args))
 
-    def reconstruct_framechain(self):
-        from pypy.interpreter.pyframe import PyFrame
-        from pypy.rlib.rstack import resume_state_create
-        if self.subctx.topframe is None:
-            self.frame = None
-            return
-
-        space = self.space
-        ec = space.getexecutioncontext()
-        costate = self.costate
-        # now the big fun of recreating tiny things...
-        bottom = resume_state_create(None, "yield_current_frame_to_caller_1")
-        # ("coroutine__bind", state)
-        _bind_frame = resume_state_create(bottom, "coroutine__bind", costate)
-        # ("appthunk", costate, returns=w_result)
-        appthunk_frame = resume_state_create(_bind_frame, "appthunk", costate)
-        chain = appthunk_frame
-        for frame in self.subctx.getframestack():
-            assert isinstance(frame, PyFrame)
-            # ("execute_frame", self, executioncontext, returns=w_exitvalue)
-            chain = resume_state_create(chain, "execute_frame", frame, ec)
-            code = frame.pycode.co_code
-            # ("dispatch", self, co_code, ec, returns=next_instr)
-            chain = resume_state_create(chain, "dispatch", frame, code, ec)
-            # ("handle_bytecode", self, co_code, ec, returns=next_instr)
-            chain = resume_state_create(chain, "handle_bytecode", frame, code,
-                                        ec)
-            instr = frame.last_instr
-            opcode = ord(code[instr])
-            map = pythonopcode.opmap
-            call_ops = [map['CALL_FUNCTION'], map['CALL_FUNCTION_KW'], map['CALL_FUNCTION_VAR'], 
-                        map['CALL_FUNCTION_VAR_KW'], map['CALL_METHOD']]
-            assert opcode in call_ops
-            # ("dispatch_call", self, co_code, next_instr, ec)
-            chain = resume_state_create(chain, "dispatch_call", frame, code,
-                                        instr+3, ec)
-            instr += 1
-            oparg = ord(code[instr]) | ord(code[instr + 1]) << 8
-            nargs = oparg & 0xff
-            nkwds = (oparg >> 8) & 0xff
-            if space.config.objspace.opcodes.CALL_METHOD and opcode == map['CALL_METHOD']:
-                if nkwds == 0:     # only positional arguments
-                    chain = resume_state_create(chain, 'CALL_METHOD', frame,
-                                                nargs)
-                else:              # includes keyword arguments
-                    chain = resume_state_create(chain, 'CALL_METHOD_KW', frame)
-            elif opcode == map['CALL_FUNCTION'] and nkwds == 0:
-                # Only positional arguments
-                # case1: ("CALL_FUNCTION", f, nargs, returns=w_result)
-                chain = resume_state_create(chain, 'CALL_FUNCTION', frame,
-                                            nargs)
-            else:
-                # case2: ("call_function", f, returns=w_result)
-                chain = resume_state_create(chain, 'call_function', frame)
-
-        # ("w_switch", state, space)
-        w_switch_frame = resume_state_create(chain, 'w_switch', costate, space)
-        # ("coroutine_switch", state, returns=incoming_frame)
-        switch_frame = resume_state_create(w_switch_frame, "coroutine_switch", costate)
-        self.frame = switch_frame
 
 # _mixin_ did not work
 for methname in StacklessFlags.__dict__:
@@ -411,3 +362,45 @@
 @unwrap_spec(limit=int)
 def set_stack_depth_limit(space, limit):
     rstack.set_stack_depth_limit(limit)
+
+
+# ___________________________________________________________________
+# unpickling trampoline
+
+def resume_frame(space, w_frame):
+    from pypy.interpreter.pyframe import PyFrame
+    frame = space.interp_w(PyFrame, w_frame, can_be_None=True)
+    w_result = space.w_None
+    operr = None
+    executioncontext = frame.space.getexecutioncontext()
+    while frame is not None:
+        code = frame.pycode.co_code
+        instr = frame.last_instr
+        opcode = ord(code[instr])
+        map = pythonopcode.opmap
+        call_ops = [map['CALL_FUNCTION'], map['CALL_FUNCTION_KW'], map['CALL_FUNCTION_VAR'], 
+                    map['CALL_FUNCTION_VAR_KW'], map['CALL_METHOD']]
+        assert opcode in call_ops
+        instr += 1
+        oparg = ord(code[instr]) | ord(code[instr + 1]) << 8
+        nargs = oparg & 0xff
+        nkwds = (oparg >> 8) & 0xff
+        if nkwds == 0:     # only positional arguments
+            # the fast paths leave things on the stack; pop them
+            if space.config.objspace.opcodes.CALL_METHOD and opcode == map['CALL_METHOD']:
+                frame.dropvalues(nargs + 2)
+            elif opcode == map['CALL_FUNCTION']:
+                frame.dropvalues(nargs + 1)
+
+        # small hack: unlink frame out of the execution context, because
+        # execute_frame will add it there again
+        executioncontext.topframeref = jit.non_virtual_ref(frame.f_backref())
+        frame.last_instr = instr + 1 # continue after the call
+        try:
+            w_result = frame.execute_frame(w_result, operr)
+        except OperationError, operr:
+            pass
+        frame = frame.f_backref()
+    if operr:
+        raise operr
+    return w_result
diff --git a/pypy/module/_stackless/interp_greenlet.py b/pypy/module/_stackless/interp_greenlet.py
--- a/pypy/module/_stackless/interp_greenlet.py
+++ b/pypy/module/_stackless/interp_greenlet.py
@@ -124,7 +124,7 @@
                 space.gettypeobject(pytraceback.PyTraceback.typedef))):
                 raise OperationError(space.w_TypeError,
                       space.wrap("throw: arg 3 must be a traceback or None"))
-            operror.application_traceback = tb
+            operror.set_traceback(tb)
         # Dead greenlet: turn GreenletExit into a regular return
         if self.isdead() and operror.match(space, self.costate.w_GreenletExit):
             args_w = [operror.get_w_value(space)]
diff --git a/pypy/module/_stackless/test/test_coroutine.py b/pypy/module/_stackless/test/test_coroutine.py
--- a/pypy/module/_stackless/test/test_coroutine.py
+++ b/pypy/module/_stackless/test/test_coroutine.py
@@ -8,33 +8,6 @@
         space = gettestobjspace(usemodules=('_stackless',))
         cls.space = space
 
-    def test_pickle_coroutine_empty(self):
-        # this test is limited to basic pickling.
-        # real stacks can only tested with a stackless pypy build.
-        import _stackless as stackless
-        co = stackless.coroutine()
-        import pickle
-        pckl = pickle.dumps(co)
-        co2 = pickle.loads(pckl)
-        # the empty unpickled coroutine can still be used:
-        result = []
-        co2.bind(result.append, 42)
-        co2.switch()
-        assert result == [42]
-
-    def test_pickle_coroutine_bound(self):
-        import pickle
-        import _stackless
-        lst = [4]
-        co = _stackless.coroutine()
-        co.bind(lst.append, 2)
-        pckl = pickle.dumps((co, lst))
-
-        (co2, lst2) = pickle.loads(pckl)
-        assert lst2 == [4]
-        co2.switch()
-        assert lst2 == [4, 2]
-
     def test_raise_propagate(self):
         import _stackless as stackless
         co = stackless.coroutine()
diff --git a/pypy/module/_stackless/test/test_pickle.py b/pypy/module/_stackless/test/test_pickle.py
--- a/pypy/module/_stackless/test/test_pickle.py
+++ b/pypy/module/_stackless/test/test_pickle.py
@@ -19,9 +19,35 @@
 class AppTestPickle:
 
     def setup_class(cls):
-        if not option.runappdirect:
-            py.test.skip('pure appdirect test (run with -A)')
-        cls.space = gettestobjspace(usemodules=('_stackless',))
+        cls.space = gettestobjspace(usemodules=('_stackless',), CALL_METHOD=True)
+
+    def test_pickle_coroutine_empty(self):
+        # this test is limited to basic pickling.
+        # real stacks can only be tested with a stackless pypy build.
+        import _stackless as stackless
+        co = stackless.coroutine()
+        import pickle
+        pckl = pickle.dumps(co)
+        co2 = pickle.loads(pckl)
+        # the empty unpickled coroutine can still be used:
+        result = []
+        co2.bind(result.append, 42)
+        co2.switch()
+        assert result == [42]
+
+    def test_pickle_coroutine_bound(self):
+        import pickle
+        import _stackless
+        lst = [4]
+        co = _stackless.coroutine()
+        co.bind(lst.append, 2)
+        pckl = pickle.dumps((co, lst))
+
+        (co2, lst2) = pickle.loads(pckl)
+        assert lst2 == [4]
+        co2.switch()
+        assert lst2 == [4, 2]
+
 
     def test_simple_ish(self):
 
@@ -58,6 +84,113 @@
         finally:
             del sys.modules['mod']
 
+    def test_pickle_again(self):
+
+        import new, sys
+
+        mod = new.module('mod')
+        sys.modules['mod'] = mod
+        try:
+            exec '''
+output = []
+import _stackless
+def f(coro, n, x):
+    if n == 0:
+        coro.switch()
+        return
+    f(coro, n-1, 2*x)
+    output.append(x)
+
+def example():
+    main_coro = _stackless.coroutine.getcurrent()
+    sub_coro = _stackless.coroutine()
+    sub_coro.bind(f, main_coro, 5, 1)
+    sub_coro.switch()
+
+    import pickle
+    pckl = pickle.dumps(sub_coro)
+    new_coro = pickle.loads(pckl)
+    pckl = pickle.dumps(new_coro)
+    newer_coro = pickle.loads(pckl)
+
+    newer_coro.switch()
+
+example()
+assert output == [16, 8, 4, 2, 1]
+''' in mod.__dict__
+        finally:
+            del sys.modules['mod']
+
+    def test_kwargs(self):
+
+        import new, sys
+
+        mod = new.module('mod')
+        sys.modules['mod'] = mod
+        try:
+            exec '''
+output = []
+import _stackless
+def f(coro, n, x, step=4):
+    if n == 0:
+        coro.switch()
+        return
+    f(coro, n-1, 2*x, step=1)
+    output.append(x)
+
+def example():
+    main_coro = _stackless.coroutine.getcurrent()
+    sub_coro = _stackless.coroutine()
+    sub_coro.bind(f, main_coro, 5, 1, 1)
+    sub_coro.switch()
+
+    import pickle
+    pckl = pickle.dumps(sub_coro)
+    new_coro = pickle.loads(pckl)
+
+    new_coro.switch()
+
+example()
+assert output == [16, 8, 4, 2, 1]
+''' in mod.__dict__
+        finally:
+            del sys.modules['mod']
+
+    def test_starstarargs(self):
+
+        import new, sys
+
+        mod = new.module('mod')
+        sys.modules['mod'] = mod
+        try:
+            exec '''
+output = []
+import _stackless
+def f(coro, n, x, step=4):
+    if n == 0:
+        coro.switch()
+        return
+    f(coro, n-1, 2*x, **{'step': 1})
+    output.append(x)
+
+def example():
+    main_coro = _stackless.coroutine.getcurrent()
+    sub_coro = _stackless.coroutine()
+    sub_coro.bind(f, main_coro, 5, 1, 1)
+    sub_coro.switch()
+
+    import pickle
+    pckl = pickle.dumps(sub_coro)
+    new_coro = pickle.loads(pckl)
+
+    new_coro.switch()
+
+example()
+assert output == [16, 8, 4, 2, 1]
+''' in mod.__dict__
+        finally:
+            del sys.modules['mod']
+
     def test_closure(self):
         import new, sys
 
@@ -130,8 +263,55 @@
         finally:
             del sys.modules['mod']
 
+    def test_exception_after_unpickling(self):
+
+        import new, sys
+
+        mod = new.module('mod')
+        sys.modules['mod'] = mod
+        try:
+            exec '''
+output = []
+import _stackless
+def f(coro, n, x):
+    if n == 0:
+        coro.switch()
+        raise ValueError
+    try:
+        f(coro, n-1, 2*x)
+    finally:
+        output.append(x)
+
+def example():
+    main_coro = _stackless.coroutine.getcurrent()
+    sub_coro = _stackless.coroutine()
+    sub_coro.bind(f, main_coro, 5, 1)
+    sub_coro.switch()
+
+    import pickle
+    pckl = pickle.dumps(sub_coro)
+    new_coro = pickle.loads(pckl)
+
+    try:
+        sub_coro.switch()
+    except ValueError:
+        pass
+    else:
+        assert 0
+    try:
+        new_coro.switch()
+    except ValueError:
+        pass
+    else:
+        assert 0
+
+example()
+assert output == [16, 8, 4, 2, 1] * 2
+''' in mod.__dict__
+        finally:
+            del sys.modules['mod']
+
     def test_loop(self):
-        #skip("happily segfaulting")
         import new, sys
 
         mod = new.module('mod')
diff --git a/pypy/module/_stackless/test/test_pickle_infrastructure.py b/pypy/module/_stackless/test/test_pickle_infrastructure.py
deleted file mode 100644
--- a/pypy/module/_stackless/test/test_pickle_infrastructure.py
+++ /dev/null
@@ -1,301 +0,0 @@
-from pypy.conftest import gettestobjspace
-from py.test import skip
-
-
-class BaseAppTestPicklePrerequisites(object):
-    OPTIONS = {}
-    def setup_class(cls):
-        space = gettestobjspace(usemodules=('_stackless',), **cls.OPTIONS)
-        cls.space = space
-
-    def test_pickle_switch_function(object):
-        import _stackless, pickle
-
-        sw = _stackless.coroutine.switch.im_func
-        dump = pickle.dumps(sw)
-        res = pickle.loads(dump)
-
-        assert res is sw
-        assert res.func_code is sw.func_code
-        assert res.func_doc is sw.func_doc
-        assert res.func_globals is sw.func_globals
-
-    def test_pickle_switch_function_code(object):
-        import _stackless, pickle
-
-        sw = _stackless.coroutine.switch.im_func.func_code
-        dump = pickle.dumps(sw)
-        res = pickle.loads(dump)
-
-        assert res is sw
-        
-class AppTestPicklePrerequisites(BaseAppTestPicklePrerequisites):
-    pass
-
-class AppTestPicklePrerequisitesBuiltinShortcut(BaseAppTestPicklePrerequisites):
-    OPTIONS = {"objspace.std.builtinshortcut": True}
-
-class FrameCheck(object):
-
-    def __init__(self, name):
-        self.name = name
-
-    def __eq__(self, frame):
-        return frame.pycode.co_name == self.name
-
-class BytecodeCheck(object):
-
-    def __init__(self, code, op, arg):
-        self.code = code
-        self.op = chr(op)+chr(arg & 0xff) + chr(arg >> 8 & 0xff)
-
-    def __eq__(self, pos):
-        return self.code[pos-3:pos] == self.op
-
-class BaseTestReconstructFrameChain(object):
-    OPTIONS = {}
-
-    def setup_class(cls):
-        space = gettestobjspace(usemodules=('_stackless',), **cls.OPTIONS)
-        cls.space = space
-
-        from pypy.rlib import rstack
-        cls.old_resume_state_create = rstack.resume_state_create
-
-        def tr(prevstate, label, *args):
-            if prevstate is None:
-                prevstate = []
-            return prevstate+[(label, args)]
-        rstack.resume_state_create = tr
-
-        w_opmap = space.appexec([], """():
-        import opcode
-
-        return opcode.opmap
-        """)
-
-        opmap = space.unwrap(w_opmap)
-        cls.CALL_FUNCTION = opmap['CALL_FUNCTION']
-        cls.CALL_FUNCTION_VAR = opmap['CALL_FUNCTION_VAR']
-        cls.CALL_METHOD = opmap['CALL_METHOD']
-
-        cls.callmethod = getattr(cls, cls.callmethod_label)
-
-    def teardown_class(cls):
-        from pypy.rlib import rstack
-        rstack.resume_state_create = cls.old_resume_state_create
-
-    def start(self, w_coro):
-        self.i = 0
-        self.frame_to_check = w_coro.frame
-        w_coro.frame = None # avoid exploding in kill > __del__
-
-    def end(self):
-        assert self.i == len(self.frame_to_check)
-
-    def check_entry(self, label, *args):
-        frame = self.frame_to_check
-        assert frame[self.i] == (label, args)
-        self.i += 1
-
-        
-    def test_two_frames_simple(self):
-        space = self.space
-
-        w_res = space.appexec([], """():
-        import _stackless as stackless
-        import pickle
-
-        main = stackless.coroutine.getcurrent()
-        d = {'main': main}        
-
-        exec \"\"\"
-def f():
-    g(1)
-
-def g(x):
-    main.switch()
-\"\"\" in d
-        f = d['f']
-        g = d['g']
-
-        co = stackless.coroutine()
-        co.bind(f)
-        co.switch()
-
-        s = pickle.dumps(co)
-        co = pickle.loads(s)
-
-        return co, f, g
-        """)
-
-        w_co, w_f, w_g = space.fixedview(w_res)
-
-        ec = space.getexecutioncontext()
-        fcode = w_f.code.co_code
-        gcode = w_g.code.co_code        
-
-        self.start(w_co)
-        e = self.check_entry
-        e('yield_current_frame_to_caller_1')
-        e('coroutine__bind', w_co.costate)
-        e('appthunk', w_co.costate)
-        # f
-        e('execute_frame', FrameCheck('f'), ec)
-        e('dispatch', FrameCheck('f'), fcode, ec)
-        e('handle_bytecode', FrameCheck('f'), fcode, ec)
-        e('dispatch_call', FrameCheck('f'), fcode,
-          BytecodeCheck(fcode, self.CALL_FUNCTION, 1), ec)
-        e('CALL_FUNCTION', FrameCheck('f'), 1)
-        # g
-        e('execute_frame', FrameCheck('g'), ec)
-        e('dispatch', FrameCheck('g'), gcode, ec)
-        e('handle_bytecode', FrameCheck('g'), gcode, ec)
-        e('dispatch_call', FrameCheck('g'), gcode,
-          BytecodeCheck(gcode, self.callmethod, 0), ec)
-        e(self.callmethod_label, FrameCheck('g'), 0)
-        e('w_switch', w_co.costate, space)
-        e('coroutine_switch', w_co.costate)
-        self.end()
-
-    def test_two_frames_stararg(self):
-        space = self.space
-
-        w_res = space.appexec([], """():
-        import _stackless as stackless
-        import pickle
-        
-        main = stackless.coroutine.getcurrent()
-        d = {'main': main}        
-
-        exec \"\"\"        
-def f():
-    g(4, 3, d=2, *(1,))
-
-def g(a, b, c, d):
-    main.switch()
-\"\"\" in d
-        f = d['f']
-        g = d['g']    
-
-        co = stackless.coroutine()
-        co.bind(f)
-        co.switch()
-
-        s = pickle.dumps(co)
-        co = pickle.loads(s)
-
-        return co, f, g
-        """)
-
-        w_co, w_f, w_g = space.fixedview(w_res)
-
-        ec = space.getexecutioncontext()
-        fcode = w_f.code.co_code
-        gcode = w_g.code.co_code        
-
-        self.start(w_co)
-        e = self.check_entry
-        e('yield_current_frame_to_caller_1')
-        e('coroutine__bind', w_co.costate)
-        e('appthunk', w_co.costate)
-        # f
-        e('execute_frame', FrameCheck('f'), ec)
-        e('dispatch', FrameCheck('f'), fcode, ec)
-        e('handle_bytecode', FrameCheck('f'), fcode, ec)
-        e('dispatch_call', FrameCheck('f'), fcode,
-          BytecodeCheck(fcode, self.CALL_FUNCTION_VAR, 2+(1<<8)), ec)
-        e('call_function', FrameCheck('f'))
-        # g
-        e('execute_frame', FrameCheck('g'), ec)
-        e('dispatch', FrameCheck('g'), gcode, ec)
-        e('handle_bytecode', FrameCheck('g'), gcode, ec)
-        e('dispatch_call', FrameCheck('g'), gcode,
-          BytecodeCheck(gcode, self.callmethod, 0), ec)
-        e(self.callmethod_label, FrameCheck('g'), 0)
-        e('w_switch', w_co.costate, space)
-        e('coroutine_switch', w_co.costate)
-        self.end()        
-    
-    def test_two_frames_method(self):
-        space = self.space
-
-        w_res = space.appexec([], """():
-        import _stackless as stackless
-        import pickle
-        import new, sys
-
-        mod = new.module('mod')
-        sys.modules['mod'] = mod
-        
-        main = stackless.coroutine.getcurrent()
-        d = {'main': main}        
-
-        exec \"\"\"                
-def f():
-    a = A()
-    a.m(1)
-
-def g(_, x):
-    main.switch()
-
-class A(object):
-    m = g
-\"\"\" in d
-        f = d['f']
-        g = d['g']
-        A = d['A']
-
-        # to make pickling work
-        mod.A = A
-        A.__module__ = 'mod'
-
-        co = stackless.coroutine()
-        co.bind(f)
-        co.switch()
-
-        s = pickle.dumps(co)
-        co = pickle.loads(s)
-
-        return co, f, g
-        """)
-
-        w_co, w_f, w_g = space.fixedview(w_res)
-
-        ec = space.getexecutioncontext()
-        fcode = w_f.code.co_code
-        gcode = w_g.code.co_code        
-
-        self.start(w_co)
-        e = self.check_entry
-        e('yield_current_frame_to_caller_1')
-        e('coroutine__bind', w_co.costate)
-        e('appthunk', w_co.costate)
-        # f
-        e('execute_frame', FrameCheck('f'), ec)
-        e('dispatch', FrameCheck('f'), fcode, ec)
-        e('handle_bytecode', FrameCheck('f'), fcode, ec)
-        e('dispatch_call', FrameCheck('f'), fcode,
-          BytecodeCheck(fcode, self.callmethod, 1), ec)
-        e(self.callmethod_label, FrameCheck('f'), 1)
-        # g
-        e('execute_frame', FrameCheck('g'), ec)
-        e('dispatch', FrameCheck('g'), gcode, ec)
-        e('handle_bytecode', FrameCheck('g'), gcode, ec)
-        e('dispatch_call', FrameCheck('g'), gcode,
-          BytecodeCheck(gcode, self.callmethod, 0), ec)
-        e(self.callmethod_label, FrameCheck('g'), 0)
-        e('w_switch', w_co.costate, space)
-        e('coroutine_switch', w_co.costate)
-        self.end()
-
-class TestReconstructFrameChain(BaseTestReconstructFrameChain):
-    callmethod_label = 'CALL_FUNCTION'
-
-class TestReconstructFrameChain_CALL_METHOD(BaseTestReconstructFrameChain):
-    OPTIONS = {"objspace.opcodes.CALL_METHOD": True,
-               }
-
-    callmethod_label = 'CALL_METHOD'
-
-                
diff --git a/pypy/module/bz2/interp_bz2.py b/pypy/module/bz2/interp_bz2.py
--- a/pypy/module/bz2/interp_bz2.py
+++ b/pypy/module/bz2/interp_bz2.py
@@ -363,42 +363,44 @@
 
     def seek(self, offset, whence):
         READMAX = 2**18   # 256KB
-        if whence == 1:
-            if offset >= 0:
-                read = r_longlong(0)
-                while read < offset:
-                    count = offset - read
-                    if count < READMAX:
-                        count = intmask(count)
-                    else:
-                        count = READMAX
-                    read += len(self.read(count))
-            else:
-                pos = self.readlength + offset
-                self.seek(pos, 0)
+
+        # Make offset relative to the start of the file
+        if whence == 2:
+            # Read everything to arrive at the end
+            while len(self.read(READMAX)) > 0:
+                pass
+            offset += self.readlength
+        elif whence == 1:
+            offset += self.readlength
         elif whence == 0:
+            pass
+        else:
+            raise operationerrfmt(self.space.w_ValueError,
+                                  "Invalid value for whence: %d", whence)
+
+        # Make offset relative to the current pos
+        # Rewind iff necessary
+        if offset < self.readlength:
             self.stream.seek(0, 0)
             self.decompressor = W_BZ2Decompressor(self.space)
             self.readlength = r_longlong(0)
             self.buffer = ""
             self.finished = False
-            read = 0
-            while read < offset:
-                count = offset - read
-                if count < READMAX:
-                    count = intmask(count)
-                else:
-                    count = READMAX
-                length = len(self.read(count))
-                read += length
-                if not length:
-                    break
         else:
-            # first measure the length by reading everything left
-            while len(self.read(READMAX)) > 0:
-                pass
-            pos = self.readlength + offset
-            self.seek(pos, 0)
+            offset -= self.readlength
+
+        # Seek
+        read = r_longlong(0)
+        while read < offset:
+            count = offset - read
+            if count < READMAX:
+                count = intmask(count)
+            else:
+                count = READMAX
+            length = len(self.read(count))
+            if not length:
+                break
+            read += length
 
     def readall(self):
         w_result = self.decompressor.decompress(self.stream.readall())
diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py
--- a/pypy/module/cpyext/api.py
+++ b/pypy/module/cpyext/api.py
@@ -348,6 +348,7 @@
     '_Py_TrueStruct#': ('PyObject*', 'space.w_True'),
     '_Py_ZeroStruct#': ('PyObject*', 'space.w_False'),
     '_Py_NotImplementedStruct#': ('PyObject*', 'space.w_NotImplemented'),
+    '_Py_EllipsisObject#': ('PyObject*', 'space.w_Ellipsis'),
     'PyDateTimeAPI': ('PyDateTime_CAPI*', 'None'),
     }
 FORWARD_DECLS = []
@@ -966,6 +967,7 @@
     state = space.fromcache(State)
     if state.find_extension(name, path) is not None:
         return
+    old_context = state.package_context
     state.package_context = name, path
     try:
         from pypy.rlib import rdynload
@@ -991,7 +993,7 @@
         generic_cpy_call(space, initfunc)
         state.check_and_raise_exception()
     finally:
-        state.package_context = None, None
+        state.package_context = old_context
     state.fixup_extension(name, path)
 
 @specialize.ll()
diff --git a/pypy/module/cpyext/classobject.py b/pypy/module/cpyext/classobject.py
--- a/pypy/module/cpyext/classobject.py
+++ b/pypy/module/cpyext/classobject.py
@@ -31,4 +31,9 @@
         return w_result
     return w_instance.w_class.lookup(space, name)
 
+@cpython_api([PyObject, PyObject, PyObject], PyObject)
+def PyClass_New(space, w_bases, w_dict, w_name):
+    w_classobj = space.gettypefor(W_ClassObject)
+    return space.call_function(w_classobj,
+                               w_name, w_bases, w_dict)
 
diff --git a/pypy/module/cpyext/frameobject.py b/pypy/module/cpyext/frameobject.py
--- a/pypy/module/cpyext/frameobject.py
+++ b/pypy/module/cpyext/frameobject.py
@@ -1,6 +1,7 @@
 from pypy.rpython.lltypesystem import rffi, lltype
 from pypy.module.cpyext.api import (
-    cpython_api, bootstrap_function, PyObjectFields, cpython_struct)
+    cpython_api, bootstrap_function, PyObjectFields, cpython_struct,
+    CANNOT_FAIL)
 from pypy.module.cpyext.pyobject import (
     PyObject, Py_DecRef, make_ref, from_ref, track_reference,
     make_typedescr, get_typedescr)
@@ -9,6 +10,7 @@
 from pypy.module.cpyext.funcobject import PyCodeObject
 from pypy.interpreter.pyframe import PyFrame
 from pypy.interpreter.pycode import PyCode
+from pypy.interpreter.pytraceback import PyTraceback
 
 PyFrameObjectStruct = lltype.ForwardReference()
 PyFrameObject = lltype.Ptr(PyFrameObjectStruct)
@@ -80,3 +82,8 @@
     frame = space.interp_w(PyFrame, w_frame)
     record_application_traceback(space, state.operror, frame, 0)
     return 0
+
+@cpython_api([PyObject], rffi.INT_real, error=CANNOT_FAIL)
+def PyTraceBack_Check(space, w_obj):
+    obj = space.interpclass_w(w_obj)
+    return obj is not None and isinstance(obj, PyTraceback)
diff --git a/pypy/module/cpyext/funcobject.py b/pypy/module/cpyext/funcobject.py
--- a/pypy/module/cpyext/funcobject.py
+++ b/pypy/module/cpyext/funcobject.py
@@ -69,6 +69,10 @@
     assert isinstance(w_method, Method)
     return borrow_from(w_method, w_method.w_class)
 
+@cpython_api([PyObject], PyObject)
+def PyClassMethod_New(space, w_function):
+    return space.call_method(space.builtin, "classmethod", w_function)
+
 def unwrap_list_of_strings(space, w_list):
     return [space.str_w(w_item) for w_item in space.fixedview(w_list)]
 
diff --git a/pypy/module/cpyext/intobject.py b/pypy/module/cpyext/intobject.py
--- a/pypy/module/cpyext/intobject.py
+++ b/pypy/module/cpyext/intobject.py
@@ -4,7 +4,7 @@
 from pypy.module.cpyext.api import (
     cpython_api, build_type_checkers, PyObject,
     CONST_STRING, CANNOT_FAIL, Py_ssize_t)
-from pypy.rlib.rarithmetic import r_uint
+from pypy.rlib.rarithmetic import r_uint, intmask, LONG_TEST
 import sys
 
 PyInt_Check, PyInt_CheckExact = build_type_checkers("Int")
@@ -73,13 +73,24 @@
                              space.wrap("an integer is required, got NULL"))
     return space.int_w(w_obj) # XXX this is wrong on win64
 
+LONG_MAX = int(LONG_TEST - 1)
+
+@cpython_api([rffi.SIZE_T], PyObject)
+def PyInt_FromSize_t(space, ival):
+    """Create a new integer object with a value of ival. If the value exceeds
+    LONG_MAX, a long integer object is returned.
+    """
+    if ival <= LONG_MAX:
+        return space.wrap(intmask(ival))
+    return space.wrap(ival)
+
 @cpython_api([Py_ssize_t], PyObject)
 def PyInt_FromSsize_t(space, ival):
     """Create a new integer object with a value of ival. If the value is larger
     than LONG_MAX or smaller than LONG_MIN, a long integer object is
     returned.
     """
-    return space.wrap(ival) # XXX this is wrong on win64
+    return space.wrap(ival)
 
 @cpython_api([CONST_STRING, rffi.CCHARPP, rffi.INT_real], PyObject)
 def PyInt_FromString(space, str, pend, base):
diff --git a/pypy/module/cpyext/number.py b/pypy/module/cpyext/number.py
--- a/pypy/module/cpyext/number.py
+++ b/pypy/module/cpyext/number.py
@@ -49,6 +49,13 @@
     failure.  This is the equivalent of the Python expression long(o)."""
     return space.long(w_obj)
 
+@cpython_api([PyObject], PyObject)
+def PyNumber_Index(space, w_obj):
+    """Returns the o converted to a Python int or long on success or NULL with a
+    TypeError exception raised on failure.
+    """
+    return space.index(w_obj)
+
 def func_rename(newname):
     return lambda func: func_with_new_name(func, newname)
 
diff --git a/pypy/module/cpyext/pyerrors.py b/pypy/module/cpyext/pyerrors.py
--- a/pypy/module/cpyext/pyerrors.py
+++ b/pypy/module/cpyext/pyerrors.py
@@ -57,7 +57,7 @@
     if operror:
         ptype[0] = make_ref(space, operror.w_type)
         pvalue[0] = make_ref(space, operror.get_w_value(space))
-        ptraceback[0] = make_ref(space, space.wrap(operror.application_traceback))
+        ptraceback[0] = make_ref(space, space.wrap(operror.get_traceback()))
     else:
         ptype[0] = lltype.nullptr(PyObject.TO)
         pvalue[0] = lltype.nullptr(PyObject.TO)
@@ -268,7 +268,7 @@
 
     w_type = operror.w_type
     w_value = operror.get_w_value(space)
-    w_tb = space.wrap(operror.application_traceback)
+    w_tb = space.wrap(operror.get_traceback())
 
     if rffi.cast(lltype.Signed, set_sys_last_vars):
         space.sys.setdictvalue(space, "last_type", w_type)
diff --git a/pypy/module/cpyext/src/modsupport.c b/pypy/module/cpyext/src/modsupport.c
--- a/pypy/module/cpyext/src/modsupport.c
+++ b/pypy/module/cpyext/src/modsupport.c
@@ -611,8 +611,8 @@
 	if (result != NULL && n > 0) {
 		for (i = 0; i < n; ++i) {
 			tmp = (PyObject *)va_arg(va, PyObject *);
+			Py_INCREF(tmp);
 			PyTuple_SET_ITEM(result, i, tmp);
-			Py_INCREF(tmp);
 		}
 	}
 	return result;
diff --git a/pypy/module/cpyext/stringobject.py b/pypy/module/cpyext/stringobject.py
--- a/pypy/module/cpyext/stringobject.py
+++ b/pypy/module/cpyext/stringobject.py
@@ -2,7 +2,7 @@
 from pypy.rpython.lltypesystem import rffi, lltype
 from pypy.module.cpyext.api import (
     cpython_api, cpython_struct, bootstrap_function, build_type_checkers,
-    PyObjectFields, Py_ssize_t, CONST_STRING)
+    PyObjectFields, Py_ssize_t, CONST_STRING, CANNOT_FAIL)
 from pypy.module.cpyext.pyerrors import PyErr_BadArgument
 from pypy.module.cpyext.pyobject import (
     PyObject, PyObjectP, Py_DecRef, make_ref, from_ref, track_reference,
@@ -203,6 +203,10 @@
     ref[0] = rffi.cast(PyObject, py_newstr)
     return 0
 
+@cpython_api([PyObject, PyObject], rffi.INT, error=CANNOT_FAIL)
+def _PyString_Eq(space, w_str1, w_str2):
+    return space.eq_w(w_str1, w_str2)
+
 @cpython_api([PyObjectP, PyObject], lltype.Void)
 def PyString_Concat(space, ref, w_newpart):
     """Create a new string object in *string containing the contents of newpart
diff --git a/pypy/module/cpyext/stubs.py b/pypy/module/cpyext/stubs.py
--- a/pypy/module/cpyext/stubs.py
+++ b/pypy/module/cpyext/stubs.py
@@ -172,12 +172,6 @@
     This is equivalent to (PyBUF_ND)."""
     raise NotImplementedError
 
-@cpython_api([Py_buffer], lltype.Void)
-def PyBuffer_Release(space, view):
-    """Release the buffer view.  This should be called when the buffer
-    is no longer being used as it may free memory from it."""
-    raise NotImplementedError
-
 @cpython_api([rffi.CCHARP], Py_ssize_t, error=CANNOT_FAIL)
 def PyBuffer_SizeFromFormat(space, format):
     """Return the implied ~Py_buffer.itemsize from the struct-stype
@@ -198,13 +192,6 @@
     given shape with the given number of bytes per element."""
     raise NotImplementedError
 
-@cpython_api([Py_buffer, PyObject, rffi.VOIDP, Py_ssize_t, rffi.INT_real, rffi.INT_real], rffi.INT_real, error=-1)
-def PyBuffer_FillInfo(space, view, obj, buf, len, readonly, infoflags):
-    """Fill in a buffer-info structure, view, correctly for an exporter that can
-    only share a contiguous chunk of memory of "unsigned bytes" of the given
-    length.  Return 0 on success and -1 (with raising an error) on error."""
-    raise NotImplementedError
-
 @cpython_api([Py_buffer], PyObject)
 def PyMemoryView_FromBuffer(space, view):
     """Create a memoryview object wrapping the given buffer-info structure view.
@@ -1094,14 +1081,6 @@
     """
     raise NotImplementedError
 
-@cpython_api([PyObject], PyObject)
-def PyImport_ReloadModule(space, m):
-    """Reload a module.  This is best described by referring to the built-in
-    Python function reload(), as the standard reload() function calls this
-    function directly.  Return a new reference to the reloaded module, or NULL
-    with an exception set on failure (the module still exists in this case)."""
-    raise NotImplementedError
-
 @cpython_api([rffi.CCHARP, PyObject], PyObject)
 def PyImport_ExecCodeModule(space, name, co):
     """Given a module name (possibly of the form package.module) and a code
@@ -1140,13 +1119,6 @@
     of the bytecode file, in little-endian byte order."""
     raise NotImplementedError
 
-@cpython_api([], PyObject)
-def PyImport_GetModuleDict(space):
-    """Return the dictionary used for the module administration (a.k.a.
-    sys.modules).  Note that this is a per-interpreter variable."""
-    borrow_from()
-    raise NotImplementedError
-
 @cpython_api([PyObject], PyObject)
 def PyImport_GetImporter(space, path):
     """Return an importer object for a sys.path/pkg.__path__ item
@@ -1701,13 +1673,6 @@
     """
     raise NotImplementedError
 
-@cpython_api([rffi.SIZE_T], PyObject)
-def PyInt_FromSize_t(space, ival):
-    """Create a new integer object with a value of ival. If the value exceeds
-    LONG_MAX, a long integer object is returned.
-    """
-    raise NotImplementedError
-
 @cpython_api([PyObject], rffi.ULONGLONG, error=-1)
 def PyInt_AsUnsignedLongLongMask(space, io):
     """Will first attempt to cast the object to a PyIntObject or
@@ -1920,13 +1885,6 @@
     Reference counts are still not increased in this case."""
     raise NotImplementedError
 
-@cpython_api([PyObject], PyObject)
-def PyNumber_Index(space, o):
-    """Returns the o converted to a Python int or long on success or NULL with a
-    TypeError exception raised on failure.
-    """
-    raise NotImplementedError
-
 @cpython_api([PyObject, rffi.INT_real], PyObject)
 def PyNumber_ToBase(space, n, base):
     """Returns the integer n converted to base as a string with a base
@@ -2254,15 +2212,6 @@
     standard C library function exit(status)."""
     raise NotImplementedError
 
-@cpython_api([PyObject, Py_ssize_t, Py_ssize_t], PyObject)
-def PyTuple_GetSlice(space, p, low, high):
-    """Take a slice of the tuple pointed to by p from low to high and return it
-    as a new tuple.
-
-    This function used an int type for low and high. This might
-    require changes in your code for properly supporting 64-bit systems."""
-    raise NotImplementedError
-
 @cpython_api([], rffi.INT_real, error=CANNOT_FAIL)
 def PyTuple_ClearFreeList(space):
     """Clear the free list. Return the total number of freed items.
@@ -2275,14 +2224,6 @@
     """
     raise NotImplementedError
 
-@cpython_api([PyTypeObjectPtr], lltype.Void)
-def PyType_Modified(space, type):
-    """Invalidate the internal lookup cache for the type and all of its
-    subtypes.  This function must be called after any manual
-    modification of the attributes or base classes of the type.
-    """
-    raise NotImplementedError
-
 @cpython_api([PyObject], rffi.INT_real, error=CANNOT_FAIL)
 def PyType_IS_GC(space, o):
     """Return true if the type object includes support for the cycle detector; this
diff --git a/pypy/module/cpyext/test/test_classobject.py b/pypy/module/cpyext/test/test_classobject.py
--- a/pypy/module/cpyext/test/test_classobject.py
+++ b/pypy/module/cpyext/test/test_classobject.py
@@ -40,3 +40,14 @@
         assert not isinstance(api.PyObject_GetAttr(w_instance, space.wrap('f')), Function)
         # _PyInstance_Lookup returns the raw descriptor
         assert isinstance(api._PyInstance_Lookup(w_instance, space.wrap('f')), Function)
+
+    def test_pyclass_new(self, space, api):
+        w_bases = space.newtuple([])
+        w_dict = space.newdict()
+        w_name = space.wrap("C")
+        w_class = api.PyClass_New(w_bases, w_dict, w_name)
+        assert not space.isinstance_w(w_class, space.w_type)
+        w_instance = space.call_function(w_class)
+        assert api.PyInstance_Check(w_instance)
+        assert space.is_true(space.call_method(space.builtin, "isinstance",
+                                               w_instance, w_class))
diff --git a/pypy/module/cpyext/test/test_eval.py b/pypy/module/cpyext/test/test_eval.py
--- a/pypy/module/cpyext/test/test_eval.py
+++ b/pypy/module/cpyext/test/test_eval.py
@@ -193,3 +193,32 @@
             return args
         assert module.call_func(f) == ("text", 42, None)
         assert module.call_method("text") == 2
+
+    def test_CallFunctionObjArgs(self):
+        module = self.import_extension('foo', [
+            ("call_func", "METH_VARARGS",
+             """
+                PyObject *t = PyString_FromString("t");
+                PyObject *res = PyObject_CallFunctionObjArgs(
+                   PyTuple_GetItem(args, 0),
+                   Py_None, NULL);
+                Py_DECREF(t);
+                return res;
+             """),
+            ("call_method", "METH_VARARGS",
+             """
+                PyObject *t = PyString_FromString("t");
+                PyObject *count = PyString_FromString("count");
+                PyObject *res = PyObject_CallMethodObjArgs(
+                   PyTuple_GetItem(args, 0),
+                   count, t, NULL);
+                Py_DECREF(t);
+                Py_DECREF(count);
+                return res;
+             """),
+            ])
+        def f(*args):
+            return args
+        assert module.call_func(f) == (None,)
+        assert module.call_method("text") == 2
+        
diff --git a/pypy/module/cpyext/test/test_frameobject.py b/pypy/module/cpyext/test/test_frameobject.py
--- a/pypy/module/cpyext/test/test_frameobject.py
+++ b/pypy/module/cpyext/test/test_frameobject.py
@@ -64,3 +64,31 @@
         # Cython does not work on CPython as well...
         assert exc.traceback.tb_lineno == 42 # should be 48
         assert frame.f_lineno == 42
+
+    def test_traceback_check(self):
+        module = self.import_extension('foo', [
+            ("traceback_check", "METH_NOARGS",
+             """
+                 int check;
+                 PyObject *type, *value, *tb;
+                 PyObject *ret = PyRun_String("XXX", Py_eval_input, 
+                                              Py_None, Py_None);
+                 if (ret) {
+                     Py_DECREF(ret);
+                     PyErr_SetString(PyExc_AssertionError, "should raise");
+                     return NULL;
+                 }
+                 PyErr_Fetch(&type, &value, &tb);
+                 check = PyTraceBack_Check(tb);
+                 Py_XDECREF(type);
+                 Py_XDECREF(value);
+                 Py_XDECREF(tb);
+                 if (check) {
+                     Py_RETURN_TRUE;
+                 }
+                 else {
+                     Py_RETURN_FALSE;
+                 }
+             """),
+            ])
+        assert module.traceback_check()
diff --git a/pypy/module/cpyext/test/test_funcobject.py b/pypy/module/cpyext/test/test_funcobject.py
--- a/pypy/module/cpyext/test/test_funcobject.py
+++ b/pypy/module/cpyext/test/test_funcobject.py
@@ -44,3 +44,19 @@
         assert w_code.co_firstlineno == 3
         rffi.free_charp(filename)
         rffi.free_charp(funcname)
+
+    def test_classmethod(self, space, api):
+        w_function = space.appexec([], """():
+            def method(x): return x
+            return method
+        """)
+        w_class = space.call_function(space.w_type, space.wrap("C"),
+                                      space.newtuple([]), space.newdict())
+        w_instance = space.call_function(w_class)
+        # regular instance method
+        space.setattr(w_class, space.wrap("method"), w_function)
+        assert space.is_w(space.call_method(w_instance, "method"), w_instance)
+        # now a classmethod
+        w_classmethod = api.PyClassMethod_New(w_function)
+        space.setattr(w_class, space.wrap("classmethod"), w_classmethod)
+        assert space.is_w(space.call_method(w_instance, "classmethod"), w_class)
diff --git a/pypy/module/cpyext/test/test_intobject.py b/pypy/module/cpyext/test/test_intobject.py
--- a/pypy/module/cpyext/test/test_intobject.py
+++ b/pypy/module/cpyext/test/test_intobject.py
@@ -50,3 +50,19 @@
             ])
         assert module.from_string() == 0x1234
         assert type(module.from_string()) is int
+
+    def test_size_t(self):
+        module = self.import_extension('foo', [
+            ("values", "METH_NOARGS",
+             """
+                 return Py_BuildValue("NNNN",
+                     PyInt_FromSize_t(123),
+                     PyInt_FromSize_t((size_t)-1),
+                     PyInt_FromSsize_t(123),
+                     PyInt_FromSsize_t((size_t)-1));
+             """),
+            ])
+        values = module.values()
+        types = [type(x) for x in values]
+        assert types == [int, long, int, int]
+        
diff --git a/pypy/module/cpyext/test/test_number.py b/pypy/module/cpyext/test/test_number.py
--- a/pypy/module/cpyext/test/test_number.py
+++ b/pypy/module/cpyext/test/test_number.py
@@ -25,6 +25,15 @@
         assert api.PyInt_CheckExact(w_l)
         w_l = api.PyNumber_Int(space.wrap(2 << 65))
         assert api.PyLong_CheckExact(w_l)
+        w_l = api.PyNumber_Int(space.wrap(42.3))
+        assert api.PyInt_CheckExact(w_l)
+
+    def test_number_index(self, space, api):
+        w_l = api.PyNumber_Index(space.wrap(123L))
+        assert api.PyLong_CheckExact(w_l)
+        w_l = api.PyNumber_Index(space.wrap(42.3))
+        assert w_l is None
+        api.PyErr_Clear()
 
     def test_numbermethods(self, space, api):
         assert "ab" == space.unwrap(
diff --git a/pypy/module/cpyext/test/test_sliceobject.py b/pypy/module/cpyext/test/test_sliceobject.py
--- a/pypy/module/cpyext/test/test_sliceobject.py
+++ b/pypy/module/cpyext/test/test_sliceobject.py
@@ -67,3 +67,14 @@
              """),
             ])
         assert module.nullslice() == slice(None, None, None)
+
+    def test_ellipsis(self):
+        module = self.import_extension('foo', [
+            ("get_ellipsis", "METH_NOARGS",
+             """
+                 PyObject *ret = Py_Ellipsis;
+                 Py_INCREF(ret);
+                 return ret;
+             """),
+            ])
+        assert module.get_ellipsis() is Ellipsis
diff --git a/pypy/module/cpyext/test/test_stringobject.py b/pypy/module/cpyext/test/test_stringobject.py
--- a/pypy/module/cpyext/test/test_stringobject.py
+++ b/pypy/module/cpyext/test/test_stringobject.py
@@ -283,3 +283,7 @@
         self.raises(space, api, TypeError, api.PyString_AsEncodedObject,
             space.wrap(2), lltype.nullptr(rffi.CCHARP.TO), lltype.nullptr(rffi.CCHARP.TO)
         )
+
+    def test_eq(self, space, api):
+        assert 1 == api._PyString_Eq(space.wrap("hello"), space.wrap("hello"))
+        assert 0 == api._PyString_Eq(space.wrap("hello"), space.wrap("world"))
diff --git a/pypy/module/cpyext/test/test_sysmodule.py b/pypy/module/cpyext/test/test_sysmodule.py
--- a/pypy/module/cpyext/test/test_sysmodule.py
+++ b/pypy/module/cpyext/test/test_sysmodule.py
@@ -22,12 +22,13 @@
                  Py_RETURN_NONE;
              """)])
         import sys, StringIO
+        prev = sys.stdout
         sys.stdout = StringIO.StringIO()
         try:
             module.writestdout()
             assert sys.stdout.getvalue() == "format: 42\n"
         finally:
-            sys.stdout = sys.__stdout__
+            sys.stdout = prev
 
 class TestSysModule(BaseApiTest):
     def test_sysmodule(self, space, api):
diff --git a/pypy/module/cpyext/test/test_tupleobject.py b/pypy/module/cpyext/test/test_tupleobject.py
--- a/pypy/module/cpyext/test/test_tupleobject.py
+++ b/pypy/module/cpyext/test/test_tupleobject.py
@@ -42,3 +42,9 @@
         assert api.PyTuple_Size(atuple) == 2
         assert space.eq_w(space.getitem(atuple, space.wrap(0)), space.wrap(0))
         assert space.eq_w(space.getitem(atuple, space.wrap(1)), space.wrap(1))
+
+    def test_getslice(self, space, api):
+        w_tuple = space.newtuple([space.wrap(i) for i in range(10)])
+        w_slice = api.PyTuple_GetSlice(w_tuple, 3, -3)
+        assert space.eq_w(w_slice,
+                          space.newtuple([space.wrap(i) for i in range(3, 7)]))
diff --git a/pypy/module/cpyext/tupleobject.py b/pypy/module/cpyext/tupleobject.py
--- a/pypy/module/cpyext/tupleobject.py
+++ b/pypy/module/cpyext/tupleobject.py
@@ -79,3 +79,10 @@
     Py_DecRef(space, ref[0])
     ref[0] = make_ref(space, py_newtuple)
     return 0
+
+@cpython_api([PyObject, Py_ssize_t, Py_ssize_t], PyObject)
+def PyTuple_GetSlice(space, w_obj, low, high):
+    """Take a slice of the tuple pointed to by p from low to high and return it
+    as a new tuple.
+    """
+    return space.getslice(w_obj, space.wrap(low), space.wrap(high))
diff --git a/pypy/module/cpyext/typeobject.py b/pypy/module/cpyext/typeobject.py
--- a/pypy/module/cpyext/typeobject.py
+++ b/pypy/module/cpyext/typeobject.py
@@ -650,3 +650,13 @@
     name = space.str_w(w_name)
     w_obj = w_type.lookup(name)
     return borrow_from(w_type, w_obj)
+
+@cpython_api([PyTypeObjectPtr], lltype.Void)
+def PyType_Modified(space, w_obj):
+    """Invalidate the internal lookup cache for the type and all of its
+    subtypes.  This function must be called after any manual
+    modification of the attributes or base classes of the type.
+    """
+    # PyPy already takes care of direct modifications to type.__dict__
+    # (which is a W_DictProxyObject).
+    pass
diff --git a/pypy/module/oracle/__init__.py b/pypy/module/oracle/__init__.py
--- a/pypy/module/oracle/__init__.py
+++ b/pypy/module/oracle/__init__.py
@@ -28,6 +28,7 @@
 
     appleveldefs = {
         'version': 'app_oracle.version',
+        'paramstyle': 'app_oracle.paramstyle',
         'makedsn': 'app_oracle.makedsn',
         'TimestampFromTicks': 'app_oracle.TimestampFromTicks',
     }
diff --git a/pypy/module/oracle/app_oracle.py b/pypy/module/oracle/app_oracle.py
--- a/pypy/module/oracle/app_oracle.py
+++ b/pypy/module/oracle/app_oracle.py
@@ -1,4 +1,5 @@
 version = '5.0.0'
+paramstyle = 'named'
 
 class Warning(StandardError):
     pass
diff --git a/pypy/module/oracle/config.py b/pypy/module/oracle/config.py
--- a/pypy/module/oracle/config.py
+++ b/pypy/module/oracle/config.py
@@ -16,6 +16,7 @@
         return space.str_w(w_obj)
 
     def w_string(space, buf, len=-1):
+        #assert type(len) is int
         if len < 0:
             return space.wrap(rffi.charp2str(buf))
         else:
diff --git a/pypy/module/oracle/interp_connect.py b/pypy/module/oracle/interp_connect.py
--- a/pypy/module/oracle/interp_connect.py
+++ b/pypy/module/oracle/interp_connect.py
@@ -159,9 +159,20 @@
         # set the internal and external names; these are needed for global
         # transactions but are limited in terms of the lengths of the strings
         if twophase:
-            raise OperationError(
-                interp_error.get(space).w_NotSupportedError,
-                space.wrap("XXX write me"))
+            status = roci.OCIAttrSet(
+                self.serverHandle, roci.OCI_HTYPE_SERVER,
+                "cx_Oracle", 0,
+                roci.OCI_ATTR_INTERNAL_NAME,
+                self.environment.errorHandle)
+            self.environment.checkForError(
+                status, "Connection_Connect(): set internal name")
+            status = roci.OCIAttrSet(
+                self.serverHandle, roci.OCI_HTYPE_SERVER,
+                "cx_Oracle", 0,
+                roci.OCI_ATTR_EXTERNAL_NAME,
+                self.environment.errorHandle)
+            self.environment.checkForError(
+                status, "Connection_Connect(): set external name")
 
         # allocate the session handle
         handleptr = lltype.malloc(rffi.CArrayPtr(roci.OCISession).TO,
@@ -371,6 +382,7 @@
         finally:
             stringBuffer.clear()
             lltype.free(foundptr, flavor='raw')
+            lltype.free(handleptr, flavor='raw')
 
         # eliminate the authorization handle immediately, if applicable
         if authInfo:
diff --git a/pypy/module/oracle/interp_cursor.py b/pypy/module/oracle/interp_cursor.py
--- a/pypy/module/oracle/interp_cursor.py
+++ b/pypy/module/oracle/interp_cursor.py
@@ -459,7 +459,7 @@
                 self.environment.checkForError(
                     status,
                     "Cursor_ItemDescription(): name")
-                name = rffi.charpsize2str(nameptr[0], lenptr[0])
+                name = rffi.charpsize2str(nameptr[0], rffi.cast(lltype.Signed, lenptr[0]))
             finally:
                 lltype.free(nameptr, flavor='raw')
                 lltype.free(lenptr, flavor='raw')
diff --git a/pypy/module/oracle/interp_object.py b/pypy/module/oracle/interp_object.py
--- a/pypy/module/oracle/interp_object.py
+++ b/pypy/module/oracle/interp_object.py
@@ -38,7 +38,7 @@
             self.environment.checkForError(
                 status,
                 "ObjectType_Initialize(): get schema name")
-            self.schema = rffi.charpsize2str(nameptr[0], lenptr[0])
+            self.schema = rffi.charpsize2str(nameptr[0], rffi.cast(lltype.Signed, lenptr[0]))
 
             # determine the name of the type
             status = roci.OCIAttrGet(
@@ -50,7 +50,7 @@
             self.environment.checkForError(
                 status,
                 "ObjectType_Initialize(): get schema name")
-            self.name = rffi.charpsize2str(nameptr[0], lenptr[0])
+            self.name = rffi.charpsize2str(nameptr[0], rffi.cast(lltype.Signed, lenptr[0]))
         finally:
             lltype.free(nameptr, flavor='raw')
             lltype.free(lenptr, flavor='raw')
@@ -301,7 +301,7 @@
             connection.environment.checkForError(
                 status,
                 "ObjectAttribute_Initialize(): get name")
-            self.name = rffi.charpsize2str(nameptr[0], lenptr[0])
+            self.name = rffi.charpsize2str(nameptr[0], rffi.cast(lltype.Signed, lenptr[0]))
         finally:
             lltype.free(nameptr, flavor='raw')
             lltype.free(lenptr, flavor='raw')
@@ -428,7 +428,7 @@
         strValue = rffi.cast(roci.Ptr(roci.OCIString), value)[0]
         ptr = roci.OCIStringPtr(environment.handle, strValue)
         size = roci.OCIStringSize(environment.handle, strValue)
-        return config.w_string(space, ptr, size)
+        return config.w_string(space, ptr, rffi.cast(lltype.Signed, size))
     elif typeCode == roci.OCI_TYPECODE_NUMBER:
         return transform.OracleNumberToPythonFloat(
             environment,
diff --git a/pypy/module/oracle/interp_pool.py b/pypy/module/oracle/interp_pool.py
--- a/pypy/module/oracle/interp_pool.py
+++ b/pypy/module/oracle/interp_pool.py
@@ -100,11 +100,13 @@
                 status, "SessionPool_New(): create pool")
 
             self.w_name = config.w_string(space, poolnameptr[0],
-                                          poolnamelenptr[0])
+                              rffi.cast(lltype.Signed, poolnamelenptr[0]))
         finally:
             user_buf.clear()
             password_buf.clear()
             dsn_buf.clear()
+            lltype.free(poolnameptr, flavor='raw')
+            lltype.free(poolnamelenptr, flavor='raw')
 
         return space.wrap(self)
 
@@ -128,10 +130,19 @@
 
         self.checkConnected(space)
 
+        if __args__.keywords:
+             keywords = __args__.keywords + ["pool"]
+        else:
+             keywords = ["pool"]
+        if __args__.keywords_w:
+             keywords_w = __args__.keywords_w + [space.wrap(self)]
+        else:
+             keywords_w = [space.wrap(self)]
+             
         newargs = Arguments(space,
                             __args__.arguments_w,
-                            __args__.keywords + ["pool"],
-                            __args__.keywords_w + [space.wrap(self)])
+                            keywords,
+                            keywords_w)
         return space.call_args(self.w_connectionType, newargs)
 
     def release(self, space, w_connection):
diff --git a/pypy/module/oracle/interp_variable.py b/pypy/module/oracle/interp_variable.py
--- a/pypy/module/oracle/interp_variable.py
+++ b/pypy/module/oracle/interp_variable.py
@@ -279,6 +279,7 @@
                     self.actualLength, self.returnCode,
                     allocatedElements, actualElementsPtr,
                     roci.OCI_DEFAULT)
+                nameBuffer.clear()
             else:
                 status = roci.OCIBindByPos(
                     self.boundCursorHandle, bindHandlePtr,
@@ -601,6 +602,7 @@
     def getValueProc(self, space, pos):
         ptr = rffi.ptradd(self.data, pos * self.bufferSize)
         length = rffi.cast(roci.Ptr(roci.ub4), ptr)[0]
+        length = rffi.cast(lltype.Signed, length)
 
         ptr = rffi.ptradd(ptr, rffi.sizeof(roci.ub4))
         return space.wrap(rffi.charpsize2str(ptr, length))
@@ -732,6 +734,7 @@
             finally:
                 rffi.keep_buffer_alive_until_here(textbuf, text)
                 lltype.free(sizeptr, flavor='raw')
+                format_buf.clear()
 
             if isinstance(self, VT_NumberAsString):
                 return w_strvalue
@@ -778,6 +781,8 @@
                 format_buf.ptr, format_buf.size,
                 None, 0,
                 dataptr)
+            text_buf.clear()
+            format_buf.clear()
             self.environment.checkForError(
                 status, "NumberVar_SetValue(): from long")
             return
@@ -810,6 +815,8 @@
                 format_buf.ptr, format_buf.size,
                 nls_params, len(nls_params),
                 dataptr)
+            text_buf.clear()
+            format_buf.clear()
             self.environment.checkForError(
                 status, "NumberVar_SetValue(): from decimal")
             return
diff --git a/pypy/module/oracle/roci.py b/pypy/module/oracle/roci.py
--- a/pypy/module/oracle/roci.py
+++ b/pypy/module/oracle/roci.py
@@ -73,7 +73,8 @@
     defines = '''
     OCI_ATTR_SERVER OCI_ATTR_SESSION OCI_ATTR_USERNAME OCI_ATTR_PASSWORD
     OCI_ATTR_STMT_TYPE OCI_ATTR_PARAM OCI_ATTR_PARAM_COUNT OCI_ATTR_ROW_COUNT
-    OCI_ATTR_NAME OCI_ATTR_SCALE OCI_ATTR_PRECISION OCI_ATTR_IS_NULL
+    OCI_ATTR_NAME OCI_ATTR_INTERNAL_NAME OCI_ATTR_EXTERNAL_NAME
+    OCI_ATTR_SCALE OCI_ATTR_PRECISION OCI_ATTR_IS_NULL
     OCI_ATTR_DATA_SIZE OCI_ATTR_DATA_TYPE OCI_ATTR_REF_TDO
     OCI_ATTR_SCHEMA_NAME OCI_ATTR_TYPE_NAME OCI_ATTR_TYPECODE
     OCI_ATTR_NUM_TYPE_ATTRS OCI_ATTR_LIST_TYPE_ATTRS
diff --git a/pypy/module/oracle/test/test_connect.py b/pypy/module/oracle/test/test_connect.py
--- a/pypy/module/oracle/test/test_connect.py
+++ b/pypy/module/oracle/test/test_connect.py
@@ -41,6 +41,10 @@
         if hasattr(self, 'cnx'):
             self.cnx.close()
 
+    def test_constants(self):
+        assert '.' in oracle.version
+        assert oracle.paramstyle == 'named'
+
     def test_connect(self):
         self.cnx = oracle.connect(self.username, self.password,
                                   self.tnsentry, threaded=True)
@@ -49,6 +53,13 @@
         assert self.cnx.tnsentry == self.tnsentry
         assert isinstance(self.cnx.version, str)
 
+    def test_connect_twophase(self):
+        self.cnx = oracle.connect(self.username, self.password,
+                                  self.tnsentry, twophase=True)
+        assert self.cnx.username == self.username
+        assert self.cnx.password == self.password
+        assert self.cnx.tnsentry == self.tnsentry
+
     def test_singleArg(self):
         self.cnx = oracle.connect("%s/%s@%s" % (self.username, self.password,
                                                 self.tnsentry))
diff --git a/pypy/module/pypyjit/__init__.py b/pypy/module/pypyjit/__init__.py
--- a/pypy/module/pypyjit/__init__.py
+++ b/pypy/module/pypyjit/__init__.py
@@ -7,13 +7,15 @@
     interpleveldefs = {
         'set_param':    'interp_jit.set_param',
         'residual_call': 'interp_jit.residual_call',
+        'set_compile_hook': 'interp_jit.set_compile_hook',
     }
 
     def setup_after_space_initialization(self):
         # force the __extend__ hacks to occur early
-        import pypy.module.pypyjit.interp_jit
+        from pypy.module.pypyjit.interp_jit import pypyjitdriver
         # add the 'defaults' attribute
         from pypy.rlib.jit import PARAMETERS
         space = self.space
+        pypyjitdriver.space = space
         w_obj = space.wrap(PARAMETERS)
         space.setattr(space.wrap(self), space.wrap('defaults'), w_obj)
diff --git a/pypy/module/pypyjit/interp_jit.py b/pypy/module/pypyjit/interp_jit.py
--- a/pypy/module/pypyjit/interp_jit.py
+++ b/pypy/module/pypyjit/interp_jit.py
@@ -12,8 +12,11 @@
 from pypy.interpreter.pycode import PyCode, CO_GENERATOR
 from pypy.interpreter.pyframe import PyFrame
 from pypy.interpreter.pyopcode import ExitFrame
+from pypy.interpreter.gateway import unwrap_spec
+from pypy.interpreter.baseobjspace import ObjSpace, W_Root
 from opcode import opmap
 from pypy.rlib.objectmodel import we_are_translated
+from pypy.rlib.nonconst import NonConstant
 
 PyFrame._virtualizable2_ = ['last_instr', 'pycode',
                             'valuestackdepth', 'valuestack_w[*]',
@@ -49,6 +52,52 @@
     greens = ['next_instr', 'is_being_profiled', 'pycode']
     virtualizables = ['frame']
 
+    def on_compile(self, logger, looptoken, operations, type, next_instr,
+                   is_being_profiled, ll_pycode):
+        from pypy.rpython.annlowlevel import cast_base_ptr_to_instance
+        
+        space = self.space
+        cache = space.fromcache(Cache)
+        if cache.in_recursion:
+            return
+        if space.is_true(cache.w_compile_hook):
+            logops = logger._make_log_operations()
+            list_w = [space.wrap(logops.repr_of_resop(op))
+                      for op in operations]
+            pycode = cast_base_ptr_to_instance(PyCode, ll_pycode)
+            cache.in_recursion = True
+            try:
+                space.call_function(cache.w_compile_hook,
+                                    space.wrap('main'),
+                                    space.wrap(type),
+                                    space.newtuple([pycode,
+                                    space.wrap(next_instr),
+                                    space.wrap(is_being_profiled)]),
+                                    space.newlist(list_w))
+            except OperationError, e:
+                e.write_unraisable(space, "jit hook ", cache.w_compile_hook)
+            cache.in_recursion = False
+
+    def on_compile_bridge(self, logger, orig_looptoken, operations, n):
+        space = self.space
+        cache = space.fromcache(Cache)
+        if cache.in_recursion:
+            return
+        if space.is_true(cache.w_compile_hook):
+            logops = logger._make_log_operations()
+            list_w = [space.wrap(logops.repr_of_resop(op))
+                      for op in operations]
+            cache.in_recursion = True
+            try:
+                space.call_function(cache.w_compile_hook,
+                                    space.wrap('main'),
+                                    space.wrap('bridge'),
+                                    space.wrap(n),
+                                    space.newlist(list_w))
+            except OperationError, e:
+                e.write_unraisable(space, "jit hook ", cache.w_compile_hook)
+            cache.in_recursion = False
+
 pypyjitdriver = PyPyJitDriver(get_printable_location = get_printable_location,
                               get_jitcell_at = get_jitcell_at,
                               set_jitcell_at = set_jitcell_at,
@@ -149,3 +198,35 @@
     '''For testing.  Invokes callable(...), but without letting
     the JIT follow the call.'''
     return space.call_args(w_callable, __args__)
+
+class Cache(object):
+    in_recursion = False
+    
+    def __init__(self, space):
+        self.w_compile_hook = space.w_None
+
+@unwrap_spec(ObjSpace, W_Root)
+def set_compile_hook(space, w_hook):
+    """ set_compile_hook(hook)
+
+    Set a compiling hook that will be called each time a loop is compiled.
+    The hook will be called with the following signature:
+    hook(merge_point_type, loop_type, greenkey or guard_number, operations)
+
+    for now merge point type is always `main`
+
+    loop_type can be either `loop` `entry_bridge` or `bridge`
+    in case loop is not `bridge`, greenkey will be a set of constants
+    for jit merge point. in case it's `main` it'll be a tuple
+    (code, offset, is_being_profiled)
+
+    Note that jit hook is not reentrant. It means that if the code
+    inside the jit hook is itself jitted, it will get compiled, but the
+    jit hook won't be called for that.
+
+    XXX write down what else
+    """
+    cache = space.fromcache(Cache)
+    cache.w_compile_hook = w_hook
+    cache.in_recursion = NonConstant(False)
+    return space.w_None
diff --git a/pypy/module/pypyjit/test/test_jit_hook.py b/pypy/module/pypyjit/test/test_jit_hook.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test/test_jit_hook.py
@@ -0,0 +1,105 @@
+
+import py
+from pypy.conftest import gettestobjspace, option
+from pypy.interpreter.pycode import PyCode
+from pypy.interpreter.gateway import interp2app
+from pypy.jit.metainterp.history import LoopToken
+from pypy.jit.metainterp.resoperation import ResOperation, rop
+from pypy.jit.metainterp.logger import Logger
+from pypy.rpython.annlowlevel import (cast_instance_to_base_ptr,
+                                      cast_base_ptr_to_instance)
+from pypy.module.pypyjit.interp_jit import pypyjitdriver
+from pypy.jit.tool.oparser import parse
+from pypy.jit.metainterp.typesystem import llhelper
+
+class MockSD(object):
+    class cpu:
+        ts = llhelper
+
+class AppTestJitHook(object):
+    def setup_class(cls):
+        if option.runappdirect:
+            py.test.skip("Can't run this test with -A")
+        space = gettestobjspace(usemodules=('pypyjit',))
+        cls.space = space
+        w_f = space.appexec([], """():
+        def f():
+            pass
+        return f
+        """)
+        ll_code = cast_instance_to_base_ptr(w_f.code)
+        logger = Logger(MockSD())
+
+        oplist = parse("""
+        [i1, i2]
+        i3 = int_add(i1, i2)
+        guard_true(i3) []
+        """).operations
+
+        def interp_on_compile():
+            pypyjitdriver.on_compile(logger, LoopToken(), oplist, 'loop',
+                                     0, False, ll_code)
+
+        def interp_on_compile_bridge():
+            pypyjitdriver.on_compile_bridge(logger, LoopToken(), oplist, 0)
+        
+        cls.w_on_compile = space.wrap(interp2app(interp_on_compile))
+        cls.w_on_compile_bridge = space.wrap(interp2app(interp_on_compile_bridge))
+
+    def test_on_compile(self):
+        import pypyjit
+        all = []
+
+        def hook(*args):
+            assert args[0] == 'main'
+            assert args[1] in ['loop', 'bridge']
+            all.append(args[2:])
+        
+        self.on_compile()
+        pypyjit.set_compile_hook(hook)
+        assert not all
+        self.on_compile()
+        assert len(all) == 1
+        assert all[0][0][0].co_name == 'f'
+        assert all[0][0][1] == 0
+        assert all[0][0][2] == False
+        assert len(all[0][1]) == 2
+        assert 'int_add' in all[0][1][0]
+        self.on_compile_bridge()
+        assert len(all) == 2
+        pypyjit.set_compile_hook(None)
+        self.on_compile()
+        assert len(all) == 2
+
+    def test_on_compile_exception(self):
+        import pypyjit, sys, cStringIO
+
+        def hook(*args):
+            1/0
+
+        pypyjit.set_compile_hook(hook)
+        s = cStringIO.StringIO()
+        prev = sys.stderr
+        sys.stderr = s
+        try:
+            self.on_compile()
+        finally:
+            sys.stderr = prev
+        assert 'jit hook' in s.getvalue()
+        assert 'ZeroDivisionError' in s.getvalue()
+
+    def test_non_reentrant(self):
+        import pypyjit
+        l = []
+        
+        def hook(*args):
+            l.append(None)
+            self.on_compile()
+            self.on_compile_bridge()
+        
+        pypyjit.set_compile_hook(hook)
+        self.on_compile()
+        assert len(l) == 1 # and did not crash
+        self.on_compile_bridge()
+        assert len(l) == 2 # and did not crash
+        
diff --git a/pypy/module/pypyjit/test/test_jit_setup.py b/pypy/module/pypyjit/test/test_jit_setup.py
--- a/pypy/module/pypyjit/test/test_jit_setup.py
+++ b/pypy/module/pypyjit/test/test_jit_setup.py
@@ -24,3 +24,13 @@
                 i += 1
 
         assert list(gen(3)) == [0, 1, 4]
+
+def test_interface_residual_call():
+    space = gettestobjspace(usemodules=['pypyjit'])
+    space.appexec([], """():
+        import pypyjit
+        def f(*args, **kwds):
+            return (args, kwds)
+        res = pypyjit.residual_call(f, 4, x=6)
+        assert res == ((4,), {'x': 6})
+    """)
diff --git a/pypy/module/pypyjit/test/test_pypy_c.py b/pypy/module/pypyjit/test/test_pypy_c.py
deleted file mode 100644
--- a/pypy/module/pypyjit/test/test_pypy_c.py
+++ /dev/null
@@ -1,430 +0,0 @@
-from pypy.conftest import gettestobjspace, option
-from pypy.tool.udir import udir
-import py
-from py.test import skip
-import sys, os, re
-import subprocess
-
-class BytecodeTrace(list):
-    def get_opnames(self, prefix=""):
-        return [op.getopname() for op in self
-                    if op.getopname().startswith(prefix)]
-
-    def __repr__(self):
-        return "%s%s" % (self.bytecode, list.__repr__(self))
-
-ZERO_OP_BYTECODES = [
-    'POP_TOP',
-    'ROT_TWO',
-    'ROT_THREE',
-    'DUP_TOP',
-    'ROT_FOUR',
-    'NOP',
-    'DUP_TOPX',
-    'LOAD_CONST',
-    'JUMP_FORWARD',
-    #'JUMP_ABSOLUTE' in theory, but contains signals stuff
-    #'LOAD_FAST' should be here, but currently needs a guard for nonzeroness
-    'STORE_FAST',
-    ]
-
-
-r_bridge = re.compile(r"bridge out of Guard (\d+)")
-
-def from_entry_bridge(text, allparts):
-    firstline = text.splitlines()[0]
-    if 'entry bridge' in firstline:
-        return True
-    match = r_bridge.search(firstline)
-    if match:
-        search = '<Guard' + match.group(1) + '>'
-        for part in allparts:
-            if search in part:
-                break
-        else:
-            raise AssertionError, "%s not found??" % (search,)
-        return from_entry_bridge(part, allparts)
-    return False
-
-def test_from_entry_bridge():
-    assert from_entry_bridge(
-        "# Loop 4 : entry bridge with 31 ops\n[p0, etc", [])
-    assert not from_entry_bridge(
-        "# Loop 1 : loop with 31 ops\n[p0, p1, etc", [])
-    assert not from_entry_bridge(
-        "# bridge out of Guard 5 with 24 ops\n[p0, p1, etc",
-        ["# Loop 1 : loop with 31 ops\n"
-             "[p0, p1]\n"
-             "guard_stuff(descr=<Guard5>)\n"])
-    assert from_entry_bridge(
-        "# bridge out of Guard 5 with 24 ops\n[p0, p1, etc",
-        ["# Loop 1 : entry bridge with 31 ops\n"
-             "[p0, p1]\n"
-             "guard_stuff(descr=<Guard5>)\n"])
-    assert not from_entry_bridge(
-        "# bridge out of Guard 51 with 24 ops\n[p0, p1, etc",
-        ["# Loop 1 : loop with 31 ops\n"
-             "[p0, p1]\n"
-             "guard_stuff(descr=<Guard5>)\n",
-         "# bridge out of Guard 5 with 13 ops\n"
-             "[p0, p1]\n"
-             "guard_other(p1, descr=<Guard51>)\n"])
-    assert from_entry_bridge(
-        "# bridge out of Guard 51 with 24 ops\n[p0, p1, etc",
-        ["# Loop 1 : entry bridge with 31 ops\n"
-             "[p0, p1]\n"
-             "guard_stuff(descr=<Guard5>)\n",
-         "# bridge out of Guard 5 with 13 ops\n"
-             "[p0, p1]\n"
-             "guard_other(p1, descr=<Guard51>)\n"])
-
-
-class PyPyCJITTests(object):
-    def run_source(self, source, expected_max_ops, *testcases, **kwds):
-        assert isinstance(expected_max_ops, int)
-        threshold = kwds.pop('threshold', 3)
-        self.count_debug_merge_point = \
-                                     kwds.pop('count_debug_merge_point', True)
-        if kwds:
-            raise TypeError, 'Unsupported keyword arguments: %s' % kwds.keys()
-        source = py.code.Source(source)
-        filepath = self.tmpdir.join('case%d.py' % self.counter)
-        logfilepath = filepath.new(ext='.log')
-        self.__class__.counter += 1
-        f = filepath.open('w')
-        print >> f, source
-        # some support code...
-        print >> f, py.code.Source("""
-            import sys
-            # we don't want to see the small bridges created
-            # by the checkinterval reaching the limit
-            sys.setcheckinterval(10000000)
-            try: # make the file runnable by CPython
-                import pypyjit
-                pypyjit.set_param(threshold=%d)
-            except ImportError:
-                pass
-
-            def check(args, expected):
-                #print >> sys.stderr, 'trying:', args
-                result = main(*args)
-                #print >> sys.stderr, 'got:', repr(result)
-                assert result == expected
-                assert type(result) is type(expected)
-        """ % threshold)
-        for testcase in testcases * 2:
-            print >> f, "check(%r, %r)" % testcase
-        print >> f, "print 'OK :-)'"
-        f.close()
-
-        print logfilepath
-        env = os.environ.copy()
-        env['PYPYLOG'] = ":%s" % (logfilepath,)
-        p = subprocess.Popen([self.pypy_c, str(filepath)],
-                             env=env, stdout=subprocess.PIPE)
-        result, _ = p.communicate()
-        assert result
-        if result.strip().startswith('SKIP:'):
-            py.test.skip(result.strip())
-        assert result.splitlines()[-1].strip() == 'OK :-)'
-        self.parse_loops(logfilepath)
-        self.print_loops()
-        print logfilepath
-        if self.total_ops > expected_max_ops:
-            assert 0, "too many operations: got %d, expected maximum %d" % (
-                self.total_ops, expected_max_ops)
-        return result
-
-    def parse_loops(self, opslogfile):
-        from pypy.tool import logparser
-        assert opslogfile.check()
-        log = logparser.parse_log_file(str(opslogfile))
-        parts = logparser.extract_category(log, 'jit-log-opt-')
-        self.rawloops = [part for part in parts
-                         if not from_entry_bridge(part, parts)]
-        self.loops, self.sliced_loops, self.total_ops = \
-                                           self.parse_rawloops(self.rawloops)
-        self.check_0_op_bytecodes()
-        self.rawentrybridges = [part for part in parts
-                                if from_entry_bridge(part, parts)]
-        _, self.sliced_entrybridge, _ = \
-                                    self.parse_rawloops(self.rawentrybridges)
-
-        from pypy.jit.tool.jitoutput import parse_prof
-        summaries  = logparser.extract_category(log, 'jit-summary')
-        if len(summaries) > 0:
-            self.jit_summary = parse_prof(summaries[-1])
-        else:
-            self.jit_summary = None
-        
-
-    def parse_rawloops(self, rawloops):
-        from pypy.jit.tool.oparser import parse
-        loops = [parse(part, no_namespace=True) for part in rawloops]
-        sliced_loops = [] # contains all bytecodes of all loops
-        total_ops = 0
-        for loop in loops:
-            for op in loop.operations:
-                if op.getopname() == "debug_merge_point":
-                    sliced_loop = BytecodeTrace()
-                    sliced_loop.bytecode = op.getarg(0)._get_str().rsplit(" ", 1)[1]
-                    sliced_loops.append(sliced_loop)
-                    if self.count_debug_merge_point:
-                        total_ops += 1
-                else:
-                    sliced_loop.append(op)
-                    total_ops += 1
-        return loops, sliced_loops, total_ops
-
-    def check_0_op_bytecodes(self):
-        for bytecodetrace in self.sliced_loops:
-            if bytecodetrace.bytecode not in ZERO_OP_BYTECODES:
-                continue
-            assert not bytecodetrace
-
-    def get_by_bytecode(self, name, from_entry_bridge=False):
-        if from_entry_bridge:
-            sliced_loops = self.sliced_entrybridge
-        else:
-            sliced_loops = self.sliced_loops
-        return [ops for ops in sliced_loops if ops.bytecode == name]
-
-    def print_loops(self):
-        for rawloop in self.rawloops:
-            print
-            print '@' * 79
-            print
-            print rawloop.rstrip()
-        print
-        print '@' * 79
-
-
-    def test_richards(self):
-        self.run_source('''
-            import sys; sys.path[:] = %r
-            from pypy.translator.goal import richards
-
-            def main():
-                return richards.main(iterations = 1)
-        ''' % (sys.path,), 7200,
-                   ([], 42))
-
-
-    def test_overflow_checking(self):
-        startvalue = sys.maxint - 2147483647
-        self.run_source('''
-        def main():
-            def f(a,b):
-                if a < 0: return -1
-                return a-b
-            total = %d
-            for i in range(100000):
-                total += f(i, 5)
-            return total
-        ''' % startvalue, 170, ([], startvalue + 4999450000L))
-
-    def test_shift(self):
-        from sys import maxint
-        maxvals = (-maxint-1, -maxint, maxint-1, maxint)
-        for a in (-4, -3, -2, -1, 0, 1, 2, 3, 4) + maxvals:
-            for b in (0, 1, 2, 31, 32, 33, 61, 62, 63):
-                r = 0
-                if (a >> b) >= 0:
-                    r += 2000
-                if (a << b) > 2:
-                    r += 20000000
-                if abs(a) < 10 and b < 5:
-                    ops = 13
-                else:
-                    ops = 29
-
-                self.run_source('''
-                def main(a, b):
-                    i = sa = 0
-                    while i < 2000:
-                        if a > 0: # Specialises the loop
-                            pass
-                        if b < 2 and b > 0:
-                            pass
-                        if (a >> b) >= 0:
-                            sa += 1
-                        if (a << b) > 2:
-                            sa += 10000
-                        i += 1
-                    return sa
-                ''', ops, ([a, b], r), count_debug_merge_point=False)
-
-    def test_revert_shift(self):
-        from sys import maxint
-        tests = []
-        for a in (1, 4, 8, 100):
-            for b in (-10, 10, -201, 201, -maxint/3, maxint/3):
-                for c in (-10, 10, -maxint/3, maxint/3):
-                    tests.append(([a, b, c], long(4000*(a+b+c))))
-        self.run_source('''
-        def main(a, b, c):
-            from sys import maxint
-            i = sa = 0
-            while i < 2000:
-                if 0 < a < 10: pass
-                if -100 < b < 100: pass
-                if -maxint/2 < c < maxint/2: pass
-                sa += (a<<a)>>a
-                sa += (b<<a)>>a
-                sa += (c<<a)>>a
-                sa += (a<<100)>>100
-                sa += (b<<100)>>100
-                sa += (c<<100)>>100
-                i += 1
-            return long(sa)
-        ''', 93, count_debug_merge_point=False, *tests)
-        
-    def test_division_to_rshift(self):
-        avalues = ('a', 'b', 7, -42, 8)
-        bvalues = ['b'] + range(-10, 0) + range(1,10)
-        code = ''
-        a1, b1, res1 = 10, 20, 0
-        a2, b2, res2 = 10, -20, 0
-        a3, b3, res3 = -10, -20, 0
-        def dd(a, b, aval, bval):
-            m = {'a': aval, 'b': bval}
-            if not isinstance(a, int):
-                a=m[a]
-            if not isinstance(b, int):
-                b=m[b]
-            return a/b
-        for a in avalues:
-            for b in bvalues:
-                code += '                sa += %s / %s\n' % (a, b)
-                res1 += dd(a, b, a1, b1)
-                res2 += dd(a, b, a2, b2)
-                res3 += dd(a, b, a3, b3)
-        # The purpose of this test is to check that we get
-        # the correct results, not really to count operations.
-        self.run_source('''
-        def main(a, b):
-            i = sa = 0
-            while i < 2000:
-%s                
-                i += 1
-            return sa
-        ''' % code, sys.maxint, ([a1, b1], 2000 * res1),
-                                ([a2, b2], 2000 * res2),
-                                ([a3, b3], 2000 * res3))
-
-    def test_mod(self):
-        avalues = ('a', 'b', 7, -42, 8)
-        bvalues = ['b'] + range(-10, 0) + range(1,10)
-        code = ''
-        a1, b1, res1 = 10, 20, 0
-        a2, b2, res2 = 10, -20, 0
-        a3, b3, res3 = -10, -20, 0
-        def dd(a, b, aval, bval):
-            m = {'a': aval, 'b': bval}
-            if not isinstance(a, int):
-                a=m[a]
-            if not isinstance(b, int):
-                b=m[b]
-            return a % b
-        for a in avalues:
-            for b in bvalues:
-                code += '                sa += %s %% %s\n' % (a, b)
-                res1 += dd(a, b, a1, b1)
-                res2 += dd(a, b, a2, b2)
-                res3 += dd(a, b, a3, b3)
-        # The purpose of this test is to check that we get
-        # the correct results, not really to count operations.
-        self.run_source('''
-        def main(a, b):
-            i = sa = 0
-            while i < 2000:
-                if a > 0: pass
-                if 1 < b < 2: pass
-%s
-                i += 1
-            return sa
-        ''' % code, sys.maxint, ([a1, b1], 2000 * res1),
-                                ([a2, b2], 2000 * res2),
-                                ([a3, b3], 2000 * res3))
-
-    def test_dont_trace_every_iteration(self):
-        self.run_source('''
-        def main(a, b):
-            i = sa = 0
-            while i < 200:
-                if a > 0: pass
-                if 1 < b < 2: pass
-                sa += a % b
-                i += 1
-            return sa
-        ''', 22,  ([10, 20], 200 * (10 % 20)),
-                 ([-10, -20], 200 * (-10 % -20)),
-                        count_debug_merge_point=False)
-        assert self.jit_summary.tracing_no == 2
-    def test_id_compare_optimization(self):
-        # XXX: lower the instruction count, 35 is the old value.
-        self.run_source("""
-        class A(object):
-            pass
-        def main():
-            i = 0
-            a = A()
-            while i < 5:
-                if A() != a:
-                    pass
-                i += 1
-        """, 35, ([], None))
-        _, compare = self.get_by_bytecode("COMPARE_OP")
-        assert "call" not in compare.get_opnames()
-
-class AppTestJIT(PyPyCJITTests):
-    def setup_class(cls):
-        if not option.runappdirect:
-            py.test.skip("meant only for pypy-c")
-        # the next line skips stuff if the pypy-c is not a jit build
-        cls.space = gettestobjspace(usemodules=['pypyjit'])
-        cls.tmpdir = udir.join('pypy-jit')
-        cls.tmpdir.ensure(dir=1)
-        cls.counter = 0
-        cls.pypy_c = sys.executable
-
-class TestJIT(PyPyCJITTests):
-    def setup_class(cls):
-        if option.pypy_c is None:
-            py.test.skip("pass --pypy!")
-        if not has_info(option.pypy_c, 'translation.jit'):
-            py.test.skip("must give a pypy-c with the jit enabled")
-        cls.tmpdir = udir.join('pypy-jit')
-        cls.tmpdir.ensure(dir=1)
-        cls.counter = 0
-        cls.pypy_c = option.pypy_c
-
-
-def test_interface_residual_call():
-    space = gettestobjspace(usemodules=['pypyjit'])
-    space.appexec([], """():
-        import pypyjit
-        def f(*args, **kwds):
-            return (args, kwds)
-        res = pypyjit.residual_call(f, 4, x=6)
-        assert res == ((4,), {'x': 6})
-    """)
-
-
-def has_info(pypy_c, option):
-    g = os.popen('"%s" --info' % pypy_c, 'r')
-    lines = g.readlines()
-    g.close()
-    if not lines:
-        raise ValueError("cannot execute %r" % pypy_c)
-    for line in lines:
-        line = line.strip()
-        if line.startswith(option + ':'):
-            line = line[len(option)+1:].strip()
-            if line == 'True':
-                return True
-            elif line == 'False':
-                return False
-            else:
-                return line
-    raise ValueError(option + ' not found in ' + pypy_c)
diff --git a/pypy/module/pypyjit/test_pypy_c/test_model.py b/pypy/module/pypyjit/test_pypy_c/test_00_model.py
rename from pypy/module/pypyjit/test_pypy_c/test_model.py
rename to pypy/module/pypyjit/test_pypy_c/test_00_model.py
--- a/pypy/module/pypyjit/test_pypy_c/test_model.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_00_model.py
@@ -5,6 +5,7 @@
 from lib_pypy import disassembler
 from pypy.tool.udir import udir
 from pypy.tool import logparser
+from pypy.jit.tool.jitoutput import parse_prof
 from pypy.module.pypyjit.test_pypy_c.model import Log, find_ids_range, find_ids, \
     LoopWithIds, OpMatcher
 
@@ -21,6 +22,7 @@
         self.filepath = self.tmpdir.join(meth.im_func.func_name + '.py')
 
     def run(self, func_or_src, args=[], import_site=False, **jitopts):
+        jitopts.setdefault('threshold', 200)
         src = py.code.Source(func_or_src)
         if isinstance(func_or_src, types.FunctionType):
             funcname = func_or_src.func_name
@@ -63,6 +65,13 @@
         rawtraces = logparser.extract_category(rawlog, 'jit-log-opt-')
         log = Log(rawtraces)
         log.result = eval(stdout)
+        #
+        summaries  = logparser.extract_category(rawlog, 'jit-summary')
+        if len(summaries) > 0:
+            log.jit_summary = parse_prof(summaries[-1])
+        else:
+            log.jit_summary = None
+        #
         return log
 
     def run_and_check(self, src, args=[], **jitopts):
diff --git a/pypy/module/pypyjit/test_pypy_c/test__ffi.py b/pypy/module/pypyjit/test_pypy_c/test__ffi.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test__ffi.py
@@ -0,0 +1,133 @@
+import py
+import sys
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class Test__ffi(BaseTestPyPyC):
+
+    def test__ffi_call(self):
+        from pypy.rlib.test.test_libffi import get_libm_name
+        def main(libm_name):
+            try:
+                from _ffi import CDLL, types
+            except ImportError:
+                sys.stderr.write('SKIP: cannot import _ffi\n')
+                return 0
+
+            libm = CDLL(libm_name)
+            pow = libm.getfunc('pow', [types.double, types.double],
+                               types.double)
+            i = 0
+            res = 0
+            while i < 300:
+                tmp = pow(2, 3)   # ID: fficall
+                res += tmp
+                i += 1
+            return pow.getaddr(), res
+        #
+        libm_name = get_libm_name(sys.platform)
+        log = self.run(main, [libm_name])
+        pow_addr, res = log.result
+        assert res == 8.0 * 300
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match_by_id('fficall', """
+            p16 = getfield_gc(ConstPtr(ptr15), descr=<.* .*Function.inst_name .*>)
+            guard_not_invalidated(descr=...)
+            i17 = force_token()
+            setfield_gc(p0, i17, descr=<.* .*PyFrame.vable_token .*>)
+            f21 = call_release_gil(%d, 2.000000, 3.000000, descr=<FloatCallDescr>)
+            guard_not_forced(descr=...)
+            guard_no_exception(descr=...)
+        """ % pow_addr)
+
+
+    def test__ffi_call_frame_does_not_escape(self):
+        from pypy.rlib.test.test_libffi import get_libm_name
+        def main(libm_name):
+            try:
+                from _ffi import CDLL, types
+            except ImportError:
+                sys.stderr.write('SKIP: cannot import _ffi\n')
+                return 0
+
+            libm = CDLL(libm_name)
+            pow = libm.getfunc('pow', [types.double, types.double],
+                               types.double)
+
+            def mypow(a, b):
+                return pow(a, b)
+
+            i = 0
+            res = 0
+            while i < 300:
+                tmp = mypow(2, 3)
+                res += tmp
+                i += 1
+            return pow.getaddr(), res
+        #
+        libm_name = get_libm_name(sys.platform)
+        log = self.run(main, [libm_name])
+        pow_addr, res = log.result
+        assert res == 8.0 * 300
+        loop, = log.loops_by_filename(self.filepath)
+        opnames = log.opnames(loop.allops())
+        # we only force the virtualref, not its content
+        assert opnames.count('new_with_vtable') == 1
+
+    def test__ffi_call_releases_gil(self):
+        from pypy.rlib.test.test_libffi import get_libc_name
+        def main(libc_name, n):
+            import time
+            from threading import Thread
+            from _ffi import CDLL, types
+            #
+            libc = CDLL(libc_name)
+            sleep = libc.getfunc('sleep', [types.uint], types.uint)
+            delays = [0]*n + [1]
+            #
+            def loop_of_sleeps(i, delays):
+                for delay in delays:
+                    sleep(delay)    # ID: sleep
+            #
+            threads = [Thread(target=loop_of_sleeps, args=[i, delays]) for i in range(5)]
+            start = time.time()
+            for i, thread in enumerate(threads):
+                thread.start()
+            for thread in threads:
+                thread.join()
+            end = time.time()
+            return end - start
+        #
+        log = self.run(main, [get_libc_name(), 200], threshold=150)
+        assert 1 <= log.result <= 1.5 # at most 0.5 seconds of overhead
+        loops = log.loops_by_id('sleep')
+        assert len(loops) == 1 # make sure that we actually JITted the loop
+
+
+    def test_ctypes_call(self):
+        from pypy.rlib.test.test_libffi import get_libm_name
+        def main(libm_name):
+            import ctypes
+            libm = ctypes.CDLL(libm_name)
+            fabs = libm.fabs
+            fabs.argtypes = [ctypes.c_double]
+            fabs.restype = ctypes.c_double
+            x = -4
+            i = 0
+            while i < 300:
+                x = fabs(x)
+                x = x - 100
+                i += 1
+            return fabs._ptr.getaddr(), x
+
+        libm_name = get_libm_name(sys.platform)
+        log = self.run(main, [libm_name])
+        fabs_addr, res = log.result
+        assert res == -4.0
+        loop, = log.loops_by_filename(self.filepath)
+        ops = loop.allops()
+        opnames = log.opnames(ops)
+        assert opnames.count('new_with_vtable') == 1 # only the virtualref
+        assert opnames.count('call_release_gil') == 1
+        idx = opnames.index('call_release_gil')
+        call = ops[idx]
+        assert int(call.args[0]) == fabs_addr
diff --git a/pypy/module/pypyjit/test_pypy_c/test_array.py b/pypy/module/pypyjit/test_pypy_c/test_array.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_array.py
@@ -0,0 +1,186 @@
+import py
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class TestArray(BaseTestPyPyC):
+
+    def test_arraycopy_disappears(self):
+        def main(n):
+            i = 0
+            while i < n:
+                t = (1, 2, 3, i + 1)
+                t2 = t[:]
+                del t
+                i = t2[3]
+                del t2
+            return i
+        #
+        log = self.run(main, [500])
+        assert log.result == 500
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i7 = int_lt(i5, i6)
+            guard_true(i7, descr=<Guard3>)
+            i9 = int_add(i5, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, i9, i6, descr=<Loop0>)
+        """)
+
+    def test_array_sum(self):
+        def main():
+            from array import array
+            img = array("i", range(128) * 5) * 480
+            l, i = 0, 0
+            while i < len(img):
+                l += img[i]
+                i += 1
+            return l
+        #
+        log = self.run(main, [])
+        assert log.result == 19507200
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i13 = int_lt(i7, i9)
+            guard_true(i13, descr=<Guard3>)
+            i15 = getarrayitem_raw(i10, i7, descr=<.*ArrayNoLengthDescr>)
+            i16 = int_add_ovf(i8, i15)
+            guard_no_overflow(descr=<Guard4>)
+            i18 = int_add(i7, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, p5, p6, i18, i16, i9, i10, descr=<Loop0>)
+        """)
+
+    def test_array_intimg(self):
+        def main():
+            from array import array
+            img = array('i', range(3)) * (350 * 480)
+            intimg = array('i', (0,)) * (640 * 480)
+            l, i = 0, 640
+            while i < 640 * 480:
+                assert len(img) == 3*350*480
+                assert len(intimg) == 640*480
+                l = l + img[i]
+                intimg[i] = (intimg[i-640] + l)
+                i += 1
+            return intimg[i - 1]
+        #
+        log = self.run(main, [])
+        assert log.result == 73574560
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i13 = int_lt(i8, 307200)
+            guard_true(i13, descr=<Guard3>)
+        # the bound check guard on img has been killed (thanks to the asserts)
+            i14 = getarrayitem_raw(i10, i8, descr=<.*ArrayNoLengthDescr>)
+            i15 = int_add_ovf(i9, i14)
+            guard_no_overflow(descr=<Guard4>)
+            i17 = int_sub(i8, 640)
+        # the bound check guard on intimg has been killed (thanks to the asserts)
+            i18 = getarrayitem_raw(i11, i17, descr=<.*ArrayNoLengthDescr>)
+            i19 = int_add_ovf(i18, i15)
+            guard_no_overflow(descr=<Guard5>)
+        # on 64bit, there is a guard checking that i19 actually fits into 32bit
+            ...
+            setarrayitem_raw(i11, i8, _, descr=<.*ArrayNoLengthDescr>)
+            i28 = int_add(i8, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, p5, p6, p7, i28, i15, i10, i11, descr=<Loop0>)
+        """)
+
+
+    def test_zeropadded(self):
+        def main():
+            from array import array
+            class ZeroPadded(array):
+                def __new__(cls, l):
+                    self = array.__new__(cls, 'd', range(l))
+                    return self
+
+                def __getitem__(self, i):
+                    if i < 0 or i >= len(self):
+                        return 0
+                    return array.__getitem__(self, i) # ID: get
+            #
+            buf = ZeroPadded(2000)
+            i = 10
+            sa = 0
+            while i < 2000 - 10:
+                sa += buf[i-2] + buf[i-1] + buf[i] + buf[i+1] + buf[i+2]
+                i += 1
+            return sa
+
+        log = self.run(main, [])
+        assert log.result == 9895050.0
+        loop, = log.loops_by_filename(self.filepath)
+        #
+        # check that the overloaded __getitem__ does not introduce double
+        # array bound checks.
+        #
+        # The force_token()s are still there, but will be eliminated by the
+        # backend regalloc, so they are harmless
+        assert loop.match(ignore_ops=['force_token'],
+                          expected_src="""
+            ...
+            i20 = int_ge(i18, i8)
+            guard_false(i20, descr=...)
+            f21 = getarrayitem_raw(i13, i18, descr=...)
+            f23 = getarrayitem_raw(i13, i14, descr=...)
+            f24 = float_add(f21, f23)
+            f26 = getarrayitem_raw(i13, i6, descr=...)
+            f27 = float_add(f24, f26)
+            i29 = int_add(i6, 1)
+            i31 = int_ge(i29, i8)
+            guard_false(i31, descr=...)
+            f33 = getarrayitem_raw(i13, i29, descr=...)
+            f34 = float_add(f27, f33)
+            i36 = int_add(i6, 2)
+            i38 = int_ge(i36, i8)
+            guard_false(i38, descr=...)
+            f39 = getarrayitem_raw(i13, i36, descr=...)
+            ...
+        """)
+
+    def test_circular(self):
+        def main():
+            from array import array
+            class Circular(array):
+                def __new__(cls):
+                    self = array.__new__(cls, 'd', range(256))
+                    return self
+                def __getitem__(self, i):
+                    assert len(self) == 256
+                    return array.__getitem__(self, i & 255)
+            #
+            buf = Circular()
+            i = 10
+            sa = 0
+            while i < 2000 - 10:
+                sa += buf[i-2] + buf[i-1] + buf[i] + buf[i+1] + buf[i+2]
+                i += 1
+            return sa
+        #
+        log = self.run(main, [])
+        assert log.result == 1239690.0
+        loop, = log.loops_by_filename(self.filepath)
+        #
+        # check that the array bound checks are removed
+        #
+        # The force_token()s are still there, but will be eliminated by the
+        # backend regalloc, so they are harmless
+        assert loop.match(ignore_ops=['force_token'],
+                          expected_src="""
+            ...
+            i17 = int_and(i14, 255)
+            f18 = getarrayitem_raw(i8, i17, descr=...)
+            f20 = getarrayitem_raw(i8, i9, descr=...)
+            f21 = float_add(f18, f20)
+            f23 = getarrayitem_raw(i8, i10, descr=...)
+            f24 = float_add(f21, f23)
+            i26 = int_add(i6, 1)
+            i29 = int_and(i26, 255)
+            f30 = getarrayitem_raw(i8, i29, descr=...)
+            f31 = float_add(f24, f30)
+            i33 = int_add(i6, 2)
+            i36 = int_and(i33, 255)
+            f37 = getarrayitem_raw(i8, i36, descr=...)
+            ...
+        """)
diff --git a/pypy/module/pypyjit/test_pypy_c/test_boolrewrite.py b/pypy/module/pypyjit/test_pypy_c/test_boolrewrite.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_boolrewrite.py
@@ -0,0 +1,233 @@
+import py
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class TestBoolRewrite(BaseTestPyPyC):
+
+    def test_boolrewrite_inverse(self):
+        """
+        Test for this case::
+            guard(i < x)
+            ...
+            guard(i >= y)
+
+        where x and y can be either constants or variables. There are cases in
+        which the second guard is proven to be always true.
+        """
+
+        for a, b, res, opt_expected in (('2000', '2000', 20001000, True),
+                                        ( '500',  '500', 15001500, True),
+                                        ( '300',  '600', 16001700, False),
+                                        (   'a',    'b', 16001700, False),
+                                        (   'a',    'a', 13001700, True)):
+            src = """
+                def main():
+                    sa = 0
+                    a = 300
+                    b = 600
+                    for i in range(1000):
+                        if i < %s:         # ID: lt
+                            sa += 1
+                        else:
+                            sa += 2
+                        #
+                        if i >= %s:        # ID: ge
+                            sa += 10000
+                        else:
+                            sa += 20000
+                    return sa
+            """ % (a, b)
+            #
+            log = self.run(src, [], threshold=400)
+            assert log.result == res
+            loop, = log.loops_by_filename(self.filepath)
+            le_ops = log.opnames(loop.ops_by_id('lt'))
+            ge_ops = log.opnames(loop.ops_by_id('ge'))
+            assert le_ops.count('int_lt') == 1
+            #
+            if opt_expected:
+                assert ge_ops.count('int_ge') == 0
+            else:
+                # if this assert fails it means that the optimization was
+                # applied even though we don't expect it. Check whether the
+                # optimization is valid, and either fix the code or fix the
+                # test :-)
+                assert ge_ops.count('int_ge') == 1
+
+    def test_boolrewrite_reflex(self):
+        """
+        Test for this case::
+            guard(i < x)
+            ...
+            guard(y > i)
+
+        where x and y can be either constants or variables. There are cases in
+        which the second guard is proven to be always true.
+        """
+        for a, b, res, opt_expected in (('2000', '2000', 10001000, True),
+                                        ( '500',  '500', 15001500, True),
+                                        ( '300',  '600', 14001700, False),
+                                        (   'a',    'b', 14001700, False),
+                                        (   'a',    'a', 17001700, True)):
+
+            src = """
+                def main():
+                    sa = 0
+                    a = 300
+                    b = 600
+                    for i in range(1000):
+                        if i < %s:        # ID: lt
+                            sa += 1
+                        else:
+                            sa += 2
+                        if %s > i:        # ID: gt
+                            sa += 10000
+                        else:
+                            sa += 20000
+                    return sa
+            """ % (a, b)
+            log = self.run(src, [], threshold=400)
+            assert log.result == res
+            loop, = log.loops_by_filename(self.filepath)
+            le_ops = log.opnames(loop.ops_by_id('lt'))
+            gt_ops = log.opnames(loop.ops_by_id('gt'))
+            assert le_ops.count('int_lt') == 1
+            #
+            if opt_expected:
+                assert gt_ops.count('int_gt') == 0
+            else:
+                # if this assert fails it means that the optimization was
+                # applied even though we don't expect it. Check whether the
+                # optimization is valid, and either fix the code or fix the
+                # test :-)
+                assert gt_ops.count('int_gt') == 1
+
+
+    def test_boolrewrite_allcases_inverse(self):
+        """
+        Test for this case::
+            guard(i < x)
+            ...
+            guard(i > y)
+
+        with all possible combinations of binary comparison operators.  This
+        test only checks that we get the expected result, not that any
+        optimization has been applied.
+        """
+        ops = ('<', '>', '<=', '>=', '==', '!=')
+        for op1 in ops:
+            for op2 in ops:
+                for a,b in ((500, 500), (300, 600)):
+                    src = """
+                        def main():
+                            sa = 0
+                            for i in range(300):
+                                if i %s %d:
+                                    sa += 1
+                                else:
+                                    sa += 2
+                                if i %s %d:
+                                    sa += 10000
+                                else:
+                                    sa += 20000
+                            return sa
+                    """ % (op1, a, op2, b)
+                    yield self.run_and_check, src
+
+                    src = """
+                        def main():
+                            sa = 0
+                            i = 0.0
+                            while i < 250.0:
+                                if i %s %f:
+                                    sa += 1
+                                else:
+                                    sa += 2
+                                if i %s %f:
+                                    sa += 10000
+                                else:
+                                    sa += 20000
+                                i += 0.25
+                            return sa
+                    """ % (op1, float(a)/4.0, op2, float(b)/4.0)
+                    yield self.run_and_check, src
+
+
+    def test_boolrewrite_allcases_reflex(self):
+        """
+        Test for this case::
+            guard(i < x)
+            ...
+            guard(x > i)
+
+        with all possible combinations of binary comparison operators.  This
+        test only checks that we get the expected result, not that any
+        optimization has been applied.
+        """
+        ops = ('<', '>', '<=', '>=', '==', '!=')
+        for op1 in ops:
+            for op2 in ops:
+                for a,b in ((500, 500), (300, 600)):
+                    src = """
+                        def main():
+                            sa = 0
+                            for i in range(300):
+                                if i %s %d:
+                                    sa += 1
+                                else:
+                                    sa += 2
+                                if %d %s i:
+                                    sa += 10000
+                                else:
+                                    sa += 20000
+                            return sa
+                    """ % (op1, a, b, op2)
+                    yield self.run_and_check, src
+
+                    src = """
+                        def main():
+                            sa = 0
+                            i = 0.0
+                            while i < 250.0:
+                                if i %s %f:
+                                    sa += 1
+                                else:
+                                    sa += 2
+                                if %f %s i:
+                                    sa += 10000
+                                else:
+                                    sa += 20000
+                                i += 0.25
+                            return sa
+                    """ % (op1, float(a)/4.0, float(b)/4.0, op2)
+                    yield self.run_and_check, src
+
+    def test_boolrewrite_ptr(self):
+        """
+        This test only checks that we get the expected result, not that any
+        optimization has been applied.
+        """
+        compares = ('a == b', 'b == a', 'a != b', 'b != a', 'a == c', 'c != b')
+        for e1 in compares:
+            for e2 in compares:
+                src = """
+                    class tst(object):
+                        pass
+                    def main():
+                        a = tst()
+                        b = tst()
+                        c = tst()
+                        sa = 0
+                        for i in range(300):
+                            if %s:
+                                sa += 1
+                            else:
+                                sa += 2
+                            if %s:
+                                sa += 10000
+                            else:
+                                sa += 20000
+                            if i > 750:
+                                a = b
+                        return sa
+                """ % (e1, e2)
+                yield self.run_and_check, src
diff --git a/pypy/module/pypyjit/test_pypy_c/test_call.py b/pypy/module/pypyjit/test_pypy_c/test_call.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_call.py
@@ -0,0 +1,381 @@
+import py
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class TestCall(BaseTestPyPyC):
+
+    def test_recursive_call(self):
+        def fn():
+            def rec(n):
+                if n == 0:
+                    return 0
+                return 1 + rec(n-1)
+            #
+            # this loop is traced and then aborted, because the trace is too
+            # long. But then "rec" is marked as "don't inline"
+            i = 0
+            j = 0
+            while i < 20:
+                i += 1
+                j += rec(100)
+            #
+            # next time we try to trace "rec", instead of inlining we compile
+            # it separately and generate a call_assembler
+            i = 0
+            j = 0
+            while i < 20:
+                i += 1
+                j += rec(100) # ID: call_rec
+                a = 0
+            return j
+        #
+        log = self.run(fn, [], threshold=18)
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match_by_id('call_rec', """
+            ...
+            p53 = call_assembler(..., descr=...)
+            guard_not_forced(descr=...)
+            guard_no_exception(descr=...)
+            ...
+        """)
+
+    def test_simple_call(self):
+        src = """
+            OFFSET = 0
+            def f(i):
+                return i + 1 + OFFSET # ID: add
+            def main(n):
+                i = 0
+                while i < n+OFFSET:   # ID: cond
+                    i = f(f(i))       # ID: call
+                    a = 0
+                return i
+        """
+        log = self.run(src, [1000])
+        assert log.result == 1000
+        # first, we test what is inside the entry bridge
+        # -----------------------------------------------
+        entry_bridge, = log.loops_by_id('call', is_entry_bridge=True)
+        # LOAD_GLOBAL of OFFSET
+        ops = entry_bridge.ops_by_id('cond', opcode='LOAD_GLOBAL')
+        assert log.opnames(ops) == ["guard_value",
+                                    "getfield_gc", "guard_value",
+                                    "getfield_gc", "guard_isnull",
+                                    "getfield_gc", "guard_nonnull_class"]
+        # LOAD_GLOBAL of OFFSET but in different function partially folded
+        # away
+        # XXX could be improved
+        ops = entry_bridge.ops_by_id('add', opcode='LOAD_GLOBAL')
+        assert log.opnames(ops) == ["guard_value", "getfield_gc", "guard_isnull"]
+        #
+        # two LOAD_GLOBAL of f, the second is folded away
+        ops = entry_bridge.ops_by_id('call', opcode='LOAD_GLOBAL')
+        assert log.opnames(ops) == ["getfield_gc", "guard_nonnull_class"]
+        #
+        assert entry_bridge.match_by_id('call', """
+            p29 = getfield_gc(ConstPtr(ptr28), descr=<GcPtrFieldDescr pypy.objspace.std.celldict.ModuleCell.inst_w_value .*>)
+            guard_nonnull_class(p29, ConstClass(Function), descr=<Guard18>)
+            p33 = getfield_gc(p29, descr=<GcPtrFieldDescr pypy.interpreter.function.Function.inst_code .*>)
+            guard_value(p33, ConstPtr(ptr34), descr=<Guard19>)
+            p35 = getfield_gc(p29, descr=<GcPtrFieldDescr pypy.interpreter.function.Function.inst_w_func_globals .*>)
+            p36 = getfield_gc(p29, descr=<GcPtrFieldDescr pypy.interpreter.function.Function.inst_closure .*>)
+            p38 = call(ConstClass(getexecutioncontext), descr=<GcPtrCallDescr>)
+            p39 = getfield_gc(p38, descr=<GcPtrFieldDescr pypy.interpreter.executioncontext.ExecutionContext.inst_topframeref .*>)
+            i40 = force_token()
+            p41 = getfield_gc(p38, descr=<GcPtrFieldDescr pypy.interpreter.executioncontext.ExecutionContext.inst_w_tracefunc .*>)
+            guard_isnull(p41, descr=<Guard20>)
+            i42 = getfield_gc(p38, descr=<NonGcPtrFieldDescr pypy.interpreter.executioncontext.ExecutionContext.inst_profilefunc .*>)
+            i43 = int_is_zero(i42)
+            guard_true(i43, descr=<Guard21>)
+            i50 = force_token()
+        """)
+        #
+        # then, we test the actual loop
+        # -----------------------------
+        loop, = log.loops_by_id('call')
+        assert loop.match("""
+            i12 = int_lt(i5, i6)
+            guard_true(i12, descr=<Guard3>)
+            i13 = force_token()
+            i15 = int_add(i5, 1)
+            i16 = int_add_ovf(i15, i7)
+            guard_no_overflow(descr=<Guard4>)
+            i18 = force_token()
+            i20 = int_add_ovf(i16, 1)
+            guard_no_overflow(descr=<Guard5>)
+            i21 = int_add_ovf(i20, i7)
+            guard_no_overflow(descr=<Guard6>)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, i21, i6, i7, p8, p9, p10, p11, descr=<Loop0>)
+        """)
+
+    def test_method_call(self):
+        def fn(n):
+            class A(object):
+                def __init__(self, a):
+                    self.a = a
+                def f(self, i):
+                    return self.a + i
+            i = 0
+            a = A(1)
+            while i < n:
+                x = a.f(i)    # ID: meth1
+                i = a.f(x)    # ID: meth2
+            return i
+        #
+        log = self.run(fn, [1000])
+        assert log.result == 1000
+        #
+        # first, we test the entry bridge
+        # -------------------------------
+        entry_bridge, = log.loops_by_filename(self.filepath, is_entry_bridge=True)
+        ops = entry_bridge.ops_by_id('meth1', opcode='LOOKUP_METHOD')
+        assert log.opnames(ops) == ['guard_value', 'getfield_gc', 'guard_value',
+                                    'guard_not_invalidated']
+        # the second LOOKUP_METHOD is folded away
+        assert list(entry_bridge.ops_by_id('meth2', opcode='LOOKUP_METHOD')) == []
+        #
+        # then, the actual loop
+        # ----------------------
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i15 = int_lt(i6, i9)
+            guard_true(i15, descr=<Guard3>)
+            guard_not_invalidated(descr=<Guard4>)
+            i16 = force_token()
+            i17 = int_add_ovf(i10, i6)
+            guard_no_overflow(descr=<Guard5>)
+            i18 = force_token()
+            i19 = int_add_ovf(i10, i17)
+            guard_no_overflow(descr=<Guard6>)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, p5, i19, p7, i17, i9, i10, p11, p12, p13, descr=<Loop0>)
+        """)
+
+    def test_static_classmethod_call(self):
+        def fn(n):
+            class A(object):
+                @classmethod
+                def f(cls, i):
+                    return i + (cls is A) + 1
+                @staticmethod
+                def g(i):
+                    return i - 1
+            #
+            i = 0
+            a = A()
+            while i < n:
+                x = a.f(i)
+                i = a.g(x)
+            return i
+        #
+        log = self.run(fn, [1000])
+        assert log.result == 1000
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i14 = int_lt(i6, i9)
+            guard_true(i14, descr=<Guard3>)
+            guard_not_invalidated(descr=<Guard4>)
+            i15 = force_token()
+            i17 = int_add_ovf(i8, 1)
+            guard_no_overflow(descr=<Guard5>)
+            i18 = force_token()
+            --TICK--
+            jump(p0, p1, p2, p3, p4, p5, i8, p7, i17, i9, p10, p11, p12, descr=<Loop0>)
+        """)
+
+    def test_default_and_kw(self):
+        def main(n):
+            def f(i, j=1):
+                return i + j
+            #
+            i = 0
+            while i < n:
+                i = f(f(i), j=1) # ID: call
+                a = 0
+            return i
+        #
+        log = self.run(main, [1000])
+        assert log.result == 1000
+        loop, = log.loops_by_id('call')
+        assert loop.match_by_id('call', """
+            i14 = force_token()
+            i16 = force_token()
+        """)
+
+    def test_kwargs(self):
+        # this is not a very precise test, could be improved
+        def main(x):
+            def g(**args):
+                return len(args)
+            #
+            s = 0
+            d = {}
+            for i in range(x):
+                s += g(**d)       # ID: call
+                d[str(i)] = i
+                if i % 100 == 99:
+                    d = {}
+            return s
+        #
+        log = self.run(main, [1000])
+        assert log.result == 49500
+        loop, = log.loops_by_id('call')
+        ops = log.opnames(loop.ops_by_id('call'))
+        guards = [ops for ops in ops if ops.startswith('guard')]
+        assert len(guards) <= 5
+
+    def test_stararg_virtual(self):
+        def main(x):
+            def g(*args):
+                return len(args)
+            def h(a, b, c):
+                return c
+            #
+            s = 0
+            for i in range(x):
+                l = [i, x, 2]
+                s += g(*l)       # ID: g1
+                s += h(*l)       # ID: h1
+                s += g(i, x, 2)  # ID: g2
+                a = 0
+            for i in range(x):
+                l = [x, 2]
+                s += g(i, *l)    # ID: g3
+                s += h(i, *l)    # ID: h2
+                a = 0
+            return s
+        #
+        log = self.run(main, [1000])
+        assert log.result == 13000
+        loop0, = log.loops_by_id('g1')
+        assert loop0.match_by_id('g1', """
+            i20 = force_token()
+            setfield_gc(p4, i19, descr=<.*W_AbstractSeqIterObject.inst_index .*>)
+            i22 = int_add_ovf(i8, 3)
+            guard_no_overflow(descr=<Guard4>)
+        """)
+        assert loop0.match_by_id('h1', """
+            i20 = force_token()
+            i22 = int_add_ovf(i8, 2)
+            guard_no_overflow(descr=<Guard5>)
+        """)
+        assert loop0.match_by_id('g2', """
+            i27 = force_token()
+            i29 = int_add_ovf(i26, 3)
+            guard_no_overflow(descr=<Guard6>)
+        """)
+        #
+        loop1, = log.loops_by_id('g3')
+        assert loop1.match_by_id('g3', """
+            i21 = force_token()
+            setfield_gc(p4, i20, descr=<.* .*W_AbstractSeqIterObject.inst_index .*>)
+            i23 = int_add_ovf(i9, 3)
+            guard_no_overflow(descr=<Guard37>)
+        """)
+        assert loop1.match_by_id('h2', """
+            i25 = force_token()
+            i27 = int_add_ovf(i23, 2)
+            guard_no_overflow(descr=<Guard38>)
+        """)
+
+    def test_stararg(self):
+        def main(x):
+            def g(*args):
+                return args[-1]
+            def h(*args):
+                return len(args)
+            #
+            s = 0
+            l = []
+            i = 0
+            while i < x:
+                l.append(1)
+                s += g(*l)     # ID: g
+                i = h(*l)      # ID: h
+                a = 0
+            return s
+        #
+        log = self.run(main, [1000])
+        assert log.result == 1000
+        loop, = log.loops_by_id('g')
+        ops_g = log.opnames(loop.ops_by_id('g'))
+        ops_h = log.opnames(loop.ops_by_id('h'))
+        ops = ops_g + ops_h
+        assert 'new_with_vtable' not in ops
+        assert 'call_may_force' not in ops
+
+    def test_call_builtin_function(self):
+        def main(n):
+            i = 2
+            l = []
+            while i < n:
+                i += 1
+                l.append(i)    # ID: append
+                a = 0
+            return i, len(l)
+        #
+        log = self.run(main, [1000])
+        assert log.result == (1000, 998)
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match_by_id('append', """
+            i13 = getfield_gc(p8, descr=<SignedFieldDescr list.length .*>)
+            i15 = int_add(i13, 1)
+            call(ConstClass(_ll_list_resize_ge__listPtr_Signed), p8, i15, descr=<VoidCallDescr>)
+            guard_no_exception(descr=<Guard4>)
+            p17 = getfield_gc(p8, descr=<GcPtrFieldDescr list.items .*>)
+            p19 = new_with_vtable(ConstClass(W_IntObject))
+            setfield_gc(p19, i12, descr=<SignedFieldDescr .*W_IntObject.inst_intval .*>)
+            setarrayitem_gc(p17, i13, p19, descr=<GcPtrArrayDescr>)
+        """)
+
+    def test_blockstack_virtualizable(self):
+        def main(n):
+            from pypyjit import residual_call
+            i = 0
+            while i < n:
+                try:
+                    residual_call(len, [])   # ID: call
+                except:
+                    pass
+                i += 1
+            return i
+        #
+        log = self.run(main, [500])
+        assert log.result == 500
+        loop, = log.loops_by_id('call')
+        assert loop.match_by_id('call', opcode='CALL_FUNCTION', expected_src="""
+            # make sure that the "block" is not allocated
+            ...
+            i20 = force_token()
+            setfield_gc(p0, i20, descr=<SignedFieldDescr .*PyFrame.vable_token .*>)
+            p22 = new_with_vtable(19511408)
+            p24 = new_array(1, descr=<GcPtrArrayDescr>)
+            p26 = new_with_vtable(ConstClass(W_ListObject))
+            p27 = new(descr=<SizeDescr .*>)
+            p29 = new_array(0, descr=<GcPtrArrayDescr>)
+            setfield_gc(p27, p29, descr=<GcPtrFieldDescr list.items .*>)
+            setfield_gc(p26, p27, descr=<.* .*W_ListObject.inst_wrappeditems .*>)
+            setarrayitem_gc(p24, 0, p26, descr=<GcPtrArrayDescr>)
+            setfield_gc(p22, p24, descr=<GcPtrFieldDescr .*Arguments.inst_arguments_w .*>)
+            p32 = call_may_force(11376960, p18, p22, descr=<GcPtrCallDescr>)
+            ...
+        """)
+
+    def test_func_defaults(self):
+        def main(n):
+            i = 1
+            while i < n:
+                i += len(xrange(i+1)) - i
+            return i
+
+        log = self.run(main, [10000])
+        assert log.result == 10000
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i10 = int_lt(i5, i6)
+            guard_true(i10, descr=<Guard3>)
+            i120 = int_add(i5, 1)
+            guard_not_invalidated(descr=<Guard4>)
+            --TICK--
+            jump(..., descr=<Loop0>)
+        """)
diff --git a/pypy/module/pypyjit/test_pypy_c/test_exception.py b/pypy/module/pypyjit/test_pypy_c/test_exception.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_exception.py
@@ -0,0 +1,93 @@
+import py
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class TestException(BaseTestPyPyC):
+
+    def test_cmp_exc(self):
+        def f1(n):
+            # So we don't get a LOAD_GLOBAL op
+            KE = KeyError
+            i = 0
+            while i < n:
+                try:
+                    raise KE
+                except KE: # ID: except
+                    i += 1
+            return i
+
+        log = self.run(f1, [10000])
+        assert log.result == 10000
+        loop, = log.loops_by_id("except")
+        ops = list(loop.ops_by_id("except", opcode="COMPARE_OP"))
+        assert ops == []
+
+    def test_exception_inside_loop_1(self):
+        def main(n):
+            while n:
+                try:
+                    raise ValueError
+                except ValueError:
+                    pass
+                n -= 1
+            return n
+        #
+        log = self.run(main, [1000])
+        assert log.result == 0
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+        i5 = int_is_true(i3)
+        guard_true(i5, descr=<Guard3>)
+        guard_not_invalidated(descr=<Guard4>)
+        --EXC-TICK--
+        i12 = int_sub_ovf(i3, 1)
+        guard_no_overflow(descr=<Guard6>)
+        --TICK--
+        jump(..., descr=<Loop0>)
+        """)
+
+    def test_exception_inside_loop_2(self):
+        def main(n):
+            def g(n):
+                raise ValueError(n)  # ID: raise
+            def f(n):
+                g(n)
+            #
+            while n:
+                try:
+                    f(n)
+                except ValueError:
+                    pass
+                n -= 1
+            return n
+        #
+        log = self.run(main, [1000])
+        assert log.result == 0
+        loop, = log.loops_by_filename(self.filepath)
+        ops = log.opnames(loop.ops_by_id('raise'))
+        assert 'new' not in ops
+
+    def test_reraise(self):
+        def f(n):
+            i = 0
+            while i < n:
+                try:
+                    try:
+                        raise KeyError
+                    except KeyError:
+                        raise
+                except KeyError:
+                    i += 1
+            return i
+
+        log = self.run(f, [100000])
+        assert log.result == 100000
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i7 = int_lt(i4, i5)
+            guard_true(i7, descr=<Guard3>)
+            guard_not_invalidated(descr=<Guard4>)
+            --EXC-TICK--
+            i14 = int_add(i4, 1)
+            --TICK--
+            jump(..., descr=<Loop0>)
+        """)
diff --git a/pypy/module/pypyjit/test_pypy_c/test_import.py b/pypy/module/pypyjit/test_pypy_c/test_import.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_import.py
@@ -0,0 +1,46 @@
+import py
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class TestImport(BaseTestPyPyC):
+
+    def test_import_in_function(self):
+        def main(n):
+            i = 0
+            while i < n:
+                from sys import version  # ID: import
+                i += 1
+            return i
+        #
+        log = self.run(main, [500])
+        assert log.result == 500
+        loop, = log.loops_by_id('import')
+        assert loop.match_by_id('import', """
+            p11 = getfield_gc(ConstPtr(ptr10), descr=<GcPtrFieldDescr pypy.objspace.std.celldict.ModuleCell.inst_w_value 8>)
+            guard_value(p11, ConstPtr(ptr12), descr=<Guard4>)
+            guard_not_invalidated(descr=<Guard5>)
+            p14 = getfield_gc(ConstPtr(ptr13), descr=<GcPtrFieldDescr pypy.objspace.std.celldict.ModuleCell.inst_w_value 8>)
+            p16 = getfield_gc(ConstPtr(ptr15), descr=<GcPtrFieldDescr pypy.objspace.std.celldict.ModuleCell.inst_w_value 8>)
+            guard_value(p14, ConstPtr(ptr17), descr=<Guard6>)
+            guard_isnull(p16, descr=<Guard7>)
+        """)
+
+    def test_import_fast_path(self, tmpdir):
+        pkg = tmpdir.join('mypkg').ensure(dir=True)
+        pkg.join('__init__.py').write("")
+        pkg.join('mod.py').write(str(py.code.Source("""
+            def do_the_import():
+                import sys
+        """)))
+        def main(path, n):
+            import sys
+            sys.path.append(path)
+            from mypkg.mod import do_the_import
+            for i in range(n):
+                do_the_import()
+        #
+        log = self.run(main, [str(tmpdir), 300])
+        loop, = log.loops_by_filename(self.filepath)
+        # this is a check for a slow-down that introduced a
+        # call_may_force(absolute_import_with_lock).
+        for opname in log.opnames(loop.allops(opcode="IMPORT_NAME")):
+            assert 'call' not in opname    # no call-like opcode
diff --git a/pypy/module/pypyjit/test_pypy_c/test_instance.py b/pypy/module/pypyjit/test_pypy_c/test_instance.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_instance.py
@@ -0,0 +1,202 @@
+import py
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class TestInstance(BaseTestPyPyC):
+
+    def test_virtual_instance(self):
+        def main(n):
+            class A(object):
+                pass
+            #
+            i = 0
+            while i < n:
+                a = A()
+                assert isinstance(a, A)
+                assert not isinstance(a, int)
+                a.x = 2
+                i = i + a.x
+            return i
+        #
+        log = self.run(main, [1000], threshold = 400)
+        assert log.result == 1000
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i7 = int_lt(i5, i6)
+            guard_true(i7, descr=<Guard3>)
+            guard_not_invalidated(descr=<Guard4>)
+            i9 = int_add_ovf(i5, 2)
+            guard_no_overflow(descr=<Guard5>)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, i9, i6, descr=<Loop0>)
+        """)
+
+    def test_load_attr(self):
+        src = '''
+            class A(object):
+                pass
+            a = A()
+            a.x = 2
+            def main(n):
+                i = 0
+                while i < n:
+                    i = i + a.x
+                return i
+        '''
+        log = self.run(src, [1000])
+        assert log.result == 1000
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i9 = int_lt(i5, i6)
+            guard_true(i9, descr=<Guard3>)
+            guard_not_invalidated(descr=<Guard4>)
+            i10 = int_add_ovf(i5, i7)
+            guard_no_overflow(descr=<Guard5>)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, i10, i6, p7, i7, p8, descr=<Loop0>)
+        """)
+
+    def test_getattr_with_dynamic_attribute(self):
+        src = """
+        class A(object):
+            pass
+
+        l = ["x", "y"]
+
+        def main():
+            sum = 0
+            a = A()
+            a.a1 = 0
+            a.a2 = 0
+            a.a3 = 0
+            a.a4 = 0
+            a.a5 = 0 # workaround, because the first five attributes need a promotion
+            a.x = 1
+            a.y = 2
+            i = 0
+            while i < 500:
+                name = l[i % 2]
+                sum += getattr(a, name)
+                i += 1
+            return sum
+        """
+        log = self.run(src, [])
+        assert log.result == 250 + 250*2
+        loops = log.loops_by_filename(self.filepath)
+        assert len(loops) == 1
+
+    def test_mutate_class(self):
+        def fn(n):
+            class A(object):
+                count = 1
+                def __init__(self, a):
+                    self.a = a
+                def f(self):
+                    return self.count
+            i = 0
+            a = A(1)
+            while i < n:
+                A.count += 1 # ID: mutate
+                i = a.f()    # ID: meth1
+            return i
+        #
+        log = self.run(fn, [1000], threshold=10)
+        assert log.result == 1000
+        #
+        # first, we test the entry bridge
+        # -------------------------------
+        entry_bridge, = log.loops_by_filename(self.filepath, is_entry_bridge=True)
+        ops = entry_bridge.ops_by_id('mutate', opcode='LOAD_ATTR')
+        assert log.opnames(ops) == ['guard_value', 'guard_not_invalidated',
+                                    'getfield_gc', 'guard_nonnull_class']
+        # the STORE_ATTR is folded away
+        assert list(entry_bridge.ops_by_id('meth1', opcode='STORE_ATTR')) == []
+        #
+        # then, the actual loop
+        # ----------------------
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i8 = getfield_gc_pure(p5, descr=<SignedFieldDescr .*W_IntObject.inst_intval.*>)
+            i9 = int_lt(i8, i7)
+            guard_true(i9, descr=.*)
+            guard_not_invalidated(descr=.*)
+            i11 = int_add(i8, 1)
+            i12 = force_token()
+            --TICK--
+            p20 = new_with_vtable(ConstClass(W_IntObject))
+            setfield_gc(p20, i11, descr=<SignedFieldDescr.*W_IntObject.inst_intval .*>)
+            setfield_gc(ConstPtr(ptr21), p20, descr=<GcPtrFieldDescr .*TypeCell.inst_w_value .*>)
+            jump(p0, p1, p2, p3, p4, p20, p6, i7, descr=<Loop.>)
+        """)
+
+    def test_oldstyle_newstyle_mix(self):
+        def main():
+            class A:
+                pass
+
+            class B(object, A):
+                def __init__(self, x):
+                    self.x = x
+
+            i = 0
+            b = B(1)
+            while i < 100:
+                v = b.x # ID: loadattr
+                i += v
+            return i
+
+        log = self.run(main, [], threshold=80)
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match_by_id('loadattr',
+        '''
+        guard_not_invalidated(descr=...)
+        i19 = call(ConstClass(ll_dict_lookup), _, _, _, descr=...)
+        guard_no_exception(descr=...)
+        i21 = int_and(i19, _)
+        i22 = int_is_true(i21)
+        guard_true(i22, descr=...)
+        i26 = call(ConstClass(ll_dict_lookup), _, _, _, descr=...)
+        guard_no_exception(descr=...)
+        i28 = int_and(i26, _)
+        i29 = int_is_true(i28)
+        guard_true(i29, descr=...)
+        ''')
+
+    def test_python_contains(self):
+        def main():
+            class A(object):
+                def __contains__(self, v):
+                    return True
+
+            i = 0
+            a = A()
+            while i < 100:
+                i += i in a # ID: contains
+                b = 0       # to make sure that JUMP_ABSOLUTE is not part of the ID
+
+        log = self.run(main, [], threshold=80)
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match_by_id("contains", """
+            guard_not_invalidated(descr=...)
+            i11 = force_token()
+            i12 = int_add_ovf(i5, i7)
+            guard_no_overflow(descr=...)
+        """)
+
+    def test_id_compare_optimization(self):
+        def main():
+            class A(object):
+                pass
+            #
+            i = 0
+            a = A()
+            while i < 300:
+                new_a = A()
+                if new_a != a:  # ID: compare
+                    pass
+                i += 1
+            return i
+        #
+        log = self.run(main, [])
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match_by_id("compare", "") # optimized away
+
diff --git a/pypy/module/pypyjit/test_pypy_c/test_intbound.py b/pypy/module/pypyjit/test_pypy_c/test_intbound.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_intbound.py
@@ -0,0 +1,296 @@
+import py
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class TestIntbound(BaseTestPyPyC):
+
+    def test_intbound_simple(self):
+        """
+        This test only checks that we get the expected result, not that any
+        optimization has been applied.
+        """
+        ops = ('<', '>', '<=', '>=', '==', '!=')
+        nbr = (3, 7)
+        for o1 in ops:
+            for o2 in ops:
+                for n1 in nbr:
+                    for n2 in nbr:
+                        src = '''
+                        def f(i):
+                            a, b = 3, 3
+                            if i %s %d:
+                                a = 0
+                            else:
+                                a = 1
+                            if i %s %d:
+                                b = 0
+                            else:
+                                b = 1
+                            return a + b * 2
+
+                        def main():
+                            res = [0] * 4
+                            idx = []
+                            for i in range(15):
+                                idx.extend([i] * 15)
+                            for i in idx:
+                                res[f(i)] += 1
+                            return res
+
+                        ''' % (o1, n1, o2, n2)
+                        yield self.run_and_check, src
+
+    def test_intbound_addsub_mix(self):
+        """
+        This test only checks that we get the expected result, not that any
+        optimization has been applied.
+        """
+        tests = ('i > 4', 'i > 2', 'i + 1 > 2', '1 + i > 4',
+                 'i - 1 > 1', '1 - i > 1', '1 - i < -3',
+                 'i == 1', 'i == 5', 'i != 1', '-2 * i < -4')
+        for t1 in tests:
+            for t2 in tests:
+                src = '''
+                def f(i):
+                    a, b = 3, 3
+                    if %s:
+                        a = 0
+                    else:
+                        a = 1
+                    if %s:
+                        b = 0
+                    else:
+                        b = 1
+                    return a + b * 2
+
+                def main():
+                    res = [0] * 4
+                    idx = []
+                    for i in range(15):
+                        idx.extend([i] * 15)
+                    for i in idx:
+                        res[f(i)] += 1
+                    return res
+
+                ''' % (t1, t2)
+                yield self.run_and_check, src
+
+    def test_intbound_gt(self):
+        def main(n):
+            i, a, b = 0, 0, 0
+            while i < n:
+                if i > -1:
+                    a += 1
+                if i > -2:
+                    b += 1
+                i += 1
+            return (a, b)
+        #
+        log = self.run(main, [300])
+        assert log.result == (300, 300)
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i10 = int_lt(i8, i9)
+            guard_true(i10, descr=...)
+            i12 = int_add_ovf(i7, 1)
+            guard_no_overflow(descr=...)
+            i14 = int_add_ovf(i6, 1)
+            guard_no_overflow(descr=...)
+            i17 = int_add(i8, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, p5, i14, i12, i17, i9, descr=<Loop0>)
+        """)
+
+    def test_intbound_sub_lt(self):
+        def main():
+            i, a = 0, 0
+            while i < 300:
+                if i - 10 < 295:
+                    a += 1
+                i += 1
+            return a
+        #
+        log = self.run(main, [])
+        assert log.result == 300
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i7 = int_lt(i5, 300)
+            guard_true(i7, descr=...)
+            i9 = int_sub_ovf(i5, 10)
+            guard_no_overflow(descr=...)
+            i11 = int_add_ovf(i4, 1)
+            guard_no_overflow(descr=...)
+            i13 = int_add(i5, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, i11, i13, descr=<Loop0>)
+        """)
+
+    def test_intbound_addsub_ge(self):
+        def main(n):
+            i, a, b = 0, 0, 0
+            while i < n:
+                if i + 5 >= 5:
+                    a += 1
+                if i - 1 >= -1:
+                    b += 1
+                i += 1
+            return (a, b)
+        #
+        log = self.run(main, [300])
+        assert log.result == (300, 300)
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i10 = int_lt(i8, i9)
+            guard_true(i10, descr=...)
+            i12 = int_add_ovf(i8, 5)
+            guard_no_overflow(descr=...)
+            i14 = int_add_ovf(i7, 1)
+            guard_no_overflow(descr=...)
+            i16 = int_add_ovf(i6, 1)
+            guard_no_overflow(descr=...)
+            i19 = int_add(i8, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, p5, i16, i14, i19, i9, descr=<Loop0>)
+        """)
+
+    def test_intbound_addmul_ge(self):
+        def main(n):
+            i, a, b = 0, 0, 0
+            while i < 300:
+                if i + 5 >= 5:
+                    a += 1
+                if 2 * i >= 0:
+                    b += 1
+                i += 1
+            return (a, b)
+        #
+        log = self.run(main, [300])
+        assert log.result == (300, 300)
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i10 = int_lt(i8, 300)
+            guard_true(i10, descr=...)
+            i12 = int_add(i8, 5)
+            i14 = int_add_ovf(i7, 1)
+            guard_no_overflow(descr=...)
+            i16 = int_lshift(i8, 1)
+            i18 = int_add_ovf(i6, 1)
+            guard_no_overflow(descr=...)
+            i21 = int_add(i8, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, p5, i18, i14, i21, descr=<Loop0>)
+        """)
+
+    def test_intbound_eq(self):
+        def main(a, n):
+            i, s = 0, 0
+            while i < 300:
+                if a == 7:
+                    s += a + 1
+                elif i == 10:
+                    s += i
+                else:
+                    s += 1
+                i += 1
+            return s
+        #
+        log = self.run(main, [7, 300])
+        assert log.result == main(7, 300)
+        log = self.run(main, [10, 300])
+        assert log.result == main(10, 300)
+        log = self.run(main, [42, 300])
+        assert log.result == main(42, 300)
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i10 = int_lt(i8, 300)
+            guard_true(i10, descr=...)
+            i12 = int_eq(i8, 10)
+            guard_false(i12, descr=...)
+            i14 = int_add_ovf(i7, 1)
+            guard_no_overflow(descr=...)
+            i16 = int_add(i8, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, p5, p6, i14, i16, descr=<Loop0>)
+        """)
+
+    def test_intbound_mul(self):
+        def main(a):
+            i, s = 0, 0
+            while i < 300:
+                assert i >= 0
+                if 2 * i < 30000:
+                    s += 1
+                else:
+                    s += a
+                i += 1
+            return s
+        #
+        log = self.run(main, [7])
+        assert log.result == 300
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i8 = int_lt(i6, 300)
+            guard_true(i8, descr=...)
+            i10 = int_lshift(i6, 1)
+            i12 = int_add_ovf(i5, 1)
+            guard_no_overflow(descr=...)
+            i14 = int_add(i6, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, i12, i14, descr=<Loop0>)
+        """)
+
+    def test_assert(self):
+        def main(a):
+            i, s = 0, 0
+            while i < 300:
+                assert a == 7
+                s += a + 1
+                i += 1
+            return s
+        log = self.run(main, [7])
+        assert log.result == 300*8
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i8 = int_lt(i6, 300)
+            guard_true(i8, descr=...)
+            i10 = int_add_ovf(i5, 8)
+            guard_no_overflow(descr=...)
+            i12 = int_add(i6, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, i10, i12, descr=<Loop0>)
+        """)
+
+    def test_xor(self):
+        def main(b):
+            a = sa = 0
+            while a < 300:
+                if a > 0: # Specialises the loop
+                    pass
+                if b > 10:
+                    pass
+                if a^b >= 0:  # ID: guard
+                    sa += 1
+                sa += a^a     # ID: a_xor_a
+                a += 1
+            return sa
+
+        log = self.run(main, [11])
+        assert log.result == 300
+        loop, = log.loops_by_filename(self.filepath)
+        # if both are >=0, a^b is known to be >=0
+        # note that we know that b>10
+        assert loop.match_by_id('guard', """
+            i10 = int_xor(i5, i7)
+        """)
+        #
+        # x^x is always optimized to 0
+        assert loop.match_by_id('a_xor_a', "")
+
+        log = self.run(main, [9])
+        assert log.result == 300
+        loop, = log.loops_by_filename(self.filepath)
+        # we don't know that b>10, hence we cannot optimize it
+        assert loop.match_by_id('guard', """
+            i10 = int_xor(i5, i7)
+            i12 = int_ge(i10, 0)
+            guard_true(i12, descr=...)
+        """)
diff --git a/pypy/module/pypyjit/test_pypy_c/test_min_max.py b/pypy/module/pypyjit/test_pypy_c/test_min_max.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_min_max.py
@@ -0,0 +1,67 @@
+import py
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class TestMinMax(BaseTestPyPyC):
+
+    def test_min_max(self):
+        def main():
+            i=0
+            sa=0
+            while i < 300:
+                sa+=min(max(i, 3000), 4000)
+                i+=1
+            return sa
+        log = self.run(main, [])
+        assert log.result == 300*3000
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i7 = int_lt(i4, 300)
+            guard_true(i7, descr=...)
+            i9 = int_add_ovf(i5, 3000)
+            guard_no_overflow(descr=...)
+            i11 = int_add(i4, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, i11, i9, descr=<Loop0>)
+        """)
+
+    def test_silly_max(self):
+        def main():
+            i = 2
+            sa = 0
+            while i < 300:
+                lst = range(i)
+                sa += max(*lst) # ID: max
+                i += 1
+            return sa
+        log = self.run(main, [])
+        assert log.result == main()
+        loop, = log.loops_by_filename(self.filepath)
+        # We dont want too many guards, but a residual call to min_max_loop
+        guards = [n for n in log.opnames(loop.ops_by_id("max")) if n.startswith('guard')]
+        assert len(guards) < 20
+        assert loop.match_by_id('max',"""
+            ...
+            p76 = call_may_force(ConstClass(min_max_loop__max), _, _, descr=...)
+            ...
+        """)
+
+    def test_iter_max(self):
+        def main():
+            i = 2
+            sa = 0
+            while i < 300:
+                lst = range(i)
+                sa += max(lst) # ID: max
+                i += 1
+            return sa
+        log = self.run(main, [])
+        assert log.result == main()
+        loop, = log.loops_by_filename(self.filepath)
+        # We dont want too many guards, but a residual call to min_max_loop
+        guards = [n for n in log.opnames(loop.ops_by_id("max")) if n.startswith('guard')]
+        assert len(guards) < 20
+        assert loop.match_by_id('max',"""
+            ...
+            p76 = call_may_force(ConstClass(min_max_loop__max), _, _, descr=...)
+            ...
+        """)
diff --git a/pypy/module/pypyjit/test_pypy_c/test_pypy_c_new.py b/pypy/module/pypyjit/test_pypy_c/test_misc.py
rename from pypy/module/pypyjit/test_pypy_c/test_pypy_c_new.py
rename to pypy/module/pypyjit/test_pypy_c/test_misc.py
--- a/pypy/module/pypyjit/test_pypy_c/test_pypy_c_new.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_misc.py
@@ -1,13 +1,8 @@
-import py, sys, re
-import subprocess
-from lib_pypy import disassembler
-from pypy.tool.udir import udir
-from pypy.tool import logparser
-from pypy.module.pypyjit.test_pypy_c.model import Log
-from pypy.module.pypyjit.test_pypy_c.test_model import BaseTestPyPyC
+import py, sys
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
 
 
-class TestPyPyCNew(BaseTestPyPyC):
+class TestMisc(BaseTestPyPyC):
     def test_f1(self):
         def f1(n):
             "Arbitrary test function."
@@ -76,378 +71,6 @@
         """)
 
 
-    def test_recursive_call(self):
-        def fn():
-            def rec(n):
-                if n == 0:
-                    return 0
-                return 1 + rec(n-1)
-            #
-            # this loop is traced and then aborted, because the trace is too
-            # long. But then "rec" is marked as "don't inline"
-            i = 0
-            j = 0
-            while i < 20:
-                i += 1
-                j += rec(100)
-            #
-            # next time we try to trace "rec", instead of inlining we compile
-            # it separately and generate a call_assembler
-            i = 0
-            j = 0
-            while i < 20:
-                i += 1
-                j += rec(100) # ID: call_rec
-                a = 0
-            return j
-        #
-        log = self.run(fn, [], threshold=18)
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match_by_id('call_rec', """
-            ...
-            p53 = call_assembler(..., descr=...)
-            guard_not_forced(descr=...)
-            guard_no_exception(descr=...)
-            ...
-        """)
-
-    def test_cmp_exc(self):
-        def f1(n):
-            # So we don't get a LOAD_GLOBAL op
-            KE = KeyError
-            i = 0
-            while i < n:
-                try:
-                    raise KE
-                except KE: # ID: except
-                    i += 1
-            return i
-
-        log = self.run(f1, [10000])
-        assert log.result == 10000
-        loop, = log.loops_by_id("except")
-        ops = list(loop.ops_by_id("except", opcode="COMPARE_OP"))
-        assert ops == []
-
-    def test_simple_call(self):
-        src = """
-            OFFSET = 0
-            def f(i):
-                return i + 1 + OFFSET # ID: add
-            def main(n):
-                i = 0
-                while i < n+OFFSET:   # ID: cond
-                    i = f(f(i))       # ID: call
-                    a = 0
-                return i
-        """
-        log = self.run(src, [1000], threshold=400)
-        assert log.result == 1000
-        # first, we test what is inside the entry bridge
-        # -----------------------------------------------
-        entry_bridge, = log.loops_by_id('call', is_entry_bridge=True)
-        # LOAD_GLOBAL of OFFSET
-        ops = entry_bridge.ops_by_id('cond', opcode='LOAD_GLOBAL')
-        assert log.opnames(ops) == ["guard_value",
-                                    "getfield_gc", "guard_value",
-                                    "getfield_gc", "guard_isnull",
-                                    "getfield_gc", "guard_nonnull_class"]
-        # LOAD_GLOBAL of OFFSET but in different function partially folded
-        # away
-        # XXX could be improved
-        ops = entry_bridge.ops_by_id('add', opcode='LOAD_GLOBAL')
-        assert log.opnames(ops) == ["guard_value", "getfield_gc", "guard_isnull"]
-        #
-        # two LOAD_GLOBAL of f, the second is folded away
-        ops = entry_bridge.ops_by_id('call', opcode='LOAD_GLOBAL')
-        assert log.opnames(ops) == ["getfield_gc", "guard_nonnull_class"]
-        #
-        assert entry_bridge.match_by_id('call', """
-            p29 = getfield_gc(ConstPtr(ptr28), descr=<GcPtrFieldDescr pypy.objspace.std.celldict.ModuleCell.inst_w_value .*>)
-            guard_nonnull_class(p29, ConstClass(Function), descr=<Guard18>)
-            p33 = getfield_gc(p29, descr=<GcPtrFieldDescr pypy.interpreter.function.Function.inst_code .*>)
-            guard_value(p33, ConstPtr(ptr34), descr=<Guard19>)
-            p35 = getfield_gc(p29, descr=<GcPtrFieldDescr pypy.interpreter.function.Function.inst_w_func_globals .*>)
-            p36 = getfield_gc(p29, descr=<GcPtrFieldDescr pypy.interpreter.function.Function.inst_closure .*>)
-            p38 = call(ConstClass(getexecutioncontext), descr=<GcPtrCallDescr>)
-            p39 = getfield_gc(p38, descr=<GcPtrFieldDescr pypy.interpreter.executioncontext.ExecutionContext.inst_topframeref .*>)
-            i40 = force_token()
-            p41 = getfield_gc(p38, descr=<GcPtrFieldDescr pypy.interpreter.executioncontext.ExecutionContext.inst_w_tracefunc .*>)
-            guard_isnull(p41, descr=<Guard20>)
-            i42 = getfield_gc(p38, descr=<NonGcPtrFieldDescr pypy.interpreter.executioncontext.ExecutionContext.inst_profilefunc .*>)
-            i43 = int_is_zero(i42)
-            guard_true(i43, descr=<Guard21>)
-            i50 = force_token()
-        """)
-        #
-        # then, we test the actual loop
-        # -----------------------------
-        loop, = log.loops_by_id('call')
-        assert loop.match("""
-            i12 = int_lt(i5, i6)
-            guard_true(i12, descr=<Guard3>)
-            i13 = force_token()
-            i15 = int_add(i5, 1)
-            i16 = int_add_ovf(i15, i7)
-            guard_no_overflow(descr=<Guard4>)
-            i18 = force_token()
-            i20 = int_add_ovf(i16, 1)
-            guard_no_overflow(descr=<Guard5>)
-            i21 = int_add_ovf(i20, i7)
-            guard_no_overflow(descr=<Guard6>)
-            --TICK--
-            jump(p0, p1, p2, p3, p4, i21, i6, i7, p8, p9, p10, p11, descr=<Loop0>)
-        """)
-
-    def test_method_call(self):
-        def fn(n):
-            class A(object):
-                def __init__(self, a):
-                    self.a = a
-                def f(self, i):
-                    return self.a + i
-            i = 0
-            a = A(1)
-            while i < n:
-                x = a.f(i)    # ID: meth1
-                i = a.f(x)    # ID: meth2
-            return i
-        #
-        log = self.run(fn, [1000], threshold=400)
-        assert log.result == 1000
-        #
-        # first, we test the entry bridge
-        # -------------------------------
-        entry_bridge, = log.loops_by_filename(self.filepath, is_entry_bridge=True)
-        ops = entry_bridge.ops_by_id('meth1', opcode='LOOKUP_METHOD')
-        assert log.opnames(ops) == ['guard_value', 'getfield_gc', 'guard_value',
-                                    'guard_not_invalidated']
-        # the second LOOKUP_METHOD is folded away
-        assert list(entry_bridge.ops_by_id('meth2', opcode='LOOKUP_METHOD')) == []
-        #
-        # then, the actual loop
-        # ----------------------
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i15 = int_lt(i6, i9)
-            guard_true(i15, descr=<Guard3>)
-            guard_not_invalidated(descr=<Guard4>)
-            i16 = force_token()
-            i17 = int_add_ovf(i10, i6)
-            guard_no_overflow(descr=<Guard5>)
-            i18 = force_token()
-            i19 = int_add_ovf(i10, i17)
-            guard_no_overflow(descr=<Guard6>)
-            --TICK--
-            jump(p0, p1, p2, p3, p4, p5, i19, p7, i17, i9, i10, p11, p12, p13, descr=<Loop0>)
-        """)
-
-    def test_static_classmethod_call(self):
-        def fn(n):
-            class A(object):
-                @classmethod
-                def f(cls, i):
-                    return i + (cls is A) + 1
-                @staticmethod
-                def g(i):
-                    return i - 1
-            #
-            i = 0
-            a = A()
-            while i < n:
-                x = a.f(i)
-                i = a.g(x)
-            return i
-        #
-        log = self.run(fn, [1000], threshold=400)
-        assert log.result == 1000
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i14 = int_lt(i6, i9)
-            guard_true(i14, descr=<Guard3>)
-            guard_not_invalidated(descr=<Guard4>)
-            i15 = force_token()
-            i17 = int_add_ovf(i8, 1)
-            guard_no_overflow(descr=<Guard5>)
-            i18 = force_token()
-            i20 = int_sub(i17, 1)
-            --TICK--
-            jump(p0, p1, p2, p3, p4, p5, i20, p7, i17, i9, p10, p11, p12, descr=<Loop0>)
-        """)
-
-    def test_default_and_kw(self):
-        def main(n):
-            def f(i, j=1):
-                return i + j
-            #
-            i = 0
-            while i < n:
-                i = f(f(i), j=1) # ID: call
-                a = 0
-            return i
-        #
-        log = self.run(main, [1000], threshold=400)
-        assert log.result == 1000
-        loop, = log.loops_by_id('call')
-        assert loop.match_by_id('call', """
-            i14 = force_token()
-            i16 = force_token()
-        """)
-
-    def test_kwargs(self):
-        # this is not a very precise test, could be improved
-        def main(x):
-            def g(**args):
-                return len(args)
-            #
-            s = 0
-            d = {}
-            for i in range(x):
-                s += g(**d)       # ID: call
-                d[str(i)] = i
-                if i % 100 == 99:
-                    d = {}
-            return s
-        #
-        log = self.run(main, [1000], threshold=400)
-        assert log.result == 49500
-        loop, = log.loops_by_id('call')
-        ops = log.opnames(loop.ops_by_id('call'))
-        guards = [ops for ops in ops if ops.startswith('guard')]
-        assert len(guards) <= 5
-
-    def test_stararg_virtual(self):
-        def main(x):
-            def g(*args):
-                return len(args)
-            def h(a, b, c):
-                return c
-            #
-            s = 0
-            for i in range(x):
-                l = [i, x, 2]
-                s += g(*l)       # ID: g1
-                s += h(*l)       # ID: h1
-                s += g(i, x, 2)  # ID: g2
-                a = 0
-            for i in range(x):
-                l = [x, 2]
-                s += g(i, *l)    # ID: g3
-                s += h(i, *l)    # ID: h2
-                a = 0
-            return s
-        #
-        log = self.run(main, [1000], threshold=400)
-        assert log.result == 13000
-        loop0, = log.loops_by_id('g1')
-        assert loop0.match_by_id('g1', """
-            i20 = force_token()
-            setfield_gc(p4, i19, descr=<.*W_AbstractSeqIterObject.inst_index .*>)
-            i22 = int_add_ovf(i8, 3)
-            guard_no_overflow(descr=<Guard4>)
-        """)
-        assert loop0.match_by_id('h1', """
-            i20 = force_token()
-            i22 = int_add_ovf(i8, 2)
-            guard_no_overflow(descr=<Guard5>)
-        """)
-        assert loop0.match_by_id('g2', """
-            i27 = force_token()
-            i29 = int_add_ovf(i26, 3)
-            guard_no_overflow(descr=<Guard6>)
-        """)
-        #
-        loop1, = log.loops_by_id('g3')
-        assert loop1.match_by_id('g3', """
-            i21 = force_token()
-            setfield_gc(p4, i20, descr=<.* .*W_AbstractSeqIterObject.inst_index .*>)
-            i23 = int_add_ovf(i9, 3)
-            guard_no_overflow(descr=<Guard37>)
-        """)
-        assert loop1.match_by_id('h2', """
-            i25 = force_token()
-            i27 = int_add_ovf(i23, 2)
-            guard_no_overflow(descr=<Guard38>)
-        """)
-
-    def test_stararg(self):
-        def main(x):
-            def g(*args):
-                return args[-1]
-            def h(*args):
-                return len(args)
-            #
-            s = 0
-            l = []
-            i = 0
-            while i < x:
-                l.append(1)
-                s += g(*l)     # ID: g
-                i = h(*l)      # ID: h
-                a = 0
-            return s
-        #
-        log = self.run(main, [1000], threshold=400)
-        assert log.result == 1000
-        loop, = log.loops_by_id('g')
-        ops_g = log.opnames(loop.ops_by_id('g'))
-        ops_h = log.opnames(loop.ops_by_id('h'))
-        ops = ops_g + ops_h
-        assert 'new_with_vtable' not in ops
-        assert 'call_may_force' not in ops
-
-    def test_virtual_instance(self):
-        def main(n):
-            class A(object):
-                pass
-            #
-            i = 0
-            while i < n:
-                a = A()
-                assert isinstance(a, A)
-                assert not isinstance(a, int)
-                a.x = 2
-                i = i + a.x
-            return i
-        #
-        log = self.run(main, [1000], threshold = 400)
-        assert log.result == 1000
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i7 = int_lt(i5, i6)
-            guard_true(i7, descr=<Guard3>)
-            guard_not_invalidated(descr=<Guard4>)
-            i9 = int_add_ovf(i5, 2)
-            guard_no_overflow(descr=<Guard5>)
-            --TICK--
-            jump(p0, p1, p2, p3, p4, i9, i6, descr=<Loop0>)
-        """)
-
-    def test_load_attr(self):
-        src = '''
-            class A(object):
-                pass
-            a = A()
-            a.x = 2
-            def main(n):
-                i = 0
-                while i < n:
-                    i = i + a.x
-                return i
-        '''
-        log = self.run(src, [1000], threshold=400)
-        assert log.result == 1000
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i9 = int_lt(i5, i6)
-            guard_true(i9, descr=<Guard3>)
-            guard_not_invalidated(descr=<Guard4>)
-            i10 = int_add_ovf(i5, i7)
-            guard_no_overflow(descr=<Guard5>)
-            --TICK--
-            jump(p0, p1, p2, p3, p4, i10, i6, p7, i7, p8, descr=<Loop0>)
-        """)
-
     def test_mixed_type_loop(self):
         def main(n):
             i = 0.0
@@ -456,7 +79,7 @@
                 i = j + i
             return i
         #
-        log = self.run(main, [1000], threshold=400)
+        log = self.run(main, [1000])
         assert log.result == 1000.0
         loop, = log.loops_by_filename(self.filepath)
         assert loop.match("""
@@ -467,25 +90,6 @@
             jump(p0, p1, p2, p3, p4, f10, p6, f7, f8, descr=<Loop0>)
         """)
 
-    def test_call_builtin_function(self):
-        def main(n):
-            i = 2
-            l = []
-            while i < n:
-                i += 1
-                l.append(i)    # ID: append
-                a = 0
-            return i, len(l)
-        #
-        log = self.run(main, [1000], threshold=400)
-        assert log.result == (1000, 998)
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match_by_id('append', """
-            p14 = new_with_vtable(ConstClass(W_IntObject))
-            setfield_gc(p14, i12, descr=<SignedFieldDescr .*W_IntObject.inst_intval .*>)
-            call(ConstClass(ll_append__listPtr_objectPtr), p8, p14, descr=...)
-            guard_no_exception(descr=<Guard4>)
-        """)
 
     def test_range_iter(self):
         def main(n):
@@ -498,7 +102,7 @@
                 a = 0
             return s
         #
-        log = self.run(main, [1000], threshold=400)
+        log = self.run(main, [1000])
         assert log.result == 1000 * 999 / 2
         loop, = log.loops_by_filename(self.filepath)
         assert loop.match("""
@@ -520,76 +124,6 @@
             jump(..., descr=<Loop0>)
         """)
 
-    def test_exception_inside_loop_1(self):
-        def main(n):
-            while n:
-                try:
-                    raise ValueError
-                except ValueError:
-                    pass
-                n -= 1
-            return n
-        #
-        log = self.run(main, [1000], threshold=400)
-        assert log.result == 0
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-        i5 = int_is_true(i3)
-        guard_true(i5, descr=<Guard3>)
-        guard_not_invalidated(descr=<Guard4>)
-        --EXC-TICK--
-        i12 = int_sub_ovf(i3, 1)
-        guard_no_overflow(descr=<Guard6>)
-        --TICK--
-        jump(..., descr=<Loop0>)
-        """)
-
-    def test_exception_inside_loop_2(self):
-        def main(n):
-            def g(n):
-                raise ValueError(n)  # ID: raise
-            def f(n):
-                g(n)
-            #
-            while n:
-                try:
-                    f(n)
-                except ValueError:
-                    pass
-                n -= 1
-            return n
-        #
-        log = self.run(main, [1000], threshold=400)
-        assert log.result == 0
-        loop, = log.loops_by_filename(self.filepath)
-        ops = log.opnames(loop.ops_by_id('raise'))
-        assert 'new' not in ops
-
-    def test_reraise(self):
-        def f(n):
-            i = 0
-            while i < n:
-                try:
-                    try:
-                        raise KeyError
-                    except KeyError:
-                        raise
-                except KeyError:
-                    i += 1
-            return i
-
-        log = self.run(f, [100000])
-        assert log.result == 100000
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i7 = int_lt(i4, i5)
-            guard_true(i7, descr=<Guard3>)
-            guard_not_invalidated(descr=<Guard4>)
-            --EXC-TICK--
-            i14 = int_add(i4, 1)
-            --TICK--
-            jump(..., descr=<Loop0>)
-        """)
 
     def test_chain_of_guards(self):
         src = """
@@ -609,445 +143,11 @@
                 i += 1
             return sum
         """
-        log = self.run(src, [0], threshold=400)
+        log = self.run(src, [0])
         assert log.result == 500*3
         loops = log.loops_by_filename(self.filepath)
         assert len(loops) == 1
 
-    def test_getattr_with_dynamic_attribute(self):
-        src = """
-        class A(object):
-            pass
-
-        l = ["x", "y"]
-
-        def main():
-            sum = 0
-            a = A()
-            a.a1 = 0
-            a.a2 = 0
-            a.a3 = 0
-            a.a4 = 0
-            a.a5 = 0 # workaround, because the first five attributes need a promotion
-            a.x = 1
-            a.y = 2
-            i = 0
-            while i < 500:
-                name = l[i % 2]
-                sum += getattr(a, name)
-                i += 1
-            return sum
-        """
-        log = self.run(src, [], threshold=400)
-        assert log.result == 250 + 250*2
-        loops = log.loops_by_filename(self.filepath)
-        assert len(loops) == 1
-
-    def test_blockstack_virtualizable(self):
-        def main(n):
-            from pypyjit import residual_call
-            i = 0
-            while i < n:
-                try:
-                    residual_call(len, [])   # ID: call
-                except:
-                    pass
-                i += 1
-            return i
-        #
-        log = self.run(main, [500], threshold=400)
-        assert log.result == 500
-        loop, = log.loops_by_id('call')
-        assert loop.match_by_id('call', opcode='CALL_FUNCTION', expected_src="""
-            # make sure that the "block" is not allocated
-            ...
-            i20 = force_token()
-            setfield_gc(p0, i20, descr=<SignedFieldDescr .*PyFrame.vable_token .*>)
-            p22 = new_with_vtable(19511408)
-            p24 = new_array(1, descr=<GcPtrArrayDescr>)
-            p26 = new_with_vtable(ConstClass(W_ListObject))
-            p27 = new(descr=<SizeDescr .*>)
-            p29 = new_array(0, descr=<GcPtrArrayDescr>)
-            setfield_gc(p27, p29, descr=<GcPtrFieldDescr list.items .*>)
-            setfield_gc(p26, p27, descr=<.* .*W_ListObject.inst_wrappeditems .*>)
-            setarrayitem_gc(p24, 0, p26, descr=<GcPtrArrayDescr>)
-            setfield_gc(p22, p24, descr=<GcPtrFieldDescr .*Arguments.inst_arguments_w .*>)
-            p32 = call_may_force(11376960, p18, p22, descr=<GcPtrCallDescr>)
-            ...
-        """)
-
-    def test_import_in_function(self):
-        def main(n):
-            i = 0
-            while i < n:
-                from sys import version  # ID: import
-                i += 1
-            return i
-        #
-        log = self.run(main, [500], threshold=400)
-        assert log.result == 500
-        loop, = log.loops_by_id('import')
-        assert loop.match_by_id('import', """
-            p11 = getfield_gc(ConstPtr(ptr10), descr=<GcPtrFieldDescr pypy.objspace.std.celldict.ModuleCell.inst_w_value 8>)
-            guard_value(p11, ConstPtr(ptr12), descr=<Guard4>)
-            guard_not_invalidated(descr=<Guard5>)
-            p14 = getfield_gc(ConstPtr(ptr13), descr=<GcPtrFieldDescr pypy.objspace.std.celldict.ModuleCell.inst_w_value 8>)
-            p16 = getfield_gc(ConstPtr(ptr15), descr=<GcPtrFieldDescr pypy.objspace.std.celldict.ModuleCell.inst_w_value 8>)
-            guard_value(p14, ConstPtr(ptr17), descr=<Guard6>)
-            guard_isnull(p16, descr=<Guard7>)
-        """)
-
-    def test_import_fast_path(self, tmpdir):
-        pkg = tmpdir.join('mypkg').ensure(dir=True)
-        pkg.join('__init__.py').write("")
-        pkg.join('mod.py').write(str(py.code.Source("""
-            def do_the_import():
-                import sys
-        """)))
-        def main(path, n):
-            import sys
-            sys.path.append(path)
-            from mypkg.mod import do_the_import
-            for i in range(n):
-                do_the_import()
-        #
-        log = self.run(main, [str(tmpdir), 300], threshold=200)
-        loop, = log.loops_by_filename(self.filepath)
-        # this is a check for a slow-down that introduced a
-        # call_may_force(absolute_import_with_lock).
-        for opname in log.opnames(loop.allops(opcode="IMPORT_NAME")):
-            assert 'call' not in opname    # no call-like opcode
-
-    def test_arraycopy_disappears(self):
-        def main(n):
-            i = 0
-            while i < n:
-                t = (1, 2, 3, i + 1)
-                t2 = t[:]
-                del t
-                i = t2[3]
-                del t2
-            return i
-        #
-        log = self.run(main, [500], threshold=400)
-        assert log.result == 500
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i7 = int_lt(i5, i6)
-            guard_true(i7, descr=<Guard3>)
-            i9 = int_add(i5, 1)
-            --TICK--
-            jump(p0, p1, p2, p3, p4, i9, i6, descr=<Loop0>)
-        """)
-
-    def test_boolrewrite_inverse(self):
-        """
-        Test for this case::
-            guard(i < x)
-            ...
-            guard(i >= y)
-
-        where x and y can be either constants or variables. There are cases in
-        which the second guard is proven to be always true.
-        """
-
-        for a, b, res, opt_expected in (('2000', '2000', 20001000, True),
-                                        ( '500',  '500', 15001500, True),
-                                        ( '300',  '600', 16001700, False),
-                                        (   'a',    'b', 16001700, False),
-                                        (   'a',    'a', 13001700, True)):
-            src = """
-                def main():
-                    sa = 0
-                    a = 300
-                    b = 600
-                    for i in range(1000):
-                        if i < %s:         # ID: lt
-                            sa += 1
-                        else:
-                            sa += 2
-                        #
-                        if i >= %s:        # ID: ge
-                            sa += 10000
-                        else:
-                            sa += 20000
-                    return sa
-            """ % (a, b)
-            #
-            log = self.run(src, [], threshold=400)
-            assert log.result == res
-            loop, = log.loops_by_filename(self.filepath)
-            le_ops = log.opnames(loop.ops_by_id('lt'))
-            ge_ops = log.opnames(loop.ops_by_id('ge'))
-            assert le_ops.count('int_lt') == 1
-            #
-            if opt_expected:
-                assert ge_ops.count('int_ge') == 0
-            else:
-                # if this assert fails it means that the optimization was
-                # applied even if we don't expect to. Check whether the
-                # optimization is valid, and either fix the code or fix the
-                # test :-)
-                assert ge_ops.count('int_ge') == 1
-
-    def test_boolrewrite_reflex(self):
-        """
-        Test for this case::
-            guard(i < x)
-            ...
-            guard(y > i)
-
-        where x and y can be either constants or variables. There are cases in
-        which the second guard is proven to be always true.
-        """
-        for a, b, res, opt_expected in (('2000', '2000', 10001000, True),
-                                        ( '500',  '500', 15001500, True),
-                                        ( '300',  '600', 14001700, False),
-                                        (   'a',    'b', 14001700, False),
-                                        (   'a',    'a', 17001700, True)):
-
-            src = """
-                def main():
-                    sa = 0
-                    a = 300
-                    b = 600
-                    for i in range(1000):
-                        if i < %s:        # ID: lt
-                            sa += 1
-                        else:
-                            sa += 2
-                        if %s > i:        # ID: gt
-                            sa += 10000
-                        else:
-                            sa += 20000
-                    return sa
-            """ % (a, b)
-            log = self.run(src, [], threshold=400)
-            assert log.result == res
-            loop, = log.loops_by_filename(self.filepath)
-            le_ops = log.opnames(loop.ops_by_id('lt'))
-            gt_ops = log.opnames(loop.ops_by_id('gt'))
-            assert le_ops.count('int_lt') == 1
-            #
-            if opt_expected:
-                assert gt_ops.count('int_gt') == 0
-            else:
-                # if this assert fails it means that the optimization was
-                # applied even if we don't expect to. Check whether the
-                # optimization is valid, and either fix the code or fix the
-                # test :-)
-                assert gt_ops.count('int_gt') == 1
-
-
-    def test_boolrewrite_allcases_inverse(self):
-        """
-        Test for this case::
-            guard(i < x)
-            ...
-            guard(i > y)
-
-        with all possible combination of binary comparison operators.  This
-        test only checks that we get the expected result, not that any
-        optimization has been applied.
-        """
-        ops = ('<', '>', '<=', '>=', '==', '!=')
-        for op1 in ops:
-            for op2 in ops:
-                for a,b in ((500, 500), (300, 600)):
-                    src = """
-                        def main():
-                            sa = 0
-                            for i in range(300):
-                                if i %s %d:
-                                    sa += 1
-                                else:
-                                    sa += 2
-                                if i %s %d:
-                                    sa += 10000
-                                else:
-                                    sa += 20000
-                            return sa
-                    """ % (op1, a, op2, b)
-                    self.run_and_check(src, threshold=200)
-
-                    src = """
-                        def main():
-                            sa = 0
-                            i = 0.0
-                            while i < 250.0:
-                                if i %s %f:
-                                    sa += 1
-                                else:
-                                    sa += 2
-                                if i %s %f:
-                                    sa += 10000
-                                else:
-                                    sa += 20000
-                                i += 0.25
-                            return sa
-                    """ % (op1, float(a)/4.0, op2, float(b)/4.0)
-                    self.run_and_check(src, threshold=300)
-
-
-    def test_boolrewrite_allcases_reflex(self):
-        """
-        Test for this case::
-            guard(i < x)
-            ...
-            guard(x > i)
-
-        with all possible combination of binary comparison operators.  This
-        test only checks that we get the expected result, not that any
-        optimization has been applied.
-        """
-        ops = ('<', '>', '<=', '>=', '==', '!=')
-        for op1 in ops:
-            for op2 in ops:
-                for a,b in ((500, 500), (300, 600)):
-                    src = """
-                        def main():
-                            sa = 0
-                            for i in range(300):
-                                if i %s %d:
-                                    sa += 1
-                                else:
-                                    sa += 2
-                                if %d %s i:
-                                    sa += 10000
-                                else:
-                                    sa += 20000
-                            return sa
-                    """ % (op1, a, b, op2)
-                    self.run_and_check(src, threshold=200)
-
-                    src = """
-                        def main():
-                            sa = 0
-                            i = 0.0
-                            while i < 250.0:
-                                if i %s %f:
-                                    sa += 1
-                                else:
-                                    sa += 2
-                                if %f %s i:
-                                    sa += 10000
-                                else:
-                                    sa += 20000
-                                i += 0.25
-                            return sa
-                    """ % (op1, float(a)/4.0, float(b)/4.0, op2)
-                    self.run_and_check(src, threshold=300)
-
-    def test_boolrewrite_ptr(self):
-        """
-        This test only checks that we get the expected result, not that any
-        optimization has been applied.
-        """
-        compares = ('a == b', 'b == a', 'a != b', 'b != a', 'a == c', 'c != b')
-        for e1 in compares:
-            for e2 in compares:
-                src = """
-                    class tst(object):
-                        pass
-                    def main():
-                        a = tst()
-                        b = tst()
-                        c = tst()
-                        sa = 0
-                        for i in range(300):
-                            if %s:
-                                sa += 1
-                            else:
-                                sa += 2
-                            if %s:
-                                sa += 10000
-                            else:
-                                sa += 20000
-                            if i > 750:
-                                a = b
-                        return sa
-                """ % (e1, e2)
-                self.run_and_check(src, threshold=200)
-
-    def test_array_sum(self):
-        def main():
-            from array import array
-            img = array("i", range(128) * 5) * 480
-            l, i = 0, 0
-            while i < len(img):
-                l += img[i]
-                i += 1
-            return l
-        #
-        log = self.run(main, [])
-        assert log.result == 19507200
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i13 = int_lt(i7, i9)
-            guard_true(i13, descr=<Guard3>)
-            i15 = getarrayitem_raw(i10, i7, descr=<.*ArrayNoLengthDescr>)
-            i16 = int_add_ovf(i8, i15)
-            guard_no_overflow(descr=<Guard4>)
-            i18 = int_add(i7, 1)
-            --TICK--
-            jump(p0, p1, p2, p3, p4, p5, p6, i18, i16, i9, i10, descr=<Loop0>)
-        """)
-
-    def test_array_intimg(self):
-        def main():
-            from array import array
-            img = array('i', range(3)) * (350 * 480)
-            intimg = array('i', (0,)) * (640 * 480)
-            l, i = 0, 640
-            while i < 640 * 480:
-                assert len(img) == 3*350*480
-                assert len(intimg) == 640*480
-                l = l + img[i]
-                intimg[i] = (intimg[i-640] + l)
-                i += 1
-            return intimg[i - 1]
-        #
-        log = self.run(main, [])
-        assert log.result == 73574560
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i13 = int_lt(i8, 307200)
-            guard_true(i13, descr=<Guard3>)
-        # the bound check guard on img has been killed (thanks to the asserts)
-            i14 = getarrayitem_raw(i10, i8, descr=<.*ArrayNoLengthDescr>)
-            i15 = int_add_ovf(i9, i14)
-            guard_no_overflow(descr=<Guard4>)
-            i17 = int_sub(i8, 640)
-        # the bound check guard on intimg has been killed (thanks to the asserts)
-            i18 = getarrayitem_raw(i11, i17, descr=<.*ArrayNoLengthDescr>)
-            i19 = int_add_ovf(i18, i15)
-            guard_no_overflow(descr=<Guard5>)
-        # on 64bit, there is a guard checking that i19 actually fits into 32bit
-            ...
-            setarrayitem_raw(i11, i8, _, descr=<.*ArrayNoLengthDescr>)
-            i28 = int_add(i8, 1)
-            --TICK--
-            jump(p0, p1, p2, p3, p4, p5, p6, p7, i28, i15, i10, i11, descr=<Loop0>)
-        """)
-
-    def test_func_defaults(self):
-        def main(n):
-            i = 1
-            while i < n:
-                i += len(xrange(i+1)) - i
-            return i
-
-        log = self.run(main, [10000])
-        assert log.result == 10000
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i10 = int_lt(i5, i6)
-            guard_true(i10, descr=<Guard3>)
-            i120 = int_add(i5, 1)
-            guard_not_invalidated(descr=<Guard4>)
-            --TICK--
-            jump(..., descr=<Loop0>)
-        """)
 
     def test_unpack_iterable_non_list_tuple(self):
         def main(n):
@@ -1082,649 +182,53 @@
             jump(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, i28, i25, i19, i13, p14, p15, descr=<Loop0>)
         """)
 
-    def test_mutate_class(self):
-        def fn(n):
-            class A(object):
-                count = 1
-                def __init__(self, a):
-                    self.a = a
-                def f(self):
-                    return self.count
-            i = 0
-            a = A(1)
-            while i < n:
-                A.count += 1 # ID: mutate
-                i = a.f()    # ID: meth1
-            return i
+
+    def test_dont_trace_every_iteration(self):
+        def main(a, b):
+            i = sa = 0
+            while i < 300:
+                if a > 0:
+                    pass
+                if 1 < b < 2:
+                    pass
+                sa += a % b
+                i += 1
+            return sa
         #
-        log = self.run(fn, [1000], threshold=10)
-        assert log.result == 1000
-        #
-        # first, we test the entry bridge
-        # -------------------------------
-        entry_bridge, = log.loops_by_filename(self.filepath, is_entry_bridge=True)
-        ops = entry_bridge.ops_by_id('mutate', opcode='LOAD_ATTR')
-        assert log.opnames(ops) == ['guard_value', 'guard_not_invalidated',
-                                    'getfield_gc', 'guard_nonnull_class']
-        # the STORE_ATTR is folded away
-        assert list(entry_bridge.ops_by_id('meth1', opcode='STORE_ATTR')) == []
-        #
-        # then, the actual loop
-        # ----------------------
+        log = self.run(main, [10, 20])
+        assert log.result == 300 * (10 % 20)
+        assert log.jit_summary.tracing_no == 1
         loop, = log.loops_by_filename(self.filepath)
         assert loop.match("""
-            i8 = getfield_gc_pure(p5, descr=<SignedFieldDescr .*W_IntObject.inst_intval.*>)
-            i9 = int_lt(i8, i7)
-            guard_true(i9, descr=.*)
-            guard_not_invalidated(descr=.*)
-            i11 = int_add(i8, 1)
-            i12 = force_token()
+            i11 = int_lt(i7, 300)
+            guard_true(i11, descr=<Guard3>)
+            i12 = int_add_ovf(i8, i9)
+            guard_no_overflow(descr=<Guard4>)
+            i14 = int_add(i7, 1)
             --TICK--
-            p20 = new_with_vtable(ConstClass(W_IntObject))
-            setfield_gc(p20, i11, descr=<SignedFieldDescr.*W_IntObject.inst_intval .*>)
-            setfield_gc(ConstPtr(ptr21), p20, descr=<GcPtrFieldDescr .*TypeCell.inst_w_value .*>)
-            jump(p0, p1, p2, p3, p4, p20, p6, i7, descr=<Loop.>)
+            jump(..., descr=...)
         """)
+        #
+        log = self.run(main, [-10, -20])
+        assert log.result == 300 * (-10 % -20)
+        assert log.jit_summary.tracing_no == 1
 
 
-    def test_intbound_simple(self):
+    def test_overflow_checking(self):
         """
         This test only checks that we get the expected result, not that any
         optimization has been applied.
         """
-        ops = ('<', '>', '<=', '>=', '==', '!=')
-        nbr = (3, 7)
-        for o1 in ops:
-            for o2 in ops:
-                for n1 in nbr:
-                    for n2 in nbr:
-                        src = '''
-                        def f(i):
-                            a, b = 3, 3
-                            if i %s %d:
-                                a = 0
-                            else:
-                                a = 1
-                            if i %s %d:
-                                b = 0
-                            else:
-                                b = 1
-                            return a + b * 2
-
-                        def main():
-                            res = [0] * 4
-                            idx = []
-                            for i in range(15):
-                                idx.extend([i] * 15)
-                            for i in idx:
-                                res[f(i)] += 1
-                            return res
-
-                        ''' % (o1, n1, o2, n2)
-                        self.run_and_check(src, threshold=200)
-
-    def test_intbound_addsub_mix(self):
-        """
-        This test only checks that we get the expected result, not that any
-        optimization has been applied.
-        """
-        tests = ('i > 4', 'i > 2', 'i + 1 > 2', '1 + i > 4',
-                 'i - 1 > 1', '1 - i > 1', '1 - i < -3',
-                 'i == 1', 'i == 5', 'i != 1', '-2 * i < -4')
-        for t1 in tests:
-            for t2 in tests:
-                src = '''
-                def f(i):
-                    a, b = 3, 3
-                    if %s:
-                        a = 0
-                    else:
-                        a = 1
-                    if %s:
-                        b = 0
-                    else:
-                        b = 1
-                    return a + b * 2
-
-                def main():
-                    res = [0] * 4
-                    idx = []
-                    for i in range(15):
-                        idx.extend([i] * 15)
-                    for i in idx:
-                        res[f(i)] += 1
-                    return res
-
-                ''' % (t1, t2)
-                self.run_and_check(src, threshold=200)
-
-    def test_intbound_gt(self):
-        def main(n):
-            i, a, b = 0, 0, 0
-            while i < n:
-                if i > -1:
-                    a += 1
-                if i > -2:
-                    b += 1
-                i += 1
-            return (a, b)
+        def main():
+            import sys
+            def f(a,b):
+                if a < 0: return -1
+                return a-b
+            #
+            total = sys.maxint - 2147483647
+            for i in range(100000):
+                total += f(i, 5)
+            #
+            return total
         #
-        log = self.run(main, [300], threshold=200)
-        assert log.result == (300, 300)
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i10 = int_lt(i8, i9)
-            guard_true(i10, descr=...)
-            i12 = int_add_ovf(i7, 1)
-            guard_no_overflow(descr=...)
-            i14 = int_add_ovf(i6, 1)
-            guard_no_overflow(descr=...)
-            i17 = int_add(i8, 1)
-            --TICK--
-            jump(p0, p1, p2, p3, p4, p5, i14, i12, i17, i9, descr=<Loop0>)
-        """)
-
-    def test_intbound_sub_lt(self):
-        def main():
-            i, a = 0, 0
-            while i < 300:
-                if i - 10 < 295:
-                    a += 1
-                i += 1
-            return a
-        #
-        log = self.run(main, [], threshold=200)
-        assert log.result == 300
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i7 = int_lt(i5, 300)
-            guard_true(i7, descr=...)
-            i9 = int_sub_ovf(i5, 10)
-            guard_no_overflow(descr=...)
-            i11 = int_add_ovf(i4, 1)
-            guard_no_overflow(descr=...)
-            i13 = int_add(i5, 1)
-            --TICK--
-            jump(p0, p1, p2, p3, i11, i13, descr=<Loop0>)
-        """)
-
-    def test_intbound_addsub_ge(self):
-        def main(n):
-            i, a, b = 0, 0, 0
-            while i < n:
-                if i + 5 >= 5:
-                    a += 1
-                if i - 1 >= -1:
-                    b += 1
-                i += 1
-            return (a, b)
-        #
-        log = self.run(main, [300], threshold=200)
-        assert log.result == (300, 300)
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i10 = int_lt(i8, i9)
-            guard_true(i10, descr=...)
-            i12 = int_add_ovf(i8, 5)
-            guard_no_overflow(descr=...)
-            i14 = int_add_ovf(i7, 1)
-            guard_no_overflow(descr=...)
-            i16 = int_add_ovf(i6, 1)
-            guard_no_overflow(descr=...)
-            i19 = int_add(i8, 1)
-            --TICK--
-            jump(p0, p1, p2, p3, p4, p5, i16, i14, i19, i9, descr=<Loop0>)
-        """)
-
-    def test_intbound_addmul_ge(self):
-        def main(n):
-            i, a, b = 0, 0, 0
-            while i < 300:
-                if i + 5 >= 5:
-                    a += 1
-                if 2 * i >= 0:
-                    b += 1
-                i += 1
-            return (a, b)
-        #
-        log = self.run(main, [300], threshold=200)
-        assert log.result == (300, 300)
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i10 = int_lt(i8, 300)
-            guard_true(i10, descr=...)
-            i12 = int_add(i8, 5)
-            i14 = int_add_ovf(i7, 1)
-            guard_no_overflow(descr=...)
-            i16 = int_lshift(i8, 1)
-            i18 = int_add_ovf(i6, 1)
-            guard_no_overflow(descr=...)
-            i21 = int_add(i8, 1)
-            --TICK--
-            jump(p0, p1, p2, p3, p4, p5, i18, i14, i21, descr=<Loop0>)
-        """)
-
-    def test_intbound_eq(self):
-        def main(a, n):
-            i, s = 0, 0
-            while i < 300:
-                if a == 7:
-                    s += a + 1
-                elif i == 10:
-                    s += i
-                else:
-                    s += 1
-                i += 1
-            return s
-        #
-        log = self.run(main, [7, 300], threshold=200)
-        assert log.result == main(7, 300)
-        log = self.run(main, [10, 300], threshold=200)
-        assert log.result == main(10, 300)
-        log = self.run(main, [42, 300], threshold=200)
-        assert log.result == main(42, 300)
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i10 = int_lt(i8, 300)
-            guard_true(i10, descr=...)
-            i12 = int_eq(i8, 10)
-            guard_false(i12, descr=...)
-            i14 = int_add_ovf(i7, 1)
-            guard_no_overflow(descr=...)
-            i16 = int_add(i8, 1)
-            --TICK--
-            jump(p0, p1, p2, p3, p4, p5, p6, i14, i16, descr=<Loop0>)
-        """)
-
-    def test_intbound_mul(self):
-        def main(a):
-            i, s = 0, 0
-            while i < 300:
-                assert i >= 0
-                if 2 * i < 30000:
-                    s += 1
-                else:
-                    s += a
-                i += 1
-            return s
-        #
-        log = self.run(main, [7], threshold=200)
-        assert log.result == 300
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i8 = int_lt(i6, 300)
-            guard_true(i8, descr=...)
-            i10 = int_lshift(i6, 1)
-            i12 = int_add_ovf(i5, 1)
-            guard_no_overflow(descr=...)
-            i14 = int_add(i6, 1)
-            --TICK--
-            jump(p0, p1, p2, p3, p4, i12, i14, descr=<Loop0>)
-        """)
-
-    def test_assert(self):
-        def main(a):
-            i, s = 0, 0
-            while i < 300:
-                assert a == 7
-                s += a + 1
-                i += 1
-            return s
-        log = self.run(main, [7], threshold=200)
-        assert log.result == 300*8
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i8 = int_lt(i6, 300)
-            guard_true(i8, descr=...)
-            i10 = int_add_ovf(i5, 8)
-            guard_no_overflow(descr=...)
-            i12 = int_add(i6, 1)
-            --TICK--
-            jump(p0, p1, p2, p3, p4, i10, i12, descr=<Loop0>)
-        """)
-
-    def test_zeropadded(self):
-        def main():
-            from array import array
-            class ZeroPadded(array):
-                def __new__(cls, l):
-                    self = array.__new__(cls, 'd', range(l))
-                    return self
-
-                def __getitem__(self, i):
-                    if i < 0 or i >= len(self):
-                        return 0
-                    return array.__getitem__(self, i) # ID: get
-            #
-            buf = ZeroPadded(2000)
-            i = 10
-            sa = 0
-            while i < 2000 - 10:
-                sa += buf[i-2] + buf[i-1] + buf[i] + buf[i+1] + buf[i+2]
-                i += 1
-            return sa
-
-        log = self.run(main, [], threshold=200)
-        assert log.result == 9895050.0
-        loop, = log.loops_by_filename(self.filepath)
-        #
-        # check that the overloaded __getitem__ does not introduce double
-        # array bound checks.
-        #
-        # The force_token()s are still there, but will be eliminated by the
-        # backend regalloc, so they are harmless
-        assert loop.match(ignore_ops=['force_token'],
-                          expected_src="""
-            ...
-            i20 = int_ge(i18, i8)
-            guard_false(i20, descr=...)
-            f21 = getarrayitem_raw(i13, i18, descr=...)
-            f23 = getarrayitem_raw(i13, i14, descr=...)
-            f24 = float_add(f21, f23)
-            f26 = getarrayitem_raw(i13, i6, descr=...)
-            f27 = float_add(f24, f26)
-            i29 = int_add(i6, 1)
-            i31 = int_ge(i29, i8)
-            guard_false(i31, descr=...)
-            f33 = getarrayitem_raw(i13, i29, descr=...)
-            f34 = float_add(f27, f33)
-            i36 = int_add(i6, 2)
-            i38 = int_ge(i36, i8)
-            guard_false(i38, descr=...)
-            f39 = getarrayitem_raw(i13, i36, descr=...)
-            ...
-        """)
-
-
-    def test_circular(self):
-        def main():
-            from array import array
-            class Circular(array):
-                def __new__(cls):
-                    self = array.__new__(cls, 'd', range(256))
-                    return self
-                def __getitem__(self, i):
-                    assert len(self) == 256
-                    return array.__getitem__(self, i & 255)
-            #
-            buf = Circular()
-            i = 10
-            sa = 0
-            while i < 2000 - 10:
-                sa += buf[i-2] + buf[i-1] + buf[i] + buf[i+1] + buf[i+2]
-                i += 1
-            return sa
-        #
-        log = self.run(main, [], threshold=200)
-        assert log.result == 1239690.0
-        loop, = log.loops_by_filename(self.filepath)
-        #
-        # check that the array bound checks are removed
-        #
-        # The force_token()s are still there, but will be eliminated by the
-        # backend regalloc, so they are harmless
-        assert loop.match(ignore_ops=['force_token'],
-                          expected_src="""
-            ...
-            i17 = int_and(i14, 255)
-            f18 = getarrayitem_raw(i8, i17, descr=...)
-            f20 = getarrayitem_raw(i8, i9, descr=...)
-            f21 = float_add(f18, f20)
-            f23 = getarrayitem_raw(i8, i10, descr=...)
-            f24 = float_add(f21, f23)
-            i26 = int_add(i6, 1)
-            i29 = int_and(i26, 255)
-            f30 = getarrayitem_raw(i8, i29, descr=...)
-            f31 = float_add(f24, f30)
-            i33 = int_add(i6, 2)
-            i36 = int_and(i33, 255)
-            f37 = getarrayitem_raw(i8, i36, descr=...)
-            ...
-        """)
-
-    def test_min_max(self):
-        def main():
-            i=0
-            sa=0
-            while i < 300:
-                sa+=min(max(i, 3000), 4000)
-                i+=1
-            return sa
-        log = self.run(main, [], threshold=200)
-        assert log.result == 300*3000
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i7 = int_lt(i4, 300)
-            guard_true(i7, descr=...)
-            i9 = int_add_ovf(i5, 3000)
-            guard_no_overflow(descr=...)
-            i11 = int_add(i4, 1)
-            --TICK--
-            jump(p0, p1, p2, p3, i11, i9, descr=<Loop0>)
-        """)
-
-    def test_silly_max(self):
-        def main():
-            i = 2
-            sa = 0
-            while i < 300:
-                lst = range(i)
-                sa += max(*lst) # ID: max
-                i += 1
-            return sa
-        log = self.run(main, [], threshold=200)
-        assert log.result == main()
-        loop, = log.loops_by_filename(self.filepath)
-        # We dont want too many guards, but a residual call to min_max_loop
-        guards = [n for n in log.opnames(loop.ops_by_id("max")) if n.startswith('guard')]
-        assert len(guards) < 20
-        assert loop.match_by_id('max',"""
-            ...
-            p76 = call_may_force(ConstClass(min_max_loop__max), _, _, descr=...)
-            ...
-        """)
-
-    def test_iter_max(self):
-        def main():
-            i = 2
-            sa = 0
-            while i < 300:
-                lst = range(i)
-                sa += max(lst) # ID: max
-                i += 1
-            return sa
-        log = self.run(main, [], threshold=200)
-        assert log.result == main()
-        loop, = log.loops_by_filename(self.filepath)
-        # We dont want too many guards, but a residual call to min_max_loop
-        guards = [n for n in log.opnames(loop.ops_by_id("max")) if n.startswith('guard')]
-        assert len(guards) < 20
-        assert loop.match_by_id('max',"""
-            ...
-            p76 = call_may_force(ConstClass(min_max_loop__max), _, _, descr=...)
-            ...
-        """)
-
-    def test__ffi_call(self):
-        from pypy.rlib.test.test_libffi import get_libm_name
-        def main(libm_name):
-            try:
-                from _ffi import CDLL, types
-            except ImportError:
-                sys.stderr.write('SKIP: cannot import _ffi\n')
-                return 0
-
-            libm = CDLL(libm_name)
-            pow = libm.getfunc('pow', [types.double, types.double],
-                               types.double)
-            i = 0
-            res = 0
-            while i < 300:
-                res += pow(2, 3)
-                i += 1
-            return pow.getaddr(), res
-        #
-        libm_name = get_libm_name(sys.platform)
-        log = self.run(main, [libm_name], threshold=200)
-        pow_addr, res = log.result
-        assert res == 8.0 * 300
-        loop, = log.loops_by_filename(self.filepath)
-        # XXX: write the actual test when we merge this to jitypes2
-        ## ops = self.get_by_bytecode('CALL_FUNCTION')
-        ## assert len(ops) == 2 # we get two loops, because of specialization
-        ## call_function = ops[0]
-        ## last_ops = [op.getopname() for op in call_function[-5:]]
-        ## assert last_ops == ['force_token',
-        ##                     'setfield_gc',
-        ##                     'call_may_force',
-        ##                     'guard_not_forced',
-        ##                     'guard_no_exception']
-        ## call = call_function[-3]
-        ## assert call.getarg(0).value == pow_addr
-        ## assert call.getarg(1).value == 2.0
-        ## assert call.getarg(2).value == 3.0
-
-    def test_xor(self):
-        def main(b):
-            a = sa = 0
-            while a < 300:
-                if a > 0: # Specialises the loop
-                    pass
-                if b > 10:
-                    pass
-                if a^b >= 0:  # ID: guard
-                    sa += 1
-                sa += a^a     # ID: a_xor_a
-                a += 1
-            return sa
-
-        log = self.run(main, [11], threshold=200)
-        assert log.result == 300
-        loop, = log.loops_by_filename(self.filepath)
-        # if both are >=0, a^b is known to be >=0
-        # note that we know that b>10
-        assert loop.match_by_id('guard', """
-            i10 = int_xor(i5, i7)
-        """)
-        #
-        # x^x is always optimized to 0
-        assert loop.match_by_id('a_xor_a', "")
-
-        log = self.run(main, [9], threshold=200)
-        assert log.result == 300
-        loop, = log.loops_by_filename(self.filepath)
-        # we don't know that b>10, hence we cannot optimize it
-        assert loop.match_by_id('guard', """
-            i10 = int_xor(i5, i7)
-            i12 = int_ge(i10, 0)
-            guard_true(i12, descr=...)
-        """)
-
-    def test_shift_intbound(self):
-        def main(b):
-            res = 0
-            a = 0
-            while a < 300:
-                assert a >= 0
-                assert 0 <= b <= 10
-                val = a >> b
-                if val >= 0:    # ID: rshift
-                    res += 1
-                val = a << b
-                if val >= 0:    # ID: lshift
-                    res += 2
-                a += 1
-            return res
-        #
-        log = self.run(main, [2], threshold=200)
-        assert log.result == 300*3
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match_by_id('rshift', "")  # guard optimized away
-        assert loop.match_by_id('lshift', "")  # guard optimized away
-
-    def test_lshift_and_then_rshift(self):
-        py.test.skip('fixme, this optimization is disabled')
-        def main(b):
-            res = 0
-            a = 0
-            while res < 300:
-                assert a >= 0
-                assert 0 <= b <= 10
-                res = (a << b) >> b     # ID: shift
-                a += 1
-            return res
-        #
-        log = self.run(main, [2], threshold=200)
-        assert log.result == 300
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match_by_id('shift', "")  # optimized away
-
-    def test_division_to_rshift(self):
-        py.test.skip('in-progress')
-        def main(b):
-            res = 0
-            a = 0
-            while a < 300:
-                assert a >= 0
-                assert 0 <= b <= 10
-                res = a/b     # ID: div
-                a += 1
-            return res
-        #
-        log = self.run(main, [3], threshold=200)
-        #assert log.result == 149
-        loop, = log.loops_by_filename(self.filepath)
-        import pdb;pdb.set_trace()
-        assert loop.match_by_id('div', "")  # optimized away
-
-    def test_oldstyle_newstyle_mix(self):
-        def main():
-            class A:
-                pass
-
-            class B(object, A):
-                def __init__(self, x):
-                    self.x = x
-
-            i = 0
-            b = B(1)
-            while i < 100:
-                v = b.x # ID: loadattr
-                i += v
-            return i
-
-        log = self.run(main, [], threshold=80)
-        loop, = log.loops_by_filename(self.filepath)
-        loop.match_by_id('loadattr',
-        '''
-        guard_not_invalidated(descr=...)
-        i19 = call(ConstClass(ll_dict_lookup), _, _, _, descr=...)
-        guard_no_exception(descr=...)
-        i21 = int_and(i19, _)
-        i22 = int_is_true(i21)
-        guard_true(i22, descr=...)
-        i26 = call(ConstClass(ll_dict_lookup), _, _, _, descr=...)
-        guard_no_exception(descr=...)
-        i28 = int_and(i26, _)
-        i29 = int_is_true(i28)
-        guard_true(i29, descr=...)
-        ''')
-
-    def test_python_contains(self):
-        def main():
-            class A(object):
-                def __contains__(self, v):
-                    return True
-
-            i = 0
-            a = A()
-            while i < 100:
-                i += i in a # ID: contains
-
-            log = self.run(main, [], threshold=80)
-            loop, = log.loops_by_filename(self.filemath)
-            # XXX: haven't confirmed his is correct, it's probably missing a
-            # few instructions
-            loop.match_by_id("contains", """
-                i1 = int_add(i0, 1)
-            """)
\ No newline at end of file
+        self.run_and_check(main, [])
diff --git a/pypy/module/pypyjit/test_pypy_c/test_shift.py b/pypy/module/pypyjit/test_pypy_c/test_shift.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_shift.py
@@ -0,0 +1,166 @@
+import py
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class TestShift(BaseTestPyPyC):
+
+    def test_shift_intbound(self):
+        def main(b):
+            res = 0
+            a = 0
+            while a < 300:
+                assert a >= 0
+                assert 0 <= b <= 10
+                val = a >> b
+                if val >= 0:    # ID: rshift
+                    res += 1
+                val = a << b
+                if val >= 0:    # ID: lshift
+                    res += 2
+                a += 1
+            return res
+        #
+        log = self.run(main, [2])
+        assert log.result == 300*3
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match_by_id('rshift', "")  # guard optimized away
+        assert loop.match_by_id('lshift', "")  # guard optimized away
+
+    def test_lshift_and_then_rshift(self):
+        py.test.skip('fixme, this optimization is disabled')
+        def main(b):
+            res = 0
+            a = 0
+            while res < 300:
+                assert a >= 0
+                assert 0 <= b <= 10
+                res = (a << b) >> b     # ID: shift
+                a += 1
+            return res
+        #
+        log = self.run(main, [2])
+        assert log.result == 300
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match_by_id('shift', "")  # optimized away
+
+    def test_division_to_rshift(self):
+        def main(b):
+            res = 0
+            a = 0
+            while a < 300:
+                assert a >= 0
+                assert 0 <= b <= 10
+                res = a/b     # ID: div
+                a += 1
+            return res
+        #
+        log = self.run(main, [3])
+        assert log.result == 99
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match_by_id('div', """
+            i10 = int_floordiv(i6, i7)
+            i11 = int_mul(i10, i7)
+            i12 = int_sub(i6, i11)
+            i14 = int_rshift(i12, 63)
+            i15 = int_add(i10, i14)
+        """)
+
+    def test_division_to_rshift_allcases(self):
+        """
+        This test only checks that we get the expected result, not that any
+        optimization has been applied.
+        """
+        avalues = ('a', 'b', 7, -42, 8)
+        bvalues = ['b'] + range(-10, 0) + range(1,10)
+        code = ''
+        for a in avalues:
+            for b in bvalues:
+                code += '                sa += %s / %s\n' % (a, b)
+        src = """
+        def main(a, b):
+            i = sa = 0
+            while i < 300:
+%s
+                i += 1
+            return sa
+        """ % code
+        self.run_and_check(src, [ 10,  20])
+        self.run_and_check(src, [ 10, -20])
+        self.run_and_check(src, [-10, -20])
+
+    def test_mod(self):
+        """
+        This test only checks that we get the expected result, not that any
+        optimization has been applied.
+        """
+        avalues = ('a', 'b', 7, -42, 8)
+        bvalues = ['b'] + range(-10, 0) + range(1,10)
+        code = ''
+        for a in avalues:
+            for b in bvalues:
+                code += '                sa += %s %% %s\n' % (a, b)
+        src = """
+        def main(a, b):
+            i = sa = 0
+            while i < 2000:
+                if a > 0: pass
+                if 1 < b < 2: pass
+%s
+                i += 1
+            return sa
+        """ % code
+        self.run_and_check(src, [ 10,  20])
+        self.run_and_check(src, [ 10, -20])
+        self.run_and_check(src, [-10, -20])
+
+    def test_shift_allcases(self):
+        """
+        This test only checks that we get the expected result, not that any
+        optimization has been applied.
+        """
+        from sys import maxint
+        def main(a, b):
+            i = sa = 0
+            while i < 300:
+                if a > 0: # Specialises the loop
+                    pass
+                if b < 2 and b > 0:
+                    pass
+                if (a >> b) >= 0:
+                    sa += 1
+                if (a << b) > 2:
+                    sa += 10000
+                i += 1
+            return sa
+        #
+        maxvals = (-maxint-1, -maxint, maxint-1, maxint)
+        for a in (-4, -3, -2, -1, 0, 1, 2, 3, 4) + maxvals:
+            for b in (0, 1, 2, 31, 32, 33, 61, 62, 63):
+                yield self.run_and_check, main, [a, b]
+
+    def test_revert_shift_allcases(self):
+        """
+        This test only checks that we get the expected result, not that any
+        optimization has been applied.
+        """
+        from sys import maxint
+
+        def main(a, b, c):
+            from sys import maxint
+            i = sa = 0
+            while i < 300:
+                if 0 < a < 10: pass
+                if -100 < b < 100: pass
+                if -maxint/2 < c < maxint/2: pass
+                sa += (a<<a)>>a
+                sa += (b<<a)>>a
+                sa += (c<<a)>>a
+                sa += (a<<100)>>100
+                sa += (b<<100)>>100
+                sa += (c<<100)>>100
+                i += 1
+            return long(sa)
+
+        for a in (1, 4, 8, 100):
+            for b in (-10, 10, -201, 201, -maxint/3, maxint/3):
+                for c in (-10, 10, -maxint/3, maxint/3):
+                    yield self.run_and_check, main, [a, b, c]
diff --git a/pypy/module/sys/__init__.py b/pypy/module/sys/__init__.py
--- a/pypy/module/sys/__init__.py
+++ b/pypy/module/sys/__init__.py
@@ -150,7 +150,7 @@
             if operror is None:
                 return space.w_None
             else:
-                return space.wrap(operror.application_traceback)
+                return space.wrap(operror.get_traceback())
         return None 
 
     def get_w_default_encoder(self):
diff --git a/pypy/module/sys/vm.py b/pypy/module/sys/vm.py
--- a/pypy/module/sys/vm.py
+++ b/pypy/module/sys/vm.py
@@ -40,24 +40,24 @@
             break
         depth -= 1
         f = ec.getnextframe_nohidden(f)
+    f.mark_as_escaped()
     return space.wrap(f)
 
 def setrecursionlimit(space, w_new_limit):
-    """setrecursionlimit() is ignored (and not needed) on PyPy.
-
-On CPython it would set the maximum number of nested calls that can
-occur before a RuntimeError is raised.  On PyPy overflowing the stack
-also causes RuntimeErrors, but the limit is checked at a lower level.
-(The limit is currenty hard-coded at 768 KB, corresponding to roughly
-1480 Python calls on Linux.)"""
+    """setrecursionlimit() sets the maximum number of nested calls that
+can occur before a RuntimeError is raised.  On PyPy the limit is
+approximative and checked at a lower level.  The default 1000
+reserves 768KB of stack space, which should suffice (on Linux,
+depending on the compiler settings) for ~1400 calls.  Setting the
+value to N reserves N/1000 times 768KB of stack space.
+"""
+    from pypy.rlib.rstack import _stack_set_length_fraction
     new_limit = space.int_w(w_new_limit)
     if new_limit <= 0:
         raise OperationError(space.w_ValueError,
                              space.wrap("recursion limit must be positive"))
-    # for now, don't rewrite a warning but silently ignore the
-    # recursion limit.
-    #space.warn('setrecursionlimit() is ignored (and not needed) on PyPy', space.w_RuntimeWarning)
     space.sys.recursionlimit = new_limit
+    _stack_set_length_fraction(new_limit * 0.001)
 
 def getrecursionlimit(space):
     """Return the last value set by setrecursionlimit().
@@ -91,7 +91,7 @@
         return space.newtuple([space.w_None,space.w_None,space.w_None])
     else:
         return space.newtuple([operror.w_type, operror.get_w_value(space),
-                               space.wrap(operror.application_traceback)])
+                               space.wrap(operror.get_traceback())])
 
 def exc_clear(space):
     """Clear global information on the current exception.  Subsequent calls
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test.c b/pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test.c
--- a/pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test.c
+++ b/pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test.c
@@ -43,6 +43,12 @@
 	qsort(base, num, width, compare);
 }
 
+EXPORT(char) deref_LP_c_char_p(char** argv)
+{
+    char* s = *argv;
+    return s[0];
+}
+
 EXPORT(int *) _testfunc_ai8(int a[8])
 {
 	return a;
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/support.py b/pypy/module/test_lib_pypy/ctypes_tests/support.py
--- a/pypy/module/test_lib_pypy/ctypes_tests/support.py
+++ b/pypy/module/test_lib_pypy/ctypes_tests/support.py
@@ -1,4 +1,5 @@
 import py
+import sys
 import ctypes
 
 py.test.importorskip("ctypes", "1.0.2")
@@ -14,6 +15,16 @@
         if _rawffi:
             py.test.skip("white-box tests for pypy _rawffi based ctypes impl")
 
+def del_funcptr_refs_maybe(obj, attrname):
+    dll = getattr(obj, attrname, None)
+    if not dll:
+        return
+    _FuncPtr = dll._FuncPtr
+    for name in dir(dll):
+        obj = getattr(dll, name, None)
+        if isinstance(obj, _FuncPtr):
+            delattr(dll, name)
+
 class BaseCTypesTestChecker:
     def setup_class(cls):
         if _rawffi:
@@ -21,8 +32,21 @@
             for _ in range(4):
                 gc.collect()
             cls.old_num = _rawffi._num_of_allocated_objects()
-    
+
+
     def teardown_class(cls):
+        if sys.pypy_translation_info['translation.gc'] == 'boehm':
+            return # it seems that boehm has problems with __del__, so not
+                   # everything is freed
+        #
+        mod = sys.modules[cls.__module__]
+        del_funcptr_refs_maybe(mod, 'dll')
+        del_funcptr_refs_maybe(mod, 'dll2')
+        del_funcptr_refs_maybe(mod, 'lib')
+        del_funcptr_refs_maybe(mod, 'testdll')
+        del_funcptr_refs_maybe(mod, 'ctdll')
+        del_funcptr_refs_maybe(cls, '_dll')
+        #
         if hasattr(cls, 'old_num'):
             import gc
             for _ in range(4):
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_fastpath.py b/pypy/module/test_lib_pypy/ctypes_tests/test_fastpath.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_fastpath.py
@@ -0,0 +1,103 @@
+from ctypes import CDLL, POINTER, pointer, c_byte, c_int, c_char_p
+import sys
+import py
+from support import BaseCTypesTestChecker
+
+class MyCDLL(CDLL):
+    def __getattr__(self, attr):
+        fn = self[attr] # this way it's not cached as an attribute
+        fn._slowpath_allowed = False
+        return fn
+
+def setup_module(mod):
+    import conftest
+    _ctypes_test = str(conftest.sofile)
+    mod.dll = MyCDLL(_ctypes_test)  # slowpath not allowed
+    mod.dll2 = CDLL(_ctypes_test)   # slowpath allowed
+
+
+class TestFastpath(BaseCTypesTestChecker):
+
+    def test_fastpath_forbidden(self):
+        def myfunc():
+            pass
+        #
+        tf_b = dll.tf_b
+        tf_b.restype = c_byte
+        #
+        # so far, it's still using the slowpath
+        assert not tf_b._is_fastpath
+        tf_b.callable = myfunc
+        tf_b.argtypes = (c_byte,)
+        # setting callable prevented the fastpath from kicking in
+        assert not tf_b._is_fastpath
+        #
+        del tf_b.callable
+        tf_b.argtypes = (c_byte,) # try to re-enable the fastpath
+        assert tf_b._is_fastpath
+        #
+        assert not tf_b._slowpath_allowed
+        py.test.raises(AssertionError, "tf_b.callable = myfunc")
+        py.test.raises(AssertionError, "tf_b('aaa')") # force a TypeError
+
+    def test_simple_args(self):
+        tf_b = dll.tf_b
+        tf_b.restype = c_byte
+        tf_b.argtypes = (c_byte,)
+        assert tf_b(-126) == -42
+
+    def test_pointer_args(self):
+        f = dll._testfunc_p_p
+        f.restype = POINTER(c_int)
+        f.argtypes = [POINTER(c_int)]
+        v = c_int(42)
+        result = f(pointer(v))
+        assert type(result) == POINTER(c_int)
+        assert result.contents.value == 42
+
+    def test_simple_pointer_args(self):
+        f = dll.my_strchr
+        f.argtypes = [c_char_p, c_int]
+        f.restype = c_char_p
+        mystr = c_char_p("abcd")
+        result = f(mystr, ord("b"))
+        assert result == "bcd"
+
+    @py.test.mark.xfail
+    def test_strings(self):
+        f = dll.my_strchr
+        f.argtypes = [c_char_p, c_int]
+        f.restype = c_char_p
+        # python strings need to be converted to c_char_p, but this is
+        # supported only in the slow path so far
+        result = f("abcd", ord("b"))
+        assert result == "bcd"
+
+    def test_errcheck(self):
+        def errcheck(result, func, args):
+            return 'hello'
+        tf_b = dll.tf_b
+        tf_b.restype = c_byte
+        tf_b.argtypes = (c_byte,)
+        tf_b.errcheck = errcheck
+        assert tf_b(-126) == 'hello'
+
+
+class TestFallbackToSlowpath(BaseCTypesTestChecker):
+
+    def test_argtypes_is_None(self):
+        tf_b = dll2.tf_b
+        tf_b.restype = c_byte
+        tf_b.argtypes = (c_char_p,)  # this is intentionally wrong
+        tf_b.argtypes = None # kill the fast path
+        assert not tf_b._is_fastpath
+        assert tf_b(-126) == -42
+
+    def test_callable_is_None(self):
+        tf_b = dll2.tf_b
+        tf_b.restype = c_byte
+        tf_b.argtypes = (c_byte,)
+        tf_b.callable = lambda x: x+1
+        assert not tf_b._is_fastpath
+        assert tf_b(-126) == -125
+        tf_b.callable = None
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_functions.py b/pypy/module/test_lib_pypy/ctypes_tests/test_functions.py
--- a/pypy/module/test_lib_pypy/ctypes_tests/test_functions.py
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_functions.py
@@ -91,6 +91,13 @@
         result = f(0, 0, 0, 0, 0, 0)
         assert result == u'\x00'
 
+    def test_char_result(self):
+        f = dll._testfunc_i_bhilfd
+        f.argtypes = [c_byte, c_short, c_int, c_long, c_float, c_double]
+        f.restype = c_char
+        result = f(0, 0, 0, 0, 0, 0)
+        assert result == '\x00'
+
     def test_voidresult(self):
         f = dll._testfunc_v
         f.restype = None
@@ -211,8 +218,19 @@
         result = f(byref(c_int(99)))
         assert not result.contents == 99
 
+    def test_convert_pointers(self):
+        f = dll.deref_LP_c_char_p
+        f.restype = c_char
+        f.argtypes = [POINTER(c_char_p)]
+        #
+        s = c_char_p('hello world')
+        ps = pointer(s)
+        assert f(ps) == 'h'
+        assert f(s) == 'h'  # automatic conversion from char* to char**
+
     def test_errors_1(self):
         f = dll._testfunc_p_p
+        f.argtypes = [POINTER(c_int)]
         f.restype = c_int
 
         class X(Structure):
@@ -428,6 +446,16 @@
         u = dll.ret_un_func(a[1])
         assert u.y == 33*10000
 
+    def test_cache_funcptr(self):
+        tf_b = dll.tf_b
+        tf_b.restype = c_byte
+        tf_b.argtypes = (c_byte,)
+        assert tf_b(-126) == -42
+        ptr = tf_b._ptr
+        assert ptr is not None
+        assert tf_b(-126) == -42
+        assert tf_b._ptr is ptr
+
     def test_warnings(self):
         import warnings
         warnings.simplefilter("always")
@@ -439,6 +467,22 @@
             assert "C function without declared arguments called" in str(w[0].message)
             assert "C function without declared return type called" in str(w[1].message)
 
+    def test_errcheck(self):
+        py.test.skip('fixme')
+        def errcheck(result, func, args):
+            assert result == -42
+            assert type(result) is int
+            arg, = args
+            assert arg == -126
+            assert type(arg) is int
+            return result
+        #
+        tf_b = dll.tf_b
+        tf_b.restype = c_byte
+        tf_b.argtypes = (c_byte,)
+        tf_b.errcheck = errcheck
+        assert tf_b(-126) == -42
+        del tf_b.errcheck
         with warnings.catch_warnings(record=True) as w:
             dll.get_an_integer.argtypes = []
             dll.get_an_integer()
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_guess_argtypes.py b/pypy/module/test_lib_pypy/ctypes_tests/test_guess_argtypes.py
--- a/pypy/module/test_lib_pypy/ctypes_tests/test_guess_argtypes.py
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_guess_argtypes.py
@@ -12,8 +12,10 @@
     from _ctypes.function import CFuncPtr
 
     def guess(value):
-        cobj = CFuncPtr._conv_param(None, value)
-        return type(cobj)
+        cobj, ctype = CFuncPtr._conv_param(None, value)
+        return ctype
+        ## cobj = CFuncPtr._conv_param(None, value)
+        ## return type(cobj)
 
     assert guess(13) == c_int
     assert guess(0) == c_int
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_numbers.py b/pypy/module/test_lib_pypy/ctypes_tests/test_numbers.py
--- a/pypy/module/test_lib_pypy/ctypes_tests/test_numbers.py
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_numbers.py
@@ -125,6 +125,9 @@
             if t is c_longdouble:   # no support for 'g' in the struct module
                 continue
             code = t._type_ # the typecode
+            if code == 'g':
+                # typecode not supported by "struct"
+                continue
             align = struct.calcsize("c%c" % code) - struct.calcsize(code)
 
             # alignment of the type...
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_pointers.py b/pypy/module/test_lib_pypy/ctypes_tests/test_pointers.py
--- a/pypy/module/test_lib_pypy/ctypes_tests/test_pointers.py
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_pointers.py
@@ -12,6 +12,13 @@
     mod._ctypes_test = str(conftest.sofile)
 
 class TestPointers(BaseCTypesTestChecker):
+
+    def test_get_ffi_argtype(self):
+        P = POINTER(c_int)
+        ffitype = P.get_ffi_argtype()
+        assert P.get_ffi_argtype() is ffitype
+        assert ffitype.deref_pointer() is c_int.get_ffi_argtype()
+    
     def test_pointer_crash(self):
 
         class A(POINTER(c_ulong)):
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_unicode.py b/pypy/module/test_lib_pypy/ctypes_tests/test_unicode.py
--- a/pypy/module/test_lib_pypy/ctypes_tests/test_unicode.py
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_unicode.py
@@ -15,6 +15,10 @@
         mod.wcslen.argtypes = [ctypes.c_wchar_p]
         mod.func = dll._testfunc_p_p
 
+    def teardown_module(mod):
+        del mod.func
+        del mod.wcslen
+
     class TestUnicode(BaseCTypesTestChecker):
         def setup_method(self, method):
             self.prev_conv_mode = ctypes.set_conversion_mode("ascii", "strict")
diff --git a/pypy/module/test_lib_pypy/test_stackless.py b/pypy/module/test_lib_pypy/test_stackless.py
--- a/pypy/module/test_lib_pypy/test_stackless.py
+++ b/pypy/module/test_lib_pypy/test_stackless.py
@@ -8,15 +8,12 @@
         space = gettestobjspace(usemodules=('_stackless', '_socket'))
         cls.space = space
         # cannot test the unpickle part on top of py.py
-        cls.w_can_unpickle = space.wrap(bool(option.runappdirect))
 
     def test_pickle(self):
         import new, sys
 
         mod = new.module('mod')
         sys.modules['mod'] = mod
-        mod.can_unpickle = self.can_unpickle
-        mod.skip = skip
         try:
             exec '''
 import pickle, sys
@@ -45,8 +42,6 @@
 t = stackless.tasklet(demo)(lev)
 stackless.run()
 assert seen == range(1, lev+1) + range(lev, 0, -1)
-if not can_unpickle:
-    skip("cannot test the unpickling part on top of py.py")
 print "now running the clone"
 tt = pickle.loads(blob)
 tt.insert()
@@ -64,8 +59,6 @@
 
         mod = new.module('mod')
         sys.modules['mod'] = mod
-        mod.can_unpickle = self.can_unpickle
-        mod.skip = skip
         try:
             exec '''
 import pickle, sys
diff --git a/pypy/module/test_lib_pypy/test_tputil.py b/pypy/module/test_lib_pypy/test_tputil.py
--- a/pypy/module/test_lib_pypy/test_tputil.py
+++ b/pypy/module/test_lib_pypy/test_tputil.py
@@ -28,9 +28,9 @@
         from tputil import make_proxy 
         l = []
         tp = make_proxy(l.append, type=list)
-        x = len(tp)
+        x = tp[0:1]
         assert len(l) == 1
-        assert l[0].opname == '__len__'
+        assert l[0].opname == '__getslice__'
        
     def test_simple(self):
         from tputil import make_proxy 
diff --git a/pypy/module/zipimport/test/test_zipimport.py b/pypy/module/zipimport/test/test_zipimport.py
--- a/pypy/module/zipimport/test/test_zipimport.py
+++ b/pypy/module/zipimport/test/test_zipimport.py
@@ -1,7 +1,7 @@
 
 from pypy.conftest import gettestobjspace
 import marshal
-import py
+import py, os
 import time
 import struct
 from pypy.module.imp.importing import get_pyc_magic, _w_long
@@ -15,6 +15,7 @@
     cpy's regression tests
     """
     compression = ZIP_STORED
+    pathsep = '/'
     
     def make_pyc(cls, space, co, mtime):
         data = marshal.dumps(co)
@@ -57,6 +58,7 @@
         test_pyc = cls.make_pyc(space, co, now)
         cls.w_test_pyc = space.wrap(test_pyc)
         cls.w_compression = space.wrap(cls.compression)
+        cls.w_pathsep = space.wrap(cls.pathsep)
         #ziptestmodule = tmpdir.ensure('ziptestmodule.zip').write(
         ziptestmodule = tmpdir.join("somezip.zip")
         cls.w_tmpzip = space.wrap(str(ziptestmodule))
@@ -100,6 +102,7 @@
         from zipfile import ZipFile, ZipInfo
         z = ZipFile(self.zipfile, 'w')
         write_files = self.write_files
+        filename = filename.replace('/', self.pathsep)
         write_files.append((filename, data))
         for filename, data in write_files:
             zinfo = ZipInfo(filename, time.localtime(self.now))
@@ -121,6 +124,7 @@
             del _zip_directory_cache[self.zipfile]
 
     def test_cache_subdir(self):
+        import os
         self.writefile('x.py', '')
         self.writefile('sub/__init__.py', '')
         self.writefile('sub/yy.py', '')
@@ -130,7 +134,7 @@
 
         assert main_importer is not sub_importer
         assert main_importer.prefix == ""
-        assert sub_importer.prefix == "sub/"
+        assert sub_importer.prefix == "sub" + os.path.sep
 
     def test_good_bad_arguments(self):
         from zipimport import zipimporter
@@ -262,7 +266,7 @@
         import zipimport
         data = "saddsadsa"
         self.writefile("xxx", data)
-        self.writefile("xx"+os.sep+"__init__.py", "5")
+        self.writefile("xx/__init__.py", "5")
         self.writefile("yy.py", "3")
         self.writefile('uu.pyc', self.test_pyc)
         z = zipimport.zipimporter(self.zipfile)
@@ -287,8 +291,7 @@
         """
         import os
         import zipimport
-        self.writefile(
-            os.sep.join(("directory", "package", "__init__.py")), "")
+        self.writefile("directory/package/__init__.py", "")
         importer = zipimport.zipimporter(self.zipfile + "/directory")
         # Grab this so if the assertion fails, py.test will display its
         # value.  Not sure why it doesn't the assertion uses import.archive
@@ -296,15 +299,14 @@
         archive = importer.archive
         realprefix = importer.prefix
         allbutlast = self.zipfile.split(os.path.sep)[:-1]
-        prefix = 'directory/'
+        prefix = 'directory' + os.path.sep
         assert archive == self.zipfile
         assert realprefix == prefix
 
     def test_subdirectory_importer(self):
         import os
         import zipimport
-        self.writefile(
-            os.sep.join(("directory", "package", "__init__.py")), "")
+        self.writefile("directory/package/__init__.py", "")
         z = zipimport.zipimporter(self.zipfile + "/directory")
         mod = z.load_module("package")
         assert z.is_package("package")
@@ -313,14 +315,9 @@
     def test_subdirectory_twice(self):
         import os, zipimport
  
-        self.writefile(
-            os.sep.join(("package", "__init__.py")), "")
-        self.writefile(
-            os.sep.join(("package", "subpackage",
-                         "__init__.py")), "")
-        self.writefile(
-            os.sep.join(("package", "subpackage",
-                         "foo.py")), "")
+        self.writefile("package/__init__.py", "")
+        self.writefile("package/subpackage/__init__.py", "")
+        self.writefile("package/subpackage/foo.py", "")
         import sys
         print sys.path
         mod = __import__('package.subpackage.foo', None, None, [])
@@ -331,8 +328,7 @@
         """
         import os
         import zipimport
-        self.writefile(
-                     os.sep.join(("directory", "package", "__init__.py")), "")
+        self.writefile("directory/package/__init__.py", "")
         importer = zipimport.zipimporter(self.zipfile + "/directory")
         l = [i for i in zipimport._zip_directory_cache]
         assert len(l)
@@ -370,3 +366,8 @@
         except ImportError:
             py.test.skip("zlib not available, cannot test compressed zipfiles")
         cls.make_class()
+
+
+if os.sep != '/':
+    class AppTestNativePathSep(AppTestZipimport):
+        pathsep = os.sep
diff --git a/pypy/objspace/descroperation.py b/pypy/objspace/descroperation.py
--- a/pypy/objspace/descroperation.py
+++ b/pypy/objspace/descroperation.py
@@ -207,34 +207,51 @@
         return space.get_and_call_function(w_descr, w_obj, w_name)
 
     def is_true(space, w_obj):
-        w_descr = space.lookup(w_obj, '__nonzero__')
+        method = "__nonzero__"
+        w_descr = space.lookup(w_obj, method)
         if w_descr is None:
-            w_descr = space.lookup(w_obj, '__len__')
+            method = "__len__"
+            w_descr = space.lookup(w_obj, method)
             if w_descr is None:
                 return True
         w_res = space.get_and_call_function(w_descr, w_obj)
         # more shortcuts for common cases
-        if w_res is space.w_False:
+        if space.is_w(w_res, space.w_False):
             return False
-        if w_res is space.w_True:
+        if space.is_w(w_res, space.w_True):
             return True
         w_restype = space.type(w_res)
-        if (space.is_w(w_restype, space.w_bool) or
-            space.is_w(w_restype, space.w_int)):
+        # Note there is no check for bool here because the only possible
+        # instances of bool are w_False and w_True, which are checked above.
+        if (space.is_w(w_restype, space.w_int) or
+            space.is_w(w_restype, space.w_long)):
             return space.int_w(w_res) != 0
         else:
-            raise OperationError(space.w_TypeError,
-                                 space.wrap('__nonzero__ should return '
-                                            'bool or int'))
+            msg = "%s should return bool or integer" % (method,)
+            raise OperationError(space.w_TypeError, space.wrap(msg))
 
-    def nonzero(self, w_obj):
-        if self.is_true(w_obj):
-            return self.w_True
+    def nonzero(space, w_obj):
+        if space.is_true(w_obj):
+            return space.w_True
         else:
-            return self.w_False
+            return space.w_False
 
-##    def len(self, w_obj):
-##        XXX needs to check that the result is an int (or long?) >= 0
+    def len(space, w_obj):
+        w_descr = space.lookup(w_obj, '__len__')
+        if w_descr is None:
+            name = space.type(w_obj).getname(space)
+            msg = "'%s' has no length" % (name,)
+            raise OperationError(space.w_TypeError, space.wrap(msg))
+        w_res = space.get_and_call_function(w_descr, w_obj)
+        space._check_len_result(w_res)
+        return w_res
+
+    def _check_len_result(space, w_obj):
+        # Will complain if result is too big.
+        result = space.int_w(w_obj)
+        if result < 0:
+            raise OperationError(space.w_ValueError,
+                                 space.wrap("__len__() should return >= 0"))
 
     def iter(space, w_obj):
         w_descr = space.lookup(w_obj, '__iter__')
diff --git a/pypy/objspace/std/callmethod.py b/pypy/objspace/std/callmethod.py
--- a/pypy/objspace/std/callmethod.py
+++ b/pypy/objspace/std/callmethod.py
@@ -12,7 +12,7 @@
 
 from pypy.interpreter import function
 from pypy.objspace.descroperation import object_getattribute
-from pypy.rlib import jit, rstack # for resume points
+from pypy.rlib import jit
 from pypy.objspace.std.mapdict import LOOKUP_METHOD_mapdict, \
     LOOKUP_METHOD_mapdict_fill_cache_method
 
@@ -84,7 +84,6 @@
         w_callable = f.peekvalue(n_args + (2 * n_kwargs) + 1)
         try:
             w_result = f.space.call_valuestack(w_callable, n, f)
-            rstack.resume_point("CALL_METHOD", f, n_args, returns=w_result)
         finally:
             f.dropvalues(n_args + 2)
     else:
@@ -109,7 +108,6 @@
             w_result = f.space.call_args_and_c_profile(f, w_callable, args)
         else:
             w_result = f.space.call_args(w_callable, args)
-        rstack.resume_point("CALL_METHOD_KW", f, returns=w_result)
     f.pushvalue(w_result)
 
 
diff --git a/pypy/objspace/std/floattype.py b/pypy/objspace/std/floattype.py
--- a/pypy/objspace/std/floattype.py
+++ b/pypy/objspace/std/floattype.py
@@ -14,10 +14,8 @@
 float_as_integer_ratio = SMM("as_integer_ratio", 1)
 float_hex = SMM("hex", 1)
 
-float_conjugate = SMM("conjugate", 1, doc="Returns self, the complex conjugate of any float.")
-
-def float_conjugate__ANY(space, w_float):
-    return space.pos(w_float)
+def descr_conjugate(space, w_float):
+    return space.float(w_float)
 
 register_all(vars(), globals())
 
@@ -168,10 +166,10 @@
         if total_digits > min(const_one, const_two) // 4:
             raise OperationError(space.w_ValueError, space.wrap("way too long"))
         if i < length and (s[i] == "p" or s[i] == "P"):
+            i += 1
             if i == length:
                 raise OperationError(space.w_ValueError,
                                      space.wrap("invalid hex string"))
-            i += 1
             exp_sign = 1
             if s[i] == "-" or s[i] == "+":
                 if s[i] == "-":
@@ -280,6 +278,7 @@
                                        as_classmethod=True),
     fromhex = gateway.interp2app(descr_fromhex,
                                  as_classmethod=True),
+    conjugate = gateway.interp2app(descr_conjugate),
     real = typedef.GetSetProperty(descr_get_real),
     imag = typedef.GetSetProperty(descr_get_imag),
 )
diff --git a/pypy/objspace/std/inttype.py b/pypy/objspace/std/inttype.py
--- a/pypy/objspace/std/inttype.py
+++ b/pypy/objspace/std/inttype.py
@@ -11,14 +11,19 @@
 
 # ____________________________________________________________
 
-int_conjugate = SMM("conjugate", 1, doc="Returns self, the complex conjugate of any int.")
+def descr_conjugate(space, w_int):
+    "Returns self, the complex conjugate of any int."
+    return space.int(w_int)
 
-def int_conjugate__ANY(space, w_int):
-    return space.pos(w_int)
+def descr_bit_length(space, w_int):
+    """int.bit_length() -> int
 
-int_bit_length = SMM("bit_length", 1, doc="int.bit_length() -> int\n\nNumber of bits necessary to represent self in binary.\n>>> bin(37)\n'0b100101'\n>>> (37).bit_length()\n6")
-
-def int_bit_length__ANY(space, w_int):
+    Number of bits necessary to represent self in binary.
+    >>> bin(37)
+    '0b100101'
+    >>> (37).bit_length()
+    6
+    """
     val = space.int_w(w_int)
     if val < 0:
         val = -val
@@ -28,8 +33,6 @@
         val >>= 1
     return space.wrap(bits)
 
-register_all(vars(), globals())
-
 
 def wrapint(space, x):
     if space.config.objspace.std.withsmallint:
@@ -196,6 +199,8 @@
 non-string. If the argument is outside the integer range a long object
 will be returned instead.''',
     __new__ = gateway.interp2app(descr__new__),
+    conjugate = gateway.interp2app(descr_conjugate),
+    bit_length = gateway.interp2app(descr_bit_length),
     numerator = typedef.GetSetProperty(descr_get_numerator),
     denominator = typedef.GetSetProperty(descr_get_denominator),
     real = typedef.GetSetProperty(descr_get_real),
diff --git a/pypy/objspace/std/longtype.py b/pypy/objspace/std/longtype.py
--- a/pypy/objspace/std/longtype.py
+++ b/pypy/objspace/std/longtype.py
@@ -4,12 +4,8 @@
 from pypy.objspace.std.stdtypedef import StdTypeDef, SMM
 from pypy.objspace.std.strutil import string_to_bigint, ParseStringError
 
-long_conjugate = SMM("conjugate", 1, doc="Returns self, the complex conjugate of any long.")
-
-def long_conjugate__ANY(space, w_int):
-    return space.pos(w_int)
-
-register_all(vars(), globals())
+def descr_conjugate(space, w_int):
+    return space.long(w_int)
 
 
 def descr__new__(space, w_longtype, w_x=0, w_base=gateway.NoneNotWrapped):
@@ -128,6 +124,7 @@
 string, use the optional base.  It is an error to supply a base when
 converting a non-string.''',
     __new__ = gateway.interp2app(descr__new__),
+    conjugate = gateway.interp2app(descr_conjugate),
     numerator = typedef.GetSetProperty(descr_get_numerator),
     denominator = typedef.GetSetProperty(descr_get_denominator),
     real = typedef.GetSetProperty(descr_get_real),
diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py
--- a/pypy/objspace/std/objspace.py
+++ b/pypy/objspace/std/objspace.py
@@ -266,6 +266,7 @@
         return None
 
     def unwrap(self, w_obj):
+        """NOT_RPYTHON"""
         if isinstance(w_obj, Wrappable):
             return w_obj
         if isinstance(w_obj, model.W_Object):
diff --git a/pypy/objspace/std/smalltupleobject.py b/pypy/objspace/std/smalltupleobject.py
--- a/pypy/objspace/std/smalltupleobject.py
+++ b/pypy/objspace/std/smalltupleobject.py
@@ -33,7 +33,7 @@
         raise NotImplementedError
 
     def unwrap(w_tuple, space):
-        items = [space.unwrap(w_item) for w_item in w_tuple.tolist()] # XXX generic mixed types unwrap
+        items = [space.unwrap(w_item) for w_item in w_tuple.tolist()]
         return tuple(items)
 
 def make_specialized_class(n):
diff --git a/pypy/objspace/std/stringobject.py b/pypy/objspace/std/stringobject.py
--- a/pypy/objspace/std/stringobject.py
+++ b/pypy/objspace/std/stringobject.py
@@ -252,15 +252,30 @@
 
     res_w = []
     start = 0
-    while maxsplit != 0:
-        next = value.find(by, start)
-        if next < 0:
-            break
-        res_w.append(sliced(space, value, start, next, w_self))
-        start = next + bylen
-        maxsplit -= 1   # NB. if it's already < 0, it stays < 0
+    if bylen == 1 and maxsplit < 0:
+        # fast path: uses str.rfind(character) and str.count(character)
+        by = by[0]    # annotator hack: string -> char
+        count = value.count(by)
+        res_w = [None] * (count + 1)
+        end = len(value)
+        while count >= 0:
+            assert end >= 0
+            prev = value.rfind(by, 0, end)
+            start = prev + 1
+            assert start >= 0
+            res_w[count] = sliced(space, value, start, end, w_self)
+            count -= 1
+            end = prev
+    else:
+        while maxsplit != 0:
+            next = value.find(by, start)
+            if next < 0:
+                break
+            res_w.append(sliced(space, value, start, next, w_self))
+            start = next + bylen
+            maxsplit -= 1   # NB. if it's already < 0, it stays < 0
+        res_w.append(sliced(space, value, start, len(value), w_self))
 
-    res_w.append(sliced(space, value, start, len(value), w_self))
     return space.newlist(res_w)
 
 def str_rsplit__String_None_ANY(space, w_self, w_none, w_maxsplit=-1):
diff --git a/pypy/objspace/std/test/test_floatobject.py b/pypy/objspace/std/test/test_floatobject.py
--- a/pypy/objspace/std/test/test_floatobject.py
+++ b/pypy/objspace/std/test/test_floatobject.py
@@ -63,6 +63,19 @@
     def setup_class(cls):
         cls.w_py26 = cls.space.wrap(sys.version_info >= (2, 6))
 
+    def test_conjugate(self):
+        assert (1.).conjugate() == 1.
+        assert (-1.).conjugate() == -1.
+
+        class F(float):
+            pass
+        assert F(1.).conjugate() == 1.
+
+        class F(float):
+            def __pos__(self):
+                return 42.
+        assert F(1.).conjugate() == 1.
+
     def test_negatives(self):
         assert -1.1 < 0
         assert -0.1 < 0
@@ -751,3 +764,6 @@
                 pass
             else:
                 self.identical(x, float.fromhex(x.hex()))
+
+    def test_invalid(self):
+        raises(ValueError, float.fromhex, "0P")
diff --git a/pypy/objspace/std/test/test_intobject.py b/pypy/objspace/std/test/test_intobject.py
--- a/pypy/objspace/std/test/test_intobject.py
+++ b/pypy/objspace/std/test/test_intobject.py
@@ -285,6 +285,19 @@
 
 class AppTestInt:
 
+    def test_conjugate(self):
+        assert (1).conjugate() == 1
+        assert (-1).conjugate() == -1
+
+        class I(int):
+            pass
+        assert I(1).conjugate() == 1
+
+        class I(int):
+            def __pos__(self):
+                return 42
+        assert I(1).conjugate() == 1
+
     def test_trunc(self):
         import math
         assert math.trunc(1) == 1
diff --git a/pypy/objspace/std/test/test_longobject.py b/pypy/objspace/std/test/test_longobject.py
--- a/pypy/objspace/std/test/test_longobject.py
+++ b/pypy/objspace/std/test/test_longobject.py
@@ -300,6 +300,11 @@
 
         assert type(L(7).conjugate()) is long
 
+        class L(long):
+            def __pos__(self):
+                return 43
+        assert L(7).conjugate() == 7L
+
     def test_bit_length(self):
         assert 8L.bit_length() == 4
         assert (-1<<40).bit_length() == 41
diff --git a/pypy/objspace/std/tupleobject.py b/pypy/objspace/std/tupleobject.py
--- a/pypy/objspace/std/tupleobject.py
+++ b/pypy/objspace/std/tupleobject.py
@@ -23,7 +23,7 @@
         return "%s(%s)" % (w_self.__class__.__name__, ', '.join(reprlist))
 
     def unwrap(w_tuple, space):
-        items = [space.unwrap(w_item) for w_item in w_tuple.wrappeditems] # XXX generic mixed types unwrap
+        items = [space.unwrap(w_item) for w_item in w_tuple.wrappeditems]
         return tuple(items)
 
 registerimplementation(W_TupleObject)
diff --git a/pypy/objspace/test/test_descroperation.py b/pypy/objspace/test/test_descroperation.py
--- a/pypy/objspace/test/test_descroperation.py
+++ b/pypy/objspace/test/test_descroperation.py
@@ -524,6 +524,31 @@
         assert issubclass(B, B)
         assert issubclass(23, B)
 
+    def test_truth_of_long(self):
+        class X(object):
+            def __len__(self): return 1L
+            __nonzero__ = __len__
+        assert X()
+        del X.__nonzero__
+        assert X()
+
+    def test_len_overflow(self):
+        import sys
+        class X(object):
+            def __len__(self):
+                return sys.maxsize + 1
+        raises(OverflowError, len, X())
+
+    def test_len_underflow(self):
+        import sys
+        class X(object):
+            def __len__(self):
+                return -1
+        raises(ValueError, len, X())
+        class Y(object):
+            def __len__(self):
+                return -1L
+        raises(ValueError, len, Y())
 
 class AppTestWithBuiltinShortcut(AppTest_Descroperation):
     OPTIONS = {'objspace.std.builtinshortcut': True}
diff --git a/pypy/objspace/trace.py b/pypy/objspace/trace.py
--- a/pypy/objspace/trace.py
+++ b/pypy/objspace/trace.py
@@ -110,10 +110,10 @@
         self.result.append(EnterFrame(frame))
         self.ec.enter(frame)
 
-    def leave(self, frame, w_exitvalue):
+    def leave(self, frame, w_exitvalue, got_exception):
         """ called just after evaluating of a frame is suspended/finished. """
         self.result.append(LeaveFrame(frame))
-        self.ec.leave(frame, w_exitvalue)
+        self.ec.leave(frame, w_exitvalue, got_exception)
 
     def bytecode_trace(self, frame):
         """ called just before execution of a bytecode. """
diff --git a/pypy/rlib/_jit_vref.py b/pypy/rlib/_jit_vref.py
--- a/pypy/rlib/_jit_vref.py
+++ b/pypy/rlib/_jit_vref.py
@@ -50,6 +50,7 @@
 
     def rtype_simple_call(self, hop):
         [v] = hop.inputargs(self)
+        hop.exception_is_here()
         v = hop.genop('jit_force_virtual', [v], resulttype = OBJECTPTR)
         return hop.genop('cast_pointer', [v], resulttype = hop.r_result)
 
@@ -65,6 +66,7 @@
     lowleveltype = OBJECT
     def rtype_simple_call(self, hop):
         [v] = hop.inputargs(self)
+        hop.exception_is_here()
         v = hop.genop('jit_force_virtual', [v], resulttype = OBJECT)
         return hop.genop('oodowncast', [v], resulttype = hop.r_result)
     
diff --git a/pypy/rlib/jit.py b/pypy/rlib/jit.py
--- a/pypy/rlib/jit.py
+++ b/pypy/rlib/jit.py
@@ -183,7 +183,6 @@
 # VRefs
 
 def virtual_ref(x):
-
     """Creates a 'vref' object that contains a reference to 'x'.  Calls
     to virtual_ref/virtual_ref_finish must be properly nested.  The idea
     is that the object 'x' is supposed to be JITted as a virtual between
@@ -194,10 +193,10 @@
     return DirectJitVRef(x)
 virtual_ref.oopspec = 'virtual_ref(x)'
 
-def virtual_ref_finish(x):
-    """See docstring in virtual_ref(x).  Note that virtual_ref_finish
-    takes as argument the real object, not the vref."""
+def virtual_ref_finish(vref, x):
+    """See docstring in virtual_ref(x)"""
     keepalive_until_here(x)   # otherwise the whole function call is removed
+    _virtual_ref_finish(vref, x)
 virtual_ref_finish.oopspec = 'virtual_ref_finish(x)'
 
 def non_virtual_ref(x):
@@ -205,19 +204,39 @@
     Used for None or for frames outside JIT scope."""
     return DirectVRef(x)
 
+class InvalidVirtualRef(Exception):
+    """
+    Raised if we try to call a non-forced virtualref after the call to
+    virtual_ref_finish
+    """
+
 # ---------- implementation-specific ----------
 
 class DirectVRef(object):
     def __init__(self, x):
         self._x = x
+        self._state = 'non-forced'
+
     def __call__(self):
+        if self._state == 'non-forced':
+            self._state = 'forced'
+        elif self._state == 'invalid':
+            raise InvalidVirtualRef
         return self._x
 
+    def _finish(self):
+        if self._state == 'non-forced':
+            self._state = 'invalid'
+
 class DirectJitVRef(DirectVRef):
     def __init__(self, x):
         assert x is not None, "virtual_ref(None) is not allowed"
         DirectVRef.__init__(self, x)
 
+def _virtual_ref_finish(vref, x):
+    assert vref._x is x, "Invalid call to virtual_ref_finish"
+    vref._finish()
+
 class Entry(ExtRegistryEntry):
     _about_ = (non_virtual_ref, DirectJitVRef)
 
@@ -237,6 +256,15 @@
         s_obj = self.bookkeeper.immutablevalue(self.instance())
         return _jit_vref.SomeVRef(s_obj)
 
+class Entry(ExtRegistryEntry):
+    _about_ = _virtual_ref_finish
+
+    def compute_result_annotation(self, s_vref, s_obj):
+        pass
+
+    def specialize_call(self, hop):
+        pass
+    
 vref_None = non_virtual_ref(None)
 
 # ____________________________________________________________
@@ -342,6 +370,24 @@
                             raise
     set_user_param._annspecialcase_ = 'specialize:arg(0)'
 
+    
+    def on_compile(self, logger, looptoken, operations, type, *greenargs):
+        """ A hook called when loop is compiled. Overwrite
+        for your own jitdriver if you want to do something special, like
+        call applevel code
+        """
+
+    def on_compile_bridge(self, logger, orig_looptoken, operations, n):
+        """ A hook called when a bridge is compiled. Overwrite
+        for your own jitdriver if you want to do something special
+        """
+
+    # note: if you overwrite this functions with the above signature it'll
+    #       work, but the *greenargs is different for each jitdriver, so we
+    #       can't share the same methods
+    del on_compile
+    del on_compile_bridge
+
     def _make_extregistryentries(self):
         # workaround: we cannot declare ExtRegistryEntries for functions
         # used as methods of a frozen object, but we can attach the
diff --git a/pypy/rlib/libffi.py b/pypy/rlib/libffi.py
--- a/pypy/rlib/libffi.py
+++ b/pypy/rlib/libffi.py
@@ -1,12 +1,15 @@
+from __future__ import with_statement
+
 from pypy.rpython.lltypesystem import rffi, lltype
 from pypy.rlib.objectmodel import specialize, enforceargs, we_are_translated
-from pypy.rlib.rarithmetic import intmask, r_uint
+from pypy.rlib.rarithmetic import intmask, r_uint, r_singlefloat
 from pypy.rlib import jit
 from pypy.rlib import clibffi
 from pypy.rlib.clibffi import get_libc_name, FUNCFLAG_CDECL, AbstractFuncPtr, \
-    push_arg_as_ffiptr, c_ffi_call
+    push_arg_as_ffiptr, c_ffi_call, FFI_TYPE_STRUCT
 from pypy.rlib.rdynload import dlopen, dlclose, dlsym, dlsym_byordinal
 from pypy.rlib.rdynload import DLLHANDLE
+from pypy.rlib.longlong2float import longlong2float, float2longlong
 
 class types(object):
     """
@@ -31,6 +34,9 @@
                 setattr(cls, name, value)
         cls.slong = clibffi.cast_type_to_ffitype(rffi.LONG)
         cls.ulong = clibffi.cast_type_to_ffitype(rffi.ULONG)
+        cls.slonglong = clibffi.cast_type_to_ffitype(rffi.LONGLONG)
+        cls.ulonglong = clibffi.cast_type_to_ffitype(rffi.ULONGLONG)
+        cls.wchar_t = clibffi.cast_type_to_ffitype(lltype.UniChar)
         del cls._import
 
     @staticmethod
@@ -41,7 +47,8 @@
         """
         if   ffi_type is types.void:    return 'v'
         elif ffi_type is types.double:  return 'f'
-        elif ffi_type is types.pointer: return 'i'
+        elif ffi_type is types.float:   return 's'
+        elif ffi_type is types.pointer: return 'u'
         #
         elif ffi_type is types.schar:   return 'i'
         elif ffi_type is types.uchar:   return 'u'
@@ -58,13 +65,19 @@
         elif ffi_type is types.uint16:  return 'u'
         elif ffi_type is types.sint32:  return 'i'
         elif ffi_type is types.uint32:  return 'u'
-        ## we only support integers that fit in a lltype.Signed (==rffi.LONG)
-        ## (on 64-bit platforms, types.sint64 is types.slong and the case is
-        ## caught above)
-        ## elif ffi_type is types.sint64:  return 'i'
-        ## elif ffi_type is types.uint64:  return 'u'
+        ## (note that on 64-bit platforms, types.sint64 is types.slong and the
+        ## case is caught above)
+        elif ffi_type is types.sint64:  return 'I'
+        elif ffi_type is types.uint64:  return 'U'
+        #
+        elif types.is_struct(ffi_type): return 'S'
         raise KeyError
 
+    @staticmethod
+    @jit.purefunction
+    def is_struct(ffi_type):
+        return intmask(ffi_type.c_type) == intmask(FFI_TYPE_STRUCT)
+
 types._import()
 
 @specialize.arg(0)
@@ -78,8 +91,11 @@
     sz = rffi.sizeof(TYPE)
     return sz <= rffi.sizeof(rffi.LONG)
 
+
 # ======================================================================
 
+IS_32_BIT = (r_uint.BITS == 32)
+
 @specialize.memo()
 def _check_type(TYPE):
     if isinstance(TYPE, lltype.Ptr):
@@ -105,11 +121,37 @@
             val = rffi.cast(rffi.LONG, val)
         elif TYPE is rffi.DOUBLE:
             cls = FloatArg
+        elif TYPE is rffi.LONGLONG or TYPE is rffi.ULONGLONG:
+            raise TypeError, 'r_(u)longlong not supported by arg(), use arg_(u)longlong()'
+        elif TYPE is rffi.FLOAT:
+            raise TypeError, 'r_singlefloat not supported by arg(), use arg_singlefloat()'
         else:
             raise TypeError, 'Unsupported argument type: %s' % TYPE
         self._append(cls(val))
         return self
 
+    def arg_raw(self, val):
+        self._append(RawArg(val))
+
+    def arg_longlong(self, val):
+        """
+        Note: this is a hack. So far, the JIT does not support long longs, so
+        you must pass it as if it were a python Float (rffi.DOUBLE).  You can
+        use the convenience functions longlong2float and float2longlong to do
+        the conversions.  Note that if you use long longs, the call won't
+        be jitted at all.
+        """
+        assert IS_32_BIT      # use a normal integer on 64-bit platforms
+        self._append(LongLongArg(val))
+
+    def arg_singlefloat(self, val):
+        """
+        Note: you must pass a python Float (rffi.DOUBLE), not a r_singlefloat
+        (else the jit complains).  Note that if you use single floats, the
+        call won't be jitted at all.
+        """
+        self._append(SingleFloatArg(val))
+
     def _append(self, arg):
         if self.first is None:
             self.first = self.last = arg
@@ -132,8 +174,9 @@
     def push(self, func, ll_args, i):
         func._push_int(self.intval, ll_args, i)
 
+
 class FloatArg(AbstractArg):
-    """ An argument holding a float
+    """ An argument holding a python float (i.e. a C double)
     """
 
     def __init__(self, floatval):
@@ -142,6 +185,37 @@
     def push(self, func, ll_args, i):
         func._push_float(self.floatval, ll_args, i)
 
+class RawArg(AbstractArg):
+    """ An argument holding a raw pointer to put inside ll_args
+    """
+
+    def __init__(self, ptrval):
+        self.ptrval = ptrval
+
+    def push(self, func, ll_args, i):
+        func._push_raw(self.ptrval, ll_args, i)
+
+class SingleFloatArg(AbstractArg):
+    """ An argument representing a C float (but holding a C double)
+    """
+
+    def __init__(self, floatval):
+        self.floatval = floatval
+
+    def push(self, func, ll_args, i):
+        func._push_single_float(self.floatval, ll_args, i)
+
+
+class LongLongArg(AbstractArg):
+    """ An argument representing a C long long (but holding a C double)
+    """
+
+    def __init__(self, floatval):
+        self.floatval = floatval
+
+    def push(self, func, ll_args, i):
+        func._push_longlong(self.floatval, ll_args, i)
+
 
 # ======================================================================
 
@@ -164,8 +238,8 @@
     # ========================================================================
 
     @jit.unroll_safe
-    @specialize.arg(2)
-    def call(self, argchain, RESULT):
+    @specialize.arg(2, 3)
+    def call(self, argchain, RESULT, is_struct=False):
         # WARNING!  This code is written carefully in a way that the JIT
         # optimizer will see a sequence of calls like the following:
         #
@@ -179,6 +253,7 @@
         # the optimizer will fail to recognize the pattern and won't turn it
         # into a fast CALL.  Note that "arg = arg.next" is optimized away,
         # assuming that archain is completely virtual.
+        self = jit.hint(self, promote=True)
         if argchain.numargs != len(self.argtypes):
             raise TypeError, 'Wrong number of arguments: %d expected, got %d' %\
                 (argchain.numargs, len(self.argtypes))
@@ -190,10 +265,24 @@
             i += 1
             arg = arg.next
         #
-        if _fits_into_long(RESULT):
+        if is_struct:
+            assert types.is_struct(self.restype)
+            res = self._do_call_raw(self.funcsym, ll_args)
+        elif _fits_into_long(RESULT):
+            assert not types.is_struct(self.restype)
             res = self._do_call_int(self.funcsym, ll_args)
         elif RESULT is rffi.DOUBLE:
             return self._do_call_float(self.funcsym, ll_args)
+        elif RESULT is rffi.FLOAT:
+            # XXX: even if RESULT is FLOAT, we still return a DOUBLE, else the
+            # jit complains. Note that the jit is disabled in this case
+            return self._do_call_single_float(self.funcsym, ll_args)
+        elif RESULT is rffi.LONGLONG or RESULT is rffi.ULONGLONG:
+            # XXX: even if RESULT is LONGLONG, we still return a DOUBLE, else the
+            # jit complains. Note that the jit is disabled in this case
+            # (it's not a typo, we really return a DOUBLE)
+            assert IS_32_BIT
+            return self._do_call_longlong(self.funcsym, ll_args)
         elif RESULT is lltype.Void:
             return self._do_call_void(self.funcsym, ll_args)
         else:
@@ -222,11 +311,26 @@
     def _push_int(self, value, ll_args, i):
         self._push_arg(value, ll_args, i)
 
+    @jit.dont_look_inside
+    def _push_raw(self, value, ll_args, i):
+        ll_args[i] = value
+
     @jit.oopspec('libffi_push_float(self, value, ll_args, i)')
     @enforceargs(   None, float, None,    int) # fix the annotation for tests
     def _push_float(self, value, ll_args, i):
         self._push_arg(value, ll_args, i)
 
+    @jit.dont_look_inside
+    def _push_single_float(self, value, ll_args, i):
+        self._push_arg(r_singlefloat(value), ll_args, i)
+
+    @jit.dont_look_inside
+    def _push_longlong(self, floatval, ll_args, i):
+        """
+        Takes a longlong represented as a python Float. It's a hack for the
+        jit, else we could not see the whole libffi module at all"""  
+        self._push_arg(float2longlong(floatval), ll_args, i)
+
     @jit.oopspec('libffi_call_int(self, funcsym, ll_args)')
     def _do_call_int(self, funcsym, ll_args):
         return self._do_call(funcsym, ll_args, rffi.LONG)
@@ -235,6 +339,21 @@
     def _do_call_float(self, funcsym, ll_args):
         return self._do_call(funcsym, ll_args, rffi.DOUBLE)
 
+    @jit.dont_look_inside
+    def _do_call_single_float(self, funcsym, ll_args):
+        single_res = self._do_call(funcsym, ll_args, rffi.FLOAT)
+        return float(single_res)
+
+    @jit.dont_look_inside
+    def _do_call_raw(self, funcsym, ll_args):
+        # same as _do_call_int, but marked as jit.dont_look_inside
+        return self._do_call(funcsym, ll_args, rffi.LONG)
+
+    @jit.dont_look_inside
+    def _do_call_longlong(self, funcsym, ll_args):
+        llres = self._do_call(funcsym, ll_args, rffi.LONGLONG)
+        return longlong2float(llres)
+
     @jit.oopspec('libffi_call_void(self, funcsym, ll_args)')
     def _do_call_void(self, funcsym, ll_args):
         return self._do_call(funcsym, ll_args, lltype.Void)
@@ -265,7 +384,14 @@
                             rffi.cast(rffi.VOIDPP, ll_args))
         if RESULT is not lltype.Void:
             TP = lltype.Ptr(rffi.CArray(RESULT))
-            res = rffi.cast(TP, ll_result)[0]
+            buf = rffi.cast(TP, ll_result)
+            if types.is_struct(self.restype):
+                assert RESULT == rffi.LONG
+                # for structs, we directly return the buffer and transfer the
+                # ownership
+                res = rffi.cast(RESULT, buf)
+            else:
+                res = buf[0]
         else:
             res = None
         self._free_buffers(ll_result, ll_args)
@@ -274,11 +400,19 @@
 
     def _free_buffers(self, ll_result, ll_args):
         if ll_result:
-            lltype.free(ll_result, flavor='raw')
+            self._free_buffer_maybe(rffi.cast(rffi.VOIDP, ll_result), self.restype)
         for i in range(len(self.argtypes)):
-            lltype.free(ll_args[i], flavor='raw')
+            argtype = self.argtypes[i]
+            self._free_buffer_maybe(ll_args[i], argtype)
         lltype.free(ll_args, flavor='raw')
 
+    def _free_buffer_maybe(self, buf, ffitype):
+        # if it's a struct, the buffer is not freed and the ownership is
+        # already of the caller (in case of ll_args buffers) or transferred to
+        # it (in case of ll_result buffer)
+        if not types.is_struct(ffitype):
+            lltype.free(buf, flavor='raw')
+
 
 # ======================================================================
 
@@ -288,11 +422,8 @@
     def __init__(self, libname):
         """Load the library, or raises DLOpenError."""
         self.lib = rffi.cast(DLLHANDLE, 0)
-        ll_libname = rffi.str2charp(libname)
-        try:
+        with rffi.scoped_str2charp(libname) as ll_libname:
             self.lib = dlopen(ll_libname)
-        finally:
-            lltype.free(ll_libname, flavor='raw')
 
     def __del__(self):
         if self.lib:
@@ -302,3 +433,6 @@
     def getpointer(self, name, argtypes, restype, flags=FUNCFLAG_CDECL):
         return Func(name, argtypes, restype, dlsym(self.lib, name),
                     flags=flags, keepalive=self)
+
+    def getaddressindll(self, name):
+        return dlsym(self.lib, name)
diff --git a/pypy/rlib/longlong2float.py b/pypy/rlib/longlong2float.py
--- a/pypy/rlib/longlong2float.py
+++ b/pypy/rlib/longlong2float.py
@@ -30,14 +30,19 @@
     return llval
 
 from pypy.translator.tool.cbuild import ExternalCompilationInfo
-eci = ExternalCompilationInfo(post_include_bits=["""
+eci = ExternalCompilationInfo(includes=['string.h', 'assert.h'],
+                              post_include_bits=["""
 static double pypy__longlong2float(long long x) {
-    char *p = (char*)&x;
-    return *((double*)p);
+    double dd;
+    assert(sizeof(double) == 8 && sizeof(long long) == 8);
+    memcpy(&dd, &x, 8);
+    return dd;
 }
 static long long pypy__float2longlong(double x) {
-    char *p = (char*)&x;
-    return *((long long*)p);
+    long long ll;
+    assert(sizeof(double) == 8 && sizeof(long long) == 8);
+    memcpy(&ll, &x, 8);
+    return ll;
 }
 """])
 
diff --git a/pypy/rlib/rbigint.py b/pypy/rlib/rbigint.py
--- a/pypy/rlib/rbigint.py
+++ b/pypy/rlib/rbigint.py
@@ -1345,6 +1345,7 @@
 # XXX make sure that we don't ignore this!
 # YYY no, we decided to do ignore this!
 
+ at jit.dont_look_inside
 def _AsDouble(n):
     """ Get a C double from a bigint object. """
     # This is a "correctly-rounded" version from Python 2.7.
diff --git a/pypy/rlib/rcoroutine.py b/pypy/rlib/rcoroutine.py
--- a/pypy/rlib/rcoroutine.py
+++ b/pypy/rlib/rcoroutine.py
@@ -29,7 +29,7 @@
 The type of a switch is determined by the target's costate.
 """
 
-from pypy.rlib.rstack import yield_current_frame_to_caller, resume_point
+from pypy.rlib.rstack import yield_current_frame_to_caller
 from pypy.rlib.objectmodel import we_are_translated
 
 from pypy.interpreter.error import OperationError
@@ -228,7 +228,6 @@
                         self.thunk = None
                         syncstate.switched(incoming_frame)
                         thunk.call()
-                        resume_point("coroutine__bind", state)
                     except Exception, e:
                         exc = e
                         raise
@@ -257,7 +256,6 @@
                 raise CoroutineDamage
             state = self.costate
             incoming_frame = state.update(self).switch()
-            resume_point("coroutine_switch", state, returns=incoming_frame)
             syncstate.switched(incoming_frame)
 
         def kill(self):
diff --git a/pypy/rlib/rgc.py b/pypy/rlib/rgc.py
--- a/pypy/rlib/rgc.py
+++ b/pypy/rlib/rgc.py
@@ -191,6 +191,21 @@
         hop.exception_cannot_occur()
         return hop.genop('gc_can_move', hop.args_v, resulttype=hop.r_result)
 
+def _make_sure_does_not_move(p):
+    """'p' is a non-null GC object.  This (tries to) make sure that the
+    object does not move any more, by forcing collections if needed.
+    Warning: should ideally only be used with the minimark GC, and only
+    on objects that are already a bit old, so have a chance to be
+    already non-movable."""
+    if not we_are_translated():
+        return
+    i = 0
+    while can_move(p):
+        if i > 6:
+            raise NotImplementedError("can't make object non-movable!")
+        collect(i)
+        i += 1
+
 def _heap_stats():
     raise NotImplementedError # can't be run directly
 
diff --git a/pypy/rlib/rsre/rsre_core.py b/pypy/rlib/rsre/rsre_core.py
--- a/pypy/rlib/rsre/rsre_core.py
+++ b/pypy/rlib/rsre/rsre_core.py
@@ -759,17 +759,27 @@
 @specializectx
 def find_repetition_end(ctx, ppos, ptr, maxcount):
     end = ctx.end
-    if maxcount <= 1:
-        if maxcount == 1 and ptr < end:
-            # Relatively common case: maxcount == 1.  If we are not at the
-            # end of the string, it's done by a single direct check.
-            op = ctx.pat(ppos)
-            for op1, checkerfn in unroll_char_checker:
-                if op1 == op:
-                    if checkerfn(ctx, ptr, ppos):
-                        return ptr + 1
+    ptrp1 = ptr + 1
+    # First get rid of the cases where we don't have room for any match.
+    if maxcount <= 0 or ptrp1 > end:
         return ptr
-    elif maxcount != 65535:
+    # Check the first character directly.  If it doesn't match, we are done.
+    # The idea is to be fast for cases like re.search("b+"), where we expect
+    # the common case to be a non-match.  It's much faster with the JIT to
+    # have the non-match inlined here rather than detect it in the fre() call.
+    op = ctx.pat(ppos)
+    for op1, checkerfn in unroll_char_checker:
+        if op1 == op:
+            if checkerfn(ctx, ptr, ppos):
+                break
+    else:
+        return ptr
+    # It matches at least once.  If maxcount == 1 (relatively common),
+    # then we are done.
+    if maxcount == 1:
+        return ptrp1
+    # Else we really need to count how many times it matches.
+    if maxcount != 65535:
         # adjust end
         end1 = ptr + maxcount
         if end1 <= end:
@@ -777,7 +787,7 @@
     op = ctx.pat(ppos)
     for op1, fre in unroll_fre_checker:
         if op1 == op:
-            return fre(ctx, ptr, end, ppos)
+            return fre(ctx, ptrp1, end, ppos)
     raise Error("rsre.find_repetition_end[%d]" % op)
 
 @specializectx
diff --git a/pypy/rlib/rsre/test/test_zjit.py b/pypy/rlib/rsre/test/test_zjit.py
--- a/pypy/rlib/rsre/test/test_zjit.py
+++ b/pypy/rlib/rsre/test/test_zjit.py
@@ -160,3 +160,9 @@
         res = self.meta_interp_match(r"<[\S ]+>", "<..a   .. aa>")
         assert res == 13
         self.check_enter_count(1)
+
+
+    def test_find_repetition_end_fastpath(self):
+        res = self.meta_interp_search(r"b+", "a"*30 + "b")
+        assert res == 30
+        self.check_loops(call=0)
diff --git a/pypy/rlib/rstack.py b/pypy/rlib/rstack.py
--- a/pypy/rlib/rstack.py
+++ b/pypy/rlib/rstack.py
@@ -42,16 +42,26 @@
                            sandboxsafe=True, _nowrapper=True,
                            _callable=_callable)
 
-_stack_get_start = llexternal('LL_stack_get_start', [], lltype.Signed,
-                              lambda: 0)
+_stack_get_end = llexternal('LL_stack_get_end', [], lltype.Signed,
+                            lambda: 0)
 _stack_get_length = llexternal('LL_stack_get_length', [], lltype.Signed,
                                lambda: 1)
+_stack_set_length_fraction = llexternal('LL_stack_set_length_fraction',
+                                        [lltype.Float], lltype.Void,
+                                        lambda frac: None)
 _stack_too_big_slowpath = llexternal('LL_stack_too_big_slowpath',
                                      [lltype.Signed], lltype.Char,
                                      lambda cur: '\x00')
 # the following is used by the JIT
-_stack_get_start_adr = llexternal('LL_stack_get_start_adr', [], lltype.Signed)
+_stack_get_end_adr   = llexternal('LL_stack_get_end_adr',   [], lltype.Signed)
+_stack_get_length_adr= llexternal('LL_stack_get_length_adr',[], lltype.Signed)
 
+# the following is also used by the JIT: "critical code" paths are paths in
+# which we should not raise StackOverflow at all, but just ignore the stack limit
+_stack_criticalcode_start = llexternal('LL_stack_criticalcode_start', [],
+                                       lltype.Void, lambda: None)
+_stack_criticalcode_stop = llexternal('LL_stack_criticalcode_stop', [],
+                                      lltype.Void, lambda: None)
 
 def stack_check():
     if not we_are_translated():
@@ -62,13 +72,13 @@
     current = llop.stack_current(lltype.Signed)
     #
     # Load these variables from C code
-    start = _stack_get_start()
+    end = _stack_get_end()
     length = _stack_get_length()
     #
-    # Common case: if 'current' is within [start:start+length], everything
+    # Common case: if 'current' is within [end-length:end], everything
     # is fine
-    ofs = r_uint(current - start)
-    if ofs < r_uint(length):
+    ofs = r_uint(end - current)
+    if ofs <= r_uint(length):
         return
     #
     # Else call the slow path
@@ -140,111 +150,6 @@
         return var
 
 
-def resume_point(label, *args, **kwds):
-    pass
-
-
-
-class ResumePointFnEntry(ExtRegistryEntry):
-    _about_ = resume_point
-
-    def compute_result_annotation(self, s_label, *args_s, **kwds_s):
-        from pypy.annotation import model as annmodel
-        return annmodel.s_None
-
-    def specialize_call(self, hop, **kwds_i):
-        from pypy.rpython.lltypesystem import lltype
-        from pypy.objspace.flow import model
-
-        assert hop.args_s[0].is_constant()
-        c_label = hop.inputconst(lltype.Void, hop.args_s[0].const)
-        args_v = hop.args_v[1:]
-        if 'i_returns' in kwds_i:
-            assert len(kwds_i) == 1
-            returns_index = kwds_i['i_returns']
-            v_return = args_v.pop(returns_index-1)
-            assert isinstance(v_return, model.Variable), \
-                   "resume_point returns= argument must be a Variable"
-        else:
-            assert not kwds_i
-            v_return = hop.inputconst(lltype.Void, None)
-
-        for v in args_v:
-            assert isinstance(v, model.Variable), "resume_point arguments must be Variables"
-
-        hop.exception_is_here()
-        return hop.genop('resume_point', [c_label, v_return] + args_v,
-                         hop.r_result)
-
-def resume_state_create(prevstate, label, *args):
-    raise RuntimeError("cannot resume states in non-translated versions")
-
-def concretify_argument(hop, index):
-    from pypy.objspace.flow import model
-
-    v_arg = hop.args_v[index]
-    if isinstance(v_arg, model.Variable):
-        return v_arg
-
-    r_arg = hop.rtyper.bindingrepr(v_arg)
-    return hop.inputarg(r_arg, arg=index)
-
-class ResumeStateCreateFnEntry(FrameStackTopReturningFnEntry):
-    _about_ = resume_state_create
-
-    def compute_result_annotation(self, s_prevstate, s_label, *args_s):
-        return FrameStackTopReturningFnEntry.compute_result_annotation(self)
-
-    def specialize_call(self, hop):
-        from pypy.rpython.lltypesystem import lltype
-
-        assert hop.args_s[1].is_constant()
-        c_label = hop.inputconst(lltype.Void, hop.args_s[1].const)
-
-        v_state = hop.inputarg(hop.r_result, arg=0)
-
-        args_v = []
-        for i in range(2, len(hop.args_v)):
-            args_v.append(concretify_argument(hop, i))
-
-        hop.exception_is_here()
-        return hop.genop('resume_state_create', [v_state, c_label] + args_v,
-                         hop.r_result)
-
-def resume_state_invoke(type, state, **kwds):
-    raise NotImplementedError("only works in translated versions")
-
-class ResumeStateInvokeFnEntry(ExtRegistryEntry):
-    _about_ = resume_state_invoke
-
-    def compute_result_annotation(self, s_type, s_state, **kwds):
-        from pypy.annotation.bookkeeper import getbookkeeper
-        assert s_type.is_constant()
-        return getbookkeeper().valueoftype(s_type.const)
-
-    def specialize_call(self, hop, **kwds_i):
-        from pypy.rpython.lltypesystem import lltype
-        v_state = hop.args_v[1]
-        
-        if 'i_returning' in kwds_i:
-            assert len(kwds_i) == 1
-            returning_index = kwds_i['i_returning']
-            v_returning = concretify_argument(hop, returning_index)
-            v_raising = hop.inputconst(lltype.Void, None)
-        elif 'i_raising' in kwds_i:
-            assert len(kwds_i) == 1
-            raising_index = kwds_i['i_raising']
-            v_returning = hop.inputconst(lltype.Void, None)
-            v_raising = concretify_argument(hop, raising_index)
-        else:
-            assert not kwds_i
-            v_returning = hop.inputconst(lltype.Void, None)
-            v_raising = hop.inputconst(lltype.Void, None)
-
-        hop.exception_is_here()
-        return hop.genop('resume_state_invoke', [v_state, v_returning, v_raising],
-                         hop.r_result)
-        
 # ____________________________________________________________
 
 def get_stack_depth_limit():
diff --git a/pypy/rlib/streamio.py b/pypy/rlib/streamio.py
--- a/pypy/rlib/streamio.py
+++ b/pypy/rlib/streamio.py
@@ -141,7 +141,8 @@
 def construct_stream_tower(stream, buffering, universal, reading, writing,
                            binary):
     if buffering == 0:   # no buffering
-        pass
+        if reading:      # force some minimal buffering for readline()
+            stream = ReadlineInputStream(stream)
     elif buffering == 1:   # line-buffering
         if writing:
             stream = LineBufferingOutputStream(stream)
@@ -749,6 +750,113 @@
                                               flush_buffers=False)
 
 
+class ReadlineInputStream(Stream):
+
+    """Minimal buffering input stream.
+
+    Only does buffering for readline().  The other kinds of reads, and
+    all writes, are not buffered at all.
+    """
+
+    bufsize = 2**13 # 8 K
+
+    def __init__(self, base, bufsize=-1):
+        self.base = base
+        self.do_read = base.read   # function to fill buffer some more
+        self.do_seek = base.seek   # seek to a byte offset
+        if bufsize == -1:     # Get default from the class
+            bufsize = self.bufsize
+        self.bufsize = bufsize  # buffer size (hint only)
+        self.buf = None         # raw data (may contain "\n")
+        self.bufstart = 0
+
+    def flush_buffers(self):
+        if self.buf is not None:
+            try:
+                self.do_seek(self.bufstart-len(self.buf), 1)
+            except MyNotImplementedError:
+                pass
+            else:
+                self.buf = None
+                self.bufstart = 0
+
+    def readline(self):
+        if self.buf is not None:
+            i = self.buf.find('\n', self.bufstart)
+        else:
+            self.buf = ''
+            i = -1
+        #
+        if i < 0:
+            self.buf = self.buf[self.bufstart:]
+            self.bufstart = 0
+            while True:
+                bufsize = max(self.bufsize, len(self.buf) >> 2)
+                data = self.do_read(bufsize)
+                if not data:
+                    result = self.buf              # end-of-file reached
+                    self.buf = None
+                    return result
+                startsearch = len(self.buf)   # there is no '\n' in buf so far
+                self.buf += data
+                i = self.buf.find('\n', startsearch)
+                if i >= 0:
+                    break
+        #
+        i += 1
+        result = self.buf[self.bufstart:i]
+        self.bufstart = i
+        return result
+
+    def peek(self):
+        if self.buf is None:
+            return ''
+        if self.bufstart > 0:
+            self.buf = self.buf[self.bufstart:]
+            self.bufstart = 0
+        return self.buf
+
+    def tell(self):
+        pos = self.base.tell()
+        if self.buf is not None:
+            pos -= (len(self.buf) - self.bufstart)
+        return pos
+
+    def readall(self):
+        result = self.base.readall()
+        if self.buf is not None:
+            result = self.buf[self.bufstart:] + result
+            self.buf = None
+            self.bufstart = 0
+        return result
+
+    def read(self, n):
+        if self.buf is None:
+            return self.do_read(n)
+        else:
+            m = n - (len(self.buf) - self.bufstart)
+            start = self.bufstart
+            if m > 0:
+                result = self.buf[start:] + self.do_read(m)
+                self.buf = None
+                self.bufstart = 0
+                return result
+            elif n >= 0:
+                self.bufstart = start + n
+                return self.buf[start : self.bufstart]
+            else:
+                return ''
+
+    seek       = PassThrough("seek",      flush_buffers=True)
+    write      = PassThrough("write",     flush_buffers=True)
+    truncate   = PassThrough("truncate",  flush_buffers=True)
+    flush      = PassThrough("flush",     flush_buffers=True)
+    flushable  = PassThrough("flushable", flush_buffers=False)
+    close      = PassThrough("close",     flush_buffers=False)
+    try_to_find_file_descriptor = PassThrough("try_to_find_file_descriptor",
+                                              flush_buffers=False)
+
+
 class BufferingOutputStream(Stream):
 
     """Standard buffering output stream.
diff --git a/pypy/rlib/test/test__jit_vref.py b/pypy/rlib/test/test__jit_vref.py
--- a/pypy/rlib/test/test__jit_vref.py
+++ b/pypy/rlib/test/test__jit_vref.py
@@ -1,6 +1,6 @@
 import py
 from pypy.rlib.jit import virtual_ref, virtual_ref_finish
-from pypy.rlib.jit import vref_None, non_virtual_ref
+from pypy.rlib.jit import vref_None, non_virtual_ref, InvalidVirtualRef
 from pypy.rlib._jit_vref import SomeVRef
 from pypy.annotation import model as annmodel
 from pypy.annotation.annrpython import RPythonAnnotator
@@ -23,18 +23,23 @@
     pass
 
 
-def test_direct_1():
+def test_direct_forced():
     x1 = X()
     vref = virtual_ref(x1)
+    assert vref._state == 'non-forced'
     assert vref() is x1
-    virtual_ref_finish(x1)
+    assert vref._state == 'forced'
+    virtual_ref_finish(vref, x1)
+    assert vref._state == 'forced'
     assert vref() is x1
 
-def test_direct_2():
+def test_direct_invalid():
     x1 = X()
     vref = virtual_ref(x1)
-    virtual_ref_finish(x1)
-    assert vref() is x1
+    assert vref._state == 'non-forced'
+    virtual_ref_finish(vref, x1)
+    assert vref._state == 'invalid'
+    py.test.raises(InvalidVirtualRef, "vref()")
 
 def test_annotate_1():
     def f():
@@ -50,7 +55,7 @@
         x1 = X()
         vref = virtual_ref(x1)
         x2 = vref()
-        virtual_ref_finish(x1)
+        virtual_ref_finish(vref, x1)
         return x2
     a = RPythonAnnotator()
     s = a.build_types(f, [])
@@ -95,7 +100,7 @@
             x1 = X()
             vref = virtual_ref(x1)
             x2 = vref()
-            virtual_ref_finish(x2)
+            virtual_ref_finish(vref, x2)
             return x2
         x = self.interpret(f, [])
         assert self.castable(self.OBJECTTYPE, x)
@@ -119,6 +124,18 @@
         assert lltype.typeOf(x) == self.OBJECTTYPE
         assert not x
 
+    def test_rtype_5(self):
+        def f():
+            vref = virtual_ref(X())
+            try:
+                vref()
+                return 42
+            except InvalidVirtualRef:
+                return -1
+        x = self.interpret(f, [])
+        assert x == 42
+
+
 class TestLLtype(BaseTestVRef, LLRtypeMixin):
     OBJECTTYPE = OBJECTPTR
     def castable(self, TO, var):
diff --git a/pypy/rlib/test/test_jit.py b/pypy/rlib/test/test_jit.py
--- a/pypy/rlib/test/test_jit.py
+++ b/pypy/rlib/test/test_jit.py
@@ -52,9 +52,12 @@
         import sys
         
         s = StringIO()
+        prev = sys.stdout
         sys.stdout = s
-        dis.dis(g)
-        sys.stdout = sys.__stdout__
+        try:
+            dis.dis(g)
+        finally:
+            sys.stdout = prev
         x = s.getvalue().find('CALL_FUNCTION')
         assert x != -1
         x = s.getvalue().find('CALL_FUNCTION', x)
diff --git a/pypy/rlib/test/test_libffi.py b/pypy/rlib/test/test_libffi.py
--- a/pypy/rlib/test/test_libffi.py
+++ b/pypy/rlib/test/test_libffi.py
@@ -2,8 +2,10 @@
 import sys
 from pypy.rpython.lltypesystem import rffi, lltype
 from pypy.rpython.lltypesystem.ll2ctypes import ALLOCATED
-from pypy.rlib.test.test_clibffi import BaseFfiTest, get_libm_name
+from pypy.rlib.rarithmetic import r_singlefloat, r_longlong, r_ulonglong
+from pypy.rlib.test.test_clibffi import BaseFfiTest, get_libm_name, make_struct_ffitype_e
 from pypy.rlib.libffi import CDLL, Func, get_libc_name, ArgChain, types
+from pypy.rlib.libffi import longlong2float, float2longlong, IS_32_BIT
 
 class TestLibffiMisc(BaseFfiTest):
 
@@ -50,6 +52,18 @@
         del lib
         assert not ALLOCATED
 
+    def test_longlong_as_float(self):
+        from pypy.translator.c.test.test_genc import compile
+        maxint64 = r_longlong(9223372036854775807)
+        def fn(x):
+            d = longlong2float(x)
+            ll = float2longlong(d)
+            return ll
+        assert fn(maxint64) == maxint64
+        #
+        fn2 = compile(fn, [r_longlong])
+        res = fn2(maxint64)
+        assert res == maxint64
 
 class TestLibffiCall(BaseFfiTest):
     """
@@ -97,7 +111,7 @@
     def get_libfoo(self):
         return self.CDLL(self.libfoo_name)
 
-    def call(self, funcspec, args, RESULT, init_result=0):
+    def call(self, funcspec, args, RESULT, init_result=0, is_struct=False):
         """
         Call the specified function after constructing and ArgChain with the
         arguments in ``args``.
@@ -114,8 +128,20 @@
         func = lib.getpointer(name, argtypes, restype)
         chain = ArgChain()
         for arg in args:
-            chain.arg(arg)
-        return func.call(chain, RESULT)
+            if isinstance(arg, r_singlefloat):
+                chain.arg_singlefloat(float(arg))
+            elif IS_32_BIT and isinstance(arg, r_longlong):
+                chain.arg_longlong(longlong2float(arg))
+            elif IS_32_BIT and isinstance(arg, r_ulonglong):
+                arg = rffi.cast(rffi.LONGLONG, arg)
+                chain.arg_longlong(longlong2float(arg))
+            elif isinstance(arg, tuple):
+                methname, arg = arg
+                meth = getattr(chain, methname)
+                meth(arg)
+            else:
+                chain.arg(arg)
+        return func.call(chain, RESULT, is_struct=is_struct)
 
     def check_loops(self, *args, **kwds):
         """
@@ -137,7 +163,7 @@
         res = self.call(func, [38, 4.2], rffi.LONG)
         assert res == 42
         self.check_loops({
-                'call_may_force': 1,
+                'call_release_gil': 1,
                 'guard_no_exception': 1,
                 'guard_not_forced': 1,
                 'int_add': 1,
@@ -150,7 +176,7 @@
         func = (libm, 'pow', [types.double, types.double], types.double)
         res = self.call(func, [2.0, 3.0], rffi.DOUBLE, init_result=0.0)
         assert res == 8.0
-        self.check_loops(call_may_force=1, guard_no_exception=1, guard_not_forced=1)
+        self.check_loops(call_release_gil=1, guard_no_exception=1, guard_not_forced=1)
 
     def test_cast_result(self):
         """
@@ -163,7 +189,7 @@
         func = (libfoo, 'cast_to_uchar_and_ovf', [types.sint], types.uchar)
         res = self.call(func, [0], rffi.UCHAR)
         assert res == 200
-        self.check_loops(call_may_force=1, guard_no_exception=1, guard_not_forced=1)
+        self.check_loops(call_release_gil=1, guard_no_exception=1, guard_not_forced=1)
 
     def test_cast_argument(self):
         """
@@ -267,6 +293,76 @@
         res = self.call(get_dummy, [], rffi.LONG)
         assert res == initval+1
 
+    def test_single_float_args(self):
+        """
+            float sum_xy_float(float x, float y)
+            {
+                return x+y;
+            }
+        """
+        from ctypes import c_float # this is used only to compute the expected result
+        libfoo = self.get_libfoo()
+        func = (libfoo, 'sum_xy_float', [types.float, types.float], types.float)
+        x = r_singlefloat(12.34)
+        y = r_singlefloat(56.78)
+        res = self.call(func, [x, y], rffi.FLOAT, init_result=0.0)
+        expected = c_float(c_float(12.34).value + c_float(56.78).value).value
+        assert res == expected
+
+    def test_slonglong_args(self):
+        """
+            long long sum_xy_longlong(long long x, long long y)
+            {
+                return x+y;
+            }
+        """
+        maxint32 = 2147483647 # we cannot really go above maxint on 64 bits
+                              # (and it would not test anything anyway, since
+                              # long is the same as long long there)
+        libfoo = self.get_libfoo()
+        func = (libfoo, 'sum_xy_longlong', [types.slonglong, types.slonglong],
+                types.slonglong)
+        if IS_32_BIT:
+            x = r_longlong(maxint32+1)
+            y = r_longlong(maxint32+2)
+            zero = longlong2float(r_longlong(0))
+        else:
+            x = maxint32+1
+            y = maxint32+2
+            zero = 0
+        res = self.call(func, [x, y], rffi.LONGLONG, init_result=zero)
+        if IS_32_BIT:
+            # obscure, on 32bit it's really a long long, so it returns a
+            # DOUBLE because of the JIT hack
+            res = float2longlong(res)
+        expected = maxint32*2 + 3
+        assert res == expected
+
+    def test_ulonglong_args(self):
+        """
+            unsigned long long sum_xy_ulonglong(unsigned long long x,
+                                                unsigned long long y)
+            {
+                return x+y;
+            }
+        """
+        maxint64 = 9223372036854775807 # maxint64+1 does not fit into a
+                                       # longlong, but it does into a
+                                       # ulonglong
+        libfoo = self.get_libfoo()
+        func = (libfoo, 'sum_xy_ulonglong', [types.ulonglong, types.ulonglong],
+                types.ulonglong)
+        x = r_ulonglong(maxint64+1)
+        y = r_ulonglong(2)
+        res = self.call(func, [x, y], rffi.ULONGLONG, init_result=0)
+        if IS_32_BIT:
+            # obscure, on 32bit it's really a long long, so it returns a
+            # DOUBLE because of the JIT hack
+            res = float2longlong(res)
+            res = rffi.cast(rffi.ULONGLONG, res)
+        expected = maxint64 + 3
+        assert res == expected
+
     def test_wrong_number_of_arguments(self):
         from pypy.rpython.llinterp import LLException
         libfoo = self.get_libfoo() 
@@ -287,3 +383,57 @@
 
         my_raises("self.call(func, [38], rffi.LONG)") # one less
         my_raises("self.call(func, [38, 12.3, 42], rffi.LONG)") # one more
+
+
+    def test_byval_argument(self):
+        """
+            struct Point {
+                long x;
+                long y;
+            };
+
+            long sum_point(struct Point p) {
+                return p.x + p.y;
+            }
+        """
+        libfoo = CDLL(self.libfoo_name)
+        ffi_point_struct = make_struct_ffitype_e(0, 0, [types.slong, types.slong])
+        ffi_point = ffi_point_struct.ffistruct
+        sum_point = (libfoo, 'sum_point', [ffi_point], types.slong)
+        #
+        ARRAY = rffi.CArray(rffi.LONG)
+        buf = lltype.malloc(ARRAY, 2, flavor='raw')
+        buf[0] = 30
+        buf[1] = 12
+        adr = rffi.cast(rffi.VOIDP, buf)
+        res = self.call(sum_point, [('arg_raw', adr)], rffi.LONG, init_result=0)
+        assert res == 42
+        # check that we still have the ownership on the buffer
+        assert buf[0] == 30
+        assert buf[1] == 12
+        lltype.free(buf, flavor='raw')
+        lltype.free(ffi_point_struct, flavor='raw')
+
+    def test_byval_result(self):
+        """
+            struct Point make_point(long x, long y) {
+                struct Point p;
+                p.x = x;
+                p.y = y;
+                return p;
+            }
+        """
+        libfoo = CDLL(self.libfoo_name)
+        ffi_point_struct = make_struct_ffitype_e(0, 0, [types.slong, types.slong])
+        ffi_point = ffi_point_struct.ffistruct
+
+        libfoo = CDLL(self.libfoo_name)
+        make_point = (libfoo, 'make_point', [types.slong, types.slong], ffi_point)
+        #
+        PTR = lltype.Ptr(rffi.CArray(rffi.LONG))
+        p = self.call(make_point, [12, 34], PTR, init_result=lltype.nullptr(PTR.TO),
+                      is_struct=True)
+        assert p[0] == 12
+        assert p[1] == 34
+        lltype.free(p, flavor='raw')
+        lltype.free(ffi_point_struct, flavor='raw')
diff --git a/pypy/rlib/test/test_streamio.py b/pypy/rlib/test/test_streamio.py
--- a/pypy/rlib/test/test_streamio.py
+++ b/pypy/rlib/test/test_streamio.py
@@ -1008,6 +1008,75 @@
             assert base.buf == data
 
 
+class TestReadlineInputStream:
+
+    packets = ["a", "b", "\n", "def", "\nxy\npq\nuv", "wx"]
+    lines = ["ab\n", "def\n", "xy\n", "pq\n", "uvwx"]
+
+    def makeStream(self, seek=False, tell=False, bufsize=-1):
+        base = TSource(self.packets)
+        self.source = base
+        def f(*args):
+            if seek is False:
+                raise NotImplementedError     # a bug!
+            if seek is None:
+                raise streamio.MyNotImplementedError   # can be caught
+            raise ValueError(seek)  # uh?
+        if not tell:
+            base.tell = f
+        if not seek:
+            base.seek = f
+        return streamio.ReadlineInputStream(base, bufsize)
+
+    def test_readline(self):
+        for file in [self.makeStream(), self.makeStream(bufsize=2)]:
+            i = 0
+            while 1:
+                r = file.readline()
+                if r == "":
+                    break
+                assert self.lines[i] == r
+                i += 1
+            assert i == len(self.lines)
+
+    def test_readline_and_read_interleaved(self):
+        for file in [self.makeStream(seek=True),
+                     self.makeStream(seek=True, bufsize=2)]:
+            i = 0
+            while 1:
+                firstchar = file.read(1)
+                if firstchar == "":
+                    break
+                r = file.readline()
+                assert r != ""
+                assert self.lines[i] == firstchar + r
+                i += 1
+            assert i == len(self.lines)
+
+    def test_readline_and_read_interleaved_no_seek(self):
+        for file in [self.makeStream(seek=None),
+                     self.makeStream(seek=None, bufsize=2)]:
+            i = 0
+            while 1:
+                firstchar = file.read(1)
+                if firstchar == "":
+                    break
+                r = file.readline()
+                assert r != ""
+                assert self.lines[i] == firstchar + r
+                i += 1
+            assert i == len(self.lines)
+
+    def test_readline_and_readall(self):
+        file = self.makeStream(seek=True, tell=True, bufsize=2)
+        r = file.readline()
+        assert r == 'ab\n'
+        assert file.tell() == 3
+        r = file.readall()
+        assert r == 'def\nxy\npq\nuvwx'
+        r = file.readall()
+        assert r == ''
+
 
 # Speed test
 
diff --git a/pypy/rpython/llinterp.py b/pypy/rpython/llinterp.py
--- a/pypy/rpython/llinterp.py
+++ b/pypy/rpython/llinterp.py
@@ -563,15 +563,6 @@
     def op_hint(self, x, hints):
         return x
 
-    def op_resume_point(self, *args):
-        pass
-
-    def op_resume_state_create(self, *args):
-        raise RuntimeError("resume_state_create can not be called.")
-
-    def op_resume_state_invoke(self, *args):
-        raise RuntimeError("resume_state_invoke can not be called.")
-
     def op_decode_arg(self, fname, i, name, vargs, vkwds):
         raise NotImplementedError("decode_arg")
 
diff --git a/pypy/rpython/lltypesystem/ll2ctypes.py b/pypy/rpython/lltypesystem/ll2ctypes.py
--- a/pypy/rpython/lltypesystem/ll2ctypes.py
+++ b/pypy/rpython/lltypesystem/ll2ctypes.py
@@ -20,7 +20,6 @@
 from pypy.rpython.extfunc import ExtRegistryEntry
 from pypy.rlib.objectmodel import Symbolic, ComputedIntSymbolic
 from pypy.tool.uid import fixid
-from pypy.tool.tls import tlsobject
 from pypy.rlib.rarithmetic import r_uint, r_singlefloat, r_longfloat, intmask
 from pypy.annotation import model as annmodel
 from pypy.rpython.llinterp import LLInterpreter, LLException
@@ -28,6 +27,7 @@
 from pypy.rpython import raddress
 from pypy.translator.platform import platform
 from array import array
+from thread import _local as tlsobject
 
 # ____________________________________________________________
 
@@ -418,6 +418,9 @@
     instance._storage = ctypes_storage
     assert ctypes_storage   # null pointer?
 
+class NotCtypesAllocatedStructure(ValueError):
+    pass
+
 class _parentable_mixin(object):
     """Mixin added to _parentable containers when they become ctypes-based.
     (This is done by changing the __class__ of the instance to reference
@@ -436,7 +439,7 @@
     def _addressof_storage(self):
         "Returns the storage address as an int"
         if self._storage is None or self._storage is True:
-            raise ValueError("Not a ctypes allocated structure")
+            raise NotCtypesAllocatedStructure("Not a ctypes allocated structure")
         return intmask(ctypes.cast(self._storage, ctypes.c_void_p).value)
 
     def _free(self):
diff --git a/pypy/rpython/lltypesystem/lloperation.py b/pypy/rpython/lltypesystem/lloperation.py
--- a/pypy/rpython/lltypesystem/lloperation.py
+++ b/pypy/rpython/lltypesystem/lloperation.py
@@ -521,10 +521,6 @@
                                                     RuntimeError)),
     #                               can always unwind, not just if stackless gc
 
-    'resume_point':         LLOp(canraise=(Exception,)),
-    'resume_state_create':  LLOp(canraise=(MemoryError,), canunwindgc=True),
-    'resume_state_invoke':  LLOp(canraise=(Exception, StackException,
-                                           RuntimeError)),
     'stack_frames_depth':   LLOp(sideeffects=False, canraise=(StackException,
                                                               RuntimeError)),
     'stack_switch':         LLOp(canraise=(StackException, RuntimeError)),
diff --git a/pypy/rpython/lltypesystem/lltype.py b/pypy/rpython/lltypesystem/lltype.py
--- a/pypy/rpython/lltypesystem/lltype.py
+++ b/pypy/rpython/lltypesystem/lltype.py
@@ -4,14 +4,16 @@
                                    base_int, normalizedinttype)
 from pypy.rlib.objectmodel import Symbolic
 from pypy.tool.uid import Hashable
-from pypy.tool.tls import tlsobject
 from pypy.tool.identity_dict import identity_dict
 from pypy.tool import leakfinder
 from types import NoneType
 from sys import maxint
 import weakref
 
-TLS = tlsobject()
+class State(object):
+    pass
+
+TLS = State()
 
 class WeakValueDictionary(weakref.WeakValueDictionary):
     """A subclass of weakref.WeakValueDictionary
@@ -829,7 +831,7 @@
     raise TypeError, "unsupported cast"
 
 def _cast_whatever(TGT, value):
-    from pypy.rpython.lltypesystem import llmemory
+    from pypy.rpython.lltypesystem import llmemory, rffi
     ORIG = typeOf(value)
     if ORIG == TGT:
         return value
@@ -845,6 +847,8 @@
                 return cast_pointer(TGT, value)
         elif ORIG == llmemory.Address:
             return llmemory.cast_adr_to_ptr(value, TGT)
+        elif TGT == rffi.VOIDP and ORIG == Unsigned:
+            return rffi.cast(TGT, value)
         elif ORIG == Signed:
             return cast_int_to_ptr(TGT, value)
     elif TGT == llmemory.Address and isinstance(ORIG, Ptr):
diff --git a/pypy/rpython/lltypesystem/rffi.py b/pypy/rpython/lltypesystem/rffi.py
--- a/pypy/rpython/lltypesystem/rffi.py
+++ b/pypy/rpython/lltypesystem/rffi.py
@@ -244,7 +244,7 @@
     def __init__(self):
         self.callbacks = {}
 
-def _make_wrapper_for(TP, callable, callbackholder, aroundstate=None):
+def _make_wrapper_for(TP, callable, callbackholder=None, aroundstate=None):
     """ Function creating wrappers for callbacks. Note that this is
     cheating as we assume constant callbacks and we just memoize wrappers
     """
@@ -255,7 +255,8 @@
     else:
         errorcode = TP.TO.RESULT._example()
     callable_name = getattr(callable, '__name__', '?')
-    callbackholder.callbacks[callable] = True
+    if callbackholder is not None:
+        callbackholder.callbacks[callable] = True
     args = ', '.join(['a%d' % i for i in range(len(TP.TO.ARGS))])
     source = py.code.Source(r"""
         def wrapper(%s):    # no *args - no GIL for mallocing the tuple
diff --git a/pypy/rpython/lltypesystem/rlist.py b/pypy/rpython/lltypesystem/rlist.py
--- a/pypy/rpython/lltypesystem/rlist.py
+++ b/pypy/rpython/lltypesystem/rlist.py
@@ -237,6 +237,7 @@
         l.length = newsize
     else:
         _ll_list_resize_really(l, newsize)
+_ll_list_resize_ge.oopspec = 'list._resize_ge(l, newsize)'
 
 def _ll_list_resize_le(l, newsize):
     if newsize >= (len(l.items) >> 1) - 5:
diff --git a/pypy/rpython/memory/gc/minimark.py b/pypy/rpython/memory/gc/minimark.py
--- a/pypy/rpython/memory/gc/minimark.py
+++ b/pypy/rpython/memory/gc/minimark.py
@@ -1020,6 +1020,7 @@
                 objhdr.tid |= GCFLAG_CARDS_SET
 
         remember_young_pointer_from_array._dont_inline_ = True
+        assert self.card_page_indices > 0
         self.remember_young_pointer_from_array = (
             remember_young_pointer_from_array)
 
diff --git a/pypy/rpython/memory/gctransform/framework.py b/pypy/rpython/memory/gctransform/framework.py
--- a/pypy/rpython/memory/gctransform/framework.py
+++ b/pypy/rpython/memory/gctransform/framework.py
@@ -860,9 +860,9 @@
 
     def gct_get_write_barrier_from_array_failing_case(self, hop):
         op = hop.spaceop
-        hop.genop("same_as",
-                  [self.write_barrier_from_array_failing_case_ptr],
-                  resultvar=op.result)
+        v = getattr(self, 'write_barrier_from_array_failing_case_ptr',
+                    lltype.nullptr(op.result.concretetype.TO))
+        hop.genop("same_as", [v], resultvar=op.result)
 
     def gct_zero_gc_pointers_inside(self, hop):
         if not self.malloc_zero_filled:
diff --git a/pypy/rpython/module/test/test_posix.py b/pypy/rpython/module/test/test_posix.py
--- a/pypy/rpython/module/test/test_posix.py
+++ b/pypy/rpython/module/test/test_posix.py
@@ -43,6 +43,17 @@
         for i in range(len(stat)):
             assert long(getattr(func, 'item%d' % i)) == stat[i]
 
+    def test_stat_exception(self):
+        def fo():
+            try:
+                posix.stat('I/do/not/exist')
+            except OSError:
+                return True
+            else:
+                return False
+        res = self.interpret(fo,[])
+        assert res
+
     def test_times(self):
         import py; py.test.skip("llinterp does not like tuple returns")
         from pypy.rpython.test.test_llinterp import interpret
@@ -205,5 +216,8 @@
     def test_stat(self):
         py.test.skip("ootypesystem does not support os.stat")
 
+    def test_stat_exception(self):
+        py.test.skip("ootypesystem does not support os.stat")
+
     def test_chown(self):
         py.test.skip("ootypesystem does not support os.chown")
diff --git a/pypy/rpython/rlist.py b/pypy/rpython/rlist.py
--- a/pypy/rpython/rlist.py
+++ b/pypy/rpython/rlist.py
@@ -568,7 +568,6 @@
     length = l.ll_length()
     l._ll_resize_ge(length+1)           # see "a note about overflows" above
     l.ll_setitem_fast(length, newitem)
-ll_append.oopspec = 'list.append(l, newitem)'
 
 # this one is for the special case of insert(0, x)
 def ll_prepend(l, newitem):
@@ -793,7 +792,6 @@
         raise MemoryError
     l1._ll_resize_ge(newlength)
     ll_arraycopy(l2, l1, 0, len1, len2)
-ll_extend.oopspec = 'list.extend(l1, l2)'
 
 def ll_extend_with_str(lst, s, getstrlen, getstritem):
     return ll_extend_with_str_slice_startonly(lst, s, getstrlen, getstritem, 0)
diff --git a/pypy/tool/jitlogparser/parser.py b/pypy/tool/jitlogparser/parser.py
--- a/pypy/tool/jitlogparser/parser.py
+++ b/pypy/tool/jitlogparser/parser.py
@@ -61,7 +61,7 @@
         if not argspec.strip():
             return [], None
         if opname == 'debug_merge_point':
-            return argspec.rsplit(", ", 1), None
+            return argspec.split(", ", 1), None
         else:
             args = argspec.split(', ')
             descr = None
@@ -95,12 +95,12 @@
 
     def __init__(self, operations, storage):
         if operations[0].name == 'debug_merge_point':
-            self.inline_level = int(operations[0].args[1])
+            self.inline_level = int(operations[0].args[0])
             m = re.search('<code object ([<>\w]+), file \'(.+?)\', line (\d+)> #(\d+) (\w+)',
-                         operations[0].getarg(0))
+                         operations[0].getarg(1))
             if m is None:
                 # a non-code loop, like StrLiteralSearch or something
-                self.bytecode_name = operations[0].args[0].split(" ")[0][1:]
+                self.bytecode_name = operations[0].args[1].split(" ")[0][1:]
             else:
                 self.name, self.filename, lineno, bytecode_no, self.bytecode_name = m.groups()
                 self.startlineno = int(lineno)
diff --git a/pypy/tool/jitlogparser/test/test_parser.py b/pypy/tool/jitlogparser/test/test_parser.py
--- a/pypy/tool/jitlogparser/test/test_parser.py
+++ b/pypy/tool/jitlogparser/test/test_parser.py
@@ -29,7 +29,7 @@
 def test_parse_non_code():
     ops = parse('''
     []
-    debug_merge_point("SomeRandomStuff", 0)
+    debug_merge_point(0, "SomeRandomStuff")
     ''')
     res = Function.from_operations(ops.operations, LoopStorage())
     assert len(res.chunks) == 1
@@ -38,10 +38,10 @@
 def test_split():
     ops = parse('''
     [i0]
-    debug_merge_point("<code object stuff, file '/I/dont/exist.py', line 200> #10 ADD", 0)
-    debug_merge_point("<code object stuff, file '/I/dont/exist.py', line 200> #11 SUB", 0)
+    debug_merge_point(0, "<code object stuff, file '/I/dont/exist.py', line 200> #10 ADD")
+    debug_merge_point(0, "<code object stuff, file '/I/dont/exist.py', line 200> #11 SUB")
     i1 = int_add(i0, 1)
-    debug_merge_point("<code object stuff, file '/I/dont/exist.py', line 200> #11 SUB", 0)
+    debug_merge_point(0, "<code object stuff, file '/I/dont/exist.py', line 200> #11 SUB")
     i2 = int_add(i1, 1)
     ''')
     res = Function.from_operations(ops.operations, LoopStorage())
@@ -54,12 +54,12 @@
 def test_inlined_call():
     ops = parse("""
     []
-    debug_merge_point('<code object inlined_call, file 'source.py', line 12> #28 CALL_FUNCTION', 0)
+    debug_merge_point(0, '<code object inlined_call, file 'source.py', line 12> #28 CALL_FUNCTION')
     i18 = getfield_gc(p0, descr=<BoolFieldDescr pypy.interpreter.pyframe.PyFrame.inst_is_being_profiled 89>)
-    debug_merge_point('<code object inner, file 'source.py', line 9> #0 LOAD_FAST', 1)
-    debug_merge_point('<code object inner, file 'source.py', line 9> #3 LOAD_CONST', 1)
-    debug_merge_point('<code object inner, file 'source.py', line 9> #7 RETURN_VALUE', 1)
-    debug_merge_point('<code object inlined_call, file 'source.py', line 12> #31 STORE_FAST', 0)
+    debug_merge_point(1, '<code object inner, file 'source.py', line 9> #0 LOAD_FAST')
+    debug_merge_point(1, '<code object inner, file 'source.py', line 9> #3 LOAD_CONST')
+    debug_merge_point(1, '<code object inner, file 'source.py', line 9> #7 RETURN_VALUE')
+    debug_merge_point(0, '<code object inlined_call, file 'source.py', line 12> #31 STORE_FAST')
     """)
     res = Function.from_operations(ops.operations, LoopStorage())
     assert len(res.chunks) == 3 # two chunks + inlined call
@@ -72,10 +72,10 @@
 def test_name():
     ops = parse('''
     [i0]
-    debug_merge_point("<code object stuff, file '/I/dont/exist.py', line 200> #10 ADD", 0)
-    debug_merge_point("<code object stuff, file '/I/dont/exist.py', line 201> #11 SUB", 0)
+    debug_merge_point(0, "<code object stuff, file '/I/dont/exist.py', line 200> #10 ADD")
+    debug_merge_point(0, "<code object stuff, file '/I/dont/exist.py', line 201> #11 SUB")
     i1 = int_add(i0, 1)
-    debug_merge_point("<code object stuff, file '/I/dont/exist.py', line 202> #11 SUB", 0)
+    debug_merge_point(0, "<code object stuff, file '/I/dont/exist.py', line 202> #11 SUB")
     i2 = int_add(i1, 1)
     ''')
     res = Function.from_operations(ops.operations, LoopStorage())
@@ -89,10 +89,10 @@
     ops = parse('''
     [i0]
     i3 = int_add(i0, 1)
-    debug_merge_point("<code object stuff, file '/I/dont/exist.py', line 200> #10 ADD", 0)
-    debug_merge_point("<code object stuff, file '/I/dont/exist.py', line 201> #11 SUB", 0)
+    debug_merge_point(0, "<code object stuff, file '/I/dont/exist.py', line 200> #10 ADD")
+    debug_merge_point(0, "<code object stuff, file '/I/dont/exist.py', line 201> #11 SUB")
     i1 = int_add(i0, 1)
-    debug_merge_point("<code object stuff, file '/I/dont/exist.py', line 202> #11 SUB", 0)
+    debug_merge_point(0, "<code object stuff, file '/I/dont/exist.py', line 202> #11 SUB")
     i2 = int_add(i1, 1)
     ''')
     res = Function.from_operations(ops.operations, LoopStorage())
@@ -102,10 +102,10 @@
     fname = str(py.path.local(__file__).join('..', 'x.py'))
     ops = parse('''
     [i0, i1]
-    debug_merge_point("<code object f, file '%(fname)s', line 2> #0 LOAD_FAST", 0)
-    debug_merge_point("<code object f, file '%(fname)s', line 2> #3 LOAD_FAST", 0)
-    debug_merge_point("<code object f, file '%(fname)s', line 2> #6 BINARY_ADD", 0)
-    debug_merge_point("<code object f, file '%(fname)s', line 2> #7 RETURN_VALUE", 0)
+    debug_merge_point(0, "<code object f, file '%(fname)s', line 2> #0 LOAD_FAST")
+    debug_merge_point(0, "<code object f, file '%(fname)s', line 2> #3 LOAD_FAST")
+    debug_merge_point(0, "<code object f, file '%(fname)s', line 2> #6 BINARY_ADD")
+    debug_merge_point(0, "<code object f, file '%(fname)s', line 2> #7 RETURN_VALUE")
     ''' % locals())
     res = Function.from_operations(ops.operations, LoopStorage())
     assert res.chunks[1].lineno == 3
@@ -114,11 +114,11 @@
     fname = str(py.path.local(__file__).join('..', 'x.py'))
     ops = parse('''
     [i0, i1]
-    debug_merge_point("<code object g, file '%(fname)s', line 5> #9 LOAD_FAST", 0)
-    debug_merge_point("<code object g, file '%(fname)s', line 5> #12 LOAD_CONST", 0)
-    debug_merge_point("<code object g, file '%(fname)s', line 5> #22 LOAD_CONST", 0)
-    debug_merge_point("<code object g, file '%(fname)s', line 5> #28 LOAD_CONST", 0)
-    debug_merge_point("<code object g, file '%(fname)s', line 5> #6 SETUP_LOOP", 0)
+    debug_merge_point(0, "<code object g, file '%(fname)s', line 5> #9 LOAD_FAST")
+    debug_merge_point(0, "<code object g, file '%(fname)s', line 5> #12 LOAD_CONST")
+    debug_merge_point(0, "<code object g, file '%(fname)s', line 5> #22 LOAD_CONST")
+    debug_merge_point(0, "<code object g, file '%(fname)s', line 5> #28 LOAD_CONST")
+    debug_merge_point(0, "<code object g, file '%(fname)s', line 5> #6 SETUP_LOOP")
     ''' % locals())
     res = Function.from_operations(ops.operations, LoopStorage())
     assert res.linerange == (7, 9)
@@ -128,7 +128,7 @@
     fname = str(py.path.local(__file__).join('..', 'x.py'))
     ops = parse("""
     [p6, p1]
-    debug_merge_point('<code object h, file '%(fname)s', line 11> #17 FOR_ITER', 0)
+    debug_merge_point(0, '<code object h, file '%(fname)s', line 11> #17 FOR_ITER')
     guard_class(p6, 144264192, descr=<Guard2>)
     p12 = getfield_gc(p6, descr=<GcPtrFieldDescr pypy.objspace.std.iterobject.W_AbstractSeqIterObject.inst_w_seq 12>)
     """ % locals())
@@ -174,7 +174,7 @@
 
 def test_parsing_strliteral():
     loop = parse("""
-    debug_merge_point('StrLiteralSearch at 11/51 [17, 8, 3, 1, 1, 1, 1, 51, 0, 19, 51, 1]', 0)
+    debug_merge_point(0, 'StrLiteralSearch at 11/51 [17, 8, 3, 1, 1, 1, 1, 51, 0, 19, 51, 1]')
     """)
     ops = Function.from_operations(loop.operations, LoopStorage())
     chunk = ops.chunks[0]
diff --git a/pypy/tool/pytest/appsupport.py b/pypy/tool/pytest/appsupport.py
--- a/pypy/tool/pytest/appsupport.py
+++ b/pypy/tool/pytest/appsupport.py
@@ -81,7 +81,7 @@
         self.space = space
         self.operr = operr
         self.typename = operr.w_type.getname(space, "?")
-        self.traceback = AppTraceback(space, self.operr.application_traceback)
+        self.traceback = AppTraceback(space, self.operr.get_traceback())
         debug_excs = getattr(operr, 'debug_excs', [])
         if debug_excs:
             self._excinfo = debug_excs[0]
diff --git a/pypy/tool/tls.py b/pypy/tool/tls.py
deleted file mode 100644
--- a/pypy/tool/tls.py
+++ /dev/null
@@ -1,8 +0,0 @@
-
-"""Thread-local storage."""
-
-try:
-    from thread import _local as tlsobject
-except ImportError:
-    class tlsobject(object):
-        pass
diff --git a/pypy/translator/backendopt/inline.py b/pypy/translator/backendopt/inline.py
--- a/pypy/translator/backendopt/inline.py
+++ b/pypy/translator/backendopt/inline.py
@@ -541,7 +541,6 @@
               'cast_pointer': 0,
               'malloc': 2,
               'yield_current_frame_to_caller': sys.maxint, # XXX bit extreme
-              'resume_point': sys.maxint, # XXX bit extreme
               'instrument_count': 0,
               'debug_assert': -1,
               }
diff --git a/pypy/translator/backendopt/removenoops.py b/pypy/translator/backendopt/removenoops.py
--- a/pypy/translator/backendopt/removenoops.py
+++ b/pypy/translator/backendopt/removenoops.py
@@ -81,8 +81,6 @@
                     num_removed += 1
                 else:
                     available[key] = op.result
-            elif op.opname == 'resume_point':
-                available.clear()
     if num_removed:
         remove_same_as(graph)
         # remove casts with unused results
diff --git a/pypy/translator/c/gcc/instruction.py b/pypy/translator/c/gcc/instruction.py
--- a/pypy/translator/c/gcc/instruction.py
+++ b/pypy/translator/c/gcc/instruction.py
@@ -187,8 +187,8 @@
 
     def requestgcroots(self, tracker):
         # no need to track the value of these registers in the caller
-        # function if we are the main(), or if we are flagged as a
-        # "bottom" function (a callback from C code)
+        # function if we are flagged as a "bottom" function (a callback
+        # from C code, or pypy_main_function())
         if tracker.is_stack_bottom:
             return {}
         else:
diff --git a/pypy/translator/c/gcc/test/elf/track10.s b/pypy/translator/c/gcc/test/elf/track10.s
--- a/pypy/translator/c/gcc/test/elf/track10.s
+++ b/pypy/translator/c/gcc/test/elf/track10.s
@@ -1,5 +1,5 @@
-	.type	main, @function
-main:
+	.type	main1, @function
+main1:
 	pushl	%ebx
 	call	pypy_f
 	;; expected {4(%esp) | (%esp), %esi, %edi, %ebp | %ebx}
@@ -11,4 +11,4 @@
 	/* GCROOT %ebx */
 	popl	%ebx
 	ret
-	.size	main, .-main
+	.size	main1, .-main1
diff --git a/pypy/translator/c/gcc/test/elf/track4.s b/pypy/translator/c/gcc/test/elf/track4.s
deleted file mode 100644
--- a/pypy/translator/c/gcc/test/elf/track4.s
+++ /dev/null
@@ -1,52 +0,0 @@
-	.type	main, @function
-main:
-	;; this is an artificial example showing what kind of code gcc
-	;; can produce for main()
-	pushl	%ebp
-	movl	%eax, $globalptr1
-	movl	%esp, %ebp
-	pushl	%edi
-	subl	$8, %esp
-	andl	$-16, %esp
-	movl	%ebx, -8(%ebp)
-	movl	8(%ebp), %edi
-	call	foobar
-	;; expected {4(%ebp) | -8(%ebp), %esi, -4(%ebp), (%ebp) | %edi}
-.L1:
-	cmpl	$0, %eax
-	je	.L3
-.L2:
-	;; inlined function here with -fomit-frame-pointer
-	movl	%eax, -12(%ebp)
-	movl	%edi, %edx
-	subl	$16, %esp
-	movl	%eax, (%esp)
-	movl	$42, %edi
-	movl	%edx, 4(%esp)
-	movl	%esi, %ebx
-	movl	$nonsense, %esi
-	call	foobar
-	;; expected {4(%ebp) | -8(%ebp), %ebx, -4(%ebp), (%ebp) | 4(%esp), -12(%ebp)}
-	addl	%edi, %eax
-	movl	4(%esp), %eax
-	movl	%ebx, %esi
-	addl	$16, %esp
-	movl	%eax, %edi
-	movl	-12(%ebp), %eax
-#APP
-	/* GCROOT %eax */
-#NO_APP
-	;; end of inlined function
-.L3:
-	call	foobar
-	;; expected {4(%ebp) | -8(%ebp), %esi, -4(%ebp), (%ebp) | %edi}
-#APP
-	/* GCROOT %edi */
-#NO_APP
-	movl	-8(%ebp), %ebx
-	movl	-4(%ebp), %edi
-	movl	%ebp, %esp
-	popl	%ebp
-	ret
-
-	.size	main, .-main
diff --git a/pypy/translator/c/gcc/test/elf/track6.s b/pypy/translator/c/gcc/test/elf/track6.s
deleted file mode 100644
--- a/pypy/translator/c/gcc/test/elf/track6.s
+++ /dev/null
@@ -1,26 +0,0 @@
-	.type	main, @function
-main:
-	;; a minimal example showing what kind of code gcc
-	;; can produce for main(): some local variable accesses
-	;; are relative to %ebp, while others are relative to
-	;; %esp, and the difference %ebp-%esp is not constant
-	;; because of the 'andl' to align the stack
-	pushl	%ebp
-	movl	%esp, %ebp
-	subl	$8, %esp
-	andl	$-16, %esp
-	movl	$globalptr1, -4(%ebp)
-	movl	$globalptr2, (%esp)
-	pushl	$0
-	call	foobar
-	;; expected {4(%ebp) | %ebx, %esi, %edi, (%ebp) | 4(%esp), -4(%ebp)}
-	popl	%eax
-#APP
-	/* GCROOT -4(%ebp) */
-	/* GCROOT (%esp) */
-#NO_APP
-	movl	%ebp, %esp
-	popl	%ebp
-	ret
-
-	.size	main, .-main
diff --git a/pypy/translator/c/gcc/test/elf/track7.s b/pypy/translator/c/gcc/test/elf/track7.s
--- a/pypy/translator/c/gcc/test/elf/track7.s
+++ b/pypy/translator/c/gcc/test/elf/track7.s
@@ -1,5 +1,5 @@
-	.type	main, @function
-main:
+	.type	main1, @function
+main1:
 	;; cmovCOND tests.
 	pushl	%ebx
 	movl	12(%esp), %ebx
@@ -16,4 +16,4 @@
 	popl	%ebx
 	ret
 
-	.size	main, .-main
+	.size	main1, .-main1
diff --git a/pypy/translator/c/gcc/test/msvc/track6.s b/pypy/translator/c/gcc/test/msvc/track6.s
deleted file mode 100644
--- a/pypy/translator/c/gcc/test/msvc/track6.s
+++ /dev/null
@@ -1,15 +0,0 @@
-_TEXT	SEGMENT
-_pypy_g_foo PROC					; COMDAT
-
-	push	ebp
-	mov	ebp, esp
-	and	esp, -64
-	sub	esp, 12
-	push	esi
-	call	_pypy_g_something_else
-	;; expected {4(%ebp) | %ebx, (%esp), %edi, (%ebp) | }
-	pop	esi
-	mov	esp, ebp
-	pop	ebp
-	ret	0
-_pypy_g_foo ENDP
diff --git a/pypy/translator/c/gcc/trackgcroot.py b/pypy/translator/c/gcc/trackgcroot.py
--- a/pypy/translator/c/gcc/trackgcroot.py
+++ b/pypy/translator/c/gcc/trackgcroot.py
@@ -39,10 +39,15 @@
         self.uses_frame_pointer = False
         self.r_localvar = self.r_localvarnofp
         self.filetag = filetag
-        # a "stack bottom" function is either main() or a callback from C code
+        # a "stack bottom" function is either pypy_main_function() or a
+        # callback from C code.  In both cases they are identified by
+        # the presence of pypy_asm_stack_bottom().
         self.is_stack_bottom = False
 
     def computegcmaptable(self, verbose=0):
+        if self.funcname in ['main', '_main']:
+            return []     # don't analyze main(), its prologue may contain
+                          # strange instructions
         self.findlabels()
         self.parse_instructions()
         try:
@@ -226,7 +231,7 @@
         # in the frame at this point.  This doesn't count the return address
         # which is the word immediately following the frame in memory.
         # The 'framesize' is set to an odd value if it is only an estimate
-        # (see visit_andl()).
+        # (see InsnCannotFollowEsp).
 
         def walker(insn, size_delta):
             check = deltas.setdefault(insn, size_delta)
@@ -521,10 +526,8 @@
         target = match.group("target")
         if target == self.ESP:
             # only for  andl $-16, %esp  used to align the stack in main().
-            # The exact amount of adjutment is not known yet, so we use
-            # an odd-valued estimate to make sure the real value is not used
-            # elsewhere by the FunctionGcRootTracker.
-            return InsnCannotFollowEsp()
+            # main() should not be seen at all.
+            raise AssertionError("instruction unexpected outside of main()")
         else:
             return self.binary_insn(line)
 
@@ -1323,12 +1326,11 @@
         self.verbose = verbose
         self.shuffle = shuffle
         self.gcmaptable = []
-        self.seen_main = False
 
-    def process(self, iterlines, newfile, entrypoint='main', filename='?'):
+    def process(self, iterlines, newfile, filename='?'):
         for in_function, lines in self.find_functions(iterlines):
             if in_function:
-                tracker = self.process_function(lines, entrypoint, filename)
+                tracker = self.process_function(lines, filename)
                 lines = tracker.lines
             self.write_newfile(newfile, lines, filename.split('.')[0])
         if self.verbose == 1:
@@ -1337,11 +1339,9 @@
     def write_newfile(self, newfile, lines, grist):
         newfile.writelines(lines)
 
-    def process_function(self, lines, entrypoint, filename):
+    def process_function(self, lines, filename):
         tracker = self.FunctionGcRootTracker(
             lines, filetag=getidentifier(filename))
-        is_main = tracker.funcname == entrypoint
-        tracker.is_stack_bottom = is_main
         if self.verbose == 1:
             sys.stderr.write('.')
         elif self.verbose > 1:
@@ -1356,7 +1356,6 @@
             self.gcmaptable[:0] = table
         else:
             self.gcmaptable.extend(table)
-        self.seen_main |= is_main
         return tracker
 
 class ElfAssemblerParser(AssemblerParser):
@@ -1432,11 +1431,6 @@
         if functionlines:
             yield in_function, functionlines
 
-    def process_function(self, lines, entrypoint, filename):
-        entrypoint = '_' + entrypoint
-        return super(DarwinAssemblerParser, self).process_function(
-            lines, entrypoint, filename)
-
 class DarwinAssemblerParser64(DarwinAssemblerParser):
     format = "darwin64"
     FunctionGcRootTracker = DarwinFunctionGcRootTracker64
@@ -1494,11 +1488,6 @@
             "missed the end of the previous function")
         yield False, functionlines
 
-    def process_function(self, lines, entrypoint, filename):
-        entrypoint = '_' + entrypoint
-        return super(MsvcAssemblerParser, self).process_function(
-            lines, entrypoint, filename)
-
     def write_newfile(self, newfile, lines, grist):
         newlines = []
         for line in lines:
@@ -1560,24 +1549,21 @@
         self.shuffle = shuffle     # to debug the sorting logic in asmgcroot.py
         self.format = format
         self.gcmaptable = []
-        self.seen_main = False
 
     def dump_raw_table(self, output):
-        print >> output, "seen_main = %d" % (self.seen_main,)
+        print 'raw table'
         for entry in self.gcmaptable:
             print >> output, entry
 
     def reload_raw_table(self, input):
         firstline = input.readline()
-        assert firstline.startswith("seen_main = ")
-        self.seen_main |= bool(int(firstline[len("seen_main = "):].strip()))
+        assert firstline == 'raw table\n'
         for line in input:
             entry = eval(line)
             assert type(entry) is tuple
             self.gcmaptable.append(entry)
 
     def dump(self, output):
-        assert self.seen_main
 
         def _globalname(name, disp=""):
             return tracker_cls.function_names_prefix + name
@@ -1649,8 +1635,8 @@
             s = """\
             /* See description in asmgcroot.py */
             .cfi_startproc
-            movq\t%rdi, %rdx\t/* 1st argument, which is the callback */
-            movq\t%rsi, %rcx\t/* 2nd argument, which is gcrootanchor */
+            /* %rdi is the 1st argument, which is the callback */
+            /* %rsi is the 2nd argument, which is gcrootanchor */
             movq\t%rsp, %rax\t/* my frame top address */
             pushq\t%rax\t\t/* ASM_FRAMEDATA[8] */
             pushq\t%rbp\t\t/* ASM_FRAMEDATA[7] */
@@ -1663,15 +1649,15 @@
             /* Add this ASM_FRAMEDATA to the front of the circular linked */
             /* list.  Let's call it 'self'.                               */
 
-            movq\t8(%rcx), %rax\t/* next = gcrootanchor->next */
+            movq\t8(%rsi), %rax\t/* next = gcrootanchor->next */
             pushq\t%rax\t\t\t\t/* self->next = next */
-            pushq\t%rcx\t\t\t/* self->prev = gcrootanchor */
-            movq\t%rsp, 8(%rcx)\t/* gcrootanchor->next = self */
+            pushq\t%rsi\t\t\t/* self->prev = gcrootanchor */
+            movq\t%rsp, 8(%rsi)\t/* gcrootanchor->next = self */
             movq\t%rsp, 0(%rax)\t\t\t/* next->prev = self */
             .cfi_def_cfa_offset 80\t/* 9 pushes + the retaddr = 80 bytes */
 
             /* note: the Mac OS X 16 bytes aligment must be respected. */
-            call\t*%rdx\t\t/* invoke the callback */
+            call\t*%rdi\t\t/* invoke the callback */
 
             /* Detach this ASM_FRAMEDATA from the circular linked list */
             popq\t%rsi\t\t/* prev = self->prev */
@@ -1688,7 +1674,7 @@
             popq\t%rcx\t\t/* ignored      ASM_FRAMEDATA[8] */
 
             /* the return value is the one of the 'call' above, */
-            /* because %rax (and possibly %rdx) are unmodified  */
+            /* because %rax is unmodified  */
             ret
             .cfi_endproc
             """
@@ -1835,11 +1821,11 @@
             """.replace("__gccallshapes", _globalname("__gccallshapes"))
             output.writelines(shapelines)
 
-    def process(self, iterlines, newfile, entrypoint='main', filename='?'):
+    def process(self, iterlines, newfile, filename='?'):
         parser = PARSERS[format](verbose=self.verbose, shuffle=self.shuffle)
         for in_function, lines in parser.find_functions(iterlines):
             if in_function:
-                tracker = parser.process_function(lines, entrypoint, filename)
+                tracker = parser.process_function(lines, filename)
                 lines = tracker.lines
             parser.write_newfile(newfile, lines, filename.split('.')[0])
         if self.verbose == 1:
@@ -1848,7 +1834,6 @@
             self.gcmaptable[:0] = parser.gcmaptable
         else:
             self.gcmaptable.extend(parser.gcmaptable)
-        self.seen_main |= parser.seen_main
 
 
 class UnrecognizedOperation(Exception):
@@ -1915,7 +1900,6 @@
             format = 'elf64'
         else:
             format = 'elf'
-    entrypoint = 'main'
     while len(sys.argv) > 1:
         if sys.argv[1] == '-v':
             del sys.argv[1]
@@ -1929,9 +1913,9 @@
         elif sys.argv[1].startswith('-f'):
             format = sys.argv[1][2:]
             del sys.argv[1]
-        elif sys.argv[1].startswith('-m'):
-            entrypoint = sys.argv[1][2:]
-            del sys.argv[1]
+        elif sys.argv[1].startswith('-'):
+            print >> sys.stderr, "unrecognized option:", sys.argv[1]
+            sys.exit(1)
         else:
             break
     tracker = GcRootTracker(verbose=verbose, shuffle=shuffle, format=format)
@@ -1940,7 +1924,7 @@
         firstline = f.readline()
         f.seek(0)
         assert firstline, "file %r is empty!" % (fn,)
-        if firstline.startswith('seen_main = '):
+        if firstline == 'raw table\n':
             tracker.reload_raw_table(f)
             f.close()
         else:
@@ -1948,7 +1932,7 @@
             lblfn = fn[:-2] + '.lbl.s'
             g = open(lblfn, 'w')
             try:
-                tracker.process(f, g, entrypoint=entrypoint, filename=fn)
+                tracker.process(f, g, filename=fn)
             except:
                 g.close()
                 os.unlink(lblfn)
diff --git a/pypy/translator/c/genc.py b/pypy/translator/c/genc.py
--- a/pypy/translator/c/genc.py
+++ b/pypy/translator/c/genc.py
@@ -602,7 +602,7 @@
                         'cmd /c $(MASM) /nologo /Cx /Cp /Zm /coff /Fo$@ /c $< $(INCLUDEDIRS)')
                 mk.rule('.c.gcmap', '',
                         ['$(CC) /nologo $(ASM_CFLAGS) /c /FAs /Fa$*.s $< $(INCLUDEDIRS)',
-                         'cmd /c ' + python + '$(PYPYDIR)/translator/c/gcc/trackgcroot.py -fmsvc -m$(PYPY_MAIN_FUNCTION) -t $*.s > $@']
+                         'cmd /c ' + python + '$(PYPYDIR)/translator/c/gcc/trackgcroot.py -fmsvc -t $*.s > $@']
                         )
                 mk.rule('gcmaptable.c', '$(GCMAPFILES)',
                         'cmd /c ' + python + '$(PYPYDIR)/translator/c/gcc/trackgcroot.py -fmsvc $(GCMAPFILES) > $@')
@@ -613,7 +613,7 @@
                 mk.rule('%.lbl.s %.gcmap', '%.s',
                         [python +
                              '$(PYPYDIR)/translator/c/gcc/trackgcroot.py '
-                             '-m$(PYPY_MAIN_FUNCTION) -t $< > $*.gctmp',
+                             '-t $< > $*.gctmp',
                          'mv $*.gctmp $*.gcmap'])
                 mk.rule('gcmaptable.s', '$(GCMAPFILES)',
                         [python +
diff --git a/pypy/translator/c/src/cjkcodecs/multibytecodec.c b/pypy/translator/c/src/cjkcodecs/multibytecodec.c
--- a/pypy/translator/c/src/cjkcodecs/multibytecodec.c
+++ b/pypy/translator/c/src/cjkcodecs/multibytecodec.c
@@ -1,4 +1,5 @@
 #include <stdlib.h>
+#include <string.h>
 #include "src/cjkcodecs/multibytecodec.h"
 
 
@@ -93,6 +94,22 @@
   return d->inbuf - d->inbuf_start;
 }
 
+Py_ssize_t pypy_cjk_dec_replace_on_error(struct pypy_cjk_dec_s* d,
+                                         Py_UNICODE *newbuf, Py_ssize_t newlen,
+                                         Py_ssize_t in_offset)
+{
+  if (newlen > 0)
+    {
+      if (d->outbuf + newlen > d->outbuf_end)
+        if (expand_decodebuffer(d, newlen) == -1)
+          return MBERR_NOMEMORY;
+      memcpy(d->outbuf, newbuf, newlen * sizeof(Py_UNICODE));
+      d->outbuf += newlen;
+    }
+  d->inbuf = d->inbuf_start + in_offset;
+  return 0;
+}
+
 /************************************************************/
 
 struct pypy_cjk_enc_s *pypy_cjk_enc_init(const MultibyteCodec *codec,
@@ -209,3 +226,19 @@
 {
   return d->inbuf - d->inbuf_start;
 }
+
+Py_ssize_t pypy_cjk_enc_replace_on_error(struct pypy_cjk_enc_s* d,
+                                         char *newbuf, Py_ssize_t newlen,
+                                         Py_ssize_t in_offset)
+{
+  if (newlen > 0)
+    {
+      if (d->outbuf + newlen > d->outbuf_end)
+        if (expand_encodebuffer(d, newlen) == -1)
+          return MBERR_NOMEMORY;
+      memcpy(d->outbuf, newbuf, newlen);
+      d->outbuf += newlen;
+    }
+  d->inbuf = d->inbuf_start + in_offset;
+  return 0;
+}
diff --git a/pypy/translator/c/src/cjkcodecs/multibytecodec.h b/pypy/translator/c/src/cjkcodecs/multibytecodec.h
--- a/pypy/translator/c/src/cjkcodecs/multibytecodec.h
+++ b/pypy/translator/c/src/cjkcodecs/multibytecodec.h
@@ -102,6 +102,8 @@
 Py_ssize_t pypy_cjk_dec_outlen(struct pypy_cjk_dec_s *);
 Py_ssize_t pypy_cjk_dec_inbuf_remaining(struct pypy_cjk_dec_s *d);
 Py_ssize_t pypy_cjk_dec_inbuf_consumed(struct pypy_cjk_dec_s* d);
+Py_ssize_t pypy_cjk_dec_replace_on_error(struct pypy_cjk_dec_s* d,
+                                         Py_UNICODE *, Py_ssize_t, Py_ssize_t);
 
 struct pypy_cjk_enc_s {
   const MultibyteCodec *codec;
@@ -119,6 +121,8 @@
 Py_ssize_t pypy_cjk_enc_outlen(struct pypy_cjk_enc_s *);
 Py_ssize_t pypy_cjk_enc_inbuf_remaining(struct pypy_cjk_enc_s *d);
 Py_ssize_t pypy_cjk_enc_inbuf_consumed(struct pypy_cjk_enc_s* d);
+Py_ssize_t pypy_cjk_enc_replace_on_error(struct pypy_cjk_enc_s* d,
+                                         char *, Py_ssize_t, Py_ssize_t);
 
 /* list of codecs defined in the .c files */
 
diff --git a/pypy/translator/c/src/debug_print.c b/pypy/translator/c/src/debug_print.c
--- a/pypy/translator/c/src/debug_print.c
+++ b/pypy/translator/c/src/debug_print.c
@@ -6,6 +6,8 @@
 #include <stdio.h>
 #ifndef _WIN32
 #include <unistd.h>
+#include <time.h>
+#include <sys/time.h>
 #else
 #define WIN32_LEAN_AND_MEAN
 #include <windows.h>
diff --git a/pypy/translator/c/src/debug_print.h b/pypy/translator/c/src/debug_print.h
--- a/pypy/translator/c/src/debug_print.h
+++ b/pypy/translator/c/src/debug_print.h
@@ -53,8 +53,6 @@
 #  ifdef _WIN32
 #    define READ_TIMESTAMP(val) QueryPerformanceCounter((LARGE_INTEGER*)&(val))
 #  else
-#    include <time.h>
-#    include <sys/time.h>
 
 long long pypy_read_timestamp();
 
diff --git a/pypy/translator/c/src/debug_traceback.h b/pypy/translator/c/src/debug_traceback.h
--- a/pypy/translator/c/src/debug_traceback.h
+++ b/pypy/translator/c/src/debug_traceback.h
@@ -21,7 +21,11 @@
    line to the f:17/KeyError line.
 */
 
-#define PYPY_DEBUG_TRACEBACK_DEPTH        128     /* a power of two */
+#ifdef RPY_LL_ASSERT
+#  define PYPY_DEBUG_TRACEBACK_DEPTH        8192    /* a power of two */
+#else
+#  define PYPY_DEBUG_TRACEBACK_DEPTH        128     /* a power of two */
+#endif
 
 #define PYPYDTPOS_RERAISE                 ((struct pypydtpos_s *) -1)
 #define PYPYDTSTORE(loc, etype)                         \
diff --git a/pypy/translator/c/src/main.h b/pypy/translator/c/src/main.h
--- a/pypy/translator/c/src/main.h
+++ b/pypy/translator/c/src/main.h
@@ -23,12 +23,19 @@
 #include "src/winstuff.c"
 #endif
 
-int PYPY_MAIN_FUNCTION(int argc, char *argv[])
+#ifdef __GNUC__
+/* Hack to prevent this function from being inlined.  Helps asmgcc
+   because the main() function often has a different prologue/epilogue. */
+int pypy_main_function(int argc, char *argv[]) __attribute__((__noinline__));
+#endif
+
+int pypy_main_function(int argc, char *argv[])
 {
     char *errmsg;
     int i, exitcode;
     RPyListOfString *list;
 
+    pypy_asm_stack_bottom();
     instrument_setup();
 
     if (sizeof(void*) != SIZEOF_LONG) {
@@ -74,4 +81,9 @@
     abort();
 }
 
+int PYPY_MAIN_FUNCTION(int argc, char *argv[])
+{
+    return pypy_main_function(argc, argv);
+}
+
 #endif /* PYPY_NOT_MAIN_FILE */
diff --git a/pypy/translator/c/src/stack.h b/pypy/translator/c/src/stack.h
--- a/pypy/translator/c/src/stack.h
+++ b/pypy/translator/c/src/stack.h
@@ -11,15 +11,22 @@
  * It is needed to have RPyThreadStaticTLS, too. */
 #include "thread.h"
 
-extern char *_LLstacktoobig_stack_start;
+extern char *_LLstacktoobig_stack_end;
+extern long _LLstacktoobig_stack_length;
+extern char _LLstacktoobig_report_error;
 
 void LL_stack_unwind(void);
 char LL_stack_too_big_slowpath(long);    /* returns 0 (ok) or 1 (too big) */
+void LL_stack_set_length_fraction(double);
 
 /* some macros referenced from pypy.rlib.rstack */
-#define LL_stack_get_start() ((long)_LLstacktoobig_stack_start)
-#define LL_stack_get_length() MAX_STACK_SIZE
-#define LL_stack_get_start_adr() ((long)&_LLstacktoobig_stack_start)  /* JIT */
+#define LL_stack_get_end() ((long)_LLstacktoobig_stack_end)
+#define LL_stack_get_length() _LLstacktoobig_stack_length
+#define LL_stack_get_end_adr()    ((long)&_LLstacktoobig_stack_end)   /* JIT */
+#define LL_stack_get_length_adr() ((long)&_LLstacktoobig_stack_length)/* JIT */
+
+#define LL_stack_criticalcode_start()  (_LLstacktoobig_report_error = 0)
+#define LL_stack_criticalcode_stop()   (_LLstacktoobig_report_error = 1)
 
 
 #ifdef __GNUC__
@@ -32,93 +39,67 @@
 #ifndef PYPY_NOT_MAIN_FILE
 #include <stdio.h>
 
-#ifndef PYPY_NOINLINE
-# if defined __GNUC__
-#  define PYPY_NOINLINE __attribute__((noinline))
-# else
-// add hints for other compilers here ...
-#  define PYPY_NOINLINE
-# endif
-#endif
+/* the current stack is in the interval [end-length:end].  We assume a
+   stack that grows downward here. */
+char *_LLstacktoobig_stack_end = NULL;
+long _LLstacktoobig_stack_length = MAX_STACK_SIZE;
+char _LLstacktoobig_report_error = 1;
+static RPyThreadStaticTLS end_tls_key;
 
-long PYPY_NOINLINE _LL_stack_growing_direction(char *parent)
+void LL_stack_set_length_fraction(double fraction)
 {
-	char local;
-	if (parent == NULL)
-		return _LL_stack_growing_direction(&local);
-	else
-		return &local - parent;
+	_LLstacktoobig_stack_length = (long)(MAX_STACK_SIZE * fraction);
 }
 
-char *_LLstacktoobig_stack_start = NULL;
-int stack_direction = 0;
-RPyThreadStaticTLS start_tls_key;
-
 char LL_stack_too_big_slowpath(long current)
 {
-	long diff;
+	long diff, max_stack_size;
 	char *baseptr, *curptr = (char*)current;
 
-	/* The stack_start variable is updated to match the current value
+	/* The stack_end variable is updated to match the current value
 	   if it is still 0 or if we later find a 'curptr' position
-	   that is below it.  The real stack_start pointer is stored in
+	   that is above it.  The real stack_end pointer is stored in
 	   thread-local storage, but we try to minimize its overhead by
-	   keeping a local copy in _LLstacktoobig_stack_start. */
+	   keeping a local copy in _LLstacktoobig_stack_end. */
 
-	if (stack_direction == 0) {
+	if (_LLstacktoobig_stack_end == NULL) {
 		/* not initialized */
 		/* XXX We assume that initialization is performed early,
 		   when there is still only one thread running.  This
 		   allows us to ignore race conditions here */
-		char *errmsg = RPyThreadStaticTLS_Create(&start_tls_key);
+		char *errmsg = RPyThreadStaticTLS_Create(&end_tls_key);
 		if (errmsg) {
 			/* XXX should we exit the process? */
 			fprintf(stderr, "Internal PyPy error: %s\n", errmsg);
 			return 1;
 		}
-		if (_LL_stack_growing_direction(NULL) > 0)
-			stack_direction = +1;
-		else
-			stack_direction = -1;
 	}
 
-	baseptr = (char *) RPyThreadStaticTLS_Get(start_tls_key);
-	if (baseptr != NULL) {
-		diff = curptr - baseptr;
-		if (((unsigned long)diff) < (unsigned long)MAX_STACK_SIZE) {
+	baseptr = (char *) RPyThreadStaticTLS_Get(end_tls_key);
+	max_stack_size = _LLstacktoobig_stack_length;
+	if (baseptr == NULL) {
+		/* first time we see this thread */
+	}
+	else {
+		diff = baseptr - curptr;
+		if (((unsigned long)diff) <= (unsigned long)max_stack_size) {
 			/* within bounds, probably just had a thread switch */
-			_LLstacktoobig_stack_start = baseptr;
+			_LLstacktoobig_stack_end = baseptr;
 			return 0;
 		}
-
-		if (stack_direction > 0) {
-			if (diff < 0 && diff > -MAX_STACK_SIZE)
-				;           /* stack underflow */
-			else
-				return 1;   /* stack overflow (probably) */
+		if (((unsigned long)-diff) <= (unsigned long)max_stack_size) {
+			/* stack underflowed: the initial estimation of
+			   the stack base must be revised */
 		}
-		else {
-			if (diff >= MAX_STACK_SIZE && diff < 2*MAX_STACK_SIZE)
-				;           /* stack underflow */
-			else
-				return 1;   /* stack overflow (probably) */
+		else {	/* stack overflow (probably) */
+			return _LLstacktoobig_report_error;
 		}
-		/* else we underflowed the stack, which means that
-		   the initial estimation of the stack base must
-		   be revised */
 	}
 
 	/* update the stack base pointer to the current value */
-	if (stack_direction > 0) {
-		/* the valid range is [curptr:curptr+MAX_STACK_SIZE] */
-		baseptr = curptr;
-	}
-	else {
-		/* the valid range is [curptr-MAX_STACK_SIZE+1:curptr+1] */
-		baseptr = curptr - MAX_STACK_SIZE + 1;
-	}
-	RPyThreadStaticTLS_Set(start_tls_key, baseptr);
-	_LLstacktoobig_stack_start = baseptr;
+	baseptr = curptr;
+	RPyThreadStaticTLS_Set(end_tls_key, baseptr);
+	_LLstacktoobig_stack_end = baseptr;
 	return 0;
 }
 
diff --git a/pypy/translator/c/test/test_standalone.py b/pypy/translator/c/test/test_standalone.py
--- a/pypy/translator/c/test/test_standalone.py
+++ b/pypy/translator/c/test/test_standalone.py
@@ -689,6 +689,78 @@
         out = cbuilder.cmdexec("")
         assert out.strip() == "hi!"
 
+    def test_set_length_fraction(self):
+        # check for pypy.rlib.rstack._stack_set_length_fraction()
+        from pypy.rlib.rstack import _stack_set_length_fraction
+        from pypy.rlib.rstackovf import StackOverflow
+        class A:
+            n = 0
+        glob = A()
+        def f(n):
+            glob.n += 1
+            if n <= 0:
+                return 42
+            return f(n+1)
+        def entry_point(argv):
+            _stack_set_length_fraction(0.1)
+            try:
+                return f(1)
+            except StackOverflow:
+                glob.n = 0
+            _stack_set_length_fraction(float(argv[1]))
+            try:
+                return f(1)
+            except StackOverflow:
+                print glob.n
+                return 0
+        t, cbuilder = self.compile(entry_point, stackcheck=True)
+        counts = {}
+        for fraction in [0.1, 0.4, 1.0]:
+            out = cbuilder.cmdexec(str(fraction))
+            print 'counts[%s]: %r' % (fraction, out)
+            counts[fraction] = int(out.strip())
+        #
+        assert counts[1.0] >= 1000
+        # ^^^ should actually be much more than 1000 for this small test
+        assert counts[0.1] < counts[0.4] / 3
+        assert counts[0.4] < counts[1.0] / 2
+        assert counts[0.1] > counts[0.4] / 7
+        assert counts[0.4] > counts[1.0] / 4
+
+    def test_stack_criticalcode(self):
+        # check for pypy.rlib.rstack._stack_criticalcode_start/stop()
+        from pypy.rlib.rstack import _stack_criticalcode_start
+        from pypy.rlib.rstack import _stack_criticalcode_stop
+        from pypy.rlib.rstackovf import StackOverflow
+        class A:
+            pass
+        glob = A()
+        def f(n):
+            if n <= 0:
+                return 42
+            try:
+                return f(n+1)
+            except StackOverflow:
+                if glob.caught:
+                    print 'Oups! already caught!'
+                glob.caught = True
+                _stack_criticalcode_start()
+                critical(100)   # recurse another 100 times here
+                _stack_criticalcode_stop()
+                return 789
+        def critical(n):
+            if n > 0:
+                n = critical(n - 1)
+            return n - 42
+        def entry_point(argv):
+            glob.caught = False
+            print f(1)
+            return 0
+        t, cbuilder = self.compile(entry_point, stackcheck=True)
+        out = cbuilder.cmdexec('')
+        assert out.strip() == '789'
+
+
 class TestMaemo(TestStandalone):
     def setup_class(cls):
         py.test.skip("TestMaemo: tests skipped for now")
diff --git a/pypy/translator/cli/opcodes.py b/pypy/translator/cli/opcodes.py
--- a/pypy/translator/cli/opcodes.py
+++ b/pypy/translator/cli/opcodes.py
@@ -77,7 +77,6 @@
     'cast_ptr_to_weakadr':      [PushAllArgs, 'newobj instance void class %s::.ctor(object)' % WEAKREF],
     'gc__collect':              'call void class [mscorlib]System.GC::Collect()',
     'gc_set_max_heap_size':     Ignore,
-    'resume_point':             Ignore,
     'debug_assert':             Ignore,
     'debug_start_traceback':    Ignore,
     'debug_record_traceback':   Ignore,
@@ -85,6 +84,8 @@
     'debug_reraise_traceback':  Ignore,
     'debug_print_traceback':    Ignore,
     'debug_print':              [DebugPrint],
+    'debug_flush':              [PushAllArgs, 'call void [pypylib]pypy.runtime.DebugPrint::DEBUG_FLUSH()'],
+    'debug_offset':             [PushAllArgs, 'call int32 [pypylib]pypy.runtime.DebugPrint::DEBUG_OFFSET()'],
     'debug_start':              [PushAllArgs, 'call void [pypylib]pypy.runtime.DebugPrint::DEBUG_START(string)'],
     'debug_stop':               [PushAllArgs, 'call void [pypylib]pypy.runtime.DebugPrint::DEBUG_STOP(string)'],
     'have_debug_prints':        [PushAllArgs, 'call bool [pypylib]pypy.runtime.DebugPrint::HAVE_DEBUG_PRINTS()'],
diff --git a/pypy/translator/cli/src/debug.cs b/pypy/translator/cli/src/debug.cs
--- a/pypy/translator/cli/src/debug.cs
+++ b/pypy/translator/cli/src/debug.cs
@@ -38,6 +38,20 @@
             return false;
         }
 
+        public static void DEBUG_FLUSH()
+        {
+            if (debug_file != null)
+                debug_file.Flush();
+        }
+
+        public static int DEBUG_OFFSET()
+        {
+            StreamWriter sw = debug_file as StreamWriter;
+            if (sw == null)
+                return -1;
+            return (int)sw.BaseStream.Position; // XXX: the cast might be incorrect
+        }
+
         public static bool HAVE_DEBUG_PRINTS()
         {
             if ((have_debug_prints & 1) != 0) {
diff --git a/pypy/translator/driver.py b/pypy/translator/driver.py
--- a/pypy/translator/driver.py
+++ b/pypy/translator/driver.py
@@ -559,6 +559,7 @@
                 shutil.copy(str(soname), str(newsoname))
                 self.log.info("copied: %s" % (newsoname,))
             self.c_entryp = newexename
+        self.log.info('usession directory: %s' % (udir,))
         self.log.info("created: %s" % (self.c_entryp,))
 
     def task_compile_c(self):
diff --git a/pypy/translator/goal/targetpypystandalone.py b/pypy/translator/goal/targetpypystandalone.py
--- a/pypy/translator/goal/targetpypystandalone.py
+++ b/pypy/translator/goal/targetpypystandalone.py
@@ -105,7 +105,8 @@
         return parser
 
     def handle_config(self, config, translateconfig):
-        if translateconfig._cfgimpl_value_owners['opt'] == 'default':
+        if (not translateconfig.help and
+            translateconfig._cfgimpl_value_owners['opt'] == 'default'):
             raise Exception("You have to specify the --opt level.\n"
                     "Try --opt=2 or --opt=jit, or equivalently -O2 or -Ojit .")
         self.translateconfig = translateconfig
diff --git a/pypy/translator/jvm/opcodes.py b/pypy/translator/jvm/opcodes.py
--- a/pypy/translator/jvm/opcodes.py
+++ b/pypy/translator/jvm/opcodes.py
@@ -95,7 +95,6 @@
 
     'gc__collect':              jvm.SYSTEMGC,
     'gc_set_max_heap_size':     Ignore,
-    'resume_point':             Ignore,
     'jit_marker':               Ignore,
     'jit_force_virtualizable':  Ignore,
     'jit_force_virtual':        DoNothing,
diff --git a/pypy/translator/oosupport/test_template/operations.py b/pypy/translator/oosupport/test_template/operations.py
--- a/pypy/translator/oosupport/test_template/operations.py
+++ b/pypy/translator/oosupport/test_template/operations.py
@@ -107,12 +107,6 @@
             return res
         assert self.interpret(fn, [sys.maxint, 2]) == 1
 
-    def test_ignore_resume_point(self):
-        def fn(x):
-            rstack.resume_point('hello world', x)
-            return x
-        assert self.interpret(fn, [42]) == 42
-
     def test_rshift(self):
         def fn(x, y):
             return x >> y
diff --git a/pypy/translator/platform/posix.py b/pypy/translator/platform/posix.py
--- a/pypy/translator/platform/posix.py
+++ b/pypy/translator/platform/posix.py
@@ -129,7 +129,9 @@
         m.cfiles = rel_cfiles
 
         rel_includedirs = [pypyrel(incldir) for incldir in
-                           self._preprocess_include_dirs(eci.include_dirs)]
+                           self.preprocess_include_dirs(eci.include_dirs)]
+        rel_libdirs = [pypyrel(libdir) for libdir in
+                       self.preprocess_library_dirs(eci.library_dirs)]
 
         m.comment('automatically generated makefile')
         definitions = [
@@ -139,7 +141,7 @@
             ('SOURCES', rel_cfiles),
             ('OBJECTS', rel_ofiles),
             ('LIBS', self._libs(eci.libraries)),
-            ('LIBDIRS', self._libdirs(eci.library_dirs)),
+            ('LIBDIRS', self._libdirs(rel_libdirs)),
             ('INCLUDEDIRS', self._includedirs(rel_includedirs)),
             ('CFLAGS', cflags),
             ('CFLAGSEXTRA', list(eci.compile_extra)),
diff --git a/pypy/translator/platform/test/test_posix.py b/pypy/translator/platform/test/test_posix.py
--- a/pypy/translator/platform/test/test_posix.py
+++ b/pypy/translator/platform/test/test_posix.py
@@ -3,7 +3,7 @@
 from pypy.translator.tool.cbuild import ExternalCompilationInfo
 from pypy.tool.udir import udir
 from StringIO import StringIO
-import sys
+import sys, os
 
 def test_echo():
     res = host.execute('echo', '42 24')
@@ -49,6 +49,19 @@
         mk.write()
         assert 'LINKFILES = /foo/bar.a' in tmpdir.join('Makefile').read()
 
+    def test_preprocess_localbase(self):
+        tmpdir = udir.join('test_preprocess_localbase').ensure(dir=1)
+        eci = ExternalCompilationInfo()
+        os.environ['PYPY_LOCALBASE'] = '/foo/baz'
+        try:
+            mk = self.platform.gen_makefile(['blip.c'], eci, path=tmpdir)
+            mk.write()
+        finally:
+            del os.environ['PYPY_LOCALBASE']
+        Makefile = tmpdir.join('Makefile').read()
+        assert 'INCLUDEDIRS = -I/foo/baz/include' in Makefile
+        assert 'LIBDIRS = -L/foo/baz/lib' in Makefile
+
 class TestMaemo(TestMakefile):
     strict_on_stderr = False
     
diff --git a/pypy/translator/stackless/frame.py b/pypy/translator/stackless/frame.py
--- a/pypy/translator/stackless/frame.py
+++ b/pypy/translator/stackless/frame.py
@@ -104,10 +104,8 @@
 
 class RestartInfo(object):
 
-    """A RestartInfo is created (briefly) for each graph that contains
-    a resume point.
-
-    In addition, a RestartInfo is created for each function that needs
+    """
+    A RestartInfo is created for each function that needs
     to do explicit stackless manipulations
     (e.g. code.yield_current_frame_to_caller)."""
 
diff --git a/pypy/translator/stackless/test/test_coroutine_reconstruction.py b/pypy/translator/stackless/test/test_coroutine_reconstruction.py
deleted file mode 100644
--- a/pypy/translator/stackless/test/test_coroutine_reconstruction.py
+++ /dev/null
@@ -1,68 +0,0 @@
-from pypy.rlib import rcoroutine
-from pypy.rlib import rstack
-from pypy.rlib.rstack import resume_state_create
-from pypy.translator.stackless.test.test_transform import llinterp_stackless_function
-from pypy.rpython.lltypesystem.lloperation import llop
-from pypy.rpython.lltypesystem import lltype
-
-namespace = rcoroutine.make_coroutine_classes(object)
-syncstate = namespace['syncstate']
-AbstractThunk = namespace['AbstractThunk']
-Coroutine = namespace['Coroutine']
-
-class TestCoroutineReconstruction:
-
-    def setup_meth(self):
-        syncstate.reset()
-
-    def test_simple_ish(self):
-
-        output = []
-        def f(coro, n, x):
-            if n == 0:
-                coro.switch()
-                rstack.resume_point("f_0")
-                assert rstack.stack_frames_depth() == 9
-                return
-            f(coro, n-1, 2*x)
-            rstack.resume_point("f_1", coro, n, x)
-            output.append(x)
-
-        class T(AbstractThunk):
-            def __init__(self, arg_coro, arg_n, arg_x):
-                self.arg_coro = arg_coro
-                self.arg_n = arg_n
-                self.arg_x = arg_x
-            def call(self):
-                f(self.arg_coro, self.arg_n, self.arg_x)
-
-        def example():
-            main_coro = Coroutine.getcurrent()
-            sub_coro = Coroutine()
-            thunk_f = T(main_coro, 5, 1)
-            sub_coro.bind(thunk_f)
-            sub_coro.switch()
-
-            new_coro = Coroutine()
-            new_thunk_f = T(main_coro, 5, 1)
-            new_coro.bind(new_thunk_f)
-
-            costate = Coroutine._get_default_costate()
-            bottom = resume_state_create(None, "yield_current_frame_to_caller_1")
-            _bind_frame = resume_state_create(bottom, "coroutine__bind", costate)
-            f_frame_1 = resume_state_create(_bind_frame, "f_1", main_coro, 5, 1)
-            f_frame_2 = resume_state_create(f_frame_1, "f_1", main_coro, 4, 2)
-            f_frame_3 = resume_state_create(f_frame_2, "f_1", main_coro, 3, 4)
-            f_frame_4 = resume_state_create(f_frame_3, "f_1", main_coro, 2, 8)
-            f_frame_5 = resume_state_create(f_frame_4, "f_1", main_coro, 1, 16)
-            f_frame_0 = resume_state_create(f_frame_5, "f_0")
-            switch_frame = resume_state_create(f_frame_0, "coroutine_switch", costate)
-
-            new_coro.frame = switch_frame
-
-            new_coro.switch()
-            return output == [16, 8, 4, 2, 1]
-
-        res = llinterp_stackless_function(example)
-        assert res == 1
-
diff --git a/pypy/translator/stackless/test/test_resume_point.py b/pypy/translator/stackless/test/test_resume_point.py
deleted file mode 100644
--- a/pypy/translator/stackless/test/test_resume_point.py
+++ /dev/null
@@ -1,457 +0,0 @@
-from pypy.translator.stackless.transform import StacklessTransformer
-from pypy.translator.stackless.test.test_transform import llinterp_stackless_function, rtype_stackless_function, one, run_stackless_function
-from pypy import conftest
-import py
-from pypy.rlib import rstack
-
-def do_backendopt(t):
-    from pypy.translator.backendopt import all
-    all.backend_optimizations(t)
-
-def transform_stackless_function(fn, callback_for_transform=None):
-    def wrapper(argv):
-        return fn()
-    t = rtype_stackless_function(wrapper)
-    if callback_for_transform:
-        callback_for_transform(t)
-    if conftest.option.view:
-        t.view()
-    st = StacklessTransformer(t, wrapper, False)
-    st.transform_all()
-
-def test_no_call():
-    def f(x, y):
-        x = x-1
-        rstack.resume_point("rp0", x, y) 
-        r = x+y
-        rstack.stack_unwind()
-        return r
-    def example():
-        v1 = f(one(),one()+one())
-        state = rstack.resume_state_create(None, "rp0", one(), one()+one()+one())
-        v2 = rstack.resume_state_invoke(int, state)
-        return v1*10 + v2
-##     transform_stackless_function(example)
-    res = llinterp_stackless_function(example, assert_unwind=False)
-    assert res == 24
-
-def test_bogus_restart_state_create():
-    def f(x, y):
-        x = x-1
-        rstack.resume_point("rp0", x, y) 
-        return x+y
-    def example():
-        v1 = f(one(),one()+one())
-        state = rstack.resume_state_create(None, "rp0", one())
-        return v1
-    info = py.test.raises(AssertionError, "transform_stackless_function(example)")
-    assert 'rp0' in str(info.value)
-    
-
-def test_call():
-    def g(x,y):
-        return x*y
-    def f(x, y):
-        z = g(x,y)
-        rstack.resume_point("rp1", y, returns=z) 
-        return z+y
-    def example():
-        v1 = f(one(),one()+one())
-        s = rstack.resume_state_create(None, "rp1", 5*one())
-        v2 = rstack.resume_state_invoke(int, s, returning=one()*7)
-        return v1*100 + v2
-    res = llinterp_stackless_function(example)
-    assert res == 412
-    res = run_stackless_function(example)
-    assert res == 412
-
-def test_returns_with_instance():
-    class C:
-        def __init__(self, x):
-            self.x = x
-    def g(x):
-        return C(x+1)
-    def f(x, y):
-        r = g(x)
-        rstack.resume_point("rp1", y, returns=r)
-        return r.x + y
-    def example():
-        v1 = f(one(),one()+one())
-        s = rstack.resume_state_create(None, "rp1", 5*one())
-        v2 = rstack.resume_state_invoke(int, s, returning=C(one()*3))
-        return v1*100 + v2
-    res = llinterp_stackless_function(example, assert_unwind=False)
-    assert res == 408
-    res = run_stackless_function(example)
-    assert res == 408
-
-def test_call_uncovered():
-    def g(x,y):
-        return x*y
-    def f(x, y):
-        z = g(x,y)
-        rstack.resume_point("rp1", y, returns=z)
-        return z+y+x
-    def example():
-        f(one(),one()+one())
-        return 0
-    e = py.test.raises(Exception, transform_stackless_function, example)
-    msg, = e.value.args
-    assert msg.startswith('not covered needed value at resume_point') and 'rp1' in msg
-
-def test_chained_states():
-    def g(x, y):
-        x += 1
-        rstack.resume_point("rp1", x, y)
-        return x + y
-    def f(x, y, z):
-        y += 1
-        r = g(x, y)
-        rstack.resume_point("rp2", z, returns=r)
-        return r + z
-    def example():
-        v1 = f(one(), 2*one(), 3*one())
-        s2 = rstack.resume_state_create(None, "rp2", 2*one())
-        s1 = rstack.resume_state_create(s2, "rp1", 4*one(), 5*one())
-        return 100*v1 + rstack.resume_state_invoke(int, s1)
-    res = llinterp_stackless_function(example)
-    assert res == 811
-    res = run_stackless_function(example)
-    assert res == 811
-
-def test_return_instance():
-    class C:
-        pass
-    def g(x):
-        c = C()
-        c.x = x + 1
-        rstack.resume_point("rp1", c)
-        return c
-    def f(x, y):
-        r = g(x)
-        rstack.resume_point("rp2", y, returns=r)
-        return r.x + y
-    def example():
-        v1 = f(one(), 2*one())
-        s2 = rstack.resume_state_create(None, "rp2", 2*one())
-        c = C()
-        c.x = 4*one()
-        s1 = rstack.resume_state_create(s2, "rp1", c)
-        return v1*100 + rstack.resume_state_invoke(int, s1)
-    res = llinterp_stackless_function(example)
-    assert res == 406
-    res = run_stackless_function(example)
-    assert res == 406
-
-def test_really_return_instance():
-    class C:
-        pass
-    def g(x):
-        c = C()
-        c.x = x + 1
-        rstack.resume_point("rp1", c)
-        return c
-    def example():
-        v1 = g(one()).x
-        c = C()
-        c.x = 4*one()
-        s1 = rstack.resume_state_create(None, "rp1", c)
-        return v1*100 + rstack.resume_state_invoke(C, s1).x
-    res = llinterp_stackless_function(example)
-    assert res == 204
-    res = run_stackless_function(example)
-    assert res == 204
-
-def test_resume_and_raise():
-    def g(x):
-        rstack.resume_point("rp0", x)
-        if x == 0:
-            raise KeyError
-        return x + 1
-    def example():
-        v1 = g(one())
-        s = rstack.resume_state_create(None, "rp0", one()-1)
-        try:
-            v2 = rstack.resume_state_invoke(int, s)
-        except KeyError:
-            v2 = 42
-        return v1*100 + v2
-    res = llinterp_stackless_function(example)
-    assert res == 242
-    res = run_stackless_function(example)
-    assert res == 242
-    
-def test_resume_and_raise_and_catch():
-    def g(x):
-        rstack.resume_point("rp0", x)
-        if x == 0:
-            raise KeyError
-        return x + 1
-    def f(x):
-        x = x - 1
-        try:
-            r = g(x)
-            rstack.resume_point("rp1", returns=r)
-        except KeyError:
-            r = 42
-        return r - 1
-    def example():
-        v1 = f(one()+one())
-        s1 = rstack.resume_state_create(None, "rp1")
-        s0 = rstack.resume_state_create(s1, "rp0", one()-1)
-        v2 = rstack.resume_state_invoke(int, s0)
-        return v1*100 + v2
-    res = llinterp_stackless_function(example)
-    assert res == 141
-    res = run_stackless_function(example)
-    assert res == 141
-
-def test_invoke_raising():
-    def g(x):
-        rstack.resume_point("rp0", x)
-        return x + 1
-    def f(x):
-        x = x - 1
-        try:
-            r = g(x)
-            rstack.resume_point("rp1", returns=r)
-        except KeyError:
-            r = 42
-        return r - 1
-    def example():
-        v1 = f(one()+one())
-        s1 = rstack.resume_state_create(None, "rp1")
-        s0 = rstack.resume_state_create(s1, "rp0", 0)
-        v2 = rstack.resume_state_invoke(int, s0, raising=KeyError())
-        return v1*100 + v2
-    res = llinterp_stackless_function(example)
-    assert res == 141
-    res = run_stackless_function(example)
-    assert res == 141
-    
-
-def test_finally():
-    def f(x):
-        rstack.resume_point("rp1", x)        
-        return 1/x
-    def in_finally(x): 
-        rstack.resume_point("rp1.5", x)
-        return 2/x
-    def g(x):
-        r = y = 0
-        r += f(x)
-        try:
-            y = f(x)
-            rstack.resume_point("rp0", x, r, returns=y)
-        finally:
-            r += in_finally(x)
-        return r + y
-    def example():
-        return g(one())
-    transform_stackless_function(example)
-
-def test_except():
-    py.test.skip("please don't write code like this")
-    def f(x):
-        rstack.resume_point("rp1", x)        
-        return 1/x
-    def g(x):
-        r = y = 0
-        r += f(x)
-        try:
-            y = f(x)
-            rstack.resume_point("rp0", x, r, y, returns=y)
-        except ZeroDivisionError:
-            r += f(x)
-        return r + y
-    def example():
-        return g(one())
-    transform_stackless_function(example)
-
-def test_using_pointers():
-    from pypy.interpreter.miscutils import FixedStack
-    class Arguments:
-        def __init__(self, a, b, c, d, e):
-            pass
-    class W_Root:
-        pass
-    class FakeFrame:
-        def __init__(self, space):
-            self.space = space
-            self.valuestack = FixedStack()
-            self.valuestack.setup(10)
-            self.valuestack.push(W_Root())
-    class FakeSpace:
-        def call_args(self, args, kw):
-            return W_Root()
-        def str_w(self, ob):
-            return 'a string'
-    def call_function(f, oparg, w_star=None, w_starstar=None):
-        n_arguments = oparg & 0xff
-        n_keywords = (oparg>>8) & 0xff
-        keywords = None
-        if n_keywords:
-            keywords = {}
-            for i in range(n_keywords):
-                w_value = f.valuestack.pop()
-                w_key   = f.valuestack.pop()
-                key = f.space.str_w(w_key)
-                keywords[key] = w_value
-        arguments = [None] * n_arguments
-        for i in range(n_arguments - 1, -1, -1):
-            arguments[i] = f.valuestack.pop()
-        args = Arguments(f.space, arguments, keywords, w_star, w_starstar)
-        w_function  = f.valuestack.pop()
-        w_result = f.space.call_args(w_function, args)
-        rstack.resume_point("call_function", f, returns=w_result)
-        f.valuestack.push(w_result)
-    def example():
-        s = FakeSpace()
-        f = FakeFrame(s)
-        call_function(f, 100, W_Root(), W_Root())
-        return one()
-    transform_stackless_function(example, do_backendopt)
-
-def test_always_raising():
-    def g(out):
-        out.append(3)
-        rstack.resume_point('g')
-        raise KeyError
-
-    def h(out):
-        try:
-            # g is always raising, good enough to put the resume point
-            # before, instead of after!
-            rstack.resume_point('h', out)
-            g(out)
-        except KeyError:
-            return 0
-        return -1
-
-    def example():
-        out = []
-        x = h(out)
-        l  = len(out)
-        chain = rstack.resume_state_create(None, 'h', out)
-        chain = rstack.resume_state_create(chain, 'g')
-        x += rstack.resume_state_invoke(int, chain)
-        l += len(out)
-        return l*100+x
-
-    res = llinterp_stackless_function(example)
-    assert res == 200
-    res = run_stackless_function(example)
-    assert res == 200
-
-def test_more_mess():
-    from pypy.interpreter.miscutils import Stack
-
-    def new_framestack():
-        return Stack()
-
-    class FakeFrame:
-        pass
-    class FakeSlpFrame:
-        def switch(self):
-            rstack.stack_unwind()
-            return FakeSlpFrame()
-
-    class FakeCoState:
-        def update(self, new):
-            self.last, self.current = self.current, new
-            frame, new.frame = new.frame, None
-            return frame
-        def do_things_to_do(self):
-            self.do_things_to_do()
-
-    costate = FakeCoState()
-    costate.current = None
-
-    class FakeExecutionContext:
-        def __init__(self):
-            self.space = space
-            self.framestack = new_framestack()
-
-        def subcontext_new(coobj):
-            coobj.framestack = new_framestack()
-        subcontext_new = staticmethod(subcontext_new)
-
-        def subcontext_enter(self, next):
-            self.framestack = next.framestack
-
-        def subcontext_leave(self, current):
-            current.framestack = self.framestack
-
-    class FakeSpace:
-        def __init__(self):
-            self.ec = None
-        def getexecutioncontext(self):
-            if self.ec is None:
-                self.ec = FakeExecutionContext()
-            return self.ec
-
-    space = FakeSpace()
-
-    class MainCoroutineGetter(object):
-        def __init__(self):
-            self.costate = None
-        def _get_default_costate(self):
-            if self.costate is None:
-                costate = FakeCoState()
-                self.costate = costate
-                return costate
-            return self.costate
-
-    main_coroutine_getter = MainCoroutineGetter()
-    
-    class FakeCoroutine:
-        def __init__(self):
-            self.frame = None
-            self.costate = costate
-            space.getexecutioncontext().subcontext_new(self)
-            
-        def switch(self):
-            if self.frame is None:
-                raise RuntimeError
-            state = self.costate
-            incoming_frame = state.update(self).switch()
-            rstack.resume_point("coroutine_switch", self, state, returns=incoming_frame)
-            left = state.last
-            left.frame = incoming_frame
-            left.goodbye()
-            self.hello()
-            #main_coroutine_getter._get_default_costate().do_things_to_do()
-
-        def hello(self):
-            pass
-
-        def goodbye(self):
-            pass
-
-    class FakeAppCoroutine(FakeCoroutine):
-        def __init__(self):
-            FakeCoroutine.__init__(self)
-            self.space = space
-            
-        def hello(self):
-            ec = self.space.getexecutioncontext()
-            ec.subcontext_enter(self)
-
-        def goodbye(self):
-            ec = self.space.getexecutioncontext()
-            ec.subcontext_leave(self)
-
-    def example():
-        coro = FakeAppCoroutine()
-        othercoro = FakeCoroutine()
-        othercoro.frame = FakeSlpFrame()
-        if one():
-            coro.frame = FakeSlpFrame()
-        if one() - one():
-            coro.costate = FakeCoState()
-            coro.costate.last = coro.costate.current = othercoro
-        space.getexecutioncontext().framestack.push(FakeFrame())
-        coro.switch()
-        return one()
-
-    transform_stackless_function(example, do_backendopt)
diff --git a/pypy/translator/stackless/transform.py b/pypy/translator/stackless/transform.py
--- a/pypy/translator/stackless/transform.py
+++ b/pypy/translator/stackless/transform.py
@@ -112,19 +112,6 @@
 #         abort()
 #     return retval + x + 1
 
-class SymbolicRestartNumber(ComputedIntSymbolic):
-    def __init__(self, label, value=None):
-        ComputedIntSymbolic.__init__(self, self._getvalue)
-        self.label = label
-        self.value = value
-
-    def _getvalue(self):
-        # argh, we'd like to assert-fail if value is None here, but we
-        # get called too early (during databasing) for this to be
-        # valid.  so we might return None and rely on the database
-        # checking that this only happens before the database is
-        # complete.
-        return self.value
 
 # the strategy for sharing parts of the resume code:
 #
@@ -248,8 +235,7 @@
         self.stackless_gc = stackless_gc
 
     def analyze_simple_operation(self, op, graphinfo):
-        if op.opname in ('yield_current_frame_to_caller', 'resume_point',
-                'resume_state_invoke', 'resume_state_create', 'stack_frames_depth',
+        if op.opname in ('yield_current_frame_to_caller', 'stack_frames_depth',
                 'stack_switch', 'stack_unwind', 'stack_capture',
                 'get_stack_depth_limit', 'set_stack_depth_limit'):
             return True
@@ -458,24 +444,11 @@
 
         self.is_finished = False
 
-        # only for sanity checking, but still very very important
-        self.explicit_resume_point_data = {}
-        
-        self.symbolic_restart_numbers = {}
-
-        # register the prebuilt restartinfos & give them names for use
-        # with resume_state_create
         # the mauling of frame_typer internals should be a method on FrameTyper.
         for restartinfo in frame.RestartInfo.prebuilt:
             name = restartinfo.func_or_graph.__name__
             for i in range(len(restartinfo.frame_types)):
-                label = name + '_' + str(i)
-                assert label not in self.symbolic_restart_numbers
-                # XXX we think this is right:
-                self.symbolic_restart_numbers[label] = SymbolicRestartNumber(
-                    label, len(self.masterarray1) + i)
                 frame_type = restartinfo.frame_types[i]
-                self.explicit_resume_point_data[label] = frame_type
                 self.frametyper.ensure_frame_type_for_types(frame_type)
             self.register_restart_info(restartinfo)
 
@@ -589,156 +562,6 @@
                 # yes
                 convertblock.exits[0].args[index] = newvar
         # end ouch!
-        
-    def handle_resume_point(self, block, i):
-        # in some circumstances we might be able to reuse
-        # an already inserted resume point
-        op = block.operations[i]
-        if i == len(block.operations) - 1:
-            link = block.exits[0]
-            nextblock = None
-        else:
-            link = split_block(None, block, i+1)
-            i = 0
-            nextblock = link.target
-
-        label = op.args[0].value
-
-        parms = op.args[1:]
-        if not isinstance(parms[0], model.Variable):
-            assert parms[0].value is None
-            parms[0] = None
-        args = vars_to_save(block)
-        for a in args:
-            if a not in parms:
-                raise Exception, "not covered needed value at resume_point %r"%(label,)
-        if parms[0] is not None: # returns= case
-            res = parms[0]
-            args = [arg for arg in args if arg is not res]
-        else:
-            args = args
-            res = op.result
-
-        (FRAME_TYPE, varsforcall, saver) = self.frametyper.frame_type_for_vars(parms[1:])
-
-        if label in self.explicit_resume_point_data:
-            OTHER_TYPE = self.explicit_resume_point_data[label]
-            assert FRAME_TYPE == OTHER_TYPE, "inconsistent types for label %r"%(label,)
-        else:
-            self.explicit_resume_point_data[label] = FRAME_TYPE
-
-        self._make_resume_handling(FRAME_TYPE, varsforcall, res, block.exits)
-
-        restart_number = len(self.masterarray1) + len(self.resume_blocks) - 1
-
-        if label in self.symbolic_restart_numbers:
-            symb = self.symbolic_restart_numbers[label]
-            assert symb.value is None
-            symb.value = restart_number
-        else:
-            symb = SymbolicRestartNumber(label, restart_number)
-            self.symbolic_restart_numbers[label] = symb
-
-        return nextblock
-
-    def handle_resume_state_create(self, block, i):
-        op = block.operations[i]
-        llops = LowLevelOpList()
-        label = op.args[1].value
-        parms = op.args[2:]
-        FRAME_TYPE, varsforcall, saver = self.frametyper.frame_type_for_vars(parms)
-
-        if label in self.explicit_resume_point_data:
-            OTHER_TYPE = self.explicit_resume_point_data[label]
-            assert FRAME_TYPE == OTHER_TYPE, "inconsistent types for label %r"%(label,)
-        else:
-            self.explicit_resume_point_data[label] = FRAME_TYPE
-
-        if label in self.symbolic_restart_numbers:
-            symb = self.symbolic_restart_numbers[label]
-        else:
-            symb = SymbolicRestartNumber(label)
-            self.symbolic_restart_numbers[label] = symb
-
-        # this is rather insane: we create an exception object, pass
-        # it to the saving function, then read the thus created state
-        # out of and then clear global_state.top
-        c_EXC = model.Constant(self.unwind_exception_type.TO, lltype.Void)
-        c_flags = model.Constant({'flavor': 'gc'}, lltype.Void)
-        v_exc = llops.genop('malloc', [c_EXC, c_flags],
-                            resulttype = self.unwind_exception_type)
-        llops.genop('setfield', [v_exc,
-                                 model.Constant('inst_depth', lltype.Void),
-                                 model.Constant(0, lltype.Signed)])
-
-        realvarsforcall = []
-        for v in varsforcall:
-            if v.concretetype != lltype.Void:
-                realvarsforcall.append(gen_cast(llops, storage_type(v.concretetype), v))
-        
-        llops.genop('direct_call',
-                    [model.Constant(saver, lltype.typeOf(saver)), v_exc,
-                     model.Constant(symb, lltype.Signed)] + realvarsforcall,
-                    resulttype = lltype.Void)
-        v_state = varoftype(lltype.Ptr(frame.STATE_HEADER))
-        v_state_hdr = llops.genop("getfield",
-                                  [self.ll_global_state, self.c_inst_top_name],
-                                  resulttype=lltype.Ptr(STATE_HEADER))
-        v_state = gen_cast(llops, lltype.Ptr(FRAME_TYPE), v_state_hdr)
-        llops.genop("setfield",
-                    [self.ll_global_state, self.c_inst_top_name, self.c_null_state])
-
-        v_prevstate = gen_cast(llops, lltype.Ptr(frame.STATE_HEADER), op.args[0])
-        llops.genop('direct_call', [self.set_back_pointer_ptr,
-                                    v_state_hdr, v_prevstate])
-        llops.append(model.SpaceOperation('cast_opaque_ptr', [v_state_hdr], op.result))
-        block.operations[i:i+1] = llops
-
-    def handle_resume_state_invoke(self, block):
-        op = block.operations[-1]
-        assert op.opname == 'resume_state_invoke'
-        # some commentary.
-        #
-        # we don't want to write 155 or so different versions of
-        # resume_after_foo that appear to the annotator to return
-        # different types.  we take advantage of the fact that this
-        # function always raises UnwindException and have it (appear
-        # to) return Void.  then to placate all the other machinery,
-        # we pass a constant zero-of-the-appropriate-type along the
-        # non-exceptional link (which we know will never be taken).
-        # Nota Bene: only mutate a COPY of the non-exceptional link
-        # because the non-exceptional link has been stored in
-        # self.resume_blocks and we don't want a constant "zero" in
-        # there.
-        v_state = op.args[0]
-        v_returning = op.args[1]
-        v_raising = op.args[2]
-        llops = LowLevelOpList()
-
-        if v_raising.concretetype == lltype.Void:
-            erased_type = storage_type(v_returning.concretetype)
-            resume_after_ptr = self.resume_afters[erased_type]
-            v_param = v_returning
-        else:
-            assert v_returning.concretetype == lltype.Void
-            erased_type = self.exception_type
-            resume_after_ptr = self.resume_after_raising_ptr
-            v_param = v_raising
-
-        if erased_type != v_param.concretetype:
-            v_param = gen_cast(llops, erased_type, v_param)
-        llops.genop('direct_call', [resume_after_ptr, v_state, v_param],
-                    resulttype=lltype.Void)
-
-        del block.operations[-1]
-        block.operations.extend(llops)
-
-        noexclink = block.exits[0].copy()
-        realrettype = op.result.concretetype
-        for i, a in enumerate(noexclink.args):
-            if a is op.result:
-                noexclink.args[i] = model.Constant(realrettype._defl(), realrettype)
-        block.recloseblock(*((noexclink,) + block.exits[1:]))        
 
     def insert_unwind_handling(self, block, i):
         # for the case where we are resuming to an except:
@@ -821,19 +644,8 @@
                 op = replace_with_call(self.operation_replacement[op.opname])
                 stackless_op = True
 
-            if op.opname == 'resume_state_create':
-                self.handle_resume_state_create(block, i)
-                continue # go back and look at that malloc
-                        
             if (op.opname in ('direct_call', 'indirect_call')
                 or self.analyzer.analyze(op)):
-                if op.opname == 'resume_point':
-                    block = self.handle_resume_point(block, i)
-                    if block is None:
-                        return
-                    else:
-                        i = 0
-                        continue
 
                 if not stackless_op and not self.analyzer.analyze(op):
                     i += 1
@@ -849,9 +661,7 @@
                     continue
 
                 nextblock = self.insert_unwind_handling(block, i)
-                if op.opname == 'resume_state_invoke':
-                    self.handle_resume_state_invoke(block)
-                
+
                 if nextblock is None:
                     return
 
diff --git a/pypy/translator/transform.py b/pypy/translator/transform.py
--- a/pypy/translator/transform.py
+++ b/pypy/translator/transform.py
@@ -175,41 +175,6 @@
     # make sure the bookkeeper knows about AssertionError
     self.bookkeeper.getuniqueclassdef(AssertionError)
 
-def insert_stackcheck(ann):
-    from pypy.tool.algo.graphlib import Edge, make_edge_dict, break_cycles
-    edges = []
-    graphs_to_patch = {}
-    for callposition, (caller, callee) in ann.translator.callgraph.items():
-        if getattr(getattr(callee, 'func', None), 'insert_stack_check_here', False):
-            graphs_to_patch[callee] = True
-            continue
-        edge = Edge(caller, callee)
-        edge.callposition = callposition
-        edges.append(edge)
-
-    for graph in graphs_to_patch:
-        v = Variable()
-        ann.setbinding(v, annmodel.SomeImpossibleValue())
-        unwind_op = SpaceOperation('simple_call', [Constant(stack_check)], v)
-        graph.startblock.operations.insert(0, unwind_op)
-
-    edgedict = make_edge_dict(edges)
-    for edge in break_cycles(edgedict, edgedict):
-        caller = edge.source
-        _, _, call_tag = edge.callposition
-        if call_tag:
-            caller_block, _ = call_tag
-        else:
-            ann.warning("cycle detected but no information on where to insert "
-                        "stack_check()")
-            continue
-        # caller block found, insert stack_check()
-        v = Variable()
-        # push annotation on v
-        ann.setbinding(v, annmodel.SomeImpossibleValue())
-        unwind_op = SpaceOperation('simple_call', [Constant(stack_check)], v)
-        caller_block.operations.insert(0, unwind_op)
-
 def insert_ll_stackcheck(translator):
     from pypy.translator.backendopt.support import find_calls_from
     from pypy.rlib.rstack import stack_check


More information about the pypy-commit mailing list