[pypy-commit] pypy default: merge

Mon Jun 13 09:44:30 CEST 2011

Author: Ilya Osadchiy <osadchiy.ilya at gmail.com>
Branch: 
Changeset: r44901:56eacbbd59af
Date: 2011-06-13 01:30 +0300
http://bitbucket.org/pypy/pypy/changeset/56eacbbd59af/

Log:	merge

diff --git a/.hgignore b/.hgignore
--- a/.hgignore
+++ b/.hgignore
@@ -64,6 +64,7 @@
 ^pypy/doc/image/lattice3\.png$
 ^pypy/doc/image/stackless_informal\.png$
 ^pypy/doc/image/parsing_example.+\.png$
+^pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test\.o$
 ^compiled
 ^.git/
 ^release/
diff --git a/lib-python/conftest.py b/lib-python/conftest.py
--- a/lib-python/conftest.py
+++ b/lib-python/conftest.py
@@ -569,7 +569,6 @@
 #
 import os
 import time
-import socket
 import getpass
 
 class ReallyRunFileExternal(py.test.collect.Item): 
diff --git a/lib-python/modified-2.7/ctypes/__init__.py b/lib-python/modified-2.7/ctypes/__init__.py
--- a/lib-python/modified-2.7/ctypes/__init__.py
+++ b/lib-python/modified-2.7/ctypes/__init__.py
@@ -7,6 +7,7 @@
 
 __version__ = "1.1.0"
 
+import _ffi
 from _ctypes import Union, Structure, Array
 from _ctypes import _Pointer
 from _ctypes import CFuncPtr as _CFuncPtr
@@ -350,7 +351,7 @@
         self._FuncPtr = _FuncPtr
 
         if handle is None:
-            self._handle = _dlopen(self._name, mode)
+            self._handle = _ffi.CDLL(name)
         else:
             self._handle = handle
 
diff --git a/lib-python/modified-2.7/ctypes/test/test_cfuncs.py b/lib-python/modified-2.7/ctypes/test/test_cfuncs.py
--- a/lib-python/modified-2.7/ctypes/test/test_cfuncs.py
+++ b/lib-python/modified-2.7/ctypes/test/test_cfuncs.py
@@ -3,8 +3,8 @@
 
 import unittest
 from ctypes import *
-
 import _ctypes_test
+from test.test_support import impl_detail
 
 class CFunctions(unittest.TestCase):
     _dll = CDLL(_ctypes_test.__file__)
@@ -158,12 +158,14 @@
         self.assertEqual(self._dll.tf_bd(0, 42.), 14.)
         self.assertEqual(self.S(), 42)
 
+    @impl_detail('long double not supported by PyPy', pypy=False)
     def test_longdouble(self):
         self._dll.tf_D.restype = c_longdouble
         self._dll.tf_D.argtypes = (c_longdouble,)
         self.assertEqual(self._dll.tf_D(42.), 14.)
         self.assertEqual(self.S(), 42)
-
+        
+    @impl_detail('long double not supported by PyPy', pypy=False)
     def test_longdouble_plus(self):
         self._dll.tf_bD.restype = c_longdouble
         self._dll.tf_bD.argtypes = (c_byte, c_longdouble)
diff --git a/lib-python/modified-2.7/ctypes/test/test_functions.py b/lib-python/modified-2.7/ctypes/test/test_functions.py
--- a/lib-python/modified-2.7/ctypes/test/test_functions.py
+++ b/lib-python/modified-2.7/ctypes/test/test_functions.py
@@ -8,6 +8,7 @@
 from ctypes import *
 import sys, unittest
 from ctypes.test import xfail
+from test.test_support import impl_detail
 
 try:
     WINFUNCTYPE
@@ -144,6 +145,7 @@
         self.assertEqual(result, -21)
         self.assertEqual(type(result), float)
 
+    @impl_detail('long double not supported by PyPy', pypy=False)
     def test_longdoubleresult(self):
         f = dll._testfunc_D_bhilfD
         f.argtypes = [c_byte, c_short, c_int, c_long, c_float, c_longdouble]
diff --git a/lib-python/modified-2.7/ctypes/test/test_libc.py b/lib-python/modified-2.7/ctypes/test/test_libc.py
--- a/lib-python/modified-2.7/ctypes/test/test_libc.py
+++ b/lib-python/modified-2.7/ctypes/test/test_libc.py
@@ -26,6 +26,7 @@
         self.assertEqual(chars.raw, "   ,,aaaadmmmnpppsss\x00")
 
     def test_no_more_xfail(self):
+        import socket
         import ctypes.test
         self.assertTrue(not hasattr(ctypes.test, 'xfail'),
                         "You should incrementally grep for '@xfail' and remove them, they are real failures")
diff --git a/lib-python/modified-2.7/distutils/sysconfig.py b/lib-python/modified-2.7/distutils/sysconfig.py
--- a/lib-python/modified-2.7/distutils/sysconfig.py
+++ b/lib-python/modified-2.7/distutils/sysconfig.py
@@ -20,8 +20,10 @@
 if '__pypy__' in sys.builtin_module_names:
     from distutils.sysconfig_pypy import *
     from distutils.sysconfig_pypy import _config_vars # needed by setuptools
+    from distutils.sysconfig_pypy import _variable_rx # read_setup_file()
 else:
     from distutils.sysconfig_cpython import *
     from distutils.sysconfig_cpython import _config_vars # needed by setuptools
+    from distutils.sysconfig_cpython import _variable_rx # read_setup_file()
 
 
diff --git a/lib-python/modified-2.7/distutils/sysconfig_pypy.py b/lib-python/modified-2.7/distutils/sysconfig_pypy.py
--- a/lib-python/modified-2.7/distutils/sysconfig_pypy.py
+++ b/lib-python/modified-2.7/distutils/sysconfig_pypy.py
@@ -116,3 +116,7 @@
     if compiler.compiler_type == "unix":
         compiler.compiler_so.extend(['-fPIC', '-Wimplicit'])
         compiler.shared_lib_extension = get_config_var('SO')
+
+from sysconfig_cpython import (
+    parse_makefile, _variable_rx, expand_makefile_vars)
+
diff --git a/lib-python/2.7/test/test_multibytecodec.py b/lib-python/modified-2.7/test/test_multibytecodec.py
copy from lib-python/2.7/test/test_multibytecodec.py
copy to lib-python/modified-2.7/test/test_multibytecodec.py
--- a/lib-python/2.7/test/test_multibytecodec.py
+++ b/lib-python/modified-2.7/test/test_multibytecodec.py
@@ -42,7 +42,7 @@
         dec = codecs.getdecoder('euc-kr')
         myreplace  = lambda exc: (u'', sys.maxint+1)
         codecs.register_error('test.cjktest', myreplace)
-        self.assertRaises(IndexError, dec,
+        self.assertRaises((IndexError, OverflowError), dec,
                           'apple\x92ham\x93spam', 'test.cjktest')
 
     def test_codingspec(self):
diff --git a/lib-python/2.7/test/test_multibytecodec_support.py b/lib-python/modified-2.7/test/test_multibytecodec_support.py
copy from lib-python/2.7/test/test_multibytecodec_support.py
copy to lib-python/modified-2.7/test/test_multibytecodec_support.py
--- a/lib-python/2.7/test/test_multibytecodec_support.py
+++ b/lib-python/modified-2.7/test/test_multibytecodec_support.py
@@ -107,8 +107,8 @@
         def myreplace(exc):
             return (u'x', sys.maxint + 1)
         codecs.register_error("test.cjktest", myreplace)
-        self.assertRaises(IndexError, self.encode, self.unmappedunicode,
-                          'test.cjktest')
+        self.assertRaises((IndexError, OverflowError), self.encode,
+                          self.unmappedunicode, 'test.cjktest')
 
     def test_callback_None_index(self):
         def myreplace(exc):
diff --git a/lib-python/modified-2.7/test/test_support.py b/lib-python/modified-2.7/test/test_support.py
--- a/lib-python/modified-2.7/test/test_support.py
+++ b/lib-python/modified-2.7/test/test_support.py
@@ -1066,7 +1066,7 @@
         if '--pdb' in sys.argv:
             import pdb, traceback
             traceback.print_tb(exc_info[2])
-            pdb.post_mortem(exc_info[2], pdb.Pdb)
+            pdb.post_mortem(exc_info[2])
 
 # ----------------------------------
 
diff --git a/lib_pypy/_ctypes/array.py b/lib_pypy/_ctypes/array.py
--- a/lib_pypy/_ctypes/array.py
+++ b/lib_pypy/_ctypes/array.py
@@ -208,6 +208,9 @@
     def _get_buffer_value(self):
         return self._buffer.buffer
 
+    def _to_ffi_param(self):
+        return self._get_buffer_value()
+
 ARRAY_CACHE = {}
 
 def create_array_type(base, length):
diff --git a/lib_pypy/_ctypes/basics.py b/lib_pypy/_ctypes/basics.py
--- a/lib_pypy/_ctypes/basics.py
+++ b/lib_pypy/_ctypes/basics.py
@@ -1,5 +1,6 @@
 
 import _rawffi
+import _ffi
 import sys
 
 keepalive_key = str # XXX fix this when provided with test
@@ -46,6 +47,14 @@
         else:
             return self.from_param(as_parameter)
 
+    def get_ffi_param(self, value):
+        return self.from_param(value)._to_ffi_param()
+
+    def get_ffi_argtype(self):
+        if self._ffiargtype:
+            return self._ffiargtype
+        return _shape_to_ffi_type(self._ffiargshape)
+
     def _CData_output(self, resbuffer, base=None, index=-1):
         #assert isinstance(resbuffer, _rawffi.ArrayInstance)
         """Used when data exits ctypes and goes into user code.
@@ -99,6 +108,7 @@
     """
     __metaclass__ = _CDataMeta
     _objects = None
+    _ffiargtype = None
 
     def __init__(self, *args, **kwds):
         raise TypeError("%s has no type" % (type(self),))
@@ -119,6 +129,12 @@
     def _get_buffer_value(self):
         return self._buffer[0]
 
+    def _to_ffi_param(self):
+        if self.__class__._is_pointer_like():
+            return self._get_buffer_value()
+        else:
+            return self.value
+
     def __buffer__(self):
         return buffer(self._buffer)
 
@@ -150,7 +166,7 @@
     return pointer(cdata)
 
 def cdata_from_address(self, address):
-    # fix the address, in case it's unsigned
+    # fix the address: turn it into as unsigned, in case it's a negative number
     address = address & (sys.maxint * 2 + 1)
     instance = self.__new__(self)
     lgt = getattr(self, '_length_', 1)
@@ -159,3 +175,48 @@
 
 def addressof(tp):
     return tp._buffer.buffer
+
+
+# ----------------------------------------------------------------------
+
+def is_struct_shape(shape):
+    # see the corresponding code to set the shape in
+    # _ctypes.structure._set_shape
+    return (isinstance(shape, tuple) and
+            len(shape) == 2 and
+            isinstance(shape[0], _rawffi.Structure) and
+            shape[1] == 1)
+
+def _shape_to_ffi_type(shape):
+    try:
+        return _shape_to_ffi_type.typemap[shape]
+    except KeyError:
+        pass
+    if is_struct_shape(shape):
+        return shape[0].get_ffi_type()
+    #
+    assert False, 'unknown shape %s' % (shape,)
+
+
+_shape_to_ffi_type.typemap =  {
+    'c' : _ffi.types.char,
+    'b' : _ffi.types.sbyte,
+    'B' : _ffi.types.ubyte,
+    'h' : _ffi.types.sshort,
+    'u' : _ffi.types.unichar,
+    'H' : _ffi.types.ushort,
+    'i' : _ffi.types.sint,
+    'I' : _ffi.types.uint,
+    'l' : _ffi.types.slong,
+    'L' : _ffi.types.ulong,
+    'q' : _ffi.types.slonglong,
+    'Q' : _ffi.types.ulonglong,
+    'f' : _ffi.types.float,
+    'd' : _ffi.types.double,
+    's' : _ffi.types.void_p,
+    'P' : _ffi.types.void_p,
+    'z' : _ffi.types.void_p,
+    'O' : _ffi.types.void_p,
+    'Z' : _ffi.types.void_p,
+    }
+
diff --git a/lib_pypy/_ctypes/function.py b/lib_pypy/_ctypes/function.py
--- a/lib_pypy/_ctypes/function.py
+++ b/lib_pypy/_ctypes/function.py
@@ -1,12 +1,15 @@
+
+from _ctypes.basics import _CData, _CDataMeta, cdata_from_address
+from _ctypes.primitive import SimpleType, _SimpleCData
+from _ctypes.basics import ArgumentError, keepalive_key
+from _ctypes.basics import is_struct_shape
+from _ctypes.builtin import set_errno, set_last_error
 import _rawffi
+import _ffi
 import sys
 import traceback
 import warnings
 
-from _ctypes.basics import ArgumentError, keepalive_key
-from _ctypes.basics import _CData, _CDataMeta, cdata_from_address
-from _ctypes.builtin import set_errno, set_last_error
-from _ctypes.primitive import SimpleType
 
 # XXX this file needs huge refactoring I fear
 
@@ -24,6 +27,7 @@
 
 WIN64 = sys.platform == 'win32' and sys.maxint == 2**63 - 1
 
+
 def get_com_error(errcode, riid, pIunk):
     "Win32 specific: build a COM Error exception"
     # XXX need C support code
@@ -36,6 +40,7 @@
     funcptr.restype = int
     return funcptr(*args)
 
+
 class CFuncPtrType(_CDataMeta):
     # XXX write down here defaults and such things
 
@@ -50,6 +55,7 @@
 
     from_address = cdata_from_address
 
+
 class CFuncPtr(_CData):
     __metaclass__ = CFuncPtrType
 
@@ -65,10 +71,12 @@
     callable = None
     _ptr = None
     _buffer = None
+    _address = None
     # win32 COM properties
     _paramflags = None
     _com_index = None
     _com_iid = None
+    _is_fastpath = False
 
     __restype_set = False
 
@@ -85,8 +93,11 @@
                     raise TypeError(
                         "item %d in _argtypes_ has no from_param method" % (
                             i + 1,))
-            self._argtypes_ = argtypes
-
+            #
+            if all([hasattr(argtype, '_ffiargshape') for argtype in argtypes]):
+                fastpath_cls = make_fastpath_subclass(self.__class__)
+                fastpath_cls.enable_fastpath_maybe(self)
+            self._argtypes_ = list(argtypes)
     argtypes = property(_getargtypes, _setargtypes)
 
     def _getparamflags(self):
@@ -133,6 +144,7 @@
 
     paramflags = property(_getparamflags, _setparamflags)
 
+
     def _getrestype(self):
         return self._restype_
 
@@ -146,27 +158,24 @@
                 callable(restype)):
             raise TypeError("restype must be a type, a callable, or None")
         self._restype_ = restype
-
+        
     def _delrestype(self):
         self._ptr = None
         del self._restype_
-
+        
     restype = property(_getrestype, _setrestype, _delrestype)
 
     def _geterrcheck(self):
         return getattr(self, '_errcheck_', None)
-
     def _seterrcheck(self, errcheck):
         if not callable(errcheck):
             raise TypeError("The errcheck attribute must be callable")
         self._errcheck_ = errcheck
-
     def _delerrcheck(self):
         try:
             del self._errcheck_
         except AttributeError:
             pass
-
     errcheck = property(_geterrcheck, _seterrcheck, _delerrcheck)
 
     def _ffishapes(self, args, restype):
@@ -181,6 +190,14 @@
             restype = 'O' # void
         return argtypes, restype
 
+    def _set_address(self, address):
+        if not self._buffer:
+            self._buffer = _rawffi.Array('P')(1)
+        self._buffer[0] = address
+
+    def _get_address(self):
+        return self._buffer[0]
+
     def __init__(self, *args):
         self.name = None
         self._objects = {keepalive_key(0):self}
@@ -188,7 +205,7 @@
 
         # Empty function object -- this is needed for casts
         if not args:
-            self._buffer = _rawffi.Array('P')(1)
+            self._set_address(0)
             return
 
         argsl = list(args)
@@ -196,20 +213,24 @@
 
         # Direct construction from raw address
         if isinstance(argument, (int, long)) and not argsl:
-            ffiargs, ffires = self._ffishapes(self._argtypes_, self._restype_)
-            self._ptr = _rawffi.FuncPtr(argument, ffiargs, ffires, self._flags_)
-            self._buffer = self._ptr.byptr()
+            self._set_address(argument)
+            restype = self._restype_
+            if restype is None:
+                import ctypes
+                restype = ctypes.c_int
+            self._ptr = self._getfuncptr_fromaddress(self._argtypes_, restype)
             return
 
-        # A callback into Python
+        
+        # A callback into python
         if callable(argument) and not argsl:
             self.callable = argument
             ffiargs, ffires = self._ffishapes(self._argtypes_, self._restype_)
             if self._restype_ is None:
                 ffires = None
-            self._ptr = _rawffi.CallbackPtr(self._wrap_callable(
-                argument, self.argtypes
-                ), ffiargs, ffires, self._flags_)
+            self._ptr = _rawffi.CallbackPtr(self._wrap_callable(argument,
+                                                                self.argtypes),
+                                            ffiargs, ffires, self._flags_)
             self._buffer = self._ptr.byptr()
             return
 
@@ -218,7 +239,7 @@
             import ctypes
             self.name, dll = argument
             if isinstance(dll, str):
-                self.dll = ctypes.CDLL(dll)
+                self.dll = ctypes.CDLL(self.dll)
             else:
                 self.dll = dll
             if argsl:
@@ -227,7 +248,7 @@
                     raise TypeError("Unknown constructor %s" % (args,))
             # We need to check dll anyway
             ptr = self._getfuncptr([], ctypes.c_int)
-            self._buffer = ptr.byptr()
+            self._set_address(ptr.getaddr())
             return
 
         # A COM function call, by index
@@ -270,15 +291,15 @@
                     # than the length of the argtypes tuple.
                     args = args[:len(self._argtypes_)]
             else:
-                plural = len(argtypes) > 1 and "s" or ""
+                plural = len(self._argtypes_) > 1 and "s" or ""
                 raise TypeError(
                     "This function takes %d argument%s (%s given)"
-                    % (len(argtypes), plural, len(args)))
+                    % (len(self._argtypes_), plural, len(args)))
 
             # check that arguments are convertible
             ## XXX Not as long as ctypes.cast is a callback function with
             ## py_object arguments...
-            ## self._convert_args(argtypes, args, {})
+            ## self._convert_args(self._argtypes_, args, {})
 
             try:
                 res = self.callable(*args)
@@ -301,6 +322,7 @@
                           RuntimeWarning, stacklevel=2)
 
         if self._com_index:
+            assert False, 'TODO2'
             from ctypes import cast, c_void_p, POINTER
             if not args:
                 raise ValueError(
@@ -312,77 +334,63 @@
             args[0] = args[0].value
         else:
             thisarg = None
+            
+        newargs, argtypes, outargs = self._convert_args(argtypes, args, kwargs)
 
-        args, outargs = self._convert_args(argtypes, args, kwargs)
-        argtypes = [type(arg) for arg in args]
+        funcptr = self._getfuncptr(argtypes, self._restype_, thisarg)
+        result = self._call_funcptr(funcptr, *newargs)
+        result = self._do_errcheck(result, args)
 
-        restype = self._restype_
-        funcptr = self._getfuncptr(argtypes, restype, thisarg)
+        if not outargs:
+            return result
+        if len(outargs) == 1:
+            return outargs[0]
+        return tuple(outargs)
+
+    def _call_funcptr(self, funcptr, *newargs):
+
         if self._flags_ & _rawffi.FUNCFLAG_USE_ERRNO:
             set_errno(_rawffi.get_errno())
         if self._flags_ & _rawffi.FUNCFLAG_USE_LASTERROR:
             set_last_error(_rawffi.get_last_error())
         try:
-            resbuffer = funcptr(*[arg._get_buffer_for_param()._buffer
-                                  for arg in args])
+            result = funcptr(*newargs)
         finally:
             if self._flags_ & _rawffi.FUNCFLAG_USE_ERRNO:
                 set_errno(_rawffi.get_errno())
             if self._flags_ & _rawffi.FUNCFLAG_USE_LASTERROR:
                 set_last_error(_rawffi.get_last_error())
+        #
+        return self._build_result(self._restype_, result, newargs)
 
-        result = None
-        if self._com_index:
-            if resbuffer[0] & 0x80000000:
-                raise get_com_error(resbuffer[0],
-                                    self._com_iid, args[0])
-            else:
-                result = int(resbuffer[0])
-        elif restype is not None:
-            checker = getattr(self.restype, '_check_retval_', None)
-            if checker:
-                val = restype(resbuffer[0])
-                # the original ctypes seems to make the distinction between
-                # classes defining a new type, and their subclasses
-                if '_type_' in restype.__dict__:
-                    val = val.value
-                result = checker(val)
-            elif not isinstance(restype, _CDataMeta):
-                result = restype(resbuffer[0])
-            else:
-                result = restype._CData_retval(resbuffer)
-
+    def _do_errcheck(self, result, args):
         # The 'errcheck' protocol
         if self._errcheck_:
             v = self._errcheck_(result, self, args)
             # If the errcheck funtion failed, let it throw
-            # If the errcheck function returned callargs unchanged,
+            # If the errcheck function returned newargs unchanged,
             # continue normal processing.
             # If the errcheck function returned something else,
             # use that as result.
             if v is not args:
-                result = v
+                return v
+        return result
 
-        if not outargs:
-            return result
-
-        if len(outargs) == 1:
-            return outargs[0]
-
-        return tuple(outargs)
+    def _getfuncptr_fromaddress(self, argtypes, restype):
+        address = self._get_address()
+        ffiargs = [argtype.get_ffi_argtype() for argtype in argtypes]
+        ffires = restype.get_ffi_argtype()
+        return _ffi.FuncPtr.fromaddr(address, '', ffiargs, ffires)
 
     def _getfuncptr(self, argtypes, restype, thisarg=None):
-        if self._ptr is not None and argtypes is self._argtypes_:
+        if self._ptr is not None and (argtypes is self._argtypes_ or argtypes == self._argtypes_):
             return self._ptr
         if restype is None or not isinstance(restype, _CDataMeta):
             import ctypes
             restype = ctypes.c_int
-        argshapes = [arg._ffiargshape for arg in argtypes]
-        resshape = restype._ffiargshape
         if self._buffer is not None:
-            ptr = _rawffi.FuncPtr(self._buffer[0], argshapes, resshape,
-                                  self._flags_)
-            if argtypes is self._argtypes_:
+            ptr = self._getfuncptr_fromaddress(argtypes, restype)
+            if argtypes == self._argtypes_:
                 self._ptr = ptr
             return ptr
 
@@ -391,14 +399,20 @@
             if not thisarg:
                 raise ValueError("COM method call without VTable")
             ptr = thisarg[self._com_index - 0x1000]
+            argshapes = [arg._ffiargshape for arg in argtypes]
+            resshape = restype._ffiargshape
             return _rawffi.FuncPtr(ptr, argshapes, resshape, self._flags_)
-
+        
         cdll = self.dll._handle
         try:
-            return cdll.ptr(self.name, argshapes, resshape, self._flags_)
+            ffi_argtypes = [argtype.get_ffi_argtype() for argtype in argtypes]
+            ffi_restype = restype.get_ffi_argtype()
+            self._ptr = cdll.getfunc(self.name, ffi_argtypes, ffi_restype)
+            return self._ptr
         except AttributeError:
             if self._flags_ & _rawffi.FUNCFLAG_CDECL:
                 raise
+
             # Win64 has no stdcall calling conv, so it should also not have the
             # name mangling of it.
             if WIN64:
@@ -409,23 +423,33 @@
             for i in range(33):
                 mangled_name = "_%s@%d" % (self.name, i*4)
                 try:
-                    return cdll.ptr(mangled_name, argshapes, resshape,
-                                    self._flags_)
+                    return cdll.getfunc(mangled_name,
+                                        ffi_argtypes, ffi_restype,
+                                        # XXX self._flags_
+                                        )
                 except AttributeError:
                     pass
             raise
 
-    @staticmethod
-    def _conv_param(argtype, arg):
-        from ctypes import c_char_p, c_wchar_p, c_void_p, c_int
+    @classmethod
+    def _conv_param(cls, argtype, arg):
+        if isinstance(argtype, _CDataMeta):
+            #arg = argtype.from_param(arg)
+            arg = argtype.get_ffi_param(arg)
+            return arg, argtype
+        
         if argtype is not None:
             arg = argtype.from_param(arg)
         if hasattr(arg, '_as_parameter_'):
             arg = arg._as_parameter_
         if isinstance(arg, _CData):
-            # The usual case when argtype is defined
-            cobj = arg
-        elif isinstance(arg, str):
+            return arg._to_ffi_param(), type(arg)
+        #
+        # non-usual case: we do the import here to save a lot of code in the
+        # jit trace of the normal case
+        from ctypes import c_char_p, c_wchar_p, c_void_p, c_int
+        #
+        if isinstance(arg, str):
             cobj = c_char_p(arg)
         elif isinstance(arg, unicode):
             cobj = c_wchar_p(arg)
@@ -435,11 +459,13 @@
             cobj = c_int(arg)
         else:
             raise TypeError("Don't know how to handle %s" % (arg,))
-        return cobj
+
+        return cobj._to_ffi_param(), type(cobj)
 
     def _convert_args(self, argtypes, args, kwargs, marker=object()):
-        callargs = []
+        newargs = []
         outargs = []
+        newargtypes = []
         total = len(args)
         paramflags = self._paramflags
 
@@ -470,8 +496,9 @@
                     val = defval
                     if val is marker:
                         val = 0
-                    wrapped = self._conv_param(argtype, val)
-                    callargs.append(wrapped)
+                    newarg, newargtype = self._conv_param(argtype, val)
+                    newargs.append(newarg)
+                    newargtypes.append(newargtype)
                 elif flag in (0, PARAMFLAG_FIN):
                     if inargs_idx < total:
                         val = args[inargs_idx]
@@ -485,38 +512,102 @@
                         raise TypeError("required argument '%s' missing" % name)
                     else:
                         raise TypeError("not enough arguments")
-                    wrapped = self._conv_param(argtype, val)
-                    callargs.append(wrapped)
+                    newarg, newargtype = self._conv_param(argtype, val)
+                    newargs.append(newarg)
+                    newargtypes.append(newargtype)
                 elif flag == PARAMFLAG_FOUT:
                     if defval is not marker:
                         outargs.append(defval)
-                        wrapped = self._conv_param(argtype, defval)
+                        newarg, newargtype = self._conv_param(argtype, defval)
                     else:
                         import ctypes
                         val = argtype._type_()
                         outargs.append(val)
-                        wrapped = ctypes.byref(val)
-                    callargs.append(wrapped)
+                        newarg = ctypes.byref(val)
+                        newargtype = type(newarg)
+                    newargs.append(newarg)
+                    newargtypes.append(newargtype)
                 else:
                     raise ValueError("paramflag %d not yet implemented" % flag)
             else:
                 try:
-                    wrapped = self._conv_param(argtype, args[i])
+                    newarg, newargtype = self._conv_param(argtype, args[i])
                 except (UnicodeError, TypeError, ValueError), e:
                     raise ArgumentError(str(e))
-                callargs.append(wrapped)
+                newargs.append(newarg)
+                newargtypes.append(newargtype)
                 inargs_idx += 1
 
-        if len(callargs) < total:
-            extra = args[len(callargs):]
+        if len(newargs) < len(args):
+            extra = args[len(newargs):]
             for i, arg in enumerate(extra):
                 try:
-                    wrapped = self._conv_param(None, arg)
+                    newarg, newargtype = self._conv_param(None, arg)
                 except (UnicodeError, TypeError, ValueError), e:
                     raise ArgumentError(str(e))
-                callargs.append(wrapped)
+                newargs.append(newarg)
+                newargtypes.append(newargtype)
+        return newargs, newargtypes, outargs
 
-        return callargs, outargs
+    
+    def _wrap_result(self, restype, result):
+        """
+        Convert from low-level repr of the result to the high-level python
+        one.
+        """
+        # hack for performance: if restype is a "simple" primitive type, don't
+        # allocate the buffer because it's going to be thrown away immediately
+        if restype.__bases__[0] is _SimpleCData and not restype._is_pointer_like():
+            return result
+        #
+        shape = restype._ffishape
+        if is_struct_shape(shape):
+            buf = result
+        else:
+            buf = _rawffi.Array(shape)(1, autofree=True)
+            buf[0] = result
+        retval = restype._CData_retval(buf)
+        return retval
+
+    def _build_result(self, restype, result, argsandobjs):
+        """Build the function result:
+           If there is no OUT parameter, return the actual function result
+           If there is one OUT parameter, return it
+           If there are many OUT parameters, return a tuple"""
+
+        # XXX: note for the future: the function used to take a "resbuffer",
+        # i.e. an array of ints. Now it takes a result, which is already a
+        # python object. All places that do "resbuffer[0]" should check that
+        # result is actually an int and just use it.
+        #
+        # Also, argsandobjs used to be "args" in __call__, now it's "newargs"
+        # (i.e., the already unwrapped objects). It's used only when we have a
+        # PARAMFLAG_FOUT and it's probably wrong, I'll fix it when I find a
+        # failing test
+
+        retval = None
+
+        if self._com_index:
+            if resbuffer[0] & 0x80000000:
+                raise get_com_error(resbuffer[0],
+                                    self._com_iid, argsandobjs[0])
+            else:
+                retval = int(resbuffer[0])
+        elif restype is not None:
+            checker = getattr(self.restype, '_check_retval_', None)
+            if checker:
+                val = restype(result)
+                # the original ctypes seems to make the distinction between
+                # classes defining a new type, and their subclasses
+                if '_type_' in restype.__dict__:
+                    val = val.value
+                retval = checker(val)
+            elif not isinstance(restype, _CDataMeta):
+                retval = restype(result)
+            else:
+                retval = self._wrap_result(restype, result)
+
+        return retval
 
     def __nonzero__(self):
         return self._com_index is not None or bool(self._buffer[0])
@@ -532,3 +623,61 @@
                 self._ptr.free()
                 self._ptr = None
             self._needs_free = False
+
+
+def make_fastpath_subclass(CFuncPtr):
+    if CFuncPtr._is_fastpath:
+        return CFuncPtr
+    #
+    try:
+        return make_fastpath_subclass.memo[CFuncPtr]
+    except KeyError:
+        pass
+
+    class CFuncPtrFast(CFuncPtr):
+
+        _is_fastpath = True
+        _slowpath_allowed = True # set to False by tests
+
+        @classmethod
+        def enable_fastpath_maybe(cls, obj):
+            if (obj.callable is None and
+                obj._com_index is None):
+                obj.__class__ = cls
+
+        def __rollback(self):
+            assert self._slowpath_allowed
+            self.__class__ = CFuncPtr
+
+        # disable the fast path if we reset argtypes
+        def _setargtypes(self, argtypes):
+            self.__rollback()
+            self._setargtypes(argtypes)
+        argtypes = property(CFuncPtr._getargtypes, _setargtypes)
+
+        def _setcallable(self, func):
+            self.__rollback()
+            self.callable = func
+        callable = property(lambda x: None, _setcallable)
+
+        def _setcom_index(self, idx):
+            self.__rollback()
+            self._com_index = idx
+        _com_index = property(lambda x: None, _setcom_index)
+
+        def __call__(self, *args):
+            thisarg = None
+            argtypes = self._argtypes_
+            restype = self._restype_
+            funcptr = self._getfuncptr(argtypes, restype, thisarg)
+            try:
+                result = self._call_funcptr(funcptr, *args)
+                result = self._do_errcheck(result, args)
+            except (TypeError, ArgumentError): # XXX, should be FFITypeError
+                assert self._slowpath_allowed
+                return CFuncPtr.__call__(self, *args)
+            return result
+
+    make_fastpath_subclass.memo[CFuncPtr] = CFuncPtrFast
+    return CFuncPtrFast
+make_fastpath_subclass.memo = {}
diff --git a/lib_pypy/_ctypes/pointer.py b/lib_pypy/_ctypes/pointer.py
--- a/lib_pypy/_ctypes/pointer.py
+++ b/lib_pypy/_ctypes/pointer.py
@@ -1,6 +1,7 @@
 
 import _rawffi
-from _ctypes.basics import _CData, _CDataMeta, cdata_from_address
+import _ffi
+from _ctypes.basics import _CData, _CDataMeta, cdata_from_address, ArgumentError
 from _ctypes.basics import keepalive_key, store_reference, ensure_objects
 from _ctypes.basics import sizeof, byref
 from _ctypes.array import Array, array_get_slice_params, array_slice_getitem,\
@@ -19,7 +20,7 @@
             length     = 1,
             _ffiargshape = 'P',
             _ffishape  = 'P',
-            _fficompositesize = None
+            _fficompositesize = None,
         )
         # XXX check if typedict['_type_'] is any sane
         # XXX remember about paramfunc
@@ -66,6 +67,7 @@
         self._ffiarray = ffiarray
         self.__init__ = __init__
         self._type_ = TP
+        self._ffiargtype = _ffi.types.Pointer(TP.get_ffi_argtype())
 
     from_address = cdata_from_address
 
@@ -114,6 +116,17 @@
 
     contents = property(getcontents, setcontents)
 
+    def _as_ffi_pointer_(self, ffitype):
+        return as_ffi_pointer(self, ffitype)
+
+def as_ffi_pointer(value, ffitype):
+    my_ffitype = type(value).get_ffi_argtype()
+    # for now, we always allow types.pointer, else a lot of tests
+    # break. We need to rethink how pointers are represented, though
+    if my_ffitype is not ffitype and ffitype is not _ffi.types.void_p:
+        raise ArgumentError, "expected %s instance, got %s" % (type(value), ffitype)
+    return value._get_buffer_value()
+
 def _cast_addr(obj, _, tp):
     if not (isinstance(tp, _CDataMeta) and tp._is_pointer_like()):
         raise TypeError("cast() argument 2 must be a pointer type, not %s"
diff --git a/lib_pypy/_ctypes/primitive.py b/lib_pypy/_ctypes/primitive.py
--- a/lib_pypy/_ctypes/primitive.py
+++ b/lib_pypy/_ctypes/primitive.py
@@ -1,3 +1,4 @@
+import _ffi
 import _rawffi
 import weakref
 import sys
@@ -8,7 +9,7 @@
      CArgObject
 from _ctypes.builtin import ConvMode
 from _ctypes.array import Array
-from _ctypes.pointer import _Pointer
+from _ctypes.pointer import _Pointer, as_ffi_pointer
 
 class NULL(object):
     pass
@@ -140,6 +141,8 @@
                     value = 0
                 self._buffer[0] = value
             result.value = property(_getvalue, _setvalue)
+            result._ffiargtype = _ffi.types.Pointer(_ffi.types.char)
+
         elif tp == 'Z':
             # c_wchar_p
             def _getvalue(self):
@@ -162,6 +165,7 @@
                     value = 0
                 self._buffer[0] = value
             result.value = property(_getvalue, _setvalue)
+            result._ffiargtype = _ffi.types.Pointer(_ffi.types.unichar)
 
         elif tp == 'P':
             # c_void_p
@@ -248,6 +252,12 @@
                     self._buffer[0] = 0  # VARIANT_FALSE
             result.value = property(_getvalue, _setvalue)
 
+        # make pointer-types compatible with the _ffi fast path
+        if result._is_pointer_like():
+            def _as_ffi_pointer_(self, ffitype):
+                return as_ffi_pointer(self, ffitype)
+            result._as_ffi_pointer_ = _as_ffi_pointer_
+            
         return result
 
     from_address = cdata_from_address
diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py
--- a/lib_pypy/_ctypes/structure.py
+++ b/lib_pypy/_ctypes/structure.py
@@ -240,6 +240,9 @@
     def _get_buffer_value(self):
         return self._buffer.buffer
 
+    def _to_ffi_param(self):
+        return self._buffer
+
 
 class StructureMeta(StructOrUnionMeta):
     _is_union = False
diff --git a/lib_pypy/ctypes_support.py b/lib_pypy/ctypes_support.py
--- a/lib_pypy/ctypes_support.py
+++ b/lib_pypy/ctypes_support.py
@@ -10,8 +10,8 @@
 # __________ the standard C library __________
 
 if sys.platform == 'win32':
-    import _rawffi
-    standard_c_lib = ctypes.CDLL('msvcrt', handle=_rawffi.get_libc())
+    import _ffi
+    standard_c_lib = ctypes.CDLL('msvcrt', handle=_ffi.get_libc())
 else:
     standard_c_lib = ctypes.CDLL(ctypes.util.find_library('c'))
 
diff --git a/lib_pypy/datetime.py b/lib_pypy/datetime.py
--- a/lib_pypy/datetime.py
+++ b/lib_pypy/datetime.py
@@ -1422,12 +1422,17 @@
             converter = _time.localtime
         else:
             converter = _time.gmtime
-        if 1 - (t % 1.0) < 0.000001:
-            t = float(int(t)) + 1
-        if t < 0:
-            t -= 1
+        if t < 0.0:
+            us = int(round(((-t) % 1.0) * 1000000))
+            if us > 0:
+                us = 1000000 - us
+                t -= 1.0
+        else:
+            us = int(round((t % 1.0) * 1000000))
+            if us == 1000000:
+                us = 0
+                t += 1.0
         y, m, d, hh, mm, ss, weekday, jday, dst = converter(t)
-        us = int((t % 1.0) * 1000000)
         ss = min(ss, 59)    # clamp out leap seconds if the platform has them
         result = cls(y, m, d, hh, mm, ss, us, tz)
         if tz is not None:
diff --git a/lib_pypy/msvcrt.py b/lib_pypy/msvcrt.py
--- a/lib_pypy/msvcrt.py
+++ b/lib_pypy/msvcrt.py
@@ -46,4 +46,42 @@
         e = get_errno()
         raise IOError(e, errno.errorcode[e])
 
+# Console I/O routines
+
+kbhit = _c._kbhit
+kbhit.argtypes = []
+kbhit.restype = ctypes.c_int
+
+getch = _c._getch
+getch.argtypes = []
+getch.restype = ctypes.c_char
+
+getwch = _c._getwch
+getwch.argtypes = []
+getwch.restype = ctypes.c_wchar
+
+getche = _c._getche
+getche.argtypes = []
+getche.restype = ctypes.c_char
+
+getwche = _c._getwche
+getwche.argtypes = []
+getwche.restype = ctypes.c_wchar
+
+putch = _c._putch
+putch.argtypes = [ctypes.c_char]
+putch.restype = None
+
+putwch = _c._putwch
+putwch.argtypes = [ctypes.c_wchar]
+putwch.restype = None
+
+ungetch = _c._ungetch
+ungetch.argtypes = [ctypes.c_char]
+ungetch.restype = None
+
+ungetwch = _c._ungetwch
+ungetwch.argtypes = [ctypes.c_wchar]
+ungetwch.restype = None
+
 del ctypes
diff --git a/lib_pypy/pypy_test/test_datetime.py b/lib_pypy/pypy_test/test_datetime.py
--- a/lib_pypy/pypy_test/test_datetime.py
+++ b/lib_pypy/pypy_test/test_datetime.py
@@ -32,4 +32,28 @@
     assert datetime.datetime.utcfromtimestamp(a).microsecond == 0
     assert datetime.datetime.utcfromtimestamp(a).second == 1
 
-    
+def test_more_datetime_rounding():
+    # this test verified on top of CPython 2.7 (using a plain
+    # "import datetime" above)
+    expected_results = {
+        -1000.0: 'datetime.datetime(1970, 1, 1, 0, 43, 20)',
+        -999.9999996: 'datetime.datetime(1970, 1, 1, 0, 43, 20)',
+        -999.4: 'datetime.datetime(1970, 1, 1, 0, 43, 20, 600000)',
+        -999.0000004: 'datetime.datetime(1970, 1, 1, 0, 43, 21)',
+        -1.0: 'datetime.datetime(1970, 1, 1, 0, 59, 59)',
+        -0.9999996: 'datetime.datetime(1970, 1, 1, 0, 59, 59)',
+        -0.4: 'datetime.datetime(1970, 1, 1, 0, 59, 59, 600000)',
+        -0.0000004: 'datetime.datetime(1970, 1, 1, 1, 0)',
+        0.0: 'datetime.datetime(1970, 1, 1, 1, 0)',
+        0.0000004: 'datetime.datetime(1970, 1, 1, 1, 0)',
+        0.4: 'datetime.datetime(1970, 1, 1, 1, 0, 0, 400000)',
+        0.9999996: 'datetime.datetime(1970, 1, 1, 1, 0, 1)',
+        1000.0: 'datetime.datetime(1970, 1, 1, 1, 16, 40)',
+        1000.0000004: 'datetime.datetime(1970, 1, 1, 1, 16, 40)',
+        1000.4: 'datetime.datetime(1970, 1, 1, 1, 16, 40, 400000)',
+        1000.9999996: 'datetime.datetime(1970, 1, 1, 1, 16, 41)',
+        1293843661.191: 'datetime.datetime(2011, 1, 1, 2, 1, 1, 191000)',
+        }
+    for t in sorted(expected_results):
+        dt = datetime.datetime.fromtimestamp(t)
+        assert repr(dt) == expected_results[t]
diff --git a/pypy/annotation/bookkeeper.py b/pypy/annotation/bookkeeper.py
--- a/pypy/annotation/bookkeeper.py
+++ b/pypy/annotation/bookkeeper.py
@@ -279,13 +279,13 @@
         desc = self.getdesc(cls)
         return desc.getuniqueclassdef()
 
-    def getlistdef(self, **flags):
+    def getlistdef(self, **flags_if_new):
         """Get the ListDef associated with the current position."""
         try:
             listdef = self.listdefs[self.position_key]
         except KeyError:
             listdef = self.listdefs[self.position_key] = ListDef(self)
-            listdef.listitem.__dict__.update(flags)
+            listdef.listitem.__dict__.update(flags_if_new)
         return listdef
 
     def newlist(self, *s_values, **flags):
@@ -294,6 +294,9 @@
         listdef = self.getlistdef(**flags)
         for s_value in s_values:
             listdef.generalize(s_value)
+        if flags:
+            assert flags.keys() == ['range_step']
+            listdef.generalize_range_step(flags['range_step'])
         return SomeList(listdef)
 
     def getdictdef(self, is_r_dict=False):
diff --git a/pypy/annotation/listdef.py b/pypy/annotation/listdef.py
--- a/pypy/annotation/listdef.py
+++ b/pypy/annotation/listdef.py
@@ -184,6 +184,11 @@
     def generalize(self, s_value):
         self.listitem.generalize(s_value)
 
+    def generalize_range_step(self, range_step):
+        newlistitem = ListItem(self.listitem.bookkeeper, s_ImpossibleValue)
+        newlistitem.range_step = range_step
+        self.listitem.merge(newlistitem)
+
     def __repr__(self):
         return '<[%r]%s%s%s%s>' % (self.listitem.s_value,
                                self.listitem.mutated and 'm' or '',
diff --git a/pypy/annotation/test/test_annrpython.py b/pypy/annotation/test/test_annrpython.py
--- a/pypy/annotation/test/test_annrpython.py
+++ b/pypy/annotation/test/test_annrpython.py
@@ -3483,6 +3483,17 @@
         a = self.RPythonAnnotator()
         raises(Exception, a.build_types, f, [int])
 
+    def test_range_variable_step(self):
+        def g(n):
+            return range(0, 10, n)
+        def f(n):
+            r = g(1)    # constant step, at first
+            s = g(n)    # but it becomes a variable step
+            return r
+        a = self.RPythonAnnotator()
+        s = a.build_types(f, [int])
+        assert s.listdef.listitem.range_step == 0
+
 
 def g(n):
     return [0,1,2,n]
diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py
--- a/pypy/config/pypyoption.py
+++ b/pypy/config/pypyoption.py
@@ -33,13 +33,17 @@
      "struct", "_hashlib", "_md5", "_sha", "_minimal_curses", "cStringIO",
      "thread", "itertools", "pyexpat", "_ssl", "cpyext", "array",
      "_bisect", "binascii", "_multiprocessing", '_warnings',
-     "_collections", "_multibytecodec", "micronumpy"]
+     "_collections", "_multibytecodec", "micronumpy", "_ffi"]
 ))
 
 translation_modules = default_modules.copy()
 translation_modules.update(dict.fromkeys(
     ["fcntl", "rctime", "select", "signal", "_rawffi", "zlib",
-     "struct", "_md5", "cStringIO", "array"]))
+     "struct", "_md5", "cStringIO", "array", "_ffi",
+     # the following are needed for pyrepl (and hence for the
+     # interactive prompt/pdb)
+     "termios", "_minimal_curses",
+     ]))
 
 working_oo_modules = default_modules.copy()
 working_oo_modules.update(dict.fromkeys(
diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst
--- a/pypy/doc/cpython_differences.rst
+++ b/pypy/doc/cpython_differences.rst
@@ -136,6 +136,11 @@
 next access.  Any code that uses weak proxies must carefully catch such
 ``ReferenceError`` at any place that uses them.
 
+As a side effect, the ``finally`` clause inside a generator will be executed
+only when the generator object is garbage collected (see `issue 736`__).
+
+.. __: http://bugs.pypy.org/issue736
+
 There are a few extra implications for the difference in the GC.  Most
 notably, if an object has a ``__del__``, the ``__del__`` is never called more
 than once in PyPy; but CPython will call the same ``__del__`` several times
@@ -168,6 +173,11 @@
     >>>> A.__del__ = lambda self: None
     __main__:1: RuntimeWarning: a __del__ method added to an existing type will not be called
 
+Even more obscure: the same is true, for old-style classes, if you attach
+the ``__del__`` to an instance (even in CPython this does not work with
+new-style classes).  You get a RuntimeWarning in PyPy.  To fix these cases
+just make sure there is a ``__del__`` method in the class to start with.
+
 
 Subclasses of built-in types
 ----------------------------
diff --git a/pypy/doc/image/jitviewer.png b/pypy/doc/image/jitviewer.png
new file mode 100644
index 0000000000000000000000000000000000000000..ad2abca5c88125061fa519dcf3f9fada577573ee
GIT binary patch

[cut]

diff --git a/pypy/doc/index.rst b/pypy/doc/index.rst
--- a/pypy/doc/index.rst
+++ b/pypy/doc/index.rst
@@ -21,6 +21,8 @@
 
 * `speed.pypy.org`_: Daily benchmarks of how fast PyPy is
 
+* `potential project ideas`_: In case you want to get your feet wet...
+
 Documentation for the PyPy Python Interpreter
 ===============================================
 
@@ -59,8 +61,6 @@
   (if they are not already developed in the FAQ_).
   You can find logs of the channel here_.
 
-.. XXX play1? 
-
 Meeting PyPy developers
 =======================
 
@@ -83,7 +83,7 @@
 .. _`Release 1.5`: http://pypy.org/download.html
 .. _`speed.pypy.org`: http://speed.pypy.org
 .. _`RPython toolchain`: translation.html
-
+.. _`potential project ideas`: project-ideas.html
 
 Project Documentation
 =====================================
diff --git a/pypy/doc/project-ideas.rst b/pypy/doc/project-ideas.rst
new file mode 100644
--- /dev/null
+++ b/pypy/doc/project-ideas.rst
@@ -0,0 +1,130 @@
+
+Potential project list
+======================
+
+This is a list of projects that are interesting for potential contributors
+who are seriously interested in the PyPy project. They mostly share common
+patterns - they're mid-to-large in size, they're usually well defined as
+a standalone projects and they're not being actively worked on. For small
+projects that you might want to work on, it's much better to either look
+at the `issue tracker`_, pop up on #pypy on irc.freenode.net or write to the
+`mailing list`_. This is simply for the reason that small possible projects
+tend to change very rapidly.
+
+This list is mostly for having on overview on potential projects. This list is
+by definition not exhaustive and we're pleased if people come up with their
+own improvement ideas. In any case, if you feel like working on some of those
+projects, or anything else in PyPy, pop up on IRC or write to us on the
+`mailing list`_.
+
+Numpy improvements
+------------------
+
+This is more of a project-container than a single project. Possible ideas:
+
+* experiment with auto-vectorization using SSE or implement vectorization
+  without automatically detecting it for array operations.
+
+* improve numpy, for example implement memory views.
+
+* interface with fortran/C libraries.
+
+Improving the jitviewer
+------------------------
+
+Analyzing performance of applications is always tricky. We have various
+tools, for example a `jitviewer`_ that help us analyze performance.
+
+The jitviewer shows the code generated by the PyPy JIT in a hierarchical way,
+as shown by the screenshot below:
+
+  - at the bottom level, it shows the Python source code of the compiled loops
+
+  - for each source code line, it shows the corresponding Python bytecode
+
+  - for each opcode, it shows the corresponding jit operations, which are the
+    ones actually sent to the backend for compiling (such as ``i15 = i10 <
+    2000`` in the example)
+
+.. image:: image/jitviewer.png
+
+We would like to add one level to this hierarchy, by showing the generated
+machine code for each jit operation.  The necessary information is already in
+the log file produced by the JIT, so it is "only" a matter of teaching the
+jitviewer to display it.  Ideally, the machine code should be hidden by
+default and viewable on request.
+
+The jitviewer is a web application based on flask and jinja2 (and jQuery on
+the client): if you have great web developing skills and want to help PyPy,
+this is an ideal task to get started, because it does not require any deep
+knowledge of the internals.
+
+Translation Toolchain
+---------------------
+
+* Incremental or distributed translation.
+
+* Allow separate compilation of extension modules.
+
+Work on some of other languages
+-------------------------------
+
+There are various languages implemented using the RPython translation toolchain.
+One of the most interesting is the `JavaScript implementation`_, but there
+are others like scheme or prolog. An interesting project would be to improve
+the jittability of those or to experiment with various optimizations.
+
+Various GCs
+-----------
+
+PyPy has pluggable garbage collection policy. This means that various garbage
+collectors can be written for specialized purposes, or even various
+experiments can be done for the general purpose. Examples
+
+* An incremental garbage collector that has specified maximal pause times,
+  crucial for games
+
+* A garbage collector that compact memory better for mobile devices
+
+* A concurrent garbage collector (a lot of work)
+
+Remove the GIL
+--------------
+
+This is a major task that requires lots of thinking. However, few subprojects
+can be potentially specified, unless a better plan can be thought out:
+
+* A thread-aware garbage collector
+
+* Better RPython primitives for dealing with concurrency
+
+* JIT passes to remove locks on objects
+
+* (maybe) implement locking in Python interpreter
+
+* alternatively, look at Software Transactional Memory
+
+Introduce new benchmarks
+------------------------
+
+We're usually happy to introduce new benchmarks. Please consult us
+before, but in general something that's real-world python code
+and is not already represented is welcome. We need at least a standalone
+script that can run without parameters. Example ideas (benchmarks need
+to be got from them!):
+
+* `hg`
+
+* `sympy`
+
+Experiment (again) with LLVM backend for RPython compilation
+------------------------------------------------------------
+
+We already tried working with LLVM and at the time, LLVM was not mature enough
+for our needs. It's possible that this has changed, reviving the LLVM backend
+(or writing new from scratch) for static compilation would be a good project.
+
+.. _`issue tracker`: http://bugs.pypy.org
+.. _`mailing list`: http://mail.python.org/mailman/listinfo/pypy-dev
+.. _`jitviewer`: http://bitbucket.org/pypy/jitviewer
+.. _`JavaScript implementation`: https://bitbucket.org/pypy/lang-js/overview
diff --git a/pypy/jit/backend/llgraph/llimpl.py b/pypy/jit/backend/llgraph/llimpl.py
--- a/pypy/jit/backend/llgraph/llimpl.py
+++ b/pypy/jit/backend/llgraph/llimpl.py
@@ -600,15 +600,15 @@
         #
         return _op_default_implementation
 
-    def op_debug_merge_point(self, _, value, recdepth):
+    def op_debug_merge_point(self, _, *args):
         from pypy.jit.metainterp.warmspot import get_stats
-        loc = ConstPtr(value)._get_str()
         try:
             stats = get_stats()
         except AttributeError:
             pass
         else:
-            stats.add_merge_point_location(loc)
+            stats.add_merge_point_location(args[1:])
+        pass
 
     def op_guard_true(self, _, value):
         if not value:
@@ -820,6 +820,12 @@
             raise NotImplementedError
 
     def op_call(self, calldescr, func, *args):
+        return self._do_call(calldescr, func, args, call_with_llptr=False)
+
+    def op_call_release_gil(self, calldescr, func, *args):
+        return self._do_call(calldescr, func, args, call_with_llptr=True)
+
+    def _do_call(self, calldescr, func, args, call_with_llptr):
         global _last_exception
         assert _last_exception is None, "exception left behind"
         assert _call_args_i == _call_args_r == _call_args_f == []
@@ -838,7 +844,8 @@
             else:
                 raise TypeError(x)
         try:
-            return _do_call_common(func, args_in_order, calldescr)
+            return _do_call_common(func, args_in_order, calldescr,
+                                   call_with_llptr)
         except LLException, lle:
             _last_exception = lle
             d = {'v': None,
@@ -1480,17 +1487,20 @@
     'v': lltype.Void,
     }
 
-def _do_call_common(f, args_in_order=None, calldescr=None):
+def _do_call_common(f, args_in_order=None, calldescr=None,
+                    call_with_llptr=False):
     ptr = llmemory.cast_int_to_adr(f).ptr
     PTR = lltype.typeOf(ptr)
     if PTR == rffi.VOIDP:
         # it's a pointer to a C function, so we don't have a precise
         # signature: create one from the descr
+        assert call_with_llptr is True
         ARGS = map(kind2TYPE.get, calldescr.arg_types)
         RESULT = kind2TYPE[calldescr.typeinfo]
         FUNC = lltype.FuncType(ARGS, RESULT)
         func_to_call = rffi.cast(lltype.Ptr(FUNC), ptr)
     else:
+        assert call_with_llptr is False
         FUNC = PTR.TO
         ARGS = FUNC.ARGS
         func_to_call = ptr._obj._callable
diff --git a/pypy/jit/backend/llgraph/runner.py b/pypy/jit/backend/llgraph/runner.py
--- a/pypy/jit/backend/llgraph/runner.py
+++ b/pypy/jit/backend/llgraph/runner.py
@@ -134,7 +134,7 @@
         old, oldindex = faildescr._compiled_fail
         llimpl.compile_redirect_fail(old, oldindex, c)
 
-    def compile_loop(self, inputargs, operations, looptoken, log=True):
+    def compile_loop(self, inputargs, operations, looptoken, log=True, name=''):
         """In a real assembler backend, this should assemble the given
         list of operations.  Here we just generate a similar CompiledLoop
         instance.  The code here is RPython, whereas the code in llimpl
diff --git a/pypy/jit/backend/llsupport/descr.py b/pypy/jit/backend/llsupport/descr.py
--- a/pypy/jit/backend/llsupport/descr.py
+++ b/pypy/jit/backend/llsupport/descr.py
@@ -1,5 +1,6 @@
 import py
 from pypy.rpython.lltypesystem import lltype, rffi, llmemory, rclass
+from pypy.rpython.lltypesystem.lloperation import llop
 from pypy.jit.backend.llsupport import symbolic, support
 from pypy.jit.metainterp.history import AbstractDescr, getkind, BoxInt, BoxPtr
 from pypy.jit.metainterp.history import BasicFailDescr, LoopToken, BoxFloat
@@ -149,6 +150,7 @@
 
 class BaseArrayDescr(AbstractDescr):
     _clsname = ''
+    tid = llop.combine_ushort(lltype.Signed, 0, 0)
 
     def get_base_size(self, translate_support_code):
         basesize, _, _ = symbolic.get_array_token(_A, translate_support_code)
diff --git a/pypy/jit/backend/llsupport/ffisupport.py b/pypy/jit/backend/llsupport/ffisupport.py
--- a/pypy/jit/backend/llsupport/ffisupport.py
+++ b/pypy/jit/backend/llsupport/ffisupport.py
@@ -3,13 +3,16 @@
 from pypy.jit.backend.llsupport.descr import DynamicIntCallDescr, NonGcPtrCallDescr,\
     FloatCallDescr, VoidCallDescr
 
+class UnsupportedKind(Exception):
+    pass
+
 def get_call_descr_dynamic(ffi_args, ffi_result, extrainfo=None):
     """Get a call descr: the types of result and args are represented by
     rlib.libffi.types.*"""
     try:
         reskind = get_ffi_type_kind(ffi_result)
         argkinds = [get_ffi_type_kind(arg) for arg in ffi_args]
-    except KeyError:
+    except UnsupportedKind:
         return None # ??
     arg_classes = ''.join(argkinds)
     if reskind == history.INT:
@@ -33,7 +36,7 @@
         return history.FLOAT
     elif kind == 'v':
         return history.VOID
-    assert False, "Unsupported kind '%s'" % kind
+    raise UnsupportedKind("Unsupported kind '%s'" % kind)
 
 def is_ffi_type_signed(ffi_type):
     from pypy.rlib.libffi import types
diff --git a/pypy/jit/backend/llsupport/gc.py b/pypy/jit/backend/llsupport/gc.py
--- a/pypy/jit/backend/llsupport/gc.py
+++ b/pypy/jit/backend/llsupport/gc.py
@@ -34,7 +34,7 @@
         pass
     def do_write_barrier(self, gcref_struct, gcref_newptr):
         pass
-    def rewrite_assembler(self, cpu, operations):
+    def rewrite_assembler(self, cpu, operations, gcrefs_output_list):
         return operations
     def can_inline_malloc(self, descr):
         return False
@@ -146,78 +146,6 @@
 # All code below is for the hybrid or minimark GC
 
 
-class GcRefList:
-    """Handles all references from the generated assembler to GC objects.
-    This is implemented as a nonmovable, but GC, list; the assembler contains
-    code that will (for now) always read from this list."""
-
-    GCREF_LIST = lltype.GcArray(llmemory.GCREF)     # followed by the GC
-
-    HASHTABLE = rffi.CArray(llmemory.Address)      # ignored by the GC
-    HASHTABLE_BITS = 10
-    HASHTABLE_SIZE = 1 << HASHTABLE_BITS
-
-    def initialize(self):
-        if we_are_translated(): n = 2000
-        else:                   n = 10    # tests only
-        self.list = self.alloc_gcref_list(n)
-        self.nextindex = 0
-        self.oldlists = []
-        # A pseudo dictionary: it is fixed size, and it may contain
-        # random nonsense after a collection moved the objects.  It is only
-        # used to avoid too many duplications in the GCREF_LISTs.
-        self.hashtable = lltype.malloc(self.HASHTABLE,
-                                       self.HASHTABLE_SIZE+1,
-                                       flavor='raw', track_allocation=False)
-        dummy = lltype.direct_ptradd(lltype.direct_arrayitems(self.hashtable),
-                                     self.HASHTABLE_SIZE)
-        dummy = llmemory.cast_ptr_to_adr(dummy)
-        for i in range(self.HASHTABLE_SIZE+1):
-            self.hashtable[i] = dummy
-
-    def alloc_gcref_list(self, n):
-        # Important: the GRREF_LISTs allocated are *non-movable*.  This
-        # requires support in the gc (hybrid GC or minimark GC so far).
-        if we_are_translated():
-            list = rgc.malloc_nonmovable(self.GCREF_LIST, n)
-            assert list, "malloc_nonmovable failed!"
-        else:
-            list = lltype.malloc(self.GCREF_LIST, n)     # for tests only
-        return list
-
-    def get_address_of_gcref(self, gcref):
-        assert lltype.typeOf(gcref) == llmemory.GCREF
-        # first look in the hashtable, using an inexact hash (fails after
-        # the object moves)
-        addr = llmemory.cast_ptr_to_adr(gcref)
-        hash = llmemory.cast_adr_to_int(addr, "forced")
-        hash -= hash >> self.HASHTABLE_BITS
-        hash &= self.HASHTABLE_SIZE - 1
-        addr_ref = self.hashtable[hash]
-        # the following test is safe anyway, because the addresses found
-        # in the hashtable are always the addresses of nonmovable stuff
-        # ('addr_ref' is an address inside self.list, not directly the
-        # address of a real moving GC object -- that's 'addr_ref.address[0]'.)
-        if addr_ref.address[0] == addr:
-            return addr_ref
-        # if it fails, add an entry to the list
-        if self.nextindex == len(self.list):
-            # reallocate first, increasing a bit the size every time
-            self.oldlists.append(self.list)
-            self.list = self.alloc_gcref_list(len(self.list) // 4 * 5)
-            self.nextindex = 0
-        # add it
-        index = self.nextindex
-        self.list[index] = gcref
-        addr_ref = lltype.direct_ptradd(lltype.direct_arrayitems(self.list),
-                                        index)
-        addr_ref = llmemory.cast_ptr_to_adr(addr_ref)
-        self.nextindex = index + 1
-        # record it in the hashtable
-        self.hashtable[hash] = addr_ref
-        return addr_ref
-
-
 class GcRootMap_asmgcc(object):
     """Handles locating the stack roots in the assembler.
     This is the class supporting --gcrootfinder=asmgcc.
@@ -527,6 +455,7 @@
     def __init__(self, gc_ll_descr):
         self.llop1 = gc_ll_descr.llop1
         self.WB_FUNCPTR = gc_ll_descr.WB_FUNCPTR
+        self.WB_ARRAY_FUNCPTR = gc_ll_descr.WB_ARRAY_FUNCPTR
         self.fielddescr_tid = get_field_descr(gc_ll_descr,
                                               gc_ll_descr.GCClass.HDR, 'tid')
         self.jit_wb_if_flag = gc_ll_descr.GCClass.JIT_WB_IF_FLAG
@@ -546,6 +475,13 @@
         funcaddr = llmemory.cast_ptr_to_adr(funcptr)
         return cpu.cast_adr_to_int(funcaddr)
 
+    def get_write_barrier_from_array_fn(self, cpu):
+        llop1 = self.llop1
+        funcptr = llop1.get_write_barrier_from_array_failing_case(
+            self.WB_ARRAY_FUNCPTR)
+        funcaddr = llmemory.cast_ptr_to_adr(funcptr)
+        return cpu.cast_adr_to_int(funcaddr)    # this may return 0
+
 
 class GcLLDescr_framework(GcLLDescription):
     DEBUG = False    # forced to True by x86/test/test_zrpy_gc.py
@@ -559,7 +495,7 @@
         self.translator = translator
         self.llop1 = llop1
 
-        # we need the hybrid or minimark GC for GcRefList.alloc_gcref_list()
+        # we need the hybrid or minimark GC for rgc._make_sure_does_not_move()
         # to work
         if gcdescr.config.translation.gc not in ('hybrid', 'minimark'):
             raise NotImplementedError("--gc=%s not implemented with the JIT" %
@@ -574,8 +510,6 @@
                                       " with the JIT" % (name,))
         gcrootmap = cls(gcdescr)
         self.gcrootmap = gcrootmap
-        self.gcrefs = GcRefList()
-        self.single_gcref_descr = GcPtrFieldDescr('', 0)
 
         # make a TransformerLayoutBuilder and save it on the translator
         # where it can be fished and reused by the FrameworkGCTransformer
@@ -617,6 +551,8 @@
             [lltype.Signed, lltype.Signed], llmemory.GCREF))
         self.WB_FUNCPTR = lltype.Ptr(lltype.FuncType(
             [llmemory.Address, llmemory.Address], lltype.Void))
+        self.WB_ARRAY_FUNCPTR = lltype.Ptr(lltype.FuncType(
+            [llmemory.Address, lltype.Signed], lltype.Void))
         self.write_barrier_descr = WriteBarrierDescr(self)
         #
         def malloc_array(itemsize, tid, num_elem):
@@ -706,7 +642,6 @@
         return rffi.cast(lltype.Signed, fptr)
 
     def initialize(self):
-        self.gcrefs.initialize()
         self.gcrootmap.initialize()
 
     def init_size_descr(self, S, descr):
@@ -768,54 +703,32 @@
             funcptr(llmemory.cast_ptr_to_adr(gcref_struct),
                     llmemory.cast_ptr_to_adr(gcref_newptr))
 
-    def replace_constptrs_with_getfield_raw(self, cpu, newops, op):
-        # xxx some performance issue here
-        newargs = [None] * op.numargs()
-        needs_copy = False
+    def record_constptrs(self, op, gcrefs_output_list):
         for i in range(op.numargs()):
             v = op.getarg(i)
-            newargs[i] = v
             if isinstance(v, ConstPtr) and bool(v.value):
-                addr = self.gcrefs.get_address_of_gcref(v.value)
-                # ^^^even for non-movable objects, to record their presence
-                if rgc.can_move(v.value):
-                    box = BoxPtr(v.value)
-                    addr = cpu.cast_adr_to_int(addr)
-                    newops.append(ResOperation(rop.GETFIELD_RAW,
-                                               [ConstInt(addr)], box,
-                                               self.single_gcref_descr))
-                    newargs[i] = box
-                    needs_copy = True
-        #
-        if needs_copy:
-            return op.copy_and_change(op.getopnum(), args=newargs)
-        else:
-            return op
+                p = v.value
+                rgc._make_sure_does_not_move(p)
+                gcrefs_output_list.append(p)
 
-
-    def rewrite_assembler(self, cpu, operations):
+    def rewrite_assembler(self, cpu, operations, gcrefs_output_list):
         # Perform two kinds of rewrites in parallel:
         #
         # - Add COND_CALLs to the write barrier before SETFIELD_GC and
         #   SETARRAYITEM_GC operations.
         #
-        # - Remove all uses of ConstPtrs away from the assembler.
-        #   Idea: when running on a moving GC, we can't (easily) encode
-        #   the ConstPtrs in the assembler, because they can move at any
-        #   point in time.  Instead, we store them in 'gcrefs.list', a GC
-        #   but nonmovable list; and here, we modify 'operations' to
-        #   replace direct usage of ConstPtr with a BoxPtr loaded by a
-        #   GETFIELD_RAW from the array 'gcrefs.list'.
+        # - Record the ConstPtrs from the assembler.
         #
         newops = []
+        known_lengths = {}
         # we can only remember one malloc since the next malloc can possibly
         # collect
         last_malloc = None
         for op in operations:
             if op.getopnum() == rop.DEBUG_MERGE_POINT:
                 continue
-            # ---------- replace ConstPtrs with GETFIELD_RAW ----------
-            op = self.replace_constptrs_with_getfield_raw(cpu, newops, op)
+            # ---------- record the ConstPtrs ----------
+            self.record_constptrs(op, gcrefs_output_list)
             if op.is_malloc():
                 last_malloc = op.result
             elif op.can_malloc():
@@ -838,19 +751,40 @@
                     v = op.getarg(2)
                     if isinstance(v, BoxPtr) or (isinstance(v, ConstPtr) and
                                             bool(v.value)): # store a non-NULL
-                        # XXX detect when we should produce a
-                        # write_barrier_from_array
-                        self._gen_write_barrier(newops, op.getarg(0), v)
+                        self._gen_write_barrier_array(newops, op.getarg(0),
+                                                      op.getarg(1), v,
+                                                      cpu, known_lengths)
                         op = op.copy_and_change(rop.SETARRAYITEM_RAW)
+            elif op.getopnum() == rop.NEW_ARRAY:
+                v_length = op.getarg(0)
+                if isinstance(v_length, ConstInt):
+                    known_lengths[op.result] = v_length.getint()
             # ----------
             newops.append(op)
         return newops
 
-    def _gen_write_barrier(self, newops, v_base, v_value):
-        args = [v_base, v_value]
+    def _gen_write_barrier(self, newops, v_base, v_value_or_index):
+        # NB. the 2nd argument of COND_CALL_GC_WB is either a pointer
+        # (regular case), or an index (case of write_barrier_from_array)
+        args = [v_base, v_value_or_index]
         newops.append(ResOperation(rop.COND_CALL_GC_WB, args, None,
                                    descr=self.write_barrier_descr))
 
+    def _gen_write_barrier_array(self, newops, v_base, v_index, v_value,
+                                 cpu, known_lengths):
+        if self.write_barrier_descr.get_write_barrier_from_array_fn(cpu) != 0:
+            # If we know statically the length of 'v', and it is not too
+            # big, then produce a regular write_barrier.  If it's unknown or
+            # too big, produce instead a write_barrier_from_array.
+            LARGE = 130
+            length = known_lengths.get(v_base, LARGE)
+            if length >= LARGE:
+                # unknown or too big: produce a write_barrier_from_array
+                self._gen_write_barrier(newops, v_base, v_index)
+                return
+        # fall-back case: produce a write_barrier
+        self._gen_write_barrier(newops, v_base, v_value)
+
     def can_inline_malloc(self, descr):
         assert isinstance(descr, BaseSizeDescr)
         if descr.size < self.max_size_of_young_obj:
diff --git a/pypy/jit/backend/llsupport/regalloc.py b/pypy/jit/backend/llsupport/regalloc.py
--- a/pypy/jit/backend/llsupport/regalloc.py
+++ b/pypy/jit/backend/llsupport/regalloc.py
@@ -37,6 +37,11 @@
         self.frame_depth += size
         return newloc
 
+    def reserve_location_in_frame(self, size):
+        frame_depth = self.frame_depth
+        self.frame_depth += size
+        return frame_depth
+
     # abstract methods that need to be overwritten for specific assemblers
     @staticmethod
     def frame_pos(loc, type):
diff --git a/pypy/jit/backend/llsupport/test/test_gc.py b/pypy/jit/backend/llsupport/test/test_gc.py
--- a/pypy/jit/backend/llsupport/test/test_gc.py
+++ b/pypy/jit/backend/llsupport/test/test_gc.py
@@ -49,19 +49,6 @@
 
 # ____________________________________________________________
 
-def test_GcRefList():
-    S = lltype.GcStruct('S')
-    order = range(50) * 4
-    random.shuffle(order)
-    allocs = [lltype.cast_opaque_ptr(llmemory.GCREF, lltype.malloc(S))
-              for i in range(50)]
-    allocs = [allocs[i] for i in order]
-    #
-    gcrefs = GcRefList()
-    gcrefs.initialize()
-    addrs = [gcrefs.get_address_of_gcref(ptr) for ptr in allocs]
-    for i in range(len(allocs)):
-        assert addrs[i].address[0] == llmemory.cast_ptr_to_adr(allocs[i])
 
 class TestGcRootMapAsmGcc:
 
@@ -288,6 +275,18 @@
     def get_write_barrier_failing_case(self, FPTRTYPE):
         return llhelper(FPTRTYPE, self._write_barrier_failing_case)
 
+    _have_wb_from_array = False
+
+    def _write_barrier_from_array_failing_case(self, adr_struct, v_index):
+        self.record.append(('barrier_from_array', adr_struct, v_index))
+
+    def get_write_barrier_from_array_failing_case(self, FPTRTYPE):
+        if self._have_wb_from_array:
+            return llhelper(FPTRTYPE,
+                            self._write_barrier_from_array_failing_case)
+        else:
+            return lltype.nullptr(FPTRTYPE.TO)
+
 
 class TestFramework(object):
     gc = 'hybrid'
@@ -303,9 +302,20 @@
             config = config_
         class FakeCPU(object):
             def cast_adr_to_int(self, adr):
-                ptr = llmemory.cast_adr_to_ptr(adr, gc_ll_descr.WB_FUNCPTR)
-                assert ptr._obj._callable == llop1._write_barrier_failing_case
-                return 42
+                if not adr:
+                    return 0
+                try:
+                    ptr = llmemory.cast_adr_to_ptr(adr, gc_ll_descr.WB_FUNCPTR)
+                    assert ptr._obj._callable == \
+                           llop1._write_barrier_failing_case
+                    return 42
+                except lltype.InvalidCast:
+                    ptr = llmemory.cast_adr_to_ptr(
+                        adr, gc_ll_descr.WB_ARRAY_FUNCPTR)
+                    assert ptr._obj._callable == \
+                           llop1._write_barrier_from_array_failing_case
+                    return 43
+
         gcdescr = get_description(config_)
         translator = FakeTranslator()
         llop1 = FakeLLOp()
@@ -414,11 +424,11 @@
             ResOperation(rop.DEBUG_MERGE_POINT, ['dummy', 2], None),
             ]
         gc_ll_descr = self.gc_ll_descr
-        operations = gc_ll_descr.rewrite_assembler(None, operations)
+        operations = gc_ll_descr.rewrite_assembler(None, operations, [])
         assert len(operations) == 0
 
     def test_rewrite_assembler_1(self):
-        # check rewriting of ConstPtrs
+        # check recording of ConstPtrs
         class MyFakeCPU(object):
             def cast_adr_to_int(self, adr):
                 assert adr == "some fake address"
@@ -438,56 +448,12 @@
             ]
         gc_ll_descr = self.gc_ll_descr
         gc_ll_descr.gcrefs = MyFakeGCRefList()
+        gcrefs = []
         operations = get_deep_immutable_oplist(operations)
-        operations = gc_ll_descr.rewrite_assembler(MyFakeCPU(), operations)
-        assert len(operations) == 2
-        assert operations[0].getopnum() == rop.GETFIELD_RAW
-        assert operations[0].getarg(0) == ConstInt(43)
-        assert operations[0].getdescr() == gc_ll_descr.single_gcref_descr
-        v_box = operations[0].result
-        assert isinstance(v_box, BoxPtr)
-        assert operations[1].getopnum() == rop.PTR_EQ
-        assert operations[1].getarg(0) == v_random_box
-        assert operations[1].getarg(1) == v_box
-        assert operations[1].result == v_result
-
-    def test_rewrite_assembler_1_cannot_move(self):
-        # check rewriting of ConstPtrs
-        class MyFakeCPU(object):
-            def cast_adr_to_int(self, adr):
-                xxx    # should not be called
-        class MyFakeGCRefList(object):
-            def get_address_of_gcref(self, s_gcref1):
-                seen.append(s_gcref1)
-                assert s_gcref1 == s_gcref
-                return "some fake address"
-        seen = []
-        S = lltype.GcStruct('S')
-        s = lltype.malloc(S)
-        s_gcref = lltype.cast_opaque_ptr(llmemory.GCREF, s)
-        v_random_box = BoxPtr()
-        v_result = BoxInt()
-        operations = [
-            ResOperation(rop.PTR_EQ, [v_random_box, ConstPtr(s_gcref)],
-                         v_result),
-            ]
-        gc_ll_descr = self.gc_ll_descr
-        gc_ll_descr.gcrefs = MyFakeGCRefList()
-        old_can_move = rgc.can_move
-        operations = get_deep_immutable_oplist(operations)
-        try:
-            rgc.can_move = lambda s: False
-            operations = gc_ll_descr.rewrite_assembler(MyFakeCPU(), operations)
-        finally:
-            rgc.can_move = old_can_move
-        assert len(operations) == 1
-        assert operations[0].getopnum() == rop.PTR_EQ
-        assert operations[0].getarg(0) == v_random_box
-        assert operations[0].getarg(1) == ConstPtr(s_gcref)
-        assert operations[0].result == v_result
-        # check that s_gcref gets added to the list anyway, to make sure
-        # that the GC sees it
-        assert seen == [s_gcref]
+        operations2 = gc_ll_descr.rewrite_assembler(MyFakeCPU(), operations,
+                                                   gcrefs)
+        assert operations2 == operations
+        assert gcrefs == [s_gcref]
 
     def test_rewrite_assembler_2(self):
         # check write barriers before SETFIELD_GC
@@ -500,7 +466,8 @@
             ]
         gc_ll_descr = self.gc_ll_descr
         operations = get_deep_immutable_oplist(operations)
-        operations = gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
+        operations = gc_ll_descr.rewrite_assembler(self.fake_cpu, operations,
+                                                   [])
         assert len(operations) == 2
         #
         assert operations[0].getopnum() == rop.COND_CALL_GC_WB
@@ -515,29 +482,90 @@
 
     def test_rewrite_assembler_3(self):
         # check write barriers before SETARRAYITEM_GC
-        v_base = BoxPtr()
-        v_index = BoxInt()
-        v_value = BoxPtr()
-        array_descr = AbstractDescr()
-        operations = [
-            ResOperation(rop.SETARRAYITEM_GC, [v_base, v_index, v_value], None,
-                         descr=array_descr),
-            ]
-        gc_ll_descr = self.gc_ll_descr
-        operations = get_deep_immutable_oplist(operations)
-        operations = gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
-        assert len(operations) == 2
-        #
-        assert operations[0].getopnum() == rop.COND_CALL_GC_WB
-        assert operations[0].getarg(0) == v_base
-        assert operations[0].getarg(1) == v_value
-        assert operations[0].result is None
-        #
-        assert operations[1].getopnum() == rop.SETARRAYITEM_RAW
-        assert operations[1].getarg(0) == v_base
-        assert operations[1].getarg(1) == v_index
-        assert operations[1].getarg(2) == v_value
-        assert operations[1].getdescr() == array_descr
+        for v_new_length in (None, ConstInt(5), ConstInt(5000), BoxInt()):
+            v_base = BoxPtr()
+            v_index = BoxInt()
+            v_value = BoxPtr()
+            array_descr = AbstractDescr()
+            operations = [
+                ResOperation(rop.SETARRAYITEM_GC, [v_base, v_index, v_value],
+                             None, descr=array_descr),
+                ]
+            if v_new_length is not None:
+                operations.insert(0, ResOperation(rop.NEW_ARRAY,
+                                                  [v_new_length], v_base,
+                                                  descr=array_descr))
+                # we need to insert another, unrelated NEW_ARRAY here
+                # to prevent the initialization_store optimization
+                operations.insert(1, ResOperation(rop.NEW_ARRAY,
+                                                  [ConstInt(12)], BoxPtr(),
+                                                  descr=array_descr))
+            gc_ll_descr = self.gc_ll_descr
+            operations = get_deep_immutable_oplist(operations)
+            operations = gc_ll_descr.rewrite_assembler(self.fake_cpu,
+                                                       operations, [])
+            if v_new_length is not None:
+                assert operations[0].getopnum() == rop.NEW_ARRAY
+                assert operations[1].getopnum() == rop.NEW_ARRAY
+                del operations[:2]
+            assert len(operations) == 2
+            #
+            assert operations[0].getopnum() == rop.COND_CALL_GC_WB
+            assert operations[0].getarg(0) == v_base
+            assert operations[0].getarg(1) == v_value
+            assert operations[0].result is None
+            #
+            assert operations[1].getopnum() == rop.SETARRAYITEM_RAW
+            assert operations[1].getarg(0) == v_base
+            assert operations[1].getarg(1) == v_index
+            assert operations[1].getarg(2) == v_value
+            assert operations[1].getdescr() == array_descr
+
+    def test_rewrite_assembler_4(self):
+        # check write barriers before SETARRAYITEM_GC,
+        # if we have actually a write_barrier_from_array.
+        self.llop1._have_wb_from_array = True
+        for v_new_length in (None, ConstInt(5), ConstInt(5000), BoxInt()):
+            v_base = BoxPtr()
+            v_index = BoxInt()
+            v_value = BoxPtr()
+            array_descr = AbstractDescr()
+            operations = [
+                ResOperation(rop.SETARRAYITEM_GC, [v_base, v_index, v_value],
+                             None, descr=array_descr),
+                ]
+            if v_new_length is not None:
+                operations.insert(0, ResOperation(rop.NEW_ARRAY,
+                                                  [v_new_length], v_base,
+                                                  descr=array_descr))
+                # we need to insert another, unrelated NEW_ARRAY here
+                # to prevent the initialization_store optimization
+                operations.insert(1, ResOperation(rop.NEW_ARRAY,
+                                                  [ConstInt(12)], BoxPtr(),
+                                                  descr=array_descr))
+            gc_ll_descr = self.gc_ll_descr
+            operations = get_deep_immutable_oplist(operations)
+            operations = gc_ll_descr.rewrite_assembler(self.fake_cpu,
+                                                       operations, [])
+            if v_new_length is not None:
+                assert operations[0].getopnum() == rop.NEW_ARRAY
+                assert operations[1].getopnum() == rop.NEW_ARRAY
+                del operations[:2]
+            assert len(operations) == 2
+            #
+            assert operations[0].getopnum() == rop.COND_CALL_GC_WB
+            assert operations[0].getarg(0) == v_base
+            if isinstance(v_new_length, ConstInt) and v_new_length.value < 130:
+                assert operations[0].getarg(1) == v_value
+            else:
+                assert operations[0].getarg(1) == v_index
+            assert operations[0].result is None
+            #
+            assert operations[1].getopnum() == rop.SETARRAYITEM_RAW
+            assert operations[1].getarg(0) == v_base
+            assert operations[1].getarg(1) == v_index
+            assert operations[1].getarg(2) == v_value
+            assert operations[1].getdescr() == array_descr
 
     def test_rewrite_assembler_initialization_store(self):
         S = lltype.GcStruct('S', ('parent', OBJECT),
@@ -558,7 +586,8 @@
         jump()
         """, namespace=locals())
         operations = get_deep_immutable_oplist(ops.operations)
-        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
+        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu,
+                                                        operations, [])
         equaloplists(operations, expected.operations)
 
     def test_rewrite_assembler_initialization_store_2(self):
@@ -583,7 +612,8 @@
         jump()
         """, namespace=locals())
         operations = get_deep_immutable_oplist(ops.operations)
-        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
+        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu,
+                                                        operations, [])
         equaloplists(operations, expected.operations)
 
     def test_rewrite_assembler_initialization_store_3(self):
@@ -602,7 +632,8 @@
         jump()
         """, namespace=locals())
         operations = get_deep_immutable_oplist(ops.operations)
-        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
+        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu,
+                                                        operations, [])
         equaloplists(operations, expected.operations)
 
 class TestFrameworkMiniMark(TestFramework):
diff --git a/pypy/jit/backend/model.py b/pypy/jit/backend/model.py
--- a/pypy/jit/backend/model.py
+++ b/pypy/jit/backend/model.py
@@ -53,7 +53,7 @@
         """Called once by the front-end when the program stops."""
         pass
 
-    def compile_loop(self, inputargs, operations, looptoken, log=True):
+    def compile_loop(self, inputargs, operations, looptoken, log=True, name=''):
         """Assemble the given loop.
         Should create and attach a fresh CompiledLoopToken to
         looptoken.compiled_loop_token and stick extra attributes
diff --git a/pypy/jit/backend/test/calling_convention_test.py b/pypy/jit/backend/test/calling_convention_test.py
--- a/pypy/jit/backend/test/calling_convention_test.py
+++ b/pypy/jit/backend/test/calling_convention_test.py
@@ -57,146 +57,146 @@
         return ConstInt(heaptracker.adr2int(addr))
 
     def test_call_aligned_with_spilled_values(self):
-            from pypy.rlib.libffi import types
-            cpu = self.cpu
-            if not cpu.supports_floats:
-                py.test.skip('requires floats')
+        from pypy.rlib.libffi import types
+        cpu = self.cpu
+        if not cpu.supports_floats:
+            py.test.skip('requires floats')
 
 
-            def func(*args):
-                return float(sum(args))
+        def func(*args):
+            return float(sum(args))
 
-            F = lltype.Float
-            I = lltype.Signed
-            floats = [0.7, 5.8, 0.1, 0.3, 0.9, -2.34, -3.45, -4.56]
-            ints = [7, 11, 23, 13, -42, 1111, 95, 1]
-            for case in range(256):
-                local_floats = list(floats)
-                local_ints = list(ints)
-                args = []
-                spills = []
-                funcargs = []
-                float_count = 0
-                int_count = 0
-                for i in range(8):
-                    if case & (1<<i):
-                        args.append('f%d' % float_count)
-                        spills.append('force_spill(f%d)' % float_count)
-                        float_count += 1
-                        funcargs.append(F)
-                    else:
-                        args.append('i%d' % int_count)
-                        spills.append('force_spill(i%d)' % int_count)
-                        int_count += 1
-                        funcargs.append(I)
+        F = lltype.Float
+        I = lltype.Signed
+        floats = [0.7, 5.8, 0.1, 0.3, 0.9, -2.34, -3.45, -4.56]
+        ints = [7, 11, 23, 13, -42, 1111, 95, 1]
+        for case in range(256):
+            local_floats = list(floats)
+            local_ints = list(ints)
+            args = []
+            spills = []
+            funcargs = []
+            float_count = 0
+            int_count = 0
+            for i in range(8):
+                if case & (1<<i):
+                    args.append('f%d' % float_count)
+                    spills.append('force_spill(f%d)' % float_count)
+                    float_count += 1
+                    funcargs.append(F)
+                else:
+                    args.append('i%d' % int_count)
+                    spills.append('force_spill(i%d)' % int_count)
+                    int_count += 1
+                    funcargs.append(I)
 
-                arguments = ', '.join(args)
-                spill_ops = '\n'.join(spills)
+            arguments = ', '.join(args)
+            spill_ops = '\n'.join(spills)
 
-                FUNC = self.FuncType(funcargs, F)
-                FPTR = self.Ptr(FUNC)
-                func_ptr = llhelper(FPTR, func)
-                calldescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
-                funcbox = self.get_funcbox(cpu, func_ptr)
+            FUNC = self.FuncType(funcargs, F)
+            FPTR = self.Ptr(FUNC)
+            func_ptr = llhelper(FPTR, func)
+            calldescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
+            funcbox = self.get_funcbox(cpu, func_ptr)
 
-                ops = '[%s]\n' % arguments
-                ops += '%s\n' % spill_ops
-                ops += 'f99 = call(ConstClass(func_ptr), %s, descr=calldescr)\n' % arguments
-                ops += 'finish(f99, %s)\n' % arguments
+            ops = '[%s]\n' % arguments
+            ops += '%s\n' % spill_ops
+            ops += 'f99 = call(ConstClass(func_ptr), %s, descr=calldescr)\n' % arguments
+            ops += 'finish(f99, %s)\n' % arguments
 
-                loop = parse(ops, namespace=locals())
-                looptoken = LoopToken()
-                done_number = self.cpu.get_fail_descr_number(loop.operations[-1].getdescr())
-                self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
-                expected_result = self._prepare_args(args, floats, ints)
+            loop = parse(ops, namespace=locals())
+            looptoken = LoopToken()
+            done_number = self.cpu.get_fail_descr_number(loop.operations[-1].getdescr())
+            self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
+            expected_result = self._prepare_args(args, floats, ints)
 
-                res = self.cpu.execute_token(looptoken)
-                x = longlong.getrealfloat(cpu.get_latest_value_float(0))
-                assert abs(x - expected_result) < 0.0001
+            res = self.cpu.execute_token(looptoken)
+            x = longlong.getrealfloat(cpu.get_latest_value_float(0))
+            assert abs(x - expected_result) < 0.0001
 
     def test_call_aligned_with_imm_values(self):
-            from pypy.rlib.libffi import types
-            cpu = self.cpu
-            if not cpu.supports_floats:
-                py.test.skip('requires floats')
+        from pypy.rlib.libffi import types
+        cpu = self.cpu
+        if not cpu.supports_floats:
+            py.test.skip('requires floats')
 
 
-            def func(*args):
-                return float(sum(args))
+        def func(*args):
+            return float(sum(args))
 
-            F = lltype.Float
-            I = lltype.Signed
-            floats = [0.7, 5.8, 0.1, 0.3, 0.9, -2.34, -3.45, -4.56]
-            ints = [7, 11, 23, 13, -42, 1111, 95, 1]
-            for case in range(256):
-                result = 0.0
-                args = []
-                argslist = []
-                local_floats = list(floats)
-                local_ints = list(ints)
-                for i in range(8):
-                    if case & (1<<i):
-                        args.append(F)
-                        arg = local_floats.pop()
-                        result += arg
-                        argslist.append(constfloat(arg))
-                    else:
-                        args.append(I)
-                        arg = local_ints.pop()
-                        result += arg
-                        argslist.append(ConstInt(arg))
-                FUNC = self.FuncType(args, F)
-                FPTR = self.Ptr(FUNC)
-                func_ptr = llhelper(FPTR, func)
-                calldescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
-                funcbox = self.get_funcbox(cpu, func_ptr)
+        F = lltype.Float
+        I = lltype.Signed
+        floats = [0.7, 5.8, 0.1, 0.3, 0.9, -2.34, -3.45, -4.56]
+        ints = [7, 11, 23, 13, -42, 1111, 95, 1]
+        for case in range(256):
+            result = 0.0
+            args = []
+            argslist = []
+            local_floats = list(floats)
+            local_ints = list(ints)
+            for i in range(8):
+                if case & (1<<i):
+                    args.append(F)
+                    arg = local_floats.pop()
+                    result += arg
+                    argslist.append(constfloat(arg))
+                else:
+                    args.append(I)
+                    arg = local_ints.pop()
+                    result += arg
+                    argslist.append(ConstInt(arg))
+            FUNC = self.FuncType(args, F)
+            FPTR = self.Ptr(FUNC)
+            func_ptr = llhelper(FPTR, func)
+            calldescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
+            funcbox = self.get_funcbox(cpu, func_ptr)
 
-                res = self.execute_operation(rop.CALL,
-                                             [funcbox] + argslist,
-                                             'float', descr=calldescr)
-                assert abs(res.getfloat() - result) < 0.0001
+            res = self.execute_operation(rop.CALL,
+                                         [funcbox] + argslist,
+                                         'float', descr=calldescr)
+            assert abs(res.getfloat() - result) < 0.0001
 
     def test_call_aligned_with_args_on_the_stack(self):
-            from pypy.rlib.libffi import types
-            cpu = self.cpu
-            if not cpu.supports_floats:
-                py.test.skip('requires floats')
+        from pypy.rlib.libffi import types
+        cpu = self.cpu
+        if not cpu.supports_floats:
+            py.test.skip('requires floats')
 
 
-            def func(*args):
-                return float(sum(args))
+        def func(*args):
+            return float(sum(args))
 
-            F = lltype.Float
-            I = lltype.Signed
-            floats = [0.7, 5.8, 0.1, 0.3, 0.9, -2.34, -3.45, -4.56]
-            ints = [7, 11, 23, 13, -42, 1111, 95, 1]
-            for case in range(256):
-                result = 0.0
-                args = []
-                argslist = []
-                local_floats = list(floats)
-                local_ints = list(ints)
-                for i in range(8):
-                    if case & (1<<i):
-                        args.append(F)
-                        arg = local_floats.pop()
-                        result += arg
-                        argslist.append(boxfloat(arg))
-                    else:
-                        args.append(I)
-                        arg = local_ints.pop()
-                        result += arg
-                        argslist.append(BoxInt(arg))
-                FUNC = self.FuncType(args, F)
-                FPTR = self.Ptr(FUNC)
-                func_ptr = llhelper(FPTR, func)
-                calldescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
-                funcbox = self.get_funcbox(cpu, func_ptr)
+        F = lltype.Float
+        I = lltype.Signed
+        floats = [0.7, 5.8, 0.1, 0.3, 0.9, -2.34, -3.45, -4.56]
+        ints = [7, 11, 23, 13, -42, 1111, 95, 1]
+        for case in range(256):
+            result = 0.0
+            args = []
+            argslist = []
+            local_floats = list(floats)
+            local_ints = list(ints)
+            for i in range(8):
+                if case & (1<<i):
+                    args.append(F)
+                    arg = local_floats.pop()
+                    result += arg
+                    argslist.append(boxfloat(arg))
+                else:
+                    args.append(I)
+                    arg = local_ints.pop()
+                    result += arg
+                    argslist.append(BoxInt(arg))
+            FUNC = self.FuncType(args, F)
+            FPTR = self.Ptr(FUNC)
+            func_ptr = llhelper(FPTR, func)
+            calldescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
+            funcbox = self.get_funcbox(cpu, func_ptr)
 
-                res = self.execute_operation(rop.CALL,
-                                             [funcbox] + argslist,
-                                             'float', descr=calldescr)
-                assert abs(res.getfloat() - result) < 0.0001
+            res = self.execute_operation(rop.CALL,
+                                         [funcbox] + argslist,
+                                         'float', descr=calldescr)
+            assert abs(res.getfloat() - result) < 0.0001
 
     def test_call_alignment_call_assembler(self):
         from pypy.rlib.libffi import types
diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py
--- a/pypy/jit/backend/test/runner_test.py
+++ b/pypy/jit/backend/test/runner_test.py
@@ -560,23 +560,6 @@
                                          'int', descr=calldescr)
             assert res.value == func_ints(*args)
 
-    def test_call_to_c_function(self):
-        from pypy.rlib.libffi import CDLL, types, ArgChain
-        from pypy.rpython.lltypesystem.ll2ctypes import libc_name
-        libc = CDLL(libc_name)
-        c_tolower = libc.getpointer('tolower', [types.uchar], types.sint)
-        argchain = ArgChain().arg(ord('A'))
-        assert c_tolower.call(argchain, rffi.INT) == ord('a')
-
-        func_adr = llmemory.cast_ptr_to_adr(c_tolower.funcsym)
-        funcbox = ConstInt(heaptracker.adr2int(func_adr))
-        calldescr = self.cpu.calldescrof_dynamic([types.uchar], types.sint)
-        res = self.execute_operation(rop.CALL,
-                                     [funcbox, BoxInt(ord('A'))],
-                                     'int',
-                                     descr=calldescr)
-        assert res.value == ord('a')
-
     def test_call_with_const_floats(self):
         def func(f1, f2):
             return f1 + f2
@@ -1680,7 +1663,7 @@
         record = []
         #
         S = lltype.GcStruct('S', ('tid', lltype.Signed))
-        FUNC = self.FuncType([lltype.Ptr(S), lltype.Signed], lltype.Void)
+        FUNC = self.FuncType([lltype.Ptr(S), lltype.Ptr(S)], lltype.Void)
         func_ptr = llhelper(lltype.Ptr(FUNC), func_void)
         funcbox = self.get_funcbox(self.cpu, func_ptr)
         class WriteBarrierDescr(AbstractDescr):
@@ -1699,12 +1682,48 @@
             s = lltype.malloc(S)
             s.tid = value
             sgcref = lltype.cast_opaque_ptr(llmemory.GCREF, s)
+            t = lltype.malloc(S)
+            tgcref = lltype.cast_opaque_ptr(llmemory.GCREF, t)
             del record[:]
             self.execute_operation(rop.COND_CALL_GC_WB,
-                                   [BoxPtr(sgcref), ConstInt(-2121)],
+                                   [BoxPtr(sgcref), ConstPtr(tgcref)],
                                    'void', descr=WriteBarrierDescr())
             if cond:
-                assert record == [(s, -2121)]
+                assert record == [(s, t)]
+            else:
+                assert record == []
+
+    def test_cond_call_gc_wb_array(self):
+        def func_void(a, b):
+            record.append((a, b))
+        record = []
+        #
+        S = lltype.GcStruct('S', ('tid', lltype.Signed))
+        FUNC = self.FuncType([lltype.Ptr(S), lltype.Signed], lltype.Void)
+        func_ptr = llhelper(lltype.Ptr(FUNC), func_void)
+        funcbox = self.get_funcbox(self.cpu, func_ptr)
+        class WriteBarrierDescr(AbstractDescr):
+            jit_wb_if_flag = 4096
+            jit_wb_if_flag_byteofs = struct.pack("i", 4096).index('\x10')
+            jit_wb_if_flag_singlebyte = 0x10
+            def get_write_barrier_from_array_fn(self, cpu):
+                return funcbox.getint()
+        #
+        for cond in [False, True]:
+            value = random.randrange(-sys.maxint, sys.maxint)
+            if cond:
+                value |= 4096
+            else:
+                value &= ~4096
+            s = lltype.malloc(S)
+            s.tid = value
+            sgcref = lltype.cast_opaque_ptr(llmemory.GCREF, s)
+            del record[:]
+            self.execute_operation(rop.COND_CALL_GC_WB,
+                                   [BoxPtr(sgcref), ConstInt(123)],
+                                   'void', descr=WriteBarrierDescr())
+            if cond:
+                assert record == [(s, 123)]
             else:
                 assert record == []
 
@@ -1843,6 +1862,99 @@
         assert self.cpu.get_latest_value_int(2) == 10
         assert values == [1, 10]
 
+    def test_call_to_c_function(self):
+        from pypy.rlib.libffi import CDLL, types, ArgChain
+        from pypy.rpython.lltypesystem.ll2ctypes import libc_name
+        libc = CDLL(libc_name)
+        c_tolower = libc.getpointer('tolower', [types.uchar], types.sint)
+        argchain = ArgChain().arg(ord('A'))
+        assert c_tolower.call(argchain, rffi.INT) == ord('a')
+
+        cpu = self.cpu
+        func_adr = llmemory.cast_ptr_to_adr(c_tolower.funcsym)
+        funcbox = ConstInt(heaptracker.adr2int(func_adr))
+        calldescr = cpu.calldescrof_dynamic([types.uchar], types.sint)
+        i1 = BoxInt()
+        i2 = BoxInt()
+        tok = BoxInt()
+        faildescr = BasicFailDescr(1)
+        ops = [
+        ResOperation(rop.CALL_RELEASE_GIL, [funcbox, i1], i2,
+                     descr=calldescr),
+        ResOperation(rop.GUARD_NOT_FORCED, [], None, descr=faildescr),
+        ResOperation(rop.FINISH, [i2], None, descr=BasicFailDescr(0))
+        ]
+        ops[1].setfailargs([i1, i2])
+        looptoken = LoopToken()
+        self.cpu.compile_loop([i1], ops, looptoken)
+        self.cpu.set_future_value_int(0, ord('G'))
+        fail = self.cpu.execute_token(looptoken)
+        assert fail.identifier == 0
+        assert self.cpu.get_latest_value_int(0) == ord('g')
+
+    def test_call_to_c_function_with_callback(self):
+        from pypy.rlib.libffi import CDLL, types, ArgChain, clibffi
+        from pypy.rpython.lltypesystem.ll2ctypes import libc_name
+        libc = CDLL(libc_name)
+        types_size_t = clibffi.cast_type_to_ffitype(rffi.SIZE_T)
+        c_qsort = libc.getpointer('qsort', [types.pointer, types_size_t,
+                                            types_size_t, types.pointer],
+                                  types.void)
+        class Glob(object):
+            pass
+        glob = Glob()
+        class X(object):
+            pass
+        #
+        def callback(p1, p2):
+            glob.lst.append(X())
+            return rffi.cast(rffi.INT, 1)
+        CALLBACK = lltype.Ptr(lltype.FuncType([lltype.Signed,
+                                               lltype.Signed], rffi.INT))
+        fn = llhelper(CALLBACK, callback)
+        S = lltype.Struct('S', ('x', rffi.INT), ('y', rffi.INT))
+        raw = lltype.malloc(S, flavor='raw')
+        argchain = ArgChain()
+        argchain = argchain.arg(rffi.cast(lltype.Signed, raw))
+        argchain = argchain.arg(rffi.cast(rffi.SIZE_T, 2))
+        argchain = argchain.arg(rffi.cast(rffi.SIZE_T, 4))
+        argchain = argchain.arg(rffi.cast(lltype.Signed, fn))
+        glob.lst = []
+        c_qsort.call(argchain, lltype.Void)
+        assert len(glob.lst) > 0
+        del glob.lst[:]
+
+        cpu = self.cpu
+        func_adr = llmemory.cast_ptr_to_adr(c_qsort.funcsym)
+        funcbox = ConstInt(heaptracker.adr2int(func_adr))
+        calldescr = cpu.calldescrof_dynamic([types.pointer, types_size_t,
+                                             types_size_t, types.pointer],
+                                            types.void)
+        i0 = BoxInt()
+        i1 = BoxInt()
+        i2 = BoxInt()
+        i3 = BoxInt()
+        tok = BoxInt()
+        faildescr = BasicFailDescr(1)
+        ops = [
+        ResOperation(rop.CALL_RELEASE_GIL, [funcbox, i0, i1, i2, i3], None,
+                     descr=calldescr),
+        ResOperation(rop.GUARD_NOT_FORCED, [], None, descr=faildescr),
+        ResOperation(rop.FINISH, [], None, descr=BasicFailDescr(0))
+        ]
+        ops[1].setfailargs([])
+        looptoken = LoopToken()
+        self.cpu.compile_loop([i0, i1, i2, i3], ops, looptoken)
+        self.cpu.set_future_value_int(0, rffi.cast(lltype.Signed, raw))
+        self.cpu.set_future_value_int(1, 2)
+        self.cpu.set_future_value_int(2, 4)
+        self.cpu.set_future_value_int(3, rffi.cast(lltype.Signed, fn))
+        assert glob.lst == []
+        fail = self.cpu.execute_token(looptoken)
+        assert fail.identifier == 0
+        assert len(glob.lst) > 0
+        lltype.free(raw, flavor='raw')
+
     def test_guard_not_invalidated(self):
         cpu = self.cpu
         i0 = BoxInt()
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -128,6 +128,8 @@
         if gc_ll_descr.get_malloc_slowpath_addr is not None:
             self._build_malloc_slowpath()
         self._build_stack_check_slowpath()
+        if gc_ll_descr.gcrootmap:
+            self._build_release_gil(gc_ll_descr.gcrootmap)
         debug_start('jit-backend-counts')
         self.set_debug(have_debug_prints())
         debug_stop('jit-backend-counts')
@@ -306,7 +308,66 @@
         rawstart = mc.materialize(self.cpu.asmmemmgr, [])
         self.stack_check_slowpath = rawstart
 
-    def assemble_loop(self, inputargs, operations, looptoken, log):
+    @staticmethod
+    def _release_gil_asmgcc(css):
+        # similar to trackgcroot.py:pypy_asm_stackwalk, first part
+        from pypy.rpython.memory.gctransform import asmgcroot
+        new = rffi.cast(asmgcroot.ASM_FRAMEDATA_HEAD_PTR, css)
+        next = asmgcroot.gcrootanchor.next
+        new.next = next
+        new.prev = asmgcroot.gcrootanchor
+        asmgcroot.gcrootanchor.next = new
+        next.prev = new
+        # and now release the GIL
+        before = rffi.aroundstate.before
+        if before:
+            before()
+
+    @staticmethod
+    def _reacquire_gil_asmgcc(css):
+        # first reacquire the GIL
+        after = rffi.aroundstate.after
+        if after:
+            after()
+        # similar to trackgcroot.py:pypy_asm_stackwalk, second part
+        from pypy.rpython.memory.gctransform import asmgcroot
+        old = rffi.cast(asmgcroot.ASM_FRAMEDATA_HEAD_PTR, css)
+        prev = old.prev
+        next = old.next
+        prev.next = next
+        next.prev = prev
+
+    @staticmethod
+    def _release_gil_shadowstack():
+        before = rffi.aroundstate.before
+        if before:
+            before()
+
+    @staticmethod
+    def _reacquire_gil_shadowstack():
+        after = rffi.aroundstate.after
+        if after:
+            after()
+
+    _NOARG_FUNC = lltype.Ptr(lltype.FuncType([], lltype.Void))
+    _CLOSESTACK_FUNC = lltype.Ptr(lltype.FuncType([rffi.LONGP],
+                                                  lltype.Void))
+
+    def _build_release_gil(self, gcrootmap):
+        if gcrootmap.is_shadow_stack:
+            releasegil_func = llhelper(self._NOARG_FUNC,
+                                       self._release_gil_shadowstack)
+            reacqgil_func = llhelper(self._NOARG_FUNC,
+                                     self._reacquire_gil_shadowstack)
+        else:
+            releasegil_func = llhelper(self._CLOSESTACK_FUNC,
+                                       self._release_gil_asmgcc)
+            reacqgil_func = llhelper(self._CLOSESTACK_FUNC,
+                                     self._reacquire_gil_asmgcc)
+        self.releasegil_addr  = self.cpu.cast_ptr_to_int(releasegil_func)
+        self.reacqgil_addr = self.cpu.cast_ptr_to_int(reacqgil_func)
+
+    def assemble_loop(self, loopname, inputargs, operations, looptoken, log):
         '''adds the following attributes to looptoken:
                _x86_loop_code       (an integer giving an address)
                _x86_bootstrap_code  (an integer giving an address)
@@ -322,6 +383,7 @@
         # for the duration of compiling one loop or a one bridge.
 
         clt = CompiledLoopToken(self.cpu, looptoken.number)
+        clt.allgcrefs = []
         looptoken.compiled_loop_token = clt
         if not we_are_translated():
             # Arguments should be unique
@@ -329,13 +391,13 @@
 
         self.setup(looptoken)
         self.currently_compiling_loop = looptoken
-        funcname = self._find_debug_merge_point(operations)
         if log:
             self._register_counter()
             operations = self._inject_debugging_code(looptoken, operations)
 
         regalloc = RegAlloc(self, self.cpu.translate_support_code)
-        arglocs, operations = regalloc.prepare_loop(inputargs, operations, looptoken)
+        arglocs, operations = regalloc.prepare_loop(inputargs, operations,
+                                                    looptoken, clt.allgcrefs)
         looptoken._x86_arglocs = arglocs
 
         bootstrappos = self.mc.get_relative_pos()
@@ -355,7 +417,7 @@
         #
         rawstart = self.materialize_loop(looptoken)
         debug_print("Loop #%d (%s) has address %x to %x" % (
-            looptoken.number, funcname,
+            looptoken.number, loopname,
             rawstart + self.looppos,
             rawstart + directbootstrappos))
         self._patch_stackadjust(rawstart + stackadjustpos,
@@ -375,7 +437,7 @@
         self.teardown()
         # oprofile support
         if self.cpu.profile_agent is not None:
-            name = "Loop # %s: %s" % (looptoken.number, funcname)
+            name = "Loop # %s: %s" % (looptoken.number, loopname)
             self.cpu.profile_agent.native_code_written(name,
                                                        rawstart, fullsize)
         return ops_offset
@@ -395,7 +457,6 @@
             return
 
         self.setup(original_loop_token)
-        funcname = self._find_debug_merge_point(operations)
         if log:
             self._register_counter()
             operations = self._inject_debugging_code(faildescr, operations)
@@ -407,7 +468,8 @@
         regalloc = RegAlloc(self, self.cpu.translate_support_code)
         fail_depths = faildescr._x86_current_depths
         operations = regalloc.prepare_bridge(fail_depths, inputargs, arglocs,
-                                             operations)
+                                             operations,
+                                             self.current_clt.allgcrefs)
 
         stackadjustpos = self._patchable_stackadjust()
         frame_depth, param_depth = self._assemble(regalloc, operations)
@@ -417,8 +479,8 @@
         #
         rawstart = self.materialize_loop(original_loop_token)
 
-        debug_print("Bridge out of guard %d (%s) has address %x to %x" %
-                    (descr_number, funcname, rawstart, rawstart + codeendpos))
+        debug_print("Bridge out of guard %d has address %x to %x" %
+                    (descr_number, rawstart, rawstart + codeendpos))
         self._patch_stackadjust(rawstart + stackadjustpos,
                                 frame_depth + param_depth)
         self.patch_pending_failure_recoveries(rawstart)
@@ -432,7 +494,7 @@
         self.teardown()
         # oprofile support
         if self.cpu.profile_agent is not None:
-            name = "Bridge # %s: %s" % (descr_number, funcname)
+            name = "Bridge # %s" % (descr_number,)
             self.cpu.profile_agent.native_code_written(name,
                                                        rawstart, fullsize)
         return ops_offset
@@ -492,17 +554,6 @@
         return self.mc.materialize(self.cpu.asmmemmgr, allblocks,
                                    self.cpu.gc_ll_descr.gcrootmap)
 
-    def _find_debug_merge_point(self, operations):
-
-        for op in operations:
-            if op.getopnum() == rop.DEBUG_MERGE_POINT:
-                funcname = op.getarg(0)._get_str()
-                break
-        else:
-            funcname = "<loop %d>" % len(self.loop_run_counters)
-        # invent the counter, so we don't get too confused
-        return funcname
-
     def _register_counter(self):
         if self._debug:
             # YYY very minor leak -- we need the counters to stay alive
@@ -1987,6 +2038,102 @@
         self.mc.CMP_bi(FORCE_INDEX_OFS, 0)
         self.implement_guard(guard_token, 'L')
 
+    def genop_guard_call_release_gil(self, op, guard_op, guard_token,
+                                     arglocs, result_loc):
+        # first, close the stack in the sense of the asmgcc GC root tracker
+        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
+        if gcrootmap:
+            self.call_release_gil(gcrootmap, arglocs)
+        # do the call
+        faildescr = guard_op.getdescr()
+        fail_index = self.cpu.get_fail_descr_number(faildescr)
+        self.mc.MOV_bi(FORCE_INDEX_OFS, fail_index)
+        self._genop_call(op, arglocs, result_loc, fail_index)
+        # then reopen the stack
+        if gcrootmap:
+            self.call_reacquire_gil(gcrootmap, result_loc)
+        # finally, the guard_not_forced
+        self.mc.CMP_bi(FORCE_INDEX_OFS, 0)
+        self.implement_guard(guard_token, 'L')
+
+    def call_release_gil(self, gcrootmap, save_registers):
+        # First, we need to save away the registers listed in
+        # 'save_registers' that are not callee-save.  XXX We assume that
+        # the XMM registers won't be modified.  We store them in
+        # [ESP+4], [ESP+8], etc., leaving enough room in [ESP] for the
+        # single argument to closestack_addr below.
+        p = WORD
+        for reg in self._regalloc.rm.save_around_call_regs:
+            if reg in save_registers:
+                self.mc.MOV_sr(p, reg.value)
+                p += WORD
+        self._regalloc.reserve_param(p//WORD)
+        #
+        if gcrootmap.is_shadow_stack:
+            args = []
+        else:
+            # note that regalloc.py used save_all_regs=True to save all
+            # registers, so we don't have to care about saving them (other
+            # than ebp) in the close_stack_struct.  But if they are registers
+            # like %eax that would be destroyed by this call, *and* they are
+            # used by arglocs for the *next* call, then trouble; for now we
+            # will just push/pop them.
+            from pypy.rpython.memory.gctransform import asmgcroot
+            css = self._regalloc.close_stack_struct
+            if css == 0:
+                use_words = (2 + max(asmgcroot.INDEX_OF_EBP,
+                                     asmgcroot.FRAME_PTR) + 1)
+                pos = self._regalloc.fm.reserve_location_in_frame(use_words)
+                css = get_ebp_ofs(pos + use_words - 1)
+                self._regalloc.close_stack_struct = css
+            # The location where the future CALL will put its return address
+            # will be [ESP-WORD], so save that as the next frame's top address
+            self.mc.LEA_rs(eax.value, -WORD)        # LEA EAX, [ESP-4]
+            frame_ptr = css + WORD * (2+asmgcroot.FRAME_PTR)
+            self.mc.MOV_br(frame_ptr, eax.value)    # MOV [css.frame], EAX
+            # Save ebp
+            index_of_ebp = css + WORD * (2+asmgcroot.INDEX_OF_EBP)
+            self.mc.MOV_br(index_of_ebp, ebp.value) # MOV [css.ebp], EBP
+            # Call the closestack() function (also releasing the GIL)
+            if IS_X86_32:
+                reg = eax
+            elif IS_X86_64:
+                reg = edi
+            self.mc.LEA_rb(reg.value, css)
+            args = [reg]
+        #
+        self._emit_call(-1, imm(self.releasegil_addr), args)
+        # Finally, restore the registers saved above.
+        p = WORD
+        for reg in self._regalloc.rm.save_around_call_regs:
+            if reg in save_registers:
+                self.mc.MOV_rs(reg.value, p)
+                p += WORD
+
+    def call_reacquire_gil(self, gcrootmap, save_loc):
+        # save the previous result (eax/xmm0) into the stack temporarily.
+        # XXX like with call_release_gil(), we assume that we don't need
+        # to save xmm0 in this case.
+        if isinstance(save_loc, RegLoc) and not save_loc.is_xmm:
+            self.mc.MOV_sr(WORD, save_loc.value)
+            self._regalloc.reserve_param(2)
+        # call the reopenstack() function (also reacquiring the GIL)
+        if gcrootmap.is_shadow_stack:
+            args = []
+        else:
+            css = self._regalloc.close_stack_struct
+            assert css != 0
+            if IS_X86_32:
+                reg = eax
+            elif IS_X86_64:
+                reg = edi
+            self.mc.LEA_rb(reg.value, css)
+            args = [reg]
+        self._emit_call(-1, imm(self.reacqgil_addr), args)
+        # restore the result from the stack
+        if isinstance(save_loc, RegLoc) and not save_loc.is_xmm:
+            self.mc.MOV_rs(save_loc.value, WORD)
+
     def genop_guard_call_assembler(self, op, guard_op, guard_token,
                                    arglocs, result_loc):
         faildescr = guard_op.getdescr()
@@ -2079,6 +2226,8 @@
         # function remember_young_pointer() from the GC.  The two arguments
         # to the call are in arglocs[:2].  The rest, arglocs[2:], contains
         # registers that need to be saved and restored across the call.
+        # If op.getarg(1) is a int, it is an array index and we must call
+        # instead remember_young_pointer_from_array().
         descr = op.getdescr()
         if we_are_translated():
             cls = self.cpu.gc_ll_descr.has_write_barrier_class()
@@ -2110,13 +2259,19 @@
             remap_frame_layout(self, arglocs[:2], [edi, esi],
                                X86_64_SCRATCH_REG)
 
+        if op.getarg(1).type == INT:
+            func = descr.get_write_barrier_from_array_fn(self.cpu)
+            assert func != 0
+        else:
+            func = descr.get_write_barrier_fn(self.cpu)
+
         # misaligned stack in the call, but it's ok because the write barrier
         # is not going to call anything more.  Also, this assumes that the
         # write barrier does not touch the xmm registers.  (Slightly delicate
         # assumption, given that the write barrier can end up calling the
         # platform's malloc() from AddressStack.append().  XXX may need to
         # be done properly)
-        self.mc.CALL(imm(descr.get_write_barrier_fn(self.cpu)))
+        self.mc.CALL(imm(func))
         if IS_X86_32:
             self.mc.ADD_ri(esp.value, 2*WORD)
         for i in range(2, len(arglocs)):
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -156,12 +156,14 @@
         self.translate_support_code = translate_support_code
         # to be read/used by the assembler too
         self.jump_target_descr = None
+        self.close_stack_struct = 0
 
-    def _prepare(self, inputargs, operations):
+    def _prepare(self, inputargs, operations, allgcrefs):
         self.fm = X86FrameManager()
         self.param_depth = 0
         cpu = self.assembler.cpu
-        operations = cpu.gc_ll_descr.rewrite_assembler(cpu, operations)
+        operations = cpu.gc_ll_descr.rewrite_assembler(cpu, operations,
+                                                       allgcrefs)
         # compute longevity of variables
         longevity = self._compute_vars_longevity(inputargs, operations)
         self.longevity = longevity
@@ -172,15 +174,16 @@
                                    assembler = self.assembler)
         return operations
 
-    def prepare_loop(self, inputargs, operations, looptoken):
-        operations = self._prepare(inputargs, operations)
+    def prepare_loop(self, inputargs, operations, looptoken, allgcrefs):
+        operations = self._prepare(inputargs, operations, allgcrefs)
         jump = operations[-1]
         loop_consts = self._compute_loop_consts(inputargs, jump, looptoken)
         self.loop_consts = loop_consts
         return self._process_inputargs(inputargs), operations
 
-    def prepare_bridge(self, prev_depths, inputargs, arglocs, operations):
-        operations = self._prepare(inputargs, operations)
+    def prepare_bridge(self, prev_depths, inputargs, arglocs, operations,
+                       allgcrefs):
+        operations = self._prepare(inputargs, operations, allgcrefs)
         self.loop_consts = {}
         self._update_bindings(arglocs, inputargs)
         self.fm.frame_depth = prev_depths[0]
@@ -388,7 +391,9 @@
         self.assembler.regalloc_perform_discard(op, arglocs)
 
     def can_merge_with_next_guard(self, op, i, operations):
-        if op.getopnum() == rop.CALL_MAY_FORCE or op.getopnum() == rop.CALL_ASSEMBLER:
+        if (op.getopnum() == rop.CALL_MAY_FORCE or
+            op.getopnum() == rop.CALL_ASSEMBLER or
+            op.getopnum() == rop.CALL_RELEASE_GIL):
             assert operations[i + 1].getopnum() == rop.GUARD_NOT_FORCED
             return True
         if not op.is_comparison():
@@ -779,6 +784,19 @@
         self.xrm.possibly_free_var(op.getarg(1))
 
     def _call(self, op, arglocs, force_store=[], guard_not_forced_op=None):
+        # we need to save registers on the stack:
+        #
+        #  - at least the non-callee-saved registers
+        #
+        #  - for shadowstack, we assume that any call can collect, and we
+        #    save also the callee-saved registers that contain GC pointers,
+        #    so that they can be found by follow_stack_frame_of_assembler()
+        #
+        #  - for CALL_MAY_FORCE or CALL_ASSEMBLER, we have to save all regs
+        #    anyway, in case we need to do cpu.force().  The issue is that
+        #    grab_frame_values() would not be able to locate values in
+        #    callee-saved registers.
+        #
         save_all_regs = guard_not_forced_op is not None
         self.xrm.before_call(force_store, save_all_regs=save_all_regs)
         if not save_all_regs:
@@ -845,6 +863,8 @@
         assert guard_op is not None
         self._consider_call(op, guard_op)
 
+    consider_call_release_gil = consider_call_may_force
+
     def consider_call_assembler(self, op, guard_op):
         descr = op.getdescr()
         assert isinstance(descr, LoopToken)
@@ -864,12 +884,12 @@
     def consider_cond_call_gc_wb(self, op):
         assert op.result is None
         args = op.getarglist()
-        loc_newvalue = self.rm.make_sure_var_in_reg(op.getarg(1), args)
-        # ^^^ we force loc_newvalue in a reg (unless it's a Const),
+        loc_newvalue_or_index= self.rm.make_sure_var_in_reg(op.getarg(1), args)
+        # ^^^ we force loc_newvalue_or_index in a reg (unless it's a Const),
         # because it will be needed anyway by the following setfield_gc.
         # It avoids loading it twice from the memory.
         loc_base = self.rm.make_sure_var_in_reg(op.getarg(0), args)
-        arglocs = [loc_base, loc_newvalue]
+        arglocs = [loc_base, loc_newvalue_or_index]
         # add eax, ecx and edx as extra "arguments" to ensure they are
         # saved and restored.  Fish in self.rm to know which of these
         # registers really need to be saved (a bit of a hack).  Moreover,
@@ -1358,7 +1378,9 @@
         name = name[len('consider_'):]
         num = getattr(rop, name.upper())
         if (is_comparison_or_ovf_op(num)
-            or num == rop.CALL_MAY_FORCE or num == rop.CALL_ASSEMBLER):
+            or num == rop.CALL_MAY_FORCE
+            or num == rop.CALL_ASSEMBLER
+            or num == rop.CALL_RELEASE_GIL):
             oplist_with_guard[num] = value
             oplist[num] = add_none_argument(value)
         else:
diff --git a/pypy/jit/backend/x86/runner.py b/pypy/jit/backend/x86/runner.py
--- a/pypy/jit/backend/x86/runner.py
+++ b/pypy/jit/backend/x86/runner.py
@@ -22,6 +22,7 @@
 
     BOOTSTRAP_TP = lltype.FuncType([], lltype.Signed)
     dont_keepalive_stuff = False # for tests
+    with_threads = False
 
     def __init__(self, rtyper, stats, opts=None, translate_support_code=False,
                  gcdescr=None):
@@ -38,6 +39,7 @@
                 if not oprofile.OPROFILE_AVAILABLE:
                     log.WARNING('oprofile support was explicitly enabled, but oprofile headers seem not to be available')
                 profile_agent = oprofile.OProfileAgent()
+            self.with_threads = config.translation.thread
 
         self.profile_agent = profile_agent
 
@@ -77,9 +79,9 @@
         lines = machine_code_dump(data, addr, self.backend_name, label_list)
         print ''.join(lines)
 
-    def compile_loop(self, inputargs, operations, looptoken, log=True):
-        return self.assembler.assemble_loop(inputargs, operations, looptoken,
-                                            log=log)
+    def compile_loop(self, inputargs, operations, looptoken, log=True, name=''):
+        return self.assembler.assemble_loop(name, inputargs, operations,
+                                            looptoken, log=log)
 
     def compile_bridge(self, faildescr, inputargs, operations,
                        original_loop_token, log=True):
@@ -122,8 +124,8 @@
         addr = executable_token._x86_bootstrap_code
         #llop.debug_print(lltype.Void, ">>>> Entering", addr)
         func = rffi.cast(lltype.Ptr(self.BOOTSTRAP_TP), addr)
+        fail_index = self._execute_call(func)
         #llop.debug_print(lltype.Void, "<<<< Back")
-        fail_index = self._execute_call(func)
         return self.get_fail_descr_from_number(fail_index)
 
     def _execute_call(self, func):
@@ -140,10 +142,11 @@
                 LLInterpreter.current_interpreter = prev_interpreter
         return res
 
-    @staticmethod
     def cast_ptr_to_int(x):
         adr = llmemory.cast_ptr_to_adr(x)
         return CPU386.cast_adr_to_int(adr)
+    cast_ptr_to_int._annspecialcase_ = 'specialize:arglltype(0)'
+    cast_ptr_to_int = staticmethod(cast_ptr_to_int)
 
     all_null_registers = lltype.malloc(rffi.LONGP.TO, 24,
                                        flavor='raw', zero=True,
diff --git a/pypy/jit/backend/x86/rx86.py b/pypy/jit/backend/x86/rx86.py
--- a/pypy/jit/backend/x86/rx86.py
+++ b/pypy/jit/backend/x86/rx86.py
@@ -530,6 +530,7 @@
     POP_b = insn(rex_nw, '\x8F', orbyte(0<<3), stack_bp(1))
 
     LEA_rb = insn(rex_w, '\x8D', register(1,8), stack_bp(2))
+    LEA_rs = insn(rex_w, '\x8D', register(1,8), stack_sp(2))
     LEA32_rb = insn(rex_w, '\x8D', register(1,8),stack_bp(2,force_32bits=True))
     LEA_ra = insn(rex_w, '\x8D', register(1, 8), mem_reg_plus_scaled_reg_plus_const(2))
     LEA_rm = insn(rex_w, '\x8D', register(1, 8), mem_reg_plus_const(2))
diff --git a/pypy/jit/backend/x86/test/test_gc_integration.py b/pypy/jit/backend/x86/test/test_gc_integration.py
--- a/pypy/jit/backend/x86/test/test_gc_integration.py
+++ b/pypy/jit/backend/x86/test/test_gc_integration.py
@@ -16,7 +16,7 @@
 from pypy.rpython.lltypesystem import lltype, llmemory, rffi
 from pypy.rpython.annlowlevel import llhelper
 from pypy.rpython.lltypesystem import rclass, rstr
-from pypy.jit.backend.llsupport.gc import GcLLDescr_framework, GcRefList, GcPtrFieldDescr
+from pypy.jit.backend.llsupport.gc import GcLLDescr_framework, GcPtrFieldDescr
 
 from pypy.jit.backend.x86.test.test_regalloc import MockAssembler
 from pypy.jit.backend.x86.test.test_regalloc import BaseTestRegalloc
@@ -51,11 +51,9 @@
     gcrootmap = MockGcRootMap()
 
     def initialize(self):
-        self.gcrefs = GcRefList()
-        self.gcrefs.initialize()
-        self.single_gcref_descr = GcPtrFieldDescr('', 0)
+        pass
 
-    replace_constptrs_with_getfield_raw = GcLLDescr_framework.replace_constptrs_with_getfield_raw.im_func
+    record_constptrs = GcLLDescr_framework.record_constptrs.im_func
     rewrite_assembler = GcLLDescr_framework.rewrite_assembler.im_func
 
 class TestRegallocDirectGcIntegration(object):
diff --git a/pypy/jit/backend/x86/test/test_runner.py b/pypy/jit/backend/x86/test/test_runner.py
--- a/pypy/jit/backend/x86/test/test_runner.py
+++ b/pypy/jit/backend/x86/test/test_runner.py
@@ -330,6 +330,7 @@
                         assert result != expected
 
     def test_compile_bridge_check_profile_info(self):
+        py.test.skip("does not work, reinvestigate")
         class FakeProfileAgent(object):
             def __init__(self):
                 self.functions = []
@@ -362,7 +363,7 @@
         operations[3].setfailargs([i1])
         self.cpu.compile_loop(inputargs, operations, looptoken)
         name, loopaddress, loopsize = agent.functions[0]
-        assert name == "Loop # 17: hello"
+        assert name == "Loop # 17: hello (loop counter 0)"
         assert loopaddress <= looptoken._x86_loop_code
         assert loopsize >= 40 # randomish number
 
@@ -378,7 +379,7 @@
 
         self.cpu.compile_bridge(faildescr1, [i1b], bridge, looptoken)
         name, address, size = agent.functions[1]
-        assert name == "Bridge # 0: bye"
+        assert name == "Bridge # 0: bye (loop counter 1)"
         # Would be exactly ==, but there are some guard failure recovery
         # stubs in-between
         assert address >= loopaddress + loopsize
diff --git a/pypy/jit/backend/x86/test/test_zrpy_gc.py b/pypy/jit/backend/x86/test/test_zrpy_gc.py
--- a/pypy/jit/backend/x86/test/test_zrpy_gc.py
+++ b/pypy/jit/backend/x86/test/test_zrpy_gc.py
@@ -1,8 +1,7 @@
 """
-This is a test that translates a complete JIT to C and runs it.  It is
-not testing much, expect that it basically works.  What it *is* testing,
-however, is the correct handling of GC, i.e. if objects are freed as
-soon as possible (at least in a simple case).
+This is a test that translates a complete JIT together with a GC and runs it.
+It is testing that the GC-dependent aspects basically work, mostly the mallocs
+and the various cases of write barrier.
 """
 
 import weakref
@@ -10,16 +9,11 @@
 from pypy.annotation import policy as annpolicy
 from pypy.rlib import rgc
 from pypy.rpython.lltypesystem import lltype, llmemory, rffi
-from pypy.rpython.lltypesystem.lloperation import llop
 from pypy.rlib.jit import JitDriver, dont_look_inside
 from pypy.rlib.jit import purefunction, unroll_safe
-from pypy.jit.backend.x86.runner import CPU386
-from pypy.jit.backend.llsupport.gc import GcRefList, GcRootMap_asmgcc
 from pypy.jit.backend.llsupport.gc import GcLLDescr_framework
 from pypy.tool.udir import udir
-from pypy.jit.backend.x86.arch import IS_X86_64
 from pypy.config.translationoption import DEFL_GC
-import py.test
 
 class X(object):
     def __init__(self, x=0):
@@ -86,7 +80,7 @@
     #
     return {(gc.GcLLDescr_framework, 'can_inline_malloc'): can_inline_malloc2}
 
-def compile(f, gc, **kwds):
+def compile(f, gc, enable_opts='', **kwds):
     from pypy.annotation.listdef import s_list_of_strings
     from pypy.translator.translator import TranslationContext
     from pypy.jit.metainterp.warmspot import apply_jit
@@ -110,14 +104,14 @@
                 old_value[obj, attr] = getattr(obj, attr)
                 setattr(obj, attr, value)
             #
-            apply_jit(t, enable_opts='')
+            apply_jit(t, enable_opts=enable_opts)
             #
         finally:
             for (obj, attr), oldvalue in old_value.items():
                 setattr(obj, attr, oldvalue)
 
     cbuilder = genc.CStandaloneBuilder(t, f, t.config)
-    cbuilder.generate_source()
+    cbuilder.generate_source(defines=cbuilder.DEBUG_DEFINES)
     cbuilder.compile()
     return cbuilder
 
@@ -154,8 +148,10 @@
 
 # ______________________________________________________________________
 
-class CompileFrameworkTests(object):
-    # Test suite using (so far) the minimark GC.
+
+class BaseFrameworkTests(object):
+    compile_kwds = {}
+
     def setup_class(cls):
         funcs = []
         name_to_func = {}
@@ -205,7 +201,8 @@
         try:
             GcLLDescr_framework.DEBUG = True
             cls.cbuilder = compile(get_entry(allfuncs), DEFL_GC,
-                                   gcrootfinder=cls.gcrootfinder, jit=True)
+                                   gcrootfinder=cls.gcrootfinder, jit=True,
+                                   **cls.compile_kwds)
         finally:
             GcLLDescr_framework.DEBUG = OLD_DEBUG
 
@@ -224,32 +221,36 @@
     def run_orig(self, name, n, x):
         self.main_allfuncs(name, n, x)
 
-    def define_libffi_workaround(cls):
-        # XXX: this is a workaround for a bug in database.py.  It seems that
-        # the problem is triggered by optimizeopt/fficall.py, and in
-        # particular by the ``cast_base_ptr_to_instance(Func, llfunc)``: in
-        # these tests, that line is the only place where libffi.Func is
-        # referenced.
-        #
-        # The problem occurs because the gctransformer tries to annotate a
-        # low-level helper to call the __del__ of libffi.Func when it's too
-        # late.
-        #
-        # This workaround works by forcing the annotator (and all the rest of
-        # the toolchain) to see libffi.Func in a "proper" context, not just as
-        # the target of cast_base_ptr_to_instance.  Note that the function
-        # below is *never* called by any actual test, it's just annotated.
-        #
-        from pypy.rlib.libffi import get_libc_name, CDLL, types, ArgChain
-        libc_name = get_libc_name()
-        def f(n, x, *args):
-            libc = CDLL(libc_name)
-            ptr = libc.getpointer('labs', [types.slong], types.slong)
-            chain = ArgChain()
-            chain.arg(n)
-            n = ptr.call(chain, lltype.Signed)
-            return (n, x) + args
-        return None, f, None
+
+class CompileFrameworkTests(BaseFrameworkTests):
+    # Test suite using (so far) the minimark GC.
+
+##    def define_libffi_workaround(cls):
+##        # XXX: this is a workaround for a bug in database.py.  It seems that
+##        # the problem is triggered by optimizeopt/fficall.py, and in
+##        # particular by the ``cast_base_ptr_to_instance(Func, llfunc)``: in
+##        # these tests, that line is the only place where libffi.Func is
+##        # referenced.
+##        #
+##        # The problem occurs because the gctransformer tries to annotate a
+##        # low-level helper to call the __del__ of libffi.Func when it's too
+##        # late.
+##        #
+##        # This workaround works by forcing the annotator (and all the rest of
+##        # the toolchain) to see libffi.Func in a "proper" context, not just as
+##        # the target of cast_base_ptr_to_instance.  Note that the function
+##        # below is *never* called by any actual test, it's just annotated.
+##        #
+##        from pypy.rlib.libffi import get_libc_name, CDLL, types, ArgChain
+##        libc_name = get_libc_name()
+##        def f(n, x, *args):
+##            libc = CDLL(libc_name)
+##            ptr = libc.getpointer('labs', [types.slong], types.slong)
+##            chain = ArgChain()
+##            chain.arg(n)
+##            n = ptr.call(chain, lltype.Signed)
+##            return (n, x) + args
+##        return None, f, None
 
     def define_compile_framework_1(cls):
         # a moving GC.  Supports malloc_varsize_nonmovable.  Simple test, works
@@ -456,6 +457,73 @@
     def test_compile_framework_7(self):
         self.run('compile_framework_7')
 
+    def define_compile_framework_8(cls):
+        # Array of pointers, of unknown length (test write_barrier_from_array)
+        def before(n, x):
+            return n, x, None, None, None, None, None, None, None, None, [X(123)], None
+        def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
+            if n < 1900:
+                check(l[0].x == 123)
+                l = [None] * (16 + (n & 7))
+                l[0] = X(123)
+                l[1] = X(n)
+                l[2] = X(n+10)
+                l[3] = X(n+20)
+                l[4] = X(n+30)
+                l[5] = X(n+40)
+                l[6] = X(n+50)
+                l[7] = X(n+60)
+                l[8] = X(n+70)
+                l[9] = X(n+80)
+                l[10] = X(n+90)
+                l[11] = X(n+100)
+                l[12] = X(n+110)
+                l[13] = X(n+120)
+                l[14] = X(n+130)
+                l[15] = X(n+140)
+            if n < 1800:
+                check(len(l) == 16 + (n & 7))
+                check(l[0].x == 123)
+                check(l[1].x == n)
+                check(l[2].x == n+10)
+                check(l[3].x == n+20)
+                check(l[4].x == n+30)
+                check(l[5].x == n+40)
+                check(l[6].x == n+50)
+                check(l[7].x == n+60)
+                check(l[8].x == n+70)
+                check(l[9].x == n+80)
+                check(l[10].x == n+90)
+                check(l[11].x == n+100)
+                check(l[12].x == n+110)
+                check(l[13].x == n+120)
+                check(l[14].x == n+130)
+                check(l[15].x == n+140)
+            n -= x.foo
+            return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
+        def after(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
+            check(len(l) >= 16)
+            check(l[0].x == 123)
+            check(l[1].x == 2)
+            check(l[2].x == 12)
+            check(l[3].x == 22)
+            check(l[4].x == 32)
+            check(l[5].x == 42)
+            check(l[6].x == 52)
+            check(l[7].x == 62)
+            check(l[8].x == 72)
+            check(l[9].x == 82)
+            check(l[10].x == 92)
+            check(l[11].x == 102)
+            check(l[12].x == 112)
+            check(l[13].x == 122)
+            check(l[14].x == 132)
+            check(l[15].x == 142)
+        return before, f, after
+
+    def test_compile_framework_8(self):
+        self.run('compile_framework_8')
+
     def define_compile_framework_external_exception_handling(cls):
         def before(n, x):
             x = X(0)
diff --git a/pypy/jit/backend/x86/test/test_zrpy_gc.py b/pypy/jit/backend/x86/test/test_zrpy_releasegil.py
copy from pypy/jit/backend/x86/test/test_zrpy_gc.py
copy to pypy/jit/backend/x86/test/test_zrpy_releasegil.py
--- a/pypy/jit/backend/x86/test/test_zrpy_gc.py
+++ b/pypy/jit/backend/x86/test/test_zrpy_releasegil.py
@@ -1,618 +1,110 @@
-"""
-This is a test that translates a complete JIT to C and runs it.  It is
-not testing much, expect that it basically works.  What it *is* testing,
-however, is the correct handling of GC, i.e. if objects are freed as
-soon as possible (at least in a simple case).
-"""
+from pypy.rpython.lltypesystem import lltype, llmemory, rffi
+from pypy.rlib.jit import dont_look_inside
+from pypy.jit.metainterp.optimizeopt import ALL_OPTS_NAMES
 
-import weakref
-import py, os
-from pypy.annotation import policy as annpolicy
-from pypy.rlib import rgc
-from pypy.rpython.lltypesystem import lltype, llmemory, rffi
-from pypy.rpython.lltypesystem.lloperation import llop
-from pypy.rlib.jit import JitDriver, dont_look_inside
-from pypy.rlib.jit import purefunction, unroll_safe
-from pypy.jit.backend.x86.runner import CPU386
-from pypy.jit.backend.llsupport.gc import GcRefList, GcRootMap_asmgcc
-from pypy.jit.backend.llsupport.gc import GcLLDescr_framework
-from pypy.tool.udir import udir
-from pypy.jit.backend.x86.arch import IS_X86_64
-from pypy.config.translationoption import DEFL_GC
-import py.test
+from pypy.rlib.libffi import CDLL, types, ArgChain, clibffi
+from pypy.rpython.lltypesystem.ll2ctypes import libc_name
+from pypy.rpython.annlowlevel import llhelper
 
-class X(object):
-    def __init__(self, x=0):
-        self.x = x
+from pypy.jit.backend.x86.test.test_zrpy_gc import BaseFrameworkTests
+from pypy.jit.backend.x86.test.test_zrpy_gc import check
 
-    next = None
 
-class CheckError(Exception):
-    pass
+class ReleaseGILTests(BaseFrameworkTests):
+    compile_kwds = dict(enable_opts=ALL_OPTS_NAMES, thread=True)
 
-def check(flag):
-    if not flag:
-        raise CheckError
-
-def get_g(main):
-    main._dont_inline_ = True
-    def g(name, n):
-        x = X()
-        x.foo = 2
-        main(n, x)
-        x.foo = 5
-        return weakref.ref(x)
-    g._dont_inline_ = True
-    return g
-
-
-def get_entry(g):
-
-    def entrypoint(args):
-        name = ''
-        n = 2000
-        argc = len(args)
-        if argc > 1:
-            name = args[1]
-        if argc > 2:
-            n = int(args[2])
-        r_list = []
-        for i in range(20):
-            r = g(name, n)
-            r_list.append(r)
-            rgc.collect()
-        rgc.collect(); rgc.collect()
-        freed = 0
-        for r in r_list:
-            if r() is None:
-                freed += 1
-        print freed
-        return 0
-
-    return entrypoint
-
-
-def get_functions_to_patch():
-    from pypy.jit.backend.llsupport import gc
-    #
-    can_inline_malloc1 = gc.GcLLDescr_framework.can_inline_malloc
-    def can_inline_malloc2(*args):
-        try:
-            if os.environ['PYPY_NO_INLINE_MALLOC']:
-                return False
-        except KeyError:
+    def define_simple(self):
+        class Glob:
             pass
-        return can_inline_malloc1(*args)
-    #
-    return {(gc.GcLLDescr_framework, 'can_inline_malloc'): can_inline_malloc2}
-
-def compile(f, gc, **kwds):
-    from pypy.annotation.listdef import s_list_of_strings
-    from pypy.translator.translator import TranslationContext
-    from pypy.jit.metainterp.warmspot import apply_jit
-    from pypy.translator.c import genc
-    #
-    t = TranslationContext()
-    t.config.translation.gc = gc
-    if gc != 'boehm':
-        t.config.translation.gcremovetypeptr = True
-    for name, value in kwds.items():
-        setattr(t.config.translation, name, value)
-    ann = t.buildannotator(policy=annpolicy.StrictAnnotatorPolicy())
-    ann.build_types(f, [s_list_of_strings], main_entry_point=True)
-    t.buildrtyper().specialize()
-
-    if kwds['jit']:
-        patch = get_functions_to_patch()
-        old_value = {}
-        try:
-            for (obj, attr), value in patch.items():
-                old_value[obj, attr] = getattr(obj, attr)
-                setattr(obj, attr, value)
-            #
-            apply_jit(t, enable_opts='')
-            #
-        finally:
-            for (obj, attr), oldvalue in old_value.items():
-                setattr(obj, attr, oldvalue)
-
-    cbuilder = genc.CStandaloneBuilder(t, f, t.config)
-    cbuilder.generate_source()
-    cbuilder.compile()
-    return cbuilder
-
-def run(cbuilder, args=''):
-    #
-    pypylog = udir.join('test_zrpy_gc.log')
-    data = cbuilder.cmdexec(args, env={'PYPYLOG': ':%s' % pypylog})
-    return data.strip()
-
-def compile_and_run(f, gc, **kwds):
-    cbuilder = compile(f, gc, **kwds)
-    return run(cbuilder)
-
-
-
-def test_compile_boehm():
-    myjitdriver = JitDriver(greens = [], reds = ['n', 'x'])
-    @dont_look_inside
-    def see(lst, n):
-        assert len(lst) == 3
-        assert lst[0] == n+10
-        assert lst[1] == n+20
-        assert lst[2] == n+30
-    def main(n, x):
-        while n > 0:
-            myjitdriver.can_enter_jit(n=n, x=x)
-            myjitdriver.jit_merge_point(n=n, x=x)
-            y = X()
-            y.foo = x.foo
-            n -= y.foo
-            see([n+10, n+20, n+30], n)
-    res = compile_and_run(get_entry(get_g(main)), "boehm", jit=True)
-    assert int(res) >= 16
-
-# ______________________________________________________________________
-
-class CompileFrameworkTests(object):
-    # Test suite using (so far) the minimark GC.
-    def setup_class(cls):
-        funcs = []
-        name_to_func = {}
-        for fullname in dir(cls):
-            if not fullname.startswith('define'):
-                continue
-            definefunc = getattr(cls, fullname)
-            _, name = fullname.split('_', 1)
-            beforefunc, loopfunc, afterfunc = definefunc.im_func(cls)
-            if beforefunc is None:
-                def beforefunc(n, x):
-                    return n, x, None, None, None, None, None, None, None, None, None, ''
-            if afterfunc is None:
-                def afterfunc(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-                    pass
-            beforefunc.func_name = 'before_'+name
-            loopfunc.func_name = 'loop_'+name
-            afterfunc.func_name = 'after_'+name
-            funcs.append((beforefunc, loopfunc, afterfunc))
-            assert name not in name_to_func
-            name_to_func[name] = len(name_to_func)
-        print name_to_func
-        def allfuncs(name, n):
-            x = X()
-            x.foo = 2
-            main_allfuncs(name, n, x)
-            x.foo = 5
-            return weakref.ref(x)
-        def main_allfuncs(name, n, x):
-            num = name_to_func[name]
-            n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s = funcs[num][0](n, x)
-            while n > 0:
-                myjitdriver.can_enter_jit(num=num, n=n, x=x, x0=x0, x1=x1,
-                        x2=x2, x3=x3, x4=x4, x5=x5, x6=x6, x7=x7, l=l, s=s)
-                myjitdriver.jit_merge_point(num=num, n=n, x=x, x0=x0, x1=x1,
-                        x2=x2, x3=x3, x4=x4, x5=x5, x6=x6, x7=x7, l=l, s=s)
-
-                n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s = funcs[num][1](
-                        n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s)
-            funcs[num][2](n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s)
-        myjitdriver = JitDriver(greens = ['num'],
-                                reds = ['n', 'x', 'x0', 'x1', 'x2', 'x3', 'x4',
-                                        'x5', 'x6', 'x7', 'l', 's'])
-        cls.main_allfuncs = staticmethod(main_allfuncs)
-        cls.name_to_func = name_to_func
-        OLD_DEBUG = GcLLDescr_framework.DEBUG
-        try:
-            GcLLDescr_framework.DEBUG = True
-            cls.cbuilder = compile(get_entry(allfuncs), DEFL_GC,
-                                   gcrootfinder=cls.gcrootfinder, jit=True)
-        finally:
-            GcLLDescr_framework.DEBUG = OLD_DEBUG
-
-    def _run(self, name, n, env):
-        res = self.cbuilder.cmdexec("%s %d" %(name, n), env=env)
-        assert int(res) == 20
-
-    def run(self, name, n=2000):
-        pypylog = udir.join('TestCompileFramework.log')
-        env = {'PYPYLOG': ':%s' % pypylog,
-               'PYPY_NO_INLINE_MALLOC': '1'}
-        self._run(name, n, env)
-        env['PYPY_NO_INLINE_MALLOC'] = ''
-        self._run(name, n, env)
-
-    def run_orig(self, name, n, x):
-        self.main_allfuncs(name, n, x)
-
-    def define_libffi_workaround(cls):
-        # XXX: this is a workaround for a bug in database.py.  It seems that
-        # the problem is triggered by optimizeopt/fficall.py, and in
-        # particular by the ``cast_base_ptr_to_instance(Func, llfunc)``: in
-        # these tests, that line is the only place where libffi.Func is
-        # referenced.
+        glob = Glob()
         #
-        # The problem occurs because the gctransformer tries to annotate a
-        # low-level helper to call the __del__ of libffi.Func when it's too
-        # late.
-        #
-        # This workaround works by forcing the annotator (and all the rest of
-        # the toolchain) to see libffi.Func in a "proper" context, not just as
-        # the target of cast_base_ptr_to_instance.  Note that the function
-        # below is *never* called by any actual test, it's just annotated.
-        #
-        from pypy.rlib.libffi import get_libc_name, CDLL, types, ArgChain
-        libc_name = get_libc_name()
-        def f(n, x, *args):
-            libc = CDLL(libc_name)
-            ptr = libc.getpointer('labs', [types.slong], types.slong)
-            chain = ArgChain()
-            chain.arg(n)
-            n = ptr.call(chain, lltype.Signed)
-            return (n, x) + args
-        return None, f, None
-
-    def define_compile_framework_1(cls):
-        # a moving GC.  Supports malloc_varsize_nonmovable.  Simple test, works
-        # without write_barriers and root stack enumeration.
-        def f(n, x, *args):
-            y = X()
-            y.foo = x.foo
-            n -= y.foo
-            return (n, x) + args
-        return None, f, None
-
-    def test_compile_framework_1(self):
-        self.run('compile_framework_1')
-
-    def define_compile_framework_2(cls):
-        # More complex test, requires root stack enumeration but
-        # not write_barriers.
-        def f(n, x, *args):
-            prev = x
-            for j in range(101):    # f() runs 20'000 times, thus allocates
-                y = X()             # a total of 2'020'000 objects
-                y.foo = prev.foo
-                prev = y
-            n -= prev.foo
-            return (n, x) + args
-        return None, f, None
-
-    def test_compile_framework_2(self):
-        self.run('compile_framework_2')
-
-    def define_compile_framework_3(cls):
-        # Third version of the test.  Really requires write_barriers.
-        def f(n, x, *args):
-            x.next = None
-            for j in range(101):    # f() runs 20'000 times, thus allocates
-                y = X()             # a total of 2'020'000 objects
-                y.foo = j+1
-                y.next = x.next
-                x.next = y
-            check(x.next.foo == 101)
-            total = 0
-            y = x
-            for j in range(101):
-                y = y.next
-                total += y.foo
-            check(not y.next)
-            check(total == 101*102/2)
-            n -= x.foo
-            return (n, x) + args
-        return None, f, None
-
-
-
-    def test_compile_framework_3(self):
-        x_test = X()
-        x_test.foo = 5
-        self.run_orig('compile_framework_3', 6, x_test)     # check that it does not raise CheckError
-        self.run('compile_framework_3')
-
-    def define_compile_framework_3_extra(cls):
-        # Extra version of the test, with tons of live vars around the residual
-        # call that all contain a GC pointer.
-        @dont_look_inside
-        def residual(n=26):
-            x = X()
-            x.next = X()
-            x.next.foo = n
-            return x
+        def f42(n):
+            c_strchr = glob.c_strchr
+            raw = rffi.str2charp("foobar" + chr((n & 63) + 32))
+            argchain = ArgChain()
+            argchain = argchain.arg(rffi.cast(lltype.Signed, raw))
+            argchain = argchain.arg(rffi.cast(rffi.INT, ord('b')))
+            res = c_strchr.call(argchain, rffi.CCHARP)
+            check(rffi.charp2str(res) == "bar" + chr((n & 63) + 32))
+            rffi.free_charp(raw)
         #
         def before(n, x):
-            residual(5)
-            x0 = residual()
-            x1 = residual()
-            x2 = residual()
-            x3 = residual()
-            x4 = residual()
-            x5 = residual()
-            x6 = residual()
-            x7 = residual()
-            n *= 19
-            return n, None, x0, x1, x2, x3, x4, x5, x6, x7, None, None
-        def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            x8 = residual()
-            x9 = residual()
-            check(x0.next.foo == 26)
-            check(x1.next.foo == 26)
-            check(x2.next.foo == 26)
-            check(x3.next.foo == 26)
-            check(x4.next.foo == 26)
-            check(x5.next.foo == 26)
-            check(x6.next.foo == 26)
-            check(x7.next.foo == 26)
-            check(x8.next.foo == 26)
-            check(x9.next.foo == 26)
-            x0, x1, x2, x3, x4, x5, x6, x7 = x7, x4, x6, x5, x3, x2, x9, x8
+            libc = CDLL(libc_name)
+            c_strchr = libc.getpointer('strchr', [types.pointer, types.sint],
+                                       types.pointer)
+            glob.c_strchr = c_strchr
+            return (n, None, None, None, None, None,
+                    None, None, None, None, None, None)
+        #
+        def f(n, x, *args):
+            f42(n)
             n -= 1
-            return n, None, x0, x1, x2, x3, x4, x5, x6, x7, None, None
-        return before, f, None
-
-    def test_compile_framework_3_extra(self):
-        self.run_orig('compile_framework_3_extra', 6, None)     # check that it does not raise CheckError
-        self.run('compile_framework_3_extra')
-
-    def define_compile_framework_4(cls):
-        # Fourth version of the test, with __del__.
-        from pypy.rlib.debug import debug_print
-        class Counter:
-            cnt = 0
-        counter = Counter()
-        class Z:
-            def __del__(self):
-                counter.cnt -= 1
-        def before(n, x):
-            debug_print('counter.cnt =', counter.cnt)
-            check(counter.cnt < 5)
-            counter.cnt = n // x.foo
-            return n, x, None, None, None, None, None, None, None, None, None, None
-        def f(n, x, *args):
-            Z()
-            n -= x.foo
             return (n, x) + args
         return before, f, None
 
-    def test_compile_framework_4(self):
-        self.run('compile_framework_4')
+    def test_simple(self):
+        self.run('simple')
 
-    def define_compile_framework_5(cls):
-        # Test string manipulation.
-        def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            n -= x.foo
-            s += str(n)
-            return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
-        def after(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            check(len(s) == 1*5 + 2*45 + 3*450 + 4*500)
-        return None, f, after
-
-    def test_compile_framework_5(self):
-        self.run('compile_framework_5')
-
-    def define_compile_framework_7(cls):
-        # Array of pointers (test the write barrier for setarrayitem_gc)
+    def define_close_stack(self):
+        #
+        class Glob(object):
+            pass
+        glob = Glob()
+        class X(object):
+            pass
+        #
+        def callback(p1, p2):
+            for i in range(100):
+                glob.lst.append(X())
+            return rffi.cast(rffi.INT, 1)
+        CALLBACK = lltype.Ptr(lltype.FuncType([lltype.Signed,
+                                               lltype.Signed], rffi.INT))
+        #
+        @dont_look_inside
+        def alloc1():
+            return llmemory.raw_malloc(16)
+        @dont_look_inside
+        def free1(p):
+            llmemory.raw_free(p)
+        #
+        def f42():
+            length = len(glob.lst)
+            c_qsort = glob.c_qsort
+            raw = alloc1()
+            fn = llhelper(CALLBACK, rffi._make_wrapper_for(CALLBACK, callback))
+            argchain = ArgChain()
+            argchain = argchain.arg(rffi.cast(lltype.Signed, raw))
+            argchain = argchain.arg(rffi.cast(rffi.SIZE_T, 2))
+            argchain = argchain.arg(rffi.cast(rffi.SIZE_T, 8))
+            argchain = argchain.arg(rffi.cast(lltype.Signed, fn))
+            c_qsort.call(argchain, lltype.Void)
+            free1(raw)
+            check(len(glob.lst) > length)
+            del glob.lst[:]
+        #
         def before(n, x):
-            return n, x, None, None, None, None, None, None, None, None, [X(123)], None
-        def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            if n < 1900:
-                check(l[0].x == 123)
-                l = [None] * 16
-                l[0] = X(123)
-                l[1] = X(n)
-                l[2] = X(n+10)
-                l[3] = X(n+20)
-                l[4] = X(n+30)
-                l[5] = X(n+40)
-                l[6] = X(n+50)
-                l[7] = X(n+60)
-                l[8] = X(n+70)
-                l[9] = X(n+80)
-                l[10] = X(n+90)
-                l[11] = X(n+100)
-                l[12] = X(n+110)
-                l[13] = X(n+120)
-                l[14] = X(n+130)
-                l[15] = X(n+140)
-            if n < 1800:
-                check(len(l) == 16)
-                check(l[0].x == 123)
-                check(l[1].x == n)
-                check(l[2].x == n+10)
-                check(l[3].x == n+20)
-                check(l[4].x == n+30)
-                check(l[5].x == n+40)
-                check(l[6].x == n+50)
-                check(l[7].x == n+60)
-                check(l[8].x == n+70)
-                check(l[9].x == n+80)
-                check(l[10].x == n+90)
-                check(l[11].x == n+100)
-                check(l[12].x == n+110)
-                check(l[13].x == n+120)
-                check(l[14].x == n+130)
-                check(l[15].x == n+140)
-            n -= x.foo
-            return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
-        def after(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            check(len(l) == 16)
-            check(l[0].x == 123)
-            check(l[1].x == 2)
-            check(l[2].x == 12)
-            check(l[3].x == 22)
-            check(l[4].x == 32)
-            check(l[5].x == 42)
-            check(l[6].x == 52)
-            check(l[7].x == 62)
-            check(l[8].x == 72)
-            check(l[9].x == 82)
-            check(l[10].x == 92)
-            check(l[11].x == 102)
-            check(l[12].x == 112)
-            check(l[13].x == 122)
-            check(l[14].x == 132)
-            check(l[15].x == 142)
-        return before, f, after
-
-    def test_compile_framework_7(self):
-        self.run('compile_framework_7')
-
-    def define_compile_framework_external_exception_handling(cls):
-        def before(n, x):
-            x = X(0)
-            return n, x, None, None, None, None, None, None, None, None, None, None
-
-        @dont_look_inside
-        def g(x):
-            if x > 200:
-                return 2
-            raise ValueError
-        @dont_look_inside
-        def h(x):
-            if x > 150:
-                raise ValueError
-            return 2
-
-        def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            try:
-                x.x += g(n)
-            except ValueError:
-                x.x += 1
-            try:
-                x.x += h(n)
-            except ValueError:
-                x.x -= 1
+            libc = CDLL(libc_name)
+            types_size_t = clibffi.cast_type_to_ffitype(rffi.SIZE_T)
+            c_qsort = libc.getpointer('qsort', [types.pointer, types_size_t,
+                                                types_size_t, types.pointer],
+                                      types.void)
+            glob.c_qsort = c_qsort
+            glob.lst = []
+            return (n, None, None, None, None, None,
+                    None, None, None, None, None, None)
+        #
+        def f(n, x, *args):
+            f42()
             n -= 1
-            return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
-
-        def after(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            check(x.x == 1800 * 2 + 1850 * 2 + 200 - 150)
-
+            return (n, x) + args
         return before, f, None
 
-    def test_compile_framework_external_exception_handling(self):
-        self.run('compile_framework_external_exception_handling')
+    def test_close_stack(self):
+        self.run('close_stack')
 
-    def define_compile_framework_bug1(self):
-        @purefunction
-        def nonmoving():
-            x = X(1)
-            for i in range(7):
-                rgc.collect()
-            return x
 
-        @dont_look_inside
-        def do_more_stuff():
-            x = X(5)
-            for i in range(7):
-                rgc.collect()
-            return x
-
-        def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            x0 = do_more_stuff()
-            check(nonmoving().x == 1)
-            n -= 1
-            return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
-
-        return None, f, None
-
-    def test_compile_framework_bug1(self):
-        self.run('compile_framework_bug1', 200)
-
-    def define_compile_framework_vref(self):
-        from pypy.rlib.jit import virtual_ref, virtual_ref_finish
-        class A:
-            pass
-        glob = A()
-        def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            a = A()
-            glob.v = vref = virtual_ref(a)
-            virtual_ref_finish(vref, a)
-            n -= 1
-            return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
-        return None, f, None
-
-    def test_compile_framework_vref(self):
-        self.run('compile_framework_vref', 200)
-
-    def define_compile_framework_float(self):
-        # test for a bug: the fastpath_malloc does not save and restore
-        # xmm registers around the actual call to the slow path
-        class A:
-            x0 = x1 = x2 = x3 = x4 = x5 = x6 = x7 = 0
-        @dont_look_inside
-        def escape1(a):
-            a.x0 += 0
-            a.x1 += 6
-            a.x2 += 12
-            a.x3 += 18
-            a.x4 += 24
-            a.x5 += 30
-            a.x6 += 36
-            a.x7 += 42
-        @dont_look_inside
-        def escape2(n, f0, f1, f2, f3, f4, f5, f6, f7):
-            check(f0 == n + 0.0)
-            check(f1 == n + 0.125)
-            check(f2 == n + 0.25)
-            check(f3 == n + 0.375)
-            check(f4 == n + 0.5)
-            check(f5 == n + 0.625)
-            check(f6 == n + 0.75)
-            check(f7 == n + 0.875)
-        @unroll_safe
-        def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            i = 0
-            while i < 42:
-                m = n + i
-                f0 = m + 0.0
-                f1 = m + 0.125
-                f2 = m + 0.25
-                f3 = m + 0.375
-                f4 = m + 0.5
-                f5 = m + 0.625
-                f6 = m + 0.75
-                f7 = m + 0.875
-                a1 = A()
-                # at this point, all or most f's are still in xmm registers
-                escape1(a1)
-                escape2(m, f0, f1, f2, f3, f4, f5, f6, f7)
-                i += 1
-            n -= 1
-            return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
-        return None, f, None
-
-    def test_compile_framework_float(self):
-        self.run('compile_framework_float')
-
-    def define_compile_framework_minimal_size_in_nursery(self):
-        S = lltype.GcStruct('S')    # no fields!
-        T = lltype.GcStruct('T', ('i', lltype.Signed))
-        @unroll_safe
-        def f42(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            lst1 = []
-            lst2 = []
-            i = 0
-            while i < 42:
-                s1 = lltype.malloc(S)
-                t1 = lltype.malloc(T)
-                t1.i = 10000 + i + n
-                lst1.append(s1)
-                lst2.append(t1)
-                i += 1
-            i = 0
-            while i < 42:
-                check(lst2[i].i == 10000 + i + n)
-                i += 1
-            n -= 1
-            return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
-        return None, f42, None
-
-    def test_compile_framework_minimal_size_in_nursery(self):
-        self.run('compile_framework_minimal_size_in_nursery')
-
-
-class TestShadowStack(CompileFrameworkTests):
+class TestShadowStack(ReleaseGILTests):
     gcrootfinder = "shadowstack"
 
-class TestAsmGcc(CompileFrameworkTests):
+class TestAsmGcc(ReleaseGILTests):
     gcrootfinder = "asmgcc"
diff --git a/pypy/jit/codewriter/assembler.py b/pypy/jit/codewriter/assembler.py
--- a/pypy/jit/codewriter/assembler.py
+++ b/pypy/jit/codewriter/assembler.py
@@ -76,7 +76,8 @@
                 TYPE = llmemory.Address
             if TYPE == llmemory.Address:
                 value = heaptracker.adr2int(value)
-            elif not isinstance(value, ComputedIntSymbolic):
+            if not isinstance(value, (llmemory.AddressAsInt,
+                                      ComputedIntSymbolic)):
                 value = lltype.cast_primitive(lltype.Signed, value)
                 if allow_short and -128 <= value <= 127:
                     # emit the constant as a small integer
diff --git a/pypy/jit/codewriter/call.py b/pypy/jit/codewriter/call.py
--- a/pypy/jit/codewriter/call.py
+++ b/pypy/jit/codewriter/call.py
@@ -237,6 +237,8 @@
             self.readwrite_analyzer.analyze(op), self.cpu, extraeffect,
             oopspecindex, can_invalidate)
         #
+        if oopspecindex != EffectInfo.OS_NONE:
+            assert effectinfo is not None
         if pure or loopinvariant:
             assert effectinfo is not None
             assert extraeffect != EffectInfo.EF_FORCES_VIRTUAL_OR_VIRTUALIZABLE
diff --git a/pypy/jit/codewriter/effectinfo.py b/pypy/jit/codewriter/effectinfo.py
--- a/pypy/jit/codewriter/effectinfo.py
+++ b/pypy/jit/codewriter/effectinfo.py
@@ -108,6 +108,9 @@
     def check_forces_virtual_or_virtualizable(self):
         return self.extraeffect >= self.EF_FORCES_VIRTUAL_OR_VIRTUALIZABLE
 
+    def has_random_effects(self):
+        return self.oopspecindex == self.OS_LIBFFI_CALL
+
 def effectinfo_from_writeanalyze(effects, cpu,
                                  extraeffect=EffectInfo.EF_CAN_RAISE,
                                  oopspecindex=EffectInfo.OS_NONE,
diff --git a/pypy/jit/codewriter/jtransform.py b/pypy/jit/codewriter/jtransform.py
--- a/pypy/jit/codewriter/jtransform.py
+++ b/pypy/jit/codewriter/jtransform.py
@@ -768,10 +768,10 @@
         from pypy.rpython.lltypesystem.rffi import size_and_sign, sizeof
         from pypy.rlib.rarithmetic import intmask
         assert not self._is_gc(op.args[0])
-        size1, unsigned1 = size_and_sign(op.args[0].concretetype)
         size2, unsigned2 = size_and_sign(op.result.concretetype)
         if size2 >= sizeof(lltype.Signed):
             return     # the target type is LONG or ULONG
+        size1, unsigned1 = size_and_sign(op.args[0].concretetype)
         #
         def bounds(size, unsigned):
             if unsigned:
diff --git a/pypy/jit/metainterp/compile.py b/pypy/jit/metainterp/compile.py
--- a/pypy/jit/metainterp/compile.py
+++ b/pypy/jit/metainterp/compile.py
@@ -4,6 +4,7 @@
 from pypy.objspace.flow.model import Constant, Variable
 from pypy.rlib.objectmodel import we_are_translated
 from pypy.rlib.debug import debug_start, debug_stop
+from pypy.rlib import rstack
 from pypy.conftest import option
 from pypy.tool.sourcetools import func_with_new_name
 
@@ -156,6 +157,7 @@
 def send_loop_to_backend(greenkey, jitdriver_sd, metainterp_sd, loop, type):
     jitdriver_sd.on_compile(metainterp_sd.logger_ops, loop.token,
                             loop.operations, type, greenkey)
+    loopname = jitdriver_sd.warmstate.get_location_str(greenkey)
     globaldata = metainterp_sd.globaldata
     loop_token = loop.token
     loop_token.number = n = globaldata.loopnumbering
@@ -170,7 +172,7 @@
     debug_start("jit-backend")
     try:
         ops_offset = metainterp_sd.cpu.compile_loop(loop.inputargs, operations,
-                                                    loop.token)
+                                                    loop.token, name=loopname)
     finally:
         debug_stop("jit-backend")
     metainterp_sd.profiler.end_backend()
@@ -452,9 +454,17 @@
         # Called during a residual call from the assembler, if the code
         # actually needs to force one of the virtualrefs or the virtualizable.
         # Implemented by forcing *all* virtualrefs and the virtualizable.
-        faildescr = cpu.force(token)
-        assert isinstance(faildescr, ResumeGuardForcedDescr)
-        faildescr.handle_async_forcing(token)
+
+        # don't interrupt me! If the stack runs out in force_from_resumedata()
+        # then we have seen cpu.force() but not self.save_data(), leaving in
+        # an inconsistent state
+        rstack._stack_criticalcode_start()
+        try:
+            faildescr = cpu.force(token)
+            assert isinstance(faildescr, ResumeGuardForcedDescr)
+            faildescr.handle_async_forcing(token)
+        finally:
+            rstack._stack_criticalcode_stop()
 
     def handle_async_forcing(self, force_token):
         from pypy.jit.metainterp.resume import force_from_resumedata
diff --git a/pypy/jit/metainterp/executor.py b/pypy/jit/metainterp/executor.py
--- a/pypy/jit/metainterp/executor.py
+++ b/pypy/jit/metainterp/executor.py
@@ -82,9 +82,6 @@
 do_call_loopinvariant = do_call
 do_call_may_force = do_call
 
-def do_call_c(cpu, metainterp, argboxes, descr):
-    raise NotImplementedError("Should never be called directly")
-
 def do_getarrayitem_gc(cpu, _, arraybox, indexbox, arraydescr):
     array = arraybox.getref_base()
     index = indexbox.getint()
@@ -322,6 +319,7 @@
                          rop.DEBUG_MERGE_POINT,
                          rop.JIT_DEBUG,
                          rop.SETARRAYITEM_RAW,
+                         rop.CALL_RELEASE_GIL,
                          rop.QUASIIMMUT_FIELD,
                          ):      # list of opcodes never executed by pyjitpl
                 continue
diff --git a/pypy/jit/metainterp/history.py b/pypy/jit/metainterp/history.py
--- a/pypy/jit/metainterp/history.py
+++ b/pypy/jit/metainterp/history.py
@@ -712,10 +712,14 @@
         return -2      # xxx risk of changing hash...
 
 def make_hashable_int(i):
+    from pypy.rpython.lltypesystem.ll2ctypes import NotCtypesAllocatedStructure
     if not we_are_translated() and isinstance(i, llmemory.AddressAsInt):
         # Warning: such a hash changes at the time of translation
         adr = heaptracker.int2adr(i)
-        return llmemory.cast_adr_to_int(adr, "emulated")
+        try:
+            return llmemory.cast_adr_to_int(adr, "emulated")
+        except NotCtypesAllocatedStructure:
+            return 12345 # use an arbitrary number for the hash
     return i
 
 def get_const_ptr_for_string(s):
@@ -792,6 +796,7 @@
     operations = None
     token = None
     call_pure_results = None
+    logops = None
     quasi_immutable_deps = None
 
     def __init__(self, name):
diff --git a/pypy/jit/metainterp/logger.py b/pypy/jit/metainterp/logger.py
--- a/pypy/jit/metainterp/logger.py
+++ b/pypy/jit/metainterp/logger.py
@@ -11,47 +11,71 @@
 
     def __init__(self, metainterp_sd, guard_number=False):
         self.metainterp_sd = metainterp_sd
-        self.ts = metainterp_sd.cpu.ts
         self.guard_number = guard_number
 
     def log_loop(self, inputargs, operations, number=0, type=None, ops_offset=None):
         if type is None:
             debug_start("jit-log-noopt-loop")
-            self._log_operations(inputargs, operations, ops_offset)
+            logops = self._log_operations(inputargs, operations, ops_offset)
             debug_stop("jit-log-noopt-loop")
         else:
             debug_start("jit-log-opt-loop")
             debug_print("# Loop", number, ":", type,
                         "with", len(operations), "ops")
-            self._log_operations(inputargs, operations, ops_offset)
+            logops = self._log_operations(inputargs, operations, ops_offset)
             debug_stop("jit-log-opt-loop")
+        return logops
 
     def log_bridge(self, inputargs, operations, number=-1, ops_offset=None):
         if number == -1:
             debug_start("jit-log-noopt-bridge")
-            self._log_operations(inputargs, operations, ops_offset)
+            logops = self._log_operations(inputargs, operations, ops_offset)
             debug_stop("jit-log-noopt-bridge")
         else:
             debug_start("jit-log-opt-bridge")
             debug_print("# bridge out of Guard", number,
                         "with", len(operations), "ops")
-            self._log_operations(inputargs, operations, ops_offset)
+            logops = self._log_operations(inputargs, operations, ops_offset)
             debug_stop("jit-log-opt-bridge")
+        return logops
 
     def log_short_preamble(self, inputargs, operations):
         debug_start("jit-log-short-preamble")
-        self._log_operations(inputargs, operations, ops_offset=None)
-        debug_stop("jit-log-short-preamble")            
+        logops = self._log_operations(inputargs, operations, ops_offset=None)
+        debug_stop("jit-log-short-preamble")
+        return logops
+
+    def _log_operations(self, inputargs, operations, ops_offset):
+        if not have_debug_prints():
+            return None
+        logops = self._make_log_operations()
+        logops._log_operations(inputargs, operations, ops_offset)
+        return logops
+
+    def _make_log_operations(self):
+        return LogOperations(self.metainterp_sd, self.guard_number)
+
+
+class LogOperations(object):
+    """
+    ResOperation logger.  Each instance contains a memo giving numbers
+    to boxes, and is typically used to log a single loop.
+    """
+    def __init__(self, metainterp_sd, guard_number):
+        self.metainterp_sd = metainterp_sd
+        self.ts = metainterp_sd.cpu.ts
+        self.guard_number = guard_number
+        self.memo = {}
 
     def repr_of_descr(self, descr):
         return descr.repr_of_descr()
 
-    def repr_of_arg(self, memo, arg):
+    def repr_of_arg(self, arg):
         try:
-            mv = memo[arg]
+            mv = self.memo[arg]
         except KeyError:
-            mv = len(memo)
-            memo[arg] = mv
+            mv = len(self.memo)
+            self.memo[arg] = mv
         if isinstance(arg, ConstInt):
             if int_could_be_an_address(arg.value):
                 addr = arg.getaddr()
@@ -75,11 +99,11 @@
         else:
             return '?'
 
-    def repr_of_resop(self, memo, op, ops_offset=None):
+    def repr_of_resop(self, op, ops_offset=None):
         if op.getopnum() == rop.DEBUG_MERGE_POINT:
-            loc = op.getarg(0)._get_str()
-            reclev = op.getarg(1).getint()
-            return "debug_merge_point('%s', %s)" % (loc, reclev)
+            jd_sd = self.metainterp_sd.jitdrivers_sd[op.getarg(0).getint()]
+            s = jd_sd.warmstate.get_location_str(op.getarglist()[2:])
+            return "debug_merge_point(%d, '%s')" % (op.getarg(1).getint(), s)
         if ops_offset is None:
             offset = -1
         else:
@@ -88,9 +112,10 @@
             s_offset = ""
         else:
             s_offset = "+%d: " % offset
-        args = ", ".join([self.repr_of_arg(memo, op.getarg(i)) for i in range(op.numargs())])
+        args = ", ".join([self.repr_of_arg(op.getarg(i)) for i in range(op.numargs())])
+
         if op.result is not None:
-            res = self.repr_of_arg(memo, op.result) + " = "
+            res = self.repr_of_arg(op.result) + " = "
         else:
             res = ""
         is_guard = op.is_guard()
@@ -103,7 +128,7 @@
                 r = self.repr_of_descr(descr)
             args += ', descr=' +  r
         if is_guard and op.getfailargs() is not None:
-            fail_args = ' [' + ", ".join([self.repr_of_arg(memo, arg)
+            fail_args = ' [' + ", ".join([self.repr_of_arg(arg)
                                           for arg in op.getfailargs()]) + ']'
         else:
             fail_args = ''
@@ -114,13 +139,12 @@
             return
         if ops_offset is None:
             ops_offset = {}
-        memo = {}
         if inputargs is not None:
-            args = ", ".join([self.repr_of_arg(memo, arg) for arg in inputargs])
+            args = ", ".join([self.repr_of_arg(arg) for arg in inputargs])
             debug_print('[' + args + ']')
         for i in range(len(operations)):
             op = operations[i]
-            debug_print(self.repr_of_resop(memo, operations[i], ops_offset))
+            debug_print(self.repr_of_resop(operations[i], ops_offset))
         if ops_offset and None in ops_offset:
             offset = ops_offset[None]
             debug_print("+%d: --end of the loop--" % offset)
diff --git a/pypy/jit/metainterp/optimize.py b/pypy/jit/metainterp/optimize.py
--- a/pypy/jit/metainterp/optimize.py
+++ b/pypy/jit/metainterp/optimize.py
@@ -14,7 +14,8 @@
 
 def _optimize_loop(metainterp_sd, old_loop_tokens, loop, enable_opts):
     cpu = metainterp_sd.cpu
-    metainterp_sd.logger_noopt.log_loop(loop.inputargs, loop.operations)
+    loop.logops = metainterp_sd.logger_noopt.log_loop(loop.inputargs,
+                                                      loop.operations)
     # XXX do we really still need a list?
     if old_loop_tokens:
         return old_loop_tokens[0]
@@ -36,7 +37,8 @@
 def _optimize_bridge(metainterp_sd, old_loop_tokens, bridge, enable_opts,
                      inline_short_preamble, retraced=False):
     cpu = metainterp_sd.cpu
-    metainterp_sd.logger_noopt.log_loop(bridge.inputargs, bridge.operations)
+    bridge.logops = metainterp_sd.logger_noopt.log_loop(bridge.inputargs,
+                                                        bridge.operations)
     if old_loop_tokens:
         old_loop_token = old_loop_tokens[0]
         bridge.operations[-1].setdescr(old_loop_token)   # patch jump target
diff --git a/pypy/jit/metainterp/optimizeopt/fficall.py b/pypy/jit/metainterp/optimizeopt/fficall.py
--- a/pypy/jit/metainterp/optimizeopt/fficall.py
+++ b/pypy/jit/metainterp/optimizeopt/fficall.py
@@ -1,10 +1,13 @@
 from pypy.rpython.annlowlevel import cast_base_ptr_to_instance
 from pypy.rlib.objectmodel import we_are_translated
 from pypy.rlib.libffi import Func
+from pypy.rlib.debug import debug_start, debug_stop, debug_print, have_debug_prints
 from pypy.jit.codewriter.effectinfo import EffectInfo
 from pypy.jit.metainterp.resoperation import rop, ResOperation
 from pypy.jit.metainterp.optimizeutil import _findall
 from pypy.jit.metainterp.optimizeopt.optimizer import Optimization
+from pypy.jit.backend.llsupport.ffisupport import UnsupportedKind
+
 
 class FuncInfo(object):
 
@@ -12,14 +15,18 @@
     restype = None
     descr = None
     prepare_op = None
-    force_token_op = None
 
     def __init__(self, funcval, cpu, prepare_op):
         self.funcval = funcval
         self.opargs = []
         argtypes, restype = self._get_signature(funcval)
-        self.descr = cpu.calldescrof_dynamic(argtypes, restype)
+        try:
+            self.descr = cpu.calldescrof_dynamic(argtypes, restype)
+        except UnsupportedKind:
+            # e.g., I or U for long longs
+            self.descr = None
         self.prepare_op = prepare_op
+        self.delayed_ops = []
 
     def _get_signature(self, funcval):
         """
@@ -64,37 +71,51 @@
 
 class OptFfiCall(Optimization):
 
-    def __init__(self):
+    def setup(self):
         self.funcinfo = None
+        if self.optimizer.loop is not None:
+            self.logops = self.optimizer.loop.logops
+        else:
+            self.logops = None
+
+    def propagate_begin_forward(self):
+        debug_start('jit-log-ffiopt')
+        Optimization.propagate_begin_forward(self)
+
+    def propagate_end_forward(self):
+        debug_stop('jit-log-ffiopt')
+        Optimization.propagate_end_forward(self)
 
     def reconstruct_for_next_iteration(self, optimizer, valuemap):
         return OptFfiCall()
         # FIXME: Should any status be saved for next iteration?
 
     def begin_optimization(self, funcval, op):
-        self.rollback_maybe()
+        self.rollback_maybe('begin_optimization', op)
         self.funcinfo = FuncInfo(funcval, self.optimizer.cpu, op)
 
     def commit_optimization(self):
         self.funcinfo = None
 
-    def rollback_maybe(self):
+    def rollback_maybe(self, msg, op):
         if self.funcinfo is None:
             return # nothing to rollback
         #
         # we immediately set funcinfo to None to prevent recursion when
         # calling emit_op
+        if self.logops is not None:
+            debug_print('rollback: ' + msg + ': ', self.logops.repr_of_resop(op))
         funcinfo = self.funcinfo
         self.funcinfo = None
         self.emit_operation(funcinfo.prepare_op)
         for op in funcinfo.opargs:
             self.emit_operation(op)
-        if funcinfo.force_token_op:
-            self.emit_operation(funcinfo.force_token_op)
+        for delayed_op in funcinfo.delayed_ops:
+            self.emit_operation(delayed_op)
 
     def emit_operation(self, op):
         # we cannot emit any operation during the optimization
-        self.rollback_maybe()
+        self.rollback_maybe('invalid op', op)
         Optimization.emit_operation(self, op)
 
     def optimize_CALL(self, op):
@@ -135,13 +156,18 @@
         # call_may_force and the setfield_gc, so the final result we get is
         # again force_token/setfield_gc/call_may_force.
         #
+        # However, note that nowadays we also allow to have any setfield_gc
+        # between libffi_prepare and libffi_call, so while the comment above
+        # it's a bit superfluous, it has been left there for future reference.
         if self.funcinfo is None:
             self.emit_operation(op)
         else:
-            self.funcinfo.force_token_op = op
+            self.funcinfo.delayed_ops.append(op)
+
+    optimize_SETFIELD_GC = optimize_FORCE_TOKEN
 
     def do_prepare_call(self, op):
-        self.rollback_maybe()
+        self.rollback_maybe('prepare call', op)
         funcval = self._get_funcval(op)
         if not funcval.is_constant():
             return [op] # cannot optimize
@@ -165,16 +191,18 @@
         for push_op in funcinfo.opargs:
             argval = self.getvalue(push_op.getarg(2))
             arglist.append(argval.force_box())
-        newop = ResOperation(rop.CALL_MAY_FORCE, arglist, op.result,
+        newop = ResOperation(rop.CALL_RELEASE_GIL, arglist, op.result,
                              descr=funcinfo.descr)
         self.commit_optimization()
         ops = []
-        if funcinfo.force_token_op:
-            ops.append(funcinfo.force_token_op)
+        for delayed_op in funcinfo.delayed_ops:
+            ops.append(delayed_op)
         ops.append(newop)
         return ops
 
     def propagate_forward(self, op):
+        if self.logops is not None:
+            debug_print(self.logops.repr_of_resop(op))
         opnum = op.getopnum()
         for value, func in optimize_ops:
             if opnum == value:
diff --git a/pypy/jit/metainterp/optimizeopt/heap.py b/pypy/jit/metainterp/optimizeopt/heap.py
--- a/pypy/jit/metainterp/optimizeopt/heap.py
+++ b/pypy/jit/metainterp/optimizeopt/heap.py
@@ -235,6 +235,7 @@
         assert opnum != rop.CALL_PURE
         if (opnum == rop.CALL or
             opnum == rop.CALL_MAY_FORCE or
+            opnum == rop.CALL_RELEASE_GIL or
             opnum == rop.CALL_ASSEMBLER):
             if opnum == rop.CALL_ASSEMBLER:
                 effectinfo = None
@@ -242,7 +243,7 @@
                 effectinfo = op.getdescr().get_extra_info()
             if effectinfo is None or effectinfo.check_can_invalidate():
                 self._seen_guard_not_invalidated = False
-            if effectinfo is not None:
+            if effectinfo is not None and not effectinfo.has_random_effects():
                 # XXX we can get the wrong complexity here, if the lists
                 # XXX stored on effectinfo are large
                 for fielddescr in effectinfo.readonly_descrs_fields:
diff --git a/pypy/jit/metainterp/optimizeopt/intbounds.py b/pypy/jit/metainterp/optimizeopt/intbounds.py
--- a/pypy/jit/metainterp/optimizeopt/intbounds.py
+++ b/pypy/jit/metainterp/optimizeopt/intbounds.py
@@ -17,6 +17,14 @@
         assert self.posponedop is None
         return self
 
+    def setup(self):
+        self.posponedop = None
+        self.nextop = None
+
+    def reconstruct_for_next_iteration(self, optimizer, valuemap):
+        assert self.posponedop is None
+        return self 
+
     def propagate_forward(self, op):
         if op.is_ovf():
             self.posponedop = op
diff --git a/pypy/jit/metainterp/optimizeopt/optimizer.py b/pypy/jit/metainterp/optimizeopt/optimizer.py
--- a/pypy/jit/metainterp/optimizeopt/optimizer.py
+++ b/pypy/jit/metainterp/optimizeopt/optimizer.py
@@ -175,6 +175,14 @@
     def __init__(self):
         pass # make rpython happy
 
+    def propagate_begin_forward(self):
+        if self.next_optimization:
+            self.next_optimization.propagate_begin_forward()
+
+    def propagate_end_forward(self):
+        if self.next_optimization:
+            self.next_optimization.propagate_end_forward()
+
     def propagate_forward(self, op):
         raise NotImplementedError
 
@@ -406,11 +414,13 @@
         # ^^^ at least at the start of bridges.  For loops, we could set
         # it to False, but we probably don't care
         self.newoperations = []
+        self.first_optimization.propagate_begin_forward()
         self.i = 0
         while self.i < len(self.loop.operations):
             op = self.loop.operations[self.i]
             self.first_optimization.propagate_forward(op)
             self.i += 1
+        self.first_optimization.propagate_end_forward()
         self.loop.operations = self.newoperations
         self.loop.quasi_immutable_deps = self.quasi_immutable_deps
         # accumulate counters
diff --git a/pypy/jit/metainterp/optimizeopt/rewrite.py b/pypy/jit/metainterp/optimizeopt/rewrite.py
--- a/pypy/jit/metainterp/optimizeopt/rewrite.py
+++ b/pypy/jit/metainterp/optimizeopt/rewrite.py
@@ -415,14 +415,22 @@
         dest_start_box = self.get_constant_box(op.getarg(4))
         length = self.get_constant_box(op.getarg(5))
         if (source_value.is_virtual() and source_start_box and dest_start_box
-            and length and dest_value.is_virtual()):
-            # XXX optimize the case where dest value is not virtual,
-            #     but we still can avoid a mess
+            and length and (dest_value.is_virtual() or length.getint() <= 8)):
+            from pypy.jit.metainterp.optimizeopt.virtualize import VArrayValue
+            assert isinstance(source_value, VArrayValue)
             source_start = source_start_box.getint()
             dest_start = dest_start_box.getint()
             for index in range(length.getint()):
                 val = source_value.getitem(index + source_start)
-                dest_value.setitem(index + dest_start, val)
+                if dest_value.is_virtual():
+                    dest_value.setitem(index + dest_start, val)
+                else:
+                    newop = ResOperation(rop.SETARRAYITEM_GC,
+                                         [op.getarg(2),
+                                          ConstInt(index + dest_start),
+                                          val.force_box()], None,
+                                         descr=source_value.arraydescr)
+                    self.emit_operation(newop)
             return True
         if length and length.getint() == 0:
             return True # 0-length arraycopy
diff --git a/pypy/jit/metainterp/pyjitpl.py b/pypy/jit/metainterp/pyjitpl.py
--- a/pypy/jit/metainterp/pyjitpl.py
+++ b/pypy/jit/metainterp/pyjitpl.py
@@ -867,7 +867,7 @@
         any_operation = len(self.metainterp.history.operations) > 0
         jitdriver_sd = self.metainterp.staticdata.jitdrivers_sd[jdindex]
         self.verify_green_args(jitdriver_sd, greenboxes)
-        self.debug_merge_point(jitdriver_sd, self.metainterp.in_recursion,
+        self.debug_merge_point(jdindex, self.metainterp.in_recursion,
                                greenboxes)
 
         if self.metainterp.seen_loop_header_for_jdindex < 0:
@@ -914,13 +914,10 @@
                                     assembler_call=True)
             raise ChangeFrame
 
-    def debug_merge_point(self, jitdriver_sd, in_recursion, greenkey):
+    def debug_merge_point(self, jd_index, in_recursion, greenkey):
         # debugging: produce a DEBUG_MERGE_POINT operation
-        loc = jitdriver_sd.warmstate.get_location_str(greenkey)
-        debug_print(loc)
-        constloc = self.metainterp.cpu.ts.conststr(loc)
-        self.metainterp.history.record(rop.DEBUG_MERGE_POINT,
-                                       [constloc, ConstInt(in_recursion)], None)
+        args = [ConstInt(jd_index), ConstInt(in_recursion)] + greenkey
+        self.metainterp.history.record(rop.DEBUG_MERGE_POINT, args, None)
 
     @arguments("box", "label")
     def opimpl_goto_if_exception_mismatch(self, vtablebox, next_exc_target):
diff --git a/pypy/jit/metainterp/resoperation.py b/pypy/jit/metainterp/resoperation.py
--- a/pypy/jit/metainterp/resoperation.py
+++ b/pypy/jit/metainterp/resoperation.py
@@ -191,9 +191,15 @@
         # of the operation.  It must inherit from AbstractDescr.  The
         # backend provides it with cpu.fielddescrof(), cpu.arraydescrof(),
         # cpu.calldescrof(), and cpu.typedescrof().
+        self._check_descr(descr)
+        self._descr = descr
+
+    def _check_descr(self, descr):
+        if not we_are_translated() and getattr(descr, 'I_am_a_descr', False):
+            return # needed for the mock case in oparser_model
         from pypy.jit.metainterp.history import check_descr
         check_descr(descr)
-        self._descr = descr
+
 
 class GuardResOp(ResOpWithDescr):
 
@@ -471,8 +477,9 @@
     'STRSETITEM/3',
     'UNICODESETITEM/3',
     #'RUNTIMENEW/1',     # ootype operation
-    'COND_CALL_GC_WB/2d', # [objptr, newvalue]   (for the write barrier)
-    'DEBUG_MERGE_POINT/2',      # debugging only
+    'COND_CALL_GC_WB/2d', # [objptr, newvalue] or [arrayptr, index]
+                          # (for the write barrier, latter is in an array)
+    'DEBUG_MERGE_POINT/*',      # debugging only
     'JIT_DEBUG/*',              # debugging only
     'VIRTUAL_REF_FINISH/2',   # removed before it's passed to the backend
     'COPYSTRCONTENT/5',       # src, dst, srcstart, dststart, length
@@ -485,6 +492,7 @@
     'CALL_ASSEMBLER/*d',  # call already compiled assembler
     'CALL_MAY_FORCE/*d',
     'CALL_LOOPINVARIANT/*d',
+    'CALL_RELEASE_GIL/*d',  # release the GIL and "close the stack" for asmgcc
     #'OOSEND',                     # ootype operation
     #'OOSEND_PURE',                # ootype operation
     'CALL_PURE/*d',             # removed before it's passed to the backend
diff --git a/pypy/jit/metainterp/test/support.py b/pypy/jit/metainterp/test/support.py
--- a/pypy/jit/metainterp/test/support.py
+++ b/pypy/jit/metainterp/test/support.py
@@ -15,14 +15,14 @@
                   supports_longlong=False, **kwds):
     from pypy.jit.codewriter import support
 
-    class FakeJitCell:
+    class FakeJitCell(object):
         __compiled_merge_points = []
         def get_compiled_merge_points(self):
             return self.__compiled_merge_points[:]
         def set_compiled_merge_points(self, lst):
             self.__compiled_merge_points = lst
 
-    class FakeWarmRunnerState:
+    class FakeWarmRunnerState(object):
         def attach_unoptimized_bridge_from_interp(self, greenkey, newloop):
             pass
 
@@ -30,6 +30,9 @@
             from pypy.rpython.annlowlevel import llhelper
             return llhelper(FUNCPTR, func)
 
+        def get_location_str(self, args):
+            return 'location'
+
         def jit_cell_at_key(self, greenkey):
             assert greenkey == []
             return self._cell
diff --git a/pypy/jit/metainterp/test/test_compile.py b/pypy/jit/metainterp/test/test_compile.py
--- a/pypy/jit/metainterp/test/test_compile.py
+++ b/pypy/jit/metainterp/test/test_compile.py
@@ -30,13 +30,16 @@
     ts = typesystem.llhelper
     def __init__(self):
         self.seen = []
-    def compile_loop(self, inputargs, operations, token):
+    def compile_loop(self, inputargs, operations, token, name=''):
         self.seen.append((inputargs, operations, token))
 
 class FakeLogger(object):
     def log_loop(self, inputargs, operations, number=0, type=None, ops_offset=None):
         pass
 
+    def repr_of_resop(self, op):
+        return repr(op)
+
 class FakeState(object):
     enable_opts = ALL_OPTS_DICT.copy()
     enable_opts.pop('unroll')
@@ -44,6 +47,9 @@
     def attach_unoptimized_bridge_from_interp(*args):
         pass
 
+    def get_location_str(self, args):
+        return 'location'
+
 class FakeGlobalData(object):
     loopnumbering = 0
 
@@ -63,6 +69,8 @@
     call_pure_results = {}
     class jitdriver_sd:
         warmstate = FakeState()
+        on_compile = staticmethod(lambda *args: None)
+        on_compile_bridge = staticmethod(lambda *args: None)
 
 def test_compile_new_loop():
     cpu = FakeCPU()
diff --git a/pypy/jit/metainterp/test/test_fficall.py b/pypy/jit/metainterp/test/test_fficall.py
--- a/pypy/jit/metainterp/test/test_fficall.py
+++ b/pypy/jit/metainterp/test/test_fficall.py
@@ -1,28 +1,46 @@
 
 import py
-from pypy.rlib.jit import JitDriver, hint
+from pypy.rlib.rarithmetic import r_singlefloat, r_longlong, r_ulonglong
+from pypy.rlib.jit import JitDriver, hint, dont_look_inside
 from pypy.rlib.unroll import unrolling_iterable
-from pypy.rlib.libffi import ArgChain
+from pypy.rlib.libffi import ArgChain, longlong2float, float2longlong
+from pypy.rlib.libffi import IS_32_BIT
 from pypy.rlib.test.test_libffi import TestLibffiCall as _TestLibffiCall
 from pypy.rpython.lltypesystem import lltype, rffi
+from pypy.rlib.objectmodel import specialize
+from pypy.tool.sourcetools import func_with_new_name
 from pypy.jit.metainterp.test.support import LLJitMixin
 
-
 class TestFfiCall(LLJitMixin, _TestLibffiCall):
 
     # ===> ../../../rlib/test/test_libffi.py
 
-    def call(self, funcspec, args, RESULT, init_result=0):
+    def call(self, funcspec, args, RESULT, init_result=0, is_struct=False):
         """
         Call the function specified by funcspec in a loop, and let the jit to
         see and optimize it.
         """
         #
         lib, name, argtypes, restype = funcspec
-        args = unrolling_iterable(args)
+        method_and_args = []
+        for argval in args:
+            if type(argval) is r_singlefloat:
+                method_name = 'arg_singlefloat'
+                argval = float(argval)
+            elif IS_32_BIT and type(argval) in [r_longlong, r_ulonglong]:
+                method_name = 'arg_longlong'
+                argval = rffi.cast(rffi.LONGLONG, argval)
+                argval = longlong2float(argval)
+            elif isinstance(argval, tuple):
+                method_name, argval = argval
+            else:
+                method_name = 'arg'
+            method_and_args.append((method_name, argval))
+        method_and_args = unrolling_iterable(method_and_args)
         #
         reds = ['n', 'res', 'func']
-        if type(init_result) is float:
+        if (RESULT in [rffi.FLOAT, rffi.DOUBLE] or
+            IS_32_BIT and RESULT in [rffi.LONGLONG, rffi.ULONGLONG]):
             reds = ['n', 'func', 'res'] # floats must be *after* refs
         driver = JitDriver(reds=reds, greens=[])
         #
@@ -34,12 +52,17 @@
                 driver.can_enter_jit(n=n, res=res, func=func)
                 func = hint(func, promote=True)
                 argchain = ArgChain()
-                for argval in args: # this loop is unrolled
-                    argchain.arg(argval)
-                res = func.call(argchain, RESULT)
+                # this loop is unrolled
+                for method_name, argval in method_and_args:
+                    getattr(argchain, method_name)(argval)
+                res = func.call(argchain, RESULT, is_struct=is_struct)
                 n += 1
             return res
         #
-        res = self.meta_interp(f, [0])
+        res = self.meta_interp(f, [0], backendopt=True)
         return res
 
+    def test_byval_result(self):
+        _TestLibffiCall.test_byval_result(self)
+    test_byval_result.__doc__ = _TestLibffiCall.test_byval_result.__doc__
+    test_byval_result.dont_track_allocations = True
diff --git a/pypy/jit/metainterp/test/test_history.py b/pypy/jit/metainterp/test/test_history.py
--- a/pypy/jit/metainterp/test/test_history.py
+++ b/pypy/jit/metainterp/test/test_history.py
@@ -1,5 +1,5 @@
 from pypy.jit.metainterp.history import *
-from pypy.rpython.lltypesystem import lltype, llmemory
+from pypy.rpython.lltypesystem import lltype, llmemory, rffi
 
 
 def test_repr():
@@ -10,6 +10,18 @@
     const = ConstPtr(lltype.cast_opaque_ptr(llmemory.GCREF, s))
     assert const._getrepr_() == "*T"
 
+def test_repr_ll2ctypes():
+    ptr = lltype.malloc(rffi.VOIDPP.TO, 10, flavor='raw')
+    # force it to be a ll2ctypes object
+    ptr = rffi.cast(rffi.VOIDPP, rffi.cast(rffi.LONG, ptr))
+    adr = llmemory.cast_ptr_to_adr(ptr)
+    lltype.free(ptr, flavor='raw')
+    intval = llmemory.cast_adr_to_int(adr, 'symbolic')
+    box = BoxInt(intval)
+    s = box.repr_rpython()
+    assert s.startswith('12345/') # the arbitrary hash value used by
+                                  # make_hashable_int
+
 def test_same_constant():
     c1a = ConstInt(0)
     c1b = ConstInt(0)
diff --git a/pypy/jit/metainterp/test/test_logger.py b/pypy/jit/metainterp/test/test_logger.py
--- a/pypy/jit/metainterp/test/test_logger.py
+++ b/pypy/jit/metainterp/test/test_logger.py
@@ -36,19 +36,29 @@
         return capturing(logger.Logger.log_loop, self,
                          loop.inputargs, loop.operations, ops_offset=ops_offset)
 
-    def repr_of_descr(self, descr):
-        for k, v in self.namespace.items():
-            if v == descr:
-                return k
-        return descr.repr_of_descr()
+    def _make_log_operations(self1):
+        class LogOperations(logger.LogOperations):
+            def repr_of_descr(self, descr):
+                for k, v in self1.namespace.items():
+                    if v == descr:
+                        return k
+                return descr.repr_of_descr()
+        logops = LogOperations(self1.metainterp_sd, self1.guard_number)
+        self1.logops = logops
+        return logops
 
 class TestLogger(object):
     ts = llhelper
 
     def make_metainterp_sd(self):
+        class FakeJitDriver(object):
+            class warmstate(object):
+                get_location_str = staticmethod(lambda args: "dupa")
+        
         class FakeMetaInterpSd:
             cpu = AbstractCPU()
             cpu.ts = self.ts
+            jitdrivers_sd = [FakeJitDriver()]
             def get_name_from_address(self, addr):
                 return 'Name'
         return FakeMetaInterpSd()
@@ -66,7 +76,7 @@
         if check_equal:
             equaloplists(loop.operations, oloop.operations)
             assert oloop.inputargs == loop.inputargs
-        return loop, oloop
+        return logger, loop, oloop
     
     def test_simple(self):
         inp = '''
@@ -106,18 +116,18 @@
     def test_debug_merge_point(self):
         inp = '''
         []
-        debug_merge_point("info", 0)
+        debug_merge_point(0, 0)
         '''
-        loop, oloop = self.reparse(inp, check_equal=False)
-        assert loop.operations[0].getarg(0)._get_str() == 'info'
-        assert oloop.operations[0].getarg(0)._get_str() == 'info'
+        _, loop, oloop = self.reparse(inp, check_equal=False)
+        assert loop.operations[0].getarg(1).getint() == 0
+        assert oloop.operations[0].getarg(1)._get_str() == "dupa"
         
     def test_floats(self):
         inp = '''
         [f0]
         f1 = float_add(3.5, f0)
         '''
-        loop, oloop = self.reparse(inp)
+        _, loop, oloop = self.reparse(inp)
         equaloplists(loop.operations, oloop.operations)
 
     def test_jump(self):
@@ -179,6 +189,17 @@
         assert output.splitlines()[0] == "# bridge out of Guard 3 with 0 ops"
         pure_parse(output)
 
+    def test_repr_single_op(self):
+        inp = '''
+        [i0, i1, i2, p3, p4, p5]
+        i6 = int_add(i1, i2)
+        i8 = int_add(i6, 3)
+        jump(i0, i8, i6, p3, p4, p5)
+        '''
+        logger, loop, _ = self.reparse(inp)
+        op = loop.operations[1]
+        assert logger.logops.repr_of_resop(op) == "i8 = int_add(i6, 3)"
+
     def test_ops_offset(self):
         inp = '''
         [i0]
diff --git a/pypy/jit/metainterp/test/test_optimizebasic.py b/pypy/jit/metainterp/test/test_optimizebasic.py
--- a/pypy/jit/metainterp/test/test_optimizebasic.py
+++ b/pypy/jit/metainterp/test/test_optimizebasic.py
@@ -3,6 +3,7 @@
 from pypy.jit.metainterp.test.test_optimizeutil import (LLtypeMixin,
                                                         #OOtypeMixin,
                                                         BaseTest)
+from pypy.jit.metainterp.test.test_compile import FakeLogger
 import pypy.jit.metainterp.optimizeopt.optimizer as optimizeopt
 import pypy.jit.metainterp.optimizeopt.virtualize as virtualize
 from pypy.jit.metainterp.optimizeutil import InvalidLoop
@@ -32,6 +33,8 @@
         self.profiler = EmptyProfiler()
         self.options = Fake()
         self.globaldata = Fake()
+        self.logger_ops = FakeLogger()
+        self.logger_noopt = FakeLogger()
 
 def test_store_final_boxes_in_guard():
     from pypy.jit.metainterp.compile import ResumeGuardDescr
@@ -229,7 +232,7 @@
 
 class BaseTestBasic(BaseTest):
 
-    def invent_fail_descr(self, fail_args):
+    def invent_fail_descr(self, model, fail_args):
         if fail_args is None:
             return None
         descr = Storage()
diff --git a/pypy/jit/metainterp/test/test_optimizefficall.py b/pypy/jit/metainterp/test/test_optimizefficall.py
--- a/pypy/jit/metainterp/test/test_optimizefficall.py
+++ b/pypy/jit/metainterp/test/test_optimizefficall.py
@@ -38,6 +38,8 @@
         cpu = LLtypeMixin.cpu
         FUNC = LLtypeMixin.FUNC
         vable_token_descr = LLtypeMixin.valuedescr
+        valuedescr = LLtypeMixin.valuedescr
+
         int_float__int = MyCallDescr('if', 'i')
         funcptr = FakeLLObject()
         func = FakeLLObject(_fake_class=Func,
@@ -76,7 +78,7 @@
         """
         expected = """
         [i0, f1]
-        i3 = call_may_force(12345, i0, f1, descr=int_float__int)
+        i3 = call_release_gil(12345, i0, f1, descr=int_float__int)
         guard_not_forced() []
         guard_no_exception() []
         jump(i3, f1)
@@ -99,7 +101,7 @@
 
     def test_handle_virtualizables(self):
         # this test needs an explanation to understand what goes on: see the
-        # coment in optimize_FORCE_TOKEN
+        # comment in optimize_FORCE_TOKEN
         ops = """
         [i0, f1, p2]
         call(0, ConstPtr(func),                       descr=libffi_prepare)
@@ -116,7 +118,7 @@
         [i0, f1, p2]
         i4 = force_token()
         setfield_gc(p2, i4, descr=vable_token_descr)
-        i3 = call_may_force(12345, i0, f1, descr=int_float__int)
+        i3 = call_release_gil(12345, i0, f1, descr=int_float__int)
         guard_not_forced() [p2]
         guard_no_exception() [p2]
         jump(i3, f1, p2)
@@ -213,7 +215,7 @@
         call(0, ConstPtr(func),                        descr=libffi_prepare)
         #
         # this "nested" call is nicely optimized
-        i4 = call_may_force(67890, i0, f1, descr=int_float__int)
+        i4 = call_release_gil(67890, i0, f1, descr=int_float__int)
         guard_not_forced() []
         guard_no_exception() []
         #
@@ -242,3 +244,25 @@
         """
         expected = ops
         loop = self.optimize_loop(ops, expected)
+
+    def test_allow_setfields_in_between(self):
+        ops = """
+        [i0, f1, p2]
+        call(0, ConstPtr(func),                       descr=libffi_prepare)
+        call(0, ConstPtr(func), i0,                   descr=libffi_push_arg)
+        call(0, ConstPtr(func), f1,                   descr=libffi_push_arg)
+        setfield_gc(p2, i0,                           descr=valuedescr)
+        i3 = call_may_force(0, ConstPtr(func), 12345, descr=libffi_call)
+        guard_not_forced() []
+        guard_no_exception() []
+        jump(i3, f1, p2)
+        """
+        expected = """
+        [i0, f1, p2]
+        setfield_gc(p2, i0, descr=valuedescr)
+        i3 = call_release_gil(12345, i0, f1, descr=int_float__int)
+        guard_not_forced() []
+        guard_no_exception() []
+        jump(i3, f1, p2)
+        """
+        loop = self.optimize_loop(ops, expected)
diff --git a/pypy/jit/metainterp/test/test_optimizeopt.py b/pypy/jit/metainterp/test/test_optimizeopt.py
--- a/pypy/jit/metainterp/test/test_optimizeopt.py
+++ b/pypy/jit/metainterp/test/test_optimizeopt.py
@@ -145,7 +145,7 @@
 class BaseTestOptimizeOpt(BaseTest):
     jit_ffi = False
 
-    def invent_fail_descr(self, fail_args):
+    def invent_fail_descr(self, model, fail_args):
         if fail_args is None:
             return None
         descr = Storage()
@@ -3402,6 +3402,56 @@
         '''
         self.optimize_loop(ops, expected)
 
+    def test_arraycopy_dest_not_virtual(self):
+        ops = '''
+        []
+        p1 = new_array(3, descr=arraydescr)
+        p2 = new_array(3, descr=arraydescr)
+        setarrayitem_gc(p1, 2, 10, descr=arraydescr)
+        setarrayitem_gc(p2, 2, 13, descr=arraydescr)
+        escape(p2)
+        call(0, p1, p2, 0, 0, 3, descr=arraycopydescr)
+        escape(p2)
+        jump()
+        '''
+        expected = '''
+        []
+        p2 = new_array(3, descr=arraydescr)
+        setarrayitem_gc(p2, 2, 13, descr=arraydescr)
+        escape(p2)
+        setarrayitem_gc(p2, 0, 0, descr=arraydescr)
+        setarrayitem_gc(p2, 1, 0, descr=arraydescr)
+        setarrayitem_gc(p2, 2, 10, descr=arraydescr)
+        escape(p2)
+        jump()
+        '''
+        self.optimize_loop(ops, expected)
+
+    def test_arraycopy_dest_not_virtual_too_long(self):
+        ops = '''
+        []
+        p1 = new_array(10, descr=arraydescr)
+        p2 = new_array(10, descr=arraydescr)
+        setarrayitem_gc(p1, 2, 10, descr=arraydescr)
+        setarrayitem_gc(p2, 2, 13, descr=arraydescr)
+        escape(p2)
+        call(0, p1, p2, 0, 0, 10, descr=arraycopydescr)
+        escape(p2)
+        jump()
+        '''
+        expected = '''
+        []
+        p2 = new_array(10, descr=arraydescr)
+        setarrayitem_gc(p2, 2, 13, descr=arraydescr)
+        escape(p2)
+        p1 = new_array(10, descr=arraydescr)
+        setarrayitem_gc(p1, 2, 10, descr=arraydescr)
+        call(0, p1, p2, 0, 0, 10, descr=arraycopydescr)
+        escape(p2)
+        jump()
+        '''
+        self.optimize_loop(ops, expected)
+
     def test_bound_lt(self):
         ops = """
         [i0]
diff --git a/pypy/jit/metainterp/test/test_warmspot.py b/pypy/jit/metainterp/test/test_warmspot.py
--- a/pypy/jit/metainterp/test/test_warmspot.py
+++ b/pypy/jit/metainterp/test/test_warmspot.py
@@ -80,7 +80,7 @@
         self.meta_interp(f, [123, 10])
         assert len(get_stats().locations) >= 4
         for loc in get_stats().locations:
-            assert loc == 'GREEN IS 123.'
+            assert loc == (0, 123)
 
     def test_set_param_enable_opts(self):
         from pypy.rpython.annlowlevel import llstr, hlstr
diff --git a/pypy/jit/metainterp/test/test_warmstate.py b/pypy/jit/metainterp/test/test_warmstate.py
--- a/pypy/jit/metainterp/test/test_warmstate.py
+++ b/pypy/jit/metainterp/test/test_warmstate.py
@@ -181,6 +181,7 @@
         cpu = None
         memory_manager = None
     class FakeJitDriverSD:
+        jitdriver = None
         _green_args_spec = [lltype.Signed, lltype.Float]
         _get_printable_location_ptr = None
         _confirm_enter_jit_ptr = None
@@ -207,6 +208,7 @@
         cpu = None
         memory_manager = None
     class FakeJitDriverSD:
+        jitdriver = None
         _green_args_spec = [lltype.Signed, lltype.Float]
         _get_printable_location_ptr = llhelper(GET_LOCATION, get_location)
         _confirm_enter_jit_ptr = None
@@ -230,6 +232,7 @@
         cpu = None
         memory_manager = None
     class FakeJitDriverSD:
+        jitdriver = None
         _green_args_spec = [lltype.Signed, lltype.Float]
         _get_printable_location_ptr = None
         _confirm_enter_jit_ptr = llhelper(ENTER_JIT, confirm_enter_jit)
@@ -253,6 +256,7 @@
         cpu = None
         memory_manager = None
     class FakeJitDriverSD:
+        jitdriver = None
         _green_args_spec = [lltype.Signed, lltype.Float]
         _get_printable_location_ptr = None
         _confirm_enter_jit_ptr = None
diff --git a/pypy/jit/metainterp/warmstate.py b/pypy/jit/metainterp/warmstate.py
--- a/pypy/jit/metainterp/warmstate.py
+++ b/pypy/jit/metainterp/warmstate.py
@@ -599,12 +599,8 @@
         get_location_ptr = self.jitdriver_sd._get_printable_location_ptr
         if get_location_ptr is None:
             missing = '(no jitdriver.get_printable_location!)'
-            missingll = llstr(missing)
             def get_location_str(greenkey):
-                if we_are_translated():
-                    return missingll
-                else:
-                    return missing
+                return missing
         else:
             rtyper = self.warmrunnerdesc.rtyper
             unwrap_greenkey = self.make_unwrap_greenkey()
@@ -612,10 +608,10 @@
             def get_location_str(greenkey):
                 greenargs = unwrap_greenkey(greenkey)
                 fn = support.maybe_on_top_of_llinterp(rtyper, get_location_ptr)
-                res = fn(*greenargs)
-                if not we_are_translated() and not isinstance(res, str):
-                    res = hlstr(res)
-                return res
+                llres = fn(*greenargs)
+                if not we_are_translated() and isinstance(llres, str):
+                    return llres
+                return hlstr(llres)
         self.get_location_str = get_location_str
         #
         confirm_enter_jit_ptr = self.jitdriver_sd._confirm_enter_jit_ptr
diff --git a/pypy/jit/tl/pypyjit.py b/pypy/jit/tl/pypyjit.py
--- a/pypy/jit/tl/pypyjit.py
+++ b/pypy/jit/tl/pypyjit.py
@@ -30,6 +30,7 @@
     BACKEND = 'c'
 
 config = get_pypy_config(translating=True)
+config.translation.backendopt.inline_threshold = 0.1
 config.translation.gc = 'boehm'
 config.objspace.nofaking = True
 config.translating = True
diff --git a/pypy/jit/tool/oparser.py b/pypy/jit/tool/oparser.py
--- a/pypy/jit/tool/oparser.py
+++ b/pypy/jit/tool/oparser.py
@@ -3,24 +3,15 @@
 in a nicer fashion
 """
 
-from pypy.jit.metainterp.history import TreeLoop, BoxInt, ConstInt,\
-     ConstObj, ConstPtr, Box, BasicFailDescr, BoxFloat, ConstFloat,\
-     LoopToken, get_const_ptr_for_string, get_const_ptr_for_unicode
+from pypy.jit.tool.oparser_model import get_model
+
 from pypy.jit.metainterp.resoperation import rop, ResOperation, \
                                             ResOpWithDescr, N_aryOp, \
                                             UnaryOp, PlainResOp
-from pypy.jit.metainterp.typesystem import llhelper
-from pypy.jit.codewriter.heaptracker import adr2int
-from pypy.jit.codewriter import longlong
-from pypy.rpython.lltypesystem import lltype, llmemory
-from pypy.rpython.ootypesystem import ootype
 
 class ParseError(Exception):
     pass
 
-class Boxes(object):
-    pass
-
 class ESCAPE_OP(N_aryOp, ResOpWithDescr):
 
     OPNUM = -123
@@ -54,37 +45,15 @@
     def clone(self):
         return FORCE_SPILL(self.OPNUM, self.getarglist()[:])
 
-class ExtendedTreeLoop(TreeLoop):
 
-    def getboxes(self):
-        def opboxes(operations):
-            for op in operations:
-                yield op.result
-                for box in op.getarglist():
-                    yield box
-        def allboxes():
-            for box in self.inputargs:
-                yield box
-            for box in opboxes(self.operations):
-                yield box
-
-        boxes = Boxes()
-        for box in allboxes():
-            if isinstance(box, Box):
-                name = str(box)
-                setattr(boxes, name, box)
-        return boxes
-
-    def setvalues(self, **kwds):
-        boxes = self.getboxes()
-        for name, value in kwds.iteritems():
-            getattr(boxes, name).value = value
-
-def default_fail_descr(fail_args=None):
-    return BasicFailDescr()
+def default_fail_descr(model, fail_args=None):
+    return model.BasicFailDescr()
 
 
 class OpParser(object):
+
+    use_mock_model = False
+    
     def __init__(self, input, cpu, namespace, type_system, boxkinds,
                  invent_fail_descr=default_fail_descr,
                  nonstrict=False):
@@ -100,7 +69,8 @@
             self._cache = {}
         self.invent_fail_descr = invent_fail_descr
         self.nonstrict = nonstrict
-        self.looptoken = LoopToken()
+        self.model = get_model(self.use_mock_model)
+        self.looptoken = self.model.LoopToken()
 
     def get_const(self, name, typ):
         if self._consts is None:
@@ -108,16 +78,16 @@
         obj = self._consts[name]
         if self.type_system == 'lltype':
             if typ == 'ptr':
-                return ConstPtr(obj)
+                return self.model.ConstPtr(obj)
             else:
                 assert typ == 'class'
-                return ConstInt(adr2int(llmemory.cast_ptr_to_adr(obj)))
+                return self.model.ConstInt(self.model.ptr_to_int(obj))
         else:
             if typ == 'ptr':
-                return ConstObj(obj)
+                return self.model.ConstObj(obj)
             else:
                 assert typ == 'class'
-                return ConstObj(ootype.cast_to_object(obj))
+                return self.model.ConstObj(ootype.cast_to_object(obj))
 
     def get_descr(self, poss_descr):
         if poss_descr.startswith('<'):
@@ -132,16 +102,16 @@
             pass
         if elem.startswith('i'):
             # integer
-            box = BoxInt()
-            _box_counter_more_than(elem[1:])
+            box = self.model.BoxInt()
+            _box_counter_more_than(self.model, elem[1:])
         elif elem.startswith('f'):
-            box = BoxFloat()
-            _box_counter_more_than(elem[1:])
+            box = self.model.BoxFloat()
+            _box_counter_more_than(self.model, elem[1:])
         elif elem.startswith('p'):
             # pointer
-            ts = getattr(self.cpu, 'ts', llhelper)
+            ts = getattr(self.cpu, 'ts', self.model.llhelper)
             box = ts.BoxRef()
-            _box_counter_more_than(elem[1:])
+            _box_counter_more_than(self.model, elem[1:])
         else:
             for prefix, boxclass in self.boxkinds.iteritems():
                 if elem.startswith(prefix):
@@ -175,21 +145,21 @@
 
     def getvar(self, arg):
         if not arg:
-            return ConstInt(0)
+            return self.model.ConstInt(0)
         try:
-            return ConstInt(int(arg))
+            return self.model.ConstInt(int(arg))
         except ValueError:
             if self.is_float(arg):
-                return ConstFloat(longlong.getfloatstorage(float(arg)))
+                return self.model.ConstFloat(self.model.convert_to_floatstorage(arg))
             if (arg.startswith('"') or arg.startswith("'") or
                 arg.startswith('s"')):
                 # XXX ootype
                 info = arg[1:].strip("'\"")
-                return get_const_ptr_for_string(info)
+                return self.model.get_const_ptr_for_string(info)
             if arg.startswith('u"'):
                 # XXX ootype
                 info = arg[1:].strip("'\"")
-                return get_const_ptr_for_unicode(info)
+                return self.model.get_const_ptr_for_unicode(info)
             if arg.startswith('ConstClass('):
                 name = arg[len('ConstClass('):-1]
                 return self.get_const(name, 'class')
@@ -197,9 +167,9 @@
                 return None
             elif arg == 'NULL':
                 if self.type_system == 'lltype':
-                    return ConstPtr(ConstPtr.value)
+                    return self.model.ConstPtr(self.model.ConstPtr.value)
                 else:
-                    return ConstObj(ConstObj.value)
+                    return self.model.ConstObj(self.model.ConstObj.value)
             elif arg.startswith('ConstPtr('):
                 name = arg[len('ConstPtr('):-1]
                 return self.get_const(name, 'ptr')
@@ -212,7 +182,7 @@
         descr = None
         if argspec.strip():
             if opname == 'debug_merge_point':
-                allargs = argspec.rsplit(', ', 1)
+                allargs = argspec.split(',', 1)
             else:
                 allargs = [arg for arg in argspec.split(",")
                            if arg != '']
@@ -266,14 +236,14 @@
                                 "Unknown var in fail_args: %s" % arg)
                     fail_args.append(fail_arg)
             if descr is None and self.invent_fail_descr:
-                descr = self.invent_fail_descr(fail_args)
+                descr = self.invent_fail_descr(self.model, fail_args)
             if hasattr(descr, '_oparser_uses_descr_of_guard'):
                 descr._oparser_uses_descr_of_guard(self, fail_args)
         else:
             fail_args = None
             if opnum == rop.FINISH:
                 if descr is None and self.invent_fail_descr:
-                    descr = self.invent_fail_descr()
+                    descr = self.invent_fail_descr(self.model)
             elif opnum == rop.JUMP:
                 if descr is None and self.invent_fail_descr:
                     descr = self.looptoken
@@ -338,7 +308,7 @@
         num, ops, last_offset = self.parse_ops(base_indent, newlines, 0)
         if num < len(newlines):
             raise ParseError("unexpected dedent at line: %s" % newlines[num])
-        loop = ExtendedTreeLoop("loop")
+        loop = self.model.ExtendedTreeLoop("loop")
         loop.comment = first_comment
         loop.token = self.looptoken
         loop.operations = ops
@@ -394,7 +364,7 @@
 
 def parse(input, cpu=None, namespace=None, type_system='lltype',
           boxkinds=None, invent_fail_descr=default_fail_descr,
-          no_namespace=False, nonstrict=False):
+          no_namespace=False, nonstrict=False, OpParser=OpParser):
     if namespace is None and not no_namespace:
         namespace = {}
     return OpParser(input, cpu, namespace, type_system, boxkinds,
@@ -405,6 +375,6 @@
     return parse(*args, **kwds)
 
 
-def _box_counter_more_than(s):
+def _box_counter_more_than(model, s):
     if s.isdigit():
-        Box._counter = max(Box._counter, int(s)+1)
+        model.Box._counter = max(model.Box._counter, int(s)+1)
diff --git a/pypy/jit/tool/oparser_model.py b/pypy/jit/tool/oparser_model.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/tool/oparser_model.py
@@ -0,0 +1,148 @@
+class Boxes(object):
+    pass
+
+def get_real_model():
+    class LoopModel(object):
+        from pypy.jit.metainterp.history import TreeLoop, LoopToken
+        from pypy.jit.metainterp.history import Box, BoxInt, BoxFloat
+        from pypy.jit.metainterp.history import ConstInt, ConstObj, ConstPtr, ConstFloat
+        from pypy.jit.metainterp.history import BasicFailDescr
+        from pypy.jit.metainterp.typesystem import llhelper
+
+        from pypy.jit.metainterp.history import get_const_ptr_for_string
+        from pypy.jit.metainterp.history import get_const_ptr_for_unicode
+        get_const_ptr_for_string = staticmethod(get_const_ptr_for_string)
+        get_const_ptr_for_unicode = staticmethod(get_const_ptr_for_unicode)
+
+        @staticmethod
+        def convert_to_floatstorage(arg):
+            from pypy.jit.codewriter import longlong
+            return longlong.getfloatstorage(float(arg))
+
+        @staticmethod
+        def ptr_to_int(obj):
+            from pypy.jit.codewriter.heaptracker import adr2int
+            from pypy.rpython.lltypesystem import llmemory
+            return adr2int(llmemory.cast_ptr_to_adr(obj))
+
+        @staticmethod
+        def ootype_cast_to_object(obj):
+            from pypy.rpython.ootypesystem import ootype
+            return ootype.cast_to_object(obj)
+
+    return LoopModel
+
+def get_mock_model():
+    class LoopModel(object):
+
+        class TreeLoop(object):
+            def __init__(self, name):
+                self.name = name
+
+        class LoopToken(object):
+            I_am_a_descr = True
+
+        class BasicFailDescr(object):
+            I_am_a_descr = True
+
+        class Box(object):
+            _counter = 0
+            type = 'b'
+
+            def __init__(self, value=0):
+                self.value = value
+
+            def __repr__(self):
+                result = str(self)
+                result += '(%s)' % self.value
+                return result
+
+            def __str__(self):
+                if not hasattr(self, '_str'):
+                    self._str = '%s%d' % (self.type, Box._counter)
+                    Box._counter += 1
+                return self._str
+
+        class BoxInt(Box):
+            type = 'i'
+
+        class BoxFloat(Box):
+            type = 'f'
+
+        class BoxRef(Box):
+            type = 'p'
+
+        class Const(object):
+            def __init__(self, value=None):
+                self.value = value
+
+            def _get_str(self):
+                return str(self.value)
+
+        class ConstInt(Const):
+            pass
+
+        class ConstPtr(Const):
+            pass
+
+        class ConstFloat(Const):
+            pass
+
+        @classmethod
+        def get_const_ptr_for_string(cls, s):
+            return cls.ConstPtr(s)
+
+        @classmethod
+        def get_const_ptr_for_unicode(cls, s):
+            return cls.ConstPtr(s)
+
+        @staticmethod
+        def convert_to_floatstorage(arg):
+            return float(arg)
+
+        @staticmethod
+        def ptr_to_int(obj):
+            return id(obj)
+
+        class llhelper(object):
+            pass
+
+    LoopModel.llhelper.BoxRef = LoopModel.BoxRef
+
+    return LoopModel
+
+
+def get_model(use_mock):
+    if use_mock:
+        model = get_mock_model()
+    else:
+        model = get_real_model()
+
+    class ExtendedTreeLoop(model.TreeLoop):
+
+        def getboxes(self):
+            def opboxes(operations):
+                for op in operations:
+                    yield op.result
+                    for box in op.getarglist():
+                        yield box
+            def allboxes():
+                for box in self.inputargs:
+                    yield box
+                for box in opboxes(self.operations):
+                    yield box
+
+            boxes = Boxes()
+            for box in allboxes():
+                if isinstance(box, model.Box):
+                    name = str(box)
+                    setattr(boxes, name, box)
+            return boxes
+
+        def setvalues(self, **kwds):
+            boxes = self.getboxes()
+            for name, value in kwds.iteritems():
+                getattr(boxes, name).value = value
+
+    model.ExtendedTreeLoop = ExtendedTreeLoop
+    return model
diff --git a/pypy/jit/tool/pypytrace-mode.el b/pypy/jit/tool/pypytrace-mode.el
--- a/pypy/jit/tool/pypytrace-mode.el
+++ b/pypy/jit/tool/pypytrace-mode.el
@@ -8,10 +8,16 @@
 (defun set-truncate-lines ()
   (setq truncate-lines t))
 
+;; to generate the list of keywords:
+;; from pypy.jit.metainterp import resoperation
+;; print ' '.join(sorted('"%s"' % op.lower() for op in resoperation.opname.values() if not op.startswith('GUARD')))
+
+
+
 (define-generic-mode 
   'pypytrace-mode                   ;; name of the mode to create
   nil
-  '("jump" "finish" "int_add" "int_sub" "int_mul" "int_floordiv" "uint_floordiv" "int_mod" "int_and" "int_or" "int_xor" "int_rshift" "int_lshift" "uint_rshift" "float_add" "float_sub" "float_mul" "float_truediv" "float_neg" "float_abs" "cast_float_to_int" "cast_int_to_float" "int_lt" "int_le" "int_eq" "int_ne" "int_gt" "int_ge" "uint_lt" "uint_le" "uint_gt" "uint_ge" "float_lt" "float_le" "float_eq" "float_ne" "float_gt" "float_ge" "int_is_zero" "int_is_true" "int_neg" "int_invert" "same_as" "ptr_eq" "ptr_ne" "arraylen_gc" "strlen" "strgetitem" "getfield_gc_pure" "getfield_raw_pure" "getarrayitem_gc_pure" "unicodelen" "unicodegetitem" "getarrayitem_gc" "getarrayitem_raw" "getfield_gc" "getfield_raw" "new" "new_with_vtable" "new_array" "force_token" "virtual_ref" "setarrayitem_gc" "setarrayitem_raw" "setfield_gc" "setfield_raw" "arraycopy" "newstr" "strsetitem" "unicodesetitem" "newunicode" "cond_call_gc_wb" "virtual_ref_finish" "call" "call_assembler" "call_may_force" "call_loopinvariant" "call_pure" "int_add_ovf" "int_sub_ovf" "int_mul_ovf") ;; keywords
+  '("arraylen_gc" "call" "call_assembler" "call_loopinvariant" "call_may_force" "call_pure" "call_release_gil" "cast_float_to_int" "cast_int_to_float" "cond_call_gc_wb" "copystrcontent" "copyunicodecontent" "debug_merge_point" "finish" "float_abs" "float_add" "float_eq" "float_ge" "float_gt" "float_le" "float_lt" "float_mul" "float_ne" "float_neg" "float_sub" "float_truediv" "force_token" "getarrayitem_gc" "getarrayitem_gc_pure" "getarrayitem_raw" "getfield_gc" "getfield_gc_pure" "getfield_raw" "getfield_raw_pure" "int_add" "int_add_ovf" "int_and" "int_eq" "int_floordiv" "int_ge" "int_gt" "int_invert" "int_is_true" "int_is_zero" "int_le" "int_lshift" "int_lt" "int_mod" "int_mul" "int_mul_ovf" "int_ne" "int_neg" "int_or" "int_rshift" "int_sub" "int_sub_ovf" "int_xor" "jit_debug" "jump" "new" "new_array" "new_with_vtable" "newstr" "newunicode" "ptr_eq" "ptr_ne" "quasiimmut_field" "read_timestamp" "same_as" "setarrayitem_gc" "setarrayitem_raw" "setfield_gc" "setfield_raw" "strgetitem" "strlen" "strsetitem" "uint_floordiv" "uint_ge" "uint_gt" "uint_le" "uint_lt" "uint_rshift" "unicodegetitem" "unicodelen" "unicodesetitem" "virtual_ref" "virtual_ref_finish") ;; keywords
   '( ;; additional regexps
     ("^# Loop.*" . 'hi-blue)
     ("\\[.*\\]" . 'font-lock-comment-face) ;; comment out argument lists
diff --git a/pypy/jit/tool/test/test_oparser.py b/pypy/jit/tool/test/test_oparser.py
--- a/pypy/jit/tool/test/test_oparser.py
+++ b/pypy/jit/tool/test/test_oparser.py
@@ -1,227 +1,274 @@
 import py
+import sys
 from pypy.rpython.lltypesystem import lltype, llmemory
 
-from pypy.jit.tool.oparser import parse, ParseError
+from pypy.jit.tool.oparser import parse, OpParser
 from pypy.jit.metainterp.resoperation import rop
-from pypy.jit.metainterp.history import AbstractDescr, BoxInt, LoopToken,\
-     BoxFloat
+from pypy.jit.metainterp.history import AbstractDescr, BoxInt, LoopToken
 
-def test_basic_parse():
-    x = """
-    [i0, i1]
-    # a comment
-    i2 = int_add(i0, i1)
-    i3 = int_sub(i2, 3) # another comment
-    finish() # (tricky)
-    """
-    loop = parse(x)
-    assert len(loop.operations) == 3
-    assert [op.getopnum() for op in loop.operations] == [rop.INT_ADD, rop.INT_SUB,
-                                                    rop.FINISH]
-    assert len(loop.inputargs) == 2
-    assert loop.operations[-1].getdescr()
+class BaseTestOparser(object):
 
-def test_const_ptr_subops():
-    x = """
-    [p0]
-    guard_class(p0, ConstClass(vtable)) []
-    """
-    S = lltype.Struct('S')
-    vtable = lltype.nullptr(S)
-    loop = parse(x, None, locals())
-    assert len(loop.operations) == 1
-    assert loop.operations[0].getdescr()
-    assert loop.operations[0].getfailargs() == []
+    OpParser = None
 
-def test_descr():
-    class Xyz(AbstractDescr):
-        pass
-    
-    x = """
-    [p0]
-    i1 = getfield_gc(p0, descr=stuff)
-    """
-    stuff = Xyz()
-    loop = parse(x, None, locals())
-    assert loop.operations[0].getdescr() is stuff
+    def parse(self, *args, **kwds):
+        kwds['OpParser'] = self.OpParser
+        return parse(*args, **kwds)
 
-def test_after_fail():
-    x = """
-    [i0]
-    guard_value(i0, 3) []
-    i1 = int_add(1, 2)
-    """
-    loop = parse(x, None, {})
-    assert len(loop.operations) == 2
+    def test_basic_parse(self):
+        x = """
+        [i0, i1]
+        # a comment
+        i2 = int_add(i0, i1)
+        i3 = int_sub(i2, 3) # another comment
+        finish() # (tricky)
+        """
+        loop = self.parse(x)
+        assert len(loop.operations) == 3
+        assert [op.getopnum() for op in loop.operations] == [rop.INT_ADD, rop.INT_SUB,
+                                                        rop.FINISH]
+        assert len(loop.inputargs) == 2
+        assert loop.operations[-1].getdescr()
 
-def test_descr_setfield():
-    class Xyz(AbstractDescr):
-        pass
-    
-    x = """
-    [p0]
-    setfield_gc(p0, 3, descr=stuff)
-    """
-    stuff = Xyz()
-    loop = parse(x, None, locals())
-    assert loop.operations[0].getdescr() is stuff
+    def test_const_ptr_subops(self):
+        x = """
+        [p0]
+        guard_class(p0, ConstClass(vtable)) []
+        """
+        S = lltype.Struct('S')
+        vtable = lltype.nullptr(S)
+        loop = self.parse(x, None, locals())
+        assert len(loop.operations) == 1
+        assert loop.operations[0].getdescr()
+        assert loop.operations[0].getfailargs() == []
 
-def test_boxname():
-    x = """
-    [i42]
-    i50 = int_add(i42, 1)
-    """
-    loop = parse(x, None, {})
-    assert str(loop.inputargs[0]) == 'i42'
-    assert str(loop.operations[0].result) == 'i50'
+    def test_descr(self):
+        class Xyz(AbstractDescr):
+            I_am_a_descr = True # for the mock case
 
-def test_getboxes():
-    x = """
-    [i0]
-    i1 = int_add(i0, 10)
-    """
-    loop = parse(x, None, {})
-    boxes = loop.getboxes()
-    assert boxes.i0 is loop.inputargs[0]
-    assert boxes.i1 is loop.operations[0].result
-    
-def test_setvalues():
-    x = """
-    [i0]
-    i1 = int_add(i0, 10)
-    """
-    loop = parse(x, None, {})
-    loop.setvalues(i0=32, i1=42)
-    assert loop.inputargs[0].value == 32
-    assert loop.operations[0].result.value == 42
+        x = """
+        [p0]
+        i1 = getfield_gc(p0, descr=stuff)
+        """
+        stuff = Xyz()
+        loop = self.parse(x, None, locals())
+        assert loop.operations[0].getdescr() is stuff
 
-def test_boxkind():
-    x = """
-    [sum0]
-    """
-    loop = parse(x, None, {}, boxkinds={'sum': BoxInt})
-    b = loop.getboxes()
-    assert isinstance(b.sum0, BoxInt)
-    
-def test_getvar_const_ptr():
-    x = '''
-    []
-    call(ConstPtr(func_ptr))
+    def test_after_fail(self):
+        x = """
+        [i0]
+        guard_value(i0, 3) []
+        i1 = int_add(1, 2)
+        """
+        loop = self.parse(x, None, {})
+        assert len(loop.operations) == 2
+
+    def test_descr_setfield(self):
+        class Xyz(AbstractDescr):
+            I_am_a_descr = True # for the mock case
+
+        x = """
+        [p0]
+        setfield_gc(p0, 3, descr=stuff)
+        """
+        stuff = Xyz()
+        loop = self.parse(x, None, locals())
+        assert loop.operations[0].getdescr() is stuff
+
+    def test_boxname(self):
+        x = """
+        [i42]
+        i50 = int_add(i42, 1)
+        """
+        loop = self.parse(x, None, {})
+        assert str(loop.inputargs[0]) == 'i42'
+        assert str(loop.operations[0].result) == 'i50'
+
+    def test_getboxes(self):
+        x = """
+        [i0]
+        i1 = int_add(i0, 10)
+        """
+        loop = self.parse(x, None, {})
+        boxes = loop.getboxes()
+        assert boxes.i0 is loop.inputargs[0]
+        assert boxes.i1 is loop.operations[0].result
+
+    def test_setvalues(self):
+        x = """
+        [i0]
+        i1 = int_add(i0, 10)
+        """
+        loop = self.parse(x, None, {})
+        loop.setvalues(i0=32, i1=42)
+        assert loop.inputargs[0].value == 32
+        assert loop.operations[0].result.value == 42
+
+    def test_getvar_const_ptr(self):
+        x = '''
+        []
+        call(ConstPtr(func_ptr))
+        '''
+        TP = lltype.GcArray(lltype.Signed)
+        NULL = lltype.cast_opaque_ptr(llmemory.GCREF, lltype.nullptr(TP))
+        loop = self.parse(x, None, {'func_ptr' : NULL})
+        assert loop.operations[0].getarg(0).value == NULL
+
+    def test_jump_target(self):
+        x = '''
+        []
+        jump()
+        '''
+        loop = self.parse(x)
+        assert loop.operations[0].getdescr() is loop.token
+
+    def test_jump_target_other(self):
+        looptoken = LoopToken()
+        looptoken.I_am_a_descr = True # for the mock case
+        x = '''
+        []
+        jump(descr=looptoken)
+        '''
+        loop = self.parse(x, namespace=locals())
+        assert loop.operations[0].getdescr() is looptoken
+
+    def test_floats(self):
+        x = '''
+        [f0]
+        f1 = float_add(f0, 3.5)
+        '''
+        loop = self.parse(x)
+        box = loop.operations[0].getarg(0)
+        # we cannot use isinstance, because in case of mock the class will be
+        # constructed on the fly
+        assert box.__class__.__name__ == 'BoxFloat'
+
+    def test_debug_merge_point(self):
+        x = '''
+        []
+        debug_merge_point(0, "info")
+        debug_merge_point(0, 'info')
+        debug_merge_point(1, '<some ('other,')> info')
+        debug_merge_point(0, '(stuff) #1')
+        '''
+        loop = self.parse(x)
+        assert loop.operations[0].getarg(1)._get_str() == 'info'
+        assert loop.operations[1].getarg(1)._get_str() == 'info'
+        assert loop.operations[2].getarg(1)._get_str() == "<some ('other,')> info"
+        assert loop.operations[3].getarg(1)._get_str() == "(stuff) #1"
+
+
+    def test_descr_with_obj_print(self):
+        x = '''
+        [p0]
+        setfield_gc(p0, 1, descr=<SomeDescr>)
+        '''
+        loop = self.parse(x)
+        # assert did not explode
+
+    example_loop_log = '''\
+    # bridge out of Guard12, 6 ops
+    [i0, i1, i2]
+    i4 = int_add(i0, 2)
+    i6 = int_sub(i1, 1)
+    i8 = int_gt(i6, 3)
+    guard_true(i8, descr=<Guard15>) [i4, i6]
+    debug_merge_point('(no jitdriver.get_printable_location!)', 0)
+    jump(i6, i4, descr=<Loop0>)
     '''
-    TP = lltype.GcArray(lltype.Signed)
-    NULL = lltype.cast_opaque_ptr(llmemory.GCREF, lltype.nullptr(TP))
-    loop = parse(x, None, {'func_ptr' : NULL})
-    assert loop.operations[0].getarg(0).value == NULL
 
-def test_jump_target():
-    x = '''
-    []
-    jump()
-    '''
-    loop = parse(x)
-    assert loop.operations[0].getdescr() is loop.token
+    def test_parse_no_namespace(self):
+        loop = self.parse(self.example_loop_log, no_namespace=True)
 
-def test_jump_target_other():
-    looptoken = LoopToken()
-    x = '''
-    []
-    jump(descr=looptoken)
-    '''
-    loop = parse(x, namespace=locals())
-    assert loop.operations[0].getdescr() is looptoken
+    def test_attach_comment_to_loop(self):
+        loop = self.parse(self.example_loop_log, no_namespace=True)
+        assert loop.comment == '    # bridge out of Guard12, 6 ops'
 
-def test_floats():
-    x = '''
-    [f0]
-    f1 = float_add(f0, 3.5)
-    '''
-    loop = parse(x)
-    assert isinstance(loop.operations[0].getarg(0), BoxFloat)
-    
-def test_debug_merge_point():
-    x = '''
-    []
-    debug_merge_point("info", 0)
-    debug_merge_point('info', 1)
-    debug_merge_point('<some ('other,')> info', 1)
-    debug_merge_point('(stuff) #1', 1)
-    '''
-    loop = parse(x)
-    assert loop.operations[0].getarg(0)._get_str() == 'info'
-    assert loop.operations[1].getarg(0)._get_str() == 'info'
-    assert loop.operations[2].getarg(0)._get_str() == "<some ('other,')> info"
-    assert loop.operations[3].getarg(0)._get_str() == "(stuff) #1"
-    
+    def test_parse_new_with_comma(self):
+        # this is generated by PYPYJITLOG, check that we can handle it
+        x = '''
+        []
+        p0 = new(, descr=<SizeDescr 12>)
+        '''
+        loop = self.parse(x)
+        assert loop.operations[0].getopname() == 'new'
 
-def test_descr_with_obj_print():
-    x = '''
-    [p0]
-    setfield_gc(p0, 1, descr=<SomeDescr>)
-    '''
-    loop = parse(x)
-    # assert did not explode
+    def test_no_fail_args(self):
+        x = '''
+        [i0]
+        guard_true(i0, descr=<Guard0>)
+        '''
+        loop = self.parse(x, nonstrict=True)
+        assert loop.operations[0].getfailargs() == []
 
-example_loop_log = '''\
-# bridge out of Guard12, 6 ops
-[i0, i1, i2]
-i4 = int_add(i0, 2)
-i6 = int_sub(i1, 1)
-i8 = int_gt(i6, 3)
-guard_true(i8, descr=<Guard15>) [i4, i6]
-debug_merge_point('(no jitdriver.get_printable_location!)', 0)
-jump(i6, i4, descr=<Loop0>)
-'''
+    def test_no_inputargs(self):
+        x = '''
+        i2 = int_add(i0, i1)
+        '''
+        loop = self.parse(x, nonstrict=True)
+        assert loop.inputargs == []
+        assert loop.operations[0].getopname() == 'int_add'
 
-def test_parse_no_namespace():
-    loop = parse(example_loop_log, no_namespace=True)
+    def test_offsets(self):
+        x = """
+        [i0, i1]
+        +10: i2 = int_add(i0, i1)
+        i3 = int_add(i2, 3)
+        """
+        #    +30: --end of the loop--
+        loop = self.parse(x)
+        assert loop.operations[0].offset == 10
+        assert not hasattr(loop.operations[1], 'offset')
 
-def test_attach_comment_to_loop():
-    loop = parse(example_loop_log, no_namespace=True)
-    assert loop.comment == '# bridge out of Guard12, 6 ops'
+    def test_last_offset(self):
+        x = """
+        [i0, i1]
+        +10: i2 = int_add(i0, i1)
+        i3 = int_add(i2, 3)
+        +30: --end of the loop--
+        """
+        loop = self.parse(x)
+        assert len(loop.operations) == 2
+        assert loop.last_offset == 30
 
-def test_parse_new_with_comma():
-    # this is generated by PYPYJITLOG, check that we can handle it
-    x = '''
-    []
-    p0 = new(, descr=<SizeDescr 12>)
-    '''
-    loop = parse(x)
-    assert loop.operations[0].getopname() == 'new'
 
-def test_no_fail_args():
-    x = '''
-    [i0]
-    guard_true(i0, descr=<Guard0>)
-    '''
-    loop = parse(x, nonstrict=True)
-    assert loop.operations[0].getfailargs() == []
+class TestOpParser(BaseTestOparser):
 
-def test_no_inputargs():
-    x = '''
-    i2 = int_add(i0, i1)
-    '''
-    loop = parse(x, nonstrict=True)
-    assert loop.inputargs == []
-    assert loop.operations[0].getopname() == 'int_add'
+    OpParser = OpParser
 
-def test_offsets():
-    x = """
-    [i0, i1]
-    +10: i2 = int_add(i0, i1)
-    i3 = int_add(i2, 3)
-    """
-    #    +30: --end of the loop--
-    loop = parse(x)
-    assert loop.operations[0].offset == 10
-    assert not hasattr(loop.operations[1], 'offset')
+    def test_boxkind(self):
+        x = """
+        [sum0]
+        """
+        loop = self.parse(x, None, {}, boxkinds={'sum': BoxInt})
+        b = loop.getboxes()
+        assert isinstance(b.sum0, BoxInt)
 
-def test_last_offset():
-    x = """
-    [i0, i1]
-    +10: i2 = int_add(i0, i1)
-    i3 = int_add(i2, 3)
-    +30: --end of the loop--
-    """
-    loop = parse(x)
-    assert len(loop.operations) == 2
-    assert loop.last_offset == 30
+
+class ForbiddenModule(object):
+    def __init__(self, name, old_mod):
+        self.name = name
+        self.old_mod = old_mod
+
+    def __getattr__(self, attr):
+        assert False, "You should not import module %s" % self.name
+
+
+class TestOpParserWithMock(BaseTestOparser):
+
+    class OpParser(OpParser):
+        use_mock_model = True
+
+    def setup_class(cls):
+        forbidden_mods = [
+            'pypy.jit.metainterp.history',
+            'pypy.rpython.lltypesystem.lltype',
+            ]
+        for modname in forbidden_mods:
+            if modname in sys.modules:
+                newmod = ForbiddenModule(modname, sys.modules[modname])
+                sys.modules[modname] = newmod
+
+    def teardown_class(cls):
+        for modname, mod in sys.modules.iteritems():
+            if isinstance(mod, ForbiddenModule):
+                sys.modules[modname] = mod.old_mod
diff --git a/pypy/module/__builtin__/__init__.py b/pypy/module/__builtin__/__init__.py
--- a/pypy/module/__builtin__/__init__.py
+++ b/pypy/module/__builtin__/__init__.py
@@ -31,6 +31,8 @@
 
         'apply'         : 'app_functional.apply',
         'sorted'        : 'app_functional.sorted',
+        'any'           : 'app_functional.any',
+        'all'           : 'app_functional.all',
         'vars'          : 'app_inspect.vars',
         'dir'           : 'app_inspect.dir',
 
@@ -95,8 +97,6 @@
         'range'         : 'functional.range_int',
         'xrange'        : 'functional.W_XRange',
         'enumerate'     : 'functional.W_Enumerate',
-        'all'           : 'functional.all',
-        'any'           : 'functional.any',
         'min'           : 'functional.min',
         'max'           : 'functional.max',
         'sum'           : 'functional.sum',
diff --git a/pypy/module/__builtin__/app_functional.py b/pypy/module/__builtin__/app_functional.py
--- a/pypy/module/__builtin__/app_functional.py
+++ b/pypy/module/__builtin__/app_functional.py
@@ -16,3 +16,21 @@
     sorted_lst = list(lst)
     sorted_lst.sort(cmp, key, reverse)
     return sorted_lst
+
+def any(seq):
+    """any(iterable) -> bool
+
+Return True if bool(x) is True for any x in the iterable."""
+    for x in seq:
+        if x:
+            return True
+    return False
+
+def all(seq):
+    """all(iterable) -> bool
+
+Return True if bool(x) is True for all values x in the iterable."""
+    for x in seq:
+        if not x:
+            return False
+    return True
diff --git a/pypy/module/__builtin__/functional.py b/pypy/module/__builtin__/functional.py
--- a/pypy/module/__builtin__/functional.py
+++ b/pypy/module/__builtin__/functional.py
@@ -452,40 +452,6 @@
     w_empty = space.call_function(w_str_type)
     return space.call_method(w_empty, "join", space.newlist(result_w))
 
-def all(space, w_S):
-    """all(iterable) -> bool
-
-Return True if bool(x) is True for all values x in the iterable."""
-    w_iter = space.iter(w_S)
-    while True:
-        try:
-            w_next = space.next(w_iter)
-        except OperationError, e:
-            if not e.match(space, space.w_StopIteration):
-                raise       # re-raise other app-level exceptions
-            break
-        if not space.is_true(w_next):
-            return space.w_False
-    return space.w_True
-
-
-def any(space, w_S):
-    """any(iterable) -> bool
-
-Return True if bool(x) is True for any x in the iterable."""
-    w_iter = space.iter(w_S)
-    while True:
-        try:
-            w_next = space.next(w_iter)
-        except OperationError, e:
-            if not e.match(space, space.w_StopIteration):
-                raise       # re-raise other app-level exceptions
-            break
-        if space.is_true(w_next):
-            return space.w_True
-    return space.w_False
-
-
 class W_Enumerate(Wrappable):
 
     def __init__(self, w_iter, w_start):
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -32,15 +32,22 @@
                 space.wrap(reason))
             w_res = space.call_function(w_errorhandler, w_exc)
             if (not space.is_true(space.isinstance(w_res, space.w_tuple))
-                or space.len_w(w_res) != 2):
+                or space.len_w(w_res) != 2
+                or not space.is_true(space.isinstance(
+                                 space.getitem(w_res, space.wrap(0)),
+                                 space.w_unicode))):
+                if decode:
+                    msg = ("decoding error handler must return "
+                           "(unicode, int) tuple, not %s")
+                else:
+                    msg = ("encoding error handler must return "
+                           "(unicode, int) tuple, not %s")
                 raise operationerrfmt(
-                    space.w_TypeError,
-                    "encoding error handler must return "
-                    "(unicode, int) tuple, not %s",
+                    space.w_TypeError, msg,
                     space.str_w(space.repr(w_res)))
             w_replace, w_newpos = space.fixedview(w_res, 2)
             newpos = space.int_w(w_newpos)
-            if (newpos < 0):
+            if newpos < 0:
                 newpos = len(input) + newpos
             if newpos < 0 or newpos > len(input):
                 raise operationerrfmt(
@@ -50,7 +57,9 @@
                 replace = space.unicode_w(w_replace)
                 return replace, newpos
             else:
-                replace = space.str_w(w_replace)
+                from pypy.objspace.std.unicodetype import encode_object
+                w_str = encode_object(space, w_replace, encoding, None)
+                replace = space.str_w(w_str)
                 return replace, newpos
         return unicode_call_errorhandler
 
@@ -160,15 +169,7 @@
 def ignore_errors(space, w_exc):
     check_exception(space, w_exc)
     w_end = space.getattr(w_exc, space.wrap('end'))
-    if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
-        return space.newtuple([space.wrap(''), w_end])
-    elif (space.isinstance_w(w_exc, space.w_UnicodeDecodeError) or
-          space.isinstance_w(w_exc, space.w_UnicodeTranslateError)):
-        return space.newtuple([space.wrap(u''), w_end])
-    else:
-        typename = space.type(w_exc).getname(space, '?')
-        raise operationerrfmt(space.w_TypeError,
-            "don't know how to handle %s in error callback", typename)
+    return space.newtuple([space.wrap(u''), w_end])
 
 def replace_errors(space, w_exc):
     check_exception(space, w_exc)
@@ -176,7 +177,7 @@
     w_end = space.getattr(w_exc, space.wrap('end'))
     size = space.int_w(w_end) - space.int_w(w_start)
     if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
-        text = '?' * size
+        text = u'?' * size
         return space.newtuple([space.wrap(text), w_end])
     elif space.isinstance_w(w_exc, space.w_UnicodeDecodeError):
         text = u'\ufffd'
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -540,6 +540,17 @@
         else:
             assert res == u"\x00\x00\x01\x00\x00" # UCS2 build
 
+    def test_encode_error_bad_handler(self):
+        import codecs
+        codecs.register_error("test.bad_handler", lambda e: (repl, 1))
+        assert u"xyz".encode("latin-1", "test.bad_handler") == "xyz"
+        repl = u"\u1234"
+        raises(UnicodeEncodeError, u"\u5678".encode, "latin-1",
+               "test.bad_handler")
+        repl = u"\u00E9"
+        s = u"\u5678".encode("latin-1", "test.bad_handler")
+        assert s == '\xe9'
+
     def test_charmap_encode(self):
         assert 'xxx'.encode('charmap') == 'xxx'
 
@@ -593,3 +604,11 @@
         assert u'caf\xe9'.encode('mbcs') == 'caf\xe9'
         assert u'\u040a'.encode('mbcs') == '?' # some cyrillic letter
         assert 'cafx\e9'.decode('mbcs') == u'cafx\e9'
+
+    def test_bad_handler_string_result(self):
+        import _codecs
+        def f(exc):
+            return ('foo', exc.end)
+        _codecs.register_error("test.test_codecs_not_a_string", f)
+        raises(TypeError, u'\u1234'.encode, 'ascii',
+               'test.test_codecs_not_a_string')
diff --git a/pypy/module/_ffi/__init__.py b/pypy/module/_ffi/__init__.py
--- a/pypy/module/_ffi/__init__.py
+++ b/pypy/module/_ffi/__init__.py
@@ -4,8 +4,10 @@
 class Module(MixedModule):
 
     interpleveldefs = {
-        'CDLL'               : 'interp_ffi.W_CDLL',
-        'types':             'interp_ffi.W_types',
+        'CDLL':    'interp_ffi.W_CDLL',
+        'types':   'interp_ffi.W_types',
+        'FuncPtr': 'interp_ffi.W_FuncPtr',
+        'get_libc':'interp_ffi.get_libc',
     }
 
     appleveldefs = {}
diff --git a/pypy/module/_ffi/interp_ffi.py b/pypy/module/_ffi/interp_ffi.py
--- a/pypy/module/_ffi/interp_ffi.py
+++ b/pypy/module/_ffi/interp_ffi.py
@@ -4,63 +4,170 @@
     operationerrfmt
 from pypy.interpreter.gateway import interp2app, NoneNotWrapped, unwrap_spec
 from pypy.interpreter.typedef import TypeDef, GetSetProperty
+from pypy.module._rawffi.structure import W_StructureInstance, W_Structure
 #
 from pypy.rpython.lltypesystem import lltype, rffi
 #
 from pypy.rlib import jit
 from pypy.rlib import libffi
 from pypy.rlib.rdynload import DLOpenError
-from pypy.rlib.rarithmetic import intmask
+from pypy.rlib.rarithmetic import intmask, r_uint
 
 class W_FFIType(Wrappable):
-    def __init__(self, name, ffitype):
+
+    _immutable_fields_ = ['name', 'ffitype', 'w_datashape', 'w_pointer_to']
+    
+    def __init__(self, name, ffitype, w_datashape=None, w_pointer_to=None):
         self.name = name
         self.ffitype = ffitype
+        self.w_datashape = w_datashape
+        self.w_pointer_to = w_pointer_to
+        if self.is_struct():
+            assert w_datashape is not None
 
-    def str(self, space):
-        return space.wrap('<ffi type %s>' % self.name)
+    def descr_deref_pointer(self, space):
+        if self.w_pointer_to is None:
+            return space.w_None
+        return self.w_pointer_to
 
+    def repr(self, space):
+        return space.wrap(self.__repr__())
 
+    def __repr__(self):
+        return "<ffi type %s>" % self.name
+
+    def is_signed(self):
+        return (self is app_types.slong or
+                self is app_types.sint or
+                self is app_types.sshort or
+                self is app_types.sbyte or
+                self is app_types.slonglong)
+
+    def is_unsigned(self):
+        return (self is app_types.ulong or
+                self is app_types.uint or
+                self is app_types.ushort or
+                self is app_types.ubyte or
+                self is app_types.ulonglong)
+
+    def is_pointer(self):
+        return self.ffitype is libffi.types.pointer
+
+    def is_char(self):
+        return self is app_types.char
+
+    def is_unichar(self):
+        return self is app_types.unichar
+
+    def is_longlong(self):
+        return libffi.IS_32_BIT and (self is app_types.slonglong or
+                                     self is app_types.ulonglong)
+
+    def is_double(self):
+        return self is app_types.double
+
+    def is_singlefloat(self):
+        return self is app_types.float
+
+    def is_void(self):
+        return self is app_types.void
+
+    def is_struct(self):
+        return libffi.types.is_struct(self.ffitype)
 
 W_FFIType.typedef = TypeDef(
     'FFIType',
-    __str__ = interp2app(W_FFIType.str),
+    __repr__ = interp2app(W_FFIType.repr),
+    deref_pointer = interp2app(W_FFIType.descr_deref_pointer),
     )
 
 
+def build_ffi_types():
+    from pypy.rlib.clibffi import FFI_TYPE_P
+    types = [
+        # note: most of the type name directly come from the C equivalent,
+        # with the exception of bytes: in C, ubyte and char are equivalent,
+        # but for _ffi the first expects a number while the second a 1-length
+        # string
+        W_FFIType('slong',     libffi.types.slong),
+        W_FFIType('sint',      libffi.types.sint),
+        W_FFIType('sshort',    libffi.types.sshort),
+        W_FFIType('sbyte',     libffi.types.schar),
+        W_FFIType('slonglong', libffi.types.slonglong),
+        #
+        W_FFIType('ulong',     libffi.types.ulong),
+        W_FFIType('uint',      libffi.types.uint),
+        W_FFIType('ushort',    libffi.types.ushort),
+        W_FFIType('ubyte',     libffi.types.uchar),
+        W_FFIType('ulonglong', libffi.types.ulonglong),
+        #
+        W_FFIType('char',      libffi.types.uchar),
+        W_FFIType('unichar',   libffi.types.wchar_t),
+        #
+        W_FFIType('double',    libffi.types.double),
+        W_FFIType('float',     libffi.types.float),
+        W_FFIType('void',      libffi.types.void),
+        W_FFIType('void_p',    libffi.types.pointer),
+        #
+        # missing types:
+
+        ## 's' : ffi_type_pointer,
+        ## 'z' : ffi_type_pointer,
+        ## 'O' : ffi_type_pointer,
+        ## 'Z' : ffi_type_pointer,
+
+        ]
+    return dict([(t.name, t) for t in types])
+
+class app_types:
+    pass
+app_types.__dict__ = build_ffi_types()
+
+def descr_new_pointer(space, w_cls, w_pointer_to):
+    try:
+        return descr_new_pointer.cache[w_pointer_to]
+    except KeyError:
+        w_pointer_to = space.interp_w(W_FFIType, w_pointer_to)
+        name = '(pointer to %s)' % w_pointer_to.name
+        w_result = W_FFIType(name, libffi.types.pointer, w_pointer_to = w_pointer_to)
+        descr_new_pointer.cache[w_pointer_to] = w_result
+        return w_result
+descr_new_pointer.cache = {}
+
 class W_types(Wrappable):
     pass
-
-def build_ffi_types():
-    from pypy.rlib.clibffi import FFI_TYPE_P
-    tdict = {}
-    for key, value in libffi.types.__dict__.iteritems():
-        if key == 'getkind' or key.startswith('__'):
-            continue
-        assert lltype.typeOf(value) == FFI_TYPE_P
-        tdict[key] = W_FFIType(key, value)
-    return tdict
-    
 W_types.typedef = TypeDef(
     'types',
-    **build_ffi_types())
+    Pointer = interp2app(descr_new_pointer, as_classmethod=True),
+    **app_types.__dict__)
+
+
+def unwrap_ffitype(space, w_argtype, allow_void=False):
+    res = w_argtype.ffitype
+    if res is libffi.types.void and not allow_void:
+        msg = 'void is not a valid argument type'
+        raise OperationError(space.w_TypeError, space.wrap(msg))
+    return res
+
 
 # ========================================================================
 
 class W_FuncPtr(Wrappable):
 
-    _immutable_fields_ = ['func']
+    _immutable_fields_ = ['func', 'argtypes_w[*]', 'w_restype']
     
-    def __init__(self, func):
+    def __init__(self, func, argtypes_w, w_restype):
         self.func = func
+        self.argtypes_w = argtypes_w
+        self.w_restype = w_restype
 
     @jit.unroll_safe
-    def build_argchain(self, space, argtypes, args_w):
-        expected = len(argtypes)
+    def build_argchain(self, space, args_w):
+        expected = len(self.argtypes_w)
         given = len(args_w)
         if given != expected:
             arg = 'arguments'
-            if len(argtypes) == 1:
+            if len(self.argtypes_w) == 1:
                 arg = 'argument'
             raise operationerrfmt(space.w_TypeError,
                                   '%s() takes exactly %d %s (%d given)',
@@ -68,34 +175,103 @@
         #
         argchain = libffi.ArgChain()
         for i in range(expected):
-            argtype = argtypes[i]
+            w_argtype = self.argtypes_w[i]
             w_arg = args_w[i]
-            kind = libffi.types.getkind(argtype)
-            if kind == 'i':
+            if w_argtype.is_longlong():
+                # note that we must check for longlong first, because either
+                # is_signed or is_unsigned returns true anyway
+                assert libffi.IS_32_BIT
+                kind = libffi.types.getkind(w_argtype.ffitype) # XXX: remove the kind
+                self.arg_longlong(space, argchain, kind, w_arg)
+            elif w_argtype.is_signed():
                 argchain.arg(space.int_w(w_arg))
-            elif kind == 'u':
+            elif w_argtype.is_pointer():
+                w_arg = self.convert_pointer_arg_maybe(space, w_arg, w_argtype)
                 argchain.arg(intmask(space.uint_w(w_arg)))
-            elif kind == 'f':
+            elif w_argtype.is_unsigned():
+                argchain.arg(intmask(space.uint_w(w_arg)))
+            elif w_argtype.is_char():
+                w_arg = space.ord(w_arg)
+                argchain.arg(space.int_w(w_arg))
+            elif w_argtype.is_unichar():
+                w_arg = space.ord(w_arg)
+                argchain.arg(space.int_w(w_arg))
+            elif w_argtype.is_double():
                 argchain.arg(space.float_w(w_arg))
+            elif w_argtype.is_singlefloat():
+                argchain.arg_singlefloat(space.float_w(w_arg))
+            elif w_argtype.is_struct():
+                # arg_raw directly takes value to put inside ll_args
+                w_arg = space.interp_w(W_StructureInstance, w_arg)                
+                ptrval = w_arg.ll_buffer
+                argchain.arg_raw(ptrval)
             else:
-                assert False, "Argument kind '%s' not supported" % kind
+                assert False, "Argument shape '%s' not supported" % w_argtype
         return argchain
 
+    def convert_pointer_arg_maybe(self, space, w_arg, w_argtype):
+        """
+        Try to convert the argument by calling _as_ffi_pointer_()
+        """
+        meth = space.lookup(w_arg, '_as_ffi_pointer_') # this also promotes the type
+        if meth:
+            return space.call_function(meth, w_arg, w_argtype)
+        else:
+            return w_arg
+
+    @jit.dont_look_inside
+    def arg_longlong(self, space, argchain, kind, w_arg):
+        bigarg = space.bigint_w(w_arg)
+        if kind == 'I':
+            llval = bigarg.tolonglong()
+        elif kind == 'U':
+            ullval = bigarg.toulonglong()
+            llval = rffi.cast(rffi.LONGLONG, ullval)
+        else:
+            assert False
+        # this is a hack: we store the 64 bits of the long long into the
+        # 64 bits of a float (i.e., a C double)
+        floatval = libffi.longlong2float(llval)
+        argchain.arg_longlong(floatval)
+
     def call(self, space, args_w):
         self = jit.hint(self, promote=True)
-        argchain = self.build_argchain(space, self.func.argtypes, args_w)
-        reskind = libffi.types.getkind(self.func.restype)
-        if reskind == 'i':
+        argchain = self.build_argchain(space, args_w)
+        w_restype = self.w_restype
+        if w_restype.is_longlong():
+            # note that we must check for longlong first, because either
+            # is_signed or is_unsigned returns true anyway
+            assert libffi.IS_32_BIT
+            reskind = libffi.types.getkind(self.func.restype) # XXX: remove the kind
+            return self._call_longlong(space, argchain, reskind)
+        elif w_restype.is_signed():
             return self._call_int(space, argchain)
-        elif reskind == 'u':
+        elif w_restype.is_unsigned() or w_restype.is_pointer():
             return self._call_uint(space, argchain)
-        elif reskind == 'f':
+        elif w_restype.is_char():
+            intres = self.func.call(argchain, rffi.UCHAR)
+            return space.wrap(chr(intres))
+        elif w_restype.is_unichar():
+            intres = self.func.call(argchain, rffi.WCHAR_T)
+            return space.wrap(unichr(intres))
+        elif w_restype.is_double():
             floatres = self.func.call(argchain, rffi.DOUBLE)
             return space.wrap(floatres)
-        else:
+        elif w_restype.is_singlefloat():
+            # the result is a float, but widened to be inside a double
+            floatres = self.func.call(argchain, rffi.FLOAT)
+            return space.wrap(floatres)
+        elif w_restype.is_struct():
+            w_datashape = w_restype.w_datashape
+            assert isinstance(w_datashape, W_Structure)
+            ptrval = self.func.call(argchain, rffi.ULONG, is_struct=True)
+            return w_datashape.fromaddress(space, ptrval)
+        elif w_restype.is_void():
             voidres = self.func.call(argchain, lltype.Void)
             assert voidres is None
             return space.w_None
+        else:
+            assert False, "Return value shape '%s' not supported" % w_restype
 
     def _call_int(self, space, argchain):
         # if the declared return type of the function is smaller than LONG,
@@ -138,6 +314,10 @@
             # special case
             uintres = call(argchain, rffi.ULONG)
             return space.wrap(uintres)
+        elif restype is libffi.types.pointer:
+            ptrres = call(argchain, rffi.VOIDP)
+            uintres = rffi.cast(rffi.ULONG, ptrres)
+            return space.wrap(uintres)
         elif restype is libffi.types.uint:
             intres = rffi.cast(rffi.LONG, call(argchain, rffi.UINT))
         elif restype is libffi.types.ushort:
@@ -149,16 +329,52 @@
                                  space.wrap('Unsupported restype'))
         return space.wrap(intres)
 
+    @jit.dont_look_inside
+    def _call_longlong(self, space, argchain, reskind):
+        # this is a hack: we store the 64 bits of the long long into the 64
+        # bits of a float (i.e., a C double)
+        floatres = self.func.call(argchain, rffi.LONGLONG)
+        llres = libffi.float2longlong(floatres)
+        if reskind == 'I':
+            return space.wrap(llres)
+        elif reskind == 'U':
+            ullres = rffi.cast(rffi.ULONGLONG, llres)
+            return space.wrap(ullres)
+        else:
+            assert False
+
     def getaddr(self, space):
         """
         Return the physical address in memory of the function
         """
         return space.wrap(rffi.cast(rffi.LONG, self.func.funcsym))
 
+
+
+def unpack_argtypes(space, w_argtypes, w_restype):
+    argtypes_w = [space.interp_w(W_FFIType, w_argtype)
+                  for w_argtype in space.listview(w_argtypes)]
+    argtypes = [unwrap_ffitype(space, w_argtype) for w_argtype in
+                argtypes_w]
+    w_restype = space.interp_w(W_FFIType, w_restype)
+    restype = unwrap_ffitype(space, w_restype, allow_void=True)
+    return argtypes_w, argtypes, w_restype, restype
+
+ at unwrap_spec(addr=r_uint, name=str)
+def descr_fromaddr(space, w_cls, addr, name, w_argtypes, w_restype):
+    argtypes_w, argtypes, w_restype, restype = unpack_argtypes(space,
+                                                               w_argtypes,
+                                                               w_restype)
+    addr = rffi.cast(rffi.VOIDP, addr)
+    func = libffi.Func(name, argtypes, restype, addr)
+    return W_FuncPtr(func, argtypes_w, w_restype)
+
+
 W_FuncPtr.typedef = TypeDef(
-    'FuncPtr',
+    '_ffi.FuncPtr',
     __call__ = interp2app(W_FuncPtr.call),
     getaddr = interp2app(W_FuncPtr.getaddr),
+    fromaddr = interp2app(descr_fromaddr, as_classmethod=True)
     )
 
 
@@ -167,40 +383,57 @@
 
 class W_CDLL(Wrappable):
     def __init__(self, space, name):
+        self.space = space
+        if name is None:
+            self.name = "<None>"
+        else:
+            self.name = name
         try:
             self.cdll = libffi.CDLL(name)
         except DLOpenError, e:
-            raise operationerrfmt(space.w_OSError, '%s: %s', name,
+            raise operationerrfmt(space.w_OSError, '%s: %s', self.name,
                                   e.msg or 'unspecified error')
-        self.name = name
-        self.space = space
-
-    def ffitype(self, w_argtype, allow_void=False):
-        res = self.space.interp_w(W_FFIType, w_argtype).ffitype
-        if res is libffi.types.void and not allow_void:
-            space = self.space
-            msg = 'void is not a valid argument type'
-            raise OperationError(space.w_TypeError, space.wrap(msg))
-        return res
 
     @unwrap_spec(name=str)
     def getfunc(self, space, name, w_argtypes, w_restype):
-        argtypes = [self.ffitype(w_argtype) for w_argtype in
-                    space.listview(w_argtypes)]
-        restype = self.ffitype(w_restype, allow_void=True)
-        func = self.cdll.getpointer(name, argtypes, restype)
-        return W_FuncPtr(func)
+        argtypes_w, argtypes, w_restype, restype = unpack_argtypes(space,
+                                                                   w_argtypes,
+                                                                   w_restype)
+        try:
+            func = self.cdll.getpointer(name, argtypes, restype)
+        except KeyError:
+            raise operationerrfmt(space.w_AttributeError,
+                                  "No symbol %s found in library %s", name, self.name)
+            
+        return W_FuncPtr(func, argtypes_w, w_restype)
 
+    @unwrap_spec(name=str)
+    def getaddressindll(self, space, name):
+        try:
+            address_as_uint = rffi.cast(lltype.Unsigned,
+                                        self.cdll.getaddressindll(name))
+        except KeyError:
+            raise operationerrfmt(space.w_ValueError,
+                                  "No symbol %s found in library %s", name, self.name)
+        return space.wrap(address_as_uint)
 
- at unwrap_spec(name=str)
+ at unwrap_spec(name='str_or_None')
 def descr_new_cdll(space, w_type, name):
     return space.wrap(W_CDLL(space, name))
 
 
 W_CDLL.typedef = TypeDef(
-    'CDLL',
+    '_ffi.CDLL',
     __new__     = interp2app(descr_new_cdll),
     getfunc     = interp2app(W_CDLL.getfunc),
+    getaddressindll = interp2app(W_CDLL.getaddressindll),
     )
 
 # ========================================================================
+
+def get_libc(space):
+    from pypy.rlib.clibffi import get_libc_name
+    try:
+        return space.wrap(W_CDLL(space, get_libc_name()))
+    except OSError, e:
+        raise wrap_oserror(space, e)
diff --git a/pypy/module/_ffi/test/test__ffi.py b/pypy/module/_ffi/test/test__ffi.py
--- a/pypy/module/_ffi/test/test__ffi.py
+++ b/pypy/module/_ffi/test/test__ffi.py
@@ -17,7 +17,13 @@
 
         c_file = udir.ensure("test__ffi", dir=1).join("foolib.c")
         # automatically collect the C source from the docstrings of the tests
-        snippets = []
+        snippets = ["""
+        #ifdef _WIN32
+        #define DLLEXPORT __declspec(dllexport)
+        #else
+        #define DLLEXPORT
+        #endif
+        """]
         for name in dir(cls):
             if name.startswith('test_'):
                 meth = getattr(cls, name)
@@ -35,8 +41,9 @@
         from pypy.rpython.lltypesystem import rffi
         from pypy.rlib.libffi import get_libc_name, CDLL, types
         from pypy.rlib.test.test_libffi import get_libm_name
-        space = gettestobjspace(usemodules=('_ffi',))
+        space = gettestobjspace(usemodules=('_ffi', '_rawffi'))
         cls.space = space
+        cls.w_iswin32 = space.wrap(sys.platform == 'win32')
         cls.w_libfoo_name = space.wrap(cls.prepare_c_example())
         cls.w_libc_name = space.wrap(get_libc_name())
         libm_name = get_libm_name(sys.platform)
@@ -45,6 +52,13 @@
         pow = libm.getpointer('pow', [], types.void)
         pow_addr = rffi.cast(rffi.LONG, pow.funcsym)
         cls.w_pow_addr = space.wrap(pow_addr)
+        #
+        # these are needed for test_single_float_args
+        from ctypes import c_float
+        f_12_34 = c_float(12.34).value
+        f_56_78 = c_float(56.78).value
+        f_result = c_float(f_12_34 + f_56_78).value
+        cls.w_f_12_34_plus_56_78 = space.wrap(f_result)
 
     def test_libload(self):
         import _ffi
@@ -54,10 +68,20 @@
         import _ffi
         raises(OSError, _ffi.CDLL, "xxxxx_this_name_does_not_exist_xxxxx")
 
+    def test_libload_None(self):
+        if self.iswin32:
+            skip("unix specific")
+        from _ffi import CDLL, types
+        # this should return *all* loaded libs, dlopen(NULL)
+        dll = CDLL(None)
+        # Assume CPython, or PyPy compiled with cpyext
+        res = dll.getfunc('Py_IsInitialized', [], types.slong)()
+        assert res == 1
+
     def test_simple_types(self):
         from _ffi import types
-        assert str(types.sint) == '<ffi type sint>'
-        assert str(types.uint) == '<ffi type uint>'
+        assert str(types.sint) == "<ffi type sint>"
+        assert str(types.uint) == "<ffi type uint>"
         
     def test_callfunc(self):
         from _ffi import CDLL, types
@@ -70,10 +94,27 @@
         libm = CDLL(self.libm_name)
         pow = libm.getfunc('pow', [types.double, types.double], types.double)
         assert pow.getaddr() == self.pow_addr
-        
+
+    def test_getaddressindll(self):
+        import sys
+        from _ffi import CDLL, types
+        libm = CDLL(self.libm_name)
+        pow_addr = libm.getaddressindll('pow')
+        assert pow_addr == self.pow_addr & (sys.maxint*2-1)
+
+    def test_func_fromaddr(self):
+        import sys
+        from _ffi import CDLL, types, FuncPtr
+        libm = CDLL(self.libm_name)
+        pow_addr = libm.getaddressindll('pow')
+        pow = FuncPtr.fromaddr(pow_addr, 'pow', [types.double, types.double],
+                               types.double)
+        assert pow(2, 3) == 8
+
+
     def test_int_args(self):
         """
-            int sum_xy(int x, int y)
+            DLLEXPORT int sum_xy(int x, int y)
             {
                 return x+y;
             }
@@ -86,8 +127,8 @@
     def test_void_result(self):
         """
             int dummy = 0;
-            void set_dummy(int val) { dummy = val; }
-            int get_dummy() { return dummy; }
+            DLLEXPORT void set_dummy(int val) { dummy = val; }
+            DLLEXPORT int get_dummy() { return dummy; }
         """
         from _ffi import CDLL, types
         libfoo = CDLL(self.libfoo_name)
@@ -96,10 +137,105 @@
         assert get_dummy() == 0
         assert set_dummy(42) is None
         assert get_dummy() == 42
+        set_dummy(0)
+
+    def test_pointer_args(self):
+        """
+            extern int dummy; // defined in test_void_result 
+            DLLEXPORT int* get_dummy_ptr() { return &dummy; }
+            DLLEXPORT void set_val_to_ptr(int* ptr, int val) { *ptr = val; }
+        """
+        from _ffi import CDLL, types
+        libfoo = CDLL(self.libfoo_name)
+        get_dummy = libfoo.getfunc('get_dummy', [], types.sint)
+        get_dummy_ptr = libfoo.getfunc('get_dummy_ptr', [], types.void_p)
+        set_val_to_ptr = libfoo.getfunc('set_val_to_ptr',
+                                        [types.void_p, types.sint],
+                                        types.void)
+        assert get_dummy() == 0
+        ptr = get_dummy_ptr()
+        set_val_to_ptr(ptr, 123)
+        assert get_dummy() == 123
+        set_val_to_ptr(ptr, 0)
+
+    def test_convert_pointer_args(self):
+        """
+            extern int dummy; // defined in test_void_result 
+            DLLEXPORT int* get_dummy_ptr(); // defined in test_pointer_args
+            DLLEXPORT void set_val_to_ptr(int* ptr, int val); // ditto
+        """
+        from _ffi import CDLL, types
+
+        class MyPointerWrapper(object):
+            def __init__(self, value):
+                self.value = value
+            def _as_ffi_pointer_(self, ffitype):
+                assert ffitype is types.void_p
+                return self.value
+        
+        libfoo = CDLL(self.libfoo_name)
+        get_dummy = libfoo.getfunc('get_dummy', [], types.sint)
+        get_dummy_ptr = libfoo.getfunc('get_dummy_ptr', [], types.void_p)
+        set_val_to_ptr = libfoo.getfunc('set_val_to_ptr',
+                                        [types.void_p, types.sint],
+                                        types.void)
+        assert get_dummy() == 0
+        ptr = get_dummy_ptr()
+        assert type(ptr) in (int, long)
+        ptr2 = MyPointerWrapper(ptr)
+        set_val_to_ptr(ptr2, 123)
+        assert get_dummy() == 123
+        set_val_to_ptr(ptr2, 0)
+
+    def test_typed_pointer(self):
+        from _ffi import types
+        intptr = types.Pointer(types.sint) # create a typed pointer to sint
+        assert intptr.deref_pointer() is types.sint
+        assert str(intptr) == '<ffi type (pointer to sint)>'
+        assert types.sint.deref_pointer() is None
+        raises(TypeError, "types.Pointer(42)")
+
+    def test_pointer_identity(self):
+        from _ffi import types
+        x = types.Pointer(types.slong)
+        y = types.Pointer(types.slong)
+        z = types.Pointer(types.char)
+        assert x is y
+        assert x is not z
+
+    def test_typed_pointer_args(self):
+        """
+            extern int dummy; // defined in test_void_result 
+            DLLEXPORT int* get_dummy_ptr(); // defined in test_pointer_args
+            DLLEXPORT void set_val_to_ptr(int* ptr, int val); // ditto
+        """
+        from _ffi import CDLL, types
+
+        libfoo = CDLL(self.libfoo_name)
+        intptr = types.Pointer(types.sint)
+        get_dummy = libfoo.getfunc('get_dummy', [], types.sint)
+        get_dummy_ptr = libfoo.getfunc('get_dummy_ptr', [], intptr)
+        set_val_to_ptr = libfoo.getfunc('set_val_to_ptr', [intptr, types.sint], types.void)
+        assert get_dummy() == 0
+        ptr = get_dummy_ptr()
+        set_val_to_ptr(ptr, 123)
+        assert get_dummy() == 123
+        set_val_to_ptr(ptr, 0)
+
+    def test_huge_pointer_args(self):
+        """
+            #include <stdlib.h>
+            DLLEXPORT long is_null_ptr(void* ptr) { return ptr == NULL; }
+        """
+        import sys
+        from _ffi import CDLL, types
+        libfoo = CDLL(self.libfoo_name)
+        is_null_ptr = libfoo.getfunc('is_null_ptr', [types.void_p], types.ulong)
+        assert not is_null_ptr(sys.maxint+1)
 
     def test_unsigned_long_args(self):
         """
-            unsigned long sum_xy_ul(unsigned long x, unsigned long y)
+            DLLEXPORT unsigned long sum_xy_ul(unsigned long x, unsigned long y)
             {
                 return x+y;
             }
@@ -114,12 +250,11 @@
 
     def test_unsigned_short_args(self):
         """
-            unsigned short sum_xy_us(unsigned short x, unsigned short y)
+            DLLEXPORT unsigned short sum_xy_us(unsigned short x, unsigned short y)
             {
                 return x+y;
             }
         """
-        import sys
         from _ffi import CDLL, types
         libfoo = CDLL(self.libfoo_name)
         sum_xy = libfoo.getfunc('sum_xy_us', [types.ushort, types.ushort],
@@ -127,6 +262,166 @@
         assert sum_xy(32000, 8000) == 40000
         assert sum_xy(60000, 30000) == 90000 % 65536
 
+    def test_unsigned_byte_args(self):
+        """
+            DLLEXPORT unsigned char sum_xy_ub(unsigned char x, unsigned char y)
+            {
+                return x+y;
+            }
+        """
+        from _ffi import CDLL, types
+        libfoo = CDLL(self.libfoo_name)
+        sum_xy = libfoo.getfunc('sum_xy_us', [types.ubyte, types.ubyte],
+                                types.ubyte)
+        assert sum_xy(100, 40) == 140
+        assert sum_xy(200, 60) == 260 % 256
+
+    def test_signed_byte_args(self):
+        """
+            DLLEXPORT signed char sum_xy_sb(signed char x, signed char y)
+            {
+                return x+y;
+            }
+        """
+        from _ffi import CDLL, types
+        libfoo = CDLL(self.libfoo_name)
+        sum_xy = libfoo.getfunc('sum_xy_sb', [types.sbyte, types.sbyte],
+                                types.sbyte)
+        assert sum_xy(10, 20) == 30
+        assert sum_xy(100, 28) == -128
+
+    def test_char_args(self):
+        """
+            DLLEXPORT char my_toupper(char x)
+            {
+                return x - ('a'-'A');
+            }
+        """
+        from _ffi import CDLL, types
+        libfoo = CDLL(self.libfoo_name)
+        my_toupper = libfoo.getfunc('my_toupper', [types.char],
+                                    types.char)
+        assert my_toupper('c') == 'C'
+
+    def test_unichar_args(self):
+        """
+            #include <stddef.h>
+            DLLEXPORT wchar_t sum_xy_wc(wchar_t x, wchar_t y)
+            {
+                return x + y;
+            }
+        """
+        from _ffi import CDLL, types
+        libfoo = CDLL(self.libfoo_name)
+        sum_xy = libfoo.getfunc('sum_xy_wc', [types.unichar, types.unichar],
+                                types.unichar)
+        res = sum_xy(unichr(1000), unichr(2000))
+        assert type(res) is unicode
+        assert ord(res) == 3000
+
+    def test_single_float_args(self):
+        """
+            DLLEXPORT float sum_xy_float(float x, float y)
+            {
+                return x+y;
+            }
+        """
+        from _ffi import CDLL, types
+        libfoo = CDLL(self.libfoo_name)
+        sum_xy = libfoo.getfunc('sum_xy_float', [types.float, types.float],
+                                types.float)
+        res = sum_xy(12.34, 56.78)
+        assert res == self.f_12_34_plus_56_78
+
+
+    def test_slonglong_args(self):
+        """
+            DLLEXPORT long long sum_xy_longlong(long long x, long long y)
+            {
+                return x+y;
+            }
+        """
+        from _ffi import CDLL, types
+        maxint32 = 2147483647 # we cannot really go above maxint on 64 bits
+                              # (and we would not test anything, as there long
+                              # is the same as long long)
+
+        libfoo = CDLL(self.libfoo_name)
+        sum_xy = libfoo.getfunc('sum_xy_longlong', [types.slonglong, types.slonglong],
+                                types.slonglong)
+        x = maxint32+1
+        y = maxint32+2
+        res = sum_xy(x, y)
+        expected = maxint32*2 + 3
+        assert res == expected
+
+    def test_ulonglong_args(self):
+        """
+            DLLEXPORT unsigned long long sum_xy_ulonglong(unsigned long long x,
+                                                unsigned long long y)
+            {
+                return x+y;
+            }
+        """
+        from _ffi import CDLL, types
+        maxint64 = 9223372036854775807 # maxint64+1 does not fit into a
+                                       # longlong, but it does into a
+                                       # ulonglong
+        libfoo = CDLL(self.libfoo_name)
+        sum_xy = libfoo.getfunc('sum_xy_ulonglong', [types.ulonglong, types.ulonglong],
+                                types.ulonglong)
+        x = maxint64+1
+        y = 2
+        res = sum_xy(x, y)
+        expected = maxint64 + 3
+        assert res == expected
+
+    def test_byval_argument(self):
+        """
+            struct Point {
+                long x;
+                long y;
+            };
+
+            DLLEXPORT long sum_point(struct Point p) {
+                return p.x + p.y;
+            }
+        """
+        import _rawffi
+        from _ffi import CDLL, types
+        POINT = _rawffi.Structure([('x', 'l'), ('y', 'l')])
+        ffi_point = POINT.get_ffi_type()
+        libfoo = CDLL(self.libfoo_name)
+        sum_point = libfoo.getfunc('sum_point', [ffi_point], types.slong)
+        #
+        p = POINT()
+        p.x = 30
+        p.y = 12
+        res = sum_point(p)
+        assert res == 42
+        p.free()
+
+    def test_byval_result(self):
+        """
+            DLLEXPORT struct Point make_point(long x, long y) {
+                struct Point p;
+                p.x = x;
+                p.y = y;
+                return p;
+            }
+        """
+        import _rawffi
+        from _ffi import CDLL, types
+        POINT = _rawffi.Structure([('x', 'l'), ('y', 'l')])
+        ffi_point = POINT.get_ffi_type()
+        libfoo = CDLL(self.libfoo_name)
+        make_point = libfoo.getfunc('make_point', [types.slong, types.slong], ffi_point)
+        #
+        p = make_point(12, 34)
+        assert p.x == 12
+        assert p.y == 34
+        p.free()
+
     def test_TypeError_numargs(self):
         from _ffi import CDLL, types
         libfoo = CDLL(self.libfoo_name)
@@ -142,3 +437,10 @@
     def test_OSError_loading(self):
         from _ffi import CDLL, types
         raises(OSError, "CDLL('I do not exist')")
+
+    def test_AttributeError_missing_function(self):
+        from _ffi import CDLL, types
+        libfoo = CDLL(self.libfoo_name)
+        raises(AttributeError, "libfoo.getfunc('I_do_not_exist', [], types.void)")
+        libnone = CDLL(None)
+        raises(AttributeError, "libnone.getfunc('I_do_not_exist', [], types.void)")
diff --git a/pypy/module/_multibytecodec/c_codecs.py b/pypy/module/_multibytecodec/c_codecs.py
--- a/pypy/module/_multibytecodec/c_codecs.py
+++ b/pypy/module/_multibytecodec/c_codecs.py
@@ -3,6 +3,8 @@
 from pypy.translator.tool.cbuild import ExternalCompilationInfo
 from pypy.tool.autopath import pypydir
 
+UNICODE_REPLACEMENT_CHARACTER = u'\uFFFD'
+
 
 class EncodeDecodeError(Exception):
     def __init__(self, start, end, reason):
@@ -103,8 +105,12 @@
                                           [DECODEBUF_P], rffi.SSIZE_T)
 pypy_cjk_dec_inbuf_consumed = llexternal('pypy_cjk_dec_inbuf_consumed',
                                          [DECODEBUF_P], rffi.SSIZE_T)
+pypy_cjk_dec_replace_on_error = llexternal('pypy_cjk_dec_replace_on_error',
+                                           [DECODEBUF_P, rffi.CWCHARP,
+                                            rffi.SSIZE_T, rffi.SSIZE_T],
+                                           rffi.SSIZE_T)
 
-def decode(codec, stringdata):
+def decode(codec, stringdata, errors="strict", errorcb=None, namecb=None):
     inleft = len(stringdata)
     inbuf = rffi.get_nonmovingbuffer(stringdata)
     try:
@@ -112,10 +118,12 @@
         if not decodebuf:
             raise MemoryError
         try:
-            r = pypy_cjk_dec_chunk(decodebuf)
-            if r != 0:
-                multibytecodec_decerror(decodebuf, r)
-                assert False
+            while True:
+                r = pypy_cjk_dec_chunk(decodebuf)
+                if r == 0:
+                    break
+                multibytecodec_decerror(decodebuf, r, errors,
+                                        errorcb, namecb, stringdata)
             src = pypy_cjk_dec_outbuf(decodebuf)
             length = pypy_cjk_dec_outlen(decodebuf)
             return rffi.wcharpsize2unicode(src, length)
@@ -126,7 +134,8 @@
     finally:
         rffi.free_nonmovingbuffer(stringdata, inbuf)
 
-def multibytecodec_decerror(decodebuf, e):
+def multibytecodec_decerror(decodebuf, e, errors,
+                            errorcb, namecb, stringdata):
     if e > 0:
         reason = "illegal multibyte sequence"
         esize = e
@@ -138,12 +147,27 @@
     else:
         raise RuntimeError
     #
-    # if errors == ERROR_REPLACE:...
-    # if errors == ERROR_IGNORE or errors == ERROR_REPLACE:...
+    # compute the unicode to use as a replacement -> 'replace', and
+    # the current position in the input 'unicodedata' -> 'end'
     start = pypy_cjk_dec_inbuf_consumed(decodebuf)
     end = start + esize
-    if 1:  # errors == ERROR_STRICT:
+    if errors == "strict":
         raise EncodeDecodeError(start, end, reason)
+    elif errors == "ignore":
+        replace = u""
+    elif errors == "replace":
+        replace = UNICODE_REPLACEMENT_CHARACTER
+    else:
+        assert errorcb
+        replace, end = errorcb(errors, namecb, reason,
+                               stringdata, start, end)
+    inbuf = rffi.get_nonmoving_unicodebuffer(replace)
+    try:
+        r = pypy_cjk_dec_replace_on_error(decodebuf, inbuf, len(replace), end)
+    finally:
+        rffi.free_nonmoving_unicodebuffer(replace, inbuf)
+    if r == MBERR_NOMEMORY:
+        raise MemoryError
 
 # ____________________________________________________________
 # Encoding
@@ -165,8 +189,12 @@
                                           [ENCODEBUF_P], rffi.SSIZE_T)
 pypy_cjk_enc_inbuf_consumed = llexternal('pypy_cjk_enc_inbuf_consumed',
                                          [ENCODEBUF_P], rffi.SSIZE_T)
+pypy_cjk_enc_replace_on_error = llexternal('pypy_cjk_enc_replace_on_error',
+                                           [ENCODEBUF_P, rffi.CCHARP,
+                                            rffi.SSIZE_T, rffi.SSIZE_T],
+                                           rffi.SSIZE_T)
 
-def encode(codec, unicodedata):
+def encode(codec, unicodedata, errors="strict", errorcb=None, namecb=None):
     inleft = len(unicodedata)
     inbuf = rffi.get_nonmoving_unicodebuffer(unicodedata)
     try:
@@ -174,14 +202,18 @@
         if not encodebuf:
             raise MemoryError
         try:
-            r = pypy_cjk_enc_chunk(encodebuf)
-            if r != 0:
-                multibytecodec_encerror(encodebuf, r)
-                assert False
-            r = pypy_cjk_enc_reset(encodebuf)
-            if r != 0:
-                multibytecodec_encerror(encodebuf, r)
-                assert False
+            while True:
+                r = pypy_cjk_enc_chunk(encodebuf)
+                if r == 0:
+                    break
+                multibytecodec_encerror(encodebuf, r, errors,
+                                        codec, errorcb, namecb, unicodedata)
+            while True:
+                r = pypy_cjk_enc_reset(encodebuf)
+                if r == 0:
+                    break
+                multibytecodec_encerror(encodebuf, r, errors,
+                                        codec, errorcb, namecb, unicodedata)
             src = pypy_cjk_enc_outbuf(encodebuf)
             length = pypy_cjk_enc_outlen(encodebuf)
             return rffi.charpsize2str(src, length)
@@ -192,7 +224,8 @@
     finally:
         rffi.free_nonmoving_unicodebuffer(unicodedata, inbuf)
 
-def multibytecodec_encerror(encodebuf, e):
+def multibytecodec_encerror(encodebuf, e, errors,
+                            codec, errorcb, namecb, unicodedata):
     if e > 0:
         reason = "illegal multibyte sequence"
         esize = e
@@ -204,9 +237,27 @@
     else:
         raise RuntimeError
     #
-    # if errors == ERROR_REPLACE:...
-    # if errors == ERROR_IGNORE or errors == ERROR_REPLACE:...
+    # compute the string to use as a replacement -> 'replace', and
+    # the current position in the input 'unicodedata' -> 'end'
     start = pypy_cjk_enc_inbuf_consumed(encodebuf)
     end = start + esize
-    if 1:  # errors == ERROR_STRICT:
+    if errors == "strict":
         raise EncodeDecodeError(start, end, reason)
+    elif errors == "ignore":
+        replace = ""
+    elif errors == "replace":
+        try:
+            replace = encode(codec, u"?")
+        except EncodeDecodeError:
+            replace = "?"
+    else:
+        assert errorcb
+        replace, end = errorcb(errors, namecb, reason,
+                               unicodedata, start, end)
+    inbuf = rffi.get_nonmovingbuffer(replace)
+    try:
+        r = pypy_cjk_enc_replace_on_error(encodebuf, inbuf, len(replace), end)
+    finally:
+        rffi.free_nonmovingbuffer(replace, inbuf)
+    if r == MBERR_NOMEMORY:
+        raise MemoryError
diff --git a/pypy/module/_multibytecodec/interp_multibytecodec.py b/pypy/module/_multibytecodec/interp_multibytecodec.py
--- a/pypy/module/_multibytecodec/interp_multibytecodec.py
+++ b/pypy/module/_multibytecodec/interp_multibytecodec.py
@@ -3,6 +3,7 @@
 from pypy.interpreter.typedef import TypeDef
 from pypy.interpreter.error import OperationError
 from pypy.module._multibytecodec import c_codecs
+from pypy.module._codecs.interp_codecs import CodecState
 
 
 class MultibyteCodec(Wrappable):
@@ -13,13 +14,13 @@
 
     @unwrap_spec(input=str, errors="str_or_None")
     def decode(self, space, input, errors=None):
-        if errors is not None and errors != 'strict':
-            raise OperationError(space.w_NotImplementedError,    # XXX
-                                 space.wrap("errors='%s' in _multibytecodec"
-                                            % errors))
+        if errors is None:
+            errors = 'strict'
+        state = space.fromcache(CodecState)
         #
         try:
-            output = c_codecs.decode(self.codec, input)
+            output = c_codecs.decode(self.codec, input, errors,
+                                     state.decode_error_handler, self.name)
         except c_codecs.EncodeDecodeError, e:
             raise OperationError(
                 space.w_UnicodeDecodeError,
@@ -37,13 +38,13 @@
 
     @unwrap_spec(input=unicode, errors="str_or_None")
     def encode(self, space, input, errors=None):
-        if errors is not None and errors != 'strict':
-            raise OperationError(space.w_NotImplementedError,    # XXX
-                                 space.wrap("errors='%s' in _multibytecodec"
-                                            % errors))
+        if errors is None:
+            errors = 'strict'
+        state = space.fromcache(CodecState)
         #
         try:
-            output = c_codecs.encode(self.codec, input)
+            output = c_codecs.encode(self.codec, input, errors,
+                                     state.encode_error_handler, self.name)
         except c_codecs.EncodeDecodeError, e:
             raise OperationError(
                 space.w_UnicodeEncodeError,
diff --git a/pypy/module/_multibytecodec/test/test_app_codecs.py b/pypy/module/_multibytecodec/test/test_app_codecs.py
--- a/pypy/module/_multibytecodec/test/test_app_codecs.py
+++ b/pypy/module/_multibytecodec/test/test_app_codecs.py
@@ -36,6 +36,37 @@
         e = raises(UnicodeDecodeError, codec.decode, "~{xyz}").value
         assert e.args == ('hz', '~{xyz}', 2, 4, 'illegal multibyte sequence')
 
+    def test_decode_hz_ignore(self):
+        import _codecs_cn
+        codec = _codecs_cn.getcodec("hz")
+        r = codec.decode("def~{}abc", errors='ignore')
+        assert r == (u'def\u5fcf', 9)
+        r = codec.decode("def~{}abc", 'ignore')
+        assert r == (u'def\u5fcf', 9)
+
+    def test_decode_hz_replace(self):
+        import _codecs_cn
+        codec = _codecs_cn.getcodec("hz")
+        r = codec.decode("def~{}abc", errors='replace')
+        assert r == (u'def\ufffd\u5fcf', 9)
+        r = codec.decode("def~{}abc", 'replace')
+        assert r == (u'def\ufffd\u5fcf', 9)
+
+    def test_decode_custom_error_handler(self):
+        import codecs
+        codecs.register_error("test.decode_custom_error_handler",
+                              lambda e: (u'\u1234\u5678', e.end))
+        u = "abc\xDD".decode("hz", "test.decode_custom_error_handler")
+        assert u == u'abc\u1234\u5678'
+
+    def test_decode_custom_error_handler_overflow(self):
+        import codecs
+        import sys
+        codecs.register_error("test.test_decode_custom_error_handler_overflow",
+                              lambda e: (u'', sys.maxint + 1))
+        raises((IndexError, OverflowError), "abc\xDD".decode, "hz",
+               "test.test_decode_custom_error_handler_overflow")
+
     def test_encode_hz(self):
         import _codecs_cn
         codec = _codecs_cn.getcodec("hz")
@@ -54,3 +85,24 @@
         assert e.start == 3
         assert e.end == 4
         assert e.reason == 'illegal multibyte sequence'
+
+    def test_encode_hz_ignore(self):
+        import _codecs_cn
+        codec = _codecs_cn.getcodec("hz")
+        r = codec.encode(u'abc\u1234def', 'ignore')
+        assert r == ('abcdef', 7)
+        assert type(r[0]) is str
+
+    def test_encode_hz_replace(self):
+        import _codecs_cn
+        codec = _codecs_cn.getcodec("hz")
+        r = codec.encode(u'abc\u1234def', 'replace')
+        assert r == ('abc?def', 7)
+        assert type(r[0]) is str
+
+    def test_encode_custom_error_handler(self):
+        import codecs
+        codecs.register_error("test.multi_bad_handler", lambda e: (repl, 1))
+        repl = u"\u2014"
+        s = u"\uDDA1".encode("gbk", "test.multi_bad_handler")
+        assert s == '\xA1\xAA'
diff --git a/pypy/module/_multibytecodec/test/test_c_codecs.py b/pypy/module/_multibytecodec/test/test_c_codecs.py
--- a/pypy/module/_multibytecodec/test/test_c_codecs.py
+++ b/pypy/module/_multibytecodec/test/test_c_codecs.py
@@ -36,6 +36,16 @@
     assert e.end == 4
     assert e.reason == "illegal multibyte sequence"
 
+def test_decode_hz_ignore():
+    c = getcodec("hz")
+    u = decode(c, 'def~{}abc', 'ignore')
+    assert u == u'def\u5fcf'
+
+def test_decode_hz_replace():
+    c = getcodec("hz")
+    u = decode(c, 'def~{}abc', 'replace')
+    assert u == u'def\ufffd\u5fcf'
+
 def test_encode_hz():
     c = getcodec("hz")
     s = encode(c, u'foobar')
@@ -51,6 +61,16 @@
     assert e.end == 4
     assert e.reason == "illegal multibyte sequence"
 
+def test_encode_hz_ignore():
+    c = getcodec("hz")
+    s = encode(c, u'abc\u1234def', 'ignore')
+    assert s == 'abcdef'
+
+def test_encode_hz_replace():
+    c = getcodec("hz")
+    s = encode(c, u'abc\u1234def', 'replace')
+    assert s == 'abc?def'
+
 def test_encode_jisx0208():
     c = getcodec('iso2022_jp')
     s = encode(c, u'\u83ca\u5730\u6642\u592b')
diff --git a/pypy/module/_multiprocessing/test/test_memory.py b/pypy/module/_multiprocessing/test/test_memory.py
--- a/pypy/module/_multiprocessing/test/test_memory.py
+++ b/pypy/module/_multiprocessing/test/test_memory.py
@@ -3,7 +3,7 @@
 class AppTestMemory:
     def setup_class(cls):
         space = gettestobjspace(
-            usemodules=('_multiprocessing', 'mmap', '_rawffi'))
+            usemodules=('_multiprocessing', 'mmap', '_rawffi', '_ffi'))
         cls.space = space
 
     def test_address_of(self):
diff --git a/pypy/module/_rawffi/interp_rawffi.py b/pypy/module/_rawffi/interp_rawffi.py
--- a/pypy/module/_rawffi/interp_rawffi.py
+++ b/pypy/module/_rawffi/interp_rawffi.py
@@ -250,6 +250,13 @@
     def get_basic_ffi_type(self):
         raise NotImplementedError
 
+    def descr_get_ffi_type(self, space):
+        # XXX: this assumes that you have the _ffi module enabled. In the long
+        # term, probably we will move the code for build structures and arrays
+        # from _rawffi to _ffi
+        from pypy.module._ffi.interp_ffi import W_FFIType
+        return W_FFIType('<unknown>', self.get_basic_ffi_type(), self)
+
     @unwrap_spec(n=int)
     def descr_size_alignment(self, space, n=1):
         return space.newtuple([space.wrap(self.size * n),
diff --git a/pypy/module/_rawffi/structure.py b/pypy/module/_rawffi/structure.py
--- a/pypy/module/_rawffi/structure.py
+++ b/pypy/module/_rawffi/structure.py
@@ -248,7 +248,8 @@
     alignment   = interp_attrproperty('alignment', W_Structure),
     fieldoffset = interp2app(W_Structure.descr_fieldoffset),
     fieldsize   = interp2app(W_Structure.descr_fieldsize),
-    size_alignment = interp2app(W_Structure.descr_size_alignment)
+    size_alignment = interp2app(W_Structure.descr_size_alignment),
+    get_ffi_type   = interp2app(W_Structure.descr_get_ffi_type),
 )
 W_Structure.typedef.acceptable_as_base_class = False
 
diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py
--- a/pypy/module/array/interp_array.py
+++ b/pypy/module/array/interp_array.py
@@ -526,15 +526,7 @@
 
     def array_tostring__Array(space, self):
         cbuf = self.charbuf()
-        s = ''.join([cbuf[i] for i in xrange(self.len * mytype.bytes)])
-        return self.space.wrap(s)
-##
-##         s = ''
-##         i = 0
-##         while i < self.len * mytype.bytes:
-##             s += cbuf[i]
-##             i += 1
-##         return self.space.wrap(s)
+        return self.space.wrap(rffi.charpsize2str(cbuf, self.len * mytype.bytes))
 
     def array_fromfile__Array_ANY_ANY(space, self, w_f, w_n):
         if not isinstance(w_f, W_File):
diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py
--- a/pypy/module/cpyext/api.py
+++ b/pypy/module/cpyext/api.py
@@ -348,6 +348,7 @@
     '_Py_TrueStruct#': ('PyObject*', 'space.w_True'),
     '_Py_ZeroStruct#': ('PyObject*', 'space.w_False'),
     '_Py_NotImplementedStruct#': ('PyObject*', 'space.w_NotImplemented'),
+    '_Py_EllipsisObject#': ('PyObject*', 'space.w_Ellipsis'),
     'PyDateTimeAPI': ('PyDateTime_CAPI*', 'None'),
     }
 FORWARD_DECLS = []
@@ -966,6 +967,7 @@
     state = space.fromcache(State)
     if state.find_extension(name, path) is not None:
         return
+    old_context = state.package_context
     state.package_context = name, path
     try:
         from pypy.rlib import rdynload
@@ -991,7 +993,7 @@
         generic_cpy_call(space, initfunc)
         state.check_and_raise_exception()
     finally:
-        state.package_context = None, None
+        state.package_context = old_context
     state.fixup_extension(name, path)
 
 @specialize.ll()
diff --git a/pypy/module/cpyext/classobject.py b/pypy/module/cpyext/classobject.py
--- a/pypy/module/cpyext/classobject.py
+++ b/pypy/module/cpyext/classobject.py
@@ -31,4 +31,9 @@
         return w_result
     return w_instance.w_class.lookup(space, name)
 
+ at cpython_api([PyObject, PyObject, PyObject], PyObject)
+def PyClass_New(space, w_bases, w_dict, w_name):
+    w_classobj = space.gettypefor(W_ClassObject)
+    return space.call_function(w_classobj,
+                               w_name, w_bases, w_dict)
 
diff --git a/pypy/module/cpyext/frameobject.py b/pypy/module/cpyext/frameobject.py
--- a/pypy/module/cpyext/frameobject.py
+++ b/pypy/module/cpyext/frameobject.py
@@ -1,6 +1,7 @@
 from pypy.rpython.lltypesystem import rffi, lltype
 from pypy.module.cpyext.api import (
-    cpython_api, bootstrap_function, PyObjectFields, cpython_struct)
+    cpython_api, bootstrap_function, PyObjectFields, cpython_struct,
+    CANNOT_FAIL)
 from pypy.module.cpyext.pyobject import (
     PyObject, Py_DecRef, make_ref, from_ref, track_reference,
     make_typedescr, get_typedescr)
@@ -9,6 +10,7 @@
 from pypy.module.cpyext.funcobject import PyCodeObject
 from pypy.interpreter.pyframe import PyFrame
 from pypy.interpreter.pycode import PyCode
+from pypy.interpreter.pytraceback import PyTraceback
 
 PyFrameObjectStruct = lltype.ForwardReference()
 PyFrameObject = lltype.Ptr(PyFrameObjectStruct)
@@ -80,3 +82,8 @@
     frame = space.interp_w(PyFrame, w_frame)
     record_application_traceback(space, state.operror, frame, 0)
     return 0
+
+ at cpython_api([PyObject], rffi.INT_real, error=CANNOT_FAIL)
+def PyTraceBack_Check(space, w_obj):
+    obj = space.interpclass_w(w_obj)
+    return obj is not None and isinstance(obj, PyTraceback)
diff --git a/pypy/module/cpyext/funcobject.py b/pypy/module/cpyext/funcobject.py
--- a/pypy/module/cpyext/funcobject.py
+++ b/pypy/module/cpyext/funcobject.py
@@ -69,6 +69,10 @@
     assert isinstance(w_method, Method)
     return borrow_from(w_method, w_method.w_class)
 
+ at cpython_api([PyObject], PyObject)
+def PyClassMethod_New(space, w_function):
+    return space.call_method(space.builtin, "classmethod", w_function)
+
 def unwrap_list_of_strings(space, w_list):
     return [space.str_w(w_item) for w_item in space.fixedview(w_list)]
 
diff --git a/pypy/module/cpyext/intobject.py b/pypy/module/cpyext/intobject.py
--- a/pypy/module/cpyext/intobject.py
+++ b/pypy/module/cpyext/intobject.py
@@ -4,7 +4,7 @@
 from pypy.module.cpyext.api import (
     cpython_api, build_type_checkers, PyObject,
     CONST_STRING, CANNOT_FAIL, Py_ssize_t)
-from pypy.rlib.rarithmetic import r_uint
+from pypy.rlib.rarithmetic import r_uint, intmask, LONG_TEST
 import sys
 
 PyInt_Check, PyInt_CheckExact = build_type_checkers("Int")
@@ -73,13 +73,24 @@
                              space.wrap("an integer is required, got NULL"))
     return space.int_w(w_obj) # XXX this is wrong on win64
 
+LONG_MAX = int(LONG_TEST - 1)
+
+ at cpython_api([rffi.SIZE_T], PyObject)
+def PyInt_FromSize_t(space, ival):
+    """Create a new integer object with a value of ival. If the value exceeds
+    LONG_MAX, a long integer object is returned.
+    """
+    if ival <= LONG_MAX:
+        return space.wrap(intmask(ival))
+    return space.wrap(ival)
+
 @cpython_api([Py_ssize_t], PyObject)
 def PyInt_FromSsize_t(space, ival):
     """Create a new integer object with a value of ival. If the value is larger
     than LONG_MAX or smaller than LONG_MIN, a long integer object is
     returned.
     """
-    return space.wrap(ival) # XXX this is wrong on win64
+    return space.wrap(ival)
 
 @cpython_api([CONST_STRING, rffi.CCHARPP, rffi.INT_real], PyObject)
 def PyInt_FromString(space, str, pend, base):
diff --git a/pypy/module/cpyext/number.py b/pypy/module/cpyext/number.py
--- a/pypy/module/cpyext/number.py
+++ b/pypy/module/cpyext/number.py
@@ -49,6 +49,13 @@
     failure.  This is the equivalent of the Python expression long(o)."""
     return space.long(w_obj)
 
+ at cpython_api([PyObject], PyObject)
+def PyNumber_Index(space, w_obj):
+    """Returns the o converted to a Python int or long on success or NULL with a
+    TypeError exception raised on failure.
+    """
+    return space.index(w_obj)
+
 def func_rename(newname):
     return lambda func: func_with_new_name(func, newname)
 
diff --git a/pypy/module/cpyext/src/modsupport.c b/pypy/module/cpyext/src/modsupport.c
--- a/pypy/module/cpyext/src/modsupport.c
+++ b/pypy/module/cpyext/src/modsupport.c
@@ -611,8 +611,8 @@
 	if (result != NULL && n > 0) {
 		for (i = 0; i < n; ++i) {
 			tmp = (PyObject *)va_arg(va, PyObject *);
+			Py_INCREF(tmp);
 			PyTuple_SET_ITEM(result, i, tmp);
-			Py_INCREF(tmp);
 		}
 	}
 	return result;
diff --git a/pypy/module/cpyext/stringobject.py b/pypy/module/cpyext/stringobject.py
--- a/pypy/module/cpyext/stringobject.py
+++ b/pypy/module/cpyext/stringobject.py
@@ -2,7 +2,7 @@
 from pypy.rpython.lltypesystem import rffi, lltype
 from pypy.module.cpyext.api import (
     cpython_api, cpython_struct, bootstrap_function, build_type_checkers,
-    PyObjectFields, Py_ssize_t, CONST_STRING)
+    PyObjectFields, Py_ssize_t, CONST_STRING, CANNOT_FAIL)
 from pypy.module.cpyext.pyerrors import PyErr_BadArgument
 from pypy.module.cpyext.pyobject import (
     PyObject, PyObjectP, Py_DecRef, make_ref, from_ref, track_reference,
@@ -203,6 +203,10 @@
     ref[0] = rffi.cast(PyObject, py_newstr)
     return 0
 
+ at cpython_api([PyObject, PyObject], rffi.INT, error=CANNOT_FAIL)
+def _PyString_Eq(space, w_str1, w_str2):
+    return space.eq_w(w_str1, w_str2)
+
 @cpython_api([PyObjectP, PyObject], lltype.Void)
 def PyString_Concat(space, ref, w_newpart):
     """Create a new string object in *string containing the contents of newpart
diff --git a/pypy/module/cpyext/stubs.py b/pypy/module/cpyext/stubs.py
--- a/pypy/module/cpyext/stubs.py
+++ b/pypy/module/cpyext/stubs.py
@@ -172,12 +172,6 @@
     This is equivalent to (PyBUF_ND)."""
     raise NotImplementedError
 
- at cpython_api([Py_buffer], lltype.Void)
-def PyBuffer_Release(space, view):
-    """Release the buffer view.  This should be called when the buffer
-    is no longer being used as it may free memory from it."""
-    raise NotImplementedError
-
 @cpython_api([rffi.CCHARP], Py_ssize_t, error=CANNOT_FAIL)
 def PyBuffer_SizeFromFormat(space, format):
     """Return the implied ~Py_buffer.itemsize from the struct-stype
@@ -198,13 +192,6 @@
     given shape with the given number of bytes per element."""
     raise NotImplementedError
 
- at cpython_api([Py_buffer, PyObject, rffi.VOIDP, Py_ssize_t, rffi.INT_real, rffi.INT_real], rffi.INT_real, error=-1)
-def PyBuffer_FillInfo(space, view, obj, buf, len, readonly, infoflags):
-    """Fill in a buffer-info structure, view, correctly for an exporter that can
-    only share a contiguous chunk of memory of "unsigned bytes" of the given
-    length.  Return 0 on success and -1 (with raising an error) on error."""
-    raise NotImplementedError
-
 @cpython_api([Py_buffer], PyObject)
 def PyMemoryView_FromBuffer(space, view):
     """Create a memoryview object wrapping the given buffer-info structure view.
@@ -1094,14 +1081,6 @@
     """
     raise NotImplementedError
 
- at cpython_api([PyObject], PyObject)
-def PyImport_ReloadModule(space, m):
-    """Reload a module.  This is best described by referring to the built-in
-    Python function reload(), as the standard reload() function calls this
-    function directly.  Return a new reference to the reloaded module, or NULL
-    with an exception set on failure (the module still exists in this case)."""
-    raise NotImplementedError
-
 @cpython_api([rffi.CCHARP, PyObject], PyObject)
 def PyImport_ExecCodeModule(space, name, co):
     """Given a module name (possibly of the form package.module) and a code
@@ -1140,13 +1119,6 @@
     of the bytecode file, in little-endian byte order."""
     raise NotImplementedError
 
- at cpython_api([], PyObject)
-def PyImport_GetModuleDict(space):
-    """Return the dictionary used for the module administration (a.k.a.
-    sys.modules).  Note that this is a per-interpreter variable."""
-    borrow_from()
-    raise NotImplementedError
-
 @cpython_api([PyObject], PyObject)
 def PyImport_GetImporter(space, path):
     """Return an importer object for a sys.path/pkg.__path__ item
@@ -1701,13 +1673,6 @@
     """
     raise NotImplementedError
 
- at cpython_api([rffi.SIZE_T], PyObject)
-def PyInt_FromSize_t(space, ival):
-    """Create a new integer object with a value of ival. If the value exceeds
-    LONG_MAX, a long integer object is returned.
-    """
-    raise NotImplementedError
-
 @cpython_api([PyObject], rffi.ULONGLONG, error=-1)
 def PyInt_AsUnsignedLongLongMask(space, io):
     """Will first attempt to cast the object to a PyIntObject or
@@ -1920,13 +1885,6 @@
     Reference counts are still not increased in this case."""
     raise NotImplementedError
 
- at cpython_api([PyObject], PyObject)
-def PyNumber_Index(space, o):
-    """Returns the o converted to a Python int or long on success or NULL with a
-    TypeError exception raised on failure.
-    """
-    raise NotImplementedError
-
 @cpython_api([PyObject, rffi.INT_real], PyObject)
 def PyNumber_ToBase(space, n, base):
     """Returns the integer n converted to base as a string with a base
@@ -2254,15 +2212,6 @@
     standard C library function exit(status)."""
     raise NotImplementedError
 
- at cpython_api([PyObject, Py_ssize_t, Py_ssize_t], PyObject)
-def PyTuple_GetSlice(space, p, low, high):
-    """Take a slice of the tuple pointed to by p from low to high and return it
-    as a new tuple.
-
-    This function used an int type for low and high. This might
-    require changes in your code for properly supporting 64-bit systems."""
-    raise NotImplementedError
-
 @cpython_api([], rffi.INT_real, error=CANNOT_FAIL)
 def PyTuple_ClearFreeList(space):
     """Clear the free list. Return the total number of freed items.
@@ -2275,14 +2224,6 @@
     """
     raise NotImplementedError
 
- at cpython_api([PyTypeObjectPtr], lltype.Void)
-def PyType_Modified(space, type):
-    """Invalidate the internal lookup cache for the type and all of its
-    subtypes.  This function must be called after any manual
-    modification of the attributes or base classes of the type.
-    """
-    raise NotImplementedError
-
 @cpython_api([PyObject], rffi.INT_real, error=CANNOT_FAIL)
 def PyType_IS_GC(space, o):
     """Return true if the type object includes support for the cycle detector; this
diff --git a/pypy/module/cpyext/test/test_classobject.py b/pypy/module/cpyext/test/test_classobject.py
--- a/pypy/module/cpyext/test/test_classobject.py
+++ b/pypy/module/cpyext/test/test_classobject.py
@@ -40,3 +40,14 @@
         assert not isinstance(api.PyObject_GetAttr(w_instance, space.wrap('f')), Function)
         # _PyInstance_Lookup returns the raw descriptor
         assert isinstance(api._PyInstance_Lookup(w_instance, space.wrap('f')), Function)
+
+    def test_pyclass_new(self, space, api):
+        w_bases = space.newtuple([])
+        w_dict = space.newdict()
+        w_name = space.wrap("C")
+        w_class = api.PyClass_New(w_bases, w_dict, w_name)
+        assert not space.isinstance_w(w_class, space.w_type)
+        w_instance = space.call_function(w_class)
+        assert api.PyInstance_Check(w_instance)
+        assert space.is_true(space.call_method(space.builtin, "isinstance",
+                                               w_instance, w_class))
diff --git a/pypy/module/cpyext/test/test_eval.py b/pypy/module/cpyext/test/test_eval.py
--- a/pypy/module/cpyext/test/test_eval.py
+++ b/pypy/module/cpyext/test/test_eval.py
@@ -193,3 +193,32 @@
             return args
         assert module.call_func(f) == ("text", 42, None)
         assert module.call_method("text") == 2
+
+    def test_CallFunctionObjArgs(self):
+        module = self.import_extension('foo', [
+            ("call_func", "METH_VARARGS",
+             """
+                PyObject *t = PyString_FromString("t");
+                PyObject *res = PyObject_CallFunctionObjArgs(
+                   PyTuple_GetItem(args, 0),
+                   Py_None, NULL);
+                Py_DECREF(t);
+                return res;
+             """),
+            ("call_method", "METH_VARARGS",
+             """
+                PyObject *t = PyString_FromString("t");
+                PyObject *count = PyString_FromString("count");
+                PyObject *res = PyObject_CallMethodObjArgs(
+                   PyTuple_GetItem(args, 0),
+                   count, t, NULL);
+                Py_DECREF(t);
+                Py_DECREF(count);
+                return res;
+             """),
+            ])
+        def f(*args):
+            return args
+        assert module.call_func(f) == (None,)
+        assert module.call_method("text") == 2
+        
diff --git a/pypy/module/cpyext/test/test_frameobject.py b/pypy/module/cpyext/test/test_frameobject.py
--- a/pypy/module/cpyext/test/test_frameobject.py
+++ b/pypy/module/cpyext/test/test_frameobject.py
@@ -64,3 +64,31 @@
         # Cython does not work on CPython as well...
         assert exc.traceback.tb_lineno == 42 # should be 48
         assert frame.f_lineno == 42
+
+    def test_traceback_check(self):
+        module = self.import_extension('foo', [
+            ("traceback_check", "METH_NOARGS",
+             """
+                 int check;
+                 PyObject *type, *value, *tb;
+                 PyObject *ret = PyRun_String("XXX", Py_eval_input, 
+                                              Py_None, Py_None);
+                 if (ret) {
+                     Py_DECREF(ret);
+                     PyErr_SetString(PyExc_AssertionError, "should raise");
+                     return NULL;
+                 }
+                 PyErr_Fetch(&type, &value, &tb);
+                 check = PyTraceBack_Check(tb);
+                 Py_XDECREF(type);
+                 Py_XDECREF(value);
+                 Py_XDECREF(tb);
+                 if (check) {
+                     Py_RETURN_TRUE;
+                 }
+                 else {
+                     Py_RETURN_FALSE;
+                 }
+             """),
+            ])
+        assert module.traceback_check()
diff --git a/pypy/module/cpyext/test/test_funcobject.py b/pypy/module/cpyext/test/test_funcobject.py
--- a/pypy/module/cpyext/test/test_funcobject.py
+++ b/pypy/module/cpyext/test/test_funcobject.py
@@ -44,3 +44,19 @@
         assert w_code.co_firstlineno == 3
         rffi.free_charp(filename)
         rffi.free_charp(funcname)
+
+    def test_classmethod(self, space, api):
+        w_function = space.appexec([], """():
+            def method(x): return x
+            return method
+        """)
+        w_class = space.call_function(space.w_type, space.wrap("C"),
+                                      space.newtuple([]), space.newdict())
+        w_instance = space.call_function(w_class)
+        # regular instance method
+        space.setattr(w_class, space.wrap("method"), w_function)
+        assert space.is_w(space.call_method(w_instance, "method"), w_instance)
+        # now a classmethod
+        w_classmethod = api.PyClassMethod_New(w_function)
+        space.setattr(w_class, space.wrap("classmethod"), w_classmethod)
+        assert space.is_w(space.call_method(w_instance, "classmethod"), w_class)
diff --git a/pypy/module/cpyext/test/test_intobject.py b/pypy/module/cpyext/test/test_intobject.py
--- a/pypy/module/cpyext/test/test_intobject.py
+++ b/pypy/module/cpyext/test/test_intobject.py
@@ -50,3 +50,19 @@
             ])
         assert module.from_string() == 0x1234
         assert type(module.from_string()) is int
+
+    def test_size_t(self):
+        module = self.import_extension('foo', [
+            ("values", "METH_NOARGS",
+             """
+                 return Py_BuildValue("NNNN",
+                     PyInt_FromSize_t(123),
+                     PyInt_FromSize_t((size_t)-1),
+                     PyInt_FromSsize_t(123),
+                     PyInt_FromSsize_t((size_t)-1));
+             """),
+            ])
+        values = module.values()
+        types = [type(x) for x in values]
+        assert types == [int, long, int, int]
+        
diff --git a/pypy/module/cpyext/test/test_number.py b/pypy/module/cpyext/test/test_number.py
--- a/pypy/module/cpyext/test/test_number.py
+++ b/pypy/module/cpyext/test/test_number.py
@@ -25,6 +25,15 @@
         assert api.PyInt_CheckExact(w_l)
         w_l = api.PyNumber_Int(space.wrap(2 << 65))
         assert api.PyLong_CheckExact(w_l)
+        w_l = api.PyNumber_Int(space.wrap(42.3))
+        assert api.PyInt_CheckExact(w_l)
+
+    def test_number_index(self, space, api):
+        w_l = api.PyNumber_Index(space.wrap(123L))
+        assert api.PyLong_CheckExact(w_l)
+        w_l = api.PyNumber_Index(space.wrap(42.3))
+        assert w_l is None
+        api.PyErr_Clear()
 
     def test_numbermethods(self, space, api):
         assert "ab" == space.unwrap(
diff --git a/pypy/module/cpyext/test/test_sliceobject.py b/pypy/module/cpyext/test/test_sliceobject.py
--- a/pypy/module/cpyext/test/test_sliceobject.py
+++ b/pypy/module/cpyext/test/test_sliceobject.py
@@ -67,3 +67,14 @@
              """),
             ])
         assert module.nullslice() == slice(None, None, None)
+
+    def test_ellipsis(self):
+        module = self.import_extension('foo', [
+            ("get_ellipsis", "METH_NOARGS",
+             """
+                 PyObject *ret = Py_Ellipsis;
+                 Py_INCREF(ret);
+                 return ret;
+             """),
+            ])
+        assert module.get_ellipsis() is Ellipsis
diff --git a/pypy/module/cpyext/test/test_stringobject.py b/pypy/module/cpyext/test/test_stringobject.py
--- a/pypy/module/cpyext/test/test_stringobject.py
+++ b/pypy/module/cpyext/test/test_stringobject.py
@@ -283,3 +283,7 @@
         self.raises(space, api, TypeError, api.PyString_AsEncodedObject,
             space.wrap(2), lltype.nullptr(rffi.CCHARP.TO), lltype.nullptr(rffi.CCHARP.TO)
         )
+
+    def test_eq(self, space, api):
+        assert 1 == api._PyString_Eq(space.wrap("hello"), space.wrap("hello"))
+        assert 0 == api._PyString_Eq(space.wrap("hello"), space.wrap("world"))
diff --git a/pypy/module/cpyext/test/test_tupleobject.py b/pypy/module/cpyext/test/test_tupleobject.py
--- a/pypy/module/cpyext/test/test_tupleobject.py
+++ b/pypy/module/cpyext/test/test_tupleobject.py
@@ -42,3 +42,9 @@
         assert api.PyTuple_Size(atuple) == 2
         assert space.eq_w(space.getitem(atuple, space.wrap(0)), space.wrap(0))
         assert space.eq_w(space.getitem(atuple, space.wrap(1)), space.wrap(1))
+
+    def test_getslice(self, space, api):
+        w_tuple = space.newtuple([space.wrap(i) for i in range(10)])
+        w_slice = api.PyTuple_GetSlice(w_tuple, 3, -3)
+        assert space.eq_w(w_slice,
+                          space.newtuple([space.wrap(i) for i in range(3, 7)]))
diff --git a/pypy/module/cpyext/tupleobject.py b/pypy/module/cpyext/tupleobject.py
--- a/pypy/module/cpyext/tupleobject.py
+++ b/pypy/module/cpyext/tupleobject.py
@@ -79,3 +79,10 @@
     Py_DecRef(space, ref[0])
     ref[0] = make_ref(space, py_newtuple)
     return 0
+
+ at cpython_api([PyObject, Py_ssize_t, Py_ssize_t], PyObject)
+def PyTuple_GetSlice(space, w_obj, low, high):
+    """Take a slice of the tuple pointed to by p from low to high and return it
+    as a new tuple.
+    """
+    return space.getslice(w_obj, space.wrap(low), space.wrap(high))
diff --git a/pypy/module/cpyext/typeobject.py b/pypy/module/cpyext/typeobject.py
--- a/pypy/module/cpyext/typeobject.py
+++ b/pypy/module/cpyext/typeobject.py
@@ -650,3 +650,13 @@
     name = space.str_w(w_name)
     w_obj = w_type.lookup(name)
     return borrow_from(w_type, w_obj)
+
+ at cpython_api([PyTypeObjectPtr], lltype.Void)
+def PyType_Modified(space, w_obj):
+    """Invalidate the internal lookup cache for the type and all of its
+    subtypes.  This function must be called after any manual
+    modification of the attributes or base classes of the type.
+    """
+    # PyPy already takes care of direct modifications to type.__dict__
+    # (which is a W_DictProxyObject).
+    pass
diff --git a/pypy/module/micronumpy/__init__.py b/pypy/module/micronumpy/__init__.py
--- a/pypy/module/micronumpy/__init__.py
+++ b/pypy/module/micronumpy/__init__.py
@@ -8,6 +8,7 @@
     interpleveldefs = {
         'array': 'interp_numarray.SingleDimArray',
         'zeros': 'interp_numarray.zeros',
+        'empty': 'interp_numarray.zeros',
 
         # ufuncs
         'absolute': 'interp_ufuncs.absolute',
diff --git a/pypy/module/micronumpy/compile.py b/pypy/module/micronumpy/compile.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/micronumpy/compile.py
@@ -0,0 +1,49 @@
+
+""" This is a set of tools for standalone compiling of numpy expressions.
+It should not be imported by the module itself
+"""
+
+from pypy.module.micronumpy.interp_numarray import FloatWrapper, SingleDimArray
+
+class BogusBytecode(Exception):
+    pass
+
+def create_array(size):
+    a = SingleDimArray(size)
+    for i in range(size):
+        a.storage[i] = float(i % 10)
+    return a
+
+class TrivialSpace(object):
+    def wrap(self, x):
+        return x
+
+def numpy_compile(bytecode, array_size):
+    space = TrivialSpace()
+    stack = []
+    i = 0
+    for b in bytecode:
+        if b == 'a':
+            stack.append(create_array(array_size))
+            i += 1
+        elif b == 'f':
+            stack.append(FloatWrapper(1.2))
+        elif b == '+':
+            right = stack.pop()
+            stack.append(stack.pop().descr_add(space, right))
+        elif b == '-':
+            right = stack.pop()
+            stack.append(stack.pop().descr_sub(space, right))
+        elif b == '*':
+            right = stack.pop()
+            stack.append(stack.pop().descr_mul(space, right))
+        elif b == '/':
+            right = stack.pop()
+            stack.append(stack.pop().descr_div(space, right))
+        else:
+            print "Unknown opcode: %s" % b
+            raise BogusBytecode()
+    if len(stack) != 1:
+        print "Bogus bytecode, uneven stack length"
+        raise BogusBytecode()
+    return stack[0]
diff --git a/pypy/module/micronumpy/interp_numarray.py b/pypy/module/micronumpy/interp_numarray.py
--- a/pypy/module/micronumpy/interp_numarray.py
+++ b/pypy/module/micronumpy/interp_numarray.py
@@ -83,7 +83,6 @@
     def descr_len(self, space):
         return self.get_concrete().descr_len(space)
 
-#    unwrap_spec(item=int)
     def descr_getitem(self, space, w_idx):
         # TODO: indexation by tuples
         start, stop, step, slice_length = space.decode_index4(w_idx, self.find_size())
@@ -101,12 +100,6 @@
         self.invalidated()
         return self.get_concrete().descr_setitem(space, item, value)
 
-#    @unwrap_spec(sta=int, sto=int)
-#    def descr_getslice(self, space, sta, sto):
-#        signature = Signature()
-#        res = SingleDimSlice(sta, sto, self, self.signature.transition(signature))
-#        return res
-
 class FloatWrapper(BaseArray):
     """
     Intermediate class representing a float literal.
@@ -231,7 +224,6 @@
     def eval(self, i):
         return self.parent.eval(self.calc_index(i))
 
-#    @unwrap_spec(item=int)
     def getitem(self, item):
         return self.parent.getitem(self.calc_index(item))
 
@@ -242,8 +234,8 @@
     def descr_len(self, space):
         return space.wrap(self.find_size())
         
-#    def calc_index(self, item):
-#        raise NotImplementedError
+    def calc_index(self, item):
+        raise NotImplementedError
 
 class SingleDimSlice(ViewArray):
     _immutable_fields_ = ["start", "stop", "step", "size"]
@@ -296,9 +288,7 @@
     def descr_len(self, space):
         return space.wrap(self.size)
 
-#    @unwrap_spec(item=int)
     def getitem(self, item):
-#FIXME        item = self.getindex(space, item)
         return self.storage[item]
 
     @unwrap_spec(item=int, value=float)
@@ -330,7 +320,6 @@
     __len__ = interp2app(BaseArray.descr_len),
     __getitem__ = interp2app(BaseArray.descr_getitem),
     __setitem__ = interp2app(BaseArray.descr_setitem),
-#    __getslice__ = interp2app(BaseArray.descr_getslice),
 
     __add__ = interp2app(BaseArray.descr_add),
     __sub__ = interp2app(BaseArray.descr_sub),
diff --git a/pypy/module/micronumpy/test/test_numarray.py b/pypy/module/micronumpy/test/test_numarray.py
--- a/pypy/module/micronumpy/test/test_numarray.py
+++ b/pypy/module/micronumpy/test/test_numarray.py
@@ -18,6 +18,16 @@
         a[13] = 5.3
         assert a[13] == 5.3
 
+    def test_empty(self):
+        """
+        Test that empty() works.
+        """
+
+        from numpy import empty
+        a = empty(2)
+        a[1] = 1.0
+        assert a[1] == 1.0
+
     def test_iterator_init(self):
         from numpy import array
         a = array(range(5))
diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -1,8 +1,9 @@
 from pypy.jit.metainterp.test.support import LLJitMixin
+from pypy.rpython.test.test_llinterp import interpret
 from pypy.module.micronumpy.interp_numarray import (SingleDimArray, Signature,
     FloatWrapper, Call1, Call2, SingleDimSlice, add, mul)
 from pypy.module.micronumpy.interp_ufuncs import negative
-
+from pypy.module.micronumpy.compile import numpy_compile
 
 class FakeSpace(object):
     pass
@@ -126,3 +127,19 @@
                           'setarrayitem_raw': 1, 'int_add': 1,
                           'int_lt': 1, 'guard_true': 1, 'jump': 1})
         assert result == f(5)
+
+class TestTranslation(object):
+    def test_compile(self):
+        x = numpy_compile('aa+f*f/a-', 10)
+        x = x.compute()
+        assert isinstance(x, SingleDimArray)
+        assert x.size == 10
+        assert x.storage[0] == 0
+        assert x.storage[1] == ((1 + 1) * 1.2) / 1.2 - 1
+    
+    def test_translation(self):
+        # we import main to check if the target compiles
+        from pypy.translator.goal.targetnumpystandalone import main
+        from pypy.rpython.annlowlevel import llstr
+        
+        interpret(main, [llstr('af+'), 100])
diff --git a/pypy/module/posix/app_posix.py b/pypy/module/posix/app_posix.py
--- a/pypy/module/posix/app_posix.py
+++ b/pypy/module/posix/app_posix.py
@@ -107,6 +107,9 @@
 def tmpnam():
     """Return an absolute pathname of a file that did not exist at the
     time the call is made."""
+    from warnings import warn
+    warn(RuntimeWarning("tmpnam is a potential security risk to your program"))
+
     import tempfile
     return tempfile.mktemp()
 
@@ -114,6 +117,9 @@
     """Return an absolute pathname of a file that did not exist at the
     time the call is made.  The directory and a prefix may be specified
     as strings; they may be omitted or None if not needed."""
+    from warnings import warn
+    warn(RuntimeWarning("tempnam is a potential security risk to your program"))
+
     import tempfile
     return tempfile.mktemp('', prefix or 'tmp', dir)
 
diff --git a/pypy/module/posix/test/test_posix2.py b/pypy/module/posix/test/test_posix2.py
--- a/pypy/module/posix/test/test_posix2.py
+++ b/pypy/module/posix/test/test_posix2.py
@@ -847,6 +847,21 @@
                 assert os.path.basename(s1).startswith(prefix or 'tmp')
                 assert os.path.basename(s2).startswith(prefix or 'tmp')
 
+    def test_tmpnam_warning(self):
+        import warnings, os
+        #
+        def f_tmpnam_warning(): os.tmpnam()    # a single line
+        #
+        with warnings.catch_warnings(record=True) as w:
+            warnings.simplefilter("always")
+            f_tmpnam_warning()
+            assert len(w) == 1
+            assert issubclass(w[-1].category, RuntimeWarning)
+            assert "potential security risk" in str(w[-1].message)
+            # check that the warning points to the call to os.tmpnam(),
+            # not to some code inside app_posix.py
+            assert w[-1].lineno == f_tmpnam_warning.func_code.co_firstlineno
+
 
 class AppTestEnvironment(object):
     def setup_class(cls):
diff --git a/pypy/module/pypyjit/interp_jit.py b/pypy/module/pypyjit/interp_jit.py
--- a/pypy/module/pypyjit/interp_jit.py
+++ b/pypy/module/pypyjit/interp_jit.py
@@ -16,6 +16,7 @@
 from pypy.interpreter.baseobjspace import ObjSpace, W_Root
 from opcode import opmap
 from pypy.rlib.objectmodel import we_are_translated
+from pypy.rlib.nonconst import NonConstant
 
 PyFrame._virtualizable2_ = ['last_instr', 'pycode',
                             'valuestackdepth', 'valuestack_w[*]',
@@ -57,11 +58,14 @@
         
         space = self.space
         cache = space.fromcache(Cache)
+        if cache.in_recursion:
+            return
         if space.is_true(cache.w_compile_hook):
-            memo = {}
-            list_w = [space.wrap(logger.repr_of_resop(memo, op))
+            logops = logger._make_log_operations()
+            list_w = [space.wrap(logops.repr_of_resop(op))
                       for op in operations]
             pycode = cast_base_ptr_to_instance(PyCode, ll_pycode)
+            cache.in_recursion = True
             try:
                 space.call_function(cache.w_compile_hook,
                                     space.wrap('main'),
@@ -72,14 +76,18 @@
                                     space.newlist(list_w))
             except OperationError, e:
                 e.write_unraisable(space, "jit hook ", cache.w_compile_hook)
+            cache.in_recursion = False
 
     def on_compile_bridge(self, logger, orig_looptoken, operations, n):
         space = self.space
         cache = space.fromcache(Cache)
+        if cache.in_recursion:
+            return
         if space.is_true(cache.w_compile_hook):
-            memo = {}
-            list_w = [space.wrap(logger.repr_of_resop(memo, op))
+            logops = logger._make_log_operations()
+            list_w = [space.wrap(logops.repr_of_resop(op))
                       for op in operations]
+            cache.in_recursion = True
             try:
                 space.call_function(cache.w_compile_hook,
                                     space.wrap('main'),
@@ -88,6 +96,7 @@
                                     space.newlist(list_w))
             except OperationError, e:
                 e.write_unraisable(space, "jit hook ", cache.w_compile_hook)
+            cache.in_recursion = False
 
 pypyjitdriver = PyPyJitDriver(get_printable_location = get_printable_location,
                               get_jitcell_at = get_jitcell_at,
@@ -191,6 +200,8 @@
     return space.call_args(w_callable, __args__)
 
 class Cache(object):
+    in_recursion = False
+    
     def __init__(self, space):
         self.w_compile_hook = space.w_None
 
@@ -209,8 +220,13 @@
     for jit merge point. in case it's `main` it'll be a tuple
     (code, offset, is_being_profiled)
 
+    Note that jit hook is not reentrant. It means that if the code
+    inside the jit hook is itself jitted, it will get compiled, but the
+    jit hook won't be called for that.
+
     XXX write down what else
     """
     cache = space.fromcache(Cache)
     cache.w_compile_hook = w_hook
+    cache.in_recursion = NonConstant(False)
     return space.w_None
diff --git a/pypy/module/pypyjit/test/test_jit_hook.py b/pypy/module/pypyjit/test/test_jit_hook.py
--- a/pypy/module/pypyjit/test/test_jit_hook.py
+++ b/pypy/module/pypyjit/test/test_jit_hook.py
@@ -87,3 +87,19 @@
             sys.stderr = prev
         assert 'jit hook' in s.getvalue()
         assert 'ZeroDivisionError' in s.getvalue()
+
+    def test_non_reentrant(self):
+        import pypyjit
+        l = []
+        
+        def hook(*args):
+            l.append(None)
+            self.on_compile()
+            self.on_compile_bridge()
+        
+        pypyjit.set_compile_hook(hook)
+        self.on_compile()
+        assert len(l) == 1 # and did not crash
+        self.on_compile_bridge()
+        assert len(l) == 2 # and did not crash
+        
diff --git a/pypy/module/pypyjit/test/test_jit_setup.py b/pypy/module/pypyjit/test/test_jit_setup.py
--- a/pypy/module/pypyjit/test/test_jit_setup.py
+++ b/pypy/module/pypyjit/test/test_jit_setup.py
@@ -24,3 +24,13 @@
                 i += 1
 
         assert list(gen(3)) == [0, 1, 4]
+
+def test_interface_residual_call():
+    space = gettestobjspace(usemodules=['pypyjit'])
+    space.appexec([], """():
+        import pypyjit
+        def f(*args, **kwds):
+            return (args, kwds)
+        res = pypyjit.residual_call(f, 4, x=6)
+        assert res == ((4,), {'x': 6})
+    """)
diff --git a/pypy/module/pypyjit/test/test_pypy_c.py b/pypy/module/pypyjit/test/test_pypy_c.py
deleted file mode 100644
--- a/pypy/module/pypyjit/test/test_pypy_c.py
+++ /dev/null
@@ -1,430 +0,0 @@
-from pypy.conftest import gettestobjspace, option
-from pypy.tool.udir import udir
-import py
-from py.test import skip
-import sys, os, re
-import subprocess
-
-class BytecodeTrace(list):
-    def get_opnames(self, prefix=""):
-        return [op.getopname() for op in self
-                    if op.getopname().startswith(prefix)]
-
-    def __repr__(self):
-        return "%s%s" % (self.bytecode, list.__repr__(self))
-
-ZERO_OP_BYTECODES = [
-    'POP_TOP',
-    'ROT_TWO',
-    'ROT_THREE',
-    'DUP_TOP',
-    'ROT_FOUR',
-    'NOP',
-    'DUP_TOPX',
-    'LOAD_CONST',
-    'JUMP_FORWARD',
-    #'JUMP_ABSOLUTE' in theory, but contains signals stuff
-    #'LOAD_FAST' should be here, but currently needs a guard for nonzeroness
-    'STORE_FAST',
-    ]
-
-
-r_bridge = re.compile(r"bridge out of Guard (\d+)")
-
-def from_entry_bridge(text, allparts):
-    firstline = text.splitlines()[0]
-    if 'entry bridge' in firstline:
-        return True
-    match = r_bridge.search(firstline)
-    if match:
-        search = '<Guard' + match.group(1) + '>'
-        for part in allparts:
-            if search in part:
-                break
-        else:
-            raise AssertionError, "%s not found??" % (search,)
-        return from_entry_bridge(part, allparts)
-    return False
-
-def test_from_entry_bridge():
-    assert from_entry_bridge(
-        "# Loop 4 : entry bridge with 31 ops\n[p0, etc", [])
-    assert not from_entry_bridge(
-        "# Loop 1 : loop with 31 ops\n[p0, p1, etc", [])
-    assert not from_entry_bridge(
-        "# bridge out of Guard 5 with 24 ops\n[p0, p1, etc",
-        ["# Loop 1 : loop with 31 ops\n"
-             "[p0, p1]\n"
-             "guard_stuff(descr=<Guard5>)\n"])
-    assert from_entry_bridge(
-        "# bridge out of Guard 5 with 24 ops\n[p0, p1, etc",
-        ["# Loop 1 : entry bridge with 31 ops\n"
-             "[p0, p1]\n"
-             "guard_stuff(descr=<Guard5>)\n"])
-    assert not from_entry_bridge(
-        "# bridge out of Guard 51 with 24 ops\n[p0, p1, etc",
-        ["# Loop 1 : loop with 31 ops\n"
-             "[p0, p1]\n"
-             "guard_stuff(descr=<Guard5>)\n",
-         "# bridge out of Guard 5 with 13 ops\n"
-             "[p0, p1]\n"
-             "guard_other(p1, descr=<Guard51>)\n"])
-    assert from_entry_bridge(
-        "# bridge out of Guard 51 with 24 ops\n[p0, p1, etc",
-        ["# Loop 1 : entry bridge with 31 ops\n"
-             "[p0, p1]\n"
-             "guard_stuff(descr=<Guard5>)\n",
-         "# bridge out of Guard 5 with 13 ops\n"
-             "[p0, p1]\n"
-             "guard_other(p1, descr=<Guard51>)\n"])
-
-
-class PyPyCJITTests(object):
-    def run_source(self, source, expected_max_ops, *testcases, **kwds):
-        assert isinstance(expected_max_ops, int)
-        threshold = kwds.pop('threshold', 3)
-        self.count_debug_merge_point = \
-                                     kwds.pop('count_debug_merge_point', True)
-        if kwds:
-            raise TypeError, 'Unsupported keyword arguments: %s' % kwds.keys()
-        source = py.code.Source(source)
-        filepath = self.tmpdir.join('case%d.py' % self.counter)
-        logfilepath = filepath.new(ext='.log')
-        self.__class__.counter += 1
-        f = filepath.open('w')
-        print >> f, source
-        # some support code...
-        print >> f, py.code.Source("""
-            import sys
-            # we don't want to see the small bridges created
-            # by the checkinterval reaching the limit
-            sys.setcheckinterval(10000000)
-            try: # make the file runnable by CPython
-                import pypyjit
-                pypyjit.set_param(threshold=%d)
-            except ImportError:
-                pass
-
-            def check(args, expected):
-                #print >> sys.stderr, 'trying:', args
-                result = main(*args)
-                #print >> sys.stderr, 'got:', repr(result)
-                assert result == expected
-                assert type(result) is type(expected)
-        """ % threshold)
-        for testcase in testcases * 2:
-            print >> f, "check(%r, %r)" % testcase
-        print >> f, "print 'OK :-)'"
-        f.close()
-
-        print logfilepath
-        env = os.environ.copy()
-        env['PYPYLOG'] = ":%s" % (logfilepath,)
-        p = subprocess.Popen([self.pypy_c, str(filepath)],
-                             env=env, stdout=subprocess.PIPE)
-        result, _ = p.communicate()
-        assert result
-        if result.strip().startswith('SKIP:'):
-            py.test.skip(result.strip())
-        assert result.splitlines()[-1].strip() == 'OK :-)'
-        self.parse_loops(logfilepath)
-        self.print_loops()
-        print logfilepath
-        if self.total_ops > expected_max_ops:
-            assert 0, "too many operations: got %d, expected maximum %d" % (
-                self.total_ops, expected_max_ops)
-        return result
-
-    def parse_loops(self, opslogfile):
-        from pypy.tool import logparser
-        assert opslogfile.check()
-        log = logparser.parse_log_file(str(opslogfile))
-        parts = logparser.extract_category(log, 'jit-log-opt-')
-        self.rawloops = [part for part in parts
-                         if not from_entry_bridge(part, parts)]
-        self.loops, self.sliced_loops, self.total_ops = \
-                                           self.parse_rawloops(self.rawloops)
-        self.check_0_op_bytecodes()
-        self.rawentrybridges = [part for part in parts
-                                if from_entry_bridge(part, parts)]
-        _, self.sliced_entrybridge, _ = \
-                                    self.parse_rawloops(self.rawentrybridges)
-
-        from pypy.jit.tool.jitoutput import parse_prof
-        summaries  = logparser.extract_category(log, 'jit-summary')
-        if len(summaries) > 0:
-            self.jit_summary = parse_prof(summaries[-1])
-        else:
-            self.jit_summary = None
-        
-
-    def parse_rawloops(self, rawloops):
-        from pypy.jit.tool.oparser import parse
-        loops = [parse(part, no_namespace=True) for part in rawloops]
-        sliced_loops = [] # contains all bytecodes of all loops
-        total_ops = 0
-        for loop in loops:
-            for op in loop.operations:
-                if op.getopname() == "debug_merge_point":
-                    sliced_loop = BytecodeTrace()
-                    sliced_loop.bytecode = op.getarg(0)._get_str().rsplit(" ", 1)[1]
-                    sliced_loops.append(sliced_loop)
-                    if self.count_debug_merge_point:
-                        total_ops += 1
-                else:
-                    sliced_loop.append(op)
-                    total_ops += 1
-        return loops, sliced_loops, total_ops
-
-    def check_0_op_bytecodes(self):
-        for bytecodetrace in self.sliced_loops:
-            if bytecodetrace.bytecode not in ZERO_OP_BYTECODES:
-                continue
-            assert not bytecodetrace
-
-    def get_by_bytecode(self, name, from_entry_bridge=False):
-        if from_entry_bridge:
-            sliced_loops = self.sliced_entrybridge
-        else:
-            sliced_loops = self.sliced_loops
-        return [ops for ops in sliced_loops if ops.bytecode == name]
-
-    def print_loops(self):
-        for rawloop in self.rawloops:
-            print
-            print '@' * 79
-            print
-            print rawloop.rstrip()
-        print
-        print '@' * 79
-
-
-    def test_richards(self):
-        self.run_source('''
-            import sys; sys.path[:] = %r
-            from pypy.translator.goal import richards
-
-            def main():
-                return richards.main(iterations = 1)
-        ''' % (sys.path,), 7200,
-                   ([], 42))
-
-
-    def test_overflow_checking(self):
-        startvalue = sys.maxint - 2147483647
-        self.run_source('''
-        def main():
-            def f(a,b):
-                if a < 0: return -1
-                return a-b
-            total = %d
-            for i in range(100000):
-                total += f(i, 5)
-            return total
-        ''' % startvalue, 170, ([], startvalue + 4999450000L))
-
-    def test_shift(self):
-        from sys import maxint
-        maxvals = (-maxint-1, -maxint, maxint-1, maxint)
-        for a in (-4, -3, -2, -1, 0, 1, 2, 3, 4) + maxvals:
-            for b in (0, 1, 2, 31, 32, 33, 61, 62, 63):
-                r = 0
-                if (a >> b) >= 0:
-                    r += 2000
-                if (a << b) > 2:
-                    r += 20000000
-                if abs(a) < 10 and b < 5:
-                    ops = 13
-                else:
-                    ops = 29
-
-                self.run_source('''
-                def main(a, b):
-                    i = sa = 0
-                    while i < 2000:
-                        if a > 0: # Specialises the loop
-                            pass
-                        if b < 2 and b > 0:
-                            pass
-                        if (a >> b) >= 0:
-                            sa += 1
-                        if (a << b) > 2:
-                            sa += 10000
-                        i += 1
-                    return sa
-                ''', ops, ([a, b], r), count_debug_merge_point=False)
-
-    def test_revert_shift(self):
-        from sys import maxint
-        tests = []
-        for a in (1, 4, 8, 100):
-            for b in (-10, 10, -201, 201, -maxint/3, maxint/3):
-                for c in (-10, 10, -maxint/3, maxint/3):
-                    tests.append(([a, b, c], long(4000*(a+b+c))))
-        self.run_source('''
-        def main(a, b, c):
-            from sys import maxint
-            i = sa = 0
-            while i < 2000:
-                if 0 < a < 10: pass
-                if -100 < b < 100: pass
-                if -maxint/2 < c < maxint/2: pass
-                sa += (a<<a)>>a
-                sa += (b<<a)>>a
-                sa += (c<<a)>>a
-                sa += (a<<100)>>100
-                sa += (b<<100)>>100
-                sa += (c<<100)>>100
-                i += 1
-            return long(sa)
-        ''', 93, count_debug_merge_point=False, *tests)
-        
-    def test_division_to_rshift(self):
-        avalues = ('a', 'b', 7, -42, 8)
-        bvalues = ['b'] + range(-10, 0) + range(1,10)
-        code = ''
-        a1, b1, res1 = 10, 20, 0
-        a2, b2, res2 = 10, -20, 0
-        a3, b3, res3 = -10, -20, 0
-        def dd(a, b, aval, bval):
-            m = {'a': aval, 'b': bval}
-            if not isinstance(a, int):
-                a=m[a]
-            if not isinstance(b, int):
-                b=m[b]
-            return a/b
-        for a in avalues:
-            for b in bvalues:
-                code += '                sa += %s / %s\n' % (a, b)
-                res1 += dd(a, b, a1, b1)
-                res2 += dd(a, b, a2, b2)
-                res3 += dd(a, b, a3, b3)
-        # The purpose of this test is to check that we get
-        # the correct results, not really to count operations.
-        self.run_source('''
-        def main(a, b):
-            i = sa = 0
-            while i < 2000:
-%s                
-                i += 1
-            return sa
-        ''' % code, sys.maxint, ([a1, b1], 2000 * res1),
-                                ([a2, b2], 2000 * res2),
-                                ([a3, b3], 2000 * res3))
-
-    def test_mod(self):
-        avalues = ('a', 'b', 7, -42, 8)
-        bvalues = ['b'] + range(-10, 0) + range(1,10)
-        code = ''
-        a1, b1, res1 = 10, 20, 0
-        a2, b2, res2 = 10, -20, 0
-        a3, b3, res3 = -10, -20, 0
-        def dd(a, b, aval, bval):
-            m = {'a': aval, 'b': bval}
-            if not isinstance(a, int):
-                a=m[a]
-            if not isinstance(b, int):
-                b=m[b]
-            return a % b
-        for a in avalues:
-            for b in bvalues:
-                code += '                sa += %s %% %s\n' % (a, b)
-                res1 += dd(a, b, a1, b1)
-                res2 += dd(a, b, a2, b2)
-                res3 += dd(a, b, a3, b3)
-        # The purpose of this test is to check that we get
-        # the correct results, not really to count operations.
-        self.run_source('''
-        def main(a, b):
-            i = sa = 0
-            while i < 2000:
-                if a > 0: pass
-                if 1 < b < 2: pass
-%s
-                i += 1
-            return sa
-        ''' % code, sys.maxint, ([a1, b1], 2000 * res1),
-                                ([a2, b2], 2000 * res2),
-                                ([a3, b3], 2000 * res3))
-
-    def test_dont_trace_every_iteration(self):
-        self.run_source('''
-        def main(a, b):
-            i = sa = 0
-            while i < 200:
-                if a > 0: pass
-                if 1 < b < 2: pass
-                sa += a % b
-                i += 1
-            return sa
-        ''', 22,  ([10, 20], 200 * (10 % 20)),
-                 ([-10, -20], 200 * (-10 % -20)),
-                        count_debug_merge_point=False)
-        assert self.jit_summary.tracing_no == 2
-    def test_id_compare_optimization(self):
-        # XXX: lower the instruction count, 35 is the old value.
-        self.run_source("""
-        class A(object):
-            pass
-        def main():
-            i = 0
-            a = A()
-            while i < 5:
-                if A() != a:
-                    pass
-                i += 1
-        """, 35, ([], None))
-        _, compare = self.get_by_bytecode("COMPARE_OP")
-        assert "call" not in compare.get_opnames()
-
-class AppTestJIT(PyPyCJITTests):
-    def setup_class(cls):
-        if not option.runappdirect:
-            py.test.skip("meant only for pypy-c")
-        # the next line skips stuff if the pypy-c is not a jit build
-        cls.space = gettestobjspace(usemodules=['pypyjit'])
-        cls.tmpdir = udir.join('pypy-jit')
-        cls.tmpdir.ensure(dir=1)
-        cls.counter = 0
-        cls.pypy_c = sys.executable
-
-class TestJIT(PyPyCJITTests):
-    def setup_class(cls):
-        if option.pypy_c is None:
-            py.test.skip("pass --pypy!")
-        if not has_info(option.pypy_c, 'translation.jit'):
-            py.test.skip("must give a pypy-c with the jit enabled")
-        cls.tmpdir = udir.join('pypy-jit')
-        cls.tmpdir.ensure(dir=1)
-        cls.counter = 0
-        cls.pypy_c = option.pypy_c
-
-
-def test_interface_residual_call():
-    space = gettestobjspace(usemodules=['pypyjit'])
-    space.appexec([], """():
-        import pypyjit
-        def f(*args, **kwds):
-            return (args, kwds)
-        res = pypyjit.residual_call(f, 4, x=6)
-        assert res == ((4,), {'x': 6})
-    """)
-
-
-def has_info(pypy_c, option):
-    g = os.popen('"%s" --info' % pypy_c, 'r')
-    lines = g.readlines()
-    g.close()
-    if not lines:
-        raise ValueError("cannot execute %r" % pypy_c)
-    for line in lines:
-        line = line.strip()
-        if line.startswith(option + ':'):
-            line = line[len(option)+1:].strip()
-            if line == 'True':
-                return True
-            elif line == 'False':
-                return False
-            else:
-                return line
-    raise ValueError(option + ' not found in ' + pypy_c)
diff --git a/pypy/module/pypyjit/test_pypy_c/test_model.py b/pypy/module/pypyjit/test_pypy_c/test_00_model.py
rename from pypy/module/pypyjit/test_pypy_c/test_model.py
rename to pypy/module/pypyjit/test_pypy_c/test_00_model.py
--- a/pypy/module/pypyjit/test_pypy_c/test_model.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_00_model.py
@@ -5,6 +5,7 @@
 from lib_pypy import disassembler
 from pypy.tool.udir import udir
 from pypy.tool import logparser
+from pypy.jit.tool.jitoutput import parse_prof
 from pypy.module.pypyjit.test_pypy_c.model import Log, find_ids_range, find_ids, \
     LoopWithIds, OpMatcher
 
@@ -21,6 +22,7 @@
         self.filepath = self.tmpdir.join(meth.im_func.func_name + '.py')
 
     def run(self, func_or_src, args=[], import_site=False, **jitopts):
+        jitopts.setdefault('threshold', 200)
         src = py.code.Source(func_or_src)
         if isinstance(func_or_src, types.FunctionType):
             funcname = func_or_src.func_name
@@ -63,6 +65,13 @@
         rawtraces = logparser.extract_category(rawlog, 'jit-log-opt-')
         log = Log(rawtraces)
         log.result = eval(stdout)
+        #
+        summaries  = logparser.extract_category(rawlog, 'jit-summary')
+        if len(summaries) > 0:
+            log.jit_summary = parse_prof(summaries[-1])
+        else:
+            log.jit_summary = None
+        #
         return log
 
     def run_and_check(self, src, args=[], **jitopts):
diff --git a/pypy/module/pypyjit/test_pypy_c/test__ffi.py b/pypy/module/pypyjit/test_pypy_c/test__ffi.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test__ffi.py
@@ -0,0 +1,133 @@
+import py
+import sys
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class Test__ffi(BaseTestPyPyC):
+
+    def test__ffi_call(self):
+        from pypy.rlib.test.test_libffi import get_libm_name
+        def main(libm_name):
+            try:
+                from _ffi import CDLL, types
+            except ImportError:
+                sys.stderr.write('SKIP: cannot import _ffi\n')
+                return 0
+
+            libm = CDLL(libm_name)
+            pow = libm.getfunc('pow', [types.double, types.double],
+                               types.double)
+            i = 0
+            res = 0
+            while i < 300:
+                tmp = pow(2, 3)   # ID: fficall
+                res += tmp
+                i += 1
+            return pow.getaddr(), res
+        #
+        libm_name = get_libm_name(sys.platform)
+        log = self.run(main, [libm_name])
+        pow_addr, res = log.result
+        assert res == 8.0 * 300
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match_by_id('fficall', """
+            p16 = getfield_gc(ConstPtr(ptr15), descr=<.* .*Function.inst_name .*>)
+            guard_not_invalidated(descr=...)
+            i17 = force_token()
+            setfield_gc(p0, i17, descr=<.* .*PyFrame.vable_token .*>)
+            f21 = call_release_gil(%d, 2.000000, 3.000000, descr=<FloatCallDescr>)
+            guard_not_forced(descr=...)
+            guard_no_exception(descr=...)
+        """ % pow_addr)
+
+
+    def test__ffi_call_frame_does_not_escape(self):
+        from pypy.rlib.test.test_libffi import get_libm_name
+        def main(libm_name):
+            try:
+                from _ffi import CDLL, types
+            except ImportError:
+                sys.stderr.write('SKIP: cannot import _ffi\n')
+                return 0
+
+            libm = CDLL(libm_name)
+            pow = libm.getfunc('pow', [types.double, types.double],
+                               types.double)
+
+            def mypow(a, b):
+                return pow(a, b)
+
+            i = 0
+            res = 0
+            while i < 300:
+                tmp = mypow(2, 3)
+                res += tmp
+                i += 1
+            return pow.getaddr(), res
+        #
+        libm_name = get_libm_name(sys.platform)
+        log = self.run(main, [libm_name])
+        pow_addr, res = log.result
+        assert res == 8.0 * 300
+        loop, = log.loops_by_filename(self.filepath)
+        opnames = log.opnames(loop.allops())
+        # we only force the virtualref, not its content
+        assert opnames.count('new_with_vtable') == 1
+
+    def test__ffi_call_releases_gil(self):
+        from pypy.rlib.test.test_libffi import get_libc_name
+        def main(libc_name, n):
+            import time
+            from threading import Thread
+            from _ffi import CDLL, types
+            #
+            libc = CDLL(libc_name)
+            sleep = libc.getfunc('sleep', [types.uint], types.uint)
+            delays = [0]*n + [1]
+            #
+            def loop_of_sleeps(i, delays):
+                for delay in delays:
+                    sleep(delay)    # ID: sleep
+            #
+            threads = [Thread(target=loop_of_sleeps, args=[i, delays]) for i in range(5)]
+            start = time.time()
+            for i, thread in enumerate(threads):
+                thread.start()
+            for thread in threads:
+                thread.join()
+            end = time.time()
+            return end - start
+        #
+        log = self.run(main, [get_libc_name(), 200], threshold=150)
+        assert 1 <= log.result <= 1.5 # at most 0.5 seconds of overhead
+        loops = log.loops_by_id('sleep')
+        assert len(loops) == 1 # make sure that we actually JITted the loop
+
+
+    def test_ctypes_call(self):
+        from pypy.rlib.test.test_libffi import get_libm_name
+        def main(libm_name):
+            import ctypes
+            libm = ctypes.CDLL(libm_name)
+            fabs = libm.fabs
+            fabs.argtypes = [ctypes.c_double]
+            fabs.restype = ctypes.c_double
+            x = -4
+            i = 0
+            while i < 300:
+                x = fabs(x)
+                x = x - 100
+                i += 1
+            return fabs._ptr.getaddr(), x
+
+        libm_name = get_libm_name(sys.platform)
+        log = self.run(main, [libm_name])
+        fabs_addr, res = log.result
+        assert res == -4.0
+        loop, = log.loops_by_filename(self.filepath)
+        ops = loop.allops()
+        opnames = log.opnames(ops)
+        assert opnames.count('new_with_vtable') == 1 # only the virtualref
+        assert opnames.count('call_release_gil') == 1
+        idx = opnames.index('call_release_gil')
+        call = ops[idx]
+        assert int(call.args[0]) == fabs_addr
diff --git a/pypy/module/pypyjit/test_pypy_c/test_array.py b/pypy/module/pypyjit/test_pypy_c/test_array.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_array.py
@@ -0,0 +1,186 @@
+import py
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class TestArray(BaseTestPyPyC):
+
+    def test_arraycopy_disappears(self):
+        def main(n):
+            i = 0
+            while i < n:
+                t = (1, 2, 3, i + 1)
+                t2 = t[:]
+                del t
+                i = t2[3]
+                del t2
+            return i
+        #
+        log = self.run(main, [500])
+        assert log.result == 500
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i7 = int_lt(i5, i6)
+            guard_true(i7, descr=<Guard3>)
+            i9 = int_add(i5, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, i9, i6, descr=<Loop0>)
+        """)
+
+    def test_array_sum(self):
+        def main():
+            from array import array
+            img = array("i", range(128) * 5) * 480
+            l, i = 0, 0
+            while i < len(img):
+                l += img[i]
+                i += 1
+            return l
+        #
+        log = self.run(main, [])
+        assert log.result == 19507200
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i13 = int_lt(i7, i9)
+            guard_true(i13, descr=<Guard3>)
+            i15 = getarrayitem_raw(i10, i7, descr=<.*ArrayNoLengthDescr>)
+            i16 = int_add_ovf(i8, i15)
+            guard_no_overflow(descr=<Guard4>)
+            i18 = int_add(i7, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, p5, p6, i18, i16, i9, i10, descr=<Loop0>)
+        """)
+
+    def test_array_intimg(self):
+        def main():
+            from array import array
+            img = array('i', range(3)) * (350 * 480)
+            intimg = array('i', (0,)) * (640 * 480)
+            l, i = 0, 640
+            while i < 640 * 480:
+                assert len(img) == 3*350*480
+                assert len(intimg) == 640*480
+                l = l + img[i]
+                intimg[i] = (intimg[i-640] + l)
+                i += 1
+            return intimg[i - 1]
+        #
+        log = self.run(main, [])
+        assert log.result == 73574560
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i13 = int_lt(i8, 307200)
+            guard_true(i13, descr=<Guard3>)
+        # the bound check guard on img has been killed (thanks to the asserts)
+            i14 = getarrayitem_raw(i10, i8, descr=<.*ArrayNoLengthDescr>)
+            i15 = int_add_ovf(i9, i14)
+            guard_no_overflow(descr=<Guard4>)
+            i17 = int_sub(i8, 640)
+        # the bound check guard on intimg has been killed (thanks to the asserts)
+            i18 = getarrayitem_raw(i11, i17, descr=<.*ArrayNoLengthDescr>)
+            i19 = int_add_ovf(i18, i15)
+            guard_no_overflow(descr=<Guard5>)
+        # on 64bit, there is a guard checking that i19 actually fits into 32bit
+            ...
+            setarrayitem_raw(i11, i8, _, descr=<.*ArrayNoLengthDescr>)
+            i28 = int_add(i8, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, p5, p6, p7, i28, i15, i10, i11, descr=<Loop0>)
+        """)
+
+
+    def test_zeropadded(self):
+        def main():
+            from array import array
+            class ZeroPadded(array):
+                def __new__(cls, l):
+                    self = array.__new__(cls, 'd', range(l))
+                    return self
+
+                def __getitem__(self, i):
+                    if i < 0 or i >= len(self):
+                        return 0
+                    return array.__getitem__(self, i) # ID: get
+            #
+            buf = ZeroPadded(2000)
+            i = 10
+            sa = 0
+            while i < 2000 - 10:
+                sa += buf[i-2] + buf[i-1] + buf[i] + buf[i+1] + buf[i+2]
+                i += 1
+            return sa
+
+        log = self.run(main, [])
+        assert log.result == 9895050.0
+        loop, = log.loops_by_filename(self.filepath)
+        #
+        # check that the overloaded __getitem__ does not introduce double
+        # array bound checks.
+        #
+        # The force_token()s are still there, but will be eliminated by the
+        # backend regalloc, so they are harmless
+        assert loop.match(ignore_ops=['force_token'],
+                          expected_src="""
+            ...
+            i20 = int_ge(i18, i8)
+            guard_false(i20, descr=...)
+            f21 = getarrayitem_raw(i13, i18, descr=...)
+            f23 = getarrayitem_raw(i13, i14, descr=...)
+            f24 = float_add(f21, f23)
+            f26 = getarrayitem_raw(i13, i6, descr=...)
+            f27 = float_add(f24, f26)
+            i29 = int_add(i6, 1)
+            i31 = int_ge(i29, i8)
+            guard_false(i31, descr=...)
+            f33 = getarrayitem_raw(i13, i29, descr=...)
+            f34 = float_add(f27, f33)
+            i36 = int_add(i6, 2)
+            i38 = int_ge(i36, i8)
+            guard_false(i38, descr=...)
+            f39 = getarrayitem_raw(i13, i36, descr=...)
+            ...
+        """)
+
+    def test_circular(self):
+        def main():
+            from array import array
+            class Circular(array):
+                def __new__(cls):
+                    self = array.__new__(cls, 'd', range(256))
+                    return self
+                def __getitem__(self, i):
+                    assert len(self) == 256
+                    return array.__getitem__(self, i & 255)
+            #
+            buf = Circular()
+            i = 10
+            sa = 0
+            while i < 2000 - 10:
+                sa += buf[i-2] + buf[i-1] + buf[i] + buf[i+1] + buf[i+2]
+                i += 1
+            return sa
+        #
+        log = self.run(main, [])
+        assert log.result == 1239690.0
+        loop, = log.loops_by_filename(self.filepath)
+        #
+        # check that the array bound checks are removed
+        #
+        # The force_token()s are still there, but will be eliminated by the
+        # backend regalloc, so they are harmless
+        assert loop.match(ignore_ops=['force_token'],
+                          expected_src="""
+            ...
+            i17 = int_and(i14, 255)
+            f18 = getarrayitem_raw(i8, i17, descr=...)
+            f20 = getarrayitem_raw(i8, i9, descr=...)
+            f21 = float_add(f18, f20)
+            f23 = getarrayitem_raw(i8, i10, descr=...)
+            f24 = float_add(f21, f23)
+            i26 = int_add(i6, 1)
+            i29 = int_and(i26, 255)
+            f30 = getarrayitem_raw(i8, i29, descr=...)
+            f31 = float_add(f24, f30)
+            i33 = int_add(i6, 2)
+            i36 = int_and(i33, 255)
+            f37 = getarrayitem_raw(i8, i36, descr=...)
+            ...
+        """)
diff --git a/pypy/module/pypyjit/test_pypy_c/test_boolrewrite.py b/pypy/module/pypyjit/test_pypy_c/test_boolrewrite.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_boolrewrite.py
@@ -0,0 +1,233 @@
+import py
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class TestBoolRewrite(BaseTestPyPyC):
+
+    def test_boolrewrite_inverse(self):
+        """
+        Test for this case::
+            guard(i < x)
+            ...
+            guard(i >= y)
+
+        where x and y can be either constants or variables. There are cases in
+        which the second guard is proven to be always true.
+        """
+
+        for a, b, res, opt_expected in (('2000', '2000', 20001000, True),
+                                        ( '500',  '500', 15001500, True),
+                                        ( '300',  '600', 16001700, False),
+                                        (   'a',    'b', 16001700, False),
+                                        (   'a',    'a', 13001700, True)):
+            src = """
+                def main():
+                    sa = 0
+                    a = 300
+                    b = 600
+                    for i in range(1000):
+                        if i < %s:         # ID: lt
+                            sa += 1
+                        else:
+                            sa += 2
+                        #
+                        if i >= %s:        # ID: ge
+                            sa += 10000
+                        else:
+                            sa += 20000
+                    return sa
+            """ % (a, b)
+            #
+            log = self.run(src, [], threshold=400)
+            assert log.result == res
+            loop, = log.loops_by_filename(self.filepath)
+            le_ops = log.opnames(loop.ops_by_id('lt'))
+            ge_ops = log.opnames(loop.ops_by_id('ge'))
+            assert le_ops.count('int_lt') == 1
+            #
+            if opt_expected:
+                assert ge_ops.count('int_ge') == 0
+            else:
+                # if this assert fails it means that the optimization was
+                # applied even if we don't expect to. Check whether the
+                # optimization is valid, and either fix the code or fix the
+                # test :-)
+                assert ge_ops.count('int_ge') == 1
+
+    def test_boolrewrite_reflex(self):
+        """
+        Test for this case::
+            guard(i < x)
+            ...
+            guard(y > i)
+
+        where x and y can be either constants or variables. There are cases in
+        which the second guard is proven to be always true.
+        """
+        for a, b, res, opt_expected in (('2000', '2000', 10001000, True),
+                                        ( '500',  '500', 15001500, True),
+                                        ( '300',  '600', 14001700, False),
+                                        (   'a',    'b', 14001700, False),
+                                        (   'a',    'a', 17001700, True)):
+
+            src = """
+                def main():
+                    sa = 0
+                    a = 300
+                    b = 600
+                    for i in range(1000):
+                        if i < %s:        # ID: lt
+                            sa += 1
+                        else:
+                            sa += 2
+                        if %s > i:        # ID: gt
+                            sa += 10000
+                        else:
+                            sa += 20000
+                    return sa
+            """ % (a, b)
+            log = self.run(src, [], threshold=400)
+            assert log.result == res
+            loop, = log.loops_by_filename(self.filepath)
+            le_ops = log.opnames(loop.ops_by_id('lt'))
+            gt_ops = log.opnames(loop.ops_by_id('gt'))
+            assert le_ops.count('int_lt') == 1
+            #
+            if opt_expected:
+                assert gt_ops.count('int_gt') == 0
+            else:
+                # if this assert fails it means that the optimization was
+                # applied even if we don't expect to. Check whether the
+                # optimization is valid, and either fix the code or fix the
+                # test :-)
+                assert gt_ops.count('int_gt') == 1
+
+
+    def test_boolrewrite_allcases_inverse(self):
+        """
+        Test for this case::
+            guard(i < x)
+            ...
+            guard(i > y)
+
+        with all possible combination of binary comparison operators.  This
+        test only checks that we get the expected result, not that any
+        optimization has been applied.
+        """
+        ops = ('<', '>', '<=', '>=', '==', '!=')
+        for op1 in ops:
+            for op2 in ops:
+                for a,b in ((500, 500), (300, 600)):
+                    src = """
+                        def main():
+                            sa = 0
+                            for i in range(300):
+                                if i %s %d:
+                                    sa += 1
+                                else:
+                                    sa += 2
+                                if i %s %d:
+                                    sa += 10000
+                                else:
+                                    sa += 20000
+                            return sa
+                    """ % (op1, a, op2, b)
+                    yield self.run_and_check, src
+
+                    src = """
+                        def main():
+                            sa = 0
+                            i = 0.0
+                            while i < 250.0:
+                                if i %s %f:
+                                    sa += 1
+                                else:
+                                    sa += 2
+                                if i %s %f:
+                                    sa += 10000
+                                else:
+                                    sa += 20000
+                                i += 0.25
+                            return sa
+                    """ % (op1, float(a)/4.0, op2, float(b)/4.0)
+                    yield self.run_and_check, src
+
+
+    def test_boolrewrite_allcases_reflex(self):
+        """
+        Test for this case::
+            guard(i < x)
+            ...
+            guard(x > i)
+
+        with all possible combination of binary comparison operators.  This
+        test only checks that we get the expected result, not that any
+        optimization has been applied.
+        """
+        ops = ('<', '>', '<=', '>=', '==', '!=')
+        for op1 in ops:
+            for op2 in ops:
+                for a,b in ((500, 500), (300, 600)):
+                    src = """
+                        def main():
+                            sa = 0
+                            for i in range(300):
+                                if i %s %d:
+                                    sa += 1
+                                else:
+                                    sa += 2
+                                if %d %s i:
+                                    sa += 10000
+                                else:
+                                    sa += 20000
+                            return sa
+                    """ % (op1, a, b, op2)
+                    yield self.run_and_check, src
+
+                    src = """
+                        def main():
+                            sa = 0
+                            i = 0.0
+                            while i < 250.0:
+                                if i %s %f:
+                                    sa += 1
+                                else:
+                                    sa += 2
+                                if %f %s i:
+                                    sa += 10000
+                                else:
+                                    sa += 20000
+                                i += 0.25
+                            return sa
+                    """ % (op1, float(a)/4.0, float(b)/4.0, op2)
+                    yield self.run_and_check, src
+
+    def test_boolrewrite_ptr(self):
+        """
+        This test only checks that we get the expected result, not that any
+        optimization has been applied.
+        """
+        compares = ('a == b', 'b == a', 'a != b', 'b != a', 'a == c', 'c != b')
+        for e1 in compares:
+            for e2 in compares:
+                src = """
+                    class tst(object):
+                        pass
+                    def main():
+                        a = tst()
+                        b = tst()
+                        c = tst()
+                        sa = 0
+                        for i in range(300):
+                            if %s:
+                                sa += 1
+                            else:
+                                sa += 2
+                            if %s:
+                                sa += 10000
+                            else:
+                                sa += 20000
+                            if i > 750:
+                                a = b
+                        return sa
+                """ % (e1, e2)
+                yield self.run_and_check, src
diff --git a/pypy/module/pypyjit/test_pypy_c/test_call.py b/pypy/module/pypyjit/test_pypy_c/test_call.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_call.py
@@ -0,0 +1,381 @@
+import py
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class TestCall(BaseTestPyPyC):
+
+    def test_recursive_call(self):
+        def fn():
+            def rec(n):
+                if n == 0:
+                    return 0
+                return 1 + rec(n-1)
+            #
+            # this loop is traced and then aborted, because the trace is too
+            # long. But then "rec" is marked as "don't inline"
+            i = 0
+            j = 0
+            while i < 20:
+                i += 1
+                j += rec(100)
+            #
+            # next time we try to trace "rec", instead of inlining we compile
+            # it separately and generate a call_assembler
+            i = 0
+            j = 0
+            while i < 20:
+                i += 1
+                j += rec(100) # ID: call_rec
+                a = 0
+            return j
+        #
+        log = self.run(fn, [], threshold=18)
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match_by_id('call_rec', """
+            ...
+            p53 = call_assembler(..., descr=...)
+            guard_not_forced(descr=...)
+            guard_no_exception(descr=...)
+            ...
+        """)
+
+    def test_simple_call(self):
+        src = """
+            OFFSET = 0
+            def f(i):
+                return i + 1 + OFFSET # ID: add
+            def main(n):
+                i = 0
+                while i < n+OFFSET:   # ID: cond
+                    i = f(f(i))       # ID: call
+                    a = 0
+                return i
+        """
+        log = self.run(src, [1000])
+        assert log.result == 1000
+        # first, we test what is inside the entry bridge
+        # -----------------------------------------------
+        entry_bridge, = log.loops_by_id('call', is_entry_bridge=True)
+        # LOAD_GLOBAL of OFFSET
+        ops = entry_bridge.ops_by_id('cond', opcode='LOAD_GLOBAL')
+        assert log.opnames(ops) == ["guard_value",
+                                    "getfield_gc", "guard_value",
+                                    "getfield_gc", "guard_isnull",
+                                    "getfield_gc", "guard_nonnull_class"]
+        # LOAD_GLOBAL of OFFSET but in different function partially folded
+        # away
+        # XXX could be improved
+        ops = entry_bridge.ops_by_id('add', opcode='LOAD_GLOBAL')
+        assert log.opnames(ops) == ["guard_value", "getfield_gc", "guard_isnull"]
+        #
+        # two LOAD_GLOBAL of f, the second is folded away
+        ops = entry_bridge.ops_by_id('call', opcode='LOAD_GLOBAL')
+        assert log.opnames(ops) == ["getfield_gc", "guard_nonnull_class"]
+        #
+        assert entry_bridge.match_by_id('call', """
+            p29 = getfield_gc(ConstPtr(ptr28), descr=<GcPtrFieldDescr pypy.objspace.std.celldict.ModuleCell.inst_w_value .*>)
+            guard_nonnull_class(p29, ConstClass(Function), descr=<Guard18>)
+            p33 = getfield_gc(p29, descr=<GcPtrFieldDescr pypy.interpreter.function.Function.inst_code .*>)
+            guard_value(p33, ConstPtr(ptr34), descr=<Guard19>)
+            p35 = getfield_gc(p29, descr=<GcPtrFieldDescr pypy.interpreter.function.Function.inst_w_func_globals .*>)
+            p36 = getfield_gc(p29, descr=<GcPtrFieldDescr pypy.interpreter.function.Function.inst_closure .*>)
+            p38 = call(ConstClass(getexecutioncontext), descr=<GcPtrCallDescr>)
+            p39 = getfield_gc(p38, descr=<GcPtrFieldDescr pypy.interpreter.executioncontext.ExecutionContext.inst_topframeref .*>)
+            i40 = force_token()
+            p41 = getfield_gc(p38, descr=<GcPtrFieldDescr pypy.interpreter.executioncontext.ExecutionContext.inst_w_tracefunc .*>)
+            guard_isnull(p41, descr=<Guard20>)
+            i42 = getfield_gc(p38, descr=<NonGcPtrFieldDescr pypy.interpreter.executioncontext.ExecutionContext.inst_profilefunc .*>)
+            i43 = int_is_zero(i42)
+            guard_true(i43, descr=<Guard21>)
+            i50 = force_token()
+        """)
+        #
+        # then, we test the actual loop
+        # -----------------------------
+        loop, = log.loops_by_id('call')
+        assert loop.match("""
+            i12 = int_lt(i5, i6)
+            guard_true(i12, descr=<Guard3>)
+            i13 = force_token()
+            i15 = int_add(i5, 1)
+            i16 = int_add_ovf(i15, i7)
+            guard_no_overflow(descr=<Guard4>)
+            i18 = force_token()
+            i20 = int_add_ovf(i16, 1)
+            guard_no_overflow(descr=<Guard5>)
+            i21 = int_add_ovf(i20, i7)
+            guard_no_overflow(descr=<Guard6>)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, i21, i6, i7, p8, p9, p10, p11, descr=<Loop0>)
+        """)
+
+    def test_method_call(self):
+        def fn(n):
+            class A(object):
+                def __init__(self, a):
+                    self.a = a
+                def f(self, i):
+                    return self.a + i
+            i = 0
+            a = A(1)
+            while i < n:
+                x = a.f(i)    # ID: meth1
+                i = a.f(x)    # ID: meth2
+            return i
+        #
+        log = self.run(fn, [1000])
+        assert log.result == 1000
+        #
+        # first, we test the entry bridge
+        # -------------------------------
+        entry_bridge, = log.loops_by_filename(self.filepath, is_entry_bridge=True)
+        ops = entry_bridge.ops_by_id('meth1', opcode='LOOKUP_METHOD')
+        assert log.opnames(ops) == ['guard_value', 'getfield_gc', 'guard_value',
+                                    'guard_not_invalidated']
+        # the second LOOKUP_METHOD is folded away
+        assert list(entry_bridge.ops_by_id('meth2', opcode='LOOKUP_METHOD')) == []
+        #
+        # then, the actual loop
+        # ----------------------
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i15 = int_lt(i6, i9)
+            guard_true(i15, descr=<Guard3>)
+            guard_not_invalidated(descr=<Guard4>)
+            i16 = force_token()
+            i17 = int_add_ovf(i10, i6)
+            guard_no_overflow(descr=<Guard5>)
+            i18 = force_token()
+            i19 = int_add_ovf(i10, i17)
+            guard_no_overflow(descr=<Guard6>)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, p5, i19, p7, i17, i9, i10, p11, p12, p13, descr=<Loop0>)
+        """)
+
+    def test_static_classmethod_call(self):
+        def fn(n):
+            class A(object):
+                @classmethod
+                def f(cls, i):
+                    return i + (cls is A) + 1
+                @staticmethod
+                def g(i):
+                    return i - 1
+            #
+            i = 0
+            a = A()
+            while i < n:
+                x = a.f(i)
+                i = a.g(x)
+            return i
+        #
+        log = self.run(fn, [1000])
+        assert log.result == 1000
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i14 = int_lt(i6, i9)
+            guard_true(i14, descr=<Guard3>)
+            guard_not_invalidated(descr=<Guard4>)
+            i15 = force_token()
+            i17 = int_add_ovf(i8, 1)
+            guard_no_overflow(descr=<Guard5>)
+            i18 = force_token()
+            --TICK--
+            jump(p0, p1, p2, p3, p4, p5, i8, p7, i17, i9, p10, p11, p12, descr=<Loop0>)
+        """)
+
+    def test_default_and_kw(self):
+        def main(n):
+            def f(i, j=1):
+                return i + j
+            #
+            i = 0
+            while i < n:
+                i = f(f(i), j=1) # ID: call
+                a = 0
+            return i
+        #
+        log = self.run(main, [1000])
+        assert log.result == 1000
+        loop, = log.loops_by_id('call')
+        assert loop.match_by_id('call', """
+            i14 = force_token()
+            i16 = force_token()
+        """)
+
+    def test_kwargs(self):
+        # this is not a very precise test, could be improved
+        def main(x):
+            def g(**args):
+                return len(args)
+            #
+            s = 0
+            d = {}
+            for i in range(x):
+                s += g(**d)       # ID: call
+                d[str(i)] = i
+                if i % 100 == 99:
+                    d = {}
+            return s
+        #
+        log = self.run(main, [1000])
+        assert log.result == 49500
+        loop, = log.loops_by_id('call')
+        ops = log.opnames(loop.ops_by_id('call'))
+        guards = [ops for ops in ops if ops.startswith('guard')]
+        assert len(guards) <= 5
+
+    def test_stararg_virtual(self):
+        def main(x):
+            def g(*args):
+                return len(args)
+            def h(a, b, c):
+                return c
+            #
+            s = 0
+            for i in range(x):
+                l = [i, x, 2]
+                s += g(*l)       # ID: g1
+                s += h(*l)       # ID: h1
+                s += g(i, x, 2)  # ID: g2
+                a = 0
+            for i in range(x):
+                l = [x, 2]
+                s += g(i, *l)    # ID: g3
+                s += h(i, *l)    # ID: h2
+                a = 0
+            return s
+        #
+        log = self.run(main, [1000])
+        assert log.result == 13000
+        loop0, = log.loops_by_id('g1')
+        assert loop0.match_by_id('g1', """
+            i20 = force_token()
+            setfield_gc(p4, i19, descr=<.*W_AbstractSeqIterObject.inst_index .*>)
+            i22 = int_add_ovf(i8, 3)
+            guard_no_overflow(descr=<Guard4>)
+        """)
+        assert loop0.match_by_id('h1', """
+            i20 = force_token()
+            i22 = int_add_ovf(i8, 2)
+            guard_no_overflow(descr=<Guard5>)
+        """)
+        assert loop0.match_by_id('g2', """
+            i27 = force_token()
+            i29 = int_add_ovf(i26, 3)
+            guard_no_overflow(descr=<Guard6>)
+        """)
+        #
+        loop1, = log.loops_by_id('g3')
+        assert loop1.match_by_id('g3', """
+            i21 = force_token()
+            setfield_gc(p4, i20, descr=<.* .*W_AbstractSeqIterObject.inst_index .*>)
+            i23 = int_add_ovf(i9, 3)
+            guard_no_overflow(descr=<Guard37>)
+        """)
+        assert loop1.match_by_id('h2', """
+            i25 = force_token()
+            i27 = int_add_ovf(i23, 2)
+            guard_no_overflow(descr=<Guard38>)
+        """)
+
+    def test_stararg(self):
+        def main(x):
+            def g(*args):
+                return args[-1]
+            def h(*args):
+                return len(args)
+            #
+            s = 0
+            l = []
+            i = 0
+            while i < x:
+                l.append(1)
+                s += g(*l)     # ID: g
+                i = h(*l)      # ID: h
+                a = 0
+            return s
+        #
+        log = self.run(main, [1000])
+        assert log.result == 1000
+        loop, = log.loops_by_id('g')
+        ops_g = log.opnames(loop.ops_by_id('g'))
+        ops_h = log.opnames(loop.ops_by_id('h'))
+        ops = ops_g + ops_h
+        assert 'new_with_vtable' not in ops
+        assert 'call_may_force' not in ops
+
+    def test_call_builtin_function(self):
+        def main(n):
+            i = 2
+            l = []
+            while i < n:
+                i += 1
+                l.append(i)    # ID: append
+                a = 0
+            return i, len(l)
+        #
+        log = self.run(main, [1000])
+        assert log.result == (1000, 998)
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match_by_id('append', """
+            i13 = getfield_gc(p8, descr=<SignedFieldDescr list.length .*>)
+            i15 = int_add(i13, 1)
+            call(ConstClass(_ll_list_resize_ge__listPtr_Signed), p8, i15, descr=<VoidCallDescr>)
+            guard_no_exception(descr=<Guard4>)
+            p17 = getfield_gc(p8, descr=<GcPtrFieldDescr list.items .*>)
+            p19 = new_with_vtable(ConstClass(W_IntObject))
+            setfield_gc(p19, i12, descr=<SignedFieldDescr .*W_IntObject.inst_intval .*>)
+            setarrayitem_gc(p17, i13, p19, descr=<GcPtrArrayDescr>)
+        """)
+
+    def test_blockstack_virtualizable(self):
+        def main(n):
+            from pypyjit import residual_call
+            i = 0
+            while i < n:
+                try:
+                    residual_call(len, [])   # ID: call
+                except:
+                    pass
+                i += 1
+            return i
+        #
+        log = self.run(main, [500])
+        assert log.result == 500
+        loop, = log.loops_by_id('call')
+        assert loop.match_by_id('call', opcode='CALL_FUNCTION', expected_src="""
+            # make sure that the "block" is not allocated
+            ...
+            i20 = force_token()
+            setfield_gc(p0, i20, descr=<SignedFieldDescr .*PyFrame.vable_token .*>)
+            p22 = new_with_vtable(19511408)
+            p24 = new_array(1, descr=<GcPtrArrayDescr>)
+            p26 = new_with_vtable(ConstClass(W_ListObject))
+            p27 = new(descr=<SizeDescr .*>)
+            p29 = new_array(0, descr=<GcPtrArrayDescr>)
+            setfield_gc(p27, p29, descr=<GcPtrFieldDescr list.items .*>)
+            setfield_gc(p26, p27, descr=<.* .*W_ListObject.inst_wrappeditems .*>)
+            setarrayitem_gc(p24, 0, p26, descr=<GcPtrArrayDescr>)
+            setfield_gc(p22, p24, descr=<GcPtrFieldDescr .*Arguments.inst_arguments_w .*>)
+            p32 = call_may_force(11376960, p18, p22, descr=<GcPtrCallDescr>)
+            ...
+        """)
+
+    def test_func_defaults(self):
+        def main(n):
+            i = 1
+            while i < n:
+                i += len(xrange(i+1)) - i
+            return i
+
+        log = self.run(main, [10000])
+        assert log.result == 10000
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i10 = int_lt(i5, i6)
+            guard_true(i10, descr=<Guard3>)
+            i120 = int_add(i5, 1)
+            guard_not_invalidated(descr=<Guard4>)
+            --TICK--
+            jump(..., descr=<Loop0>)
+        """)
diff --git a/pypy/module/pypyjit/test_pypy_c/test_exception.py b/pypy/module/pypyjit/test_pypy_c/test_exception.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_exception.py
@@ -0,0 +1,93 @@
+import py
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class TestException(BaseTestPyPyC):
+
+    def test_cmp_exc(self):
+        def f1(n):
+            # So we don't get a LOAD_GLOBAL op
+            KE = KeyError
+            i = 0
+            while i < n:
+                try:
+                    raise KE
+                except KE: # ID: except
+                    i += 1
+            return i
+
+        log = self.run(f1, [10000])
+        assert log.result == 10000
+        loop, = log.loops_by_id("except")
+        ops = list(loop.ops_by_id("except", opcode="COMPARE_OP"))
+        assert ops == []
+
+    def test_exception_inside_loop_1(self):
+        def main(n):
+            while n:
+                try:
+                    raise ValueError
+                except ValueError:
+                    pass
+                n -= 1
+            return n
+        #
+        log = self.run(main, [1000])
+        assert log.result == 0
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+        i5 = int_is_true(i3)
+        guard_true(i5, descr=<Guard3>)
+        guard_not_invalidated(descr=<Guard4>)
+        --EXC-TICK--
+        i12 = int_sub_ovf(i3, 1)
+        guard_no_overflow(descr=<Guard6>)
+        --TICK--
+        jump(..., descr=<Loop0>)
+        """)
+
+    def test_exception_inside_loop_2(self):
+        def main(n):
+            def g(n):
+                raise ValueError(n)  # ID: raise
+            def f(n):
+                g(n)
+            #
+            while n:
+                try:
+                    f(n)
+                except ValueError:
+                    pass
+                n -= 1
+            return n
+        #
+        log = self.run(main, [1000])
+        assert log.result == 0
+        loop, = log.loops_by_filename(self.filepath)
+        ops = log.opnames(loop.ops_by_id('raise'))
+        assert 'new' not in ops
+
+    def test_reraise(self):
+        def f(n):
+            i = 0
+            while i < n:
+                try:
+                    try:
+                        raise KeyError
+                    except KeyError:
+                        raise
+                except KeyError:
+                    i += 1
+            return i
+
+        log = self.run(f, [100000])
+        assert log.result == 100000
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i7 = int_lt(i4, i5)
+            guard_true(i7, descr=<Guard3>)
+            guard_not_invalidated(descr=<Guard4>)
+            --EXC-TICK--
+            i14 = int_add(i4, 1)
+            --TICK--
+            jump(..., descr=<Loop0>)
+        """)
diff --git a/pypy/module/pypyjit/test_pypy_c/test_import.py b/pypy/module/pypyjit/test_pypy_c/test_import.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_import.py
@@ -0,0 +1,46 @@
+import py
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class TestImport(BaseTestPyPyC):
+
+    def test_import_in_function(self):
+        def main(n):
+            i = 0
+            while i < n:
+                from sys import version  # ID: import
+                i += 1
+            return i
+        #
+        log = self.run(main, [500])
+        assert log.result == 500
+        loop, = log.loops_by_id('import')
+        assert loop.match_by_id('import', """
+            p11 = getfield_gc(ConstPtr(ptr10), descr=<GcPtrFieldDescr pypy.objspace.std.celldict.ModuleCell.inst_w_value 8>)
+            guard_value(p11, ConstPtr(ptr12), descr=<Guard4>)
+            guard_not_invalidated(descr=<Guard5>)
+            p14 = getfield_gc(ConstPtr(ptr13), descr=<GcPtrFieldDescr pypy.objspace.std.celldict.ModuleCell.inst_w_value 8>)
+            p16 = getfield_gc(ConstPtr(ptr15), descr=<GcPtrFieldDescr pypy.objspace.std.celldict.ModuleCell.inst_w_value 8>)
+            guard_value(p14, ConstPtr(ptr17), descr=<Guard6>)
+            guard_isnull(p16, descr=<Guard7>)
+        """)
+
+    def test_import_fast_path(self, tmpdir):
+        pkg = tmpdir.join('mypkg').ensure(dir=True)
+        pkg.join('__init__.py').write("")
+        pkg.join('mod.py').write(str(py.code.Source("""
+            def do_the_import():
+                import sys
+        """)))
+        def main(path, n):
+            import sys
+            sys.path.append(path)
+            from mypkg.mod import do_the_import
+            for i in range(n):
+                do_the_import()
+        #
+        log = self.run(main, [str(tmpdir), 300])
+        loop, = log.loops_by_filename(self.filepath)
+        # this is a check for a slow-down that introduced a
+        # call_may_force(absolute_import_with_lock).
+        for opname in log.opnames(loop.allops(opcode="IMPORT_NAME")):
+            assert 'call' not in opname    # no call-like opcode
diff --git a/pypy/module/pypyjit/test_pypy_c/test_instance.py b/pypy/module/pypyjit/test_pypy_c/test_instance.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_instance.py
@@ -0,0 +1,202 @@
+import py
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class TestInstance(BaseTestPyPyC):
+
+    def test_virtual_instance(self):
+        def main(n):
+            class A(object):
+                pass
+            #
+            i = 0
+            while i < n:
+                a = A()
+                assert isinstance(a, A)
+                assert not isinstance(a, int)
+                a.x = 2
+                i = i + a.x
+            return i
+        #
+        log = self.run(main, [1000], threshold = 400)
+        assert log.result == 1000
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i7 = int_lt(i5, i6)
+            guard_true(i7, descr=<Guard3>)
+            guard_not_invalidated(descr=<Guard4>)
+            i9 = int_add_ovf(i5, 2)
+            guard_no_overflow(descr=<Guard5>)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, i9, i6, descr=<Loop0>)
+        """)
+
+    def test_load_attr(self):
+        src = '''
+            class A(object):
+                pass
+            a = A()
+            a.x = 2
+            def main(n):
+                i = 0
+                while i < n:
+                    i = i + a.x
+                return i
+        '''
+        log = self.run(src, [1000])
+        assert log.result == 1000
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i9 = int_lt(i5, i6)
+            guard_true(i9, descr=<Guard3>)
+            guard_not_invalidated(descr=<Guard4>)
+            i10 = int_add_ovf(i5, i7)
+            guard_no_overflow(descr=<Guard5>)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, i10, i6, p7, i7, p8, descr=<Loop0>)
+        """)
+
+    def test_getattr_with_dynamic_attribute(self):
+        src = """
+        class A(object):
+            pass
+
+        l = ["x", "y"]
+
+        def main():
+            sum = 0
+            a = A()
+            a.a1 = 0
+            a.a2 = 0
+            a.a3 = 0
+            a.a4 = 0
+            a.a5 = 0 # workaround, because the first five attributes need a promotion
+            a.x = 1
+            a.y = 2
+            i = 0
+            while i < 500:
+                name = l[i % 2]
+                sum += getattr(a, name)
+                i += 1
+            return sum
+        """
+        log = self.run(src, [])
+        assert log.result == 250 + 250*2
+        loops = log.loops_by_filename(self.filepath)
+        assert len(loops) == 1
+
+    def test_mutate_class(self):
+        def fn(n):
+            class A(object):
+                count = 1
+                def __init__(self, a):
+                    self.a = a
+                def f(self):
+                    return self.count
+            i = 0
+            a = A(1)
+            while i < n:
+                A.count += 1 # ID: mutate
+                i = a.f()    # ID: meth1
+            return i
+        #
+        log = self.run(fn, [1000], threshold=10)
+        assert log.result == 1000
+        #
+        # first, we test the entry bridge
+        # -------------------------------
+        entry_bridge, = log.loops_by_filename(self.filepath, is_entry_bridge=True)
+        ops = entry_bridge.ops_by_id('mutate', opcode='LOAD_ATTR')
+        assert log.opnames(ops) == ['guard_value', 'guard_not_invalidated',
+                                    'getfield_gc', 'guard_nonnull_class']
+        # the STORE_ATTR is folded away
+        assert list(entry_bridge.ops_by_id('meth1', opcode='STORE_ATTR')) == []
+        #
+        # then, the actual loop
+        # ----------------------
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i8 = getfield_gc_pure(p5, descr=<SignedFieldDescr .*W_IntObject.inst_intval.*>)
+            i9 = int_lt(i8, i7)
+            guard_true(i9, descr=.*)
+            guard_not_invalidated(descr=.*)
+            i11 = int_add(i8, 1)
+            i12 = force_token()
+            --TICK--
+            p20 = new_with_vtable(ConstClass(W_IntObject))
+            setfield_gc(p20, i11, descr=<SignedFieldDescr.*W_IntObject.inst_intval .*>)
+            setfield_gc(ConstPtr(ptr21), p20, descr=<GcPtrFieldDescr .*TypeCell.inst_w_value .*>)
+            jump(p0, p1, p2, p3, p4, p20, p6, i7, descr=<Loop.>)
+        """)
+
+    def test_oldstyle_newstyle_mix(self):
+        def main():
+            class A:
+                pass
+
+            class B(object, A):
+                def __init__(self, x):
+                    self.x = x
+
+            i = 0
+            b = B(1)
+            while i < 100:
+                v = b.x # ID: loadattr
+                i += v
+            return i
+
+        log = self.run(main, [], threshold=80)
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match_by_id('loadattr',
+        '''
+        guard_not_invalidated(descr=...)
+        i19 = call(ConstClass(ll_dict_lookup), _, _, _, descr=...)
+        guard_no_exception(descr=...)
+        i21 = int_and(i19, _)
+        i22 = int_is_true(i21)
+        guard_true(i22, descr=...)
+        i26 = call(ConstClass(ll_dict_lookup), _, _, _, descr=...)
+        guard_no_exception(descr=...)
+        i28 = int_and(i26, _)
+        i29 = int_is_true(i28)
+        guard_true(i29, descr=...)
+        ''')
+
+    def test_python_contains(self):
+        def main():
+            class A(object):
+                def __contains__(self, v):
+                    return True
+
+            i = 0
+            a = A()
+            while i < 100:
+                i += i in a # ID: contains
+                b = 0       # to make sure that JUMP_ABSOLUTE is not part of the ID
+
+        log = self.run(main, [], threshold=80)
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match_by_id("contains", """
+            guard_not_invalidated(descr=...)
+            i11 = force_token()
+            i12 = int_add_ovf(i5, i7)
+            guard_no_overflow(descr=...)
+        """)
+
+    def test_id_compare_optimization(self):
+        def main():
+            class A(object):
+                pass
+            #
+            i = 0
+            a = A()
+            while i < 300:
+                new_a = A()
+                if new_a != a:  # ID: compare
+                    pass
+                i += 1
+            return i
+        #
+        log = self.run(main, [])
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match_by_id("compare", "") # optimized away
+
diff --git a/pypy/module/pypyjit/test_pypy_c/test_intbound.py b/pypy/module/pypyjit/test_pypy_c/test_intbound.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_intbound.py
@@ -0,0 +1,296 @@
+import py
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class TestIntbound(BaseTestPyPyC):
+
+    def test_intbound_simple(self):
+        """
+        This test only checks that we get the expected result, not that any
+        optimization has been applied.
+        """
+        ops = ('<', '>', '<=', '>=', '==', '!=')
+        nbr = (3, 7)
+        for o1 in ops:
+            for o2 in ops:
+                for n1 in nbr:
+                    for n2 in nbr:
+                        src = '''
+                        def f(i):
+                            a, b = 3, 3
+                            if i %s %d:
+                                a = 0
+                            else:
+                                a = 1
+                            if i %s %d:
+                                b = 0
+                            else:
+                                b = 1
+                            return a + b * 2
+
+                        def main():
+                            res = [0] * 4
+                            idx = []
+                            for i in range(15):
+                                idx.extend([i] * 15)
+                            for i in idx:
+                                res[f(i)] += 1
+                            return res
+
+                        ''' % (o1, n1, o2, n2)
+                        yield self.run_and_check, src
+
+    def test_intbound_addsub_mix(self):
+        """
+        This test only checks that we get the expected result, not that any
+        optimization has been applied.
+        """
+        tests = ('i > 4', 'i > 2', 'i + 1 > 2', '1 + i > 4',
+                 'i - 1 > 1', '1 - i > 1', '1 - i < -3',
+                 'i == 1', 'i == 5', 'i != 1', '-2 * i < -4')
+        for t1 in tests:
+            for t2 in tests:
+                src = '''
+                def f(i):
+                    a, b = 3, 3
+                    if %s:
+                        a = 0
+                    else:
+                        a = 1
+                    if %s:
+                        b = 0
+                    else:
+                        b = 1
+                    return a + b * 2
+
+                def main():
+                    res = [0] * 4
+                    idx = []
+                    for i in range(15):
+                        idx.extend([i] * 15)
+                    for i in idx:
+                        res[f(i)] += 1
+                    return res
+
+                ''' % (t1, t2)
+                yield self.run_and_check, src
+
+    def test_intbound_gt(self):
+        def main(n):
+            i, a, b = 0, 0, 0
+            while i < n:
+                if i > -1:
+                    a += 1
+                if i > -2:
+                    b += 1
+                i += 1
+            return (a, b)
+        #
+        log = self.run(main, [300])
+        assert log.result == (300, 300)
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i10 = int_lt(i8, i9)
+            guard_true(i10, descr=...)
+            i12 = int_add_ovf(i7, 1)
+            guard_no_overflow(descr=...)
+            i14 = int_add_ovf(i6, 1)
+            guard_no_overflow(descr=...)
+            i17 = int_add(i8, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, p5, i14, i12, i17, i9, descr=<Loop0>)
+        """)
+
+    def test_intbound_sub_lt(self):
+        def main():
+            i, a = 0, 0
+            while i < 300:
+                if i - 10 < 295:
+                    a += 1
+                i += 1
+            return a
+        #
+        log = self.run(main, [])
+        assert log.result == 300
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i7 = int_lt(i5, 300)
+            guard_true(i7, descr=...)
+            i9 = int_sub_ovf(i5, 10)
+            guard_no_overflow(descr=...)
+            i11 = int_add_ovf(i4, 1)
+            guard_no_overflow(descr=...)
+            i13 = int_add(i5, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, i11, i13, descr=<Loop0>)
+        """)
+
+    def test_intbound_addsub_ge(self):
+        def main(n):
+            i, a, b = 0, 0, 0
+            while i < n:
+                if i + 5 >= 5:
+                    a += 1
+                if i - 1 >= -1:
+                    b += 1
+                i += 1
+            return (a, b)
+        #
+        log = self.run(main, [300])
+        assert log.result == (300, 300)
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i10 = int_lt(i8, i9)
+            guard_true(i10, descr=...)
+            i12 = int_add_ovf(i8, 5)
+            guard_no_overflow(descr=...)
+            i14 = int_add_ovf(i7, 1)
+            guard_no_overflow(descr=...)
+            i16 = int_add_ovf(i6, 1)
+            guard_no_overflow(descr=...)
+            i19 = int_add(i8, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, p5, i16, i14, i19, i9, descr=<Loop0>)
+        """)
+
+    def test_intbound_addmul_ge(self):
+        def main(n):
+            i, a, b = 0, 0, 0
+            while i < 300:
+                if i + 5 >= 5:
+                    a += 1
+                if 2 * i >= 0:
+                    b += 1
+                i += 1
+            return (a, b)
+        #
+        log = self.run(main, [300])
+        assert log.result == (300, 300)
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i10 = int_lt(i8, 300)
+            guard_true(i10, descr=...)
+            i12 = int_add(i8, 5)
+            i14 = int_add_ovf(i7, 1)
+            guard_no_overflow(descr=...)
+            i16 = int_lshift(i8, 1)
+            i18 = int_add_ovf(i6, 1)
+            guard_no_overflow(descr=...)
+            i21 = int_add(i8, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, p5, i18, i14, i21, descr=<Loop0>)
+        """)
+
+    def test_intbound_eq(self):
+        def main(a, n):
+            i, s = 0, 0
+            while i < 300:
+                if a == 7:
+                    s += a + 1
+                elif i == 10:
+                    s += i
+                else:
+                    s += 1
+                i += 1
+            return s
+        #
+        log = self.run(main, [7, 300])
+        assert log.result == main(7, 300)
+        log = self.run(main, [10, 300])
+        assert log.result == main(10, 300)
+        log = self.run(main, [42, 300])
+        assert log.result == main(42, 300)
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i10 = int_lt(i8, 300)
+            guard_true(i10, descr=...)
+            i12 = int_eq(i8, 10)
+            guard_false(i12, descr=...)
+            i14 = int_add_ovf(i7, 1)
+            guard_no_overflow(descr=...)
+            i16 = int_add(i8, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, p5, p6, i14, i16, descr=<Loop0>)
+        """)
+
+    def test_intbound_mul(self):
+        def main(a):
+            i, s = 0, 0
+            while i < 300:
+                assert i >= 0
+                if 2 * i < 30000:
+                    s += 1
+                else:
+                    s += a
+                i += 1
+            return s
+        #
+        log = self.run(main, [7])
+        assert log.result == 300
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i8 = int_lt(i6, 300)
+            guard_true(i8, descr=...)
+            i10 = int_lshift(i6, 1)
+            i12 = int_add_ovf(i5, 1)
+            guard_no_overflow(descr=...)
+            i14 = int_add(i6, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, i12, i14, descr=<Loop0>)
+        """)
+
+    def test_assert(self):
+        def main(a):
+            i, s = 0, 0
+            while i < 300:
+                assert a == 7
+                s += a + 1
+                i += 1
+            return s
+        log = self.run(main, [7])
+        assert log.result == 300*8
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i8 = int_lt(i6, 300)
+            guard_true(i8, descr=...)
+            i10 = int_add_ovf(i5, 8)
+            guard_no_overflow(descr=...)
+            i12 = int_add(i6, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, i10, i12, descr=<Loop0>)
+        """)
+
+    def test_xor(self):
+        def main(b):
+            a = sa = 0
+            while a < 300:
+                if a > 0: # Specialises the loop
+                    pass
+                if b > 10:
+                    pass
+                if a^b >= 0:  # ID: guard
+                    sa += 1
+                sa += a^a     # ID: a_xor_a
+                a += 1
+            return sa
+
+        log = self.run(main, [11])
+        assert log.result == 300
+        loop, = log.loops_by_filename(self.filepath)
+        # if both are >=0, a^b is known to be >=0
+        # note that we know that b>10
+        assert loop.match_by_id('guard', """
+            i10 = int_xor(i5, i7)
+        """)
+        #
+        # x^x is always optimized to 0
+        assert loop.match_by_id('a_xor_a', "")
+
+        log = self.run(main, [9])
+        assert log.result == 300
+        loop, = log.loops_by_filename(self.filepath)
+        # we don't know that b>10, hence we cannot optimize it
+        assert loop.match_by_id('guard', """
+            i10 = int_xor(i5, i7)
+            i12 = int_ge(i10, 0)
+            guard_true(i12, descr=...)
+        """)
diff --git a/pypy/module/pypyjit/test_pypy_c/test_min_max.py b/pypy/module/pypyjit/test_pypy_c/test_min_max.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_min_max.py
@@ -0,0 +1,67 @@
+import py
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class TestMinMax(BaseTestPyPyC):
+
+    def test_min_max(self):
+        def main():
+            i=0
+            sa=0
+            while i < 300:
+                sa+=min(max(i, 3000), 4000)
+                i+=1
+            return sa
+        log = self.run(main, [])
+        assert log.result == 300*3000
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i7 = int_lt(i4, 300)
+            guard_true(i7, descr=...)
+            i9 = int_add_ovf(i5, 3000)
+            guard_no_overflow(descr=...)
+            i11 = int_add(i4, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, i11, i9, descr=<Loop0>)
+        """)
+
+    def test_silly_max(self):
+        def main():
+            i = 2
+            sa = 0
+            while i < 300:
+                lst = range(i)
+                sa += max(*lst) # ID: max
+                i += 1
+            return sa
+        log = self.run(main, [])
+        assert log.result == main()
+        loop, = log.loops_by_filename(self.filepath)
+        # We dont want too many guards, but a residual call to min_max_loop
+        guards = [n for n in log.opnames(loop.ops_by_id("max")) if n.startswith('guard')]
+        assert len(guards) < 20
+        assert loop.match_by_id('max',"""
+            ...
+            p76 = call_may_force(ConstClass(min_max_loop__max), _, _, descr=...)
+            ...
+        """)
+
+    def test_iter_max(self):
+        def main():
+            i = 2
+            sa = 0
+            while i < 300:
+                lst = range(i)
+                sa += max(lst) # ID: max
+                i += 1
+            return sa
+        log = self.run(main, [])
+        assert log.result == main()
+        loop, = log.loops_by_filename(self.filepath)
+        # We dont want too many guards, but a residual call to min_max_loop
+        guards = [n for n in log.opnames(loop.ops_by_id("max")) if n.startswith('guard')]
+        assert len(guards) < 20
+        assert loop.match_by_id('max',"""
+            ...
+            p76 = call_may_force(ConstClass(min_max_loop__max), _, _, descr=...)
+            ...
+        """)
diff --git a/pypy/module/pypyjit/test_pypy_c/test_pypy_c_new.py b/pypy/module/pypyjit/test_pypy_c/test_misc.py
rename from pypy/module/pypyjit/test_pypy_c/test_pypy_c_new.py
rename to pypy/module/pypyjit/test_pypy_c/test_misc.py
--- a/pypy/module/pypyjit/test_pypy_c/test_pypy_c_new.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_misc.py
@@ -1,13 +1,8 @@
-import py, sys, re
-import subprocess
-from lib_pypy import disassembler
-from pypy.tool.udir import udir
-from pypy.tool import logparser
-from pypy.module.pypyjit.test_pypy_c.model import Log
-from pypy.module.pypyjit.test_pypy_c.test_model import BaseTestPyPyC
+import py, sys
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
 
 
-class TestPyPyCNew(BaseTestPyPyC):
+class TestMisc(BaseTestPyPyC):
     def test_f1(self):
         def f1(n):
             "Arbitrary test function."
@@ -76,377 +71,6 @@
         """)
 
 
-    def test_recursive_call(self):
-        def fn():
-            def rec(n):
-                if n == 0:
-                    return 0
-                return 1 + rec(n-1)
-            #
-            # this loop is traced and then aborted, because the trace is too
-            # long. But then "rec" is marked as "don't inline"
-            i = 0
-            j = 0
-            while i < 20:
-                i += 1
-                j += rec(100)
-            #
-            # next time we try to trace "rec", instead of inlining we compile
-            # it separately and generate a call_assembler
-            i = 0
-            j = 0
-            while i < 20:
-                i += 1
-                j += rec(100) # ID: call_rec
-                a = 0
-            return j
-        #
-        log = self.run(fn, [], threshold=18)
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match_by_id('call_rec', """
-            ...
-            p53 = call_assembler(..., descr=...)
-            guard_not_forced(descr=...)
-            guard_no_exception(descr=...)
-            ...
-        """)
-
-    def test_cmp_exc(self):
-        def f1(n):
-            # So we don't get a LOAD_GLOBAL op
-            KE = KeyError
-            i = 0
-            while i < n:
-                try:
-                    raise KE
-                except KE: # ID: except
-                    i += 1
-            return i
-
-        log = self.run(f1, [10000])
-        assert log.result == 10000
-        loop, = log.loops_by_id("except")
-        ops = list(loop.ops_by_id("except", opcode="COMPARE_OP"))
-        assert ops == []
-
-    def test_simple_call(self):
-        src = """
-            OFFSET = 0
-            def f(i):
-                return i + 1 + OFFSET # ID: add
-            def main(n):
-                i = 0
-                while i < n+OFFSET:   # ID: cond
-                    i = f(f(i))       # ID: call
-                    a = 0
-                return i
-        """
-        log = self.run(src, [1000], threshold=400)
-        assert log.result == 1000
-        # first, we test what is inside the entry bridge
-        # -----------------------------------------------
-        entry_bridge, = log.loops_by_id('call', is_entry_bridge=True)
-        # LOAD_GLOBAL of OFFSET
-        ops = entry_bridge.ops_by_id('cond', opcode='LOAD_GLOBAL')
-        assert log.opnames(ops) == ["guard_value",
-                                    "getfield_gc", "guard_value",
-                                    "getfield_gc", "guard_isnull",
-                                    "getfield_gc", "guard_nonnull_class"]
-        # LOAD_GLOBAL of OFFSET but in different function partially folded
-        # away
-        # XXX could be improved
-        ops = entry_bridge.ops_by_id('add', opcode='LOAD_GLOBAL')
-        assert log.opnames(ops) == ["guard_value", "getfield_gc", "guard_isnull"]
-        #
-        # two LOAD_GLOBAL of f, the second is folded away
-        ops = entry_bridge.ops_by_id('call', opcode='LOAD_GLOBAL')
-        assert log.opnames(ops) == ["getfield_gc", "guard_nonnull_class"]
-        #
-        assert entry_bridge.match_by_id('call', """
-            p29 = getfield_gc(ConstPtr(ptr28), descr=<GcPtrFieldDescr pypy.objspace.std.celldict.ModuleCell.inst_w_value .*>)
-            guard_nonnull_class(p29, ConstClass(Function), descr=<Guard18>)
-            p33 = getfield_gc(p29, descr=<GcPtrFieldDescr pypy.interpreter.function.Function.inst_code .*>)
-            guard_value(p33, ConstPtr(ptr34), descr=<Guard19>)
-            p35 = getfield_gc(p29, descr=<GcPtrFieldDescr pypy.interpreter.function.Function.inst_w_func_globals .*>)
-            p36 = getfield_gc(p29, descr=<GcPtrFieldDescr pypy.interpreter.function.Function.inst_closure .*>)
-            p38 = call(ConstClass(getexecutioncontext), descr=<GcPtrCallDescr>)
-            p39 = getfield_gc(p38, descr=<GcPtrFieldDescr pypy.interpreter.executioncontext.ExecutionContext.inst_topframeref .*>)
-            i40 = force_token()
-            p41 = getfield_gc(p38, descr=<GcPtrFieldDescr pypy.interpreter.executioncontext.ExecutionContext.inst_w_tracefunc .*>)
-            guard_isnull(p41, descr=<Guard20>)
-            i42 = getfield_gc(p38, descr=<NonGcPtrFieldDescr pypy.interpreter.executioncontext.ExecutionContext.inst_profilefunc .*>)
-            i43 = int_is_zero(i42)
-            guard_true(i43, descr=<Guard21>)
-            i50 = force_token()
-        """)
-        #
-        # then, we test the actual loop
-        # -----------------------------
-        loop, = log.loops_by_id('call')
-        assert loop.match("""
-            i12 = int_lt(i5, i6)
-            guard_true(i12, descr=<Guard3>)
-            i13 = force_token()
-            i15 = int_add(i5, 1)
-            i16 = int_add_ovf(i15, i7)
-            guard_no_overflow(descr=<Guard4>)
-            i18 = force_token()
-            i20 = int_add_ovf(i16, 1)
-            guard_no_overflow(descr=<Guard5>)
-            i21 = int_add_ovf(i20, i7)
-            guard_no_overflow(descr=<Guard6>)
-            --TICK--
-            jump(p0, p1, p2, p3, p4, i21, i6, i7, p8, p9, p10, p11, descr=<Loop0>)
-        """)
-
-    def test_method_call(self):
-        def fn(n):
-            class A(object):
-                def __init__(self, a):
-                    self.a = a
-                def f(self, i):
-                    return self.a + i
-            i = 0
-            a = A(1)
-            while i < n:
-                x = a.f(i)    # ID: meth1
-                i = a.f(x)    # ID: meth2
-            return i
-        #
-        log = self.run(fn, [1000], threshold=400)
-        assert log.result == 1000
-        #
-        # first, we test the entry bridge
-        # -------------------------------
-        entry_bridge, = log.loops_by_filename(self.filepath, is_entry_bridge=True)
-        ops = entry_bridge.ops_by_id('meth1', opcode='LOOKUP_METHOD')
-        assert log.opnames(ops) == ['guard_value', 'getfield_gc', 'guard_value',
-                                    'guard_not_invalidated']
-        # the second LOOKUP_METHOD is folded away
-        assert list(entry_bridge.ops_by_id('meth2', opcode='LOOKUP_METHOD')) == []
-        #
-        # then, the actual loop
-        # ----------------------
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i15 = int_lt(i6, i9)
-            guard_true(i15, descr=<Guard3>)
-            guard_not_invalidated(descr=<Guard4>)
-            i16 = force_token()
-            i17 = int_add_ovf(i10, i6)
-            guard_no_overflow(descr=<Guard5>)
-            i18 = force_token()
-            i19 = int_add_ovf(i10, i17)
-            guard_no_overflow(descr=<Guard6>)
-            --TICK--
-            jump(p0, p1, p2, p3, p4, p5, i19, p7, i17, i9, i10, p11, p12, p13, descr=<Loop0>)
-        """)
-
-    def test_static_classmethod_call(self):
-        def fn(n):
-            class A(object):
-                @classmethod
-                def f(cls, i):
-                    return i + (cls is A) + 1
-                @staticmethod
-                def g(i):
-                    return i - 1
-            #
-            i = 0
-            a = A()
-            while i < n:
-                x = a.f(i)
-                i = a.g(x)
-            return i
-        #
-        log = self.run(fn, [1000], threshold=400)
-        assert log.result == 1000
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i14 = int_lt(i6, i9)
-            guard_true(i14, descr=<Guard3>)
-            guard_not_invalidated(descr=<Guard4>)
-            i15 = force_token()
-            i17 = int_add_ovf(i8, 1)
-            guard_no_overflow(descr=<Guard5>)
-            i18 = force_token()
-            --TICK--
-            jump(p0, p1, p2, p3, p4, p5, i8, p7, i17, i9, p10, p11, p12, descr=<Loop0>)
-        """)
-
-    def test_default_and_kw(self):
-        def main(n):
-            def f(i, j=1):
-                return i + j
-            #
-            i = 0
-            while i < n:
-                i = f(f(i), j=1) # ID: call
-                a = 0
-            return i
-        #
-        log = self.run(main, [1000], threshold=400)
-        assert log.result == 1000
-        loop, = log.loops_by_id('call')
-        assert loop.match_by_id('call', """
-            i14 = force_token()
-            i16 = force_token()
-        """)
-
-    def test_kwargs(self):
-        # this is not a very precise test, could be improved
-        def main(x):
-            def g(**args):
-                return len(args)
-            #
-            s = 0
-            d = {}
-            for i in range(x):
-                s += g(**d)       # ID: call
-                d[str(i)] = i
-                if i % 100 == 99:
-                    d = {}
-            return s
-        #
-        log = self.run(main, [1000], threshold=400)
-        assert log.result == 49500
-        loop, = log.loops_by_id('call')
-        ops = log.opnames(loop.ops_by_id('call'))
-        guards = [ops for ops in ops if ops.startswith('guard')]
-        assert len(guards) <= 5
-
-    def test_stararg_virtual(self):
-        def main(x):
-            def g(*args):
-                return len(args)
-            def h(a, b, c):
-                return c
-            #
-            s = 0
-            for i in range(x):
-                l = [i, x, 2]
-                s += g(*l)       # ID: g1
-                s += h(*l)       # ID: h1
-                s += g(i, x, 2)  # ID: g2
-                a = 0
-            for i in range(x):
-                l = [x, 2]
-                s += g(i, *l)    # ID: g3
-                s += h(i, *l)    # ID: h2
-                a = 0
-            return s
-        #
-        log = self.run(main, [1000], threshold=400)
-        assert log.result == 13000
-        loop0, = log.loops_by_id('g1')
-        assert loop0.match_by_id('g1', """
-            i20 = force_token()
-            setfield_gc(p4, i19, descr=<.*W_AbstractSeqIterObject.inst_index .*>)
-            i22 = int_add_ovf(i8, 3)
-            guard_no_overflow(descr=<Guard4>)
-        """)
-        assert loop0.match_by_id('h1', """
-            i20 = force_token()
-            i22 = int_add_ovf(i8, 2)
-            guard_no_overflow(descr=<Guard5>)
-        """)
-        assert loop0.match_by_id('g2', """
-            i27 = force_token()
-            i29 = int_add_ovf(i26, 3)
-            guard_no_overflow(descr=<Guard6>)
-        """)
-        #
-        loop1, = log.loops_by_id('g3')
-        assert loop1.match_by_id('g3', """
-            i21 = force_token()
-            setfield_gc(p4, i20, descr=<.* .*W_AbstractSeqIterObject.inst_index .*>)
-            i23 = int_add_ovf(i9, 3)
-            guard_no_overflow(descr=<Guard37>)
-        """)
-        assert loop1.match_by_id('h2', """
-            i25 = force_token()
-            i27 = int_add_ovf(i23, 2)
-            guard_no_overflow(descr=<Guard38>)
-        """)
-
-    def test_stararg(self):
-        def main(x):
-            def g(*args):
-                return args[-1]
-            def h(*args):
-                return len(args)
-            #
-            s = 0
-            l = []
-            i = 0
-            while i < x:
-                l.append(1)
-                s += g(*l)     # ID: g
-                i = h(*l)      # ID: h
-                a = 0
-            return s
-        #
-        log = self.run(main, [1000], threshold=400)
-        assert log.result == 1000
-        loop, = log.loops_by_id('g')
-        ops_g = log.opnames(loop.ops_by_id('g'))
-        ops_h = log.opnames(loop.ops_by_id('h'))
-        ops = ops_g + ops_h
-        assert 'new_with_vtable' not in ops
-        assert 'call_may_force' not in ops
-
-    def test_virtual_instance(self):
-        def main(n):
-            class A(object):
-                pass
-            #
-            i = 0
-            while i < n:
-                a = A()
-                assert isinstance(a, A)
-                assert not isinstance(a, int)
-                a.x = 2
-                i = i + a.x
-            return i
-        #
-        log = self.run(main, [1000], threshold = 400)
-        assert log.result == 1000
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i7 = int_lt(i5, i6)
-            guard_true(i7, descr=<Guard3>)
-            guard_not_invalidated(descr=<Guard4>)
-            i9 = int_add_ovf(i5, 2)
-            guard_no_overflow(descr=<Guard5>)
-            --TICK--
-            jump(p0, p1, p2, p3, p4, i9, i6, descr=<Loop0>)
-        """)
-
-    def test_load_attr(self):
-        src = '''
-            class A(object):
-                pass
-            a = A()
-            a.x = 2
-            def main(n):
-                i = 0
-                while i < n:
-                    i = i + a.x
-                return i
-        '''
-        log = self.run(src, [1000], threshold=400)
-        assert log.result == 1000
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i9 = int_lt(i5, i6)
-            guard_true(i9, descr=<Guard3>)
-            guard_not_invalidated(descr=<Guard4>)
-            i10 = int_add_ovf(i5, i7)
-            guard_no_overflow(descr=<Guard5>)
-            --TICK--
-            jump(p0, p1, p2, p3, p4, i10, i6, p7, i7, p8, descr=<Loop0>)
-        """)
-
     def test_mixed_type_loop(self):
         def main(n):
             i = 0.0
@@ -455,7 +79,7 @@
                 i = j + i
             return i
         #
-        log = self.run(main, [1000], threshold=400)
+        log = self.run(main, [1000])
         assert log.result == 1000.0
         loop, = log.loops_by_filename(self.filepath)
         assert loop.match("""
@@ -466,29 +90,6 @@
             jump(p0, p1, p2, p3, p4, f10, p6, f7, f8, descr=<Loop0>)
         """)
 
-    def test_call_builtin_function(self):
-        def main(n):
-            i = 2
-            l = []
-            while i < n:
-                i += 1
-                l.append(i)    # ID: append
-                a = 0
-            return i, len(l)
-        #
-        log = self.run(main, [1000], threshold=400)
-        assert log.result == (1000, 998)
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match_by_id('append', """
-            i13 = getfield_gc(p8, descr=<SignedFieldDescr list.length .*>)
-            i15 = int_add(i13, 1)
-            call(ConstClass(_ll_list_resize_ge__listPtr_Signed), p8, i15, descr=<VoidCallDescr>)
-            guard_no_exception(descr=<Guard4>)
-            p17 = getfield_gc(p8, descr=<GcPtrFieldDescr list.items .*>)
-            p19 = new_with_vtable(ConstClass(W_IntObject))
-            setfield_gc(p19, i12, descr=<SignedFieldDescr .*W_IntObject.inst_intval .*>)
-            setarrayitem_gc(p17, i13, p19, descr=<GcPtrArrayDescr>)
-        """)
 
     def test_range_iter(self):
         def main(n):
@@ -501,7 +102,7 @@
                 a = 0
             return s
         #
-        log = self.run(main, [1000], threshold=400)
+        log = self.run(main, [1000])
         assert log.result == 1000 * 999 / 2
         loop, = log.loops_by_filename(self.filepath)
         assert loop.match("""
@@ -523,76 +124,6 @@
             jump(..., descr=<Loop0>)
         """)
 
-    def test_exception_inside_loop_1(self):
-        def main(n):
-            while n:
-                try:
-                    raise ValueError
-                except ValueError:
-                    pass
-                n -= 1
-            return n
-        #
-        log = self.run(main, [1000], threshold=400)
-        assert log.result == 0
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-        i5 = int_is_true(i3)
-        guard_true(i5, descr=<Guard3>)
-        guard_not_invalidated(descr=<Guard4>)
-        --EXC-TICK--
-        i12 = int_sub_ovf(i3, 1)
-        guard_no_overflow(descr=<Guard6>)
-        --TICK--
-        jump(..., descr=<Loop0>)
-        """)
-
-    def test_exception_inside_loop_2(self):
-        def main(n):
-            def g(n):
-                raise ValueError(n)  # ID: raise
-            def f(n):
-                g(n)
-            #
-            while n:
-                try:
-                    f(n)
-                except ValueError:
-                    pass
-                n -= 1
-            return n
-        #
-        log = self.run(main, [1000], threshold=400)
-        assert log.result == 0
-        loop, = log.loops_by_filename(self.filepath)
-        ops = log.opnames(loop.ops_by_id('raise'))
-        assert 'new' not in ops
-
-    def test_reraise(self):
-        def f(n):
-            i = 0
-            while i < n:
-                try:
-                    try:
-                        raise KeyError
-                    except KeyError:
-                        raise
-                except KeyError:
-                    i += 1
-            return i
-
-        log = self.run(f, [100000])
-        assert log.result == 100000
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i7 = int_lt(i4, i5)
-            guard_true(i7, descr=<Guard3>)
-            guard_not_invalidated(descr=<Guard4>)
-            --EXC-TICK--
-            i14 = int_add(i4, 1)
-            --TICK--
-            jump(..., descr=<Loop0>)
-        """)
 
     def test_chain_of_guards(self):
         src = """
@@ -612,445 +143,11 @@
                 i += 1
             return sum
         """
-        log = self.run(src, [0], threshold=400)
+        log = self.run(src, [0])
         assert log.result == 500*3
         loops = log.loops_by_filename(self.filepath)
         assert len(loops) == 1
 
-    def test_getattr_with_dynamic_attribute(self):
-        src = """
-        class A(object):
-            pass
-
-        l = ["x", "y"]
-
-        def main():
-            sum = 0
-            a = A()
-            a.a1 = 0
-            a.a2 = 0
-            a.a3 = 0
-            a.a4 = 0
-            a.a5 = 0 # workaround, because the first five attributes need a promotion
-            a.x = 1
-            a.y = 2
-            i = 0
-            while i < 500:
-                name = l[i % 2]
-                sum += getattr(a, name)
-                i += 1
-            return sum
-        """
-        log = self.run(src, [], threshold=400)
-        assert log.result == 250 + 250*2
-        loops = log.loops_by_filename(self.filepath)
-        assert len(loops) == 1
-
-    def test_blockstack_virtualizable(self):
-        def main(n):
-            from pypyjit import residual_call
-            i = 0
-            while i < n:
-                try:
-                    residual_call(len, [])   # ID: call
-                except:
-                    pass
-                i += 1
-            return i
-        #
-        log = self.run(main, [500], threshold=400)
-        assert log.result == 500
-        loop, = log.loops_by_id('call')
-        assert loop.match_by_id('call', opcode='CALL_FUNCTION', expected_src="""
-            # make sure that the "block" is not allocated
-            ...
-            i20 = force_token()
-            setfield_gc(p0, i20, descr=<SignedFieldDescr .*PyFrame.vable_token .*>)
-            p22 = new_with_vtable(19511408)
-            p24 = new_array(1, descr=<GcPtrArrayDescr>)
-            p26 = new_with_vtable(ConstClass(W_ListObject))
-            p27 = new(descr=<SizeDescr .*>)
-            p29 = new_array(0, descr=<GcPtrArrayDescr>)
-            setfield_gc(p27, p29, descr=<GcPtrFieldDescr list.items .*>)
-            setfield_gc(p26, p27, descr=<.* .*W_ListObject.inst_wrappeditems .*>)
-            setarrayitem_gc(p24, 0, p26, descr=<GcPtrArrayDescr>)
-            setfield_gc(p22, p24, descr=<GcPtrFieldDescr .*Arguments.inst_arguments_w .*>)
-            p32 = call_may_force(11376960, p18, p22, descr=<GcPtrCallDescr>)
-            ...
-        """)
-
-    def test_import_in_function(self):
-        def main(n):
-            i = 0
-            while i < n:
-                from sys import version  # ID: import
-                i += 1
-            return i
-        #
-        log = self.run(main, [500], threshold=400)
-        assert log.result == 500
-        loop, = log.loops_by_id('import')
-        assert loop.match_by_id('import', """
-            p11 = getfield_gc(ConstPtr(ptr10), descr=<GcPtrFieldDescr pypy.objspace.std.celldict.ModuleCell.inst_w_value 8>)
-            guard_value(p11, ConstPtr(ptr12), descr=<Guard4>)
-            guard_not_invalidated(descr=<Guard5>)
-            p14 = getfield_gc(ConstPtr(ptr13), descr=<GcPtrFieldDescr pypy.objspace.std.celldict.ModuleCell.inst_w_value 8>)
-            p16 = getfield_gc(ConstPtr(ptr15), descr=<GcPtrFieldDescr pypy.objspace.std.celldict.ModuleCell.inst_w_value 8>)
-            guard_value(p14, ConstPtr(ptr17), descr=<Guard6>)
-            guard_isnull(p16, descr=<Guard7>)
-        """)
-
-    def test_import_fast_path(self, tmpdir):
-        pkg = tmpdir.join('mypkg').ensure(dir=True)
-        pkg.join('__init__.py').write("")
-        pkg.join('mod.py').write(str(py.code.Source("""
-            def do_the_import():
-                import sys
-        """)))
-        def main(path, n):
-            import sys
-            sys.path.append(path)
-            from mypkg.mod import do_the_import
-            for i in range(n):
-                do_the_import()
-        #
-        log = self.run(main, [str(tmpdir), 300], threshold=200)
-        loop, = log.loops_by_filename(self.filepath)
-        # this is a check for a slow-down that introduced a
-        # call_may_force(absolute_import_with_lock).
-        for opname in log.opnames(loop.allops(opcode="IMPORT_NAME")):
-            assert 'call' not in opname    # no call-like opcode
-
-    def test_arraycopy_disappears(self):
-        def main(n):
-            i = 0
-            while i < n:
-                t = (1, 2, 3, i + 1)
-                t2 = t[:]
-                del t
-                i = t2[3]
-                del t2
-            return i
-        #
-        log = self.run(main, [500], threshold=400)
-        assert log.result == 500
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i7 = int_lt(i5, i6)
-            guard_true(i7, descr=<Guard3>)
-            i9 = int_add(i5, 1)
-            --TICK--
-            jump(p0, p1, p2, p3, p4, i9, i6, descr=<Loop0>)
-        """)
-
-    def test_boolrewrite_inverse(self):
-        """
-        Test for this case::
-            guard(i < x)
-            ...
-            guard(i >= y)
-
-        where x and y can be either constants or variables. There are cases in
-        which the second guard is proven to be always true.
-        """
-
-        for a, b, res, opt_expected in (('2000', '2000', 20001000, True),
-                                        ( '500',  '500', 15001500, True),
-                                        ( '300',  '600', 16001700, False),
-                                        (   'a',    'b', 16001700, False),
-                                        (   'a',    'a', 13001700, True)):
-            src = """
-                def main():
-                    sa = 0
-                    a = 300
-                    b = 600
-                    for i in range(1000):
-                        if i < %s:         # ID: lt
-                            sa += 1
-                        else:
-                            sa += 2
-                        #
-                        if i >= %s:        # ID: ge
-                            sa += 10000
-                        else:
-                            sa += 20000
-                    return sa
-            """ % (a, b)
-            #
-            log = self.run(src, [], threshold=400)
-            assert log.result == res
-            loop, = log.loops_by_filename(self.filepath)
-            le_ops = log.opnames(loop.ops_by_id('lt'))
-            ge_ops = log.opnames(loop.ops_by_id('ge'))
-            assert le_ops.count('int_lt') == 1
-            #
-            if opt_expected:
-                assert ge_ops.count('int_ge') == 0
-            else:
-                # if this assert fails it means that the optimization was
-                # applied even if we don't expect to. Check whether the
-                # optimization is valid, and either fix the code or fix the
-                # test :-)
-                assert ge_ops.count('int_ge') == 1
-
-    def test_boolrewrite_reflex(self):
-        """
-        Test for this case::
-            guard(i < x)
-            ...
-            guard(y > i)
-
-        where x and y can be either constants or variables. There are cases in
-        which the second guard is proven to be always true.
-        """
-        for a, b, res, opt_expected in (('2000', '2000', 10001000, True),
-                                        ( '500',  '500', 15001500, True),
-                                        ( '300',  '600', 14001700, False),
-                                        (   'a',    'b', 14001700, False),
-                                        (   'a',    'a', 17001700, True)):
-
-            src = """
-                def main():
-                    sa = 0
-                    a = 300
-                    b = 600
-                    for i in range(1000):
-                        if i < %s:        # ID: lt
-                            sa += 1
-                        else:
-                            sa += 2
-                        if %s > i:        # ID: gt
-                            sa += 10000
-                        else:
-                            sa += 20000
-                    return sa
-            """ % (a, b)
-            log = self.run(src, [], threshold=400)
-            assert log.result == res
-            loop, = log.loops_by_filename(self.filepath)
-            le_ops = log.opnames(loop.ops_by_id('lt'))
-            gt_ops = log.opnames(loop.ops_by_id('gt'))
-            assert le_ops.count('int_lt') == 1
-            #
-            if opt_expected:
-                assert gt_ops.count('int_gt') == 0
-            else:
-                # if this assert fails it means that the optimization was
-                # applied even if we don't expect to. Check whether the
-                # optimization is valid, and either fix the code or fix the
-                # test :-)
-                assert gt_ops.count('int_gt') == 1
-
-
-    def test_boolrewrite_allcases_inverse(self):
-        """
-        Test for this case::
-            guard(i < x)
-            ...
-            guard(i > y)
-
-        with all possible combination of binary comparison operators.  This
-        test only checks that we get the expected result, not that any
-        optimization has been applied.
-        """
-        ops = ('<', '>', '<=', '>=', '==', '!=')
-        for op1 in ops:
-            for op2 in ops:
-                for a,b in ((500, 500), (300, 600)):
-                    src = """
-                        def main():
-                            sa = 0
-                            for i in range(300):
-                                if i %s %d:
-                                    sa += 1
-                                else:
-                                    sa += 2
-                                if i %s %d:
-                                    sa += 10000
-                                else:
-                                    sa += 20000
-                            return sa
-                    """ % (op1, a, op2, b)
-                    self.run_and_check(src, threshold=200)
-
-                    src = """
-                        def main():
-                            sa = 0
-                            i = 0.0
-                            while i < 250.0:
-                                if i %s %f:
-                                    sa += 1
-                                else:
-                                    sa += 2
-                                if i %s %f:
-                                    sa += 10000
-                                else:
-                                    sa += 20000
-                                i += 0.25
-                            return sa
-                    """ % (op1, float(a)/4.0, op2, float(b)/4.0)
-                    self.run_and_check(src, threshold=300)
-
-
-    def test_boolrewrite_allcases_reflex(self):
-        """
-        Test for this case::
-            guard(i < x)
-            ...
-            guard(x > i)
-
-        with all possible combination of binary comparison operators.  This
-        test only checks that we get the expected result, not that any
-        optimization has been applied.
-        """
-        ops = ('<', '>', '<=', '>=', '==', '!=')
-        for op1 in ops:
-            for op2 in ops:
-                for a,b in ((500, 500), (300, 600)):
-                    src = """
-                        def main():
-                            sa = 0
-                            for i in range(300):
-                                if i %s %d:
-                                    sa += 1
-                                else:
-                                    sa += 2
-                                if %d %s i:
-                                    sa += 10000
-                                else:
-                                    sa += 20000
-                            return sa
-                    """ % (op1, a, b, op2)
-                    self.run_and_check(src, threshold=200)
-
-                    src = """
-                        def main():
-                            sa = 0
-                            i = 0.0
-                            while i < 250.0:
-                                if i %s %f:
-                                    sa += 1
-                                else:
-                                    sa += 2
-                                if %f %s i:
-                                    sa += 10000
-                                else:
-                                    sa += 20000
-                                i += 0.25
-                            return sa
-                    """ % (op1, float(a)/4.0, float(b)/4.0, op2)
-                    self.run_and_check(src, threshold=300)
-
-    def test_boolrewrite_ptr(self):
-        """
-        This test only checks that we get the expected result, not that any
-        optimization has been applied.
-        """
-        compares = ('a == b', 'b == a', 'a != b', 'b != a', 'a == c', 'c != b')
-        for e1 in compares:
-            for e2 in compares:
-                src = """
-                    class tst(object):
-                        pass
-                    def main():
-                        a = tst()
-                        b = tst()
-                        c = tst()
-                        sa = 0
-                        for i in range(300):
-                            if %s:
-                                sa += 1
-                            else:
-                                sa += 2
-                            if %s:
-                                sa += 10000
-                            else:
-                                sa += 20000
-                            if i > 750:
-                                a = b
-                        return sa
-                """ % (e1, e2)
-                self.run_and_check(src, threshold=200)
-
-    def test_array_sum(self):
-        def main():
-            from array import array
-            img = array("i", range(128) * 5) * 480
-            l, i = 0, 0
-            while i < len(img):
-                l += img[i]
-                i += 1
-            return l
-        #
-        log = self.run(main, [])
-        assert log.result == 19507200
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i13 = int_lt(i7, i9)
-            guard_true(i13, descr=<Guard3>)
-            i15 = getarrayitem_raw(i10, i7, descr=<.*ArrayNoLengthDescr>)
-            i16 = int_add_ovf(i8, i15)
-            guard_no_overflow(descr=<Guard4>)
-            i18 = int_add(i7, 1)
-            --TICK--
-            jump(p0, p1, p2, p3, p4, p5, p6, i18, i16, i9, i10, descr=<Loop0>)
-        """)
-
-    def test_array_intimg(self):
-        def main():
-            from array import array
-            img = array('i', range(3)) * (350 * 480)
-            intimg = array('i', (0,)) * (640 * 480)
-            l, i = 0, 640
-            while i < 640 * 480:
-                assert len(img) == 3*350*480
-                assert len(intimg) == 640*480
-                l = l + img[i]
-                intimg[i] = (intimg[i-640] + l)
-                i += 1
-            return intimg[i - 1]
-        #
-        log = self.run(main, [])
-        assert log.result == 73574560
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i13 = int_lt(i8, 307200)
-            guard_true(i13, descr=<Guard3>)
-        # the bound check guard on img has been killed (thanks to the asserts)
-            i14 = getarrayitem_raw(i10, i8, descr=<.*ArrayNoLengthDescr>)
-            i15 = int_add_ovf(i9, i14)
-            guard_no_overflow(descr=<Guard4>)
-            i17 = int_sub(i8, 640)
-        # the bound check guard on intimg has been killed (thanks to the asserts)
-            i18 = getarrayitem_raw(i11, i17, descr=<.*ArrayNoLengthDescr>)
-            i19 = int_add_ovf(i18, i15)
-            guard_no_overflow(descr=<Guard5>)
-        # on 64bit, there is a guard checking that i19 actually fits into 32bit
-            ...
-            setarrayitem_raw(i11, i8, _, descr=<.*ArrayNoLengthDescr>)
-            i28 = int_add(i8, 1)
-            --TICK--
-            jump(p0, p1, p2, p3, p4, p5, p6, p7, i28, i15, i10, i11, descr=<Loop0>)
-        """)
-
-    def test_func_defaults(self):
-        def main(n):
-            i = 1
-            while i < n:
-                i += len(xrange(i+1)) - i
-            return i
-
-        log = self.run(main, [10000])
-        assert log.result == 10000
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i10 = int_lt(i5, i6)
-            guard_true(i10, descr=<Guard3>)
-            i120 = int_add(i5, 1)
-            guard_not_invalidated(descr=<Guard4>)
-            --TICK--
-            jump(..., descr=<Loop0>)
-        """)
 
     def test_unpack_iterable_non_list_tuple(self):
         def main(n):
@@ -1085,649 +182,53 @@
             jump(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, i28, i25, i19, i13, p14, p15, descr=<Loop0>)
         """)
 
-    def test_mutate_class(self):
-        def fn(n):
-            class A(object):
-                count = 1
-                def __init__(self, a):
-                    self.a = a
-                def f(self):
-                    return self.count
-            i = 0
-            a = A(1)
-            while i < n:
-                A.count += 1 # ID: mutate
-                i = a.f()    # ID: meth1
-            return i
+
+    def test_dont_trace_every_iteration(self):
+        def main(a, b):
+            i = sa = 0
+            while i < 300:
+                if a > 0:
+                    pass
+                if 1 < b < 2:
+                    pass
+                sa += a % b
+                i += 1
+            return sa
         #
-        log = self.run(fn, [1000], threshold=10)
-        assert log.result == 1000
-        #
-        # first, we test the entry bridge
-        # -------------------------------
-        entry_bridge, = log.loops_by_filename(self.filepath, is_entry_bridge=True)
-        ops = entry_bridge.ops_by_id('mutate', opcode='LOAD_ATTR')
-        assert log.opnames(ops) == ['guard_value', 'guard_not_invalidated',
-                                    'getfield_gc', 'guard_nonnull_class']
-        # the STORE_ATTR is folded away
-        assert list(entry_bridge.ops_by_id('meth1', opcode='STORE_ATTR')) == []
-        #
-        # then, the actual loop
-        # ----------------------
+        log = self.run(main, [10, 20])
+        assert log.result == 300 * (10 % 20)
+        assert log.jit_summary.tracing_no == 1
         loop, = log.loops_by_filename(self.filepath)
         assert loop.match("""
-            i8 = getfield_gc_pure(p5, descr=<SignedFieldDescr .*W_IntObject.inst_intval.*>)
-            i9 = int_lt(i8, i7)
-            guard_true(i9, descr=.*)
-            guard_not_invalidated(descr=.*)
-            i11 = int_add(i8, 1)
-            i12 = force_token()
+            i11 = int_lt(i7, 300)
+            guard_true(i11, descr=<Guard3>)
+            i12 = int_add_ovf(i8, i9)
+            guard_no_overflow(descr=<Guard4>)
+            i14 = int_add(i7, 1)
             --TICK--
-            p20 = new_with_vtable(ConstClass(W_IntObject))
-            setfield_gc(p20, i11, descr=<SignedFieldDescr.*W_IntObject.inst_intval .*>)
-            setfield_gc(ConstPtr(ptr21), p20, descr=<GcPtrFieldDescr .*TypeCell.inst_w_value .*>)
-            jump(p0, p1, p2, p3, p4, p20, p6, i7, descr=<Loop.>)
+            jump(..., descr=...)
         """)
+        #
+        log = self.run(main, [-10, -20])
+        assert log.result == 300 * (-10 % -20)
+        assert log.jit_summary.tracing_no == 1
 
 
-    def test_intbound_simple(self):
+    def test_overflow_checking(self):
         """
         This test only checks that we get the expected result, not that any
         optimization has been applied.
         """
-        ops = ('<', '>', '<=', '>=', '==', '!=')
-        nbr = (3, 7)
-        for o1 in ops:
-            for o2 in ops:
-                for n1 in nbr:
-                    for n2 in nbr:
-                        src = '''
-                        def f(i):
-                            a, b = 3, 3
-                            if i %s %d:
-                                a = 0
-                            else:
-                                a = 1
-                            if i %s %d:
-                                b = 0
-                            else:
-                                b = 1
-                            return a + b * 2
-
-                        def main():
-                            res = [0] * 4
-                            idx = []
-                            for i in range(15):
-                                idx.extend([i] * 15)
-                            for i in idx:
-                                res[f(i)] += 1
-                            return res
-
-                        ''' % (o1, n1, o2, n2)
-                        self.run_and_check(src, threshold=200)
-
-    def test_intbound_addsub_mix(self):
-        """
-        This test only checks that we get the expected result, not that any
-        optimization has been applied.
-        """
-        tests = ('i > 4', 'i > 2', 'i + 1 > 2', '1 + i > 4',
-                 'i - 1 > 1', '1 - i > 1', '1 - i < -3',
-                 'i == 1', 'i == 5', 'i != 1', '-2 * i < -4')
-        for t1 in tests:
-            for t2 in tests:
-                src = '''
-                def f(i):
-                    a, b = 3, 3
-                    if %s:
-                        a = 0
-                    else:
-                        a = 1
-                    if %s:
-                        b = 0
-                    else:
-                        b = 1
-                    return a + b * 2
-
-                def main():
-                    res = [0] * 4
-                    idx = []
-                    for i in range(15):
-                        idx.extend([i] * 15)
-                    for i in idx:
-                        res[f(i)] += 1
-                    return res
-
-                ''' % (t1, t2)
-                self.run_and_check(src, threshold=200)
-
-    def test_intbound_gt(self):
-        def main(n):
-            i, a, b = 0, 0, 0
-            while i < n:
-                if i > -1:
-                    a += 1
-                if i > -2:
-                    b += 1
-                i += 1
-            return (a, b)
+        def main():
+            import sys
+            def f(a,b):
+                if a < 0: return -1
+                return a-b
+            #
+            total = sys.maxint - 2147483647
+            for i in range(100000):
+                total += f(i, 5)
+            #
+            return total
         #
-        log = self.run(main, [300], threshold=200)
-        assert log.result == (300, 300)
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i10 = int_lt(i8, i9)
-            guard_true(i10, descr=...)
-            i12 = int_add_ovf(i7, 1)
-            guard_no_overflow(descr=...)
-            i14 = int_add_ovf(i6, 1)
-            guard_no_overflow(descr=...)
-            i17 = int_add(i8, 1)
-            --TICK--
-            jump(p0, p1, p2, p3, p4, p5, i14, i12, i17, i9, descr=<Loop0>)
-        """)
-
-    def test_intbound_sub_lt(self):
-        def main():
-            i, a = 0, 0
-            while i < 300:
-                if i - 10 < 295:
-                    a += 1
-                i += 1
-            return a
-        #
-        log = self.run(main, [], threshold=200)
-        assert log.result == 300
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i7 = int_lt(i5, 300)
-            guard_true(i7, descr=...)
-            i9 = int_sub_ovf(i5, 10)
-            guard_no_overflow(descr=...)
-            i11 = int_add_ovf(i4, 1)
-            guard_no_overflow(descr=...)
-            i13 = int_add(i5, 1)
-            --TICK--
-            jump(p0, p1, p2, p3, i11, i13, descr=<Loop0>)
-        """)
-
-    def test_intbound_addsub_ge(self):
-        def main(n):
-            i, a, b = 0, 0, 0
-            while i < n:
-                if i + 5 >= 5:
-                    a += 1
-                if i - 1 >= -1:
-                    b += 1
-                i += 1
-            return (a, b)
-        #
-        log = self.run(main, [300], threshold=200)
-        assert log.result == (300, 300)
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i10 = int_lt(i8, i9)
-            guard_true(i10, descr=...)
-            i12 = int_add_ovf(i8, 5)
-            guard_no_overflow(descr=...)
-            i14 = int_add_ovf(i7, 1)
-            guard_no_overflow(descr=...)
-            i16 = int_add_ovf(i6, 1)
-            guard_no_overflow(descr=...)
-            i19 = int_add(i8, 1)
-            --TICK--
-            jump(p0, p1, p2, p3, p4, p5, i16, i14, i19, i9, descr=<Loop0>)
-        """)
-
-    def test_intbound_addmul_ge(self):
-        def main(n):
-            i, a, b = 0, 0, 0
-            while i < 300:
-                if i + 5 >= 5:
-                    a += 1
-                if 2 * i >= 0:
-                    b += 1
-                i += 1
-            return (a, b)
-        #
-        log = self.run(main, [300], threshold=200)
-        assert log.result == (300, 300)
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i10 = int_lt(i8, 300)
-            guard_true(i10, descr=...)
-            i12 = int_add(i8, 5)
-            i14 = int_add_ovf(i7, 1)
-            guard_no_overflow(descr=...)
-            i16 = int_lshift(i8, 1)
-            i18 = int_add_ovf(i6, 1)
-            guard_no_overflow(descr=...)
-            i21 = int_add(i8, 1)
-            --TICK--
-            jump(p0, p1, p2, p3, p4, p5, i18, i14, i21, descr=<Loop0>)
-        """)
-
-    def test_intbound_eq(self):
-        def main(a, n):
-            i, s = 0, 0
-            while i < 300:
-                if a == 7:
-                    s += a + 1
-                elif i == 10:
-                    s += i
-                else:
-                    s += 1
-                i += 1
-            return s
-        #
-        log = self.run(main, [7, 300], threshold=200)
-        assert log.result == main(7, 300)
-        log = self.run(main, [10, 300], threshold=200)
-        assert log.result == main(10, 300)
-        log = self.run(main, [42, 300], threshold=200)
-        assert log.result == main(42, 300)
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i10 = int_lt(i8, 300)
-            guard_true(i10, descr=...)
-            i12 = int_eq(i8, 10)
-            guard_false(i12, descr=...)
-            i14 = int_add_ovf(i7, 1)
-            guard_no_overflow(descr=...)
-            i16 = int_add(i8, 1)
-            --TICK--
-            jump(p0, p1, p2, p3, p4, p5, p6, i14, i16, descr=<Loop0>)
-        """)
-
-    def test_intbound_mul(self):
-        def main(a):
-            i, s = 0, 0
-            while i < 300:
-                assert i >= 0
-                if 2 * i < 30000:
-                    s += 1
-                else:
-                    s += a
-                i += 1
-            return s
-        #
-        log = self.run(main, [7], threshold=200)
-        assert log.result == 300
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i8 = int_lt(i6, 300)
-            guard_true(i8, descr=...)
-            i10 = int_lshift(i6, 1)
-            i12 = int_add_ovf(i5, 1)
-            guard_no_overflow(descr=...)
-            i14 = int_add(i6, 1)
-            --TICK--
-            jump(p0, p1, p2, p3, p4, i12, i14, descr=<Loop0>)
-        """)
-
-    def test_assert(self):
-        def main(a):
-            i, s = 0, 0
-            while i < 300:
-                assert a == 7
-                s += a + 1
-                i += 1
-            return s
-        log = self.run(main, [7], threshold=200)
-        assert log.result == 300*8
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i8 = int_lt(i6, 300)
-            guard_true(i8, descr=...)
-            i10 = int_add_ovf(i5, 8)
-            guard_no_overflow(descr=...)
-            i12 = int_add(i6, 1)
-            --TICK--
-            jump(p0, p1, p2, p3, p4, i10, i12, descr=<Loop0>)
-        """)
-
-    def test_zeropadded(self):
-        def main():
-            from array import array
-            class ZeroPadded(array):
-                def __new__(cls, l):
-                    self = array.__new__(cls, 'd', range(l))
-                    return self
-
-                def __getitem__(self, i):
-                    if i < 0 or i >= len(self):
-                        return 0
-                    return array.__getitem__(self, i) # ID: get
-            #
-            buf = ZeroPadded(2000)
-            i = 10
-            sa = 0
-            while i < 2000 - 10:
-                sa += buf[i-2] + buf[i-1] + buf[i] + buf[i+1] + buf[i+2]
-                i += 1
-            return sa
-
-        log = self.run(main, [], threshold=200)
-        assert log.result == 9895050.0
-        loop, = log.loops_by_filename(self.filepath)
-        #
-        # check that the overloaded __getitem__ does not introduce double
-        # array bound checks.
-        #
-        # The force_token()s are still there, but will be eliminated by the
-        # backend regalloc, so they are harmless
-        assert loop.match(ignore_ops=['force_token'],
-                          expected_src="""
-            ...
-            i20 = int_ge(i18, i8)
-            guard_false(i20, descr=...)
-            f21 = getarrayitem_raw(i13, i18, descr=...)
-            f23 = getarrayitem_raw(i13, i14, descr=...)
-            f24 = float_add(f21, f23)
-            f26 = getarrayitem_raw(i13, i6, descr=...)
-            f27 = float_add(f24, f26)
-            i29 = int_add(i6, 1)
-            i31 = int_ge(i29, i8)
-            guard_false(i31, descr=...)
-            f33 = getarrayitem_raw(i13, i29, descr=...)
-            f34 = float_add(f27, f33)
-            i36 = int_add(i6, 2)
-            i38 = int_ge(i36, i8)
-            guard_false(i38, descr=...)
-            f39 = getarrayitem_raw(i13, i36, descr=...)
-            ...
-        """)
-
-
-    def test_circular(self):
-        def main():
-            from array import array
-            class Circular(array):
-                def __new__(cls):
-                    self = array.__new__(cls, 'd', range(256))
-                    return self
-                def __getitem__(self, i):
-                    assert len(self) == 256
-                    return array.__getitem__(self, i & 255)
-            #
-            buf = Circular()
-            i = 10
-            sa = 0
-            while i < 2000 - 10:
-                sa += buf[i-2] + buf[i-1] + buf[i] + buf[i+1] + buf[i+2]
-                i += 1
-            return sa
-        #
-        log = self.run(main, [], threshold=200)
-        assert log.result == 1239690.0
-        loop, = log.loops_by_filename(self.filepath)
-        #
-        # check that the array bound checks are removed
-        #
-        # The force_token()s are still there, but will be eliminated by the
-        # backend regalloc, so they are harmless
-        assert loop.match(ignore_ops=['force_token'],
-                          expected_src="""
-            ...
-            i17 = int_and(i14, 255)
-            f18 = getarrayitem_raw(i8, i17, descr=...)
-            f20 = getarrayitem_raw(i8, i9, descr=...)
-            f21 = float_add(f18, f20)
-            f23 = getarrayitem_raw(i8, i10, descr=...)
-            f24 = float_add(f21, f23)
-            i26 = int_add(i6, 1)
-            i29 = int_and(i26, 255)
-            f30 = getarrayitem_raw(i8, i29, descr=...)
-            f31 = float_add(f24, f30)
-            i33 = int_add(i6, 2)
-            i36 = int_and(i33, 255)
-            f37 = getarrayitem_raw(i8, i36, descr=...)
-            ...
-        """)
-
-    def test_min_max(self):
-        def main():
-            i=0
-            sa=0
-            while i < 300:
-                sa+=min(max(i, 3000), 4000)
-                i+=1
-            return sa
-        log = self.run(main, [], threshold=200)
-        assert log.result == 300*3000
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i7 = int_lt(i4, 300)
-            guard_true(i7, descr=...)
-            i9 = int_add_ovf(i5, 3000)
-            guard_no_overflow(descr=...)
-            i11 = int_add(i4, 1)
-            --TICK--
-            jump(p0, p1, p2, p3, i11, i9, descr=<Loop0>)
-        """)
-
-    def test_silly_max(self):
-        def main():
-            i = 2
-            sa = 0
-            while i < 300:
-                lst = range(i)
-                sa += max(*lst) # ID: max
-                i += 1
-            return sa
-        log = self.run(main, [], threshold=200)
-        assert log.result == main()
-        loop, = log.loops_by_filename(self.filepath)
-        # We dont want too many guards, but a residual call to min_max_loop
-        guards = [n for n in log.opnames(loop.ops_by_id("max")) if n.startswith('guard')]
-        assert len(guards) < 20
-        assert loop.match_by_id('max',"""
-            ...
-            p76 = call_may_force(ConstClass(min_max_loop__max), _, _, descr=...)
-            ...
-        """)
-
-    def test_iter_max(self):
-        def main():
-            i = 2
-            sa = 0
-            while i < 300:
-                lst = range(i)
-                sa += max(lst) # ID: max
-                i += 1
-            return sa
-        log = self.run(main, [], threshold=200)
-        assert log.result == main()
-        loop, = log.loops_by_filename(self.filepath)
-        # We dont want too many guards, but a residual call to min_max_loop
-        guards = [n for n in log.opnames(loop.ops_by_id("max")) if n.startswith('guard')]
-        assert len(guards) < 20
-        assert loop.match_by_id('max',"""
-            ...
-            p76 = call_may_force(ConstClass(min_max_loop__max), _, _, descr=...)
-            ...
-        """)
-
-    def test__ffi_call(self):
-        from pypy.rlib.test.test_libffi import get_libm_name
-        def main(libm_name):
-            try:
-                from _ffi import CDLL, types
-            except ImportError:
-                sys.stderr.write('SKIP: cannot import _ffi\n')
-                return 0
-
-            libm = CDLL(libm_name)
-            pow = libm.getfunc('pow', [types.double, types.double],
-                               types.double)
-            i = 0
-            res = 0
-            while i < 300:
-                res += pow(2, 3)
-                i += 1
-            return pow.getaddr(), res
-        #
-        libm_name = get_libm_name(sys.platform)
-        log = self.run(main, [libm_name], threshold=200)
-        pow_addr, res = log.result
-        assert res == 8.0 * 300
-        loop, = log.loops_by_filename(self.filepath)
-        # XXX: write the actual test when we merge this to jitypes2
-        ## ops = self.get_by_bytecode('CALL_FUNCTION')
-        ## assert len(ops) == 2 # we get two loops, because of specialization
-        ## call_function = ops[0]
-        ## last_ops = [op.getopname() for op in call_function[-5:]]
-        ## assert last_ops == ['force_token',
-        ##                     'setfield_gc',
-        ##                     'call_may_force',
-        ##                     'guard_not_forced',
-        ##                     'guard_no_exception']
-        ## call = call_function[-3]
-        ## assert call.getarg(0).value == pow_addr
-        ## assert call.getarg(1).value == 2.0
-        ## assert call.getarg(2).value == 3.0
-
-    def test_xor(self):
-        def main(b):
-            a = sa = 0
-            while a < 300:
-                if a > 0: # Specialises the loop
-                    pass
-                if b > 10:
-                    pass
-                if a^b >= 0:  # ID: guard
-                    sa += 1
-                sa += a^a     # ID: a_xor_a
-                a += 1
-            return sa
-
-        log = self.run(main, [11], threshold=200)
-        assert log.result == 300
-        loop, = log.loops_by_filename(self.filepath)
-        # if both are >=0, a^b is known to be >=0
-        # note that we know that b>10
-        assert loop.match_by_id('guard', """
-            i10 = int_xor(i5, i7)
-        """)
-        #
-        # x^x is always optimized to 0
-        assert loop.match_by_id('a_xor_a', "")
-
-        log = self.run(main, [9], threshold=200)
-        assert log.result == 300
-        loop, = log.loops_by_filename(self.filepath)
-        # we don't know that b>10, hence we cannot optimize it
-        assert loop.match_by_id('guard', """
-            i10 = int_xor(i5, i7)
-            i12 = int_ge(i10, 0)
-            guard_true(i12, descr=...)
-        """)
-
-    def test_shift_intbound(self):
-        def main(b):
-            res = 0
-            a = 0
-            while a < 300:
-                assert a >= 0
-                assert 0 <= b <= 10
-                val = a >> b
-                if val >= 0:    # ID: rshift
-                    res += 1
-                val = a << b
-                if val >= 0:    # ID: lshift
-                    res += 2
-                a += 1
-            return res
-        #
-        log = self.run(main, [2], threshold=200)
-        assert log.result == 300*3
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match_by_id('rshift', "")  # guard optimized away
-        assert loop.match_by_id('lshift', "")  # guard optimized away
-
-    def test_lshift_and_then_rshift(self):
-        py.test.skip('fixme, this optimization is disabled')
-        def main(b):
-            res = 0
-            a = 0
-            while res < 300:
-                assert a >= 0
-                assert 0 <= b <= 10
-                res = (a << b) >> b     # ID: shift
-                a += 1
-            return res
-        #
-        log = self.run(main, [2], threshold=200)
-        assert log.result == 300
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match_by_id('shift', "")  # optimized away
-
-    def test_division_to_rshift(self):
-        py.test.skip('in-progress')
-        def main(b):
-            res = 0
-            a = 0
-            while a < 300:
-                assert a >= 0
-                assert 0 <= b <= 10
-                res = a/b     # ID: div
-                a += 1
-            return res
-        #
-        log = self.run(main, [3], threshold=200)
-        #assert log.result == 149
-        loop, = log.loops_by_filename(self.filepath)
-        import pdb;pdb.set_trace()
-        assert loop.match_by_id('div', "")  # optimized away
-
-    def test_oldstyle_newstyle_mix(self):
-        def main():
-            class A:
-                pass
-
-            class B(object, A):
-                def __init__(self, x):
-                    self.x = x
-
-            i = 0
-            b = B(1)
-            while i < 100:
-                v = b.x # ID: loadattr
-                i += v
-            return i
-
-        log = self.run(main, [], threshold=80)
-        loop, = log.loops_by_filename(self.filepath)
-        loop.match_by_id('loadattr',
-        '''
-        guard_not_invalidated(descr=...)
-        i19 = call(ConstClass(ll_dict_lookup), _, _, _, descr=...)
-        guard_no_exception(descr=...)
-        i21 = int_and(i19, _)
-        i22 = int_is_true(i21)
-        guard_true(i22, descr=...)
-        i26 = call(ConstClass(ll_dict_lookup), _, _, _, descr=...)
-        guard_no_exception(descr=...)
-        i28 = int_and(i26, _)
-        i29 = int_is_true(i28)
-        guard_true(i29, descr=...)
-        ''')
-
-    def test_python_contains(self):
-        def main():
-            class A(object):
-                def __contains__(self, v):
-                    return True
-
-            i = 0
-            a = A()
-            while i < 100:
-                i += i in a # ID: contains
-
-            log = self.run(main, [], threshold=80)
-            loop, = log.loops_by_filename(self.filemath)
-            # XXX: haven't confirmed his is correct, it's probably missing a
-            # few instructions
-            loop.match_by_id("contains", """
-                i1 = int_add(i0, 1)
-            """)
+        self.run_and_check(main, [])
diff --git a/pypy/module/pypyjit/test_pypy_c/test_shift.py b/pypy/module/pypyjit/test_pypy_c/test_shift.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_shift.py
@@ -0,0 +1,166 @@
+import py
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class TestShift(BaseTestPyPyC):
+
+    def test_shift_intbound(self):
+        def main(b):
+            res = 0
+            a = 0
+            while a < 300:
+                assert a >= 0
+                assert 0 <= b <= 10
+                val = a >> b
+                if val >= 0:    # ID: rshift
+                    res += 1
+                val = a << b
+                if val >= 0:    # ID: lshift
+                    res += 2
+                a += 1
+            return res
+        #
+        log = self.run(main, [2])
+        assert log.result == 300*3
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match_by_id('rshift', "")  # guard optimized away
+        assert loop.match_by_id('lshift', "")  # guard optimized away
+
+    def test_lshift_and_then_rshift(self):
+        py.test.skip('fixme, this optimization is disabled')
+        def main(b):
+            res = 0
+            a = 0
+            while res < 300:
+                assert a >= 0
+                assert 0 <= b <= 10
+                res = (a << b) >> b     # ID: shift
+                a += 1
+            return res
+        #
+        log = self.run(main, [2])
+        assert log.result == 300
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match_by_id('shift', "")  # optimized away
+
+    def test_division_to_rshift(self):
+        def main(b):
+            res = 0
+            a = 0
+            while a < 300:
+                assert a >= 0
+                assert 0 <= b <= 10
+                res = a/b     # ID: div
+                a += 1
+            return res
+        #
+        log = self.run(main, [3])
+        assert log.result == 99
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match_by_id('div', """
+            i10 = int_floordiv(i6, i7)
+            i11 = int_mul(i10, i7)
+            i12 = int_sub(i6, i11)
+            i14 = int_rshift(i12, 63)
+            i15 = int_add(i10, i14)
+        """)
+
+    def test_division_to_rshift_allcases(self):
+        """
+        This test only checks that we get the expected result, not that any
+        optimization has been applied.
+        """
+        avalues = ('a', 'b', 7, -42, 8)
+        bvalues = ['b'] + range(-10, 0) + range(1,10)
+        code = ''
+        for a in avalues:
+            for b in bvalues:
+                code += '                sa += %s / %s\n' % (a, b)
+        src = """
+        def main(a, b):
+            i = sa = 0
+            while i < 300:
+%s
+                i += 1
+            return sa
+        """ % code
+        self.run_and_check(src, [ 10,  20])
+        self.run_and_check(src, [ 10, -20])
+        self.run_and_check(src, [-10, -20])
+
+    def test_mod(self):
+        """
+        This test only checks that we get the expected result, not that any
+        optimization has been applied.
+        """
+        avalues = ('a', 'b', 7, -42, 8)
+        bvalues = ['b'] + range(-10, 0) + range(1,10)
+        code = ''
+        for a in avalues:
+            for b in bvalues:
+                code += '                sa += %s %% %s\n' % (a, b)
+        src = """
+        def main(a, b):
+            i = sa = 0
+            while i < 2000:
+                if a > 0: pass
+                if 1 < b < 2: pass
+%s
+                i += 1
+            return sa
+        """ % code
+        self.run_and_check(src, [ 10,  20])
+        self.run_and_check(src, [ 10, -20])
+        self.run_and_check(src, [-10, -20])
+
+    def test_shift_allcases(self):
+        """
+        This test only checks that we get the expected result, not that any
+        optimization has been applied.
+        """
+        from sys import maxint
+        def main(a, b):
+            i = sa = 0
+            while i < 300:
+                if a > 0: # Specialises the loop
+                    pass
+                if b < 2 and b > 0:
+                    pass
+                if (a >> b) >= 0:
+                    sa += 1
+                if (a << b) > 2:
+                    sa += 10000
+                i += 1
+            return sa
+        #
+        maxvals = (-maxint-1, -maxint, maxint-1, maxint)
+        for a in (-4, -3, -2, -1, 0, 1, 2, 3, 4) + maxvals:
+            for b in (0, 1, 2, 31, 32, 33, 61, 62, 63):
+                yield self.run_and_check, main, [a, b]
+
+    def test_revert_shift_allcases(self):
+        """
+        This test only checks that we get the expected result, not that any
+        optimization has been applied.
+        """
+        from sys import maxint
+
+        def main(a, b, c):
+            from sys import maxint
+            i = sa = 0
+            while i < 300:
+                if 0 < a < 10: pass
+                if -100 < b < 100: pass
+                if -maxint/2 < c < maxint/2: pass
+                sa += (a<<a)>>a
+                sa += (b<<a)>>a
+                sa += (c<<a)>>a
+                sa += (a<<100)>>100
+                sa += (b<<100)>>100
+                sa += (c<<100)>>100
+                i += 1
+            return long(sa)
+
+        for a in (1, 4, 8, 100):
+            for b in (-10, 10, -201, 201, -maxint/3, maxint/3):
+                for c in (-10, 10, -maxint/3, maxint/3):
+                    yield self.run_and_check, main, [a, b, c]
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test.c b/pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test.c
--- a/pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test.c
+++ b/pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test.c
@@ -43,6 +43,12 @@
 	qsort(base, num, width, compare);
 }
 
+EXPORT(char) deref_LP_c_char_p(char** argv)
+{
+    char* s = *argv;
+    return s[0];
+}
+
 EXPORT(int *) _testfunc_ai8(int a[8])
 {
 	return a;
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/support.py b/pypy/module/test_lib_pypy/ctypes_tests/support.py
--- a/pypy/module/test_lib_pypy/ctypes_tests/support.py
+++ b/pypy/module/test_lib_pypy/ctypes_tests/support.py
@@ -1,4 +1,5 @@
 import py
+import sys
 import ctypes
 
 py.test.importorskip("ctypes", "1.0.2")
@@ -14,6 +15,16 @@
         if _rawffi:
             py.test.skip("white-box tests for pypy _rawffi based ctypes impl")
 
+def del_funcptr_refs_maybe(obj, attrname):
+    dll = getattr(obj, attrname, None)
+    if not dll:
+        return
+    _FuncPtr = dll._FuncPtr
+    for name in dir(dll):
+        obj = getattr(dll, name, None)
+        if isinstance(obj, _FuncPtr):
+            delattr(dll, name)
+
 class BaseCTypesTestChecker:
     def setup_class(cls):
         if _rawffi:
@@ -21,8 +32,21 @@
             for _ in range(4):
                 gc.collect()
             cls.old_num = _rawffi._num_of_allocated_objects()
-    
+
+
     def teardown_class(cls):
+        if sys.pypy_translation_info['translation.gc'] == 'boehm':
+            return # it seems that boehm has problems with __del__, so not
+                   # everything is freed
+        #
+        mod = sys.modules[cls.__module__]
+        del_funcptr_refs_maybe(mod, 'dll')
+        del_funcptr_refs_maybe(mod, 'dll2')
+        del_funcptr_refs_maybe(mod, 'lib')
+        del_funcptr_refs_maybe(mod, 'testdll')
+        del_funcptr_refs_maybe(mod, 'ctdll')
+        del_funcptr_refs_maybe(cls, '_dll')
+        #
         if hasattr(cls, 'old_num'):
             import gc
             for _ in range(4):
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_fastpath.py b/pypy/module/test_lib_pypy/ctypes_tests/test_fastpath.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_fastpath.py
@@ -0,0 +1,103 @@
+from ctypes import CDLL, POINTER, pointer, c_byte, c_int, c_char_p
+import sys
+import py
+from support import BaseCTypesTestChecker
+
+class MyCDLL(CDLL):
+    def __getattr__(self, attr):
+        fn = self[attr] # this way it's not cached as an attribute
+        fn._slowpath_allowed = False
+        return fn
+
+def setup_module(mod):
+    import conftest
+    _ctypes_test = str(conftest.sofile)
+    mod.dll = MyCDLL(_ctypes_test)  # slowpath not allowed
+    mod.dll2 = CDLL(_ctypes_test)   # slowpath allowed
+
+
+class TestFastpath(BaseCTypesTestChecker):
+
+    def test_fastpath_forbidden(self):
+        def myfunc():
+            pass
+        #
+        tf_b = dll.tf_b
+        tf_b.restype = c_byte
+        #
+        # so far, it's still using the slowpath
+        assert not tf_b._is_fastpath
+        tf_b.callable = myfunc
+        tf_b.argtypes = (c_byte,)
+        # errcheck prevented the fastpath to kick in
+        assert not tf_b._is_fastpath
+        #
+        del tf_b.callable
+        tf_b.argtypes = (c_byte,) # try to re-enable the fastpath
+        assert tf_b._is_fastpath
+        #
+        assert not tf_b._slowpath_allowed
+        py.test.raises(AssertionError, "tf_b.callable = myfunc")
+        py.test.raises(AssertionError, "tf_b('aaa')") # force a TypeError
+
+    def test_simple_args(self):
+        tf_b = dll.tf_b
+        tf_b.restype = c_byte
+        tf_b.argtypes = (c_byte,)
+        assert tf_b(-126) == -42
+
+    def test_pointer_args(self):
+        f = dll._testfunc_p_p
+        f.restype = POINTER(c_int)
+        f.argtypes = [POINTER(c_int)]
+        v = c_int(42)
+        result = f(pointer(v))
+        assert type(result) == POINTER(c_int)
+        assert result.contents.value == 42
+
+    def test_simple_pointer_args(self):
+        f = dll.my_strchr
+        f.argtypes = [c_char_p, c_int]
+        f.restype = c_char_p
+        mystr = c_char_p("abcd")
+        result = f(mystr, ord("b"))
+        assert result == "bcd"
+
+    @py.test.mark.xfail
+    def test_strings(self):
+        f = dll.my_strchr
+        f.argtypes = [c_char_p, c_int]
+        f.restype = c_char_p
+        # python strings need to be converted to c_char_p, but this is
+        # supported only in the slow path so far
+        result = f("abcd", ord("b"))
+        assert result == "bcd"
+
+    def test_errcheck(self):
+        def errcheck(result, func, args):
+            return 'hello'
+        tf_b = dll.tf_b
+        tf_b.restype = c_byte
+        tf_b.argtypes = (c_byte,)
+        tf_b.errcheck = errcheck
+        assert tf_b(-126) == 'hello'
+
+
+class TestFallbackToSlowpath(BaseCTypesTestChecker):
+
+    def test_argtypes_is_None(self):
+        tf_b = dll2.tf_b
+        tf_b.restype = c_byte
+        tf_b.argtypes = (c_char_p,)  # this is intentionally wrong
+        tf_b.argtypes = None # kill the fast path
+        assert not tf_b._is_fastpath
+        assert tf_b(-126) == -42
+
+    def test_callable_is_None(self):
+        tf_b = dll2.tf_b
+        tf_b.restype = c_byte
+        tf_b.argtypes = (c_byte,)
+        tf_b.callable = lambda x: x+1
+        assert not tf_b._is_fastpath
+        assert tf_b(-126) == -125
+        tf_b.callable = None
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_functions.py b/pypy/module/test_lib_pypy/ctypes_tests/test_functions.py
--- a/pypy/module/test_lib_pypy/ctypes_tests/test_functions.py
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_functions.py
@@ -91,6 +91,13 @@
         result = f(0, 0, 0, 0, 0, 0)
         assert result == u'\x00'
 
+    def test_char_result(self):
+        f = dll._testfunc_i_bhilfd
+        f.argtypes = [c_byte, c_short, c_int, c_long, c_float, c_double]
+        f.restype = c_char
+        result = f(0, 0, 0, 0, 0, 0)
+        assert result == '\x00'
+
     def test_voidresult(self):
         f = dll._testfunc_v
         f.restype = None
@@ -211,8 +218,19 @@
         result = f(byref(c_int(99)))
         assert not result.contents == 99
 
+    def test_convert_pointers(self):
+        f = dll.deref_LP_c_char_p
+        f.restype = c_char
+        f.argtypes = [POINTER(c_char_p)]
+        #
+        s = c_char_p('hello world')
+        ps = pointer(s)
+        assert f(ps) == 'h'
+        assert f(s) == 'h'  # automatic conversion from char** to char*
+
     def test_errors_1(self):
         f = dll._testfunc_p_p
+        f.argtypes = [POINTER(c_int)]
         f.restype = c_int
 
         class X(Structure):
@@ -428,6 +446,16 @@
         u = dll.ret_un_func(a[1])
         assert u.y == 33*10000
 
+    def test_cache_funcptr(self):
+        tf_b = dll.tf_b
+        tf_b.restype = c_byte
+        tf_b.argtypes = (c_byte,)
+        assert tf_b(-126) == -42
+        ptr = tf_b._ptr
+        assert ptr is not None
+        assert tf_b(-126) == -42
+        assert tf_b._ptr is ptr
+
     def test_warnings(self):
         import warnings
         warnings.simplefilter("always")
@@ -439,6 +467,22 @@
             assert "C function without declared arguments called" in str(w[0].message)
             assert "C function without declared return type called" in str(w[1].message)
 
+    def test_errcheck(self):
+        py.test.skip('fixme')
+        def errcheck(result, func, args):
+            assert result == -42
+            assert type(result) is int
+            arg, = args
+            assert arg == -126
+            assert type(arg) is int
+            return result
+        #
+        tf_b = dll.tf_b
+        tf_b.restype = c_byte
+        tf_b.argtypes = (c_byte,)
+        tf_b.errcheck = errcheck
+        assert tf_b(-126) == -42
+        del tf_b.errcheck
         with warnings.catch_warnings(record=True) as w:
             dll.get_an_integer.argtypes = []
             dll.get_an_integer()
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_guess_argtypes.py b/pypy/module/test_lib_pypy/ctypes_tests/test_guess_argtypes.py
--- a/pypy/module/test_lib_pypy/ctypes_tests/test_guess_argtypes.py
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_guess_argtypes.py
@@ -12,8 +12,10 @@
     from _ctypes.function import CFuncPtr
 
     def guess(value):
-        cobj = CFuncPtr._conv_param(None, value)
-        return type(cobj)
+        cobj, ctype = CFuncPtr._conv_param(None, value)
+        return ctype
+        ## cobj = CFuncPtr._conv_param(None, value)
+        ## return type(cobj)
 
     assert guess(13) == c_int
     assert guess(0) == c_int
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_numbers.py b/pypy/module/test_lib_pypy/ctypes_tests/test_numbers.py
--- a/pypy/module/test_lib_pypy/ctypes_tests/test_numbers.py
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_numbers.py
@@ -125,6 +125,9 @@
             if t is c_longdouble:   # no support for 'g' in the struct module
                 continue
             code = t._type_ # the typecode
+            if code == 'g':
+                # typecode not supported by "struct"
+                continue
             align = struct.calcsize("c%c" % code) - struct.calcsize(code)
 
             # alignment of the type...
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_pointers.py b/pypy/module/test_lib_pypy/ctypes_tests/test_pointers.py
--- a/pypy/module/test_lib_pypy/ctypes_tests/test_pointers.py
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_pointers.py
@@ -12,6 +12,13 @@
     mod._ctypes_test = str(conftest.sofile)
 
 class TestPointers(BaseCTypesTestChecker):
+
+    def test_get_ffi_argtype(self):
+        P = POINTER(c_int)
+        ffitype = P.get_ffi_argtype()
+        assert P.get_ffi_argtype() is ffitype
+        assert ffitype.deref_pointer() is c_int.get_ffi_argtype()
+    
     def test_pointer_crash(self):
 
         class A(POINTER(c_ulong)):
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_unicode.py b/pypy/module/test_lib_pypy/ctypes_tests/test_unicode.py
--- a/pypy/module/test_lib_pypy/ctypes_tests/test_unicode.py
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_unicode.py
@@ -15,6 +15,10 @@
         mod.wcslen.argtypes = [ctypes.c_wchar_p]
         mod.func = dll._testfunc_p_p
 
+    def teardown_module(mod):
+        del mod.func
+        del mod.wcslen
+
     class TestUnicode(BaseCTypesTestChecker):
         def setup_method(self, method):
             self.prev_conv_mode = ctypes.set_conversion_mode("ascii", "strict")
diff --git a/pypy/rlib/libffi.py b/pypy/rlib/libffi.py
--- a/pypy/rlib/libffi.py
+++ b/pypy/rlib/libffi.py
@@ -1,12 +1,15 @@
+from __future__ import with_statement
+
 from pypy.rpython.lltypesystem import rffi, lltype
 from pypy.rlib.objectmodel import specialize, enforceargs, we_are_translated
-from pypy.rlib.rarithmetic import intmask, r_uint
+from pypy.rlib.rarithmetic import intmask, r_uint, r_singlefloat
 from pypy.rlib import jit
 from pypy.rlib import clibffi
 from pypy.rlib.clibffi import get_libc_name, FUNCFLAG_CDECL, AbstractFuncPtr, \
-    push_arg_as_ffiptr, c_ffi_call
+    push_arg_as_ffiptr, c_ffi_call, FFI_TYPE_STRUCT
 from pypy.rlib.rdynload import dlopen, dlclose, dlsym, dlsym_byordinal
 from pypy.rlib.rdynload import DLLHANDLE
+from pypy.rlib.longlong2float import longlong2float, float2longlong
 
 class types(object):
     """
@@ -31,6 +34,9 @@
                 setattr(cls, name, value)
         cls.slong = clibffi.cast_type_to_ffitype(rffi.LONG)
         cls.ulong = clibffi.cast_type_to_ffitype(rffi.ULONG)
+        cls.slonglong = clibffi.cast_type_to_ffitype(rffi.LONGLONG)
+        cls.ulonglong = clibffi.cast_type_to_ffitype(rffi.ULONGLONG)
+        cls.wchar_t = clibffi.cast_type_to_ffitype(lltype.UniChar)
         del cls._import
 
     @staticmethod
@@ -41,7 +47,8 @@
         """
         if   ffi_type is types.void:    return 'v'
         elif ffi_type is types.double:  return 'f'
-        elif ffi_type is types.pointer: return 'i'
+        elif ffi_type is types.float:   return 's'
+        elif ffi_type is types.pointer: return 'u'
         #
         elif ffi_type is types.schar:   return 'i'
         elif ffi_type is types.uchar:   return 'u'
@@ -58,13 +65,19 @@
         elif ffi_type is types.uint16:  return 'u'
         elif ffi_type is types.sint32:  return 'i'
         elif ffi_type is types.uint32:  return 'u'
-        ## we only support integers that fit in a lltype.Signed (==rffi.LONG)
-        ## (on 64-bit platforms, types.sint64 is types.slong and the case is
-        ## caught above)
-        ## elif ffi_type is types.sint64:  return 'i'
-        ## elif ffi_type is types.uint64:  return 'u'
+        ## (note that on 64-bit platforms, types.sint64 is types.slong and the
+        ## case is caught above)
+        elif ffi_type is types.sint64:  return 'I'
+        elif ffi_type is types.uint64:  return 'U'
+        #
+        elif types.is_struct(ffi_type): return 'S'
         raise KeyError
 
+    @staticmethod
+    @jit.purefunction
+    def is_struct(ffi_type):
+        return intmask(ffi_type.c_type) == intmask(FFI_TYPE_STRUCT)
+
 types._import()
 
 @specialize.arg(0)
@@ -78,8 +91,11 @@
     sz = rffi.sizeof(TYPE)
     return sz <= rffi.sizeof(rffi.LONG)
 
+
 # ======================================================================
 
+IS_32_BIT = (r_uint.BITS == 32)
+
 @specialize.memo()
 def _check_type(TYPE):
     if isinstance(TYPE, lltype.Ptr):
@@ -105,11 +121,37 @@
             val = rffi.cast(rffi.LONG, val)
         elif TYPE is rffi.DOUBLE:
             cls = FloatArg
+        elif TYPE is rffi.LONGLONG or TYPE is rffi.ULONGLONG:
+            raise TypeError, 'r_(u)longlong not supported by arg(), use arg_(u)longlong()'
+        elif TYPE is rffi.FLOAT:
+            raise TypeError, 'r_singlefloat not supported by arg(), use arg_singlefloat()'
         else:
             raise TypeError, 'Unsupported argument type: %s' % TYPE
         self._append(cls(val))
         return self
 
+    def arg_raw(self, val):
+        self._append(RawArg(val))
+
+    def arg_longlong(self, val):
+        """
+        Note: this is a hack. So far, the JIT does not support long longs, so
+        you must pass it as if it were a python Float (rffi.DOUBLE).  You can
+        use the convenience functions longlong2float and float2longlong to do
+        the conversions.  Note that if you use long longs, the call won't
+        be jitted at all.
+        """
+        assert IS_32_BIT      # use a normal integer on 64-bit platforms
+        self._append(LongLongArg(val))
+
+    def arg_singlefloat(self, val):
+        """
+        Note: you must pass a python Float (rffi.DOUBLE), not a r_singlefloat
+        (else the jit complains).  Note that if you use single floats, the
+        call won't be jitted at all.
+        """
+        self._append(SingleFloatArg(val))
+
     def _append(self, arg):
         if self.first is None:
             self.first = self.last = arg
@@ -132,8 +174,9 @@
     def push(self, func, ll_args, i):
         func._push_int(self.intval, ll_args, i)
 
+
 class FloatArg(AbstractArg):
-    """ An argument holding a float
+    """ An argument holding a python float (i.e. a C double)
     """
 
     def __init__(self, floatval):
@@ -142,6 +185,37 @@
     def push(self, func, ll_args, i):
         func._push_float(self.floatval, ll_args, i)
 
+class RawArg(AbstractArg):
+    """ An argument holding a raw pointer to put inside ll_args
+    """
+
+    def __init__(self, ptrval):
+        self.ptrval = ptrval
+
+    def push(self, func, ll_args, i):
+        func._push_raw(self.ptrval, ll_args, i)
+
+class SingleFloatArg(AbstractArg):
+    """ An argument representing a C float (but holding a C double)
+    """
+
+    def __init__(self, floatval):
+        self.floatval = floatval
+
+    def push(self, func, ll_args, i):
+        func._push_single_float(self.floatval, ll_args, i)
+
+
+class LongLongArg(AbstractArg):
+    """ An argument representing a C long long (but holding a C double)
+    """
+
+    def __init__(self, floatval):
+        self.floatval = floatval
+
+    def push(self, func, ll_args, i):
+        func._push_longlong(self.floatval, ll_args, i)
+
 
 # ======================================================================
 
@@ -164,8 +238,8 @@
     # ========================================================================
 
     @jit.unroll_safe
-    @specialize.arg(2)
-    def call(self, argchain, RESULT):
+    @specialize.arg(2, 3)
+    def call(self, argchain, RESULT, is_struct=False):
         # WARNING!  This code is written carefully in a way that the JIT
         # optimizer will see a sequence of calls like the following:
         #
@@ -179,6 +253,7 @@
         # the optimizer will fail to recognize the pattern and won't turn it
         # into a fast CALL.  Note that "arg = arg.next" is optimized away,
         # assuming that archain is completely virtual.
+        self = jit.hint(self, promote=True)
         if argchain.numargs != len(self.argtypes):
             raise TypeError, 'Wrong number of arguments: %d expected, got %d' %\
                 (argchain.numargs, len(self.argtypes))
@@ -190,10 +265,24 @@
             i += 1
             arg = arg.next
         #
-        if _fits_into_long(RESULT):
+        if is_struct:
+            assert types.is_struct(self.restype)
+            res = self._do_call_raw(self.funcsym, ll_args)
+        elif _fits_into_long(RESULT):
+            assert not types.is_struct(self.restype)
             res = self._do_call_int(self.funcsym, ll_args)
         elif RESULT is rffi.DOUBLE:
             return self._do_call_float(self.funcsym, ll_args)
+        elif RESULT is rffi.FLOAT:
+            # XXX: even if RESULT is FLOAT, we still return a DOUBLE, else the
+            # jit complains. Note that the jit is disabled in this case
+            return self._do_call_single_float(self.funcsym, ll_args)
+        elif RESULT is rffi.LONGLONG or RESULT is rffi.ULONGLONG:
+            # XXX: even if RESULT is LONGLONG, we still return a DOUBLE, else the
+            # jit complains. Note that the jit is disabled in this case
+            # (it's not a typo, we really return a DOUBLE)
+            assert IS_32_BIT
+            return self._do_call_longlong(self.funcsym, ll_args)
         elif RESULT is lltype.Void:
             return self._do_call_void(self.funcsym, ll_args)
         else:
@@ -222,11 +311,26 @@
     def _push_int(self, value, ll_args, i):
         self._push_arg(value, ll_args, i)
 
+    @jit.dont_look_inside
+    def _push_raw(self, value, ll_args, i):
+        ll_args[i] = value
+
     @jit.oopspec('libffi_push_float(self, value, ll_args, i)')
     @enforceargs(   None, float, None,    int) # fix the annotation for tests
     def _push_float(self, value, ll_args, i):
         self._push_arg(value, ll_args, i)
 
+    @jit.dont_look_inside
+    def _push_single_float(self, value, ll_args, i):
+        self._push_arg(r_singlefloat(value), ll_args, i)
+
+    @jit.dont_look_inside
+    def _push_longlong(self, floatval, ll_args, i):
+        """
+        Takes a longlong represented as a python Float. It's a hack for the
+        jit, else we could not see the whole libffi module at all"""  
+        self._push_arg(float2longlong(floatval), ll_args, i)
+
     @jit.oopspec('libffi_call_int(self, funcsym, ll_args)')
     def _do_call_int(self, funcsym, ll_args):
         return self._do_call(funcsym, ll_args, rffi.LONG)
@@ -235,6 +339,21 @@
     def _do_call_float(self, funcsym, ll_args):
         return self._do_call(funcsym, ll_args, rffi.DOUBLE)
 
+    @jit.dont_look_inside
+    def _do_call_single_float(self, funcsym, ll_args):
+        single_res = self._do_call(funcsym, ll_args, rffi.FLOAT)
+        return float(single_res)
+
+    @jit.dont_look_inside
+    def _do_call_raw(self, funcsym, ll_args):
+        # same as _do_call_int, but marked as jit.dont_look_inside
+        return self._do_call(funcsym, ll_args, rffi.LONG)
+
+    @jit.dont_look_inside
+    def _do_call_longlong(self, funcsym, ll_args):
+        llres = self._do_call(funcsym, ll_args, rffi.LONGLONG)
+        return longlong2float(llres)
+
     @jit.oopspec('libffi_call_void(self, funcsym, ll_args)')
     def _do_call_void(self, funcsym, ll_args):
         return self._do_call(funcsym, ll_args, lltype.Void)
@@ -265,7 +384,14 @@
                             rffi.cast(rffi.VOIDPP, ll_args))
         if RESULT is not lltype.Void:
             TP = lltype.Ptr(rffi.CArray(RESULT))
-            res = rffi.cast(TP, ll_result)[0]
+            buf = rffi.cast(TP, ll_result)
+            if types.is_struct(self.restype):
+                assert RESULT == rffi.LONG
+                # for structs, we directly return the buffer and transfer the
+                # ownership
+                res = rffi.cast(RESULT, buf)
+            else:
+                res = buf[0]
         else:
             res = None
         self._free_buffers(ll_result, ll_args)
@@ -274,11 +400,19 @@
 
     def _free_buffers(self, ll_result, ll_args):
         if ll_result:
-            lltype.free(ll_result, flavor='raw')
+            self._free_buffer_maybe(rffi.cast(rffi.VOIDP, ll_result), self.restype)
         for i in range(len(self.argtypes)):
-            lltype.free(ll_args[i], flavor='raw')
+            argtype = self.argtypes[i]
+            self._free_buffer_maybe(ll_args[i], argtype)
         lltype.free(ll_args, flavor='raw')
 
+    def _free_buffer_maybe(self, buf, ffitype):
+        # if it's a struct, the buffer is not freed and the ownership is
+        # already of the caller (in case of ll_args buffers) or transferred to
+        # it (in case of ll_result buffer)
+        if not types.is_struct(ffitype):
+            lltype.free(buf, flavor='raw')
+
 
 # ======================================================================
 
@@ -288,11 +422,8 @@
     def __init__(self, libname):
         """Load the library, or raises DLOpenError."""
         self.lib = rffi.cast(DLLHANDLE, 0)
-        ll_libname = rffi.str2charp(libname)
-        try:
+        with rffi.scoped_str2charp(libname) as ll_libname:
             self.lib = dlopen(ll_libname)
-        finally:
-            lltype.free(ll_libname, flavor='raw')
 
     def __del__(self):
         if self.lib:
@@ -302,3 +433,6 @@
     def getpointer(self, name, argtypes, restype, flags=FUNCFLAG_CDECL):
         return Func(name, argtypes, restype, dlsym(self.lib, name),
                     flags=flags, keepalive=self)
+
+    def getaddressindll(self, name):
+        return dlsym(self.lib, name)
diff --git a/pypy/rlib/longlong2float.py b/pypy/rlib/longlong2float.py
--- a/pypy/rlib/longlong2float.py
+++ b/pypy/rlib/longlong2float.py
@@ -30,14 +30,19 @@
     return llval
 
 from pypy.translator.tool.cbuild import ExternalCompilationInfo
-eci = ExternalCompilationInfo(post_include_bits=["""
+eci = ExternalCompilationInfo(includes=['string.h', 'assert.h'],
+                              post_include_bits=["""
 static double pypy__longlong2float(long long x) {
-    char *p = (char*)&x;
-    return *((double*)p);
+    double dd;
+    assert(sizeof(double) == 8 && sizeof(long long) == 8);
+    memcpy(&dd, &x, 8);
+    return dd;
 }
 static long long pypy__float2longlong(double x) {
-    char *p = (char*)&x;
-    return *((long long*)p);
+    long long ll;
+    assert(sizeof(double) == 8 && sizeof(long long) == 8);
+    memcpy(&ll, &x, 8);
+    return ll;
 }
 """])
 
diff --git a/pypy/rlib/rgc.py b/pypy/rlib/rgc.py
--- a/pypy/rlib/rgc.py
+++ b/pypy/rlib/rgc.py
@@ -191,6 +191,21 @@
         hop.exception_cannot_occur()
         return hop.genop('gc_can_move', hop.args_v, resulttype=hop.r_result)
 
+def _make_sure_does_not_move(p):
+    """'p' is a non-null GC object.  This (tries to) make sure that the
+    object does not move any more, by forcing collections if needed.
+    Warning: should ideally only be used with the minimark GC, and only
+    on objects that are already a bit old, so have a chance to be
+    already non-movable."""
+    if not we_are_translated():
+        return
+    i = 0
+    while can_move(p):
+        if i > 6:
+            raise NotImplementedError("can't make object non-movable!")
+        collect(i)
+        i += 1
+
 def _heap_stats():
     raise NotImplementedError # can't be run directly
 
diff --git a/pypy/rlib/rrandom.py b/pypy/rlib/rrandom.py
--- a/pypy/rlib/rrandom.py
+++ b/pypy/rlib/rrandom.py
@@ -24,8 +24,7 @@
     def __init__(self, seed=r_uint(0)):
         self.state = [r_uint(0)] * N
         self.index = 0
-        if seed:
-            self.init_genrand(seed)
+        self.init_genrand(seed)
 
     def init_genrand(self, s):
         mt = self.state
diff --git a/pypy/rlib/streamio.py b/pypy/rlib/streamio.py
--- a/pypy/rlib/streamio.py
+++ b/pypy/rlib/streamio.py
@@ -141,7 +141,8 @@
 def construct_stream_tower(stream, buffering, universal, reading, writing,
                            binary):
     if buffering == 0:   # no buffering
-        pass
+        if reading:      # force some minimal buffering for readline()
+            stream = ReadlineInputStream(stream)
     elif buffering == 1:   # line-buffering
         if writing:
             stream = LineBufferingOutputStream(stream)
@@ -749,6 +750,113 @@
                                               flush_buffers=False)
 
 
+class ReadlineInputStream(Stream):
+
+    """Minimal buffering input stream.
+
+    Only does buffering for readline().  The other kinds of reads, and
+    all writes, are not buffered at all.
+    """
+
+    bufsize = 2**13 # 8 K
+
+    def __init__(self, base, bufsize=-1):
+        self.base = base
+        self.do_read = base.read   # function to fill buffer some more
+        self.do_seek = base.seek   # seek to a byte offset
+        if bufsize == -1:     # Get default from the class
+            bufsize = self.bufsize
+        self.bufsize = bufsize  # buffer size (hint only)
+        self.buf = None         # raw data (may contain "\n")
+        self.bufstart = 0
+
+    def flush_buffers(self):
+        if self.buf is not None:
+            try:
+                self.do_seek(self.bufstart-len(self.buf), 1)
+            except MyNotImplementedError:
+                pass
+            else:
+                self.buf = None
+                self.bufstart = 0
+
+    def readline(self):
+        if self.buf is not None:
+            i = self.buf.find('\n', self.bufstart)
+        else:
+            self.buf = ''
+            i = -1
+        #
+        if i < 0:
+            self.buf = self.buf[self.bufstart:]
+            self.bufstart = 0
+            while True:
+                bufsize = max(self.bufsize, len(self.buf) >> 2)
+                data = self.do_read(bufsize)
+                if not data:
+                    result = self.buf              # end-of-file reached
+                    self.buf = None
+                    return result
+                startsearch = len(self.buf)   # there is no '\n' in buf so far
+                self.buf += data
+                i = self.buf.find('\n', startsearch)
+                if i >= 0:
+                    break
+        #
+        i += 1
+        result = self.buf[self.bufstart:i]
+        self.bufstart = i
+        return result
+
+    def peek(self):
+        if self.buf is None:
+            return ''
+        if self.bufstart > 0:
+            self.buf = self.buf[self.bufstart:]
+            self.bufstart = 0
+        return self.buf
+
+    def tell(self):
+        pos = self.base.tell()
+        if self.buf is not None:
+            pos -= (len(self.buf) - self.bufstart)
+        return pos
+
+    def readall(self):
+        result = self.base.readall()
+        if self.buf is not None:
+            result = self.buf[self.bufstart:] + result
+            self.buf = None
+            self.bufstart = 0
+        return result
+
+    def read(self, n):
+        if self.buf is None:
+            return self.do_read(n)
+        else:
+            m = n - (len(self.buf) - self.bufstart)
+            start = self.bufstart
+            if m > 0:
+                result = self.buf[start:] + self.do_read(m)
+                self.buf = None
+                self.bufstart = 0
+                return result
+            elif n >= 0:
+                self.bufstart = start + n
+                return self.buf[start : self.bufstart]
+            else:
+                return ''
+
+    seek       = PassThrough("seek",      flush_buffers=True)
+    write      = PassThrough("write",     flush_buffers=True)
+    truncate   = PassThrough("truncate",  flush_buffers=True)
+    flush      = PassThrough("flush",     flush_buffers=True)
+    flushable  = PassThrough("flushable", flush_buffers=False)
+    close      = PassThrough("close",     flush_buffers=False)
+    try_to_find_file_descriptor = PassThrough("try_to_find_file_descriptor",
+                                              flush_buffers=False)
+
+
 class BufferingOutputStream(Stream):
 
     """Standard buffering output stream.
diff --git a/pypy/rlib/test/test_libffi.py b/pypy/rlib/test/test_libffi.py
--- a/pypy/rlib/test/test_libffi.py
+++ b/pypy/rlib/test/test_libffi.py
@@ -2,8 +2,10 @@
 import sys
 from pypy.rpython.lltypesystem import rffi, lltype
 from pypy.rpython.lltypesystem.ll2ctypes import ALLOCATED
-from pypy.rlib.test.test_clibffi import BaseFfiTest, get_libm_name
+from pypy.rlib.rarithmetic import r_singlefloat, r_longlong, r_ulonglong
+from pypy.rlib.test.test_clibffi import BaseFfiTest, get_libm_name, make_struct_ffitype_e
 from pypy.rlib.libffi import CDLL, Func, get_libc_name, ArgChain, types
+from pypy.rlib.libffi import longlong2float, float2longlong, IS_32_BIT
 
 class TestLibffiMisc(BaseFfiTest):
 
@@ -50,6 +52,18 @@
         del lib
         assert not ALLOCATED
 
+    def test_longlong_as_float(self):
+        from pypy.translator.c.test.test_genc import compile
+        maxint64 = r_longlong(9223372036854775807)
+        def fn(x):
+            d = longlong2float(x)
+            ll = float2longlong(d)
+            return ll
+        assert fn(maxint64) == maxint64
+        #
+        fn2 = compile(fn, [r_longlong])
+        res = fn2(maxint64)
+        assert res == maxint64
 
 class TestLibffiCall(BaseFfiTest):
     """
@@ -97,7 +111,7 @@
     def get_libfoo(self):
         return self.CDLL(self.libfoo_name)
 
-    def call(self, funcspec, args, RESULT, init_result=0):
+    def call(self, funcspec, args, RESULT, init_result=0, is_struct=False):
         """
         Call the specified function after constructing and ArgChain with the
         arguments in ``args``.
@@ -114,8 +128,20 @@
         func = lib.getpointer(name, argtypes, restype)
         chain = ArgChain()
         for arg in args:
-            chain.arg(arg)
-        return func.call(chain, RESULT)
+            if isinstance(arg, r_singlefloat):
+                chain.arg_singlefloat(float(arg))
+            elif IS_32_BIT and isinstance(arg, r_longlong):
+                chain.arg_longlong(longlong2float(arg))
+            elif IS_32_BIT and isinstance(arg, r_ulonglong):
+                arg = rffi.cast(rffi.LONGLONG, arg)
+                chain.arg_longlong(longlong2float(arg))
+            elif isinstance(arg, tuple):
+                methname, arg = arg
+                meth = getattr(chain, methname)
+                meth(arg)
+            else:
+                chain.arg(arg)
+        return func.call(chain, RESULT, is_struct=is_struct)
 
     def check_loops(self, *args, **kwds):
         """
@@ -137,7 +163,7 @@
         res = self.call(func, [38, 4.2], rffi.LONG)
         assert res == 42
         self.check_loops({
-                'call_may_force': 1,
+                'call_release_gil': 1,
                 'guard_no_exception': 1,
                 'guard_not_forced': 1,
                 'int_add': 1,
@@ -150,7 +176,7 @@
         func = (libm, 'pow', [types.double, types.double], types.double)
         res = self.call(func, [2.0, 3.0], rffi.DOUBLE, init_result=0.0)
         assert res == 8.0
-        self.check_loops(call_may_force=1, guard_no_exception=1, guard_not_forced=1)
+        self.check_loops(call_release_gil=1, guard_no_exception=1, guard_not_forced=1)
 
     def test_cast_result(self):
         """
@@ -163,7 +189,7 @@
         func = (libfoo, 'cast_to_uchar_and_ovf', [types.sint], types.uchar)
         res = self.call(func, [0], rffi.UCHAR)
         assert res == 200
-        self.check_loops(call_may_force=1, guard_no_exception=1, guard_not_forced=1)
+        self.check_loops(call_release_gil=1, guard_no_exception=1, guard_not_forced=1)
 
     def test_cast_argument(self):
         """
@@ -267,6 +293,76 @@
         res = self.call(get_dummy, [], rffi.LONG)
         assert res == initval+1
 
+    def test_single_float_args(self):
+        """
+            float sum_xy_float(float x, float y)
+            {
+                return x+y;
+            }
+        """
+        from ctypes import c_float # this is used only to compute the expected result
+        libfoo = self.get_libfoo()
+        func = (libfoo, 'sum_xy_float', [types.float, types.float], types.float)
+        x = r_singlefloat(12.34)
+        y = r_singlefloat(56.78)
+        res = self.call(func, [x, y], rffi.FLOAT, init_result=0.0)
+        expected = c_float(c_float(12.34).value + c_float(56.78).value).value
+        assert res == expected
+
+    def test_slonglong_args(self):
+        """
+            long long sum_xy_longlong(long long x, long long y)
+            {
+                return x+y;
+            }
+        """
+        maxint32 = 2147483647 # we cannot really go above maxint on 64 bits
+                              # (and we would not test anything, as there long
+                              # is the same as long long)
+        libfoo = self.get_libfoo()
+        func = (libfoo, 'sum_xy_longlong', [types.slonglong, types.slonglong],
+                types.slonglong)
+        if IS_32_BIT:
+            x = r_longlong(maxint32+1)
+            y = r_longlong(maxint32+2)
+            zero = longlong2float(r_longlong(0))
+        else:
+            x = maxint32+1
+            y = maxint32+2
+            zero = 0
+        res = self.call(func, [x, y], rffi.LONGLONG, init_result=zero)
+        if IS_32_BIT:
+            # obscure, on 32bit it's really a long long, so it returns a
+            # DOUBLE because of the JIT hack
+            res = float2longlong(res)
+        expected = maxint32*2 + 3
+        assert res == expected
+
+    def test_ulonglong_args(self):
+        """
+            unsigned long long sum_xy_ulonglong(unsigned long long x,
+                                                unsigned long long y)
+            {
+                return x+y;
+            }
+        """
+        maxint64 = 9223372036854775807 # maxint64+1 does not fit into a
+                                       # longlong, but it does into a
+                                       # ulonglong
+        libfoo = self.get_libfoo()
+        func = (libfoo, 'sum_xy_ulonglong', [types.ulonglong, types.ulonglong],
+                types.ulonglong)
+        x = r_ulonglong(maxint64+1)
+        y = r_ulonglong(2)
+        res = self.call(func, [x, y], rffi.ULONGLONG, init_result=0)
+        if IS_32_BIT:
+            # obscure, on 32bit it's really a long long, so it returns a
+            # DOUBLE because of the JIT hack
+            res = float2longlong(res)
+            res = rffi.cast(rffi.ULONGLONG, res)
+        expected = maxint64 + 3
+        assert res == expected
+
     def test_wrong_number_of_arguments(self):
         from pypy.rpython.llinterp import LLException
         libfoo = self.get_libfoo() 
@@ -287,3 +383,57 @@
 
         my_raises("self.call(func, [38], rffi.LONG)") # one less
         my_raises("self.call(func, [38, 12.3, 42], rffi.LONG)") # one more
+
+
+    def test_byval_argument(self):
+        """
+            struct Point {
+                long x;
+                long y;
+            };
+
+            long sum_point(struct Point p) {
+                return p.x + p.y;
+            }
+        """
+        libfoo = CDLL(self.libfoo_name)
+        ffi_point_struct = make_struct_ffitype_e(0, 0, [types.slong, types.slong])
+        ffi_point = ffi_point_struct.ffistruct
+        sum_point = (libfoo, 'sum_point', [ffi_point], types.slong)
+        #
+        ARRAY = rffi.CArray(rffi.LONG)
+        buf = lltype.malloc(ARRAY, 2, flavor='raw')
+        buf[0] = 30
+        buf[1] = 12
+        adr = rffi.cast(rffi.VOIDP, buf)
+        res = self.call(sum_point, [('arg_raw', adr)], rffi.LONG, init_result=0)
+        assert res == 42
+        # check that we still have the ownership on the buffer
+        assert buf[0] == 30
+        assert buf[1] == 12
+        lltype.free(buf, flavor='raw')
+        lltype.free(ffi_point_struct, flavor='raw')
+
+    def test_byval_result(self):
+        """
+            struct Point make_point(long x, long y) {
+                struct Point p;
+                p.x = x;
+                p.y = y;
+                return p;
+            }
+        """
+        libfoo = CDLL(self.libfoo_name)
+        ffi_point_struct = make_struct_ffitype_e(0, 0, [types.slong, types.slong])
+        ffi_point = ffi_point_struct.ffistruct
+
+        libfoo = CDLL(self.libfoo_name)
+        make_point = (libfoo, 'make_point', [types.slong, types.slong], ffi_point)
+        #
+        PTR = lltype.Ptr(rffi.CArray(rffi.LONG))
+        p = self.call(make_point, [12, 34], PTR, init_result=lltype.nullptr(PTR.TO),
+                      is_struct=True)
+        assert p[0] == 12
+        assert p[1] == 34
+        lltype.free(p, flavor='raw')
+        lltype.free(ffi_point_struct, flavor='raw')
diff --git a/pypy/rlib/test/test_rrandom.py b/pypy/rlib/test/test_rrandom.py
--- a/pypy/rlib/test/test_rrandom.py
+++ b/pypy/rlib/test/test_rrandom.py
@@ -3,6 +3,12 @@
 
 # the numbers were created by using CPython's _randommodule.c
 
+def test_init_from_zero():
+    rnd = Random(0)
+    assert rnd.state[:14] == [0, 1, 1812433255, 1900727105, 1208447044,
+            2481403966, 4042607538, 337614300, 3232553940,
+            1018809052, 3202401494, 1775180719, 3192392114, 594215549]
+
 def test_init_from_seed():
     rnd = Random(1000)
     assert rnd.state[:14] == [1000, 4252021385, 1724402292, 571538732,
diff --git a/pypy/rlib/test/test_streamio.py b/pypy/rlib/test/test_streamio.py
--- a/pypy/rlib/test/test_streamio.py
+++ b/pypy/rlib/test/test_streamio.py
@@ -1008,6 +1008,75 @@
             assert base.buf == data
 
 
+class TestReadlineInputStream:
+
+    packets = ["a", "b", "\n", "def", "\nxy\npq\nuv", "wx"]
+    lines = ["ab\n", "def\n", "xy\n", "pq\n", "uvwx"]
+
+    def makeStream(self, seek=False, tell=False, bufsize=-1):
+        base = TSource(self.packets)
+        self.source = base
+        def f(*args):
+            if seek is False:
+                raise NotImplementedError     # a bug!
+            if seek is None:
+                raise streamio.MyNotImplementedError   # can be caught
+            raise ValueError(seek)  # uh?
+        if not tell:
+            base.tell = f
+        if not seek:
+            base.seek = f
+        return streamio.ReadlineInputStream(base, bufsize)
+
+    def test_readline(self):
+        for file in [self.makeStream(), self.makeStream(bufsize=2)]:
+            i = 0
+            while 1:
+                r = file.readline()
+                if r == "":
+                    break
+                assert self.lines[i] == r
+                i += 1
+            assert i == len(self.lines)
+
+    def test_readline_and_read_interleaved(self):
+        for file in [self.makeStream(seek=True),
+                     self.makeStream(seek=True, bufsize=2)]:
+            i = 0
+            while 1:
+                firstchar = file.read(1)
+                if firstchar == "":
+                    break
+                r = file.readline()
+                assert r != ""
+                assert self.lines[i] == firstchar + r
+                i += 1
+            assert i == len(self.lines)
+
+    def test_readline_and_read_interleaved_no_seek(self):
+        for file in [self.makeStream(seek=None),
+                     self.makeStream(seek=None, bufsize=2)]:
+            i = 0
+            while 1:
+                firstchar = file.read(1)
+                if firstchar == "":
+                    break
+                r = file.readline()
+                assert r != ""
+                assert self.lines[i] == firstchar + r
+                i += 1
+            assert i == len(self.lines)
+
+    def test_readline_and_readall(self):
+        file = self.makeStream(seek=True, tell=True, bufsize=2)
+        r = file.readline()
+        assert r == 'ab\n'
+        assert file.tell() == 3
+        r = file.readall()
+        assert r == 'def\nxy\npq\nuvwx'
+        r = file.readall()
+        assert r == ''
+
 
 # Speed test
 
diff --git a/pypy/rpython/lltypesystem/ll2ctypes.py b/pypy/rpython/lltypesystem/ll2ctypes.py
--- a/pypy/rpython/lltypesystem/ll2ctypes.py
+++ b/pypy/rpython/lltypesystem/ll2ctypes.py
@@ -418,6 +418,9 @@
     instance._storage = ctypes_storage
     assert ctypes_storage   # null pointer?
 
+class NotCtypesAllocatedStructure(ValueError):
+    pass
+
 class _parentable_mixin(object):
     """Mixin added to _parentable containers when they become ctypes-based.
     (This is done by changing the __class__ of the instance to reference
@@ -436,7 +439,7 @@
     def _addressof_storage(self):
         "Returns the storage address as an int"
         if self._storage is None or self._storage is True:
-            raise ValueError("Not a ctypes allocated structure")
+            raise NotCtypesAllocatedStructure("Not a ctypes allocated structure")
         return intmask(ctypes.cast(self._storage, ctypes.c_void_p).value)
 
     def _free(self):
diff --git a/pypy/rpython/lltypesystem/lltype.py b/pypy/rpython/lltypesystem/lltype.py
--- a/pypy/rpython/lltypesystem/lltype.py
+++ b/pypy/rpython/lltypesystem/lltype.py
@@ -831,7 +831,7 @@
     raise TypeError, "unsupported cast"
 
 def _cast_whatever(TGT, value):
-    from pypy.rpython.lltypesystem import llmemory
+    from pypy.rpython.lltypesystem import llmemory, rffi
     ORIG = typeOf(value)
     if ORIG == TGT:
         return value
@@ -847,6 +847,8 @@
                 return cast_pointer(TGT, value)
         elif ORIG == llmemory.Address:
             return llmemory.cast_adr_to_ptr(value, TGT)
+        elif TGT == rffi.VOIDP and ORIG == Unsigned:
+            return rffi.cast(TGT, value)
         elif ORIG == Signed:
             return cast_int_to_ptr(TGT, value)
     elif TGT == llmemory.Address and isinstance(ORIG, Ptr):
diff --git a/pypy/rpython/lltypesystem/rffi.py b/pypy/rpython/lltypesystem/rffi.py
--- a/pypy/rpython/lltypesystem/rffi.py
+++ b/pypy/rpython/lltypesystem/rffi.py
@@ -139,10 +139,10 @@
         source = py.code.Source("""
             def call_external_function(%(argnames)s):
                 before = aroundstate.before
-                after = aroundstate.after
                 if before: before()
                 # NB. it is essential that no exception checking occurs here!
                 res = funcptr(%(argnames)s)
+                after = aroundstate.after
                 if after: after()
                 return res
         """ % locals())
@@ -244,7 +244,7 @@
     def __init__(self):
         self.callbacks = {}
 
-def _make_wrapper_for(TP, callable, callbackholder, aroundstate=None):
+def _make_wrapper_for(TP, callable, callbackholder=None, aroundstate=None):
     """ Function creating wrappers for callbacks. Note that this is
     cheating as we assume constant callbacks and we just memoize wrappers
     """
@@ -253,21 +253,18 @@
     if hasattr(callable, '_errorcode_'):
         errorcode = callable._errorcode_
     else:
-        errorcode = TP.TO.RESULT._example()
+        errorcode = TP.TO.RESULT._defl()
     callable_name = getattr(callable, '__name__', '?')
-    callbackholder.callbacks[callable] = True
+    if callbackholder is not None:
+        callbackholder.callbacks[callable] = True
     args = ', '.join(['a%d' % i for i in range(len(TP.TO.ARGS))])
     source = py.code.Source(r"""
         def wrapper(%s):    # no *args - no GIL for mallocing the tuple
             llop.gc_stack_bottom(lltype.Void)   # marker for trackgcroot.py
             if aroundstate is not None:
-                before = aroundstate.before
                 after = aroundstate.after
-            else:
-                before = None
-                after = None
-            if after:
-                after()
+                if after:
+                    after()
             # from now on we hold the GIL
             stackcounter.stacks_counter += 1
             try:
@@ -281,8 +278,10 @@
                     traceback.print_exc()
                 result = errorcode
             stackcounter.stacks_counter -= 1
-            if before:
-                before()
+            if aroundstate is not None:
+                before = aroundstate.before
+                if before:
+                    before()
             # here we don't hold the GIL any more. As in the wrapper() produced
             # by llexternal, it is essential that no exception checking occurs
             # after the call to before().
diff --git a/pypy/rpython/memory/gc/minimark.py b/pypy/rpython/memory/gc/minimark.py
--- a/pypy/rpython/memory/gc/minimark.py
+++ b/pypy/rpython/memory/gc/minimark.py
@@ -1020,6 +1020,7 @@
                 objhdr.tid |= GCFLAG_CARDS_SET
 
         remember_young_pointer_from_array._dont_inline_ = True
+        assert self.card_page_indices > 0
         self.remember_young_pointer_from_array = (
             remember_young_pointer_from_array)
 
diff --git a/pypy/rpython/memory/gctransform/framework.py b/pypy/rpython/memory/gctransform/framework.py
--- a/pypy/rpython/memory/gctransform/framework.py
+++ b/pypy/rpython/memory/gctransform/framework.py
@@ -860,9 +860,9 @@
 
     def gct_get_write_barrier_from_array_failing_case(self, hop):
         op = hop.spaceop
-        hop.genop("same_as",
-                  [self.write_barrier_from_array_failing_case_ptr],
-                  resultvar=op.result)
+        v = getattr(self, 'write_barrier_from_array_failing_case_ptr',
+                    lltype.nullptr(op.result.concretetype.TO))
+        hop.genop("same_as", [v], resultvar=op.result)
 
     def gct_zero_gc_pointers_inside(self, hop):
         if not self.malloc_zero_filled:
diff --git a/pypy/rpython/module/test/test_posix.py b/pypy/rpython/module/test/test_posix.py
--- a/pypy/rpython/module/test/test_posix.py
+++ b/pypy/rpython/module/test/test_posix.py
@@ -43,6 +43,17 @@
         for i in range(len(stat)):
             assert long(getattr(func, 'item%d' % i)) == stat[i]
 
+    def test_stat_exception(self):
+        def fo():
+            try:
+                posix.stat('I/do/not/exist')
+            except OSError:
+                return True
+            else:
+                return False
+        res = self.interpret(fo,[])
+        assert res
+
     def test_times(self):
         import py; py.test.skip("llinterp does not like tuple returns")
         from pypy.rpython.test.test_llinterp import interpret
@@ -205,5 +216,8 @@
     def test_stat(self):
         py.test.skip("ootypesystem does not support os.stat")
 
+    def test_stat_exception(self):
+        py.test.skip("ootypesystem does not support os.stat")
+
     def test_chown(self):
         py.test.skip("ootypesystem does not support os.chown")
diff --git a/pypy/tool/gcc_cache.py b/pypy/tool/gcc_cache.py
--- a/pypy/tool/gcc_cache.py
+++ b/pypy/tool/gcc_cache.py
@@ -39,7 +39,16 @@
         data = ''
     if not (data.startswith('True') or data.startswith('FAIL\n')):
         try:
-            platform.compile(c_files, eci)
+            _previous = platform.log_errors
+            try:
+                platform.log_errors = False
+                platform.compile(c_files, eci)
+            finally:
+                del platform.log_errors
+                # ^^^remove from the instance --- needed so that it can
+                # compare equal to another instance without it
+                if platform.log_errors != _previous:
+                    platform.log_errors = _previous
             data = 'True'
             path.write(data)
         except CompilationError, e:
diff --git a/pypy/tool/jitlogparser/parser.py b/pypy/tool/jitlogparser/parser.py
--- a/pypy/tool/jitlogparser/parser.py
+++ b/pypy/tool/jitlogparser/parser.py
@@ -1,4 +1,5 @@
 import re, sys
+
 from pypy.jit.metainterp.resoperation import rop, opname
 from pypy.jit.tool.oparser import OpParser
 
@@ -51,6 +52,7 @@
 
     # factory method
     Op = Op
+    use_mock_model = True
 
     @classmethod
     def parse_from_input(cls, input):
@@ -61,7 +63,7 @@
         if not argspec.strip():
             return [], None
         if opname == 'debug_merge_point':
-            return argspec.rsplit(", ", 1), None
+            return argspec.split(", ", 1), None
         else:
             args = argspec.split(', ')
             descr = None
@@ -95,12 +97,12 @@
 
     def __init__(self, operations, storage):
         if operations[0].name == 'debug_merge_point':
-            self.inline_level = int(operations[0].args[1])
+            self.inline_level = int(operations[0].args[0])
             m = re.search('<code object ([<>\w]+), file \'(.+?)\', line (\d+)> #(\d+) (\w+)',
-                         operations[0].getarg(0))
+                         operations[0].getarg(1))
             if m is None:
                 # a non-code loop, like StrLiteralSearch or something
-                self.bytecode_name = operations[0].args[0].split(" ")[0][1:]
+                self.bytecode_name = operations[0].args[1].split(" ")[0][1:]
             else:
                 self.name, self.filename, lineno, bytecode_no, self.bytecode_name = m.groups()
                 self.startlineno = int(lineno)
diff --git a/pypy/tool/jitlogparser/test/test_parser.py b/pypy/tool/jitlogparser/test/test_parser.py
--- a/pypy/tool/jitlogparser/test/test_parser.py
+++ b/pypy/tool/jitlogparser/test/test_parser.py
@@ -29,7 +29,7 @@
 def test_parse_non_code():
     ops = parse('''
     []
-    debug_merge_point("SomeRandomStuff", 0)
+    debug_merge_point(0, "SomeRandomStuff")
     ''')
     res = Function.from_operations(ops.operations, LoopStorage())
     assert len(res.chunks) == 1
@@ -38,10 +38,10 @@
 def test_split():
     ops = parse('''
     [i0]
-    debug_merge_point("<code object stuff, file '/I/dont/exist.py', line 200> #10 ADD", 0)
-    debug_merge_point("<code object stuff, file '/I/dont/exist.py', line 200> #11 SUB", 0)
+    debug_merge_point(0, "<code object stuff, file '/I/dont/exist.py', line 200> #10 ADD")
+    debug_merge_point(0, "<code object stuff, file '/I/dont/exist.py', line 200> #11 SUB")
     i1 = int_add(i0, 1)
-    debug_merge_point("<code object stuff, file '/I/dont/exist.py', line 200> #11 SUB", 0)
+    debug_merge_point(0, "<code object stuff, file '/I/dont/exist.py', line 200> #11 SUB")
     i2 = int_add(i1, 1)
     ''')
     res = Function.from_operations(ops.operations, LoopStorage())
@@ -54,12 +54,12 @@
 def test_inlined_call():
     ops = parse("""
     []
-    debug_merge_point('<code object inlined_call, file 'source.py', line 12> #28 CALL_FUNCTION', 0)
+    debug_merge_point(0, '<code object inlined_call, file 'source.py', line 12> #28 CALL_FUNCTION')
     i18 = getfield_gc(p0, descr=<BoolFieldDescr pypy.interpreter.pyframe.PyFrame.inst_is_being_profiled 89>)
-    debug_merge_point('<code object inner, file 'source.py', line 9> #0 LOAD_FAST', 1)
-    debug_merge_point('<code object inner, file 'source.py', line 9> #3 LOAD_CONST', 1)
-    debug_merge_point('<code object inner, file 'source.py', line 9> #7 RETURN_VALUE', 1)
-    debug_merge_point('<code object inlined_call, file 'source.py', line 12> #31 STORE_FAST', 0)
+    debug_merge_point(1, '<code object inner, file 'source.py', line 9> #0 LOAD_FAST')
+    debug_merge_point(1, '<code object inner, file 'source.py', line 9> #3 LOAD_CONST')
+    debug_merge_point(1, '<code object inner, file 'source.py', line 9> #7 RETURN_VALUE')
+    debug_merge_point(0, '<code object inlined_call, file 'source.py', line 12> #31 STORE_FAST')
     """)
     res = Function.from_operations(ops.operations, LoopStorage())
     assert len(res.chunks) == 3 # two chunks + inlined call
@@ -72,10 +72,10 @@
 def test_name():
     ops = parse('''
     [i0]
-    debug_merge_point("<code object stuff, file '/I/dont/exist.py', line 200> #10 ADD", 0)
-    debug_merge_point("<code object stuff, file '/I/dont/exist.py', line 201> #11 SUB", 0)
+    debug_merge_point(0, "<code object stuff, file '/I/dont/exist.py', line 200> #10 ADD")
+    debug_merge_point(0, "<code object stuff, file '/I/dont/exist.py', line 201> #11 SUB")
     i1 = int_add(i0, 1)
-    debug_merge_point("<code object stuff, file '/I/dont/exist.py', line 202> #11 SUB", 0)
+    debug_merge_point(0, "<code object stuff, file '/I/dont/exist.py', line 202> #11 SUB")
     i2 = int_add(i1, 1)
     ''')
     res = Function.from_operations(ops.operations, LoopStorage())
@@ -89,10 +89,10 @@
     ops = parse('''
     [i0]
     i3 = int_add(i0, 1)
-    debug_merge_point("<code object stuff, file '/I/dont/exist.py', line 200> #10 ADD", 0)
-    debug_merge_point("<code object stuff, file '/I/dont/exist.py', line 201> #11 SUB", 0)
+    debug_merge_point(0, "<code object stuff, file '/I/dont/exist.py', line 200> #10 ADD")
+    debug_merge_point(0, "<code object stuff, file '/I/dont/exist.py', line 201> #11 SUB")
     i1 = int_add(i0, 1)
-    debug_merge_point("<code object stuff, file '/I/dont/exist.py', line 202> #11 SUB", 0)
+    debug_merge_point(0, "<code object stuff, file '/I/dont/exist.py', line 202> #11 SUB")
     i2 = int_add(i1, 1)
     ''')
     res = Function.from_operations(ops.operations, LoopStorage())
@@ -102,10 +102,10 @@
     fname = str(py.path.local(__file__).join('..', 'x.py'))
     ops = parse('''
     [i0, i1]
-    debug_merge_point("<code object f, file '%(fname)s', line 2> #0 LOAD_FAST", 0)
-    debug_merge_point("<code object f, file '%(fname)s', line 2> #3 LOAD_FAST", 0)
-    debug_merge_point("<code object f, file '%(fname)s', line 2> #6 BINARY_ADD", 0)
-    debug_merge_point("<code object f, file '%(fname)s', line 2> #7 RETURN_VALUE", 0)
+    debug_merge_point(0, "<code object f, file '%(fname)s', line 2> #0 LOAD_FAST")
+    debug_merge_point(0, "<code object f, file '%(fname)s', line 2> #3 LOAD_FAST")
+    debug_merge_point(0, "<code object f, file '%(fname)s', line 2> #6 BINARY_ADD")
+    debug_merge_point(0, "<code object f, file '%(fname)s', line 2> #7 RETURN_VALUE")
     ''' % locals())
     res = Function.from_operations(ops.operations, LoopStorage())
     assert res.chunks[1].lineno == 3
@@ -114,11 +114,11 @@
     fname = str(py.path.local(__file__).join('..', 'x.py'))
     ops = parse('''
     [i0, i1]
-    debug_merge_point("<code object g, file '%(fname)s', line 5> #9 LOAD_FAST", 0)
-    debug_merge_point("<code object g, file '%(fname)s', line 5> #12 LOAD_CONST", 0)
-    debug_merge_point("<code object g, file '%(fname)s', line 5> #22 LOAD_CONST", 0)
-    debug_merge_point("<code object g, file '%(fname)s', line 5> #28 LOAD_CONST", 0)
-    debug_merge_point("<code object g, file '%(fname)s', line 5> #6 SETUP_LOOP", 0)
+    debug_merge_point(0, "<code object g, file '%(fname)s', line 5> #9 LOAD_FAST")
+    debug_merge_point(0, "<code object g, file '%(fname)s', line 5> #12 LOAD_CONST")
+    debug_merge_point(0, "<code object g, file '%(fname)s', line 5> #22 LOAD_CONST")
+    debug_merge_point(0, "<code object g, file '%(fname)s', line 5> #28 LOAD_CONST")
+    debug_merge_point(0, "<code object g, file '%(fname)s', line 5> #6 SETUP_LOOP")
     ''' % locals())
     res = Function.from_operations(ops.operations, LoopStorage())
     assert res.linerange == (7, 9)
@@ -128,7 +128,7 @@
     fname = str(py.path.local(__file__).join('..', 'x.py'))
     ops = parse("""
     [p6, p1]
-    debug_merge_point('<code object h, file '%(fname)s', line 11> #17 FOR_ITER', 0)
+    debug_merge_point(0, '<code object h, file '%(fname)s', line 11> #17 FOR_ITER')
     guard_class(p6, 144264192, descr=<Guard2>)
     p12 = getfield_gc(p6, descr=<GcPtrFieldDescr pypy.objspace.std.iterobject.W_AbstractSeqIterObject.inst_w_seq 12>)
     """ % locals())
@@ -174,7 +174,7 @@
 
 def test_parsing_strliteral():
     loop = parse("""
-    debug_merge_point('StrLiteralSearch at 11/51 [17, 8, 3, 1, 1, 1, 1, 51, 0, 19, 51, 1]', 0)
+    debug_merge_point(0, 'StrLiteralSearch at 11/51 [17, 8, 3, 1, 1, 1, 1, 51, 0, 19, 51, 1]')
     """)
     ops = Function.from_operations(loop.operations, LoopStorage())
     chunk = ops.chunks[0]
diff --git a/pypy/tool/test/test_gcc_cache.py b/pypy/tool/test/test_gcc_cache.py
--- a/pypy/tool/test/test_gcc_cache.py
+++ b/pypy/tool/test/test_gcc_cache.py
@@ -1,11 +1,13 @@
-
+import sys
 from pypy.tool.gcc_cache import *
 from pypy.tool.udir import udir
-import md5
+import md5, cStringIO
 from pypy.translator.tool.cbuild import ExternalCompilationInfo
 
+localudir = udir.join('test_gcc_cache').ensure(dir=1)
+
 def test_gcc_exec():
-    f = udir.join("x.c")
+    f = localudir.join("x.c")
     f.write("""
     #include <stdio.h>
     #include <test_gcc_exec.h>
@@ -15,8 +17,8 @@
        return 0;
     }
     """)
-    dir1 = udir.join('test_gcc_exec_dir1').ensure(dir=1)
-    dir2 = udir.join('test_gcc_exec_dir2').ensure(dir=1)
+    dir1 = localudir.join('test_gcc_exec_dir1').ensure(dir=1)
+    dir2 = localudir.join('test_gcc_exec_dir2').ensure(dir=1)
     dir1.join('test_gcc_exec.h').write('#define ANSWER 3\n')
     dir2.join('test_gcc_exec.h').write('#define ANSWER 42\n')
     eci = ExternalCompilationInfo(include_dirs=[str(dir1)])
@@ -36,7 +38,7 @@
     print '>>>'
 
 def test_gcc_ask():
-    f = udir.join("y.c")
+    f = localudir.join("y.c")
     f.write("""
     #include <stdio.h>
     #include <test_gcc_ask.h>
@@ -46,8 +48,8 @@
        return 0;
     }
     """)
-    dir1 = udir.join('test_gcc_ask_dir1').ensure(dir=1)
-    dir2 = udir.join('test_gcc_ask_dir2').ensure(dir=1)
+    dir1 = localudir.join('test_gcc_ask_dir1').ensure(dir=1)
+    dir2 = localudir.join('test_gcc_ask_dir2').ensure(dir=1)
     dir1.join('test_gcc_ask.h').write('/* hello world */\n')
     dir2.join('test_gcc_ask.h').write('#error boom\n')
     eci = ExternalCompilationInfo(include_dirs=[str(dir1)])
@@ -63,3 +65,15 @@
     print '<<<'
     print err
     print '>>>'
+
+def test_gcc_ask_doesnt_log_errors():
+    f = localudir.join('z.c')
+    f.write("""this file is not valid C code\n""")
+    eci = ExternalCompilationInfo()
+    oldstderr = sys.stderr
+    try:
+        sys.stderr = capture = cStringIO.StringIO()
+        py.test.raises(CompilationError, try_compile_cache, [f], eci)
+    finally:
+        sys.stderr = oldstderr
+    assert 'ERROR' not in capture.getvalue().upper()
diff --git a/pypy/translator/c/gc.py b/pypy/translator/c/gc.py
--- a/pypy/translator/c/gc.py
+++ b/pypy/translator/c/gc.py
@@ -297,6 +297,13 @@
 
     gc_startup_code = RefcountingGcPolicy.gc_startup_code.im_func
 
+    def compilation_info(self):
+        eci = BasicGcPolicy.compilation_info(self)
+        eci = eci.merge(ExternalCompilationInfo(
+            post_include_bits=['#define USING_NO_GC_AT_ALL'],
+            ))
+        return eci
+
 
 class FrameworkGcPolicy(BasicGcPolicy):
     transformerclass = framework.FrameworkGCTransformer
diff --git a/pypy/translator/c/gcc/instruction.py b/pypy/translator/c/gcc/instruction.py
--- a/pypy/translator/c/gcc/instruction.py
+++ b/pypy/translator/c/gcc/instruction.py
@@ -187,8 +187,8 @@
 
     def requestgcroots(self, tracker):
         # no need to track the value of these registers in the caller
-        # function if we are the main(), or if we are flagged as a
-        # "bottom" function (a callback from C code)
+        # function if we are flagged as a "bottom" function (a callback
+        # from C code, or pypy_main_function())
         if tracker.is_stack_bottom:
             return {}
         else:
diff --git a/pypy/translator/c/gcc/test/elf/track10.s b/pypy/translator/c/gcc/test/elf/track10.s
--- a/pypy/translator/c/gcc/test/elf/track10.s
+++ b/pypy/translator/c/gcc/test/elf/track10.s
@@ -1,5 +1,5 @@
-	.type	main, @function
-main:
+	.type	main1, @function
+main1:
 	pushl	%ebx
 	call	pypy_f
 	;; expected {4(%esp) | (%esp), %esi, %edi, %ebp | %ebx}
@@ -11,4 +11,4 @@
 	/* GCROOT %ebx */
 	popl	%ebx
 	ret
-	.size	main, .-main
+	.size	main1, .-main1
diff --git a/pypy/translator/c/gcc/test/elf/track12.s b/pypy/translator/c/gcc/test/elf/track12.s
new file mode 100644
--- /dev/null
+++ b/pypy/translator/c/gcc/test/elf/track12.s
@@ -0,0 +1,9 @@
+	.type	pypy_f, @function
+pypy_f:
+	pushl   4(%esp)
+	call    pypy_other
+	;; expected {4(%esp) | %ebx, %esi, %edi, %ebp | (%esp)}
+	popl    %eax
+	/* GCROOT %eax */
+	ret
+	.size	pypy_f, .-pypy_f
diff --git a/pypy/translator/c/gcc/test/elf/track13.s b/pypy/translator/c/gcc/test/elf/track13.s
new file mode 100644
--- /dev/null
+++ b/pypy/translator/c/gcc/test/elf/track13.s
@@ -0,0 +1,9 @@
+	.type	pypy_f, @function
+pypy_f:
+	call    pypy_other
+	;; expected {(%esp) | %ebx, %esi, %edi, %ebp | 8(%esp)}
+	pushl   8(%esp)
+	popl    %eax
+	/* GCROOT %eax */
+	ret
+	.size	pypy_f, .-pypy_f
diff --git a/pypy/translator/c/gcc/test/elf/track4.s b/pypy/translator/c/gcc/test/elf/track4.s
deleted file mode 100644
--- a/pypy/translator/c/gcc/test/elf/track4.s
+++ /dev/null
@@ -1,52 +0,0 @@
-	.type	main, @function
-main:
-	;; this is an artificial example showing what kind of code gcc
-	;; can produce for main()
-	pushl	%ebp
-	movl	%eax, $globalptr1
-	movl	%esp, %ebp
-	pushl	%edi
-	subl	$8, %esp
-	andl	$-16, %esp
-	movl	%ebx, -8(%ebp)
-	movl	8(%ebp), %edi
-	call	foobar
-	;; expected {4(%ebp) | -8(%ebp), %esi, -4(%ebp), (%ebp) | %edi}
-.L1:
-	cmpl	$0, %eax
-	je	.L3
-.L2:
-	;; inlined function here with -fomit-frame-pointer
-	movl	%eax, -12(%ebp)
-	movl	%edi, %edx
-	subl	$16, %esp
-	movl	%eax, (%esp)
-	movl	$42, %edi
-	movl	%edx, 4(%esp)
-	movl	%esi, %ebx
-	movl	$nonsense, %esi
-	call	foobar
-	;; expected {4(%ebp) | -8(%ebp), %ebx, -4(%ebp), (%ebp) | 4(%esp), -12(%ebp)}
-	addl	%edi, %eax
-	movl	4(%esp), %eax
-	movl	%ebx, %esi
-	addl	$16, %esp
-	movl	%eax, %edi
-	movl	-12(%ebp), %eax
-#APP
-	/* GCROOT %eax */
-#NO_APP
-	;; end of inlined function
-.L3:
-	call	foobar
-	;; expected {4(%ebp) | -8(%ebp), %esi, -4(%ebp), (%ebp) | %edi}
-#APP
-	/* GCROOT %edi */
-#NO_APP
-	movl	-8(%ebp), %ebx
-	movl	-4(%ebp), %edi
-	movl	%ebp, %esp
-	popl	%ebp
-	ret
-
-	.size	main, .-main
diff --git a/pypy/translator/c/gcc/test/elf/track6.s b/pypy/translator/c/gcc/test/elf/track6.s
deleted file mode 100644
--- a/pypy/translator/c/gcc/test/elf/track6.s
+++ /dev/null
@@ -1,26 +0,0 @@
-	.type	main, @function
-main:
-	;; a minimal example showing what kind of code gcc
-	;; can produce for main(): some local variable accesses
-	;; are relative to %ebp, while others are relative to
-	;; %esp, and the difference %ebp-%esp is not constant
-	;; because of the 'andl' to align the stack
-	pushl	%ebp
-	movl	%esp, %ebp
-	subl	$8, %esp
-	andl	$-16, %esp
-	movl	$globalptr1, -4(%ebp)
-	movl	$globalptr2, (%esp)
-	pushl	$0
-	call	foobar
-	;; expected {4(%ebp) | %ebx, %esi, %edi, (%ebp) | 4(%esp), -4(%ebp)}
-	popl	%eax
-#APP
-	/* GCROOT -4(%ebp) */
-	/* GCROOT (%esp) */
-#NO_APP
-	movl	%ebp, %esp
-	popl	%ebp
-	ret
-
-	.size	main, .-main
diff --git a/pypy/translator/c/gcc/test/elf/track7.s b/pypy/translator/c/gcc/test/elf/track7.s
--- a/pypy/translator/c/gcc/test/elf/track7.s
+++ b/pypy/translator/c/gcc/test/elf/track7.s
@@ -1,5 +1,5 @@
-	.type	main, @function
-main:
+	.type	main1, @function
+main1:
 	;; cmovCOND tests.
 	pushl	%ebx
 	movl	12(%esp), %ebx
@@ -16,4 +16,4 @@
 	popl	%ebx
 	ret
 
-	.size	main, .-main
+	.size	main1, .-main1
diff --git a/pypy/translator/c/gcc/test/msvc/track6.s b/pypy/translator/c/gcc/test/msvc/track6.s
deleted file mode 100644
--- a/pypy/translator/c/gcc/test/msvc/track6.s
+++ /dev/null
@@ -1,15 +0,0 @@
-_TEXT	SEGMENT
-_pypy_g_foo PROC					; COMDAT
-
-	push	ebp
-	mov	ebp, esp
-	and	esp, -64
-	sub	esp, 12
-	push	esi
-	call	_pypy_g_something_else
-	;; expected {4(%ebp) | %ebx, (%esp), %edi, (%ebp) | }
-	pop	esi
-	mov	esp, ebp
-	pop	ebp
-	ret	0
-_pypy_g_foo ENDP
diff --git a/pypy/translator/c/gcc/test/msvc/track_and_esp.s b/pypy/translator/c/gcc/test/msvc/track_and_esp.s
new file mode 100644
--- /dev/null
+++ b/pypy/translator/c/gcc/test/msvc/track_and_esp.s
@@ -0,0 +1,466 @@
+PUBLIC	??_C at _0BN@BIPHFGBC at pypy_g_ll_math_ll_math_frexp?$AA@ ; `string'
+PUBLIC	_pypy_g_ll_math_ll_math_frexp
+;	COMDAT ??_C at _0BN@BIPHFGBC at pypy_g_ll_math_ll_math_frexp?$AA@
+CONST	SEGMENT
+??_C at _0BN@BIPHFGBC at pypy_g_ll_math_ll_math_frexp?$AA@ DB 'pypy_g_ll_math_l'
+	DB	'l_math_frexp', 00H				; `string'
+; Function compile flags: /Ogtpy
+CONST	ENDS
+;	COMDAT _pypy_g_ll_math_ll_math_frexp
+_TEXT	SEGMENT
+_l_mantissa_0$ = -8					; size = 8
+_l_v21638$ = -8						; size = 8
+_l_x_14$ = 8						; size = 8
+_pypy_g_ll_math_ll_math_frexp PROC			; COMDAT
+
+; 58245: struct pypy_tuple2_0 *pypy_g_ll_math_ll_math_frexp(double l_x_14) {
+
+	push	ebp
+	mov	ebp, esp
+	and	esp, -64				; ffffffc0H
+
+; 58246: 	long *l_exp_p_0; double l_mantissa_0; bool_t l_v21641;
+; 58247: 	bool_t l_v21643; bool_t l_v21644; bool_t l_v21646; bool_t l_v21647;
+; 58248: 	bool_t l_v21652; bool_t l_v21653; bool_t l_v21660; bool_t l_v21666;
+; 58249: 	bool_t l_v21670; bool_t l_v21674; bool_t l_v21676; double l_v21638;
+; 58250: 	long l_v21637; long l_v21649; long l_v21651; long l_v21677;
+; 58251: 	long l_v21678; struct pypy_exceptions_Exception0 *l_v21687;
+; 58252: 	struct pypy_header0 *l_v21654; struct pypy_object0 *l_v21682;
+; 58253: 	struct pypy_object0 *l_v21691; struct pypy_object_vtable0 *l_v21665;
+; 58254: 	struct pypy_object_vtable0 *l_v21669;
+; 58255: 	struct pypy_object_vtable0 *l_v21675;
+; 58256: 	struct pypy_object_vtable0 *l_v21683; struct pypy_tuple2_0 *l_v21640;
+; 58257: 	struct pypy_tuple2_0 *l_v21695; void* l_v21639; void* l_v21648;
+; 58258: 	void* l_v21650; void* l_v21656; void* l_v21658; void* l_v21659;
+; 58259: 	void* l_v21668; void* l_v21672; void* l_v21679; void* l_v21688;
+; 58260: 	void* l_v21696;
+; 58261: 	goto block0;
+; 58262: 
+; 58263:     block0:
+; 58264: 	l_v21641 = pypy_g_ll_math_ll_math_isnan(l_x_14);
+
+	fld	QWORD PTR _l_x_14$[ebp]
+	sub	esp, 52					; 00000034H
+	push	ebx
+	push	esi
+	push	edi
+	sub	esp, 8
+	fstp	QWORD PTR [esp]
+$block0$88239:
+	call	_pypy_g_ll_math_ll_math_isnan
+
+; 58265: 	pypy_asm_gc_nocollect(pypy_g_ll_math_ll_math_isnan);
+; 58266: 	l_v21643 = l_v21641;
+; 58267: 	if (l_v21643) {
+; 58268: 		l_v21637 = 0L;
+; 58269: 		l_v21638 = l_x_14;
+
+	fld	QWORD PTR _l_x_14$[ebp]
+	add	esp, 8
+	test	al, al
+
+; 58270: 		goto block3;
+
+	jne	SHORT $LN10 at pypy_g_ll_@159
+
+; 58271: 	}
+; 58272: 	goto block1;
+; 58273: 
+; 58274:     block1:
+; 58275: 	l_v21644 = pypy_g_ll_math_ll_math_isinf(l_x_14);
+
+	sub	esp, 8
+	fstp	QWORD PTR [esp]
+$block1$88243:
+	call	_pypy_g_ll_math_ll_math_isinf
+	add	esp, 8
+
+; 58276: 	pypy_asm_gc_nocollect(pypy_g_ll_math_ll_math_isinf);
+; 58277: 	l_v21646 = l_v21644;
+; 58278: 	if (l_v21646) {
+
+	test	al, al
+	je	SHORT $block2$88245
+
+; 58279: 		l_v21637 = 0L;
+; 58280: 		l_v21638 = l_x_14;
+
+	fld	QWORD PTR _l_x_14$[ebp]
+$LN10 at pypy_g_ll_@159:
+
+; 58288: 		goto block14;
+; 58289: 	}
+; 58290: 	l_v21637 = 0L;
+
+	xor	edi, edi
+$LN30 at pypy_g_ll_@159:
+
+; 58291: 	l_v21638 = l_x_14;
+; 58292: 	goto block3;
+; 58293: 
+; 58294:     block3:
+; 58295: 	l_v21648 = (&pypy_g_pypy_rpython_memory_gc_semispace_SemiSpaceGC)->ssgc_inst_free;
+
+	mov	esi, DWORD PTR _pypy_g_pypy_rpython_memory_gc_semispace_SemiSpaceGC+4
+	fstp	QWORD PTR _l_v21638$[esp+64]
+
+; 58296: 	OP_RAW_MALLOC_USAGE((0 + ROUND_UP_FOR_ALLOCATION(sizeof(struct pypy_tuple2_0), sizeof(struct pypy_forwarding_stub0))), l_v21649);
+; 58297: 	l_v21650 = (&pypy_g_pypy_rpython_memory_gc_semispace_SemiSpaceGC)->ssgc_inst_top_of_space;
+; 58298: 	OP_ADR_DELTA(l_v21650, l_v21648, l_v21651);
+
+	mov	eax, DWORD PTR _pypy_g_pypy_rpython_memory_gc_semispace_SemiSpaceGC+12
+	sub	eax, esi
+
+; 58299: 	OP_INT_GT(l_v21649, l_v21651, l_v21652);
+
+	cmp	eax, 24					; 00000018H
+$block3$88242:
+
+; 58300: 	if (l_v21652) {
+
+	jge	$block4$88260
+
+; 58334: 	l_v21695 = l_v21640;
+; 58335: 	goto block8;
+; 58336: 
+; 58337:     block8:
+; 58338: 	RPY_DEBUG_RETURN();
+; 58339: 	return l_v21695;
+; 58340: 
+; 58341:     block9:
+; 58342: 	PYPY_DEBUG_RECORD_TRACEBACK("ll_math_ll_math_frexp");
+; 58343: 	l_v21695 = ((struct pypy_tuple2_0 *) NULL);
+; 58344: 	goto block8;
+; 58345: 
+; 58346:     block10:
+; 58347: 	abort();  /* debug_llinterpcall should be unreachable */
+; 58348: 	l_v21665 = (&pypy_g_ExcData)->ed_exc_type;
+; 58349: 	l_v21666 = (l_v21665 == NULL);
+; 58350: 	if (!l_v21666) {
+; 58351: 		goto block11;
+; 58352: 	}
+; 58353: 	goto block5;
+; 58354: 
+; 58355:     block11:
+; 58356: 	PYPY_DEBUG_RECORD_TRACEBACK("ll_math_ll_math_frexp");
+; 58357: 	l_v21696 = NULL;
+; 58358: 	goto block6;
+; 58359: 
+; 58360:     block12:
+; 58361: 	l_v21668 = pypy_g_SemiSpaceGC_obtain_free_space((&pypy_g_pypy_rpython_memory_gc_semispace_SemiSpaceGC), (0 + ROUND_UP_FOR_ALLOCATION(sizeof(struct pypy_tuple2_0), sizeof(struct pypy_forwarding_stub0))));
+
+	push	24					; 00000018H
+	push	OFFSET _pypy_g_pypy_rpython_memory_gc_semispace_SemiSpaceGC
+$block12$88259:
+	call	_pypy_g_SemiSpaceGC_obtain_free_space
+
+; 58362: 	l_v21669 = (&pypy_g_ExcData)->ed_exc_type;
+; 58363: 	l_v21670 = (l_v21669 == NULL);
+
+	xor	ecx, ecx
+	add	esp, 8
+	cmp	DWORD PTR _pypy_g_ExcData, ecx
+
+; 58364: 	if (!l_v21670) {
+
+	je	$LN5 at pypy_g_ll_@159
+
+; 58368: 	goto block4;
+; 58369: 
+; 58370:     block13:
+; 58371: 	PYPY_DEBUG_RECORD_TRACEBACK("ll_math_ll_math_frexp");
+
+	mov	eax, DWORD PTR _pypydtcount
+	mov	DWORD PTR _pypy_debug_tracebacks[eax*8], OFFSET ?loc@?N@??pypy_g_ll_math_ll_math_frexp@@9 at 9
+	mov	DWORD PTR _pypy_debug_tracebacks[eax*8+4], ecx
+	inc	eax
+	and	eax, 8191				; 00001fffH
+	mov	DWORD PTR _pypy_debug_tracebacks[eax*8], OFFSET ?loc@?8??pypy_g_ll_math_ll_math_frexp@@9 at 9
+	mov	DWORD PTR _pypy_debug_tracebacks[eax*8+4], ecx
+	inc	eax
+	and	eax, 8191				; 00001fffH
+	mov	DWORD PTR _pypydtcount, eax
+$block13$88313:
+$block9$88285:
+	xor	eax, eax
+
+; 58423: 	goto block8;
+; 58424: }
+
+	pop	edi
+	pop	esi
+	pop	ebx
+	mov	esp, ebp
+	pop	ebp
+	ret	0
+$block2$88245:
+
+; 58281: 		goto block3;
+; 58282: 	}
+; 58283: 	goto block2;
+; 58284: 
+; 58285:     block2:
+; 58286: 	OP_FLOAT_IS_TRUE(l_x_14, l_v21647);
+
+	fldz
+	fld	QWORD PTR _l_x_14$[ebp]
+	fucom	ST(1)
+	fnstsw	ax
+	fstp	ST(1)
+	test	ah, 68					; 00000044H
+
+; 58287: 	if (l_v21647) {
+
+	jnp	$LN10 at pypy_g_ll_@159
+
+; 58372: 	l_v21696 = NULL;
+; 58373: 	goto block6;
+; 58374: 
+; 58375:     block14:
+; 58376: 	l_v21672 = pypy_g__ll_malloc_varsize_no_length__Signed_Signed_Sign(1L, (0 + 0), sizeof(long));
+
+	push	4
+	fstp	ST(0)
+	push	0
+	push	1
+$block14$88247:
+	call	_pypy_g__ll_malloc_varsize_no_length__Signed_Signed_Sign
+	mov	esi, eax
+
+; 58377: 	OP_TRACK_ALLOC_START(l_v21672, /* nothing */);
+
+	push	OFFSET ??_C at _0BN@BIPHFGBC at pypy_g_ll_math_ll_math_frexp?$AA@
+	push	esi
+	call	_pypy_debug_alloc_start
+	add	esp, 20					; 00000014H
+
+; 58378: 	l_exp_p_0 = (long *)l_v21672;
+; 58379: 	l_v21674 = (l_exp_p_0 != NULL);
+
+	test	esi, esi
+
+; 58380: 	if (!l_v21674) {
+
+	jne	SHORT $block15$88324
+
+; 58418: 	goto block8;
+; 58419: 
+; 58420:     block18:
+; 58421: 	PYPY_DEBUG_RECORD_TRACEBACK("ll_math_ll_math_frexp");
+
+	mov	eax, DWORD PTR _pypydtcount
+	mov	DWORD PTR _pypy_debug_tracebacks[eax*8], OFFSET ?loc@?BB@??pypy_g_ll_math_ll_math_frexp@@9 at 9
+	mov	DWORD PTR _pypy_debug_tracebacks[eax*8+4], esi
+	inc	eax
+	and	eax, 8191				; 00001fffH
+	mov	DWORD PTR _pypydtcount, eax
+$block18$88323:
+
+; 58422: 	l_v21695 = ((struct pypy_tuple2_0 *) NULL);
+
+	xor	eax, eax
+
+; 58423: 	goto block8;
+; 58424: }
+
+	pop	edi
+	pop	esi
+	pop	ebx
+	mov	esp, ebp
+	pop	ebp
+	ret	0
+$block15$88324:
+
+; 58381: 		goto block18;
+; 58382: 	}
+; 58383: 	goto block15;
+; 58384: 
+; 58385:     block15:
+; 58386: 	l_mantissa_0 = pypy_g_frexp__Float_arrayPtr_star_2(l_x_14, l_exp_p_0);
+
+	fld	QWORD PTR _l_x_14$[ebp]
+	push	esi
+	sub	esp, 8
+	fstp	QWORD PTR [esp]
+	call	_pypy_g_frexp__Float_arrayPtr_star_2
+
+; 58387: 	l_v21675 = (&pypy_g_ExcData)->ed_exc_type;
+; 58388: 	l_v21676 = (l_v21675 == NULL);
+
+	mov	edi, DWORD PTR _pypy_g_ExcData
+	fstp	QWORD PTR _l_mantissa_0$[esp+76]
+	add	esp, 12					; 0000000cH
+	test	edi, edi
+
+; 58389: 	if (!l_v21676) {
+
+	je	SHORT $block16$88328
+
+; 58403: 
+; 58404:     block17:
+; 58405: 	l_v21682 = (&pypy_g_ExcData)->ed_exc_value;
+; 58406: 	l_v21683 = (&pypy_g_ExcData)->ed_exc_type;
+; 58407: 	PYPY_DEBUG_CATCH_EXCEPTION("ll_math_ll_math_frexp", l_v21683, l_v21683 == (&pypy_g_py__code_assertion_AssertionError_vtable.ae_super.ae_super.se_super.e_super) || l_v21683 == (&pypy_g_exceptions_NotImplementedError_vtable.nie_super.re_super.se_super.e_super));
+
+	mov	eax, DWORD PTR _pypydtcount
+	mov	ebx, DWORD PTR _pypy_g_ExcData+4
+	mov	DWORD PTR _pypy_debug_tracebacks[eax*8], OFFSET ?loc@?BA@??pypy_g_ll_math_ll_math_frexp@@9 at 9
+	mov	DWORD PTR _pypy_debug_tracebacks[eax*8+4], edi
+	inc	eax
+	and	eax, 8191				; 00001fffH
+$block17$88327:
+	mov	DWORD PTR _pypydtcount, eax
+	cmp	edi, OFFSET _pypy_g_py__code_assertion_AssertionError_vtable
+	je	SHORT $LN1 at pypy_g_ll_@159
+	cmp	edi, OFFSET _pypy_g_exceptions_NotImplementedError_vtable
+	jne	SHORT $LN2 at pypy_g_ll_@159
+$LN1 at pypy_g_ll_@159:
+	call	_pypy_debug_catch_fatal_exception
+$LN2 at pypy_g_ll_@159:
+
+; 58408: 	(&pypy_g_ExcData)->ed_exc_value = ((struct pypy_object0 *) NULL);
+
+	xor	eax, eax
+
+; 58409: 	(&pypy_g_ExcData)->ed_exc_type = ((struct pypy_object_vtable0 *) NULL);
+; 58410: 	l_v21687 = (struct pypy_exceptions_Exception0 *)l_v21682;
+; 58411: 	l_v21688 = (void*)l_exp_p_0;
+; 58412: 	OP_TRACK_ALLOC_STOP(l_v21688, /* nothing */);
+
+	push	esi
+	mov	DWORD PTR _pypy_g_ExcData+4, eax
+	mov	DWORD PTR _pypy_g_ExcData, eax
+	call	_pypy_debug_alloc_stop
+
+; 58413: 	OP_RAW_FREE(l_v21688, /* nothing */);
+
+	push	esi
+	call	_PyObject_Free
+
+; 58414: 	l_v21691 = (struct pypy_object0 *)l_v21687;
+; 58415: 	pypy_g_RPyReRaiseException(l_v21683, l_v21691);
+
+	push	ebx
+	push	edi
+	call	_pypy_g_RPyReRaiseException
+	add	esp, 16					; 00000010H
+
+; 58416: 	pypy_asm_gc_nocollect(pypy_g_RPyReRaiseException);
+; 58417: 	l_v21695 = ((struct pypy_tuple2_0 *) NULL);
+
+	xor	eax, eax
+
+; 58423: 	goto block8;
+; 58424: }
+
+	pop	edi
+	pop	esi
+	pop	ebx
+	mov	esp, ebp
+	pop	ebp
+	ret	0
+$block16$88328:
+
+; 58390: 		goto block17;
+; 58391: 	}
+; 58392: 	goto block16;
+; 58393: 
+; 58394:     block16:
+; 58395: 	l_v21677 = RPyBareItem(l_exp_p_0, 0L);
+; 58396: 	l_v21678 = (long)(l_v21677);
+
+	mov	edi, DWORD PTR [esi]
+
+; 58397: 	l_v21679 = (void*)l_exp_p_0;
+; 58398: 	OP_TRACK_ALLOC_STOP(l_v21679, /* nothing */);
+
+	push	esi
+	call	_pypy_debug_alloc_stop
+
+; 58399: 	OP_RAW_FREE(l_v21679, /* nothing */);
+
+	push	esi
+	call	_PyObject_Free
+
+; 58400: 	l_v21637 = l_v21678;
+; 58401: 	l_v21638 = l_mantissa_0;
+
+	fld	QWORD PTR _l_mantissa_0$[esp+72]
+	add	esp, 8
+
+; 58402: 	goto block3;
+
+	jmp	$LN30 at pypy_g_ll_@159
+$LN5 at pypy_g_ll_@159:
+
+; 58365: 		goto block13;
+; 58366: 	}
+; 58367: 	l_v21639 = l_v21668;
+
+	mov	esi, eax
+$block4$88260:
+$block5$88263:
+
+; 58301: 		goto block12;
+; 58302: 	}
+; 58303: 	l_v21639 = l_v21648;
+; 58304: 	goto block4;
+; 58305: 
+; 58306:     block4:
+; 58307: 	OP_INT_IS_TRUE(RUNNING_ON_LLINTERP, l_v21653);
+; 58308: 	if (l_v21653) {
+; 58309: 		goto block10;
+; 58310: 	}
+; 58311: 	goto block5;
+; 58312: 
+; 58313:     block5:
+; 58314: 	l_v21654 = (struct pypy_header0 *)l_v21639;
+; 58315: 	RPyField(l_v21654, h_tid) = (GROUP_MEMBER_OFFSET(struct group_pypy_g_typeinfo_s, member20)+0L);
+
+	test	esi, esi
+	jne	SHORT $LN18 at pypy_g_ll_@159
+	call	_RPyAbort
+$LN18 at pypy_g_ll_@159:
+
+; 58316: 	OP_ADR_ADD(l_v21639, (0 + ROUND_UP_FOR_ALLOCATION(sizeof(struct pypy_tuple2_0), sizeof(struct pypy_forwarding_stub0))), l_v21656);
+; 58317: 	(&pypy_g_pypy_rpython_memory_gc_semispace_SemiSpaceGC)->ssgc_inst_free = l_v21656;
+; 58318: 	OP_ADR_ADD(l_v21639, 0, l_v21658);
+; 58319: 	l_v21659 = (void*)l_v21658;
+; 58320: 	l_v21696 = l_v21659;
+; 58321: 	goto block6;
+; 58322: 
+; 58323:     block6:
+; 58324: 	l_v21640 = (struct pypy_tuple2_0 *)l_v21696;
+; 58325: 	l_v21660 = (l_v21640 != NULL);
+; 58326: 	if (!l_v21660) {
+; 58327: 		goto block9;
+; 58328: 	}
+; 58329: 	goto block7;
+; 58330: 
+; 58331:     block7:
+; 58332: 	RPyField(l_v21640, t_item0) = l_v21638;
+
+	fld	QWORD PTR _l_v21638$[esp+64]
+	mov	DWORD PTR [esi], 81			; 00000051H
+	lea	ecx, DWORD PTR [esi+24]
+	mov	DWORD PTR _pypy_g_pypy_rpython_memory_gc_semispace_SemiSpaceGC+4, ecx
+	fstp	QWORD PTR [esi+8]
+
+; 58333: 	RPyField(l_v21640, t_item1) = l_v21637;
+
+	mov	DWORD PTR [esi+16], edi
+
+; 58423: 	goto block8;
+; 58424: }
+
+	pop	edi
+	mov	eax, esi
+	pop	esi
+$block6$88281:
+$block8$88289:
+	pop	ebx
+	mov	esp, ebp
+	pop	ebp
+	ret	0
+_pypy_g_ll_math_ll_math_frexp ENDP
+_TEXT	ENDS
diff --git a/pypy/translator/c/gcc/trackgcroot.py b/pypy/translator/c/gcc/trackgcroot.py
--- a/pypy/translator/c/gcc/trackgcroot.py
+++ b/pypy/translator/c/gcc/trackgcroot.py
@@ -39,10 +39,15 @@
         self.uses_frame_pointer = False
         self.r_localvar = self.r_localvarnofp
         self.filetag = filetag
-        # a "stack bottom" function is either main() or a callback from C code
+        # a "stack bottom" function is either pypy_main_function() or a
+        # callback from C code.  In both cases they are identified by
+        # the presence of pypy_asm_stack_bottom().
         self.is_stack_bottom = False
 
     def computegcmaptable(self, verbose=0):
+        if self.funcname in ['main', '_main']:
+            return []     # don't analyze main(), its prologue may contain
+                          # strange instructions
         self.findlabels()
         self.parse_instructions()
         try:
@@ -226,7 +231,7 @@
         # in the frame at this point.  This doesn't count the return address
         # which is the word immediately following the frame in memory.
         # The 'framesize' is set to an odd value if it is only an estimate
-        # (see visit_andl()).
+        # (see InsnCannotFollowEsp).
 
         def walker(insn, size_delta):
             check = deltas.setdefault(insn, size_delta)
@@ -266,7 +271,8 @@
 
             match = self.r_localvar_esp.match(localvar)
             if match:
-                if localvar == self.TOP_OF_STACK: # for pushl and popl, by
+                if localvar == self.TOP_OF_STACK_MINUS_WORD:
+                                                  # for pushl and popl, by
                     hint = None                   # default ebp addressing is
                 else:                             # a bit nicer
                     hint = 'esp'
@@ -521,10 +527,8 @@
         target = match.group("target")
         if target == self.ESP:
             # only for  andl $-16, %esp  used to align the stack in main().
-            # The exact amount of adjutment is not known yet, so we use
-            # an odd-valued estimate to make sure the real value is not used
-            # elsewhere by the FunctionGcRootTracker.
-            return InsnCannotFollowEsp()
+            # main() should not be seen at all.
+            raise AssertionError("instruction unexpected outside of main()")
         else:
             return self.binary_insn(line)
 
@@ -588,10 +592,12 @@
     def _visit_push(self, line):
         match = self.r_unaryinsn.match(line)
         source = match.group(1)
-        return [InsnStackAdjust(-self.WORD)] + self.insns_for_copy(source, self.TOP_OF_STACK)
+        return self.insns_for_copy(source, self.TOP_OF_STACK_MINUS_WORD) + \
+               [InsnStackAdjust(-self.WORD)]
 
     def _visit_pop(self, target):
-        return self.insns_for_copy(self.TOP_OF_STACK, target) + [InsnStackAdjust(+self.WORD)]
+        return [InsnStackAdjust(+self.WORD)] + \
+               self.insns_for_copy(self.TOP_OF_STACK_MINUS_WORD, target)
 
     def _visit_prologue(self):
         # for the prologue of functions that use %ebp as frame pointer
@@ -983,15 +989,15 @@
     OPERAND = r'(?:[-\w$%+.:@"]+(?:[(][\w%,]+[)])?|[(][\w%,]+[)])'
     LABEL   = r'([a-zA-Z_$.][a-zA-Z0-9_$@.]*)'
     OFFSET_LABELS   = 2**30
-    TOP_OF_STACK = '0(%esp)'
+    TOP_OF_STACK_MINUS_WORD = '-4(%esp)'
 
     r_functionstart = re.compile(r"\t.type\s+"+LABEL+",\s*[@]function\s*$")
     r_functionend   = re.compile(r"\t.size\s+"+LABEL+",\s*[.]-"+LABEL+"\s*$")
-    LOCALVAR        = r"%eax|%edx|%ecx|%ebx|%esi|%edi|%ebp|\d*[(]%esp[)]"
+    LOCALVAR        = r"%eax|%edx|%ecx|%ebx|%esi|%edi|%ebp|-?\d*[(]%esp[)]"
     LOCALVARFP      = LOCALVAR + r"|-?\d*[(]%ebp[)]"
     r_localvarnofp  = re.compile(LOCALVAR)
     r_localvarfp    = re.compile(LOCALVARFP)
-    r_localvar_esp  = re.compile(r"(\d*)[(]%esp[)]")
+    r_localvar_esp  = re.compile(r"(-?\d*)[(]%esp[)]")
     r_localvar_ebp  = re.compile(r"(-?\d*)[(]%ebp[)]")
 
     r_rel_label      = re.compile(r"(\d+):\s*$")
@@ -1044,7 +1050,7 @@
     OPERAND = r'(?:[-\w$%+.:@"]+(?:[(][\w%,]+[)])?|[(][\w%,]+[)])'
     LABEL   = r'([a-zA-Z_$.][a-zA-Z0-9_$@.]*)'
     OFFSET_LABELS   = 2**30
-    TOP_OF_STACK = '0(%rsp)'
+    TOP_OF_STACK_MINUS_WORD = '-8(%rsp)'
 
     r_functionstart = re.compile(r"\t.type\s+"+LABEL+",\s*[@]function\s*$")
     r_functionend   = re.compile(r"\t.size\s+"+LABEL+",\s*[.]-"+LABEL+"\s*$")
@@ -1140,7 +1146,7 @@
     CALLEE_SAVE_REGISTERS = ['ebx', 'esi', 'edi', 'ebp']
     REG2LOC = dict((_reg, LOC_REG | ((_i+1)<<2))
                    for _i, _reg in enumerate(CALLEE_SAVE_REGISTERS))
-    TOP_OF_STACK = 'DWORD PTR [esp]'
+    TOP_OF_STACK_MINUS_WORD = 'DWORD PTR [esp-4]'
 
     OPERAND = r'(?:(:?WORD|DWORD|BYTE) PTR |OFFSET )?[_\w?:@$]*(?:[-+0-9]+)?(:?\[[-+*\w0-9]+\])?'
     LABEL   = r'([a-zA-Z_$@.][a-zA-Z0-9_$@.]*)'
@@ -1323,12 +1329,11 @@
         self.verbose = verbose
         self.shuffle = shuffle
         self.gcmaptable = []
-        self.seen_main = False
 
-    def process(self, iterlines, newfile, entrypoint='main', filename='?'):
+    def process(self, iterlines, newfile, filename='?'):
         for in_function, lines in self.find_functions(iterlines):
             if in_function:
-                tracker = self.process_function(lines, entrypoint, filename)
+                tracker = self.process_function(lines, filename)
                 lines = tracker.lines
             self.write_newfile(newfile, lines, filename.split('.')[0])
         if self.verbose == 1:
@@ -1337,11 +1342,9 @@
     def write_newfile(self, newfile, lines, grist):
         newfile.writelines(lines)
 
-    def process_function(self, lines, entrypoint, filename):
+    def process_function(self, lines, filename):
         tracker = self.FunctionGcRootTracker(
             lines, filetag=getidentifier(filename))
-        is_main = tracker.funcname == entrypoint
-        tracker.is_stack_bottom = is_main
         if self.verbose == 1:
             sys.stderr.write('.')
         elif self.verbose > 1:
@@ -1356,7 +1359,6 @@
             self.gcmaptable[:0] = table
         else:
             self.gcmaptable.extend(table)
-        self.seen_main |= is_main
         return tracker
 
 class ElfAssemblerParser(AssemblerParser):
@@ -1432,11 +1434,6 @@
         if functionlines:
             yield in_function, functionlines
 
-    def process_function(self, lines, entrypoint, filename):
-        entrypoint = '_' + entrypoint
-        return super(DarwinAssemblerParser, self).process_function(
-            lines, entrypoint, filename)
-
 class DarwinAssemblerParser64(DarwinAssemblerParser):
     format = "darwin64"
     FunctionGcRootTracker = DarwinFunctionGcRootTracker64
@@ -1494,11 +1491,6 @@
             "missed the end of the previous function")
         yield False, functionlines
 
-    def process_function(self, lines, entrypoint, filename):
-        entrypoint = '_' + entrypoint
-        return super(MsvcAssemblerParser, self).process_function(
-            lines, entrypoint, filename)
-
     def write_newfile(self, newfile, lines, grist):
         newlines = []
         for line in lines:
@@ -1560,24 +1552,21 @@
         self.shuffle = shuffle     # to debug the sorting logic in asmgcroot.py
         self.format = format
         self.gcmaptable = []
-        self.seen_main = False
 
     def dump_raw_table(self, output):
-        print >> output, "seen_main = %d" % (self.seen_main,)
+        print 'raw table'
         for entry in self.gcmaptable:
             print >> output, entry
 
     def reload_raw_table(self, input):
         firstline = input.readline()
-        assert firstline.startswith("seen_main = ")
-        self.seen_main |= bool(int(firstline[len("seen_main = "):].strip()))
+        assert firstline == 'raw table\n'
         for line in input:
             entry = eval(line)
             assert type(entry) is tuple
             self.gcmaptable.append(entry)
 
     def dump(self, output):
-        assert self.seen_main
 
         def _globalname(name, disp=""):
             return tracker_cls.function_names_prefix + name
@@ -1649,8 +1638,8 @@
             s = """\
             /* See description in asmgcroot.py */
             .cfi_startproc
-            movq\t%rdi, %rdx\t/* 1st argument, which is the callback */
-            movq\t%rsi, %rcx\t/* 2nd argument, which is gcrootanchor */
+            /* %rdi is the 1st argument, which is the callback */
+            /* %rsi is the 2nd argument, which is gcrootanchor */
             movq\t%rsp, %rax\t/* my frame top address */
             pushq\t%rax\t\t/* ASM_FRAMEDATA[8] */
             pushq\t%rbp\t\t/* ASM_FRAMEDATA[7] */
@@ -1663,15 +1652,15 @@
             /* Add this ASM_FRAMEDATA to the front of the circular linked */
             /* list.  Let's call it 'self'.                               */
 
-            movq\t8(%rcx), %rax\t/* next = gcrootanchor->next */
+            movq\t8(%rsi), %rax\t/* next = gcrootanchor->next */
             pushq\t%rax\t\t\t\t/* self->next = next */
-            pushq\t%rcx\t\t\t/* self->prev = gcrootanchor */
-            movq\t%rsp, 8(%rcx)\t/* gcrootanchor->next = self */
+            pushq\t%rsi\t\t\t/* self->prev = gcrootanchor */
+            movq\t%rsp, 8(%rsi)\t/* gcrootanchor->next = self */
             movq\t%rsp, 0(%rax)\t\t\t/* next->prev = self */
             .cfi_def_cfa_offset 80\t/* 9 pushes + the retaddr = 80 bytes */
 
             /* note: the Mac OS X 16 bytes aligment must be respected. */
-            call\t*%rdx\t\t/* invoke the callback */
+            call\t*%rdi\t\t/* invoke the callback */
 
             /* Detach this ASM_FRAMEDATA from the circular linked list */
             popq\t%rsi\t\t/* prev = self->prev */
@@ -1688,7 +1677,7 @@
             popq\t%rcx\t\t/* ignored      ASM_FRAMEDATA[8] */
 
             /* the return value is the one of the 'call' above, */
-            /* because %rax (and possibly %rdx) are unmodified  */
+            /* because %rax is unmodified  */
             ret
             .cfi_endproc
             """
@@ -1835,11 +1824,11 @@
             """.replace("__gccallshapes", _globalname("__gccallshapes"))
             output.writelines(shapelines)
 
-    def process(self, iterlines, newfile, entrypoint='main', filename='?'):
+    def process(self, iterlines, newfile, filename='?'):
         parser = PARSERS[format](verbose=self.verbose, shuffle=self.shuffle)
         for in_function, lines in parser.find_functions(iterlines):
             if in_function:
-                tracker = parser.process_function(lines, entrypoint, filename)
+                tracker = parser.process_function(lines, filename)
                 lines = tracker.lines
             parser.write_newfile(newfile, lines, filename.split('.')[0])
         if self.verbose == 1:
@@ -1848,7 +1837,6 @@
             self.gcmaptable[:0] = parser.gcmaptable
         else:
             self.gcmaptable.extend(parser.gcmaptable)
-        self.seen_main |= parser.seen_main
 
 
 class UnrecognizedOperation(Exception):
@@ -1915,7 +1903,6 @@
             format = 'elf64'
         else:
             format = 'elf'
-    entrypoint = 'main'
     while len(sys.argv) > 1:
         if sys.argv[1] == '-v':
             del sys.argv[1]
@@ -1929,9 +1916,9 @@
         elif sys.argv[1].startswith('-f'):
             format = sys.argv[1][2:]
             del sys.argv[1]
-        elif sys.argv[1].startswith('-m'):
-            entrypoint = sys.argv[1][2:]
-            del sys.argv[1]
+        elif sys.argv[1].startswith('-'):
+            print >> sys.stderr, "unrecognized option:", sys.argv[1]
+            sys.exit(1)
         else:
             break
     tracker = GcRootTracker(verbose=verbose, shuffle=shuffle, format=format)
@@ -1940,7 +1927,7 @@
         firstline = f.readline()
         f.seek(0)
         assert firstline, "file %r is empty!" % (fn,)
-        if firstline.startswith('seen_main = '):
+        if firstline == 'raw table\n':
             tracker.reload_raw_table(f)
             f.close()
         else:
@@ -1948,7 +1935,7 @@
             lblfn = fn[:-2] + '.lbl.s'
             g = open(lblfn, 'w')
             try:
-                tracker.process(f, g, entrypoint=entrypoint, filename=fn)
+                tracker.process(f, g, filename=fn)
             except:
                 g.close()
                 os.unlink(lblfn)
diff --git a/pypy/translator/c/genc.py b/pypy/translator/c/genc.py
--- a/pypy/translator/c/genc.py
+++ b/pypy/translator/c/genc.py
@@ -602,7 +602,7 @@
                         'cmd /c $(MASM) /nologo /Cx /Cp /Zm /coff /Fo$@ /c $< $(INCLUDEDIRS)')
                 mk.rule('.c.gcmap', '',
                         ['$(CC) /nologo $(ASM_CFLAGS) /c /FAs /Fa$*.s $< $(INCLUDEDIRS)',
-                         'cmd /c ' + python + '$(PYPYDIR)/translator/c/gcc/trackgcroot.py -fmsvc -m$(PYPY_MAIN_FUNCTION) -t $*.s > $@']
+                         'cmd /c ' + python + '$(PYPYDIR)/translator/c/gcc/trackgcroot.py -fmsvc -t $*.s > $@']
                         )
                 mk.rule('gcmaptable.c', '$(GCMAPFILES)',
                         'cmd /c ' + python + '$(PYPYDIR)/translator/c/gcc/trackgcroot.py -fmsvc $(GCMAPFILES) > $@')
@@ -613,7 +613,7 @@
                 mk.rule('%.lbl.s %.gcmap', '%.s',
                         [python +
                              '$(PYPYDIR)/translator/c/gcc/trackgcroot.py '
-                             '-m$(PYPY_MAIN_FUNCTION) -t $< > $*.gctmp',
+                             '-t $< > $*.gctmp',
                          'mv $*.gctmp $*.gcmap'])
                 mk.rule('gcmaptable.s', '$(GCMAPFILES)',
                         [python +
@@ -900,8 +900,9 @@
     print >> f, '}'
 
 def commondefs(defines):
-    from pypy.rlib.rarithmetic import LONG_BIT
+    from pypy.rlib.rarithmetic import LONG_BIT, LONGLONG_BIT
     defines['PYPY_LONG_BIT'] = LONG_BIT
+    defines['PYPY_LONGLONG_BIT'] = LONGLONG_BIT
 
 def add_extra_files(eci):
     srcdir = py.path.local(autopath.pypydir).join('translator', 'c', 'src')
diff --git a/pypy/translator/c/src/cjkcodecs/multibytecodec.c b/pypy/translator/c/src/cjkcodecs/multibytecodec.c
--- a/pypy/translator/c/src/cjkcodecs/multibytecodec.c
+++ b/pypy/translator/c/src/cjkcodecs/multibytecodec.c
@@ -1,4 +1,5 @@
 #include <stdlib.h>
+#include <string.h>
 #include "src/cjkcodecs/multibytecodec.h"
 
 
@@ -93,6 +94,22 @@
   return d->inbuf - d->inbuf_start;
 }
 
+Py_ssize_t pypy_cjk_dec_replace_on_error(struct pypy_cjk_dec_s* d,
+                                         Py_UNICODE *newbuf, Py_ssize_t newlen,
+                                         Py_ssize_t in_offset)
+{
+  if (newlen > 0)
+    {
+      if (d->outbuf + newlen > d->outbuf_end)
+        if (expand_decodebuffer(d, newlen) == -1)
+          return MBERR_NOMEMORY;
+      memcpy(d->outbuf, newbuf, newlen * sizeof(Py_UNICODE));
+      d->outbuf += newlen;
+    }
+  d->inbuf = d->inbuf_start + in_offset;
+  return 0;
+}
+
 /************************************************************/
 
 struct pypy_cjk_enc_s *pypy_cjk_enc_init(const MultibyteCodec *codec,
@@ -209,3 +226,19 @@
 {
   return d->inbuf - d->inbuf_start;
 }
+
+Py_ssize_t pypy_cjk_enc_replace_on_error(struct pypy_cjk_enc_s* d,
+                                         char *newbuf, Py_ssize_t newlen,
+                                         Py_ssize_t in_offset)
+{
+  if (newlen > 0)
+    {
+      if (d->outbuf + newlen > d->outbuf_end)
+        if (expand_encodebuffer(d, newlen) == -1)
+          return MBERR_NOMEMORY;
+      memcpy(d->outbuf, newbuf, newlen);
+      d->outbuf += newlen;
+    }
+  d->inbuf = d->inbuf_start + in_offset;
+  return 0;
+}
diff --git a/pypy/translator/c/src/cjkcodecs/multibytecodec.h b/pypy/translator/c/src/cjkcodecs/multibytecodec.h
--- a/pypy/translator/c/src/cjkcodecs/multibytecodec.h
+++ b/pypy/translator/c/src/cjkcodecs/multibytecodec.h
@@ -102,6 +102,8 @@
 Py_ssize_t pypy_cjk_dec_outlen(struct pypy_cjk_dec_s *);
 Py_ssize_t pypy_cjk_dec_inbuf_remaining(struct pypy_cjk_dec_s *d);
 Py_ssize_t pypy_cjk_dec_inbuf_consumed(struct pypy_cjk_dec_s* d);
+Py_ssize_t pypy_cjk_dec_replace_on_error(struct pypy_cjk_dec_s* d,
+                                         Py_UNICODE *, Py_ssize_t, Py_ssize_t);
 
 struct pypy_cjk_enc_s {
   const MultibyteCodec *codec;
@@ -119,6 +121,8 @@
 Py_ssize_t pypy_cjk_enc_outlen(struct pypy_cjk_enc_s *);
 Py_ssize_t pypy_cjk_enc_inbuf_remaining(struct pypy_cjk_enc_s *d);
 Py_ssize_t pypy_cjk_enc_inbuf_consumed(struct pypy_cjk_enc_s* d);
+Py_ssize_t pypy_cjk_enc_replace_on_error(struct pypy_cjk_enc_s* d,
+                                         char *, Py_ssize_t, Py_ssize_t);
 
 /* list of codecs defined in the .c files */
 
diff --git a/pypy/translator/c/src/int.h b/pypy/translator/c/src/int.h
--- a/pypy/translator/c/src/int.h
+++ b/pypy/translator/c/src/int.h
@@ -73,15 +73,28 @@
 
 /* NB. shifting has same limitations as C: the shift count must be
        >= 0 and < LONG_BITS. */
-#define OP_INT_RSHIFT(x,y,r)    r = Py_ARITHMETIC_RIGHT_SHIFT(long, x, y)
-#define OP_UINT_RSHIFT(x,y,r)   r = (x) >> (y)
-#define OP_LLONG_RSHIFT(x,y,r)  r = Py_ARITHMETIC_RIGHT_SHIFT(PY_LONG_LONG,x,y)
-#define OP_ULLONG_RSHIFT(x,y,r) r = (x) >> (y)
+#define CHECK_SHIFT_RANGE(y, bits) RPyAssert(y >= 0 && y < bits, \
+	       "The shift count is outside of the supported range")
 
-#define OP_INT_LSHIFT(x,y,r)    r = (x) << (y)
-#define OP_UINT_LSHIFT(x,y,r)   r = (x) << (y)
-#define OP_LLONG_LSHIFT(x,y,r)  r = (x) << (y)
-#define OP_ULLONG_LSHIFT(x,y,r) r = (x) << (y)
+
+#define OP_INT_RSHIFT(x,y,r)    CHECK_SHIFT_RANGE(y, PYPY_LONG_BIT); \
+						r = Py_ARITHMETIC_RIGHT_SHIFT(long, x, (y))
+#define OP_UINT_RSHIFT(x,y,r)   CHECK_SHIFT_RANGE(y, PYPY_LONG_BIT); \
+						r = (x) >> (y)
+#define OP_LLONG_RSHIFT(x,y,r)  CHECK_SHIFT_RANGE(y, PYPY_LONGLONG_BIT); \
+						r = Py_ARITHMETIC_RIGHT_SHIFT(PY_LONG_LONG,x, (y))
+#define OP_ULLONG_RSHIFT(x,y,r) CHECK_SHIFT_RANGE(y, PYPY_LONGLONG_BIT); \
+						r = (x) >> (y)
+
+
+#define OP_INT_LSHIFT(x,y,r)    CHECK_SHIFT_RANGE(y, PYPY_LONG_BIT); \
+							r = (x) << (y)
+#define OP_UINT_LSHIFT(x,y,r)   CHECK_SHIFT_RANGE(y, PYPY_LONG_BIT); \
+							r = (x) << (y)
+#define OP_LLONG_LSHIFT(x,y,r)  CHECK_SHIFT_RANGE(y, PYPY_LONGLONG_BIT); \
+							r = (x) << (y)
+#define OP_ULLONG_LSHIFT(x,y,r) CHECK_SHIFT_RANGE(y, PYPY_LONGLONG_BIT); \
+							r = (x) << (y)
 
 #define OP_INT_LSHIFT_OVF(x,y,r) \
 	OP_INT_LSHIFT(x,y,r); \
diff --git a/pypy/translator/c/src/main.h b/pypy/translator/c/src/main.h
--- a/pypy/translator/c/src/main.h
+++ b/pypy/translator/c/src/main.h
@@ -23,12 +23,19 @@
 #include "src/winstuff.c"
 #endif
 
-int PYPY_MAIN_FUNCTION(int argc, char *argv[])
+#ifdef __GNUC__
+/* Hack to prevent this function from being inlined.  Helps asmgcc
+   because the main() function has often a different prologue/epilogue. */
+int pypy_main_function(int argc, char *argv[]) __attribute__((__noinline__));
+#endif
+
+int pypy_main_function(int argc, char *argv[])
 {
     char *errmsg;
     int i, exitcode;
     RPyListOfString *list;
 
+    pypy_asm_stack_bottom();
     instrument_setup();
 
     if (sizeof(void*) != SIZEOF_LONG) {
@@ -74,4 +81,9 @@
     abort();
 }
 
+int PYPY_MAIN_FUNCTION(int argc, char *argv[])
+{
+    return pypy_main_function(argc, argv);
+}
+
 #endif /* PYPY_NOT_MAIN_FILE */
diff --git a/pypy/translator/c/src/mem.h b/pypy/translator/c/src/mem.h
--- a/pypy/translator/c/src/mem.h
+++ b/pypy/translator/c/src/mem.h
@@ -222,6 +222,15 @@
 
 #endif /* USING_BOEHM_GC */
 
+
+#ifdef USING_NO_GC_AT_ALL
+#define OP_BOEHM_ZERO_MALLOC(size, r, restype, is_atomic, is_varsize)  \
+  r = (restype) calloc(1, size);
+#define OP_BOEHM_DISAPPEARING_LINK(link, obj, r)  /* nothing */
+#define OP_GC__DISABLE_FINALIZERS(r)  /* nothing */
+#define OP_GC__ENABLE_FINALIZERS(r)  /* nothing */
+#endif
+
 /************************************************************/
 /* weakref support */
 
diff --git a/pypy/translator/c/test/test_standalone.py b/pypy/translator/c/test/test_standalone.py
--- a/pypy/translator/c/test/test_standalone.py
+++ b/pypy/translator/c/test/test_standalone.py
@@ -596,6 +596,42 @@
         # The traceback stops at f() because it's the first function that
         # captures the AssertionError, which makes the program abort.
 
+    def test_int_lshift_too_large(self):
+        from pypy.rlib.rarithmetic import LONG_BIT, LONGLONG_BIT
+        def entry_point(argv):
+            a = int(argv[1])
+            b = int(argv[2])
+            print a << b
+            return 0
+
+        t, cbuilder = self.compile(entry_point, debug=True)
+        out = cbuilder.cmdexec("10 2", expect_crash=False)
+        assert out.strip() == str(10 << 2)
+        cases = [-4, LONG_BIT, LONGLONG_BIT]
+        for x in cases:
+            out, err = cbuilder.cmdexec("%s %s" % (1, x), expect_crash=True)
+            lines = err.strip()
+            assert 'The shift count is outside of the supported range' in lines
+
+    def test_llong_rshift_too_large(self):
+        from pypy.rlib.rarithmetic import LONG_BIT, LONGLONG_BIT
+        def entry_point(argv):
+            a = r_longlong(int(argv[1]))
+            b = r_longlong(int(argv[2]))
+            print a >> b
+            return 0
+
+        t, cbuilder = self.compile(entry_point, debug=True)
+        out = cbuilder.cmdexec("10 2", expect_crash=False)
+        assert out.strip() == str(10 >> 2)
+        out = cbuilder.cmdexec("%s %s" % (-42, LONGLONG_BIT - 1), expect_crash=False)
+        assert out.strip() == '-1'
+        cases = [-4, LONGLONG_BIT]
+        for x in cases:
+            out, err = cbuilder.cmdexec("%s %s" % (1, x), expect_crash=True)
+            lines = err.strip()
+            assert 'The shift count is outside of the supported range' in lines
+
     def test_ll_assert_error_debug(self):
         def entry_point(argv):
             ll_assert(len(argv) != 1, "foobar")
diff --git a/pypy/translator/driver.py b/pypy/translator/driver.py
--- a/pypy/translator/driver.py
+++ b/pypy/translator/driver.py
@@ -559,6 +559,7 @@
                 shutil.copy(str(soname), str(newsoname))
                 self.log.info("copied: %s" % (newsoname,))
             self.c_entryp = newexename
+        self.log.info('usession directory: %s' % (udir,))
         self.log.info("created: %s" % (self.c_entryp,))
 
     def task_compile_c(self):
diff --git a/pypy/translator/goal/targetnumpystandalone.py b/pypy/translator/goal/targetnumpystandalone.py
--- a/pypy/translator/goal/targetnumpystandalone.py
+++ b/pypy/translator/goal/targetnumpystandalone.py
@@ -10,46 +10,32 @@
 """
 
 import time
-from pypy.module.micronumpy.numarray import SingleDimArray, Code, compute
+from pypy.module.micronumpy.compile import numpy_compile
 from pypy.jit.codewriter.policy import JitPolicy
-
-def create_array(size):
-    a = SingleDimArray(size)
-    for i in range(size):
-        a.storage[i] = float(i % 10)
-    return a
+from pypy.rpython.annlowlevel import hlstr
 
 def entry_point(argv):
     if len(argv) != 3:
         print __doc__
         return 1
-    bytecode = argv[1]
-    for b in bytecode:
-        if b not in 'alf':
-            print "WRONG BYTECODE"
-            print __doc__
-            return 2
     try:
         size = int(argv[2])
     except ValueError:
         print "INVALID LITERAL FOR INT:", argv[2]
         print __doc__
         return 3
-    no_arrays = bytecode.count('l')
-    no_floats = bytecode.count('f')
-    arrays = []
-    floats = []
-    for i in range(no_arrays):
-        arrays.append(create_array(size))
-    for i in range(no_floats):
-        floats.append(float(i + 1))
-    code = Code(bytecode, arrays, floats)
     t0 = time.time()
-    compute(code)
-    print "bytecode:", bytecode, "size:", size
+    main(argv[0], size)
+    print "bytecode:", argv[0], "size:", size
     print "took:", time.time() - t0
     return 0
 
+def main(bc, size):
+    if not isinstance(bc, str):
+        bc = hlstr(bc) # for tests
+    a = numpy_compile(bc, size)
+    a = a.compute()
+
 def target(*args):
     return entry_point, None
 
diff --git a/pypy/translator/goal/translate.py b/pypy/translator/goal/translate.py
--- a/pypy/translator/goal/translate.py
+++ b/pypy/translator/goal/translate.py
@@ -149,6 +149,9 @@
             log.ERROR("Could not find target %r" % (arg, ))
             sys.exit(1)
 
+    # apply the platform settings
+    set_platform(config)
+
     targetspec = translateconfig.targetspec
     targetspec_dic = load_target(targetspec)
 
@@ -164,9 +167,6 @@
                 existing_config=config,
                 translating=True)
 
-    # apply the platform settings
-    set_platform(config)
-
     # apply the optimization level settings
     set_opt_level(config, translateconfig.opt)
 
diff --git a/pypy/translator/platform/__init__.py b/pypy/translator/platform/__init__.py
--- a/pypy/translator/platform/__init__.py
+++ b/pypy/translator/platform/__init__.py
@@ -38,6 +38,7 @@
     c_environ = None
 
     relevant_environ = ()
+    log_errors = True
 
     so_prefixes = ('',)
 
@@ -120,11 +121,12 @@
         if returncode != 0:
             errorfile = outname.new(ext='errors')
             errorfile.write(stderr, 'wb')
-            stderrlines = stderr.splitlines()
-            for line in stderrlines:
-                log.Error(line)
-            # ^^^ don't use ERROR, because it might actually be fine.
-            # Also, ERROR confuses lib-python/conftest.py.
+            if self.log_errors:
+                stderrlines = stderr.splitlines()
+                for line in stderrlines:
+                    log.Error(line)
+                # ^^^ don't use ERROR, because it might actually be fine.
+                # Also, ERROR confuses lib-python/conftest.py.
             raise CompilationError(stdout, stderr)
         else:
             for line in stderr.splitlines():