[pypy-commit] pypy jit-short_from_state: hg merge default

hakanardo noreply at buildbot.pypy.org
Sun Jun 19 16:18:17 CEST 2011


Author: Hakan Ardo <hakan at debian.org>
Branch: jit-short_from_state
Changeset: r45006:21147601bbcf
Date: 2011-06-19 16:22 +0200
http://bitbucket.org/pypy/pypy/changeset/21147601bbcf/

Log:	hg merge default

diff --git a/.hgignore b/.hgignore
--- a/.hgignore
+++ b/.hgignore
@@ -64,6 +64,7 @@
 ^pypy/doc/image/lattice3\.png$
 ^pypy/doc/image/stackless_informal\.png$
 ^pypy/doc/image/parsing_example.+\.png$
+^pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test\.o$
 ^compiled
 ^.git/
 ^release/
diff --git a/_pytest/__init__.py b/_pytest/__init__.py
--- a/_pytest/__init__.py
+++ b/_pytest/__init__.py
@@ -1,2 +1,2 @@
 #
-__version__ = '2.0.3'
+__version__ = '2.1.0.dev4'
diff --git a/_pytest/assertion.py b/_pytest/assertion.py
deleted file mode 100644
--- a/_pytest/assertion.py
+++ /dev/null
@@ -1,177 +0,0 @@
-"""
-support for presented detailed information in failing assertions.
-"""
-import py
-import sys
-from _pytest.monkeypatch import monkeypatch
-
-def pytest_addoption(parser):
-    group = parser.getgroup("debugconfig")
-    group._addoption('--no-assert', action="store_true", default=False,
-        dest="noassert",
-        help="disable python assert expression reinterpretation."),
-
-def pytest_configure(config):
-    # The _reprcompare attribute on the py.code module is used by
-    # py._code._assertionnew to detect this plugin was loaded and in
-    # turn call the hooks defined here as part of the
-    # DebugInterpreter.
-    m = monkeypatch()
-    config._cleanup.append(m.undo)
-    warn_about_missing_assertion()
-    if not config.getvalue("noassert") and not config.getvalue("nomagic"):
-        def callbinrepr(op, left, right):
-            hook_result = config.hook.pytest_assertrepr_compare(
-                config=config, op=op, left=left, right=right)
-            for new_expl in hook_result:
-                if new_expl:
-                    return '\n~'.join(new_expl)
-        m.setattr(py.builtin.builtins,
-                  'AssertionError', py.code._AssertionError)
-        m.setattr(py.code, '_reprcompare', callbinrepr)
-
-def warn_about_missing_assertion():
-    try:
-        assert False
-    except AssertionError:
-        pass
-    else:
-        sys.stderr.write("WARNING: failing tests may report as passing because "
-        "assertions are turned off!  (are you using python -O?)\n")
-
-# Provide basestring in python3
-try:
-    basestring = basestring
-except NameError:
-    basestring = str
-
-
-def pytest_assertrepr_compare(op, left, right):
-    """return specialised explanations for some operators/operands"""
-    width = 80 - 15 - len(op) - 2 # 15 chars indentation, 1 space around op
-    left_repr = py.io.saferepr(left, maxsize=int(width/2))
-    right_repr = py.io.saferepr(right, maxsize=width-len(left_repr))
-    summary = '%s %s %s' % (left_repr, op, right_repr)
-
-    issequence = lambda x: isinstance(x, (list, tuple))
-    istext = lambda x: isinstance(x, basestring)
-    isdict = lambda x: isinstance(x, dict)
-    isset = lambda x: isinstance(x, set)
-
-    explanation = None
-    try:
-        if op == '==':
-            if istext(left) and istext(right):
-                explanation = _diff_text(left, right)
-            elif issequence(left) and issequence(right):
-                explanation = _compare_eq_sequence(left, right)
-            elif isset(left) and isset(right):
-                explanation = _compare_eq_set(left, right)
-            elif isdict(left) and isdict(right):
-                explanation = _diff_text(py.std.pprint.pformat(left),
-                                         py.std.pprint.pformat(right))
-        elif op == 'not in':
-            if istext(left) and istext(right):
-                explanation = _notin_text(left, right)
-    except py.builtin._sysex:
-        raise
-    except:
-        excinfo = py.code.ExceptionInfo()
-        explanation = ['(pytest_assertion plugin: representation of '
-            'details failed. Probably an object has a faulty __repr__.)',
-            str(excinfo)
-            ]
-
-
-    if not explanation:
-        return None
-
-    # Don't include pageloads of data, should be configurable
-    if len(''.join(explanation)) > 80*8:
-        explanation = ['Detailed information too verbose, truncated']
-
-    return [summary] + explanation
-
-
-def _diff_text(left, right):
-    """Return the explanation for the diff between text
-
-    This will skip leading and trailing characters which are
-    identical to keep the diff minimal.
-    """
-    explanation = []
-    i = 0 # just in case left or right has zero length
-    for i in range(min(len(left), len(right))):
-        if left[i] != right[i]:
-            break
-    if i > 42:
-        i -= 10                 # Provide some context
-        explanation = ['Skipping %s identical '
-                       'leading characters in diff' % i]
-        left = left[i:]
-        right = right[i:]
-    if len(left) == len(right):
-        for i in range(len(left)):
-            if left[-i] != right[-i]:
-                break
-        if i > 42:
-            i -= 10     # Provide some context
-            explanation += ['Skipping %s identical '
-                            'trailing characters in diff' % i]
-            left = left[:-i]
-            right = right[:-i]
-    explanation += [line.strip('\n')
-                    for line in py.std.difflib.ndiff(left.splitlines(),
-                                                     right.splitlines())]
-    return explanation
-
-
-def _compare_eq_sequence(left, right):
-    explanation = []
-    for i in range(min(len(left), len(right))):
-        if left[i] != right[i]:
-            explanation += ['At index %s diff: %r != %r' %
-                            (i, left[i], right[i])]
-            break
-    if len(left) > len(right):
-        explanation += ['Left contains more items, '
-            'first extra item: %s' % py.io.saferepr(left[len(right)],)]
-    elif len(left) < len(right):
-        explanation += ['Right contains more items, '
-            'first extra item: %s' % py.io.saferepr(right[len(left)],)]
-    return explanation # + _diff_text(py.std.pprint.pformat(left),
-                       #             py.std.pprint.pformat(right))
-
-
-def _compare_eq_set(left, right):
-    explanation = []
-    diff_left = left - right
-    diff_right = right - left
-    if diff_left:
-        explanation.append('Extra items in the left set:')
-        for item in diff_left:
-            explanation.append(py.io.saferepr(item))
-    if diff_right:
-        explanation.append('Extra items in the right set:')
-        for item in diff_right:
-            explanation.append(py.io.saferepr(item))
-    return explanation
-
-
-def _notin_text(term, text):
-    index = text.find(term)
-    head = text[:index]
-    tail = text[index+len(term):]
-    correct_text = head + tail
-    diff = _diff_text(correct_text, text)
-    newdiff = ['%s is contained here:' % py.io.saferepr(term, maxsize=42)]
-    for line in diff:
-        if line.startswith('Skipping'):
-            continue
-        if line.startswith('- '):
-            continue
-        if line.startswith('+ '):
-            newdiff.append('  ' + line[2:])
-        else:
-            newdiff.append(line)
-    return newdiff
diff --git a/_pytest/assertion/__init__.py b/_pytest/assertion/__init__.py
new file mode 100644
--- /dev/null
+++ b/_pytest/assertion/__init__.py
@@ -0,0 +1,128 @@
+"""
+support for presenting detailed information in failing assertions.
+"""
+import py
+import imp
+import marshal
+import struct
+import sys
+import pytest
+from _pytest.monkeypatch import monkeypatch
+from _pytest.assertion import reinterpret, util
+
+try:
+    from _pytest.assertion.rewrite import rewrite_asserts
+except ImportError:
+    rewrite_asserts = None
+else:
+    import ast
+
+def pytest_addoption(parser):
+    group = parser.getgroup("debugconfig")
+    group.addoption('--assertmode', action="store", dest="assertmode",
+                    choices=("on", "old", "off", "default"), default="default",
+                    metavar="on|old|off",
+                    help="""control assertion debugging tools.
+'off' performs no assertion debugging.
+'old' reinterprets the expressions in asserts to glean information.
+'on' (the default) rewrites the assert statements in test modules to provide
+sub-expression results.""")
+    group.addoption('--no-assert', action="store_true", default=False,
+        dest="noassert", help="DEPRECATED equivalent to --assertmode=off")
+    group.addoption('--nomagic', action="store_true", default=False,
+        dest="nomagic", help="DEPRECATED equivalent to --assertmode=off")
+
+class AssertionState:
+    """State for the assertion plugin."""
+
+    def __init__(self, config, mode):
+        self.mode = mode
+        self.trace = config.trace.root.get("assertion")
+
+def pytest_configure(config):
+    warn_about_missing_assertion()
+    mode = config.getvalue("assertmode")
+    if config.getvalue("noassert") or config.getvalue("nomagic"):
+        if mode not in ("off", "default"):
+            raise pytest.UsageError("assertion options conflict")
+        mode = "off"
+    elif mode == "default":
+        mode = "on"
+    if mode != "off":
+        def callbinrepr(op, left, right):
+            hook_result = config.hook.pytest_assertrepr_compare(
+                config=config, op=op, left=left, right=right)
+            for new_expl in hook_result:
+                if new_expl:
+                    return '\n~'.join(new_expl)
+        m = monkeypatch()
+        config._cleanup.append(m.undo)
+        m.setattr(py.builtin.builtins, 'AssertionError',
+                  reinterpret.AssertionError)
+        m.setattr(util, '_reprcompare', callbinrepr)
+    if mode == "on" and rewrite_asserts is None:
+        mode = "old"
+    config._assertstate = AssertionState(config, mode)
+    config._assertstate.trace("configured with mode set to %r" % (mode,))
+
+def _write_pyc(co, source_path):
+    if hasattr(imp, "cache_from_source"):
+        # Handle PEP 3147 pycs.
+        pyc = py.path.local(imp.cache_from_source(str(source_path)))
+        pyc.ensure()
+    else:
+        pyc = source_path + "c"
+    mtime = int(source_path.mtime())
+    fp = pyc.open("wb")
+    try:
+        fp.write(imp.get_magic())
+        fp.write(struct.pack("<l", mtime))
+        marshal.dump(co, fp)
+    finally:
+        fp.close()
+    return pyc
+
+def before_module_import(mod):
+    if mod.config._assertstate.mode != "on":
+        return
+    # Some deep magic: load the source, rewrite the asserts, and write a
+    # fake pyc, so that it'll be loaded when the module is imported.
+    source = mod.fspath.read()
+    try:
+        tree = ast.parse(source)
+    except SyntaxError:
+        # Let this pop up again in the real import.
+        mod.config._assertstate.trace("failed to parse: %r" % (mod.fspath,))
+        return
+    rewrite_asserts(tree)
+    try:
+        co = compile(tree, str(mod.fspath), "exec")
+    except SyntaxError:
+        # It's possible that this error is from some bug in the assertion
+        # rewriting, but I don't know of a fast way to tell.
+        mod.config._assertstate.trace("failed to compile: %r" % (mod.fspath,))
+        return
+    mod._pyc = _write_pyc(co, mod.fspath)
+    mod.config._assertstate.trace("wrote pyc: %r" % (mod._pyc,))
+
+def after_module_import(mod):
+    if not hasattr(mod, "_pyc"):
+        return
+    state = mod.config._assertstate
+    try:
+        mod._pyc.remove()
+    except py.error.ENOENT:
+        state.trace("couldn't find pyc: %r" % (mod._pyc,))
+    else:
+        state.trace("removed pyc: %r" % (mod._pyc,))
+
+def warn_about_missing_assertion():
+    try:
+        assert False
+    except AssertionError:
+        pass
+    else:
+        sys.stderr.write("WARNING: failing tests may report as passing because "
+        "assertions are turned off!  (are you using python -O?)\n")
+
+pytest_assertrepr_compare = util.assertrepr_compare
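
For orientation (a sketch only, not part of the patch): _write_pyc above emits
the classic pyc layout of a 4-byte magic number, a 4-byte little-endian mtime,
and the marshalled code object, so the normal import machinery picks the
rewritten module up. Reading one back by hand looks roughly like this;
load_pyc and the file name are invented for illustration and assume the
pre-PEP-3147 layout:

    import imp, marshal, struct

    def load_pyc(path):
        f = open(path, "rb")
        try:
            if f.read(4) != imp.get_magic():
                raise ValueError("pyc written by a different interpreter")
            mtime, = struct.unpack("<l", f.read(4))   # matches struct.pack above
            return marshal.load(f)                    # the rewritten code object
        finally:
            f.close()

    co = load_pyc("test_example.pyc")                 # hypothetical file name
    exec(co, {})                                      # runs the rewritten module
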
diff --git a/_pytest/assertion/newinterpret.py b/_pytest/assertion/newinterpret.py
new file mode 100644
--- /dev/null
+++ b/_pytest/assertion/newinterpret.py
@@ -0,0 +1,333 @@
+"""
+Find intermediate evaluation results in assert statements through builtin AST.
+This should replace oldinterpret.py eventually.
+"""
+
+import sys
+import ast
+
+import py
+from _pytest.assertion import util
+from _pytest.assertion.reinterpret import BuiltinAssertionError
+
+
+if sys.platform.startswith("java") and sys.version_info < (2, 5, 2):
+    # See http://bugs.jython.org/issue1497
+    _exprs = ("BoolOp", "BinOp", "UnaryOp", "Lambda", "IfExp", "Dict",
+              "ListComp", "GeneratorExp", "Yield", "Compare", "Call",
+              "Repr", "Num", "Str", "Attribute", "Subscript", "Name",
+              "List", "Tuple")
+    _stmts = ("FunctionDef", "ClassDef", "Return", "Delete", "Assign",
+              "AugAssign", "Print", "For", "While", "If", "With", "Raise",
+              "TryExcept", "TryFinally", "Assert", "Import", "ImportFrom",
+              "Exec", "Global", "Expr", "Pass", "Break", "Continue")
+    _expr_nodes = set(getattr(ast, name) for name in _exprs)
+    _stmt_nodes = set(getattr(ast, name) for name in _stmts)
+    def _is_ast_expr(node):
+        return node.__class__ in _expr_nodes
+    def _is_ast_stmt(node):
+        return node.__class__ in _stmt_nodes
+else:
+    def _is_ast_expr(node):
+        return isinstance(node, ast.expr)
+    def _is_ast_stmt(node):
+        return isinstance(node, ast.stmt)
+
+
+class Failure(Exception):
+    """Error found while interpreting AST."""
+
+    def __init__(self, explanation=""):
+        self.cause = sys.exc_info()
+        self.explanation = explanation
+
+
+def interpret(source, frame, should_fail=False):
+    mod = ast.parse(source)
+    visitor = DebugInterpreter(frame)
+    try:
+        visitor.visit(mod)
+    except Failure:
+        failure = sys.exc_info()[1]
+        return getfailure(failure)
+    if should_fail:
+        return ("(assertion failed, but when it was re-run for "
+                "printing intermediate values, it did not fail.  Suggestions: "
+                "compute assert expression before the assert or use --no-assert)")
+
+def run(offending_line, frame=None):
+    if frame is None:
+        frame = py.code.Frame(sys._getframe(1))
+    return interpret(offending_line, frame)
+
+def getfailure(e):
+    explanation = util.format_explanation(e.explanation)
+    value = e.cause[1]
+    if str(value):
+        lines = explanation.split('\n')
+        lines[0] += "  << %s" % (value,)
+        explanation = '\n'.join(lines)
+    text = "%s: %s" % (e.cause[0].__name__, explanation)
+    if text.startswith('AssertionError: assert '):
+        text = text[16:]
+    return text
+
+operator_map = {
+    ast.BitOr : "|",
+    ast.BitXor : "^",
+    ast.BitAnd : "&",
+    ast.LShift : "<<",
+    ast.RShift : ">>",
+    ast.Add : "+",
+    ast.Sub : "-",
+    ast.Mult : "*",
+    ast.Div : "/",
+    ast.FloorDiv : "//",
+    ast.Mod : "%",
+    ast.Eq : "==",
+    ast.NotEq : "!=",
+    ast.Lt : "<",
+    ast.LtE : "<=",
+    ast.Gt : ">",
+    ast.GtE : ">=",
+    ast.Pow : "**",
+    ast.Is : "is",
+    ast.IsNot : "is not",
+    ast.In : "in",
+    ast.NotIn : "not in"
+}
+
+unary_map = {
+    ast.Not : "not %s",
+    ast.Invert : "~%s",
+    ast.USub : "-%s",
+    ast.UAdd : "+%s"
+}
+
+
+class DebugInterpreter(ast.NodeVisitor):
+    """Interpret AST nodes to glean useful debugging information."""
+
+    def __init__(self, frame):
+        self.frame = frame
+
+    def generic_visit(self, node):
+        # Fallback when we don't have a special implementation.
+        if _is_ast_expr(node):
+            mod = ast.Expression(node)
+            co = self._compile(mod)
+            try:
+                result = self.frame.eval(co)
+            except Exception:
+                raise Failure()
+            explanation = self.frame.repr(result)
+            return explanation, result
+        elif _is_ast_stmt(node):
+            mod = ast.Module([node])
+            co = self._compile(mod, "exec")
+            try:
+                self.frame.exec_(co)
+            except Exception:
+                raise Failure()
+            return None, None
+        else:
+            raise AssertionError("can't handle %s" %(node,))
+
+    def _compile(self, source, mode="eval"):
+        return compile(source, "<assertion interpretation>", mode)
+
+    def visit_Expr(self, expr):
+        return self.visit(expr.value)
+
+    def visit_Module(self, mod):
+        for stmt in mod.body:
+            self.visit(stmt)
+
+    def visit_Name(self, name):
+        explanation, result = self.generic_visit(name)
+        # See if the name is local.
+        source = "%r in locals() is not globals()" % (name.id,)
+        co = self._compile(source)
+        try:
+            local = self.frame.eval(co)
+        except Exception:
+            # have to assume it isn't
+            local = None
+        if local is None or not self.frame.is_true(local):
+            return name.id, result
+        return explanation, result
+
+    def visit_Compare(self, comp):
+        left = comp.left
+        left_explanation, left_result = self.visit(left)
+        for op, next_op in zip(comp.ops, comp.comparators):
+            next_explanation, next_result = self.visit(next_op)
+            op_symbol = operator_map[op.__class__]
+            explanation = "%s %s %s" % (left_explanation, op_symbol,
+                                        next_explanation)
+            source = "__exprinfo_left %s __exprinfo_right" % (op_symbol,)
+            co = self._compile(source)
+            try:
+                result = self.frame.eval(co, __exprinfo_left=left_result,
+                                         __exprinfo_right=next_result)
+            except Exception:
+                raise Failure(explanation)
+            try:
+                if not self.frame.is_true(result):
+                    break
+            except KeyboardInterrupt:
+                raise
+            except:
+                break
+            left_explanation, left_result = next_explanation, next_result
+
+        if util._reprcompare is not None:
+            res = util._reprcompare(op_symbol, left_result, next_result)
+            if res:
+                explanation = res
+        return explanation, result
+
+    def visit_BoolOp(self, boolop):
+        is_or = isinstance(boolop.op, ast.Or)
+        explanations = []
+        for operand in boolop.values:
+            explanation, result = self.visit(operand)
+            explanations.append(explanation)
+            if result == is_or:
+                break
+        name = is_or and " or " or " and "
+        explanation = "(" + name.join(explanations) + ")"
+        return explanation, result
+
+    def visit_UnaryOp(self, unary):
+        pattern = unary_map[unary.op.__class__]
+        operand_explanation, operand_result = self.visit(unary.operand)
+        explanation = pattern % (operand_explanation,)
+        co = self._compile(pattern % ("__exprinfo_expr",))
+        try:
+            result = self.frame.eval(co, __exprinfo_expr=operand_result)
+        except Exception:
+            raise Failure(explanation)
+        return explanation, result
+
+    def visit_BinOp(self, binop):
+        left_explanation, left_result = self.visit(binop.left)
+        right_explanation, right_result = self.visit(binop.right)
+        symbol = operator_map[binop.op.__class__]
+        explanation = "(%s %s %s)" % (left_explanation, symbol,
+                                      right_explanation)
+        source = "__exprinfo_left %s __exprinfo_right" % (symbol,)
+        co = self._compile(source)
+        try:
+            result = self.frame.eval(co, __exprinfo_left=left_result,
+                                     __exprinfo_right=right_result)
+        except Exception:
+            raise Failure(explanation)
+        return explanation, result
+
+    def visit_Call(self, call):
+        func_explanation, func = self.visit(call.func)
+        arg_explanations = []
+        ns = {"__exprinfo_func" : func}
+        arguments = []
+        for arg in call.args:
+            arg_explanation, arg_result = self.visit(arg)
+            arg_name = "__exprinfo_%s" % (len(ns),)
+            ns[arg_name] = arg_result
+            arguments.append(arg_name)
+            arg_explanations.append(arg_explanation)
+        for keyword in call.keywords:
+            arg_explanation, arg_result = self.visit(keyword.value)
+            arg_name = "__exprinfo_%s" % (len(ns),)
+            ns[arg_name] = arg_result
+            keyword_source = "%s=%%s" % (keyword.arg)
+            arguments.append(keyword_source % (arg_name,))
+            arg_explanations.append(keyword_source % (arg_explanation,))
+        if call.starargs:
+            arg_explanation, arg_result = self.visit(call.starargs)
+            arg_name = "__exprinfo_star"
+            ns[arg_name] = arg_result
+            arguments.append("*%s" % (arg_name,))
+            arg_explanations.append("*%s" % (arg_explanation,))
+        if call.kwargs:
+            arg_explanation, arg_result = self.visit(call.kwargs)
+            arg_name = "__exprinfo_kwds"
+            ns[arg_name] = arg_result
+            arguments.append("**%s" % (arg_name,))
+            arg_explanations.append("**%s" % (arg_explanation,))
+        args_explained = ", ".join(arg_explanations)
+        explanation = "%s(%s)" % (func_explanation, args_explained)
+        args = ", ".join(arguments)
+        source = "__exprinfo_func(%s)" % (args,)
+        co = self._compile(source)
+        try:
+            result = self.frame.eval(co, **ns)
+        except Exception:
+            raise Failure(explanation)
+        pattern = "%s\n{%s = %s\n}"
+        rep = self.frame.repr(result)
+        explanation = pattern % (rep, rep, explanation)
+        return explanation, result
+
+    def _is_builtin_name(self, name):
+        pattern = "%r not in globals() and %r not in locals()"
+        source = pattern % (name.id, name.id)
+        co = self._compile(source)
+        try:
+            return self.frame.eval(co)
+        except Exception:
+            return False
+
+    def visit_Attribute(self, attr):
+        if not isinstance(attr.ctx, ast.Load):
+            return self.generic_visit(attr)
+        source_explanation, source_result = self.visit(attr.value)
+        explanation = "%s.%s" % (source_explanation, attr.attr)
+        source = "__exprinfo_expr.%s" % (attr.attr,)
+        co = self._compile(source)
+        try:
+            result = self.frame.eval(co, __exprinfo_expr=source_result)
+        except Exception:
+            raise Failure(explanation)
+        explanation = "%s\n{%s = %s.%s\n}" % (self.frame.repr(result),
+                                              self.frame.repr(result),
+                                              source_explanation, attr.attr)
+        # Check if the attr is from an instance.
+        source = "%r in getattr(__exprinfo_expr, '__dict__', {})"
+        source = source % (attr.attr,)
+        co = self._compile(source)
+        try:
+            from_instance = self.frame.eval(co, __exprinfo_expr=source_result)
+        except Exception:
+            from_instance = None
+        if from_instance is None or self.frame.is_true(from_instance):
+            rep = self.frame.repr(result)
+            pattern = "%s\n{%s = %s\n}"
+            explanation = pattern % (rep, rep, explanation)
+        return explanation, result
+
+    def visit_Assert(self, assrt):
+        test_explanation, test_result = self.visit(assrt.test)
+        explanation = "assert %s" % (test_explanation,)
+        if not self.frame.is_true(test_result):
+            try:
+                raise BuiltinAssertionError
+            except Exception:
+                raise Failure(explanation)
+        return explanation, test_result
+
+    def visit_Assign(self, assign):
+        value_explanation, value_result = self.visit(assign.value)
+        explanation = "... = %s" % (value_explanation,)
+        name = ast.Name("__exprinfo_expr", ast.Load(),
+                        lineno=assign.value.lineno,
+                        col_offset=assign.value.col_offset)
+        new_assign = ast.Assign(assign.targets, name, lineno=assign.lineno,
+                                col_offset=assign.col_offset)
+        mod = ast.Module([new_assign])
+        co = self._compile(mod, "exec")
+        try:
+            self.frame.exec_(co, __exprinfo_expr=value_result)
+        except Exception:
+            raise Failure(explanation)
+        return explanation, value_result
diff --git a/_pytest/assertion/oldinterpret.py b/_pytest/assertion/oldinterpret.py
new file mode 100644
--- /dev/null
+++ b/_pytest/assertion/oldinterpret.py
@@ -0,0 +1,552 @@
+import py
+import sys, inspect
+from compiler import parse, ast, pycodegen
+from _pytest.assertion.util import format_explanation
+from _pytest.assertion.reinterpret import BuiltinAssertionError
+
+passthroughex = py.builtin._sysex
+
+class Failure:
+    def __init__(self, node):
+        self.exc, self.value, self.tb = sys.exc_info()
+        self.node = node
+
+class View(object):
+    """View base class.
+
+    If C is a subclass of View, then C(x) creates a proxy object around
+    the object x.  The actual class of the proxy is not C in general,
+    but a *subclass* of C determined by the rules below.  To avoid confusion
+    we call view class the class of the proxy (a subclass of C, so of View)
+    and object class the class of x.
+
+    Attributes and methods not found in the proxy are automatically read on x.
+    Other operations like setting attributes are performed on the proxy, as
+    determined by its view class.  The object x is available from the proxy
+    as its __obj__ attribute.
+
+    The view class selection is determined by the __view__ tuples and the
+    optional __viewkey__ method.  By default, the selected view class is the
+    most specific subclass of C whose __view__ mentions the class of x.
+    If no such subclass is found, the search proceeds with the parent
+    object classes.  For example, C(True) will first look for a subclass
+    of C with __view__ = (..., bool, ...) and only if it doesn't find any
+    look for one with __view__ = (..., int, ...), and then ..., object,...
+    If everything fails the class C itself is considered to be the default.
+
+    Alternatively, the view class selection can be driven by another aspect
+    of the object x, instead of the class of x, by overriding __viewkey__.
+    See last example at the end of this module.
+    """
+
+    _viewcache = {}
+    __view__ = ()
+
+    def __new__(rootclass, obj, *args, **kwds):
+        self = object.__new__(rootclass)
+        self.__obj__ = obj
+        self.__rootclass__ = rootclass
+        key = self.__viewkey__()
+        try:
+            self.__class__ = self._viewcache[key]
+        except KeyError:
+            self.__class__ = self._selectsubclass(key)
+        return self
+
+    def __getattr__(self, attr):
+        # attributes not found in the normal hierarchy rooted on View
+        # are looked up in the object's real class
+        return getattr(self.__obj__, attr)
+
+    def __viewkey__(self):
+        return self.__obj__.__class__
+
+    def __matchkey__(self, key, subclasses):
+        if inspect.isclass(key):
+            keys = inspect.getmro(key)
+        else:
+            keys = [key]
+        for key in keys:
+            result = [C for C in subclasses if key in C.__view__]
+            if result:
+                return result
+        return []
+
+    def _selectsubclass(self, key):
+        subclasses = list(enumsubclasses(self.__rootclass__))
+        for C in subclasses:
+            if not isinstance(C.__view__, tuple):
+                C.__view__ = (C.__view__,)
+        choices = self.__matchkey__(key, subclasses)
+        if not choices:
+            return self.__rootclass__
+        elif len(choices) == 1:
+            return choices[0]
+        else:
+            # combine the multiple choices
+            return type('?', tuple(choices), {})
+
+    def __repr__(self):
+        return '%s(%r)' % (self.__rootclass__.__name__, self.__obj__)
+
+
+def enumsubclasses(cls):
+    for subcls in cls.__subclasses__():
+        for subsubclass in enumsubclasses(subcls):
+            yield subsubclass
+    yield cls
+
+
+class Interpretable(View):
+    """A parse tree node with a few extra methods."""
+    explanation = None
+
+    def is_builtin(self, frame):
+        return False
+
+    def eval(self, frame):
+        # fall-back for unknown expression nodes
+        try:
+            expr = ast.Expression(self.__obj__)
+            expr.filename = '<eval>'
+            self.__obj__.filename = '<eval>'
+            co = pycodegen.ExpressionCodeGenerator(expr).getCode()
+            result = frame.eval(co)
+        except passthroughex:
+            raise
+        except:
+            raise Failure(self)
+        self.result = result
+        self.explanation = self.explanation or frame.repr(self.result)
+
+    def run(self, frame):
+        # fall-back for unknown statement nodes
+        try:
+            expr = ast.Module(None, ast.Stmt([self.__obj__]))
+            expr.filename = '<run>'
+            co = pycodegen.ModuleCodeGenerator(expr).getCode()
+            frame.exec_(co)
+        except passthroughex:
+            raise
+        except:
+            raise Failure(self)
+
+    def nice_explanation(self):
+        return format_explanation(self.explanation)
+
+
+class Name(Interpretable):
+    __view__ = ast.Name
+
+    def is_local(self, frame):
+        source = '%r in locals() is not globals()' % self.name
+        try:
+            return frame.is_true(frame.eval(source))
+        except passthroughex:
+            raise
+        except:
+            return False
+
+    def is_global(self, frame):
+        source = '%r in globals()' % self.name
+        try:
+            return frame.is_true(frame.eval(source))
+        except passthroughex:
+            raise
+        except:
+            return False
+
+    def is_builtin(self, frame):
+        source = '%r not in locals() and %r not in globals()' % (
+            self.name, self.name)
+        try:
+            return frame.is_true(frame.eval(source))
+        except passthroughex:
+            raise
+        except:
+            return False
+
+    def eval(self, frame):
+        super(Name, self).eval(frame)
+        if not self.is_local(frame):
+            self.explanation = self.name
+
+class Compare(Interpretable):
+    __view__ = ast.Compare
+
+    def eval(self, frame):
+        expr = Interpretable(self.expr)
+        expr.eval(frame)
+        for operation, expr2 in self.ops:
+            if hasattr(self, 'result'):
+                # shortcutting in chained expressions
+                if not frame.is_true(self.result):
+                    break
+            expr2 = Interpretable(expr2)
+            expr2.eval(frame)
+            self.explanation = "%s %s %s" % (
+                expr.explanation, operation, expr2.explanation)
+            source = "__exprinfo_left %s __exprinfo_right" % operation
+            try:
+                self.result = frame.eval(source,
+                                         __exprinfo_left=expr.result,
+                                         __exprinfo_right=expr2.result)
+            except passthroughex:
+                raise
+            except:
+                raise Failure(self)
+            expr = expr2
+
+class And(Interpretable):
+    __view__ = ast.And
+
+    def eval(self, frame):
+        explanations = []
+        for expr in self.nodes:
+            expr = Interpretable(expr)
+            expr.eval(frame)
+            explanations.append(expr.explanation)
+            self.result = expr.result
+            if not frame.is_true(expr.result):
+                break
+        self.explanation = '(' + ' and '.join(explanations) + ')'
+
+class Or(Interpretable):
+    __view__ = ast.Or
+
+    def eval(self, frame):
+        explanations = []
+        for expr in self.nodes:
+            expr = Interpretable(expr)
+            expr.eval(frame)
+            explanations.append(expr.explanation)
+            self.result = expr.result
+            if frame.is_true(expr.result):
+                break
+        self.explanation = '(' + ' or '.join(explanations) + ')'
+
+
+# == Unary operations ==
+keepalive = []
+for astclass, astpattern in {
+    ast.Not    : 'not __exprinfo_expr',
+    ast.Invert : '(~__exprinfo_expr)',
+    }.items():
+
+    class UnaryArith(Interpretable):
+        __view__ = astclass
+
+        def eval(self, frame, astpattern=astpattern):
+            expr = Interpretable(self.expr)
+            expr.eval(frame)
+            self.explanation = astpattern.replace('__exprinfo_expr',
+                                                  expr.explanation)
+            try:
+                self.result = frame.eval(astpattern,
+                                         __exprinfo_expr=expr.result)
+            except passthroughex:
+                raise
+            except:
+                raise Failure(self)
+
+    keepalive.append(UnaryArith)
+
+# == Binary operations ==
+for astclass, astpattern in {
+    ast.Add    : '(__exprinfo_left + __exprinfo_right)',
+    ast.Sub    : '(__exprinfo_left - __exprinfo_right)',
+    ast.Mul    : '(__exprinfo_left * __exprinfo_right)',
+    ast.Div    : '(__exprinfo_left / __exprinfo_right)',
+    ast.Mod    : '(__exprinfo_left % __exprinfo_right)',
+    ast.Power  : '(__exprinfo_left ** __exprinfo_right)',
+    }.items():
+
+    class BinaryArith(Interpretable):
+        __view__ = astclass
+
+        def eval(self, frame, astpattern=astpattern):
+            left = Interpretable(self.left)
+            left.eval(frame)
+            right = Interpretable(self.right)
+            right.eval(frame)
+            self.explanation = (astpattern
+                                .replace('__exprinfo_left',  left .explanation)
+                                .replace('__exprinfo_right', right.explanation))
+            try:
+                self.result = frame.eval(astpattern,
+                                         __exprinfo_left=left.result,
+                                         __exprinfo_right=right.result)
+            except passthroughex:
+                raise
+            except:
+                raise Failure(self)
+
+    keepalive.append(BinaryArith)
+
+
+class CallFunc(Interpretable):
+    __view__ = ast.CallFunc
+
+    def is_bool(self, frame):
+        source = 'isinstance(__exprinfo_value, bool)'
+        try:
+            return frame.is_true(frame.eval(source,
+                                            __exprinfo_value=self.result))
+        except passthroughex:
+            raise
+        except:
+            return False
+
+    def eval(self, frame):
+        node = Interpretable(self.node)
+        node.eval(frame)
+        explanations = []
+        vars = {'__exprinfo_fn': node.result}
+        source = '__exprinfo_fn('
+        for a in self.args:
+            if isinstance(a, ast.Keyword):
+                keyword = a.name
+                a = a.expr
+            else:
+                keyword = None
+            a = Interpretable(a)
+            a.eval(frame)
+            argname = '__exprinfo_%d' % len(vars)
+            vars[argname] = a.result
+            if keyword is None:
+                source += argname + ','
+                explanations.append(a.explanation)
+            else:
+                source += '%s=%s,' % (keyword, argname)
+                explanations.append('%s=%s' % (keyword, a.explanation))
+        if self.star_args:
+            star_args = Interpretable(self.star_args)
+            star_args.eval(frame)
+            argname = '__exprinfo_star'
+            vars[argname] = star_args.result
+            source += '*' + argname + ','
+            explanations.append('*' + star_args.explanation)
+        if self.dstar_args:
+            dstar_args = Interpretable(self.dstar_args)
+            dstar_args.eval(frame)
+            argname = '__exprinfo_kwds'
+            vars[argname] = dstar_args.result
+            source += '**' + argname + ','
+            explanations.append('**' + dstar_args.explanation)
+        self.explanation = "%s(%s)" % (
+            node.explanation, ', '.join(explanations))
+        if source.endswith(','):
+            source = source[:-1]
+        source += ')'
+        try:
+            self.result = frame.eval(source, **vars)
+        except passthroughex:
+            raise
+        except:
+            raise Failure(self)
+        if not node.is_builtin(frame) or not self.is_bool(frame):
+            r = frame.repr(self.result)
+            self.explanation = '%s\n{%s = %s\n}' % (r, r, self.explanation)
+
+class Getattr(Interpretable):
+    __view__ = ast.Getattr
+
+    def eval(self, frame):
+        expr = Interpretable(self.expr)
+        expr.eval(frame)
+        source = '__exprinfo_expr.%s' % self.attrname
+        try:
+            self.result = frame.eval(source, __exprinfo_expr=expr.result)
+        except passthroughex:
+            raise
+        except:
+            raise Failure(self)
+        self.explanation = '%s.%s' % (expr.explanation, self.attrname)
+        # if the attribute comes from the instance, its value is interesting
+        source = ('hasattr(__exprinfo_expr, "__dict__") and '
+                  '%r in __exprinfo_expr.__dict__' % self.attrname)
+        try:
+            from_instance = frame.is_true(
+                frame.eval(source, __exprinfo_expr=expr.result))
+        except passthroughex:
+            raise
+        except:
+            from_instance = True
+        if from_instance:
+            r = frame.repr(self.result)
+            self.explanation = '%s\n{%s = %s\n}' % (r, r, self.explanation)
+
+# == Re-interpretation of full statements ==
+
+class Assert(Interpretable):
+    __view__ = ast.Assert
+
+    def run(self, frame):
+        test = Interpretable(self.test)
+        test.eval(frame)
+        # print the result as  'assert <explanation>'
+        self.result = test.result
+        self.explanation = 'assert ' + test.explanation
+        if not frame.is_true(test.result):
+            try:
+                raise BuiltinAssertionError
+            except passthroughex:
+                raise
+            except:
+                raise Failure(self)
+
+class Assign(Interpretable):
+    __view__ = ast.Assign
+
+    def run(self, frame):
+        expr = Interpretable(self.expr)
+        expr.eval(frame)
+        self.result = expr.result
+        self.explanation = '... = ' + expr.explanation
+        # fall-back-run the rest of the assignment
+        ass = ast.Assign(self.nodes, ast.Name('__exprinfo_expr'))
+        mod = ast.Module(None, ast.Stmt([ass]))
+        mod.filename = '<run>'
+        co = pycodegen.ModuleCodeGenerator(mod).getCode()
+        try:
+            frame.exec_(co, __exprinfo_expr=expr.result)
+        except passthroughex:
+            raise
+        except:
+            raise Failure(self)
+
+class Discard(Interpretable):
+    __view__ = ast.Discard
+
+    def run(self, frame):
+        expr = Interpretable(self.expr)
+        expr.eval(frame)
+        self.result = expr.result
+        self.explanation = expr.explanation
+
+class Stmt(Interpretable):
+    __view__ = ast.Stmt
+
+    def run(self, frame):
+        for stmt in self.nodes:
+            stmt = Interpretable(stmt)
+            stmt.run(frame)
+
+
+def report_failure(e):
+    explanation = e.node.nice_explanation()
+    if explanation:
+        explanation = ", in: " + explanation
+    else:
+        explanation = ""
+    sys.stdout.write("%s: %s%s\n" % (e.exc.__name__, e.value, explanation))
+
+def check(s, frame=None):
+    if frame is None:
+        frame = sys._getframe(1)
+        frame = py.code.Frame(frame)
+    expr = parse(s, 'eval')
+    assert isinstance(expr, ast.Expression)
+    node = Interpretable(expr.node)
+    try:
+        node.eval(frame)
+    except passthroughex:
+        raise
+    except Failure:
+        e = sys.exc_info()[1]
+        report_failure(e)
+    else:
+        if not frame.is_true(node.result):
+            sys.stderr.write("assertion failed: %s\n" % node.nice_explanation())
+
+
+###########################################################
+# API / Entry points
+# #########################################################
+
+def interpret(source, frame, should_fail=False):
+    module = Interpretable(parse(source, 'exec').node)
+    #print "got module", module
+    if isinstance(frame, py.std.types.FrameType):
+        frame = py.code.Frame(frame)
+    try:
+        module.run(frame)
+    except Failure:
+        e = sys.exc_info()[1]
+        return getfailure(e)
+    except passthroughex:
+        raise
+    except:
+        import traceback
+        traceback.print_exc()
+    if should_fail:
+        return ("(assertion failed, but when it was re-run for "
+                "printing intermediate values, it did not fail.  Suggestions: "
+                "compute assert expression before the assert or use --nomagic)")
+    else:
+        return None
+
+def getmsg(excinfo):
+    if isinstance(excinfo, tuple):
+        excinfo = py.code.ExceptionInfo(excinfo)
+    #frame, line = gettbline(tb)
+    #frame = py.code.Frame(frame)
+    #return interpret(line, frame)
+
+    tb = excinfo.traceback[-1]
+    source = str(tb.statement).strip()
+    x = interpret(source, tb.frame, should_fail=True)
+    if not isinstance(x, str):
+        raise TypeError("interpret returned non-string %r" % (x,))
+    return x
+
+def getfailure(e):
+    explanation = e.node.nice_explanation()
+    if str(e.value):
+        lines = explanation.split('\n')
+        lines[0] += "  << %s" % (e.value,)
+        explanation = '\n'.join(lines)
+    text = "%s: %s" % (e.exc.__name__, explanation)
+    if text.startswith('AssertionError: assert '):
+        text = text[16:]
+    return text
+
+def run(s, frame=None):
+    if frame is None:
+        frame = sys._getframe(1)
+        frame = py.code.Frame(frame)
+    module = Interpretable(parse(s, 'exec').node)
+    try:
+        module.run(frame)
+    except Failure:
+        e = sys.exc_info()[1]
+        report_failure(e)
+
+
+if __name__ == '__main__':
+    # example:
+    def f():
+        return 5
+    def g():
+        return 3
+    def h(x):
+        return 'never'
+    check("f() * g() == 5")
+    check("not f()")
+    check("not (f() and g() or 0)")
+    check("f() == g()")
+    i = 4
+    check("i == f()")
+    check("len(f()) == 0")
+    check("isinstance(2+3+4, float)")
+
+    run("x = i")
+    check("x == 5")
+
+    run("assert not f(), 'oops'")
+    run("a, b, c = 1, 2")
+    run("a, b, c = f()")
+
+    check("max([f(),g()]) == 4")
+    check("'hello'[g()] == 'h'")
+    run("'guk%d' % h(f())")
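
The View dispatch used above may be easier to see with a toy subclass
(illustration only, not from the patch; StringView is invented). Python 2
only, since oldinterpret relies on the old compiler package:

    from _pytest.assertion.oldinterpret import View

    class StringView(View):
        __view__ = (str,)            # proxy objects whose class is str
        def shout(self):
            return self.__obj__.upper() + '!'

    v = View('hello')                # instantiating the root class ...
    print v.__class__.__name__       # ... selects StringView via __view__
    print v.shout()                  # HELLO!
    print v.upper()                  # unknown attributes fall through to the str
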
diff --git a/_pytest/assertion/reinterpret.py b/_pytest/assertion/reinterpret.py
new file mode 100644
--- /dev/null
+++ b/_pytest/assertion/reinterpret.py
@@ -0,0 +1,48 @@
+import sys
+import py
+
+BuiltinAssertionError = py.builtin.builtins.AssertionError
+
+class AssertionError(BuiltinAssertionError):
+    def __init__(self, *args):
+        BuiltinAssertionError.__init__(self, *args)
+        if args:
+            try:
+                self.msg = str(args[0])
+            except py.builtin._sysex:
+                raise
+            except:
+                self.msg = "<[broken __repr__] %s at %0xd>" %(
+                    args[0].__class__, id(args[0]))
+        else:
+            f = py.code.Frame(sys._getframe(1))
+            try:
+                source = f.code.fullsource
+                if source is not None:
+                    try:
+                        source = source.getstatement(f.lineno, assertion=True)
+                    except IndexError:
+                        source = None
+                    else:
+                        source = str(source.deindent()).strip()
+            except py.error.ENOENT:
+                source = None
+                # this can also occur during reinterpretation, when the
+                # co_filename is set to "<run>".
+            if source:
+                self.msg = reinterpret(source, f, should_fail=True)
+            else:
+                self.msg = "<could not determine information>"
+            if not self.args:
+                self.args = (self.msg,)
+
+if sys.version_info > (3, 0):
+    AssertionError.__module__ = "builtins"
+    reinterpret_old = "old reinterpretation not available for py3"
+else:
+    from _pytest.assertion.oldinterpret import interpret as reinterpret_old
+if sys.version_info >= (2, 6) or (sys.platform.startswith("java")):
+    from _pytest.assertion.newinterpret import interpret as reinterpret
+else:
+    reinterpret = reinterpret_old
+
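
Roughly how this class is meant to be used (a sketch, not part of the patch):
pytest_configure installs it as the builtin AssertionError, so a bare assert
that fails reinterprets its own source line to build a message. This assumes
a CPython of this era, where the assert statement looks the AssertionError
name up at runtime, and that the code runs from a file so the source can be
re-read:

    import py
    from _pytest.assertion import reinterpret

    py.builtin.builtins.AssertionError = reinterpret.AssertionError

    def check():
        x = 3
        assert x == 4                # no message given

    try:
        check()
    except AssertionError as e:
        print(e)                     # something like: assert 3 == 4
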
diff --git a/_pytest/assertion/rewrite.py b/_pytest/assertion/rewrite.py
new file mode 100644
--- /dev/null
+++ b/_pytest/assertion/rewrite.py
@@ -0,0 +1,340 @@
+"""Rewrite assertion AST to produce nice error messages"""
+
+import ast
+import collections
+import itertools
+import sys
+
+import py
+from _pytest.assertion import util
+
+
+def rewrite_asserts(mod):
+    """Rewrite the assert statements in mod."""
+    AssertionRewriter().run(mod)
+
+
+_saferepr = py.io.saferepr
+from _pytest.assertion.util import format_explanation as _format_explanation
+
+def _format_boolop(operands, explanations, is_or):
+    show_explanations = []
+    for operand, expl in zip(operands, explanations):
+        show_explanations.append(expl)
+        if operand == is_or:
+            break
+    return "(" + (is_or and " or " or " and ").join(show_explanations) + ")"
+
+def _call_reprcompare(ops, results, expls, each_obj):
+    for i, res, expl in zip(range(len(ops)), results, expls):
+        try:
+            done = not res
+        except Exception:
+            done = True
+        if done:
+            break
+    if util._reprcompare is not None:
+        custom = util._reprcompare(ops[i], each_obj[i], each_obj[i + 1])
+        if custom is not None:
+            return custom
+    return expl
+
+
+unary_map = {
+    ast.Not : "not %s",
+    ast.Invert : "~%s",
+    ast.USub : "-%s",
+    ast.UAdd : "+%s"
+}
+
+binop_map = {
+    ast.BitOr : "|",
+    ast.BitXor : "^",
+    ast.BitAnd : "&",
+    ast.LShift : "<<",
+    ast.RShift : ">>",
+    ast.Add : "+",
+    ast.Sub : "-",
+    ast.Mult : "*",
+    ast.Div : "/",
+    ast.FloorDiv : "//",
+    ast.Mod : "%",
+    ast.Eq : "==",
+    ast.NotEq : "!=",
+    ast.Lt : "<",
+    ast.LtE : "<=",
+    ast.Gt : ">",
+    ast.GtE : ">=",
+    ast.Pow : "**",
+    ast.Is : "is",
+    ast.IsNot : "is not",
+    ast.In : "in",
+    ast.NotIn : "not in"
+}
+
+
+def set_location(node, lineno, col_offset):
+    """Set node location information recursively."""
+    def _fix(node, lineno, col_offset):
+        if "lineno" in node._attributes:
+            node.lineno = lineno
+        if "col_offset" in node._attributes:
+            node.col_offset = col_offset
+        for child in ast.iter_child_nodes(node):
+            _fix(child, lineno, col_offset)
+    _fix(node, lineno, col_offset)
+    return node
+
+
+class AssertionRewriter(ast.NodeVisitor):
+
+    def run(self, mod):
+        """Find all assert statements in *mod* and rewrite them."""
+        if not mod.body:
+            # Nothing to do.
+            return
+        # Insert some special imports at the top of the module but after any
+        # docstrings and __future__ imports.
+        aliases = [ast.alias(py.builtin.builtins.__name__, "@py_builtins"),
+                   ast.alias("_pytest.assertion.rewrite", "@pytest_ar")]
+        expect_docstring = True
+        pos = 0
+        lineno = 0
+        for item in mod.body:
+            if (expect_docstring and isinstance(item, ast.Expr) and
+                isinstance(item.value, ast.Str)):
+                doc = item.value.s
+                if "PYTEST_DONT_REWRITE" in doc:
+                    # The module has disabled assertion rewriting.
+                    return
+                lineno += len(doc) - 1
+                expect_docstring = False
+            elif (not isinstance(item, ast.ImportFrom) or item.level > 0 and
+                  item.identifier != "__future__"):
+                lineno = item.lineno
+                break
+            pos += 1
+        imports = [ast.Import([alias], lineno=lineno, col_offset=0)
+                   for alias in aliases]
+        mod.body[pos:pos] = imports
+        # Collect asserts.
+        nodes = collections.deque([mod])
+        while nodes:
+            node = nodes.popleft()
+            for name, field in ast.iter_fields(node):
+                if isinstance(field, list):
+                    new = []
+                    for i, child in enumerate(field):
+                        if isinstance(child, ast.Assert):
+                            # Transform assert.
+                            new.extend(self.visit(child))
+                        else:
+                            new.append(child)
+                            if isinstance(child, ast.AST):
+                                nodes.append(child)
+                    setattr(node, name, new)
+                elif (isinstance(field, ast.AST) and
+                      # Don't recurse into expressions as they can't contain
+                      # asserts.
+                      not isinstance(field, ast.expr)):
+                    nodes.append(field)
+
+    def variable(self):
+        """Get a new variable."""
+        # Use a character invalid in python identifiers to avoid clashing.
+        name = "@py_assert" + str(next(self.variable_counter))
+        self.variables.add(name)
+        return name
+
+    def assign(self, expr):
+        """Give *expr* a name."""
+        name = self.variable()
+        self.statements.append(ast.Assign([ast.Name(name, ast.Store())], expr))
+        return ast.Name(name, ast.Load())
+
+    def display(self, expr):
+        """Call py.io.saferepr on the expression."""
+        return self.helper("saferepr", expr)
+
+    def helper(self, name, *args):
+        """Call a helper in this module."""
+        py_name = ast.Name("@pytest_ar", ast.Load())
+        attr = ast.Attribute(py_name, "_" + name, ast.Load())
+        return ast.Call(attr, list(args), [], None, None)
+
+    def builtin(self, name):
+        """Return the builtin called *name*."""
+        builtin_name = ast.Name("@py_builtins", ast.Load())
+        return ast.Attribute(builtin_name, name, ast.Load())
+
+    def explanation_param(self, expr):
+        specifier = "py" + str(next(self.variable_counter))
+        self.explanation_specifiers[specifier] = expr
+        return "%(" + specifier + ")s"
+
+    def push_format_context(self):
+        self.explanation_specifiers = {}
+        self.stack.append(self.explanation_specifiers)
+
+    def pop_format_context(self, expl_expr):
+        current = self.stack.pop()
+        if self.stack:
+            self.explanation_specifiers = self.stack[-1]
+        keys = [ast.Str(key) for key in current.keys()]
+        format_dict = ast.Dict(keys, list(current.values()))
+        form = ast.BinOp(expl_expr, ast.Mod(), format_dict)
+        name = "@py_format" + str(next(self.variable_counter))
+        self.on_failure.append(ast.Assign([ast.Name(name, ast.Store())], form))
+        return ast.Name(name, ast.Load())
+
+    def generic_visit(self, node):
+        """Handle expressions we don't have custom code for."""
+        assert isinstance(node, ast.expr)
+        res = self.assign(node)
+        return res, self.explanation_param(self.display(res))
+
+    def visit_Assert(self, assert_):
+        if assert_.msg:
+            # There's already a message. Don't mess with it.
+            return [assert_]
+        self.statements = []
+        self.variables = set()
+        self.variable_counter = itertools.count()
+        self.stack = []
+        self.on_failure = []
+        self.push_format_context()
+        # Rewrite assert into a bunch of statements.
+        top_condition, explanation = self.visit(assert_.test)
+        # Create failure message.
+        body = self.on_failure
+        negation = ast.UnaryOp(ast.Not(), top_condition)
+        self.statements.append(ast.If(negation, body, []))
+        explanation = "assert " + explanation
+        template = ast.Str(explanation)
+        msg = self.pop_format_context(template)
+        fmt = self.helper("format_explanation", msg)
+        err_name = ast.Name("AssertionError", ast.Load())
+        exc = ast.Call(err_name, [fmt], [], None, None)
+        if sys.version_info[0] >= 3:
+            raise_ = ast.Raise(exc, None)
+        else:
+            raise_ = ast.Raise(exc, None, None)
+        body.append(raise_)
+        # Delete temporary variables.
+        names = [ast.Name(name, ast.Del()) for name in self.variables]
+        if names:
+            delete = ast.Delete(names)
+            self.statements.append(delete)
+        # Fix line numbers.
+        for stmt in self.statements:
+            set_location(stmt, assert_.lineno, assert_.col_offset)
+        return self.statements
+
+    def visit_Name(self, name):
+        # Check if the name is local or not.
+        locs = ast.Call(self.builtin("locals"), [], [], None, None)
+        globs = ast.Call(self.builtin("globals"), [], [], None, None)
+        ops = [ast.In(), ast.IsNot()]
+        test = ast.Compare(ast.Str(name.id), ops, [locs, globs])
+        expr = ast.IfExp(test, self.display(name), ast.Str(name.id))
+        return name, self.explanation_param(expr)
+
+    def visit_BoolOp(self, boolop):
+        operands = []
+        explanations = []
+        self.push_format_context()
+        for operand in boolop.values:
+            res, explanation = self.visit(operand)
+            operands.append(res)
+            explanations.append(explanation)
+        expls = ast.Tuple([ast.Str(expl) for expl in explanations], ast.Load())
+        is_or = ast.Num(isinstance(boolop.op, ast.Or))
+        expl_template = self.helper("format_boolop",
+                                    ast.Tuple(operands, ast.Load()), expls,
+                                    is_or)
+        expl = self.pop_format_context(expl_template)
+        res = self.assign(ast.BoolOp(boolop.op, operands))
+        return res, self.explanation_param(expl)
+
+    def visit_UnaryOp(self, unary):
+        pattern = unary_map[unary.op.__class__]
+        operand_res, operand_expl = self.visit(unary.operand)
+        res = self.assign(ast.UnaryOp(unary.op, operand_res))
+        return res, pattern % (operand_expl,)
+
+    def visit_BinOp(self, binop):
+        symbol = binop_map[binop.op.__class__]
+        left_expr, left_expl = self.visit(binop.left)
+        right_expr, right_expl = self.visit(binop.right)
+        explanation = "(%s %s %s)" % (left_expl, symbol, right_expl)
+        res = self.assign(ast.BinOp(left_expr, binop.op, right_expr))
+        return res, explanation
+
+    def visit_Call(self, call):
+        new_func, func_expl = self.visit(call.func)
+        arg_expls = []
+        new_args = []
+        new_kwargs = []
+        new_star = new_kwarg = None
+        for arg in call.args:
+            res, expl = self.visit(arg)
+            new_args.append(res)
+            arg_expls.append(expl)
+        for keyword in call.keywords:
+            res, expl = self.visit(keyword.value)
+            new_kwargs.append(ast.keyword(keyword.arg, res))
+            arg_expls.append(keyword.arg + "=" + expl)
+        if call.starargs:
+            new_star, expl = self.visit(call.starargs)
+            arg_expls.append("*" + expl)
+        if call.kwargs:
+            new_kwarg, expl = self.visit(call.kwargs)
+            arg_expls.append("**" + expl)
+        expl = "%s(%s)" % (func_expl, ', '.join(arg_expls))
+        new_call = ast.Call(new_func, new_args, new_kwargs, new_star, new_kwarg)
+        res = self.assign(new_call)
+        res_expl = self.explanation_param(self.display(res))
+        outer_expl = "%s\n{%s = %s\n}" % (res_expl, res_expl, expl)
+        return res, outer_expl
+
+    def visit_Attribute(self, attr):
+        if not isinstance(attr.ctx, ast.Load):
+            return self.generic_visit(attr)
+        value, value_expl = self.visit(attr.value)
+        res = self.assign(ast.Attribute(value, attr.attr, ast.Load()))
+        res_expl = self.explanation_param(self.display(res))
+        pat = "%s\n{%s = %s.%s\n}"
+        expl = pat % (res_expl, res_expl, value_expl, attr.attr)
+        return res, expl
+
+    def visit_Compare(self, comp):
+        self.push_format_context()
+        left_res, left_expl = self.visit(comp.left)
+        res_variables = [self.variable() for i in range(len(comp.ops))]
+        load_names = [ast.Name(v, ast.Load()) for v in res_variables]
+        store_names = [ast.Name(v, ast.Store()) for v in res_variables]
+        it = zip(range(len(comp.ops)), comp.ops, comp.comparators)
+        expls = []
+        syms = []
+        results = [left_res]
+        for i, op, next_operand in it:
+            next_res, next_expl = self.visit(next_operand)
+            results.append(next_res)
+            sym = binop_map[op.__class__]
+            syms.append(ast.Str(sym))
+            expl = "%s %s %s" % (left_expl, sym, next_expl)
+            expls.append(ast.Str(expl))
+            res_expr = ast.Compare(left_res, [op], [next_res])
+            self.statements.append(ast.Assign([store_names[i]], res_expr))
+            left_res, left_expl = next_res, next_expl
+        # Use py.code._reprcompare if that's available.
+        expl_call = self.helper("call_reprcompare",
+                                ast.Tuple(syms, ast.Load()),
+                                ast.Tuple(load_names, ast.Load()),
+                                ast.Tuple(expls, ast.Load()),
+                                ast.Tuple(results, ast.Load()))
+        if len(comp.ops) > 1:
+            res = ast.BoolOp(ast.And(), load_names)
+        else:
+            res = load_names[0]
+        return res, self.explanation_param(self.pop_format_context(expl_call))
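
The visitor methods above rewrite each assert so that every operand is evaluated exactly once, stored in a temporary, and folded into an explanation string that is only rendered on failure. A minimal stand-alone sketch of that idea in plain Python (illustrative only, not the AST-building code from this patch):

    import operator

    def explain_compare(left, op_symbol, op_func, right):
        # evaluate both operands once, as the rewriter does by assigning
        # them to generated temporary variables
        result = op_func(left, right)
        explanation = "%r %s %r" % (left, op_symbol, right)
        return result, explanation

    ok, expl = explain_compare(2 + 2, "==", operator.eq, 5)
    if not ok:
        print "assert failed: " + expl    # assert failed: 4 == 5
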
diff --git a/_pytest/assertion/util.py b/_pytest/assertion/util.py
new file mode 100644
--- /dev/null
+++ b/_pytest/assertion/util.py
@@ -0,0 +1,213 @@
+"""Utilities for assertion debugging"""
+
+import py
+
+
+# The _reprcompare attribute on the util module is used by the new assertion
+# interpretation code and assertion rewriter to detect this plugin was
+# loaded and in turn call the hooks defined here as part of the
+# DebugInterpreter.
+_reprcompare = None
+
+def format_explanation(explanation):
+    """This formats an explanation
+
+    Normally all embedded newlines are escaped, however there are
+    three exceptions: \n{, \n} and \n~.  The first two are intended to
+    cover nested explanations; see the function and attribute explanations
+    for examples (visit_Call(), visit_Attribute()).  The last one is
+    for when one explanation needs to span multiple lines, e.g. when
+    displaying diffs.
+    """
+    # simplify 'assert False where False = ...'
+    where = 0
+    while True:
+        start = where = explanation.find("False\n{False = ", where)
+        if where == -1:
+            break
+        level = 0
+        for i, c in enumerate(explanation[start:]):
+            if c == "{":
+                level += 1
+            elif c == "}":
+                level -= 1
+                if not level:
+                    break
+        else:
+            raise AssertionError("unbalanced braces: %r" % (explanation,))
+        end = start + i
+        where = end
+        if explanation[end - 1] == '\n':
+            explanation = (explanation[:start] + explanation[start+15:end-1] +
+                           explanation[end+1:])
+            where -= 17
+    raw_lines = (explanation or '').split('\n')
+    # escape newlines not followed by {, } and ~
+    lines = [raw_lines[0]]
+    for l in raw_lines[1:]:
+        if l.startswith('{') or l.startswith('}') or l.startswith('~'):
+            lines.append(l)
+        else:
+            lines[-1] += '\\n' + l
+
+    result = lines[:1]
+    stack = [0]
+    stackcnt = [0]
+    for line in lines[1:]:
+        if line.startswith('{'):
+            if stackcnt[-1]:
+                s = 'and   '
+            else:
+                s = 'where '
+            stack.append(len(result))
+            stackcnt[-1] += 1
+            stackcnt.append(0)
+            result.append(' +' + '  '*(len(stack)-1) + s + line[1:])
+        elif line.startswith('}'):
+            assert line.startswith('}')
+            stack.pop()
+            stackcnt.pop()
+            result[stack[-1]] += line[1:]
+        else:
+            assert line.startswith('~')
+            result.append('  '*len(stack) + line[1:])
+    assert len(stack) == 1
+    return '\n'.join(result)
+
+
+# Provide basestring in python3
+try:
+    basestring = basestring
+except NameError:
+    basestring = str
+
+
+def assertrepr_compare(op, left, right):
+    """return specialised explanations for some operators/operands"""
+    width = 80 - 15 - len(op) - 2 # 15 chars indentation, 1 space around op
+    left_repr = py.io.saferepr(left, maxsize=int(width/2))
+    right_repr = py.io.saferepr(right, maxsize=width-len(left_repr))
+    summary = '%s %s %s' % (left_repr, op, right_repr)
+
+    issequence = lambda x: isinstance(x, (list, tuple))
+    istext = lambda x: isinstance(x, basestring)
+    isdict = lambda x: isinstance(x, dict)
+    isset = lambda x: isinstance(x, set)
+
+    explanation = None
+    try:
+        if op == '==':
+            if istext(left) and istext(right):
+                explanation = _diff_text(left, right)
+            elif issequence(left) and issequence(right):
+                explanation = _compare_eq_sequence(left, right)
+            elif isset(left) and isset(right):
+                explanation = _compare_eq_set(left, right)
+            elif isdict(left) and isdict(right):
+                explanation = _diff_text(py.std.pprint.pformat(left),
+                                         py.std.pprint.pformat(right))
+        elif op == 'not in':
+            if istext(left) and istext(right):
+                explanation = _notin_text(left, right)
+    except py.builtin._sysex:
+        raise
+    except:
+        excinfo = py.code.ExceptionInfo()
+        explanation = ['(pytest_assertion plugin: representation of '
+            'details failed. Probably an object has a faulty __repr__.)',
+            str(excinfo)
+            ]
+
+
+    if not explanation:
+        return None
+
+    # Don't include pageloads of data, should be configurable
+    if len(''.join(explanation)) > 80*8:
+        explanation = ['Detailed information too verbose, truncated']
+
+    return [summary] + explanation
+
+
+def _diff_text(left, right):
+    """Return the explanation for the diff between two texts
+
+    This will skip leading and trailing characters which are
+    identical to keep the diff minimal.
+    """
+    explanation = []
+    i = 0 # just in case left or right has zero length
+    for i in range(min(len(left), len(right))):
+        if left[i] != right[i]:
+            break
+    if i > 42:
+        i -= 10                 # Provide some context
+        explanation = ['Skipping %s identical '
+                       'leading characters in diff' % i]
+        left = left[i:]
+        right = right[i:]
+    if len(left) == len(right):
+        for i in range(len(left)):
+            if left[-i] != right[-i]:
+                break
+        if i > 42:
+            i -= 10     # Provide some context
+            explanation += ['Skipping %s identical '
+                            'trailing characters in diff' % i]
+            left = left[:-i]
+            right = right[:-i]
+    explanation += [line.strip('\n')
+                    for line in py.std.difflib.ndiff(left.splitlines(),
+                                                     right.splitlines())]
+    return explanation
+
+
+def _compare_eq_sequence(left, right):
+    explanation = []
+    for i in range(min(len(left), len(right))):
+        if left[i] != right[i]:
+            explanation += ['At index %s diff: %r != %r' %
+                            (i, left[i], right[i])]
+            break
+    if len(left) > len(right):
+        explanation += ['Left contains more items, '
+            'first extra item: %s' % py.io.saferepr(left[len(right)],)]
+    elif len(left) < len(right):
+        explanation += ['Right contains more items, '
+            'first extra item: %s' % py.io.saferepr(right[len(left)],)]
+    return explanation # + _diff_text(py.std.pprint.pformat(left),
+                       #             py.std.pprint.pformat(right))
+
+
+def _compare_eq_set(left, right):
+    explanation = []
+    diff_left = left - right
+    diff_right = right - left
+    if diff_left:
+        explanation.append('Extra items in the left set:')
+        for item in diff_left:
+            explanation.append(py.io.saferepr(item))
+    if diff_right:
+        explanation.append('Extra items in the right set:')
+        for item in diff_right:
+            explanation.append(py.io.saferepr(item))
+    return explanation
+
+
+def _notin_text(term, text):
+    index = text.find(term)
+    head = text[:index]
+    tail = text[index+len(term):]
+    correct_text = head + tail
+    diff = _diff_text(correct_text, text)
+    newdiff = ['%s is contained here:' % py.io.saferepr(term, maxsize=42)]
+    for line in diff:
+        if line.startswith('Skipping'):
+            continue
+        if line.startswith('- '):
+            continue
+        if line.startswith('+ '):
+            newdiff.append('  ' + line[2:])
+        else:
+            newdiff.append(line)
+    return newdiff
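
A hedged usage sketch for the helpers above: for two unequal sets, assertrepr_compare() returns the summary line followed by the per-side details built by _compare_eq_set() (exact repr formatting may differ slightly):

    from _pytest.assertion import util

    lines = util.assertrepr_compare('==', set([1, 2]), set([2, 3]))
    # roughly: ['set([1, 2]) == set([2, 3])',
    #           'Extra items in the left set:', '1',
    #           'Extra items in the right set:', '3']
    print '\n'.join(lines)
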
diff --git a/_pytest/doctest.py b/_pytest/doctest.py
--- a/_pytest/doctest.py
+++ b/_pytest/doctest.py
@@ -59,7 +59,7 @@
                 inner_excinfo = py.code.ExceptionInfo(excinfo.value.exc_info)
                 lines += ["UNEXPECTED EXCEPTION: %s" %
                             repr(inner_excinfo.value)]
-
+                lines += py.std.traceback.format_exception(*excinfo.value.exc_info)
             return ReprFailDoctest(reprlocation, lines)
         else:
             return super(DoctestItem, self).repr_failure(excinfo)
diff --git a/_pytest/helpconfig.py b/_pytest/helpconfig.py
--- a/_pytest/helpconfig.py
+++ b/_pytest/helpconfig.py
@@ -16,9 +16,6 @@
     group.addoption('--traceconfig',
                action="store_true", dest="traceconfig", default=False,
                help="trace considerations of conftest.py files."),
-    group._addoption('--nomagic',
-               action="store_true", dest="nomagic", default=False,
-               help="don't reinterpret asserts, no traceback cutting. ")
     group.addoption('--debug',
                action="store_true", dest="debug", default=False,
                help="generate and show internal debugging information.")
diff --git a/_pytest/junitxml.py b/_pytest/junitxml.py
--- a/_pytest/junitxml.py
+++ b/_pytest/junitxml.py
@@ -65,7 +65,8 @@
 
 class LogXML(object):
     def __init__(self, logfile, prefix):
-        self.logfile = logfile
+        logfile = os.path.expanduser(os.path.expandvars(logfile))
+        self.logfile = os.path.normpath(logfile)
         self.prefix = prefix
         self.test_logs = []
         self.passed = self.skipped = 0
@@ -76,7 +77,7 @@
         names = report.nodeid.split("::")
         names[0] = names[0].replace("/", '.')
         names = tuple(names)
-        d = {'time': self._durations.pop(names, "0")}
+        d = {'time': self._durations.pop(report.nodeid, "0")}
         names = [x.replace(".py", "") for x in names if x != "()"]
         classnames = names[:-1]
         if self.prefix:
@@ -170,12 +171,11 @@
             self.append_skipped(report)
 
     def pytest_runtest_call(self, item, __multicall__):
-        names = tuple(item.listnames())
         start = time.time()
         try:
             return __multicall__.execute()
         finally:
-            self._durations[names] = time.time() - start
+            self._durations[item.nodeid] = time.time() - start
 
     def pytest_collectreport(self, report):
         if not report.passed:
diff --git a/_pytest/main.py b/_pytest/main.py
--- a/_pytest/main.py
+++ b/_pytest/main.py
@@ -46,23 +46,25 @@
 
 
 def pytest_namespace():
-    return dict(collect=dict(Item=Item, Collector=Collector, File=File))
+    collect = dict(Item=Item, Collector=Collector, File=File, Session=Session)
+    return dict(collect=collect)
         
 def pytest_configure(config):
     py.test.config = config # compatibility
     if config.option.exitfirst:
         config.option.maxfail = 1
 
-def pytest_cmdline_main(config):
-    """ default command line protocol for initialization, session,
-    running tests and reporting. """
+def wrap_session(config, doit):
+    """Skeleton command line program"""
     session = Session(config)
     session.exitstatus = EXIT_OK
+    initstate = 0
     try:
         config.pluginmanager.do_configure(config)
+        initstate = 1
         config.hook.pytest_sessionstart(session=session)
-        config.hook.pytest_collection(session=session)
-        config.hook.pytest_runtestloop(session=session)
+        initstate = 2
+        doit(config, session)
     except pytest.UsageError:
         raise
     except KeyboardInterrupt:
@@ -77,18 +79,24 @@
             sys.stderr.write("mainloop: caught Spurious SystemExit!\n")
     if not session.exitstatus and session._testsfailed:
         session.exitstatus = EXIT_TESTSFAILED
-    config.hook.pytest_sessionfinish(session=session,
-        exitstatus=session.exitstatus)
-    config.pluginmanager.do_unconfigure(config)
+    if initstate >= 2:
+        config.hook.pytest_sessionfinish(session=session,
+            exitstatus=session.exitstatus)
+    if initstate >= 1:
+        config.pluginmanager.do_unconfigure(config)
     return session.exitstatus
 
+def pytest_cmdline_main(config):
+    return wrap_session(config, _main)
+
+def _main(config, session):
+    """ default command line protocol for initialization, session,
+    running tests and reporting. """
+    config.hook.pytest_collection(session=session)
+    config.hook.pytest_runtestloop(session=session)
+
 def pytest_collection(session):
-    session.perform_collect()
-    hook = session.config.hook
-    hook.pytest_collection_modifyitems(session=session,
-        config=session.config, items=session.items)
-    hook.pytest_collection_finish(session=session)
-    return True
+    return session.perform_collect()
 
 def pytest_runtestloop(session):
     if session.config.option.collectonly:
@@ -374,6 +382,16 @@
         return HookProxy(fspath, self.config)
 
     def perform_collect(self, args=None, genitems=True):
+        hook = self.config.hook
+        try:
+            items = self._perform_collect(args, genitems)
+            hook.pytest_collection_modifyitems(session=self,
+                config=self.config, items=items)
+        finally:
+            hook.pytest_collection_finish(session=self)
+        return items
+
+    def _perform_collect(self, args, genitems):
         if args is None:
             args = self.config.args
         self.trace("perform_collect", self, args)
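
wrap_session() above factors the configure / sessionstart / sessionfinish / unconfigure protocol out of pytest_cmdline_main so that other entry points (such as showfuncargs further down) can reuse it. A hedged sketch of how a caller is meant to use it; the _collect_only name is illustrative:

    from _pytest.main import wrap_session

    def _collect_only(config, session):
        # runs with plugins configured and the session started;
        # wrap_session() takes care of teardown and the exit status
        config.hook.pytest_collection(session=session)

    def run_collect_only(config):
        return wrap_session(config, _collect_only)
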
diff --git a/_pytest/mark.py b/_pytest/mark.py
--- a/_pytest/mark.py
+++ b/_pytest/mark.py
@@ -153,7 +153,7 @@
 
     def __repr__(self):
         return "<MarkInfo %r args=%r kwargs=%r>" % (
-                self._name, self.args, self.kwargs)
+                self.name, self.args, self.kwargs)
 
 def pytest_itemcollected(item):
     if not isinstance(item, pytest.Function):
diff --git a/_pytest/pytester.py b/_pytest/pytester.py
--- a/_pytest/pytester.py
+++ b/_pytest/pytester.py
@@ -6,7 +6,7 @@
 import inspect
 import time
 from fnmatch import fnmatch
-from _pytest.main import Session
+from _pytest.main import Session, EXIT_OK
 from py.builtin import print_
 from _pytest.core import HookRelay
 
@@ -292,13 +292,19 @@
         assert '::' not in str(arg)
         p = py.path.local(arg)
         x = session.fspath.bestrelpath(p)
-        return session.perform_collect([x], genitems=False)[0]
+        config.hook.pytest_sessionstart(session=session)
+        res = session.perform_collect([x], genitems=False)[0]
+        config.hook.pytest_sessionfinish(session=session, exitstatus=EXIT_OK)
+        return res
 
     def getpathnode(self, path):
-        config = self.parseconfig(path)
+        config = self.parseconfigure(path)
         session = Session(config)
         x = session.fspath.bestrelpath(path)
-        return session.perform_collect([x], genitems=False)[0]
+        config.hook.pytest_sessionstart(session=session)
+        res = session.perform_collect([x], genitems=False)[0]
+        config.hook.pytest_sessionfinish(session=session, exitstatus=EXIT_OK)
+        return res
 
     def genitems(self, colitems):
         session = colitems[0].session
@@ -312,7 +318,9 @@
         config = self.parseconfigure(*args)
         rec = self.getreportrecorder(config)
         session = Session(config)
+        config.hook.pytest_sessionstart(session=session)
         session.perform_collect()
+        config.hook.pytest_sessionfinish(session=session, exitstatus=EXIT_OK)
         return session.items, rec
 
     def runitem(self, source):
@@ -382,6 +390,8 @@
             c.basetemp = py.path.local.make_numbered_dir(prefix="reparse",
                 keep=0, rootdir=self.tmpdir, lock_timeout=None)
             c.parse(args)
+            c.pluginmanager.do_configure(c)
+            self.request.addfinalizer(lambda: c.pluginmanager.do_unconfigure(c))
             return c
         finally:
             py.test.config = oldconfig
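
The pytester changes above all follow one pattern: since the SetupState now lives on the session (see the runner.py hunk further down), any direct call to Session.perform_collect() has to be bracketed by the sessionstart/sessionfinish hooks. A condensed sketch of that pattern, assuming a config obtained from parseconfigure():

    from _pytest.main import Session, EXIT_OK

    def collect_with_session(config, args):
        session = Session(config)
        config.hook.pytest_sessionstart(session=session)
        try:
            return session.perform_collect(args, genitems=False)
        finally:
            config.hook.pytest_sessionfinish(session=session,
                                             exitstatus=EXIT_OK)
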
diff --git a/_pytest/python.py b/_pytest/python.py
--- a/_pytest/python.py
+++ b/_pytest/python.py
@@ -226,8 +226,13 @@
 
     def _importtestmodule(self):
         # we assume we are only called once per module
+        from _pytest import assertion
+        assertion.before_module_import(self)
         try:
-            mod = self.fspath.pyimport(ensuresyspath=True)
+            try:
+                mod = self.fspath.pyimport(ensuresyspath=True)
+            finally:
+                assertion.after_module_import(self)
         except SyntaxError:
             excinfo = py.code.ExceptionInfo()
             raise self.CollectError(excinfo.getrepr(style="short"))
@@ -374,7 +379,7 @@
         # test generators are seen as collectors but they also
         # invoke setup/teardown on popular request
         # (induced by the common "test_*" naming shared with normal tests)
-        self.config._setupstate.prepare(self)
+        self.session._setupstate.prepare(self)
         # see FunctionMixin.setup and test_setupstate_is_preserved_134
         self._preservedparent = self.parent.obj
         l = []
@@ -721,7 +726,7 @@
 
     def _addfinalizer(self, finalizer, scope):
         colitem = self._getscopeitem(scope)
-        self.config._setupstate.addfinalizer(
+        self._pyfuncitem.session._setupstate.addfinalizer(
             finalizer=finalizer, colitem=colitem)
 
     def __repr__(self):
@@ -742,8 +747,10 @@
         raise self.LookupError(msg)
 
 def showfuncargs(config):
-    from _pytest.main import Session
-    session = Session(config)
+    from _pytest.main import wrap_session
+    return wrap_session(config, _showfuncargs_main)
+
+def _showfuncargs_main(config, session):
     session.perform_collect()
     if session.items:
         plugins = session.items[0].getplugins()
diff --git a/_pytest/runner.py b/_pytest/runner.py
--- a/_pytest/runner.py
+++ b/_pytest/runner.py
@@ -14,17 +14,15 @@
 #
 # pytest plugin hooks
 
-# XXX move to pytest_sessionstart and fix py.test owns tests
-def pytest_configure(config):
-    config._setupstate = SetupState()
+def pytest_sessionstart(session):
+    session._setupstate = SetupState()
 
 def pytest_sessionfinish(session, exitstatus):
-    if hasattr(session.config, '_setupstate'):
-        hook = session.config.hook
-        rep = hook.pytest__teardown_final(session=session)
-        if rep:
-            hook.pytest__teardown_final_logerror(session=session, report=rep)
-            session.exitstatus = 1
+    hook = session.config.hook
+    rep = hook.pytest__teardown_final(session=session)
+    if rep:
+        hook.pytest__teardown_final_logerror(session=session, report=rep)
+        session.exitstatus = 1
 
 class NodeInfo:
     def __init__(self, location):
@@ -46,16 +44,16 @@
     return reports
 
 def pytest_runtest_setup(item):
-    item.config._setupstate.prepare(item)
+    item.session._setupstate.prepare(item)
 
 def pytest_runtest_call(item):
     item.runtest()
 
 def pytest_runtest_teardown(item):
-    item.config._setupstate.teardown_exact(item)
+    item.session._setupstate.teardown_exact(item)
 
 def pytest__teardown_final(session):
-    call = CallInfo(session.config._setupstate.teardown_all, when="teardown")
+    call = CallInfo(session._setupstate.teardown_all, when="teardown")
     if call.excinfo:
         ntraceback = call.excinfo.traceback .cut(excludepath=py._pydir)
         call.excinfo.traceback = ntraceback.filter()
diff --git a/lib-python/conftest.py b/lib-python/conftest.py
--- a/lib-python/conftest.py
+++ b/lib-python/conftest.py
@@ -569,7 +569,6 @@
 #
 import os
 import time
-import socket
 import getpass
 
 class ReallyRunFileExternal(py.test.collect.Item): 
diff --git a/lib-python/modified-2.7/ctypes/__init__.py b/lib-python/modified-2.7/ctypes/__init__.py
--- a/lib-python/modified-2.7/ctypes/__init__.py
+++ b/lib-python/modified-2.7/ctypes/__init__.py
@@ -7,6 +7,7 @@
 
 __version__ = "1.1.0"
 
+import _ffi
 from _ctypes import Union, Structure, Array
 from _ctypes import _Pointer
 from _ctypes import CFuncPtr as _CFuncPtr
@@ -350,7 +351,7 @@
         self._FuncPtr = _FuncPtr
 
         if handle is None:
-            self._handle = _dlopen(self._name, mode)
+            self._handle = _ffi.CDLL(name)
         else:
             self._handle = handle
 
diff --git a/lib-python/modified-2.7/ctypes/test/test_cfuncs.py b/lib-python/modified-2.7/ctypes/test/test_cfuncs.py
--- a/lib-python/modified-2.7/ctypes/test/test_cfuncs.py
+++ b/lib-python/modified-2.7/ctypes/test/test_cfuncs.py
@@ -3,8 +3,8 @@
 
 import unittest
 from ctypes import *
-
 import _ctypes_test
+from test.test_support import impl_detail
 
 class CFunctions(unittest.TestCase):
     _dll = CDLL(_ctypes_test.__file__)
@@ -158,12 +158,14 @@
         self.assertEqual(self._dll.tf_bd(0, 42.), 14.)
         self.assertEqual(self.S(), 42)
 
+    @impl_detail('long double not supported by PyPy', pypy=False)
     def test_longdouble(self):
         self._dll.tf_D.restype = c_longdouble
         self._dll.tf_D.argtypes = (c_longdouble,)
         self.assertEqual(self._dll.tf_D(42.), 14.)
         self.assertEqual(self.S(), 42)
-
+        
+    @impl_detail('long double not supported by PyPy', pypy=False)
     def test_longdouble_plus(self):
         self._dll.tf_bD.restype = c_longdouble
         self._dll.tf_bD.argtypes = (c_byte, c_longdouble)
diff --git a/lib-python/modified-2.7/ctypes/test/test_functions.py b/lib-python/modified-2.7/ctypes/test/test_functions.py
--- a/lib-python/modified-2.7/ctypes/test/test_functions.py
+++ b/lib-python/modified-2.7/ctypes/test/test_functions.py
@@ -8,6 +8,7 @@
 from ctypes import *
 import sys, unittest
 from ctypes.test import xfail
+from test.test_support import impl_detail
 
 try:
     WINFUNCTYPE
@@ -144,6 +145,7 @@
         self.assertEqual(result, -21)
         self.assertEqual(type(result), float)
 
+    @impl_detail('long double not supported by PyPy', pypy=False)
     def test_longdoubleresult(self):
         f = dll._testfunc_D_bhilfD
         f.argtypes = [c_byte, c_short, c_int, c_long, c_float, c_longdouble]
diff --git a/lib-python/modified-2.7/ctypes/test/test_libc.py b/lib-python/modified-2.7/ctypes/test/test_libc.py
--- a/lib-python/modified-2.7/ctypes/test/test_libc.py
+++ b/lib-python/modified-2.7/ctypes/test/test_libc.py
@@ -26,6 +26,7 @@
         self.assertEqual(chars.raw, "   ,,aaaadmmmnpppsss\x00")
 
     def test_no_more_xfail(self):
+        import socket
         import ctypes.test
         self.assertTrue(not hasattr(ctypes.test, 'xfail'),
                         "You should incrementally grep for '@xfail' and remove them, they are real failures")
diff --git a/lib-python/modified-2.7/distutils/sysconfig.py b/lib-python/modified-2.7/distutils/sysconfig.py
--- a/lib-python/modified-2.7/distutils/sysconfig.py
+++ b/lib-python/modified-2.7/distutils/sysconfig.py
@@ -20,8 +20,10 @@
 if '__pypy__' in sys.builtin_module_names:
     from distutils.sysconfig_pypy import *
     from distutils.sysconfig_pypy import _config_vars # needed by setuptools
+    from distutils.sysconfig_pypy import _variable_rx # read_setup_file()
 else:
     from distutils.sysconfig_cpython import *
     from distutils.sysconfig_cpython import _config_vars # needed by setuptools
+    from distutils.sysconfig_cpython import _variable_rx # read_setup_file()
 
 
diff --git a/lib-python/modified-2.7/distutils/sysconfig_pypy.py b/lib-python/modified-2.7/distutils/sysconfig_pypy.py
--- a/lib-python/modified-2.7/distutils/sysconfig_pypy.py
+++ b/lib-python/modified-2.7/distutils/sysconfig_pypy.py
@@ -116,3 +116,7 @@
     if compiler.compiler_type == "unix":
         compiler.compiler_so.extend(['-fPIC', '-Wimplicit'])
         compiler.shared_lib_extension = get_config_var('SO')
+
+from sysconfig_cpython import (
+    parse_makefile, _variable_rx, expand_makefile_vars)
+
diff --git a/lib-python/modified-2.7/test/test_extcall.py b/lib-python/modified-2.7/test/test_extcall.py
--- a/lib-python/modified-2.7/test/test_extcall.py
+++ b/lib-python/modified-2.7/test/test_extcall.py
@@ -299,7 +299,7 @@
         def f(a):
             return a
         self.assertEqual(f(**{u'a': 4}), 4)
-        self.assertRaises(TypeError, lambda: f(**{u'stören': 4}))
+        self.assertRaises(TypeError, f, **{u'stören': 4})
         self.assertRaises(TypeError, f, **{u'someLongString':2})
         try:
             f(a=4, **{u'a': 4})
diff --git a/lib-python/2.7/test/test_multibytecodec.py b/lib-python/modified-2.7/test/test_multibytecodec.py
copy from lib-python/2.7/test/test_multibytecodec.py
copy to lib-python/modified-2.7/test/test_multibytecodec.py
--- a/lib-python/2.7/test/test_multibytecodec.py
+++ b/lib-python/modified-2.7/test/test_multibytecodec.py
@@ -42,7 +42,7 @@
         dec = codecs.getdecoder('euc-kr')
         myreplace  = lambda exc: (u'', sys.maxint+1)
         codecs.register_error('test.cjktest', myreplace)
-        self.assertRaises(IndexError, dec,
+        self.assertRaises((IndexError, OverflowError), dec,
                           'apple\x92ham\x93spam', 'test.cjktest')
 
     def test_codingspec(self):
diff --git a/lib-python/2.7/test/test_multibytecodec_support.py b/lib-python/modified-2.7/test/test_multibytecodec_support.py
copy from lib-python/2.7/test/test_multibytecodec_support.py
copy to lib-python/modified-2.7/test/test_multibytecodec_support.py
--- a/lib-python/2.7/test/test_multibytecodec_support.py
+++ b/lib-python/modified-2.7/test/test_multibytecodec_support.py
@@ -107,8 +107,8 @@
         def myreplace(exc):
             return (u'x', sys.maxint + 1)
         codecs.register_error("test.cjktest", myreplace)
-        self.assertRaises(IndexError, self.encode, self.unmappedunicode,
-                          'test.cjktest')
+        self.assertRaises((IndexError, OverflowError), self.encode,
+                          self.unmappedunicode, 'test.cjktest')
 
     def test_callback_None_index(self):
         def myreplace(exc):
diff --git a/lib-python/modified-2.7/test/test_support.py b/lib-python/modified-2.7/test/test_support.py
--- a/lib-python/modified-2.7/test/test_support.py
+++ b/lib-python/modified-2.7/test/test_support.py
@@ -1066,7 +1066,7 @@
         if '--pdb' in sys.argv:
             import pdb, traceback
             traceback.print_tb(exc_info[2])
-            pdb.post_mortem(exc_info[2], pdb.Pdb)
+            pdb.post_mortem(exc_info[2])
 
 # ----------------------------------
 
diff --git a/lib_pypy/_ctypes/array.py b/lib_pypy/_ctypes/array.py
--- a/lib_pypy/_ctypes/array.py
+++ b/lib_pypy/_ctypes/array.py
@@ -208,6 +208,9 @@
     def _get_buffer_value(self):
         return self._buffer.buffer
 
+    def _to_ffi_param(self):
+        return self._get_buffer_value()
+
 ARRAY_CACHE = {}
 
 def create_array_type(base, length):
diff --git a/lib_pypy/_ctypes/basics.py b/lib_pypy/_ctypes/basics.py
--- a/lib_pypy/_ctypes/basics.py
+++ b/lib_pypy/_ctypes/basics.py
@@ -1,5 +1,6 @@
 
 import _rawffi
+import _ffi
 import sys
 
 keepalive_key = str # XXX fix this when provided with test
@@ -46,6 +47,14 @@
         else:
             return self.from_param(as_parameter)
 
+    def get_ffi_param(self, value):
+        return self.from_param(value)._to_ffi_param()
+
+    def get_ffi_argtype(self):
+        if self._ffiargtype:
+            return self._ffiargtype
+        return _shape_to_ffi_type(self._ffiargshape)
+
     def _CData_output(self, resbuffer, base=None, index=-1):
         #assert isinstance(resbuffer, _rawffi.ArrayInstance)
         """Used when data exits ctypes and goes into user code.
@@ -99,6 +108,7 @@
     """
     __metaclass__ = _CDataMeta
     _objects = None
+    _ffiargtype = None
 
     def __init__(self, *args, **kwds):
         raise TypeError("%s has no type" % (type(self),))
@@ -119,6 +129,12 @@
     def _get_buffer_value(self):
         return self._buffer[0]
 
+    def _to_ffi_param(self):
+        if self.__class__._is_pointer_like():
+            return self._get_buffer_value()
+        else:
+            return self.value
+
     def __buffer__(self):
         return buffer(self._buffer)
 
@@ -150,7 +166,7 @@
     return pointer(cdata)
 
 def cdata_from_address(self, address):
-    # fix the address, in case it's unsigned
+    # fix the address: turn it into an unsigned integer, in case it's a negative number
     address = address & (sys.maxint * 2 + 1)
     instance = self.__new__(self)
     lgt = getattr(self, '_length_', 1)
@@ -159,3 +175,48 @@
 
 def addressof(tp):
     return tp._buffer.buffer
+
+
+# ----------------------------------------------------------------------
+
+def is_struct_shape(shape):
+    # see the corresponding code to set the shape in
+    # _ctypes.structure._set_shape
+    return (isinstance(shape, tuple) and
+            len(shape) == 2 and
+            isinstance(shape[0], _rawffi.Structure) and
+            shape[1] == 1)
+
+def _shape_to_ffi_type(shape):
+    try:
+        return _shape_to_ffi_type.typemap[shape]
+    except KeyError:
+        pass
+    if is_struct_shape(shape):
+        return shape[0].get_ffi_type()
+    #
+    assert False, 'unknown shape %s' % (shape,)
+
+
+_shape_to_ffi_type.typemap =  {
+    'c' : _ffi.types.char,
+    'b' : _ffi.types.sbyte,
+    'B' : _ffi.types.ubyte,
+    'h' : _ffi.types.sshort,
+    'u' : _ffi.types.unichar,
+    'H' : _ffi.types.ushort,
+    'i' : _ffi.types.sint,
+    'I' : _ffi.types.uint,
+    'l' : _ffi.types.slong,
+    'L' : _ffi.types.ulong,
+    'q' : _ffi.types.slonglong,
+    'Q' : _ffi.types.ulonglong,
+    'f' : _ffi.types.float,
+    'd' : _ffi.types.double,
+    's' : _ffi.types.void_p,
+    'P' : _ffi.types.void_p,
+    'z' : _ffi.types.void_p,
+    'O' : _ffi.types.void_p,
+    'Z' : _ffi.types.void_p,
+    }
+
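
get_ffi_argtype() and _shape_to_ffi_type() above translate the old _rawffi shape characters into _ffi type objects; struct shapes are detected by is_struct_shape() and ask the _rawffi.Structure for its ffi type. A hedged illustration (PyPy only, since _ffi is a PyPy built-in module):

    import _ffi
    from _ctypes.basics import _shape_to_ffi_type, is_struct_shape

    assert _shape_to_ffi_type('i') is _ffi.types.sint    # from the typemap
    assert _shape_to_ffi_type('P') is _ffi.types.void_p
    assert not is_struct_shape('i')   # plain shape chars are not structs
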
diff --git a/lib_pypy/_ctypes/function.py b/lib_pypy/_ctypes/function.py
--- a/lib_pypy/_ctypes/function.py
+++ b/lib_pypy/_ctypes/function.py
@@ -1,12 +1,15 @@
+
+from _ctypes.basics import _CData, _CDataMeta, cdata_from_address
+from _ctypes.primitive import SimpleType, _SimpleCData
+from _ctypes.basics import ArgumentError, keepalive_key
+from _ctypes.basics import is_struct_shape
+from _ctypes.builtin import set_errno, set_last_error
 import _rawffi
+import _ffi
 import sys
 import traceback
 import warnings
 
-from _ctypes.basics import ArgumentError, keepalive_key
-from _ctypes.basics import _CData, _CDataMeta, cdata_from_address
-from _ctypes.builtin import set_errno, set_last_error
-from _ctypes.primitive import SimpleType
 
 # XXX this file needs huge refactoring I fear
 
@@ -24,6 +27,7 @@
 
 WIN64 = sys.platform == 'win32' and sys.maxint == 2**63 - 1
 
+
 def get_com_error(errcode, riid, pIunk):
     "Win32 specific: build a COM Error exception"
     # XXX need C support code
@@ -36,6 +40,7 @@
     funcptr.restype = int
     return funcptr(*args)
 
+
 class CFuncPtrType(_CDataMeta):
     # XXX write down here defaults and such things
 
@@ -50,6 +55,7 @@
 
     from_address = cdata_from_address
 
+
 class CFuncPtr(_CData):
     __metaclass__ = CFuncPtrType
 
@@ -65,10 +71,12 @@
     callable = None
     _ptr = None
     _buffer = None
+    _address = None
     # win32 COM properties
     _paramflags = None
     _com_index = None
     _com_iid = None
+    _is_fastpath = False
 
     __restype_set = False
 
@@ -85,8 +93,11 @@
                     raise TypeError(
                         "item %d in _argtypes_ has no from_param method" % (
                             i + 1,))
-            self._argtypes_ = argtypes
-
+            #
+            if all([hasattr(argtype, '_ffiargshape') for argtype in argtypes]):
+                fastpath_cls = make_fastpath_subclass(self.__class__)
+                fastpath_cls.enable_fastpath_maybe(self)
+            self._argtypes_ = list(argtypes)
     argtypes = property(_getargtypes, _setargtypes)
 
     def _getparamflags(self):
@@ -133,6 +144,7 @@
 
     paramflags = property(_getparamflags, _setparamflags)
 
+
     def _getrestype(self):
         return self._restype_
 
@@ -146,27 +158,24 @@
                 callable(restype)):
             raise TypeError("restype must be a type, a callable, or None")
         self._restype_ = restype
-
+        
     def _delrestype(self):
         self._ptr = None
         del self._restype_
-
+        
     restype = property(_getrestype, _setrestype, _delrestype)
 
     def _geterrcheck(self):
         return getattr(self, '_errcheck_', None)
-
     def _seterrcheck(self, errcheck):
         if not callable(errcheck):
             raise TypeError("The errcheck attribute must be callable")
         self._errcheck_ = errcheck
-
     def _delerrcheck(self):
         try:
             del self._errcheck_
         except AttributeError:
             pass
-
     errcheck = property(_geterrcheck, _seterrcheck, _delerrcheck)
 
     def _ffishapes(self, args, restype):
@@ -181,6 +190,14 @@
             restype = 'O' # void
         return argtypes, restype
 
+    def _set_address(self, address):
+        if not self._buffer:
+            self._buffer = _rawffi.Array('P')(1)
+        self._buffer[0] = address
+
+    def _get_address(self):
+        return self._buffer[0]
+
     def __init__(self, *args):
         self.name = None
         self._objects = {keepalive_key(0):self}
@@ -188,7 +205,7 @@
 
         # Empty function object -- this is needed for casts
         if not args:
-            self._buffer = _rawffi.Array('P')(1)
+            self._set_address(0)
             return
 
         argsl = list(args)
@@ -196,20 +213,24 @@
 
         # Direct construction from raw address
         if isinstance(argument, (int, long)) and not argsl:
-            ffiargs, ffires = self._ffishapes(self._argtypes_, self._restype_)
-            self._ptr = _rawffi.FuncPtr(argument, ffiargs, ffires, self._flags_)
-            self._buffer = self._ptr.byptr()
+            self._set_address(argument)
+            restype = self._restype_
+            if restype is None:
+                import ctypes
+                restype = ctypes.c_int
+            self._ptr = self._getfuncptr_fromaddress(self._argtypes_, restype)
             return
 
-        # A callback into Python
+        
+        # A callback into python
         if callable(argument) and not argsl:
             self.callable = argument
             ffiargs, ffires = self._ffishapes(self._argtypes_, self._restype_)
             if self._restype_ is None:
                 ffires = None
-            self._ptr = _rawffi.CallbackPtr(self._wrap_callable(
-                argument, self.argtypes
-                ), ffiargs, ffires, self._flags_)
+            self._ptr = _rawffi.CallbackPtr(self._wrap_callable(argument,
+                                                                self.argtypes),
+                                            ffiargs, ffires, self._flags_)
             self._buffer = self._ptr.byptr()
             return
 
@@ -218,7 +239,7 @@
             import ctypes
             self.name, dll = argument
             if isinstance(dll, str):
-                self.dll = ctypes.CDLL(dll)
+                self.dll = ctypes.CDLL(dll)
             else:
                 self.dll = dll
             if argsl:
@@ -227,7 +248,7 @@
                     raise TypeError("Unknown constructor %s" % (args,))
             # We need to check dll anyway
             ptr = self._getfuncptr([], ctypes.c_int)
-            self._buffer = ptr.byptr()
+            self._set_address(ptr.getaddr())
             return
 
         # A COM function call, by index
@@ -270,15 +291,15 @@
                     # than the length of the argtypes tuple.
                     args = args[:len(self._argtypes_)]
             else:
-                plural = len(argtypes) > 1 and "s" or ""
+                plural = len(self._argtypes_) > 1 and "s" or ""
                 raise TypeError(
                     "This function takes %d argument%s (%s given)"
-                    % (len(argtypes), plural, len(args)))
+                    % (len(self._argtypes_), plural, len(args)))
 
             # check that arguments are convertible
             ## XXX Not as long as ctypes.cast is a callback function with
             ## py_object arguments...
-            ## self._convert_args(argtypes, args, {})
+            ## self._convert_args(self._argtypes_, args, {})
 
             try:
                 res = self.callable(*args)
@@ -301,6 +322,7 @@
                           RuntimeWarning, stacklevel=2)
 
         if self._com_index:
+            assert False, 'TODO2'
             from ctypes import cast, c_void_p, POINTER
             if not args:
                 raise ValueError(
@@ -312,77 +334,63 @@
             args[0] = args[0].value
         else:
             thisarg = None
+            
+        newargs, argtypes, outargs = self._convert_args(argtypes, args, kwargs)
 
-        args, outargs = self._convert_args(argtypes, args, kwargs)
-        argtypes = [type(arg) for arg in args]
+        funcptr = self._getfuncptr(argtypes, self._restype_, thisarg)
+        result = self._call_funcptr(funcptr, *newargs)
+        result = self._do_errcheck(result, args)
 
-        restype = self._restype_
-        funcptr = self._getfuncptr(argtypes, restype, thisarg)
+        if not outargs:
+            return result
+        if len(outargs) == 1:
+            return outargs[0]
+        return tuple(outargs)
+
+    def _call_funcptr(self, funcptr, *newargs):
+
         if self._flags_ & _rawffi.FUNCFLAG_USE_ERRNO:
             set_errno(_rawffi.get_errno())
         if self._flags_ & _rawffi.FUNCFLAG_USE_LASTERROR:
             set_last_error(_rawffi.get_last_error())
         try:
-            resbuffer = funcptr(*[arg._get_buffer_for_param()._buffer
-                                  for arg in args])
+            result = funcptr(*newargs)
         finally:
             if self._flags_ & _rawffi.FUNCFLAG_USE_ERRNO:
                 set_errno(_rawffi.get_errno())
             if self._flags_ & _rawffi.FUNCFLAG_USE_LASTERROR:
                 set_last_error(_rawffi.get_last_error())
+        #
+        return self._build_result(self._restype_, result, newargs)
 
-        result = None
-        if self._com_index:
-            if resbuffer[0] & 0x80000000:
-                raise get_com_error(resbuffer[0],
-                                    self._com_iid, args[0])
-            else:
-                result = int(resbuffer[0])
-        elif restype is not None:
-            checker = getattr(self.restype, '_check_retval_', None)
-            if checker:
-                val = restype(resbuffer[0])
-                # the original ctypes seems to make the distinction between
-                # classes defining a new type, and their subclasses
-                if '_type_' in restype.__dict__:
-                    val = val.value
-                result = checker(val)
-            elif not isinstance(restype, _CDataMeta):
-                result = restype(resbuffer[0])
-            else:
-                result = restype._CData_retval(resbuffer)
-
+    def _do_errcheck(self, result, args):
         # The 'errcheck' protocol
         if self._errcheck_:
             v = self._errcheck_(result, self, args)
             # If the errcheck function failed, let it throw
-            # If the errcheck function returned callargs unchanged,
+            # If the errcheck function returned newargs unchanged,
             # continue normal processing.
             # If the errcheck function returned something else,
             # use that as result.
             if v is not args:
-                result = v
+                return v
+        return result
 
-        if not outargs:
-            return result
-
-        if len(outargs) == 1:
-            return outargs[0]
-
-        return tuple(outargs)
+    def _getfuncptr_fromaddress(self, argtypes, restype):
+        address = self._get_address()
+        ffiargs = [argtype.get_ffi_argtype() for argtype in argtypes]
+        ffires = restype.get_ffi_argtype()
+        return _ffi.FuncPtr.fromaddr(address, '', ffiargs, ffires)
 
     def _getfuncptr(self, argtypes, restype, thisarg=None):
-        if self._ptr is not None and argtypes is self._argtypes_:
+        if self._ptr is not None and (argtypes is self._argtypes_ or argtypes == self._argtypes_):
             return self._ptr
         if restype is None or not isinstance(restype, _CDataMeta):
             import ctypes
             restype = ctypes.c_int
-        argshapes = [arg._ffiargshape for arg in argtypes]
-        resshape = restype._ffiargshape
         if self._buffer is not None:
-            ptr = _rawffi.FuncPtr(self._buffer[0], argshapes, resshape,
-                                  self._flags_)
-            if argtypes is self._argtypes_:
+            ptr = self._getfuncptr_fromaddress(argtypes, restype)
+            if argtypes == self._argtypes_:
                 self._ptr = ptr
             return ptr
 
@@ -391,14 +399,20 @@
             if not thisarg:
                 raise ValueError("COM method call without VTable")
             ptr = thisarg[self._com_index - 0x1000]
+            argshapes = [arg._ffiargshape for arg in argtypes]
+            resshape = restype._ffiargshape
             return _rawffi.FuncPtr(ptr, argshapes, resshape, self._flags_)
-
+        
         cdll = self.dll._handle
         try:
-            return cdll.ptr(self.name, argshapes, resshape, self._flags_)
+            ffi_argtypes = [argtype.get_ffi_argtype() for argtype in argtypes]
+            ffi_restype = restype.get_ffi_argtype()
+            self._ptr = cdll.getfunc(self.name, ffi_argtypes, ffi_restype)
+            return self._ptr
         except AttributeError:
             if self._flags_ & _rawffi.FUNCFLAG_CDECL:
                 raise
+
             # Win64 has no stdcall calling conv, so it should also not have the
             # name mangling of it.
             if WIN64:
@@ -409,23 +423,33 @@
             for i in range(33):
                 mangled_name = "_%s@%d" % (self.name, i*4)
                 try:
-                    return cdll.ptr(mangled_name, argshapes, resshape,
-                                    self._flags_)
+                    return cdll.getfunc(mangled_name,
+                                        ffi_argtypes, ffi_restype,
+                                        # XXX self._flags_
+                                        )
                 except AttributeError:
                     pass
             raise
 
-    @staticmethod
-    def _conv_param(argtype, arg):
-        from ctypes import c_char_p, c_wchar_p, c_void_p, c_int
+    @classmethod
+    def _conv_param(cls, argtype, arg):
+        if isinstance(argtype, _CDataMeta):
+            #arg = argtype.from_param(arg)
+            arg = argtype.get_ffi_param(arg)
+            return arg, argtype
+        
         if argtype is not None:
             arg = argtype.from_param(arg)
         if hasattr(arg, '_as_parameter_'):
             arg = arg._as_parameter_
         if isinstance(arg, _CData):
-            # The usual case when argtype is defined
-            cobj = arg
-        elif isinstance(arg, str):
+            return arg._to_ffi_param(), type(arg)
+        #
+        # non-usual case: we do the import here to save a lot of code in the
+        # jit trace of the normal case
+        from ctypes import c_char_p, c_wchar_p, c_void_p, c_int
+        #
+        if isinstance(arg, str):
             cobj = c_char_p(arg)
         elif isinstance(arg, unicode):
             cobj = c_wchar_p(arg)
@@ -435,11 +459,13 @@
             cobj = c_int(arg)
         else:
             raise TypeError("Don't know how to handle %s" % (arg,))
-        return cobj
+
+        return cobj._to_ffi_param(), type(cobj)
 
     def _convert_args(self, argtypes, args, kwargs, marker=object()):
-        callargs = []
+        newargs = []
         outargs = []
+        newargtypes = []
         total = len(args)
         paramflags = self._paramflags
 
@@ -470,8 +496,9 @@
                     val = defval
                     if val is marker:
                         val = 0
-                    wrapped = self._conv_param(argtype, val)
-                    callargs.append(wrapped)
+                    newarg, newargtype = self._conv_param(argtype, val)
+                    newargs.append(newarg)
+                    newargtypes.append(newargtype)
                 elif flag in (0, PARAMFLAG_FIN):
                     if inargs_idx < total:
                         val = args[inargs_idx]
@@ -485,38 +512,102 @@
                         raise TypeError("required argument '%s' missing" % name)
                     else:
                         raise TypeError("not enough arguments")
-                    wrapped = self._conv_param(argtype, val)
-                    callargs.append(wrapped)
+                    newarg, newargtype = self._conv_param(argtype, val)
+                    newargs.append(newarg)
+                    newargtypes.append(newargtype)
                 elif flag == PARAMFLAG_FOUT:
                     if defval is not marker:
                         outargs.append(defval)
-                        wrapped = self._conv_param(argtype, defval)
+                        newarg, newargtype = self._conv_param(argtype, defval)
                     else:
                         import ctypes
                         val = argtype._type_()
                         outargs.append(val)
-                        wrapped = ctypes.byref(val)
-                    callargs.append(wrapped)
+                        newarg = ctypes.byref(val)
+                        newargtype = type(newarg)
+                    newargs.append(newarg)
+                    newargtypes.append(newargtype)
                 else:
                     raise ValueError("paramflag %d not yet implemented" % flag)
             else:
                 try:
-                    wrapped = self._conv_param(argtype, args[i])
+                    newarg, newargtype = self._conv_param(argtype, args[i])
                 except (UnicodeError, TypeError, ValueError), e:
                     raise ArgumentError(str(e))
-                callargs.append(wrapped)
+                newargs.append(newarg)
+                newargtypes.append(newargtype)
                 inargs_idx += 1
 
-        if len(callargs) < total:
-            extra = args[len(callargs):]
+        if len(newargs) < len(args):
+            extra = args[len(newargs):]
             for i, arg in enumerate(extra):
                 try:
-                    wrapped = self._conv_param(None, arg)
+                    newarg, newargtype = self._conv_param(None, arg)
                 except (UnicodeError, TypeError, ValueError), e:
                     raise ArgumentError(str(e))
-                callargs.append(wrapped)
+                newargs.append(newarg)
+                newargtypes.append(newargtype)
+        return newargs, newargtypes, outargs
 
-        return callargs, outargs
+    
+    def _wrap_result(self, restype, result):
+        """
+        Convert from low-level repr of the result to the high-level python
+        one.
+        """
+        # hack for performance: if restype is a "simple" primitive type, don't
+        # allocate the buffer because it's going to be thrown away immediately
+        if restype.__bases__[0] is _SimpleCData and not restype._is_pointer_like():
+            return result
+        #
+        shape = restype._ffishape
+        if is_struct_shape(shape):
+            buf = result
+        else:
+            buf = _rawffi.Array(shape)(1, autofree=True)
+            buf[0] = result
+        retval = restype._CData_retval(buf)
+        return retval
+
+    def _build_result(self, restype, result, argsandobjs):
+        """Build the function result:
+           If there is no OUT parameter, return the actual function result
+           If there is one OUT parameter, return it
+           If there are many OUT parameters, return a tuple"""
+
+        # XXX: note for the future: the function used to take a "resbuffer",
+        # i.e. an array of ints. Now it takes a result, which is already a
+        # python object. All places that do "resbuffer[0]" should check that
+        # result is actually an int and just use it.
+        #
+        # Also, argsandobjs used to be "args" in __call__, now it's "newargs"
+        # (i.e., the already unwrapped objects). It's used only when we have a
+        # PARAMFLAG_FOUT and it's probably wrong, I'll fix it when I find a
+        # failing test
+
+        retval = None
+
+        if self._com_index:
+            if resbuffer[0] & 0x80000000:
+                raise get_com_error(resbuffer[0],
+                                    self._com_iid, argsandobjs[0])
+            else:
+                retval = int(resbuffer[0])
+        elif restype is not None:
+            checker = getattr(self.restype, '_check_retval_', None)
+            if checker:
+                val = restype(result)
+                # the original ctypes seems to make the distinction between
+                # classes defining a new type, and their subclasses
+                if '_type_' in restype.__dict__:
+                    val = val.value
+                retval = checker(val)
+            elif not isinstance(restype, _CDataMeta):
+                retval = restype(result)
+            else:
+                retval = self._wrap_result(restype, result)
+
+        return retval
 
     def __nonzero__(self):
         return self._com_index is not None or bool(self._buffer[0])
@@ -532,3 +623,61 @@
                 self._ptr.free()
                 self._ptr = None
             self._needs_free = False
+
+
+def make_fastpath_subclass(CFuncPtr):
+    if CFuncPtr._is_fastpath:
+        return CFuncPtr
+    #
+    try:
+        return make_fastpath_subclass.memo[CFuncPtr]
+    except KeyError:
+        pass
+
+    class CFuncPtrFast(CFuncPtr):
+
+        _is_fastpath = True
+        _slowpath_allowed = True # set to False by tests
+
+        @classmethod
+        def enable_fastpath_maybe(cls, obj):
+            if (obj.callable is None and
+                obj._com_index is None):
+                obj.__class__ = cls
+
+        def __rollback(self):
+            assert self._slowpath_allowed
+            self.__class__ = CFuncPtr
+
+        # disable the fast path if we reset argtypes
+        def _setargtypes(self, argtypes):
+            self.__rollback()
+            self._setargtypes(argtypes)
+        argtypes = property(CFuncPtr._getargtypes, _setargtypes)
+
+        def _setcallable(self, func):
+            self.__rollback()
+            self.callable = func
+        callable = property(lambda x: None, _setcallable)
+
+        def _setcom_index(self, idx):
+            self.__rollback()
+            self._com_index = idx
+        _com_index = property(lambda x: None, _setcom_index)
+
+        def __call__(self, *args):
+            thisarg = None
+            argtypes = self._argtypes_
+            restype = self._restype_
+            funcptr = self._getfuncptr(argtypes, restype, thisarg)
+            try:
+                result = self._call_funcptr(funcptr, *args)
+                result = self._do_errcheck(result, args)
+            except (TypeError, ArgumentError): # XXX, should be FFITypeError
+                assert self._slowpath_allowed
+                return CFuncPtr.__call__(self, *args)
+            return result
+
+    make_fastpath_subclass.memo[CFuncPtr] = CFuncPtrFast
+    return CFuncPtrFast
+make_fastpath_subclass.memo = {}
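
make_fastpath_subclass() above speculatively switches an instance to a specialised subclass while its assumptions hold, and rolls back to the generic class as soon as argtypes, callable or _com_index are changed. A stripped-down sketch of that class-swapping pattern, outside of ctypes:

    class Generic(object):
        def call(self, *args):
            return "slow path"

    class Fast(Generic):
        def call(self, *args):
            return "fast path"
        def invalidate(self):
            # analogous to CFuncPtrFast.__rollback: drop back to the
            # generic class when a change breaks the fast path's assumptions
            self.__class__ = Generic

    obj = Generic()
    obj.__class__ = Fast              # what enable_fastpath_maybe() does
    assert obj.call() == "fast path"
    obj.invalidate()
    assert obj.call() == "slow path"
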
diff --git a/lib_pypy/_ctypes/pointer.py b/lib_pypy/_ctypes/pointer.py
--- a/lib_pypy/_ctypes/pointer.py
+++ b/lib_pypy/_ctypes/pointer.py
@@ -1,6 +1,7 @@
 
 import _rawffi
-from _ctypes.basics import _CData, _CDataMeta, cdata_from_address
+import _ffi
+from _ctypes.basics import _CData, _CDataMeta, cdata_from_address, ArgumentError
 from _ctypes.basics import keepalive_key, store_reference, ensure_objects
 from _ctypes.basics import sizeof, byref
 from _ctypes.array import Array, array_get_slice_params, array_slice_getitem,\
@@ -19,7 +20,7 @@
             length     = 1,
             _ffiargshape = 'P',
             _ffishape  = 'P',
-            _fficompositesize = None
+            _fficompositesize = None,
         )
         # XXX check if typedict['_type_'] is any sane
         # XXX remember about paramfunc
@@ -66,6 +67,7 @@
         self._ffiarray = ffiarray
         self.__init__ = __init__
         self._type_ = TP
+        self._ffiargtype = _ffi.types.Pointer(TP.get_ffi_argtype())
 
     from_address = cdata_from_address
 
@@ -114,6 +116,17 @@
 
     contents = property(getcontents, setcontents)
 
+    def _as_ffi_pointer_(self, ffitype):
+        return as_ffi_pointer(self, ffitype)
+
+def as_ffi_pointer(value, ffitype):
+    my_ffitype = type(value).get_ffi_argtype()
+    # for now, we always allow types.pointer, else a lot of tests
+    # break. We need to rethink how pointers are represented, though
+    if my_ffitype is not ffitype and ffitype is not _ffi.types.void_p:
+        raise ArgumentError, "expected %s instance, got %s" % (type(value), ffitype)
+    return value._get_buffer_value()
+
 def _cast_addr(obj, _, tp):
     if not (isinstance(tp, _CDataMeta) and tp._is_pointer_like()):
         raise TypeError("cast() argument 2 must be a pointer type, not %s"
diff --git a/lib_pypy/_ctypes/primitive.py b/lib_pypy/_ctypes/primitive.py
--- a/lib_pypy/_ctypes/primitive.py
+++ b/lib_pypy/_ctypes/primitive.py
@@ -1,3 +1,4 @@
+import _ffi
 import _rawffi
 import weakref
 import sys
@@ -8,7 +9,7 @@
      CArgObject
 from _ctypes.builtin import ConvMode
 from _ctypes.array import Array
-from _ctypes.pointer import _Pointer
+from _ctypes.pointer import _Pointer, as_ffi_pointer
 
 class NULL(object):
     pass
@@ -140,6 +141,8 @@
                     value = 0
                 self._buffer[0] = value
             result.value = property(_getvalue, _setvalue)
+            result._ffiargtype = _ffi.types.Pointer(_ffi.types.char)
+
         elif tp == 'Z':
             # c_wchar_p
             def _getvalue(self):
@@ -162,6 +165,7 @@
                     value = 0
                 self._buffer[0] = value
             result.value = property(_getvalue, _setvalue)
+            result._ffiargtype = _ffi.types.Pointer(_ffi.types.unichar)
 
         elif tp == 'P':
             # c_void_p
@@ -248,6 +252,12 @@
                     self._buffer[0] = 0  # VARIANT_FALSE
             result.value = property(_getvalue, _setvalue)
 
+        # make pointer-types compatible with the _ffi fast path
+        if result._is_pointer_like():
+            def _as_ffi_pointer_(self, ffitype):
+                return as_ffi_pointer(self, ffitype)
+            result._as_ffi_pointer_ = _as_ffi_pointer_
+            
         return result
 
     from_address = cdata_from_address
diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py
--- a/lib_pypy/_ctypes/structure.py
+++ b/lib_pypy/_ctypes/structure.py
@@ -240,6 +240,9 @@
     def _get_buffer_value(self):
         return self._buffer.buffer
 
+    def _to_ffi_param(self):
+        return self._buffer
+
 
 class StructureMeta(StructOrUnionMeta):
     _is_union = False
diff --git a/lib_pypy/_sqlite3.py b/lib_pypy/_sqlite3.py
--- a/lib_pypy/_sqlite3.py
+++ b/lib_pypy/_sqlite3.py
@@ -275,7 +275,8 @@
     return unicode(x, 'utf-8')
 
 class Connection(object):
-    def __init__(self, database, isolation_level="", detect_types=0, timeout=None, cached_statements=None, factory=None):
+    def __init__(self, database, timeout=5.0, detect_types=0, isolation_level="",
+                 check_same_thread=True, factory=None, cached_statements=100):
         self.db = c_void_p()
         if sqlite.sqlite3_open(database, byref(self.db)) != SQLITE_OK:
             raise OperationalError("Could not open database")
@@ -308,7 +309,8 @@
         self._aggregates = {}
         self.aggregate_instances = {}
         self._collations = {}
-        self.thread_ident = thread_get_ident()
+        if check_same_thread:
+            self.thread_ident = thread_get_ident()
 
     def _get_exception(self, error_code = None):
         if error_code is None:
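
The new Connection signature matches CPython's sqlite3 module, so the usual connect() keyword arguments pass through unchanged. A small usage sketch (the in-memory database is just an example; check_same_thread=False means the caller takes responsibility for cross-thread use):

    import sqlite3

    conn = sqlite3.connect(':memory:', timeout=10.0, check_same_thread=False)
    cur = conn.cursor()
    cur.execute('CREATE TABLE t (x INTEGER)')
    cur.execute('INSERT INTO t VALUES (42)')
    assert cur.execute('SELECT x FROM t').fetchone() == (42,)
    conn.close()
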
diff --git a/lib_pypy/ctypes_support.py b/lib_pypy/ctypes_support.py
--- a/lib_pypy/ctypes_support.py
+++ b/lib_pypy/ctypes_support.py
@@ -10,8 +10,8 @@
 # __________ the standard C library __________
 
 if sys.platform == 'win32':
-    import _rawffi
-    standard_c_lib = ctypes.CDLL('msvcrt', handle=_rawffi.get_libc())
+    import _ffi
+    standard_c_lib = ctypes.CDLL('msvcrt', handle=_ffi.get_libc())
 else:
     standard_c_lib = ctypes.CDLL(ctypes.util.find_library('c'))
 
diff --git a/lib_pypy/datetime.py b/lib_pypy/datetime.py
--- a/lib_pypy/datetime.py
+++ b/lib_pypy/datetime.py
@@ -1422,12 +1422,17 @@
             converter = _time.localtime
         else:
             converter = _time.gmtime
-        if 1 - (t % 1.0) < 0.000001:
-            t = float(int(t)) + 1
-        if t < 0:
-            t -= 1
+        if t < 0.0:
+            us = int(round(((-t) % 1.0) * 1000000))
+            if us > 0:
+                us = 1000000 - us
+                t -= 1.0
+        else:
+            us = int(round((t % 1.0) * 1000000))
+            if us == 1000000:
+                us = 0
+                t += 1.0
         y, m, d, hh, mm, ss, weekday, jday, dst = converter(t)
-        us = int((t % 1.0) * 1000000)
         ss = min(ss, 59)    # clamp out leap seconds if the platform has them
         result = cls(y, m, d, hh, mm, ss, us, tz)
         if tz is not None:
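
For clarity, the new microsecond handling above, extracted into a stand-alone sketch (split_timestamp is a hypothetical helper, not part of the diff): the fractional part of the timestamp is rounded to the nearest microsecond, carrying into the whole seconds when it rounds up to a full second, and negative timestamps are treated symmetrically.

    def split_timestamp(t):
        # same arithmetic as the fromtimestamp() change above
        if t < 0.0:
            us = int(round(((-t) % 1.0) * 1000000))
            if us > 0:
                us = 1000000 - us
                t -= 1.0
        else:
            us = int(round((t % 1.0) * 1000000))
            if us == 1000000:
                us = 0
                t += 1.0
        return t, us

    assert split_timestamp(0.4)[1] == 400000
    assert split_timestamp(0.9999996)[1] == 0    # rounds up into the seconds
    assert split_timestamp(-0.0000004)[1] == 0   # rounds to exactly 0 microseconds
    assert split_timestamp(-0.4)[1] == 600000    # -0.4s == -1s + 600000us
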
diff --git a/lib_pypy/msvcrt.py b/lib_pypy/msvcrt.py
--- a/lib_pypy/msvcrt.py
+++ b/lib_pypy/msvcrt.py
@@ -46,4 +46,42 @@
         e = get_errno()
         raise IOError(e, errno.errorcode[e])
 
+# Console I/O routines
+
+kbhit = _c._kbhit
+kbhit.argtypes = []
+kbhit.restype = ctypes.c_int
+
+getch = _c._getch
+getch.argtypes = []
+getch.restype = ctypes.c_char
+
+getwch = _c._getwch
+getwch.argtypes = []
+getwch.restype = ctypes.c_wchar
+
+getche = _c._getche
+getche.argtypes = []
+getche.restype = ctypes.c_char
+
+getwche = _c._getwche
+getwche.argtypes = []
+getwche.restype = ctypes.c_wchar
+
+putch = _c._putch
+putch.argtypes = [ctypes.c_char]
+putch.restype = None
+
+putwch = _c._putwch
+putwch.argtypes = [ctypes.c_wchar]
+putwch.restype = None
+
+ungetch = _c._ungetch
+ungetch.argtypes = [ctypes.c_char]
+ungetch.restype = None
+
+ungetwch = _c._ungetwch
+ungetwch.argtypes = [ctypes.c_wchar]
+ungetwch.restype = None
+
 del ctypes
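
A hedged usage sketch of the newly exposed console routines (Windows only, and only meaningful in an interactive console; they are thin bindings over the CRT functions of the same name):

    import msvcrt

    print('press a key...')
    while not msvcrt.kbhit():    # poll until a key is waiting
        pass
    ch = msvcrt.getch()          # read one character, no echo
    msvcrt.ungetch(ch)           # push it back into the console buffer
    assert msvcrt.getch() == ch
    msvcrt.putch(ch)             # echo it ourselves
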
diff --git a/lib_pypy/pypy_test/test_datetime.py b/lib_pypy/pypy_test/test_datetime.py
--- a/lib_pypy/pypy_test/test_datetime.py
+++ b/lib_pypy/pypy_test/test_datetime.py
@@ -32,4 +32,28 @@
     assert datetime.datetime.utcfromtimestamp(a).microsecond == 0
     assert datetime.datetime.utcfromtimestamp(a).second == 1
 
-    
+def test_more_datetime_rounding():
+    # this test was verified on top of CPython 2.7 (using a plain
+    # "import datetime" above)
+    expected_results = {
+        -1000.0: 'datetime.datetime(1970, 1, 1, 0, 43, 20)',
+        -999.9999996: 'datetime.datetime(1970, 1, 1, 0, 43, 20)',
+        -999.4: 'datetime.datetime(1970, 1, 1, 0, 43, 20, 600000)',
+        -999.0000004: 'datetime.datetime(1970, 1, 1, 0, 43, 21)',
+        -1.0: 'datetime.datetime(1970, 1, 1, 0, 59, 59)',
+        -0.9999996: 'datetime.datetime(1970, 1, 1, 0, 59, 59)',
+        -0.4: 'datetime.datetime(1970, 1, 1, 0, 59, 59, 600000)',
+        -0.0000004: 'datetime.datetime(1970, 1, 1, 1, 0)',
+        0.0: 'datetime.datetime(1970, 1, 1, 1, 0)',
+        0.0000004: 'datetime.datetime(1970, 1, 1, 1, 0)',
+        0.4: 'datetime.datetime(1970, 1, 1, 1, 0, 0, 400000)',
+        0.9999996: 'datetime.datetime(1970, 1, 1, 1, 0, 1)',
+        1000.0: 'datetime.datetime(1970, 1, 1, 1, 16, 40)',
+        1000.0000004: 'datetime.datetime(1970, 1, 1, 1, 16, 40)',
+        1000.4: 'datetime.datetime(1970, 1, 1, 1, 16, 40, 400000)',
+        1000.9999996: 'datetime.datetime(1970, 1, 1, 1, 16, 41)',
+        1293843661.191: 'datetime.datetime(2011, 1, 1, 2, 1, 1, 191000)',
+        }
+    for t in sorted(expected_results):
+        dt = datetime.datetime.fromtimestamp(t)
+        assert repr(dt) == expected_results[t]
diff --git a/lib_pypy/resource.py b/lib_pypy/resource.py
--- a/lib_pypy/resource.py
+++ b/lib_pypy/resource.py
@@ -7,7 +7,7 @@
 
 from ctypes_support import standard_c_lib as libc
 from ctypes_support import get_errno
-from ctypes import Structure, c_int, c_long, byref, sizeof
+from ctypes import Structure, c_int, c_long, byref, sizeof, POINTER
 from errno import EINVAL, EPERM
 import _structseq
 
@@ -25,6 +25,8 @@
 _setrlimit = libc.setrlimit
 try:
     _getpagesize = libc.getpagesize
+    _getpagesize.argtypes = ()
+    _getpagesize.restype = c_int
 except AttributeError:
     from os import sysconf
     _getpagesize = None
@@ -61,6 +63,10 @@
         ("ru_nivcsw", c_long),
     )
 
+_getrusage.argtypes = (c_int, POINTER(_struct_rusage))
+_getrusage.restype = c_int
+
+
 class struct_rusage:
     __metaclass__ = _structseq.structseqtype
 
@@ -94,6 +100,12 @@
         ("rlim_max", rlim_t),
     )
 
+_getrlimit.argtypes = (c_int, POINTER(rlimit))
+_getrlimit.restype = c_int
+_setrlimit.argtypes = (c_int, POINTER(rlimit))
+_setrlimit.restype = c_int
+
+
 @builtinify
 def getrusage(who):
     ru = _struct_rusage()
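
Declaring argtypes and restype explicitly, as done above for getpagesize, getrusage and getrlimit, stops ctypes from guessing argument conversions and (on PyPy) helps the faster _ffi-based call path kick in. The same pattern applied to another libc function, purely as an illustration (getpid is not part of this diff; assumes a POSIX libc):

    import ctypes, ctypes.util

    libc = ctypes.CDLL(ctypes.util.find_library('c'))
    libc.getpid.argtypes = ()          # no arguments
    libc.getpid.restype = ctypes.c_int
    assert libc.getpid() > 0
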
diff --git a/py/__init__.py b/py/__init__.py
--- a/py/__init__.py
+++ b/py/__init__.py
@@ -8,7 +8,7 @@
 
 (c) Holger Krekel and others, 2004-2010
 """
-__version__ = '1.4.3'
+__version__ = '1.4.4.dev1'
 
 from py import _apipkg
 
@@ -70,10 +70,6 @@
         'getrawcode'        : '._code.code:getrawcode',
         'patch_builtins'    : '._code.code:patch_builtins',
         'unpatch_builtins'  : '._code.code:unpatch_builtins',
-        '_AssertionError'   : '._code.assertion:AssertionError',
-        '_reinterpret_old'  : '._code.assertion:reinterpret_old',
-        '_reinterpret'      : '._code.assertion:reinterpret',
-        '_reprcompare'      : '._code.assertion:_reprcompare',
     },
 
     # backports and additions of builtins
diff --git a/py/_code/_assertionnew.py b/py/_code/_assertionnew.py
deleted file mode 100644
--- a/py/_code/_assertionnew.py
+++ /dev/null
@@ -1,339 +0,0 @@
-"""
-Find intermediate evalutation results in assert statements through builtin AST.
-This should replace _assertionold.py eventually.
-"""
-
-import sys
-import ast
-
-import py
-from py._code.assertion import _format_explanation, BuiltinAssertionError
-
-
-if sys.platform.startswith("java") and sys.version_info < (2, 5, 2):
-    # See http://bugs.jython.org/issue1497
-    _exprs = ("BoolOp", "BinOp", "UnaryOp", "Lambda", "IfExp", "Dict",
-              "ListComp", "GeneratorExp", "Yield", "Compare", "Call",
-              "Repr", "Num", "Str", "Attribute", "Subscript", "Name",
-              "List", "Tuple")
-    _stmts = ("FunctionDef", "ClassDef", "Return", "Delete", "Assign",
-              "AugAssign", "Print", "For", "While", "If", "With", "Raise",
-              "TryExcept", "TryFinally", "Assert", "Import", "ImportFrom",
-              "Exec", "Global", "Expr", "Pass", "Break", "Continue")
-    _expr_nodes = set(getattr(ast, name) for name in _exprs)
-    _stmt_nodes = set(getattr(ast, name) for name in _stmts)
-    def _is_ast_expr(node):
-        return node.__class__ in _expr_nodes
-    def _is_ast_stmt(node):
-        return node.__class__ in _stmt_nodes
-else:
-    def _is_ast_expr(node):
-        return isinstance(node, ast.expr)
-    def _is_ast_stmt(node):
-        return isinstance(node, ast.stmt)
-
-
-class Failure(Exception):
-    """Error found while interpreting AST."""
-
-    def __init__(self, explanation=""):
-        self.cause = sys.exc_info()
-        self.explanation = explanation
-
-
-def interpret(source, frame, should_fail=False):
-    mod = ast.parse(source)
-    visitor = DebugInterpreter(frame)
-    try:
-        visitor.visit(mod)
-    except Failure:
-        failure = sys.exc_info()[1]
-        return getfailure(failure)
-    if should_fail:
-        return ("(assertion failed, but when it was re-run for "
-                "printing intermediate values, it did not fail.  Suggestions: "
-                "compute assert expression before the assert or use --no-assert)")
-
-def run(offending_line, frame=None):
-    if frame is None:
-        frame = py.code.Frame(sys._getframe(1))
-    return interpret(offending_line, frame)
-
-def getfailure(failure):
-    explanation = _format_explanation(failure.explanation)
-    value = failure.cause[1]
-    if str(value):
-        lines = explanation.splitlines()
-        if not lines:
-            lines.append("")
-        lines[0] += " << %s" % (value,)
-        explanation = "\n".join(lines)
-    text = "%s: %s" % (failure.cause[0].__name__, explanation)
-    if text.startswith("AssertionError: assert "):
-        text = text[16:]
-    return text
-
-
-operator_map = {
-    ast.BitOr : "|",
-    ast.BitXor : "^",
-    ast.BitAnd : "&",
-    ast.LShift : "<<",
-    ast.RShift : ">>",
-    ast.Add : "+",
-    ast.Sub : "-",
-    ast.Mult : "*",
-    ast.Div : "/",
-    ast.FloorDiv : "//",
-    ast.Mod : "%",
-    ast.Eq : "==",
-    ast.NotEq : "!=",
-    ast.Lt : "<",
-    ast.LtE : "<=",
-    ast.Gt : ">",
-    ast.GtE : ">=",
-    ast.Pow : "**",
-    ast.Is : "is",
-    ast.IsNot : "is not",
-    ast.In : "in",
-    ast.NotIn : "not in"
-}
-
-unary_map = {
-    ast.Not : "not %s",
-    ast.Invert : "~%s",
-    ast.USub : "-%s",
-    ast.UAdd : "+%s"
-}
-
-
-class DebugInterpreter(ast.NodeVisitor):
-    """Interpret AST nodes to gleam useful debugging information. """
-
-    def __init__(self, frame):
-        self.frame = frame
-
-    def generic_visit(self, node):
-        # Fallback when we don't have a special implementation.
-        if _is_ast_expr(node):
-            mod = ast.Expression(node)
-            co = self._compile(mod)
-            try:
-                result = self.frame.eval(co)
-            except Exception:
-                raise Failure()
-            explanation = self.frame.repr(result)
-            return explanation, result
-        elif _is_ast_stmt(node):
-            mod = ast.Module([node])
-            co = self._compile(mod, "exec")
-            try:
-                self.frame.exec_(co)
-            except Exception:
-                raise Failure()
-            return None, None
-        else:
-            raise AssertionError("can't handle %s" %(node,))
-
-    def _compile(self, source, mode="eval"):
-        return compile(source, "<assertion interpretation>", mode)
-
-    def visit_Expr(self, expr):
-        return self.visit(expr.value)
-
-    def visit_Module(self, mod):
-        for stmt in mod.body:
-            self.visit(stmt)
-
-    def visit_Name(self, name):
-        explanation, result = self.generic_visit(name)
-        # See if the name is local.
-        source = "%r in locals() is not globals()" % (name.id,)
-        co = self._compile(source)
-        try:
-            local = self.frame.eval(co)
-        except Exception:
-            # have to assume it isn't
-            local = False
-        if not local:
-            return name.id, result
-        return explanation, result
-
-    def visit_Compare(self, comp):
-        left = comp.left
-        left_explanation, left_result = self.visit(left)
-        for op, next_op in zip(comp.ops, comp.comparators):
-            next_explanation, next_result = self.visit(next_op)
-            op_symbol = operator_map[op.__class__]
-            explanation = "%s %s %s" % (left_explanation, op_symbol,
-                                        next_explanation)
-            source = "__exprinfo_left %s __exprinfo_right" % (op_symbol,)
-            co = self._compile(source)
-            try:
-                result = self.frame.eval(co, __exprinfo_left=left_result,
-                                         __exprinfo_right=next_result)
-            except Exception:
-                raise Failure(explanation)
-            try:
-                if not result:
-                    break
-            except KeyboardInterrupt:
-                raise
-            except:
-                break
-            left_explanation, left_result = next_explanation, next_result
-
-        rcomp = py.code._reprcompare
-        if rcomp:
-            res = rcomp(op_symbol, left_result, next_result)
-            if res:
-                explanation = res
-        return explanation, result
-
-    def visit_BoolOp(self, boolop):
-        is_or = isinstance(boolop.op, ast.Or)
-        explanations = []
-        for operand in boolop.values:
-            explanation, result = self.visit(operand)
-            explanations.append(explanation)
-            if result == is_or:
-                break
-        name = is_or and " or " or " and "
-        explanation = "(" + name.join(explanations) + ")"
-        return explanation, result
-
-    def visit_UnaryOp(self, unary):
-        pattern = unary_map[unary.op.__class__]
-        operand_explanation, operand_result = self.visit(unary.operand)
-        explanation = pattern % (operand_explanation,)
-        co = self._compile(pattern % ("__exprinfo_expr",))
-        try:
-            result = self.frame.eval(co, __exprinfo_expr=operand_result)
-        except Exception:
-            raise Failure(explanation)
-        return explanation, result
-
-    def visit_BinOp(self, binop):
-        left_explanation, left_result = self.visit(binop.left)
-        right_explanation, right_result = self.visit(binop.right)
-        symbol = operator_map[binop.op.__class__]
-        explanation = "(%s %s %s)" % (left_explanation, symbol,
-                                      right_explanation)
-        source = "__exprinfo_left %s __exprinfo_right" % (symbol,)
-        co = self._compile(source)
-        try:
-            result = self.frame.eval(co, __exprinfo_left=left_result,
-                                     __exprinfo_right=right_result)
-        except Exception:
-            raise Failure(explanation)
-        return explanation, result
-
-    def visit_Call(self, call):
-        func_explanation, func = self.visit(call.func)
-        arg_explanations = []
-        ns = {"__exprinfo_func" : func}
-        arguments = []
-        for arg in call.args:
-            arg_explanation, arg_result = self.visit(arg)
-            arg_name = "__exprinfo_%s" % (len(ns),)
-            ns[arg_name] = arg_result
-            arguments.append(arg_name)
-            arg_explanations.append(arg_explanation)
-        for keyword in call.keywords:
-            arg_explanation, arg_result = self.visit(keyword.value)
-            arg_name = "__exprinfo_%s" % (len(ns),)
-            ns[arg_name] = arg_result
-            keyword_source = "%s=%%s" % (keyword.arg)
-            arguments.append(keyword_source % (arg_name,))
-            arg_explanations.append(keyword_source % (arg_explanation,))
-        if call.starargs:
-            arg_explanation, arg_result = self.visit(call.starargs)
-            arg_name = "__exprinfo_star"
-            ns[arg_name] = arg_result
-            arguments.append("*%s" % (arg_name,))
-            arg_explanations.append("*%s" % (arg_explanation,))
-        if call.kwargs:
-            arg_explanation, arg_result = self.visit(call.kwargs)
-            arg_name = "__exprinfo_kwds"
-            ns[arg_name] = arg_result
-            arguments.append("**%s" % (arg_name,))
-            arg_explanations.append("**%s" % (arg_explanation,))
-        args_explained = ", ".join(arg_explanations)
-        explanation = "%s(%s)" % (func_explanation, args_explained)
-        args = ", ".join(arguments)
-        source = "__exprinfo_func(%s)" % (args,)
-        co = self._compile(source)
-        try:
-            result = self.frame.eval(co, **ns)
-        except Exception:
-            raise Failure(explanation)
-        pattern = "%s\n{%s = %s\n}"
-        rep = self.frame.repr(result)
-        explanation = pattern % (rep, rep, explanation)
-        return explanation, result
-
-    def _is_builtin_name(self, name):
-        pattern = "%r not in globals() and %r not in locals()"
-        source = pattern % (name.id, name.id)
-        co = self._compile(source)
-        try:
-            return self.frame.eval(co)
-        except Exception:
-            return False
-
-    def visit_Attribute(self, attr):
-        if not isinstance(attr.ctx, ast.Load):
-            return self.generic_visit(attr)
-        source_explanation, source_result = self.visit(attr.value)
-        explanation = "%s.%s" % (source_explanation, attr.attr)
-        source = "__exprinfo_expr.%s" % (attr.attr,)
-        co = self._compile(source)
-        try:
-            result = self.frame.eval(co, __exprinfo_expr=source_result)
-        except Exception:
-            raise Failure(explanation)
-        explanation = "%s\n{%s = %s.%s\n}" % (self.frame.repr(result),
-                                              self.frame.repr(result),
-                                              source_explanation, attr.attr)
-        # Check if the attr is from an instance.
-        source = "%r in getattr(__exprinfo_expr, '__dict__', {})"
-        source = source % (attr.attr,)
-        co = self._compile(source)
-        try:
-            from_instance = self.frame.eval(co, __exprinfo_expr=source_result)
-        except Exception:
-            from_instance = True
-        if from_instance:
-            rep = self.frame.repr(result)
-            pattern = "%s\n{%s = %s\n}"
-            explanation = pattern % (rep, rep, explanation)
-        return explanation, result
-
-    def visit_Assert(self, assrt):
-        test_explanation, test_result = self.visit(assrt.test)
-        if test_explanation.startswith("False\n{False =") and \
-                test_explanation.endswith("\n"):
-            test_explanation = test_explanation[15:-2]
-        explanation = "assert %s" % (test_explanation,)
-        if not test_result:
-            try:
-                raise BuiltinAssertionError
-            except Exception:
-                raise Failure(explanation)
-        return explanation, test_result
-
-    def visit_Assign(self, assign):
-        value_explanation, value_result = self.visit(assign.value)
-        explanation = "... = %s" % (value_explanation,)
-        name = ast.Name("__exprinfo_expr", ast.Load(),
-                        lineno=assign.value.lineno,
-                        col_offset=assign.value.col_offset)
-        new_assign = ast.Assign(assign.targets, name, lineno=assign.lineno,
-                                col_offset=assign.col_offset)
-        mod = ast.Module([new_assign])
-        co = self._compile(mod, "exec")
-        try:
-            self.frame.exec_(co, __exprinfo_expr=value_result)
-        except Exception:
-            raise Failure(explanation)
-        return explanation, value_result
diff --git a/py/_code/_assertionold.py b/py/_code/_assertionold.py
deleted file mode 100644
--- a/py/_code/_assertionold.py
+++ /dev/null
@@ -1,555 +0,0 @@
-import py
-import sys, inspect
-from compiler import parse, ast, pycodegen
-from py._code.assertion import BuiltinAssertionError, _format_explanation
-
-passthroughex = py.builtin._sysex
-
-class Failure:
-    def __init__(self, node):
-        self.exc, self.value, self.tb = sys.exc_info()
-        self.node = node
-
-class View(object):
-    """View base class.
-
-    If C is a subclass of View, then C(x) creates a proxy object around
-    the object x.  The actual class of the proxy is not C in general,
-    but a *subclass* of C determined by the rules below.  To avoid confusion
-    we call view class the class of the proxy (a subclass of C, so of View)
-    and object class the class of x.
-
-    Attributes and methods not found in the proxy are automatically read on x.
-    Other operations like setting attributes are performed on the proxy, as
-    determined by its view class.  The object x is available from the proxy
-    as its __obj__ attribute.
-
-    The view class selection is determined by the __view__ tuples and the
-    optional __viewkey__ method.  By default, the selected view class is the
-    most specific subclass of C whose __view__ mentions the class of x.
-    If no such subclass is found, the search proceeds with the parent
-    object classes.  For example, C(True) will first look for a subclass
-    of C with __view__ = (..., bool, ...) and only if it doesn't find any
-    look for one with __view__ = (..., int, ...), and then ..., object,...
-    If everything fails the class C itself is considered to be the default.
-
-    Alternatively, the view class selection can be driven by another aspect
-    of the object x, instead of the class of x, by overriding __viewkey__.
-    See last example at the end of this module.
-    """
-
-    _viewcache = {}
-    __view__ = ()
-
-    def __new__(rootclass, obj, *args, **kwds):
-        self = object.__new__(rootclass)
-        self.__obj__ = obj
-        self.__rootclass__ = rootclass
-        key = self.__viewkey__()
-        try:
-            self.__class__ = self._viewcache[key]
-        except KeyError:
-            self.__class__ = self._selectsubclass(key)
-        return self
-
-    def __getattr__(self, attr):
-        # attributes not found in the normal hierarchy rooted on View
-        # are looked up in the object's real class
-        return getattr(self.__obj__, attr)
-
-    def __viewkey__(self):
-        return self.__obj__.__class__
-
-    def __matchkey__(self, key, subclasses):
-        if inspect.isclass(key):
-            keys = inspect.getmro(key)
-        else:
-            keys = [key]
-        for key in keys:
-            result = [C for C in subclasses if key in C.__view__]
-            if result:
-                return result
-        return []
-
-    def _selectsubclass(self, key):
-        subclasses = list(enumsubclasses(self.__rootclass__))
-        for C in subclasses:
-            if not isinstance(C.__view__, tuple):
-                C.__view__ = (C.__view__,)
-        choices = self.__matchkey__(key, subclasses)
-        if not choices:
-            return self.__rootclass__
-        elif len(choices) == 1:
-            return choices[0]
-        else:
-            # combine the multiple choices
-            return type('?', tuple(choices), {})
-
-    def __repr__(self):
-        return '%s(%r)' % (self.__rootclass__.__name__, self.__obj__)
-
-
-def enumsubclasses(cls):
-    for subcls in cls.__subclasses__():
-        for subsubclass in enumsubclasses(subcls):
-            yield subsubclass
-    yield cls
-
-
-class Interpretable(View):
-    """A parse tree node with a few extra methods."""
-    explanation = None
-
-    def is_builtin(self, frame):
-        return False
-
-    def eval(self, frame):
-        # fall-back for unknown expression nodes
-        try:
-            expr = ast.Expression(self.__obj__)
-            expr.filename = '<eval>'
-            self.__obj__.filename = '<eval>'
-            co = pycodegen.ExpressionCodeGenerator(expr).getCode()
-            result = frame.eval(co)
-        except passthroughex:
-            raise
-        except:
-            raise Failure(self)
-        self.result = result
-        self.explanation = self.explanation or frame.repr(self.result)
-
-    def run(self, frame):
-        # fall-back for unknown statement nodes
-        try:
-            expr = ast.Module(None, ast.Stmt([self.__obj__]))
-            expr.filename = '<run>'
-            co = pycodegen.ModuleCodeGenerator(expr).getCode()
-            frame.exec_(co)
-        except passthroughex:
-            raise
-        except:
-            raise Failure(self)
-
-    def nice_explanation(self):
-        return _format_explanation(self.explanation)
-
-
-class Name(Interpretable):
-    __view__ = ast.Name
-
-    def is_local(self, frame):
-        source = '%r in locals() is not globals()' % self.name
-        try:
-            return frame.is_true(frame.eval(source))
-        except passthroughex:
-            raise
-        except:
-            return False
-
-    def is_global(self, frame):
-        source = '%r in globals()' % self.name
-        try:
-            return frame.is_true(frame.eval(source))
-        except passthroughex:
-            raise
-        except:
-            return False
-
-    def is_builtin(self, frame):
-        source = '%r not in locals() and %r not in globals()' % (
-            self.name, self.name)
-        try:
-            return frame.is_true(frame.eval(source))
-        except passthroughex:
-            raise
-        except:
-            return False
-
-    def eval(self, frame):
-        super(Name, self).eval(frame)
-        if not self.is_local(frame):
-            self.explanation = self.name
-
-class Compare(Interpretable):
-    __view__ = ast.Compare
-
-    def eval(self, frame):
-        expr = Interpretable(self.expr)
-        expr.eval(frame)
-        for operation, expr2 in self.ops:
-            if hasattr(self, 'result'):
-                # shortcutting in chained expressions
-                if not frame.is_true(self.result):
-                    break
-            expr2 = Interpretable(expr2)
-            expr2.eval(frame)
-            self.explanation = "%s %s %s" % (
-                expr.explanation, operation, expr2.explanation)
-            source = "__exprinfo_left %s __exprinfo_right" % operation
-            try:
-                self.result = frame.eval(source,
-                                         __exprinfo_left=expr.result,
-                                         __exprinfo_right=expr2.result)
-            except passthroughex:
-                raise
-            except:
-                raise Failure(self)
-            expr = expr2
-
-class And(Interpretable):
-    __view__ = ast.And
-
-    def eval(self, frame):
-        explanations = []
-        for expr in self.nodes:
-            expr = Interpretable(expr)
-            expr.eval(frame)
-            explanations.append(expr.explanation)
-            self.result = expr.result
-            if not frame.is_true(expr.result):
-                break
-        self.explanation = '(' + ' and '.join(explanations) + ')'
-
-class Or(Interpretable):
-    __view__ = ast.Or
-
-    def eval(self, frame):
-        explanations = []
-        for expr in self.nodes:
-            expr = Interpretable(expr)
-            expr.eval(frame)
-            explanations.append(expr.explanation)
-            self.result = expr.result
-            if frame.is_true(expr.result):
-                break
-        self.explanation = '(' + ' or '.join(explanations) + ')'
-
-
-# == Unary operations ==
-keepalive = []
-for astclass, astpattern in {
-    ast.Not    : 'not __exprinfo_expr',
-    ast.Invert : '(~__exprinfo_expr)',
-    }.items():
-
-    class UnaryArith(Interpretable):
-        __view__ = astclass
-
-        def eval(self, frame, astpattern=astpattern):
-            expr = Interpretable(self.expr)
-            expr.eval(frame)
-            self.explanation = astpattern.replace('__exprinfo_expr',
-                                                  expr.explanation)
-            try:
-                self.result = frame.eval(astpattern,
-                                         __exprinfo_expr=expr.result)
-            except passthroughex:
-                raise
-            except:
-                raise Failure(self)
-
-    keepalive.append(UnaryArith)
-
-# == Binary operations ==
-for astclass, astpattern in {
-    ast.Add    : '(__exprinfo_left + __exprinfo_right)',
-    ast.Sub    : '(__exprinfo_left - __exprinfo_right)',
-    ast.Mul    : '(__exprinfo_left * __exprinfo_right)',
-    ast.Div    : '(__exprinfo_left / __exprinfo_right)',
-    ast.Mod    : '(__exprinfo_left % __exprinfo_right)',
-    ast.Power  : '(__exprinfo_left ** __exprinfo_right)',
-    }.items():
-
-    class BinaryArith(Interpretable):
-        __view__ = astclass
-
-        def eval(self, frame, astpattern=astpattern):
-            left = Interpretable(self.left)
-            left.eval(frame)
-            right = Interpretable(self.right)
-            right.eval(frame)
-            self.explanation = (astpattern
-                                .replace('__exprinfo_left',  left .explanation)
-                                .replace('__exprinfo_right', right.explanation))
-            try:
-                self.result = frame.eval(astpattern,
-                                         __exprinfo_left=left.result,
-                                         __exprinfo_right=right.result)
-            except passthroughex:
-                raise
-            except:
-                raise Failure(self)
-
-    keepalive.append(BinaryArith)
-
-
-class CallFunc(Interpretable):
-    __view__ = ast.CallFunc
-
-    def is_bool(self, frame):
-        source = 'isinstance(__exprinfo_value, bool)'
-        try:
-            return frame.is_true(frame.eval(source,
-                                            __exprinfo_value=self.result))
-        except passthroughex:
-            raise
-        except:
-            return False
-
-    def eval(self, frame):
-        node = Interpretable(self.node)
-        node.eval(frame)
-        explanations = []
-        vars = {'__exprinfo_fn': node.result}
-        source = '__exprinfo_fn('
-        for a in self.args:
-            if isinstance(a, ast.Keyword):
-                keyword = a.name
-                a = a.expr
-            else:
-                keyword = None
-            a = Interpretable(a)
-            a.eval(frame)
-            argname = '__exprinfo_%d' % len(vars)
-            vars[argname] = a.result
-            if keyword is None:
-                source += argname + ','
-                explanations.append(a.explanation)
-            else:
-                source += '%s=%s,' % (keyword, argname)
-                explanations.append('%s=%s' % (keyword, a.explanation))
-        if self.star_args:
-            star_args = Interpretable(self.star_args)
-            star_args.eval(frame)
-            argname = '__exprinfo_star'
-            vars[argname] = star_args.result
-            source += '*' + argname + ','
-            explanations.append('*' + star_args.explanation)
-        if self.dstar_args:
-            dstar_args = Interpretable(self.dstar_args)
-            dstar_args.eval(frame)
-            argname = '__exprinfo_kwds'
-            vars[argname] = dstar_args.result
-            source += '**' + argname + ','
-            explanations.append('**' + dstar_args.explanation)
-        self.explanation = "%s(%s)" % (
-            node.explanation, ', '.join(explanations))
-        if source.endswith(','):
-            source = source[:-1]
-        source += ')'
-        try:
-            self.result = frame.eval(source, **vars)
-        except passthroughex:
-            raise
-        except:
-            raise Failure(self)
-        if not node.is_builtin(frame) or not self.is_bool(frame):
-            r = frame.repr(self.result)
-            self.explanation = '%s\n{%s = %s\n}' % (r, r, self.explanation)
-
-class Getattr(Interpretable):
-    __view__ = ast.Getattr
-
-    def eval(self, frame):
-        expr = Interpretable(self.expr)
-        expr.eval(frame)
-        source = '__exprinfo_expr.%s' % self.attrname
-        try:
-            self.result = frame.eval(source, __exprinfo_expr=expr.result)
-        except passthroughex:
-            raise
-        except:
-            raise Failure(self)
-        self.explanation = '%s.%s' % (expr.explanation, self.attrname)
-        # if the attribute comes from the instance, its value is interesting
-        source = ('hasattr(__exprinfo_expr, "__dict__") and '
-                  '%r in __exprinfo_expr.__dict__' % self.attrname)
-        try:
-            from_instance = frame.is_true(
-                frame.eval(source, __exprinfo_expr=expr.result))
-        except passthroughex:
-            raise
-        except:
-            from_instance = True
-        if from_instance:
-            r = frame.repr(self.result)
-            self.explanation = '%s\n{%s = %s\n}' % (r, r, self.explanation)
-
-# == Re-interpretation of full statements ==
-
-class Assert(Interpretable):
-    __view__ = ast.Assert
-
-    def run(self, frame):
-        test = Interpretable(self.test)
-        test.eval(frame)
-        # simplify 'assert False where False = ...'
-        if (test.explanation.startswith('False\n{False = ') and
-            test.explanation.endswith('\n}')):
-            test.explanation = test.explanation[15:-2]
-        # print the result as  'assert <explanation>'
-        self.result = test.result
-        self.explanation = 'assert ' + test.explanation
-        if not frame.is_true(test.result):
-            try:
-                raise BuiltinAssertionError
-            except passthroughex:
-                raise
-            except:
-                raise Failure(self)
-
-class Assign(Interpretable):
-    __view__ = ast.Assign
-
-    def run(self, frame):
-        expr = Interpretable(self.expr)
-        expr.eval(frame)
-        self.result = expr.result
-        self.explanation = '... = ' + expr.explanation
-        # fall-back-run the rest of the assignment
-        ass = ast.Assign(self.nodes, ast.Name('__exprinfo_expr'))
-        mod = ast.Module(None, ast.Stmt([ass]))
-        mod.filename = '<run>'
-        co = pycodegen.ModuleCodeGenerator(mod).getCode()
-        try:
-            frame.exec_(co, __exprinfo_expr=expr.result)
-        except passthroughex:
-            raise
-        except:
-            raise Failure(self)
-
-class Discard(Interpretable):
-    __view__ = ast.Discard
-
-    def run(self, frame):
-        expr = Interpretable(self.expr)
-        expr.eval(frame)
-        self.result = expr.result
-        self.explanation = expr.explanation
-
-class Stmt(Interpretable):
-    __view__ = ast.Stmt
-
-    def run(self, frame):
-        for stmt in self.nodes:
-            stmt = Interpretable(stmt)
-            stmt.run(frame)
-
-
-def report_failure(e):
-    explanation = e.node.nice_explanation()
-    if explanation:
-        explanation = ", in: " + explanation
-    else:
-        explanation = ""
-    sys.stdout.write("%s: %s%s\n" % (e.exc.__name__, e.value, explanation))
-
-def check(s, frame=None):
-    if frame is None:
-        frame = sys._getframe(1)
-        frame = py.code.Frame(frame)
-    expr = parse(s, 'eval')
-    assert isinstance(expr, ast.Expression)
-    node = Interpretable(expr.node)
-    try:
-        node.eval(frame)
-    except passthroughex:
-        raise
-    except Failure:
-        e = sys.exc_info()[1]
-        report_failure(e)
-    else:
-        if not frame.is_true(node.result):
-            sys.stderr.write("assertion failed: %s\n" % node.nice_explanation())
-
-
-###########################################################
-# API / Entry points
-# #########################################################
-
-def interpret(source, frame, should_fail=False):
-    module = Interpretable(parse(source, 'exec').node)
-    #print "got module", module
-    if isinstance(frame, py.std.types.FrameType):
-        frame = py.code.Frame(frame)
-    try:
-        module.run(frame)
-    except Failure:
-        e = sys.exc_info()[1]
-        return getfailure(e)
-    except passthroughex:
-        raise
-    except:
-        import traceback
-        traceback.print_exc()
-    if should_fail:
-        return ("(assertion failed, but when it was re-run for "
-                "printing intermediate values, it did not fail.  Suggestions: "
-                "compute assert expression before the assert or use --nomagic)")
-    else:
-        return None
-
-def getmsg(excinfo):
-    if isinstance(excinfo, tuple):
-        excinfo = py.code.ExceptionInfo(excinfo)
-    #frame, line = gettbline(tb)
-    #frame = py.code.Frame(frame)
-    #return interpret(line, frame)
-
-    tb = excinfo.traceback[-1]
-    source = str(tb.statement).strip()
-    x = interpret(source, tb.frame, should_fail=True)
-    if not isinstance(x, str):
-        raise TypeError("interpret returned non-string %r" % (x,))
-    return x
-
-def getfailure(e):
-    explanation = e.node.nice_explanation()
-    if str(e.value):
-        lines = explanation.split('\n')
-        lines[0] += "  << %s" % (e.value,)
-        explanation = '\n'.join(lines)
-    text = "%s: %s" % (e.exc.__name__, explanation)
-    if text.startswith('AssertionError: assert '):
-        text = text[16:]
-    return text
-
-def run(s, frame=None):
-    if frame is None:
-        frame = sys._getframe(1)
-        frame = py.code.Frame(frame)
-    module = Interpretable(parse(s, 'exec').node)
-    try:
-        module.run(frame)
-    except Failure:
-        e = sys.exc_info()[1]
-        report_failure(e)
-
-
-if __name__ == '__main__':
-    # example:
-    def f():
-        return 5
-    def g():
-        return 3
-    def h(x):
-        return 'never'
-    check("f() * g() == 5")
-    check("not f()")
-    check("not (f() and g() or 0)")
-    check("f() == g()")
-    i = 4
-    check("i == f()")
-    check("len(f()) == 0")
-    check("isinstance(2+3+4, float)")
-
-    run("x = i")
-    check("x == 5")
-
-    run("assert not f(), 'oops'")
-    run("a, b, c = 1, 2")
-    run("a, b, c = f()")
-
-    check("max([f(),g()]) == 4")
-    check("'hello'[g()] == 'h'")
-    run("'guk%d' % h(f())")
diff --git a/py/_code/assertion.py b/py/_code/assertion.py
deleted file mode 100644
--- a/py/_code/assertion.py
+++ /dev/null
@@ -1,94 +0,0 @@
-import sys
-import py
-
-BuiltinAssertionError = py.builtin.builtins.AssertionError
-
-_reprcompare = None # if set, will be called by assert reinterp for comparison ops
-
-def _format_explanation(explanation):
-    """This formats an explanation
-
-    Normally all embedded newlines are escaped, however there are
-    three exceptions: \n{, \n} and \n~.  The first two are intended
-    cover nested explanations, see function and attribute explanations
-    for examples (.visit_Call(), visit_Attribute()).  The last one is
-    for when one explanation needs to span multiple lines, e.g. when
-    displaying diffs.
-    """
-    raw_lines = (explanation or '').split('\n')
-    # escape newlines not followed by {, } and ~
-    lines = [raw_lines[0]]
-    for l in raw_lines[1:]:
-        if l.startswith('{') or l.startswith('}') or l.startswith('~'):
-            lines.append(l)
-        else:
-            lines[-1] += '\\n' + l
-
-    result = lines[:1]
-    stack = [0]
-    stackcnt = [0]
-    for line in lines[1:]:
-        if line.startswith('{'):
-            if stackcnt[-1]:
-                s = 'and   '
-            else:
-                s = 'where '
-            stack.append(len(result))
-            stackcnt[-1] += 1
-            stackcnt.append(0)
-            result.append(' +' + '  '*(len(stack)-1) + s + line[1:])
-        elif line.startswith('}'):
-            assert line.startswith('}')
-            stack.pop()
-            stackcnt.pop()
-            result[stack[-1]] += line[1:]
-        else:
-            assert line.startswith('~')
-            result.append('  '*len(stack) + line[1:])
-    assert len(stack) == 1
-    return '\n'.join(result)
-
-
-class AssertionError(BuiltinAssertionError):
-    def __init__(self, *args):
-        BuiltinAssertionError.__init__(self, *args)
-        if args:
-            try:
-                self.msg = str(args[0])
-            except py.builtin._sysex:
-                raise
-            except:
-                self.msg = "<[broken __repr__] %s at %0xd>" %(
-                    args[0].__class__, id(args[0]))
-        else:
-            f = py.code.Frame(sys._getframe(1))
-            try:
-                source = f.code.fullsource
-                if source is not None:
-                    try:
-                        source = source.getstatement(f.lineno, assertion=True)
-                    except IndexError:
-                        source = None
-                    else:
-                        source = str(source.deindent()).strip()
-            except py.error.ENOENT:
-                source = None
-                # this can also occur during reinterpretation, when the
-                # co_filename is set to "<run>".
-            if source:
-                self.msg = reinterpret(source, f, should_fail=True)
-            else:
-                self.msg = "<could not determine information>"
-            if not self.args:
-                self.args = (self.msg,)
-
-if sys.version_info > (3, 0):
-    AssertionError.__module__ = "builtins"
-    reinterpret_old = "old reinterpretation not available for py3"
-else:
-    from py._code._assertionold import interpret as reinterpret_old
-if sys.version_info >= (2, 6) or (sys.platform.startswith("java")):
-    from py._code._assertionnew import interpret as reinterpret
-else:
-    reinterpret = reinterpret_old
-
diff --git a/py/_code/code.py b/py/_code/code.py
--- a/py/_code/code.py
+++ b/py/_code/code.py
@@ -145,17 +145,6 @@
         return self.frame.f_locals
     locals = property(getlocals, None, None, "locals of underlaying frame")
 
-    def reinterpret(self):
-        """Reinterpret the failing statement and returns a detailed information
-           about what operations are performed."""
-        if self.exprinfo is None:
-            source = str(self.statement).strip()
-            x = py.code._reinterpret(source, self.frame, should_fail=True)
-            if not isinstance(x, str):
-                raise TypeError("interpret returned non-string %r" % (x,))
-            self.exprinfo = x
-        return self.exprinfo
-
     def getfirstlinesource(self):
         # on Jython this firstlineno can be -1 apparently
         return max(self.frame.code.firstlineno, 0)
@@ -310,7 +299,7 @@
         #     ExceptionInfo-like classes may have different attributes.
         if tup is None:
             tup = sys.exc_info()
-            if exprinfo is None and isinstance(tup[1], py.code._AssertionError):
+            if exprinfo is None and isinstance(tup[1], AssertionError):
                 exprinfo = getattr(tup[1], 'msg', None)
                 if exprinfo is None:
                     exprinfo = str(tup[1])
@@ -690,22 +679,15 @@
 
 oldbuiltins = {}
 
-def patch_builtins(assertion=True, compile=True):
-    """ put compile and AssertionError builtins to Python's builtins. """
-    if assertion:
-        from py._code import assertion
-        l = oldbuiltins.setdefault('AssertionError', [])
-        l.append(py.builtin.builtins.AssertionError)
-        py.builtin.builtins.AssertionError = assertion.AssertionError
+def patch_builtins(compile=True):
+    """ put compile builtins to Python's builtins. """
     if compile:
         l = oldbuiltins.setdefault('compile', [])
         l.append(py.builtin.builtins.compile)
         py.builtin.builtins.compile = py.code.compile
 
-def unpatch_builtins(assertion=True, compile=True):
+def unpatch_builtins(compile=True):
     """ remove compile and AssertionError builtins from Python builtins. """
-    if assertion:
-        py.builtin.builtins.AssertionError = oldbuiltins['AssertionError'].pop()
     if compile:
         py.builtin.builtins.compile = oldbuiltins['compile'].pop()
 
diff --git a/pypy/annotation/bookkeeper.py b/pypy/annotation/bookkeeper.py
--- a/pypy/annotation/bookkeeper.py
+++ b/pypy/annotation/bookkeeper.py
@@ -279,13 +279,13 @@
         desc = self.getdesc(cls)
         return desc.getuniqueclassdef()
 
-    def getlistdef(self, **flags):
+    def getlistdef(self, **flags_if_new):
         """Get the ListDef associated with the current position."""
         try:
             listdef = self.listdefs[self.position_key]
         except KeyError:
             listdef = self.listdefs[self.position_key] = ListDef(self)
-            listdef.listitem.__dict__.update(flags)
+            listdef.listitem.__dict__.update(flags_if_new)
         return listdef
 
     def newlist(self, *s_values, **flags):
@@ -294,6 +294,9 @@
         listdef = self.getlistdef(**flags)
         for s_value in s_values:
             listdef.generalize(s_value)
+        if flags:
+            assert flags.keys() == ['range_step']
+            listdef.generalize_range_step(flags['range_step'])
         return SomeList(listdef)
 
     def getdictdef(self, is_r_dict=False):
diff --git a/pypy/annotation/listdef.py b/pypy/annotation/listdef.py
--- a/pypy/annotation/listdef.py
+++ b/pypy/annotation/listdef.py
@@ -184,6 +184,11 @@
     def generalize(self, s_value):
         self.listitem.generalize(s_value)
 
+    def generalize_range_step(self, range_step):
+        newlistitem = ListItem(self.listitem.bookkeeper, s_ImpossibleValue)
+        newlistitem.range_step = range_step
+        self.listitem.merge(newlistitem)
+
     def __repr__(self):
         return '<[%r]%s%s%s%s>' % (self.listitem.s_value,
                                self.listitem.mutated and 'm' or '',
diff --git a/pypy/annotation/test/test_annrpython.py b/pypy/annotation/test/test_annrpython.py
--- a/pypy/annotation/test/test_annrpython.py
+++ b/pypy/annotation/test/test_annrpython.py
@@ -3483,6 +3483,17 @@
         a = self.RPythonAnnotator()
         raises(Exception, a.build_types, f, [int])
 
+    def test_range_variable_step(self):
+        def g(n):
+            return range(0, 10, n)
+        def f(n):
+            r = g(1)    # constant step, at first
+            s = g(n)    # but it becomes a variable step
+            return r
+        a = self.RPythonAnnotator()
+        s = a.build_types(f, [int])
+        assert s.listdef.listitem.range_step == 0
+
 
 def g(n):
     return [0,1,2,n]
diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py
--- a/pypy/config/pypyoption.py
+++ b/pypy/config/pypyoption.py
@@ -33,13 +33,17 @@
      "struct", "_hashlib", "_md5", "_sha", "_minimal_curses", "cStringIO",
      "thread", "itertools", "pyexpat", "_ssl", "cpyext", "array",
      "_bisect", "binascii", "_multiprocessing", '_warnings',
-     "_collections", "_multibytecodec", "micronumpy"]
+     "_collections", "_multibytecodec", "micronumpy", "_ffi"]
 ))
 
 translation_modules = default_modules.copy()
 translation_modules.update(dict.fromkeys(
     ["fcntl", "rctime", "select", "signal", "_rawffi", "zlib",
-     "struct", "_md5", "cStringIO", "array"]))
+     "struct", "_md5", "cStringIO", "array", "_ffi",
+     # the following are needed for pyrepl (and hence for the
+     # interactive prompt/pdb)
+     "termios", "_minimal_curses",
+     ]))
 
 working_oo_modules = default_modules.copy()
 working_oo_modules.update(dict.fromkeys(
@@ -80,6 +84,7 @@
     "_rawffi": [("objspace.usemodules.struct", True)],
     "cpyext": [("translation.secondaryentrypoints", "cpyext"),
                ("translation.shared", sys.platform == "win32")],
+    "_ffi":    [("translation.jit_ffi", True)],
 }
 
 module_import_dependencies = {
diff --git a/pypy/config/test/test_pypyoption.py b/pypy/config/test/test_pypyoption.py
--- a/pypy/config/test/test_pypyoption.py
+++ b/pypy/config/test/test_pypyoption.py
@@ -73,3 +73,7 @@
             fn = prefix + "." + path + ".txt"
             yield check_file_exists, fn
 
+def test__ffi_opt():
+    config = get_pypy_config(translating=True)
+    config.objspace.usemodules._ffi = True
+    assert config.translation.jit_ffi
diff --git a/pypy/config/translationoption.py b/pypy/config/translationoption.py
--- a/pypy/config/translationoption.py
+++ b/pypy/config/translationoption.py
@@ -117,6 +117,8 @@
     ChoiceOption("jit_profiler", "integrate profiler support into the JIT",
                  ["off", "oprofile"],
                  default="off"),
+    # jit_ffi is automatically turned on by withmod-_ffi (which is enabled by default)
+    BoolOption("jit_ffi", "optimize libffi calls", default=False, cmdline=None),
 
     # misc
     BoolOption("verbose", "Print extra information", default=False),
diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst
--- a/pypy/doc/cpython_differences.rst
+++ b/pypy/doc/cpython_differences.rst
@@ -136,6 +136,11 @@
 next access.  Any code that uses weak proxies must carefully catch such
 ``ReferenceError`` at any place that uses them.
 
+As a side effect, the ``finally`` clause inside a generator will be executed
+only when the generator object is garbage collected (see `issue 736`__).
+
+.. __: http://bugs.pypy.org/issue736
+
 There are a few extra implications for the difference in the GC.  Most
 notably, if an object has a ``__del__``, the ``__del__`` is never called more
 than once in PyPy; but CPython will call the same ``__del__`` several times
@@ -168,6 +173,11 @@
     >>>> A.__del__ = lambda self: None
     __main__:1: RuntimeWarning: a __del__ method added to an existing type will not be called
 
+Even more obscure: the same is true, for old-style classes, if you attach
+the ``__del__`` to an instance (even in CPython this does not work with
+new-style classes).  You get a RuntimeWarning in PyPy.  To fix these cases
+just make sure there is a ``__del__`` method in the class to start with.
+
 
 Subclasses of built-in types
 ----------------------------
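
A hedged illustration of the generator note added above (the exact timing depends on the GC, so this is a sketch rather than guaranteed behaviour):

    import gc

    def g():
        try:
            yield 1
        finally:
            print('finally ran')

    it = g()
    next(it)       # suspend inside the try block
    del it         # CPython's refcounting usually runs the finally here
    gc.collect()   # on PyPy it runs at collection time (or via it.close())
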
diff --git a/pypy/doc/garbage_collection.rst b/pypy/doc/garbage_collection.rst
--- a/pypy/doc/garbage_collection.rst
+++ b/pypy/doc/garbage_collection.rst
@@ -212,90 +212,4 @@
   becomes free garbage, to be collected at the next major collection.
 
 
-Minimark GC
------------
-
-This is a simplification and rewrite of the ideas from the Hybrid GC.
-It uses a nursery for the young objects, and mark-and-sweep for the old
-objects.  This is a moving GC, but objects may only move once (from
-the nursery to the old stage).
-
-The main difference with the Hybrid GC is that the mark-and-sweep
-objects (the "old stage") are directly handled by the GC's custom
-allocator, instead of being handled by malloc() calls.  The gain is that
-it is then possible, during a major collection, to walk through all old
-generation objects without needing to store a list of pointers to them.
-So as a first approximation, when compared to the Hybrid GC, the
-Minimark GC saves one word of memory per old object.
-
-There are a number of environment variables that can be tweaked to
-influence the GC.  (Their default value should be ok for most usages.)
-You can read more about them at the start of
-`pypy/rpython/memory/gc/minimark.py`_.
-
-In more details:
-
-- The small newly malloced objects are allocated in the nursery (case 1).
-  All objects living in the nursery are "young".
-
-- The big objects are always handled directly by the system malloc().
-  But the big newly malloced objects are still "young" when they are
-  allocated (case 2), even though they don't live in the nursery.
-
-- When the nursery is full, we do a minor collection, i.e. we find
-  which "young" objects are still alive (from cases 1 and 2).  The
-  "young" flag is then removed.  The surviving case 1 objects are moved
-  to the old stage. The dying case 2 objects are immediately freed.
-
-- The old stage is an area of memory containing old (small) objects.  It
-  is handled by `pypy/rpython/memory/gc/minimarkpage.py`_.  It is organized
-  as "arenas" of 256KB or 512KB, subdivided into "pages" of 4KB or 8KB.
-  Each page can either be free, or contain small objects of all the same
-  size.  Furthermore at any point in time each object location can be
-  either allocated or freed.  The basic design comes from ``obmalloc.c``
-  from CPython (which itself comes from the same source as the Linux
-  system malloc()).
-
-- New objects are added to the old stage at every minor collection.
-  Immediately after a minor collection, when we reach some threshold, we
-  trigger a major collection.  This is the mark-and-sweep step.  It walks
-  over *all* objects (mark), and then frees some fraction of them (sweep).
-  This means that the only time when we want to free objects is while
-  walking over all of them; we never ask to free an object given just its
-  address.  This allows some simplifications and memory savings when
-  compared to ``obmalloc.c``.
-
-- As with all generational collectors, this GC needs a write barrier to
-  record which old objects have a reference to young objects.
-
-- Additionally, we found out that it is useful to handle the case of
-  big arrays specially: when we allocate a big array (with the system
-  malloc()), we reserve a small number of bytes before.  When the array
-  grows old, we use the extra bytes as a set of bits.  Each bit
-  represents 128 entries in the array.  Whenever the write barrier is
-  called to record a reference from the Nth entry of the array to some
-  young object, we set the bit number ``(N/128)`` to 1.  This can
-  considerably speed up minor collections, because we then only have to
-  scan 128 entries of the array instead of all of them.
-
-- As usual, we need special care about weak references, and objects with
-  finalizers.  Weak references are allocated in the nursery, and if they
-  survive they move to the old stage, as usual for all objects; the
-  difference is that the reference they contain must either follow the
-  object, or be set to NULL if the object dies.  And the objects with
-  finalizers, considered rare enough, are immediately allocated old to
-  simplify the design.  In particular their ``__del__`` method can only
-  be called just after a major collection.
-
-- The objects move once only, so we can use a trick to implement id()
-  and hash().  If the object is not in the nursery, it won't move any
-  more, so its id() and hash() are the object's address, cast to an
-  integer.  If the object is in the nursery, and we ask for its id()
-  or its hash(), then we pre-reserve a location in the old stage, and
-  return the address of that location.  If the object survives the
-  next minor collection, we move it there, and so its id() and hash()
-  are preserved.  If the object dies then the pre-reserved location
-  becomes free garbage, to be collected at the next major collection.
-
-
 .. include:: _ref.txt
diff --git a/pypy/doc/image/jitviewer.png b/pypy/doc/image/jitviewer.png
new file mode 100644
index 0000000000000000000000000000000000000000..ad2abca5c88125061fa519dcf3f9fada577573ee
GIT binary patch

[cut]

diff --git a/pypy/doc/index.rst b/pypy/doc/index.rst
--- a/pypy/doc/index.rst
+++ b/pypy/doc/index.rst
@@ -21,6 +21,8 @@
 
 * `speed.pypy.org`_: Daily benchmarks of how fast PyPy is
 
+* `potential project ideas`_: In case you want to get your feet wet...
+
 Documentation for the PyPy Python Interpreter
 ===============================================
 
@@ -59,8 +61,6 @@
   (if they are not already developed in the FAQ_).
   You can find logs of the channel here_.
 
-.. XXX play1? 
-
 Meeting PyPy developers
 =======================
 
@@ -83,7 +83,7 @@
 .. _`Release 1.5`: http://pypy.org/download.html
 .. _`speed.pypy.org`: http://speed.pypy.org
 .. _`RPython toolchain`: translation.html
-
+.. _`potential project ideas`: project-ideas.html
 
 Project Documentation
 =====================================
diff --git a/pypy/doc/project-ideas.rst b/pypy/doc/project-ideas.rst
new file mode 100644
--- /dev/null
+++ b/pypy/doc/project-ideas.rst
@@ -0,0 +1,137 @@
+
+Potential project list
+======================
+
+This is a list of projects suitable for contributors who want to get
+seriously involved in the PyPy project. They mostly share common
+patterns - they're mid-to-large in size, they're usually well defined as
+standalone projects, and they're not being actively worked on. For small
+projects that you might want to work on, it's much better to either look
+at the `issue tracker`_, pop up on #pypy on irc.freenode.net or write to the
+`mailing list`_. This is simply because small projects tend to change
+very rapidly.
+
+This list is mostly meant to give an overview of potential projects. It is
+by definition not exhaustive, and we're pleased if people come up with their
+own improvement ideas. In any case, if you feel like working on one of these
+projects, or anything else in PyPy, pop up on IRC or write to us on the
+`mailing list`_.
+
+Numpy improvements
+------------------
+
+This is more of a project-container than a single project. Possible ideas:
+
+* experiment with auto-vectorization using SSE, or implement explicit
+  (non-automatic) vectorization for array operations.
+
+* improve numpy, for example implement memory views.
+
+* interface with Fortran/C libraries.
+
+Improving the jitviewer
+------------------------
+
+Analyzing performance of applications is always tricky. We have various
+tools, for example the `jitviewer`_, that help us analyze performance.
+
+The jitviewer shows the code generated by the PyPy JIT in a hierarchical way,
+as shown by the screenshot below:
+
+  - at the bottom level, it shows the Python source code of the compiled loops
+
+  - for each source code line, it shows the corresponding Python bytecode
+
+  - for each opcode, it shows the corresponding jit operations, which are the
+    ones actually sent to the backend for compiling (such as ``i15 = i10 <
+    2000`` in the example)
+
+.. image:: image/jitviewer.png
+
+We would like to add one level to this hierarchy, by showing the generated
+machine code for each jit operation.  The necessary information is already in
+the log file produced by the JIT, so it is "only" a matter of teaching the
+jitviewer to display it.  Ideally, the machine code should be hidden by
+default and viewable on request.
+
+The jitviewer is a web application based on flask and jinja2 (and jQuery on
+the client): if you have great web development skills and want to help PyPy,
+this is an ideal task to get started, because it does not require any deep
+knowledge of the internals.
+
+Translation Toolchain
+---------------------
+
+* Incremental or distributed translation.
+
+* Allow separate compilation of extension modules.
+
+Work on some of the other languages
+-----------------------------------
+
+There are various languages implemented using the RPython translation toolchain.
+One of the most interesting is the `JavaScript implementation`_, but there
+are others like Scheme or Prolog. An interesting project would be to improve
+the jittability of those or to experiment with various optimizations.
+
+Various GCs
+-----------
+
+PyPy has a pluggable garbage collection policy. This means that various garbage
+collectors can be written for specialized purposes, or that various
+experiments can be done with general-purpose collection. Examples:
+
+* An incremental garbage collector that guarantees specified maximal pause times,
+  which is crucial for games
+
+* A garbage collector that compacts memory better, for mobile devices
+
+* A concurrent garbage collector (a lot of work)
+
+Remove the GIL
+--------------
+
+This is a major task that requires lots of thinking. However, a few subprojects
+can already be identified, unless a better plan can be thought out:
+
+* A thread-aware garbage collector
+
+* Better RPython primitives for dealing with concurrency
+
+* JIT passes to remove locks on objects
+
+* (maybe) implement locking in the Python interpreter
+
+* alternatively, look at Software Transactional Memory
+
+Introduce new benchmarks
+------------------------
+
+We're usually happy to introduce new benchmarks. Please consult us
+first, but in general anything that is real-world Python code
+and is not already represented is welcome. We need at least a standalone
+script that can run without parameters. Example ideas (benchmarks would
+need to be extracted from them!):
+
+* `hg`
+
+* `sympy`
+
+Experiment (again) with LLVM backend for RPython compilation
+------------------------------------------------------------
+
+We already tried working with LLVM and, at the time, LLVM was not mature enough
+for our needs. It's possible that this has changed; reviving the LLVM backend
+(or writing a new one from scratch) for static compilation would be a good project.
+
+(On the other hand, just generating C code and using clang might be enough.
+The issue with that is the so-called "asmgcc GC root finder", which has tons
+of issues of its own.  In my opinion (arigo), it would definitely be a
+better project to try to optimize the alternative, the "shadowstack" GC root
+finder, which is nicely portable.  So far it gives a pypy that is around
+7% slower.)
+
+.. _`issue tracker`: http://bugs.pypy.org
+.. _`mailing list`: http://mail.python.org/mailman/listinfo/pypy-dev
+.. _`jitviewer`: http://bitbucket.org/pypy/jitviewer
+.. _`JavaScript implementation`: https://bitbucket.org/pypy/lang-js/overview
diff --git a/pypy/interpreter/argument.py b/pypy/interpreter/argument.py
--- a/pypy/interpreter/argument.py
+++ b/pypy/interpreter/argument.py
@@ -90,15 +90,18 @@
     ###  Construction  ###
 
     def __init__(self, space, args_w, keywords=None, keywords_w=None,
-                 w_stararg=None, w_starstararg=None):
+                 w_stararg=None, w_starstararg=None, keyword_names_w=None):
         self.space = space
         assert isinstance(args_w, list)
         self.arguments_w = args_w
         self.keywords = keywords
         self.keywords_w = keywords_w
+        self.keyword_names_w = keyword_names_w  # matches the tail of .keywords
         if keywords is not None:
             assert keywords_w is not None
             assert len(keywords_w) == len(keywords)
+            assert (keyword_names_w is None or
+                    len(keyword_names_w) <= len(keywords))
             make_sure_not_resized(self.keywords)
             make_sure_not_resized(self.keywords_w)
 
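The new keyword_names_w list, when present, describes the last
len(keyword_names_w) entries of self.keywords (those whose name could not be
encoded to a plain string and are therefore stored as None). A small,
self-contained illustration of the resulting indexing scheme, with made-up
values standing in for wrapped objects:

    # Illustration of the "keyword_names_w matches the tail of .keywords"
    # convention; all values below are made up, 'w_' means "wrapped object".
    keywords        = ['a', 'b', None, None]        # None: name not encodable
    keywords_w      = ['w_1', 'w_2', 'w_3', 'w_4']  # corresponding values
    keyword_names_w = ['w_name3', 'w_name4']        # wrapped names of the tail

    limit = len(keywords) - len(keyword_names_w)    # == 2
    for i in range(len(keywords)):
        if i < limit:
            w_key = keywords[i]                     # plain string: wrap() it
        else:
            w_key = keyword_names_w[i - limit]      # already a wrapped object
        print('%s -> %s' % (w_key, keywords_w[i]))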
@@ -132,7 +135,8 @@
 
     def replace_arguments(self, args_w):
         "Return a new Arguments with a args_w as positional arguments."
-        return Arguments(self.space, args_w, self.keywords, self.keywords_w)
+        return Arguments(self.space, args_w, self.keywords, self.keywords_w,
+                         keyword_names_w = self.keyword_names_w)
 
     def prepend(self, w_firstarg):
         "Return a new Arguments with a new argument inserted first."
@@ -201,15 +205,16 @@
                         space.w_TypeError,
                         space.wrap("keywords must be strings"))
                 if e.match(space, space.w_UnicodeEncodeError):
-                    raise OperationError(
-                        space.w_TypeError,
-                        space.wrap("keyword cannot be encoded to ascii"))
-                raise
-            if self.keywords and key in self.keywords:
-                raise operationerrfmt(self.space.w_TypeError,
-                                      "got multiple values "
-                                      "for keyword argument "
-                                      "'%s'", key)
+                    # Allow this to pass through
+                    key = None
+                else:
+                    raise
+            else:
+                if self.keywords and key in self.keywords:
+                    raise operationerrfmt(self.space.w_TypeError,
+                                          "got multiple values "
+                                          "for keyword argument "
+                                          "'%s'", key)
             keywords[i] = key
             keywords_w[i] = space.getitem(w_starstararg, w_key)
             i += 1
@@ -219,6 +224,7 @@
         else:
             self.keywords = self.keywords + keywords
             self.keywords_w = self.keywords_w + keywords_w
+        self.keyword_names_w = keys_w
 
     def fixedunpack(self, argcount):
         """The simplest argument parsing: get the 'argcount' arguments,
@@ -339,6 +345,10 @@
             used_keywords = [False] * num_kwds
             for i in range(num_kwds):
                 name = keywords[i]
+                # If name was not encoded as a string, it could be None. In that
+                # case, it's definitely not going to be in the signature.
+                if name is None:
+                    continue
                 j = signature.find_argname(name)
                 if j < 0:
                     continue
@@ -374,17 +384,26 @@
         if has_kwarg:
             w_kwds = self.space.newdict()
             if num_remainingkwds:
+                #
+                limit = len(keywords)
+                if self.keyword_names_w is not None:
+                    limit -= len(self.keyword_names_w)
                 for i in range(len(keywords)):
                     if not used_keywords[i]:
-                        key = keywords[i]
-                        self.space.setitem(w_kwds, self.space.wrap(key), keywords_w[i])
+                        if i < limit:
+                            w_key = self.space.wrap(keywords[i])
+                        else:
+                            w_key = self.keyword_names_w[i - limit]
+                        self.space.setitem(w_kwds, w_key, keywords_w[i])
+                #
             scope_w[co_argcount + has_vararg] = w_kwds
         elif num_remainingkwds:
             if co_argcount == 0:
                 raise ArgErrCount(avail, num_kwds,
                               co_argcount, has_vararg, has_kwarg,
                               defaults_w, missing)
-            raise ArgErrUnknownKwds(num_remainingkwds, keywords, used_keywords)
+            raise ArgErrUnknownKwds(self.space, num_remainingkwds, keywords,
+                                    used_keywords, self.keyword_names_w)
 
         if missing:
             raise ArgErrCount(avail, num_kwds,
@@ -443,9 +462,15 @@
         w_args = space.newtuple(self.arguments_w)
         w_kwds = space.newdict()
         if self.keywords is not None:
+            limit = len(self.keywords)
+            if self.keyword_names_w is not None:
+                limit -= len(self.keyword_names_w)
             for i in range(len(self.keywords)):
-                space.setitem(w_kwds, space.wrap(self.keywords[i]),
-                                      self.keywords_w[i])
+                if i < limit:
+                    w_key = space.wrap(self.keywords[i])
+                else:
+                    w_key = self.keyword_names_w[i - limit]
+                space.setitem(w_kwds, w_key, self.keywords_w[i])
         return w_args, w_kwds
 
 class ArgumentsForTranslation(Arguments):
@@ -666,14 +691,33 @@
 
 class ArgErrUnknownKwds(ArgErr):
 
-    def __init__(self, num_remainingkwds, keywords, used_keywords):
-        self.kwd_name = ''
+    def __init__(self, space, num_remainingkwds, keywords, used_keywords,
+                 keyword_names_w):
+        name = ''
         self.num_kwds = num_remainingkwds
         if num_remainingkwds == 1:
             for i in range(len(keywords)):
                 if not used_keywords[i]:
-                    self.kwd_name = keywords[i]
+                    name = keywords[i]
+                    if name is None:
+                        # We'll assume it's unicode. Encode it.
+                        # Careful, I *think* it should not be possible to
+                        # get an IndexError here but you never know.
+                        try:
+                            if keyword_names_w is None:
+                                raise IndexError
+                            # note: negative-based indexing from the end
+                            w_name = keyword_names_w[i - len(keywords)]
+                        except IndexError:
+                            name = '?'
+                        else:
+                            w_enc = space.wrap(space.sys.defaultencoding)
+                            w_err = space.wrap("replace")
+                            w_name = space.call_method(w_name, "encode", w_enc,
+                                                       w_err)
+                            name = space.str_w(w_name)
                     break
+        self.kwd_name = name
 
     def getmsg(self, fnname):
         if self.num_kwds == 1:
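In the error path above, the wrapped name of an un-encodable keyword is
recovered by indexing keyword_names_w from the end:
keyword_names_w[i - len(keywords)] is valid precisely because the two lists
share their tail. The name is then encoded with the 'replace' error handler so
that building the message can never fail. A small sketch with made-up values
(utf-8 mirrors the defaultencoding used in the test further down):

    # Sketch of recovering a printable name for an un-encodable keyword.
    keywords        = ['a', None, 'b', 'c']      # entry 1 has no str form
    keyword_names_w = [u'\u1234', u'b', u'c']    # wrapped names of the tail

    i = 1                                        # index of the unknown keyword
    w_name = keyword_names_w[i - len(keywords)]  # == keyword_names_w[-3]
    name = w_name.encode('utf-8', 'replace')     # never raises
    print("got an unexpected keyword argument %r" % name)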
diff --git a/pypy/interpreter/astcompiler/test/test_compiler.py b/pypy/interpreter/astcompiler/test/test_compiler.py
--- a/pypy/interpreter/astcompiler/test/test_compiler.py
+++ b/pypy/interpreter/astcompiler/test/test_compiler.py
@@ -55,7 +55,7 @@
         co_expr = compile(evalexpr, '<evalexpr>', 'eval')
         space = self.space
         pyco_expr = PyCode._from_code(space, co_expr)
-        w_res = pyco_expr.exec_host_bytecode(space, w_dict, w_dict)
+        w_res = pyco_expr.exec_host_bytecode(w_dict, w_dict)
         res = space.str_w(space.repr(w_res))
         if not isinstance(expected, float):
             assert res == repr(expected)
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -989,10 +989,7 @@
             compiler = self.createcompiler()
             expression = compiler.compile(expression, '?', 'eval', 0,
                                          hidden_applevel=hidden_applevel)
-        if isinstance(expression, types.CodeType):
-            # XXX only used by appsupport
-            expression = PyCode._from_code(self, expression)
-        if not isinstance(expression, PyCode):
+        else:
             raise TypeError, 'space.eval(): expected a string, code or PyCode object'
         return expression.exec_code(self, w_globals, w_locals)
 
@@ -1007,9 +1004,6 @@
             compiler = self.createcompiler()
             statement = compiler.compile(statement, filename, 'exec', 0,
                                          hidden_applevel=hidden_applevel)
-        if isinstance(statement, types.CodeType):
-            # XXX only used by appsupport
-            statement = PyCode._from_code(self, statement)
         if not isinstance(statement, PyCode):
             raise TypeError, 'space.exec_(): expected a string, code or PyCode object'
         w_key = self.wrap('__builtins__')
diff --git a/pypy/interpreter/pycode.py b/pypy/interpreter/pycode.py
--- a/pypy/interpreter/pycode.py
+++ b/pypy/interpreter/pycode.py
@@ -256,7 +256,7 @@
                          tuple(self.co_freevars),
                          tuple(self.co_cellvars) )
 
-    def exec_host_bytecode(self, w_dict, w_globals, w_locals):
+    def exec_host_bytecode(self, w_globals, w_locals):
         from pypy.interpreter.pyframe import CPythonFrame
         frame = CPythonFrame(self.space, self, w_globals, None)
         frame.setdictscope(w_locals)
diff --git a/pypy/interpreter/test/test_argument.py b/pypy/interpreter/test/test_argument.py
--- a/pypy/interpreter/test/test_argument.py
+++ b/pypy/interpreter/test/test_argument.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 import py
 from pypy.interpreter.argument import (Arguments, ArgumentsForTranslation,
     ArgErr, ArgErrUnknownKwds, ArgErrMultipleValues, ArgErrCount, rawshape,
@@ -126,6 +127,7 @@
     w_AttributeError = AttributeError
     w_UnicodeEncodeError = UnicodeEncodeError
     w_dict = dict
+    w_str = str
 
 class TestArgumentsNormal(object):
 
@@ -485,26 +487,6 @@
         args._match_signature(None, l, Signature(['abc']))
         assert len(l) == 1
         assert l[0] == space.wrap(5)
-        #
-        def str_w(w):
-            try:
-                return str(w)
-            except UnicodeEncodeError:
-                raise OperationError(space.w_UnicodeEncodeError,
-                                     space.wrap("oups"))
-        space.str_w = str_w
-        w_starstar = space.wrap({u'\u1234': 5})
-        err = py.test.raises(OperationError, Arguments,
-                             space, [], w_starstararg=w_starstar)
-        # Check that we get a TypeError.  On CPython it is because of
-        # "no argument called '?'".  On PyPy we get a TypeError too, but
-        # earlier: "keyword cannot be encoded to ascii".  The
-        # difference, besides the error message, is only apparent if the
-        # receiver also takes a **arg.  Then CPython passes the
-        # non-ascii unicode unmodified, whereas PyPy complains.  We will
-        # not care until someone has a use case for that.
-        assert not err.value.match(space, space.w_UnicodeEncodeError)
-        assert     err.value.match(space, space.w_TypeError)
 
 class TestErrorHandling(object):
     def test_missing_args(self):
@@ -559,13 +541,26 @@
             assert 0, "did not raise"
 
     def test_unknown_keywords(self):
-        err = ArgErrUnknownKwds(1, ['a', 'b'], [True, False])
+        space = DummySpace()
+        err = ArgErrUnknownKwds(space, 1, ['a', 'b'], [True, False], None)
         s = err.getmsg('foo')
         assert s == "foo() got an unexpected keyword argument 'b'"
-        err = ArgErrUnknownKwds(2, ['a', 'b', 'c'], [True, False, False])
+        err = ArgErrUnknownKwds(space, 2, ['a', 'b', 'c'],
+                                [True, False, False], None)
         s = err.getmsg('foo')
         assert s == "foo() got 2 unexpected keyword arguments"
 
+    def test_unknown_unicode_keyword(self):
+        class DummySpaceUnicode(DummySpace):
+            class sys:
+                defaultencoding = 'utf-8'
+        space = DummySpaceUnicode()
+        err = ArgErrUnknownKwds(space, 1, ['a', None, 'b', 'c'],
+                                [True, False, True, True],
+                                [unichr(0x1234), u'b', u'c'])
+        s = err.getmsg('foo')
+        assert s == "foo() got an unexpected keyword argument '\xe1\x88\xb4'"
+
     def test_multiple_values(self):
         err = ArgErrMultipleValues('bla')
         s = err.getmsg('foo')
@@ -592,6 +587,14 @@
         exc = raises(TypeError, (lambda a, b, **kw: 0), a=1)
         assert exc.value.message == "<lambda>() takes exactly 2 non-keyword arguments (0 given)"
 
+    def test_unicode_keywords(self):
+        def f(**kwargs):
+            assert kwargs[u"美"] == 42
+        f(**{u"美" : 42})
+        def f(x): pass
+        e = raises(TypeError, "f(**{u'ü' : 19})")
+        assert "?" in str(e.value)
+
 def make_arguments_for_translation(space, args_w, keywords_w={},
                                    w_stararg=None, w_starstararg=None):
     return ArgumentsForTranslation(space, args_w, keywords_w.keys(),
diff --git a/pypy/jit/backend/llgraph/llimpl.py b/pypy/jit/backend/llgraph/llimpl.py
--- a/pypy/jit/backend/llgraph/llimpl.py
+++ b/pypy/jit/backend/llgraph/llimpl.py
@@ -136,6 +136,7 @@
     'call'            : (('ref', 'varargs'), 'intorptr'),
     'call_assembler'  : (('varargs',), 'intorptr'),
     'cond_call_gc_wb' : (('ptr', 'ptr'), None),
+    'cond_call_gc_wb_array': (('ptr', 'int', 'ptr'), None),
     'oosend'          : (('varargs',), 'intorptr'),
     'oosend_pure'     : (('varargs',), 'intorptr'),
     'guard_true'      : (('bool',), None),
@@ -600,15 +601,15 @@
         #
         return _op_default_implementation
 
-    def op_debug_merge_point(self, _, value, recdepth):
+    def op_debug_merge_point(self, _, *args):
         from pypy.jit.metainterp.warmspot import get_stats
-        loc = ConstPtr(value)._get_str()
         try:
             stats = get_stats()
         except AttributeError:
             pass
         else:
-            stats.add_merge_point_location(loc)
+            stats.add_merge_point_location(args[1:])
+        pass
 
     def op_guard_true(self, _, value):
         if not value:
@@ -820,6 +821,12 @@
             raise NotImplementedError
 
     def op_call(self, calldescr, func, *args):
+        return self._do_call(calldescr, func, args, call_with_llptr=False)
+
+    def op_call_release_gil(self, calldescr, func, *args):
+        return self._do_call(calldescr, func, args, call_with_llptr=True)
+
+    def _do_call(self, calldescr, func, args, call_with_llptr):
         global _last_exception
         assert _last_exception is None, "exception left behind"
         assert _call_args_i == _call_args_r == _call_args_f == []
@@ -838,7 +845,8 @@
             else:
                 raise TypeError(x)
         try:
-            return _do_call_common(func, args_in_order, calldescr)
+            return _do_call_common(func, args_in_order, calldescr,
+                                   call_with_llptr)
         except LLException, lle:
             _last_exception = lle
             d = {'v': None,
@@ -850,6 +858,9 @@
     def op_cond_call_gc_wb(self, descr, a, b):
         py.test.skip("cond_call_gc_wb not supported")
 
+    def op_cond_call_gc_wb_array(self, descr, a, b, c):
+        py.test.skip("cond_call_gc_wb_array not supported")
+
     def op_oosend(self, descr, obj, *args):
         raise NotImplementedError("oosend for lltype backend??")
 
@@ -1480,17 +1491,20 @@
     'v': lltype.Void,
     }
 
-def _do_call_common(f, args_in_order=None, calldescr=None):
+def _do_call_common(f, args_in_order=None, calldescr=None,
+                    call_with_llptr=False):
     ptr = llmemory.cast_int_to_adr(f).ptr
     PTR = lltype.typeOf(ptr)
     if PTR == rffi.VOIDP:
         # it's a pointer to a C function, so we don't have a precise
         # signature: create one from the descr
+        assert call_with_llptr is True
         ARGS = map(kind2TYPE.get, calldescr.arg_types)
         RESULT = kind2TYPE[calldescr.typeinfo]
         FUNC = lltype.FuncType(ARGS, RESULT)
         func_to_call = rffi.cast(lltype.Ptr(FUNC), ptr)
     else:
+        assert call_with_llptr is False
         FUNC = PTR.TO
         ARGS = FUNC.ARGS
         func_to_call = ptr._obj._callable
diff --git a/pypy/jit/backend/llgraph/runner.py b/pypy/jit/backend/llgraph/runner.py
--- a/pypy/jit/backend/llgraph/runner.py
+++ b/pypy/jit/backend/llgraph/runner.py
@@ -134,7 +134,7 @@
         old, oldindex = faildescr._compiled_fail
         llimpl.compile_redirect_fail(old, oldindex, c)
 
-    def compile_loop(self, inputargs, operations, looptoken, log=True):
+    def compile_loop(self, inputargs, operations, looptoken, log=True, name=''):
         """In a real assembler backend, this should assemble the given
         list of operations.  Here we just generate a similar CompiledLoop
         instance.  The code here is RPython, whereas the code in llimpl
diff --git a/pypy/jit/backend/llsupport/descr.py b/pypy/jit/backend/llsupport/descr.py
--- a/pypy/jit/backend/llsupport/descr.py
+++ b/pypy/jit/backend/llsupport/descr.py
@@ -1,5 +1,6 @@
 import py
 from pypy.rpython.lltypesystem import lltype, rffi, llmemory, rclass
+from pypy.rpython.lltypesystem.lloperation import llop
 from pypy.jit.backend.llsupport import symbolic, support
 from pypy.jit.metainterp.history import AbstractDescr, getkind, BoxInt, BoxPtr
 from pypy.jit.metainterp.history import BasicFailDescr, LoopToken, BoxFloat
@@ -45,6 +46,8 @@
     size = 0      # help translation
     is_immutable = False
 
+    tid = llop.combine_ushort(lltype.Signed, 0, 0)
+
     def __init__(self, size, count_fields_if_immut=-1):
         self.size = size
         self.count_fields_if_immut = count_fields_if_immut
@@ -149,6 +152,7 @@
 
 class BaseArrayDescr(AbstractDescr):
     _clsname = ''
+    tid = llop.combine_ushort(lltype.Signed, 0, 0)
 
     def get_base_size(self, translate_support_code):
         basesize, _, _ = symbolic.get_array_token(_A, translate_support_code)
@@ -263,6 +267,9 @@
 
     def __repr__(self):
         res = '%s(%s)' % (self.__class__.__name__, self.arg_classes)
+        extraeffect = getattr(self.extrainfo, 'extraeffect', None)
+        if extraeffect is not None:
+            res += ' EF=%r' % extraeffect
         oopspecindex = getattr(self.extrainfo, 'oopspecindex', 0)
         if oopspecindex:
             from pypy.jit.codewriter.effectinfo import EffectInfo
diff --git a/pypy/jit/backend/llsupport/ffisupport.py b/pypy/jit/backend/llsupport/ffisupport.py
--- a/pypy/jit/backend/llsupport/ffisupport.py
+++ b/pypy/jit/backend/llsupport/ffisupport.py
@@ -3,13 +3,16 @@
 from pypy.jit.backend.llsupport.descr import DynamicIntCallDescr, NonGcPtrCallDescr,\
     FloatCallDescr, VoidCallDescr
 
+class UnsupportedKind(Exception):
+    pass
+
 def get_call_descr_dynamic(ffi_args, ffi_result, extrainfo=None):
     """Get a call descr: the types of result and args are represented by
     rlib.libffi.types.*"""
     try:
         reskind = get_ffi_type_kind(ffi_result)
         argkinds = [get_ffi_type_kind(arg) for arg in ffi_args]
-    except KeyError:
+    except UnsupportedKind:
         return None # ??
     arg_classes = ''.join(argkinds)
     if reskind == history.INT:
@@ -33,7 +36,7 @@
         return history.FLOAT
     elif kind == 'v':
         return history.VOID
-    assert False, "Unsupported kind '%s'" % kind
+    raise UnsupportedKind("Unsupported kind '%s'" % kind)
 
 def is_ffi_type_signed(ffi_type):
     from pypy.rlib.libffi import types
diff --git a/pypy/jit/backend/llsupport/gc.py b/pypy/jit/backend/llsupport/gc.py
--- a/pypy/jit/backend/llsupport/gc.py
+++ b/pypy/jit/backend/llsupport/gc.py
@@ -34,7 +34,7 @@
         pass
     def do_write_barrier(self, gcref_struct, gcref_newptr):
         pass
-    def rewrite_assembler(self, cpu, operations):
+    def rewrite_assembler(self, cpu, operations, gcrefs_output_list):
         return operations
     def can_inline_malloc(self, descr):
         return False
@@ -146,78 +146,6 @@
 # All code below is for the hybrid or minimark GC
 
 
-class GcRefList:
-    """Handles all references from the generated assembler to GC objects.
-    This is implemented as a nonmovable, but GC, list; the assembler contains
-    code that will (for now) always read from this list."""
-
-    GCREF_LIST = lltype.GcArray(llmemory.GCREF)     # followed by the GC
-
-    HASHTABLE = rffi.CArray(llmemory.Address)      # ignored by the GC
-    HASHTABLE_BITS = 10
-    HASHTABLE_SIZE = 1 << HASHTABLE_BITS
-
-    def initialize(self):
-        if we_are_translated(): n = 2000
-        else:                   n = 10    # tests only
-        self.list = self.alloc_gcref_list(n)
-        self.nextindex = 0
-        self.oldlists = []
-        # A pseudo dictionary: it is fixed size, and it may contain
-        # random nonsense after a collection moved the objects.  It is only
-        # used to avoid too many duplications in the GCREF_LISTs.
-        self.hashtable = lltype.malloc(self.HASHTABLE,
-                                       self.HASHTABLE_SIZE+1,
-                                       flavor='raw', track_allocation=False)
-        dummy = lltype.direct_ptradd(lltype.direct_arrayitems(self.hashtable),
-                                     self.HASHTABLE_SIZE)
-        dummy = llmemory.cast_ptr_to_adr(dummy)
-        for i in range(self.HASHTABLE_SIZE+1):
-            self.hashtable[i] = dummy
-
-    def alloc_gcref_list(self, n):
-        # Important: the GRREF_LISTs allocated are *non-movable*.  This
-        # requires support in the gc (hybrid GC or minimark GC so far).
-        if we_are_translated():
-            list = rgc.malloc_nonmovable(self.GCREF_LIST, n)
-            assert list, "malloc_nonmovable failed!"
-        else:
-            list = lltype.malloc(self.GCREF_LIST, n)     # for tests only
-        return list
-
-    def get_address_of_gcref(self, gcref):
-        assert lltype.typeOf(gcref) == llmemory.GCREF
-        # first look in the hashtable, using an inexact hash (fails after
-        # the object moves)
-        addr = llmemory.cast_ptr_to_adr(gcref)
-        hash = llmemory.cast_adr_to_int(addr, "forced")
-        hash -= hash >> self.HASHTABLE_BITS
-        hash &= self.HASHTABLE_SIZE - 1
-        addr_ref = self.hashtable[hash]
-        # the following test is safe anyway, because the addresses found
-        # in the hashtable are always the addresses of nonmovable stuff
-        # ('addr_ref' is an address inside self.list, not directly the
-        # address of a real moving GC object -- that's 'addr_ref.address[0]'.)
-        if addr_ref.address[0] == addr:
-            return addr_ref
-        # if it fails, add an entry to the list
-        if self.nextindex == len(self.list):
-            # reallocate first, increasing a bit the size every time
-            self.oldlists.append(self.list)
-            self.list = self.alloc_gcref_list(len(self.list) // 4 * 5)
-            self.nextindex = 0
-        # add it
-        index = self.nextindex
-        self.list[index] = gcref
-        addr_ref = lltype.direct_ptradd(lltype.direct_arrayitems(self.list),
-                                        index)
-        addr_ref = llmemory.cast_ptr_to_adr(addr_ref)
-        self.nextindex = index + 1
-        # record it in the hashtable
-        self.hashtable[hash] = addr_ref
-        return addr_ref
-
-
 class GcRootMap_asmgcc(object):
     """Handles locating the stack roots in the assembler.
     This is the class supporting --gcrootfinder=asmgcc.
@@ -527,6 +455,7 @@
     def __init__(self, gc_ll_descr):
         self.llop1 = gc_ll_descr.llop1
         self.WB_FUNCPTR = gc_ll_descr.WB_FUNCPTR
+        self.WB_ARRAY_FUNCPTR = gc_ll_descr.WB_ARRAY_FUNCPTR
         self.fielddescr_tid = get_field_descr(gc_ll_descr,
                                               gc_ll_descr.GCClass.HDR, 'tid')
         self.jit_wb_if_flag = gc_ll_descr.GCClass.JIT_WB_IF_FLAG
@@ -546,6 +475,14 @@
         funcaddr = llmemory.cast_ptr_to_adr(funcptr)
         return cpu.cast_adr_to_int(funcaddr)
 
+    def get_write_barrier_from_array_fn(self, cpu):
+        # returns a function with arguments [array, index, newvalue]
+        llop1 = self.llop1
+        funcptr = llop1.get_write_barrier_from_array_failing_case(
+            self.WB_ARRAY_FUNCPTR)
+        funcaddr = llmemory.cast_ptr_to_adr(funcptr)
+        return cpu.cast_adr_to_int(funcaddr)    # this may return 0
+
 
 class GcLLDescr_framework(GcLLDescription):
     DEBUG = False    # forced to True by x86/test/test_zrpy_gc.py
@@ -559,7 +496,7 @@
         self.translator = translator
         self.llop1 = llop1
 
-        # we need the hybrid or minimark GC for GcRefList.alloc_gcref_list()
+        # we need the hybrid or minimark GC for rgc._make_sure_does_not_move()
         # to work
         if gcdescr.config.translation.gc not in ('hybrid', 'minimark'):
             raise NotImplementedError("--gc=%s not implemented with the JIT" %
@@ -574,8 +511,6 @@
                                       " with the JIT" % (name,))
         gcrootmap = cls(gcdescr)
         self.gcrootmap = gcrootmap
-        self.gcrefs = GcRefList()
-        self.single_gcref_descr = GcPtrFieldDescr('', 0)
 
         # make a TransformerLayoutBuilder and save it on the translator
         # where it can be fished and reused by the FrameworkGCTransformer
@@ -617,6 +552,8 @@
             [lltype.Signed, lltype.Signed], llmemory.GCREF))
         self.WB_FUNCPTR = lltype.Ptr(lltype.FuncType(
             [llmemory.Address, llmemory.Address], lltype.Void))
+        self.WB_ARRAY_FUNCPTR = lltype.Ptr(lltype.FuncType(
+            [llmemory.Address, lltype.Signed, llmemory.Address], lltype.Void))
         self.write_barrier_descr = WriteBarrierDescr(self)
         #
         def malloc_array(itemsize, tid, num_elem):
@@ -706,7 +643,6 @@
         return rffi.cast(lltype.Signed, fptr)
 
     def initialize(self):
-        self.gcrefs.initialize()
         self.gcrootmap.initialize()
 
     def init_size_descr(self, S, descr):
@@ -768,54 +704,32 @@
             funcptr(llmemory.cast_ptr_to_adr(gcref_struct),
                     llmemory.cast_ptr_to_adr(gcref_newptr))
 
-    def replace_constptrs_with_getfield_raw(self, cpu, newops, op):
-        # xxx some performance issue here
-        newargs = [None] * op.numargs()
-        needs_copy = False
+    def record_constptrs(self, op, gcrefs_output_list):
         for i in range(op.numargs()):
             v = op.getarg(i)
-            newargs[i] = v
             if isinstance(v, ConstPtr) and bool(v.value):
-                addr = self.gcrefs.get_address_of_gcref(v.value)
-                # ^^^even for non-movable objects, to record their presence
-                if rgc.can_move(v.value):
-                    box = BoxPtr(v.value)
-                    addr = cpu.cast_adr_to_int(addr)
-                    newops.append(ResOperation(rop.GETFIELD_RAW,
-                                               [ConstInt(addr)], box,
-                                               self.single_gcref_descr))
-                    newargs[i] = box
-                    needs_copy = True
-        #
-        if needs_copy:
-            return op.copy_and_change(op.getopnum(), args=newargs)
-        else:
-            return op
+                p = v.value
+                rgc._make_sure_does_not_move(p)
+                gcrefs_output_list.append(p)
 
-
-    def rewrite_assembler(self, cpu, operations):
+    def rewrite_assembler(self, cpu, operations, gcrefs_output_list):
         # Perform two kinds of rewrites in parallel:
         #
         # - Add COND_CALLs to the write barrier before SETFIELD_GC and
         #   SETARRAYITEM_GC operations.
         #
-        # - Remove all uses of ConstPtrs away from the assembler.
-        #   Idea: when running on a moving GC, we can't (easily) encode
-        #   the ConstPtrs in the assembler, because they can move at any
-        #   point in time.  Instead, we store them in 'gcrefs.list', a GC
-        #   but nonmovable list; and here, we modify 'operations' to
-        #   replace direct usage of ConstPtr with a BoxPtr loaded by a
-        #   GETFIELD_RAW from the array 'gcrefs.list'.
+        # - Record the ConstPtrs from the assembler.
         #
         newops = []
+        known_lengths = {}
         # we can only remember one malloc since the next malloc can possibly
         # collect
         last_malloc = None
         for op in operations:
             if op.getopnum() == rop.DEBUG_MERGE_POINT:
                 continue
-            # ---------- replace ConstPtrs with GETFIELD_RAW ----------
-            op = self.replace_constptrs_with_getfield_raw(cpu, newops, op)
+            # ---------- record the ConstPtrs ----------
+            self.record_constptrs(op, gcrefs_output_list)
             if op.is_malloc():
                 last_malloc = op.result
             elif op.can_malloc():
@@ -838,10 +752,14 @@
                     v = op.getarg(2)
                     if isinstance(v, BoxPtr) or (isinstance(v, ConstPtr) and
                                             bool(v.value)): # store a non-NULL
-                        # XXX detect when we should produce a
-                        # write_barrier_from_array
-                        self._gen_write_barrier(newops, op.getarg(0), v)
+                        self._gen_write_barrier_array(newops, op.getarg(0),
+                                                      op.getarg(1), v,
+                                                      cpu, known_lengths)
                         op = op.copy_and_change(rop.SETARRAYITEM_RAW)
+            elif op.getopnum() == rop.NEW_ARRAY:
+                v_length = op.getarg(0)
+                if isinstance(v_length, ConstInt):
+                    known_lengths[op.result] = v_length.getint()
             # ----------
             newops.append(op)
         return newops
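Schematically, the loop above leaves most operations alone, records the
ConstPtrs, and turns a store of a non-NULL pointer into a GC array into a
conditional write-barrier call followed by the raw store, picking the array
variant only when the array length is unknown or at least 130. The following
toy model (plain tuples instead of the real ResOperation API, and assuming the
backend does provide a write_barrier_from_array helper) shows just that
decision:

    # Toy model of the SETARRAYITEM_GC rewrite; not the real ResOperation API.
    LARGE = 130

    def rewrite(operations, known_lengths):
        newops = []
        for name, args in operations:
            if name == 'setarrayitem_gc':
                v_base, v_index, v_value = args
                length = known_lengths.get(v_base, LARGE)
                if length >= LARGE:
                    # unknown or big array: use the card-marking barrier
                    newops.append(('cond_call_gc_wb_array',
                                   [v_base, v_index, v_value]))
                else:
                    newops.append(('cond_call_gc_wb', [v_base, v_value]))
                newops.append(('setarrayitem_raw', args))
            else:
                newops.append((name, args))
        return newops

    ops = [('setarrayitem_gc', ['p0', 'i0', 'p1'])]
    print(rewrite(ops, {}))            # -> wb_array followed by the raw store
    print(rewrite(ops, {'p0': 5}))     # -> plain wb followed by the raw store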
@@ -851,6 +769,24 @@
         newops.append(ResOperation(rop.COND_CALL_GC_WB, args, None,
                                    descr=self.write_barrier_descr))
 
+    def _gen_write_barrier_array(self, newops, v_base, v_index, v_value,
+                                 cpu, known_lengths):
+        if self.write_barrier_descr.get_write_barrier_from_array_fn(cpu) != 0:
+            # If we know statically the length of 'v', and it is not too
+            # big, then produce a regular write_barrier.  If it's unknown or
+            # too big, produce instead a write_barrier_from_array.
+            LARGE = 130
+            length = known_lengths.get(v_base, LARGE)
+            if length >= LARGE:
+                # unknown or too big: produce a write_barrier_from_array
+                args = [v_base, v_index, v_value]
+                newops.append(ResOperation(rop.COND_CALL_GC_WB_ARRAY, args,
+                                           None,
+                                           descr=self.write_barrier_descr))
+                return
+        # fall-back case: produce a write_barrier
+        self._gen_write_barrier(newops, v_base, v_value)
+
     def can_inline_malloc(self, descr):
         assert isinstance(descr, BaseSizeDescr)
         if descr.size < self.max_size_of_young_obj:
diff --git a/pypy/jit/backend/llsupport/regalloc.py b/pypy/jit/backend/llsupport/regalloc.py
--- a/pypy/jit/backend/llsupport/regalloc.py
+++ b/pypy/jit/backend/llsupport/regalloc.py
@@ -37,6 +37,11 @@
         self.frame_depth += size
         return newloc
 
+    def reserve_location_in_frame(self, size):
+        frame_depth = self.frame_depth
+        self.frame_depth += size
+        return frame_depth
+
     # abstract methods that need to be overwritten for specific assemblers
     @staticmethod
     def frame_pos(loc, type):
diff --git a/pypy/jit/backend/llsupport/test/test_gc.py b/pypy/jit/backend/llsupport/test/test_gc.py
--- a/pypy/jit/backend/llsupport/test/test_gc.py
+++ b/pypy/jit/backend/llsupport/test/test_gc.py
@@ -9,7 +9,7 @@
 from pypy.jit.metainterp.resoperation import get_deep_immutable_oplist
 from pypy.jit.tool.oparser import parse
 from pypy.rpython.lltypesystem.rclass import OBJECT, OBJECT_VTABLE
-from pypy.jit.metainterp.test.test_optimizeopt import equaloplists
+from pypy.jit.metainterp.optimizeopt.util import equaloplists
 
 def test_boehm():
     gc_ll_descr = GcLLDescr_boehm(None, None, None)
@@ -49,19 +49,6 @@
 
 # ____________________________________________________________
 
-def test_GcRefList():
-    S = lltype.GcStruct('S')
-    order = range(50) * 4
-    random.shuffle(order)
-    allocs = [lltype.cast_opaque_ptr(llmemory.GCREF, lltype.malloc(S))
-              for i in range(50)]
-    allocs = [allocs[i] for i in order]
-    #
-    gcrefs = GcRefList()
-    gcrefs.initialize()
-    addrs = [gcrefs.get_address_of_gcref(ptr) for ptr in allocs]
-    for i in range(len(allocs)):
-        assert addrs[i].address[0] == llmemory.cast_ptr_to_adr(allocs[i])
 
 class TestGcRootMapAsmGcc:
 
@@ -288,6 +275,18 @@
     def get_write_barrier_failing_case(self, FPTRTYPE):
         return llhelper(FPTRTYPE, self._write_barrier_failing_case)
 
+    _have_wb_from_array = False
+
+    def _write_barrier_from_array_failing_case(self, adr_struct, v_index):
+        self.record.append(('barrier_from_array', adr_struct, v_index))
+
+    def get_write_barrier_from_array_failing_case(self, FPTRTYPE):
+        if self._have_wb_from_array:
+            return llhelper(FPTRTYPE,
+                            self._write_barrier_from_array_failing_case)
+        else:
+            return lltype.nullptr(FPTRTYPE.TO)
+
 
 class TestFramework(object):
     gc = 'hybrid'
@@ -303,9 +302,20 @@
             config = config_
         class FakeCPU(object):
             def cast_adr_to_int(self, adr):
-                ptr = llmemory.cast_adr_to_ptr(adr, gc_ll_descr.WB_FUNCPTR)
-                assert ptr._obj._callable == llop1._write_barrier_failing_case
-                return 42
+                if not adr:
+                    return 0
+                try:
+                    ptr = llmemory.cast_adr_to_ptr(adr, gc_ll_descr.WB_FUNCPTR)
+                    assert ptr._obj._callable == \
+                           llop1._write_barrier_failing_case
+                    return 42
+                except lltype.InvalidCast:
+                    ptr = llmemory.cast_adr_to_ptr(
+                        adr, gc_ll_descr.WB_ARRAY_FUNCPTR)
+                    assert ptr._obj._callable == \
+                           llop1._write_barrier_from_array_failing_case
+                    return 43
+
         gcdescr = get_description(config_)
         translator = FakeTranslator()
         llop1 = FakeLLOp()
@@ -414,11 +424,11 @@
             ResOperation(rop.DEBUG_MERGE_POINT, ['dummy', 2], None),
             ]
         gc_ll_descr = self.gc_ll_descr
-        operations = gc_ll_descr.rewrite_assembler(None, operations)
+        operations = gc_ll_descr.rewrite_assembler(None, operations, [])
         assert len(operations) == 0
 
     def test_rewrite_assembler_1(self):
-        # check rewriting of ConstPtrs
+        # check recording of ConstPtrs
         class MyFakeCPU(object):
             def cast_adr_to_int(self, adr):
                 assert adr == "some fake address"
@@ -438,56 +448,12 @@
             ]
         gc_ll_descr = self.gc_ll_descr
         gc_ll_descr.gcrefs = MyFakeGCRefList()
+        gcrefs = []
         operations = get_deep_immutable_oplist(operations)
-        operations = gc_ll_descr.rewrite_assembler(MyFakeCPU(), operations)
-        assert len(operations) == 2
-        assert operations[0].getopnum() == rop.GETFIELD_RAW
-        assert operations[0].getarg(0) == ConstInt(43)
-        assert operations[0].getdescr() == gc_ll_descr.single_gcref_descr
-        v_box = operations[0].result
-        assert isinstance(v_box, BoxPtr)
-        assert operations[1].getopnum() == rop.PTR_EQ
-        assert operations[1].getarg(0) == v_random_box
-        assert operations[1].getarg(1) == v_box
-        assert operations[1].result == v_result
-
-    def test_rewrite_assembler_1_cannot_move(self):
-        # check rewriting of ConstPtrs
-        class MyFakeCPU(object):
-            def cast_adr_to_int(self, adr):
-                xxx    # should not be called
-        class MyFakeGCRefList(object):
-            def get_address_of_gcref(self, s_gcref1):
-                seen.append(s_gcref1)
-                assert s_gcref1 == s_gcref
-                return "some fake address"
-        seen = []
-        S = lltype.GcStruct('S')
-        s = lltype.malloc(S)
-        s_gcref = lltype.cast_opaque_ptr(llmemory.GCREF, s)
-        v_random_box = BoxPtr()
-        v_result = BoxInt()
-        operations = [
-            ResOperation(rop.PTR_EQ, [v_random_box, ConstPtr(s_gcref)],
-                         v_result),
-            ]
-        gc_ll_descr = self.gc_ll_descr
-        gc_ll_descr.gcrefs = MyFakeGCRefList()
-        old_can_move = rgc.can_move
-        operations = get_deep_immutable_oplist(operations)
-        try:
-            rgc.can_move = lambda s: False
-            operations = gc_ll_descr.rewrite_assembler(MyFakeCPU(), operations)
-        finally:
-            rgc.can_move = old_can_move
-        assert len(operations) == 1
-        assert operations[0].getopnum() == rop.PTR_EQ
-        assert operations[0].getarg(0) == v_random_box
-        assert operations[0].getarg(1) == ConstPtr(s_gcref)
-        assert operations[0].result == v_result
-        # check that s_gcref gets added to the list anyway, to make sure
-        # that the GC sees it
-        assert seen == [s_gcref]
+        operations2 = gc_ll_descr.rewrite_assembler(MyFakeCPU(), operations,
+                                                   gcrefs)
+        assert operations2 == operations
+        assert gcrefs == [s_gcref]
 
     def test_rewrite_assembler_2(self):
         # check write barriers before SETFIELD_GC
@@ -500,7 +466,8 @@
             ]
         gc_ll_descr = self.gc_ll_descr
         operations = get_deep_immutable_oplist(operations)
-        operations = gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
+        operations = gc_ll_descr.rewrite_assembler(self.fake_cpu, operations,
+                                                   [])
         assert len(operations) == 2
         #
         assert operations[0].getopnum() == rop.COND_CALL_GC_WB
@@ -515,29 +482,93 @@
 
     def test_rewrite_assembler_3(self):
         # check write barriers before SETARRAYITEM_GC
-        v_base = BoxPtr()
-        v_index = BoxInt()
-        v_value = BoxPtr()
-        array_descr = AbstractDescr()
-        operations = [
-            ResOperation(rop.SETARRAYITEM_GC, [v_base, v_index, v_value], None,
-                         descr=array_descr),
-            ]
-        gc_ll_descr = self.gc_ll_descr
-        operations = get_deep_immutable_oplist(operations)
-        operations = gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
-        assert len(operations) == 2
-        #
-        assert operations[0].getopnum() == rop.COND_CALL_GC_WB
-        assert operations[0].getarg(0) == v_base
-        assert operations[0].getarg(1) == v_value
-        assert operations[0].result is None
-        #
-        assert operations[1].getopnum() == rop.SETARRAYITEM_RAW
-        assert operations[1].getarg(0) == v_base
-        assert operations[1].getarg(1) == v_index
-        assert operations[1].getarg(2) == v_value
-        assert operations[1].getdescr() == array_descr
+        for v_new_length in (None, ConstInt(5), ConstInt(5000), BoxInt()):
+            v_base = BoxPtr()
+            v_index = BoxInt()
+            v_value = BoxPtr()
+            array_descr = AbstractDescr()
+            operations = [
+                ResOperation(rop.SETARRAYITEM_GC, [v_base, v_index, v_value],
+                             None, descr=array_descr),
+                ]
+            if v_new_length is not None:
+                operations.insert(0, ResOperation(rop.NEW_ARRAY,
+                                                  [v_new_length], v_base,
+                                                  descr=array_descr))
+                # we need to insert another, unrelated NEW_ARRAY here
+                # to prevent the initialization_store optimization
+                operations.insert(1, ResOperation(rop.NEW_ARRAY,
+                                                  [ConstInt(12)], BoxPtr(),
+                                                  descr=array_descr))
+            gc_ll_descr = self.gc_ll_descr
+            operations = get_deep_immutable_oplist(operations)
+            operations = gc_ll_descr.rewrite_assembler(self.fake_cpu,
+                                                       operations, [])
+            if v_new_length is not None:
+                assert operations[0].getopnum() == rop.NEW_ARRAY
+                assert operations[1].getopnum() == rop.NEW_ARRAY
+                del operations[:2]
+            assert len(operations) == 2
+            #
+            assert operations[0].getopnum() == rop.COND_CALL_GC_WB
+            assert operations[0].getarg(0) == v_base
+            assert operations[0].getarg(1) == v_value
+            assert operations[0].result is None
+            #
+            assert operations[1].getopnum() == rop.SETARRAYITEM_RAW
+            assert operations[1].getarg(0) == v_base
+            assert operations[1].getarg(1) == v_index
+            assert operations[1].getarg(2) == v_value
+            assert operations[1].getdescr() == array_descr
+
+    def test_rewrite_assembler_4(self):
+        # check write barriers before SETARRAYITEM_GC,
+        # if we have actually a write_barrier_from_array.
+        self.llop1._have_wb_from_array = True
+        for v_new_length in (None, ConstInt(5), ConstInt(5000), BoxInt()):
+            v_base = BoxPtr()
+            v_index = BoxInt()
+            v_value = BoxPtr()
+            array_descr = AbstractDescr()
+            operations = [
+                ResOperation(rop.SETARRAYITEM_GC, [v_base, v_index, v_value],
+                             None, descr=array_descr),
+                ]
+            if v_new_length is not None:
+                operations.insert(0, ResOperation(rop.NEW_ARRAY,
+                                                  [v_new_length], v_base,
+                                                  descr=array_descr))
+                # we need to insert another, unrelated NEW_ARRAY here
+                # to prevent the initialization_store optimization
+                operations.insert(1, ResOperation(rop.NEW_ARRAY,
+                                                  [ConstInt(12)], BoxPtr(),
+                                                  descr=array_descr))
+            gc_ll_descr = self.gc_ll_descr
+            operations = get_deep_immutable_oplist(operations)
+            operations = gc_ll_descr.rewrite_assembler(self.fake_cpu,
+                                                       operations, [])
+            if v_new_length is not None:
+                assert operations[0].getopnum() == rop.NEW_ARRAY
+                assert operations[1].getopnum() == rop.NEW_ARRAY
+                del operations[:2]
+            assert len(operations) == 2
+            #
+            if isinstance(v_new_length, ConstInt) and v_new_length.value < 130:
+                assert operations[0].getopnum() == rop.COND_CALL_GC_WB
+                assert operations[0].getarg(0) == v_base
+                assert operations[0].getarg(1) == v_value
+            else:
+                assert operations[0].getopnum() == rop.COND_CALL_GC_WB_ARRAY
+                assert operations[0].getarg(0) == v_base
+                assert operations[0].getarg(1) == v_index
+                assert operations[0].getarg(2) == v_value
+            assert operations[0].result is None
+            #
+            assert operations[1].getopnum() == rop.SETARRAYITEM_RAW
+            assert operations[1].getarg(0) == v_base
+            assert operations[1].getarg(1) == v_index
+            assert operations[1].getarg(2) == v_value
+            assert operations[1].getdescr() == array_descr
 
     def test_rewrite_assembler_initialization_store(self):
         S = lltype.GcStruct('S', ('parent', OBJECT),
@@ -558,7 +589,8 @@
         jump()
         """, namespace=locals())
         operations = get_deep_immutable_oplist(ops.operations)
-        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
+        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu,
+                                                        operations, [])
         equaloplists(operations, expected.operations)
 
     def test_rewrite_assembler_initialization_store_2(self):
@@ -583,7 +615,8 @@
         jump()
         """, namespace=locals())
         operations = get_deep_immutable_oplist(ops.operations)
-        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
+        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu,
+                                                        operations, [])
         equaloplists(operations, expected.operations)
 
     def test_rewrite_assembler_initialization_store_3(self):
@@ -602,7 +635,8 @@
         jump()
         """, namespace=locals())
         operations = get_deep_immutable_oplist(ops.operations)
-        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
+        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu,
+                                                        operations, [])
         equaloplists(operations, expected.operations)
 
 class TestFrameworkMiniMark(TestFramework):
diff --git a/pypy/jit/backend/model.py b/pypy/jit/backend/model.py
--- a/pypy/jit/backend/model.py
+++ b/pypy/jit/backend/model.py
@@ -53,7 +53,7 @@
         """Called once by the front-end when the program stops."""
         pass
 
-    def compile_loop(self, inputargs, operations, looptoken, log=True):
+    def compile_loop(self, inputargs, operations, looptoken, log=True, name=''):
         """Assemble the given loop.
         Should create and attach a fresh CompiledLoopToken to
         looptoken.compiled_loop_token and stick extra attributes
diff --git a/pypy/jit/backend/test/calling_convention_test.py b/pypy/jit/backend/test/calling_convention_test.py
--- a/pypy/jit/backend/test/calling_convention_test.py
+++ b/pypy/jit/backend/test/calling_convention_test.py
@@ -57,146 +57,146 @@
         return ConstInt(heaptracker.adr2int(addr))
 
     def test_call_aligned_with_spilled_values(self):
-            from pypy.rlib.libffi import types
-            cpu = self.cpu
-            if not cpu.supports_floats:
-                py.test.skip('requires floats')
+        from pypy.rlib.libffi import types
+        cpu = self.cpu
+        if not cpu.supports_floats:
+            py.test.skip('requires floats')
 
 
-            def func(*args):
-                return float(sum(args))
+        def func(*args):
+            return float(sum(args))
 
-            F = lltype.Float
-            I = lltype.Signed
-            floats = [0.7, 5.8, 0.1, 0.3, 0.9, -2.34, -3.45, -4.56]
-            ints = [7, 11, 23, 13, -42, 1111, 95, 1]
-            for case in range(256):
-                local_floats = list(floats)
-                local_ints = list(ints)
-                args = []
-                spills = []
-                funcargs = []
-                float_count = 0
-                int_count = 0
-                for i in range(8):
-                    if case & (1<<i):
-                        args.append('f%d' % float_count)
-                        spills.append('force_spill(f%d)' % float_count)
-                        float_count += 1
-                        funcargs.append(F)
-                    else:
-                        args.append('i%d' % int_count)
-                        spills.append('force_spill(i%d)' % int_count)
-                        int_count += 1
-                        funcargs.append(I)
+        F = lltype.Float
+        I = lltype.Signed
+        floats = [0.7, 5.8, 0.1, 0.3, 0.9, -2.34, -3.45, -4.56]
+        ints = [7, 11, 23, 13, -42, 1111, 95, 1]
+        for case in range(256):
+            local_floats = list(floats)
+            local_ints = list(ints)
+            args = []
+            spills = []
+            funcargs = []
+            float_count = 0
+            int_count = 0
+            for i in range(8):
+                if case & (1<<i):
+                    args.append('f%d' % float_count)
+                    spills.append('force_spill(f%d)' % float_count)
+                    float_count += 1
+                    funcargs.append(F)
+                else:
+                    args.append('i%d' % int_count)
+                    spills.append('force_spill(i%d)' % int_count)
+                    int_count += 1
+                    funcargs.append(I)
 
-                arguments = ', '.join(args)
-                spill_ops = '\n'.join(spills)
+            arguments = ', '.join(args)
+            spill_ops = '\n'.join(spills)
 
-                FUNC = self.FuncType(funcargs, F)
-                FPTR = self.Ptr(FUNC)
-                func_ptr = llhelper(FPTR, func)
-                calldescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
-                funcbox = self.get_funcbox(cpu, func_ptr)
+            FUNC = self.FuncType(funcargs, F)
+            FPTR = self.Ptr(FUNC)
+            func_ptr = llhelper(FPTR, func)
+            calldescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
+            funcbox = self.get_funcbox(cpu, func_ptr)
 
-                ops = '[%s]\n' % arguments
-                ops += '%s\n' % spill_ops
-                ops += 'f99 = call(ConstClass(func_ptr), %s, descr=calldescr)\n' % arguments
-                ops += 'finish(f99, %s)\n' % arguments
+            ops = '[%s]\n' % arguments
+            ops += '%s\n' % spill_ops
+            ops += 'f99 = call(ConstClass(func_ptr), %s, descr=calldescr)\n' % arguments
+            ops += 'finish(f99, %s)\n' % arguments
 
-                loop = parse(ops, namespace=locals())
-                looptoken = LoopToken()
-                done_number = self.cpu.get_fail_descr_number(loop.operations[-1].getdescr())
-                self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
-                expected_result = self._prepare_args(args, floats, ints)
+            loop = parse(ops, namespace=locals())
+            looptoken = LoopToken()
+            done_number = self.cpu.get_fail_descr_number(loop.operations[-1].getdescr())
+            self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
+            expected_result = self._prepare_args(args, floats, ints)
 
-                res = self.cpu.execute_token(looptoken)
-                x = longlong.getrealfloat(cpu.get_latest_value_float(0))
-                assert abs(x - expected_result) < 0.0001
+            res = self.cpu.execute_token(looptoken)
+            x = longlong.getrealfloat(cpu.get_latest_value_float(0))
+            assert abs(x - expected_result) < 0.0001
 
     def test_call_aligned_with_imm_values(self):
-            from pypy.rlib.libffi import types
-            cpu = self.cpu
-            if not cpu.supports_floats:
-                py.test.skip('requires floats')
+        from pypy.rlib.libffi import types
+        cpu = self.cpu
+        if not cpu.supports_floats:
+            py.test.skip('requires floats')
 
 
-            def func(*args):
-                return float(sum(args))
+        def func(*args):
+            return float(sum(args))
 
-            F = lltype.Float
-            I = lltype.Signed
-            floats = [0.7, 5.8, 0.1, 0.3, 0.9, -2.34, -3.45, -4.56]
-            ints = [7, 11, 23, 13, -42, 1111, 95, 1]
-            for case in range(256):
-                result = 0.0
-                args = []
-                argslist = []
-                local_floats = list(floats)
-                local_ints = list(ints)
-                for i in range(8):
-                    if case & (1<<i):
-                        args.append(F)
-                        arg = local_floats.pop()
-                        result += arg
-                        argslist.append(constfloat(arg))
-                    else:
-                        args.append(I)
-                        arg = local_ints.pop()
-                        result += arg
-                        argslist.append(ConstInt(arg))
-                FUNC = self.FuncType(args, F)
-                FPTR = self.Ptr(FUNC)
-                func_ptr = llhelper(FPTR, func)
-                calldescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
-                funcbox = self.get_funcbox(cpu, func_ptr)
+        F = lltype.Float
+        I = lltype.Signed
+        floats = [0.7, 5.8, 0.1, 0.3, 0.9, -2.34, -3.45, -4.56]
+        ints = [7, 11, 23, 13, -42, 1111, 95, 1]
+        for case in range(256):
+            result = 0.0
+            args = []
+            argslist = []
+            local_floats = list(floats)
+            local_ints = list(ints)
+            for i in range(8):
+                if case & (1<<i):
+                    args.append(F)
+                    arg = local_floats.pop()
+                    result += arg
+                    argslist.append(constfloat(arg))
+                else:
+                    args.append(I)
+                    arg = local_ints.pop()
+                    result += arg
+                    argslist.append(ConstInt(arg))
+            FUNC = self.FuncType(args, F)
+            FPTR = self.Ptr(FUNC)
+            func_ptr = llhelper(FPTR, func)
+            calldescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
+            funcbox = self.get_funcbox(cpu, func_ptr)
 
-                res = self.execute_operation(rop.CALL,
-                                             [funcbox] + argslist,
-                                             'float', descr=calldescr)
-                assert abs(res.getfloat() - result) < 0.0001
+            res = self.execute_operation(rop.CALL,
+                                         [funcbox] + argslist,
+                                         'float', descr=calldescr)
+            assert abs(res.getfloat() - result) < 0.0001
 
     def test_call_aligned_with_args_on_the_stack(self):
-            from pypy.rlib.libffi import types
-            cpu = self.cpu
-            if not cpu.supports_floats:
-                py.test.skip('requires floats')
+        from pypy.rlib.libffi import types
+        cpu = self.cpu
+        if not cpu.supports_floats:
+            py.test.skip('requires floats')
 
 
-            def func(*args):
-                return float(sum(args))
+        def func(*args):
+            return float(sum(args))
 
-            F = lltype.Float
-            I = lltype.Signed
-            floats = [0.7, 5.8, 0.1, 0.3, 0.9, -2.34, -3.45, -4.56]
-            ints = [7, 11, 23, 13, -42, 1111, 95, 1]
-            for case in range(256):
-                result = 0.0
-                args = []
-                argslist = []
-                local_floats = list(floats)
-                local_ints = list(ints)
-                for i in range(8):
-                    if case & (1<<i):
-                        args.append(F)
-                        arg = local_floats.pop()
-                        result += arg
-                        argslist.append(boxfloat(arg))
-                    else:
-                        args.append(I)
-                        arg = local_ints.pop()
-                        result += arg
-                        argslist.append(BoxInt(arg))
-                FUNC = self.FuncType(args, F)
-                FPTR = self.Ptr(FUNC)
-                func_ptr = llhelper(FPTR, func)
-                calldescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
-                funcbox = self.get_funcbox(cpu, func_ptr)
+        F = lltype.Float
+        I = lltype.Signed
+        floats = [0.7, 5.8, 0.1, 0.3, 0.9, -2.34, -3.45, -4.56]
+        ints = [7, 11, 23, 13, -42, 1111, 95, 1]
+        for case in range(256):
+            result = 0.0
+            args = []
+            argslist = []
+            local_floats = list(floats)
+            local_ints = list(ints)
+            for i in range(8):
+                if case & (1<<i):
+                    args.append(F)
+                    arg = local_floats.pop()
+                    result += arg
+                    argslist.append(boxfloat(arg))
+                else:
+                    args.append(I)
+                    arg = local_ints.pop()
+                    result += arg
+                    argslist.append(BoxInt(arg))
+            FUNC = self.FuncType(args, F)
+            FPTR = self.Ptr(FUNC)
+            func_ptr = llhelper(FPTR, func)
+            calldescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
+            funcbox = self.get_funcbox(cpu, func_ptr)
 
-                res = self.execute_operation(rop.CALL,
-                                             [funcbox] + argslist,
-                                             'float', descr=calldescr)
-                assert abs(res.getfloat() - result) < 0.0001
+            res = self.execute_operation(rop.CALL,
+                                         [funcbox] + argslist,
+                                         'float', descr=calldescr)
+            assert abs(res.getfloat() - result) < 0.0001
 
     def test_call_alignment_call_assembler(self):
         from pypy.rlib.libffi import types
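The aligned-call tests above all follow the same pattern: the 8-bit case
number is treated as a bitmask that decides, position by position, whether the
next argument is a float or an int, and the called helper just returns
float(sum(args)).  A minimal standalone sketch of that enumeration, with no
CPU backend involved, could look roughly like this:

    # Sketch only: mirrors the bitmask enumeration used by the aligned-call
    # tests, without touching the JIT backend.
    def enumerate_cases(floats, ints, nargs=8):
        for case in range(1 << nargs):
            local_floats = list(floats)
            local_ints = list(ints)
            args = []
            for i in range(nargs):
                if case & (1 << i):
                    args.append(local_floats.pop())   # this position is a float
                else:
                    args.append(local_ints.pop())     # this position is an int
            yield case, args, float(sum(args))        # expected call result

    floats = [0.7, 5.8, 0.1, 0.3, 0.9, -2.34, -3.45, -4.56]
    ints = [7, 11, 23, 13, -42, 1111, 95, 1]
    cases = list(enumerate_cases(floats, ints))
    assert len(cases) == 256
    assert all(len(args) == 8 for _, args, _ in cases)
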
diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py
--- a/pypy/jit/backend/test/runner_test.py
+++ b/pypy/jit/backend/test/runner_test.py
@@ -560,23 +560,6 @@
                                          'int', descr=calldescr)
             assert res.value == func_ints(*args)
 
-    def test_call_to_c_function(self):
-        from pypy.rlib.libffi import CDLL, types, ArgChain
-        from pypy.rpython.lltypesystem.ll2ctypes import libc_name
-        libc = CDLL(libc_name)
-        c_tolower = libc.getpointer('tolower', [types.uchar], types.sint)
-        argchain = ArgChain().arg(ord('A'))
-        assert c_tolower.call(argchain, rffi.INT) == ord('a')
-
-        func_adr = llmemory.cast_ptr_to_adr(c_tolower.funcsym)
-        funcbox = ConstInt(heaptracker.adr2int(func_adr))
-        calldescr = self.cpu.calldescrof_dynamic([types.uchar], types.sint)
-        res = self.execute_operation(rop.CALL,
-                                     [funcbox, BoxInt(ord('A'))],
-                                     'int',
-                                     descr=calldescr)
-        assert res.value == ord('a')
-
     def test_call_with_const_floats(self):
         def func(f1, f2):
             return f1 + f2
@@ -1680,7 +1663,7 @@
         record = []
         #
         S = lltype.GcStruct('S', ('tid', lltype.Signed))
-        FUNC = self.FuncType([lltype.Ptr(S), lltype.Signed], lltype.Void)
+        FUNC = self.FuncType([lltype.Ptr(S), lltype.Ptr(S)], lltype.Void)
         func_ptr = llhelper(lltype.Ptr(FUNC), func_void)
         funcbox = self.get_funcbox(self.cpu, func_ptr)
         class WriteBarrierDescr(AbstractDescr):
@@ -1699,12 +1682,49 @@
             s = lltype.malloc(S)
             s.tid = value
             sgcref = lltype.cast_opaque_ptr(llmemory.GCREF, s)
+            t = lltype.malloc(S)
+            tgcref = lltype.cast_opaque_ptr(llmemory.GCREF, t)
             del record[:]
             self.execute_operation(rop.COND_CALL_GC_WB,
-                                   [BoxPtr(sgcref), ConstInt(-2121)],
+                                   [BoxPtr(sgcref), ConstPtr(tgcref)],
                                    'void', descr=WriteBarrierDescr())
             if cond:
-                assert record == [(s, -2121)]
+                assert record == [(s, t)]
+            else:
+                assert record == []
+
+    def test_cond_call_gc_wb_array(self):
+        def func_void(a, b, c):
+            record.append((a, b, c))
+        record = []
+        #
+        S = lltype.GcStruct('S', ('tid', lltype.Signed))
+        FUNC = self.FuncType([lltype.Ptr(S), lltype.Signed, lltype.Ptr(S)],
+                             lltype.Void)
+        func_ptr = llhelper(lltype.Ptr(FUNC), func_void)
+        funcbox = self.get_funcbox(self.cpu, func_ptr)
+        class WriteBarrierDescr(AbstractDescr):
+            jit_wb_if_flag = 4096
+            jit_wb_if_flag_byteofs = struct.pack("i", 4096).index('\x10')
+            jit_wb_if_flag_singlebyte = 0x10
+            def get_write_barrier_from_array_fn(self, cpu):
+                return funcbox.getint()
+        #
+        for cond in [False, True]:
+            value = random.randrange(-sys.maxint, sys.maxint)
+            if cond:
+                value |= 4096
+            else:
+                value &= ~4096
+            s = lltype.malloc(S)
+            s.tid = value
+            sgcref = lltype.cast_opaque_ptr(llmemory.GCREF, s)
+            del record[:]
+            self.execute_operation(rop.COND_CALL_GC_WB_ARRAY,
+                       [BoxPtr(sgcref), ConstInt(123), BoxPtr(sgcref)],
+                       'void', descr=WriteBarrierDescr())
+            if cond:
+                assert record == [(s, 123, s)]
             else:
                 assert record == []
 
@@ -1843,6 +1863,99 @@
         assert self.cpu.get_latest_value_int(2) == 10
         assert values == [1, 10]
 
+    def test_call_to_c_function(self):
+        from pypy.rlib.libffi import CDLL, types, ArgChain
+        from pypy.rpython.lltypesystem.ll2ctypes import libc_name
+        libc = CDLL(libc_name)
+        c_tolower = libc.getpointer('tolower', [types.uchar], types.sint)
+        argchain = ArgChain().arg(ord('A'))
+        assert c_tolower.call(argchain, rffi.INT) == ord('a')
+
+        cpu = self.cpu
+        func_adr = llmemory.cast_ptr_to_adr(c_tolower.funcsym)
+        funcbox = ConstInt(heaptracker.adr2int(func_adr))
+        calldescr = cpu.calldescrof_dynamic([types.uchar], types.sint)
+        i1 = BoxInt()
+        i2 = BoxInt()
+        tok = BoxInt()
+        faildescr = BasicFailDescr(1)
+        ops = [
+        ResOperation(rop.CALL_RELEASE_GIL, [funcbox, i1], i2,
+                     descr=calldescr),
+        ResOperation(rop.GUARD_NOT_FORCED, [], None, descr=faildescr),
+        ResOperation(rop.FINISH, [i2], None, descr=BasicFailDescr(0))
+        ]
+        ops[1].setfailargs([i1, i2])
+        looptoken = LoopToken()
+        self.cpu.compile_loop([i1], ops, looptoken)
+        self.cpu.set_future_value_int(0, ord('G'))
+        fail = self.cpu.execute_token(looptoken)
+        assert fail.identifier == 0
+        assert self.cpu.get_latest_value_int(0) == ord('g')
+
+    def test_call_to_c_function_with_callback(self):
+        from pypy.rlib.libffi import CDLL, types, ArgChain, clibffi
+        from pypy.rpython.lltypesystem.ll2ctypes import libc_name
+        libc = CDLL(libc_name)
+        types_size_t = clibffi.cast_type_to_ffitype(rffi.SIZE_T)
+        c_qsort = libc.getpointer('qsort', [types.pointer, types_size_t,
+                                            types_size_t, types.pointer],
+                                  types.void)
+        class Glob(object):
+            pass
+        glob = Glob()
+        class X(object):
+            pass
+        #
+        def callback(p1, p2):
+            glob.lst.append(X())
+            return rffi.cast(rffi.INT, 1)
+        CALLBACK = lltype.Ptr(lltype.FuncType([lltype.Signed,
+                                               lltype.Signed], rffi.INT))
+        fn = llhelper(CALLBACK, callback)
+        S = lltype.Struct('S', ('x', rffi.INT), ('y', rffi.INT))
+        raw = lltype.malloc(S, flavor='raw')
+        argchain = ArgChain()
+        argchain = argchain.arg(rffi.cast(lltype.Signed, raw))
+        argchain = argchain.arg(rffi.cast(rffi.SIZE_T, 2))
+        argchain = argchain.arg(rffi.cast(rffi.SIZE_T, 4))
+        argchain = argchain.arg(rffi.cast(lltype.Signed, fn))
+        glob.lst = []
+        c_qsort.call(argchain, lltype.Void)
+        assert len(glob.lst) > 0
+        del glob.lst[:]
+
+        cpu = self.cpu
+        func_adr = llmemory.cast_ptr_to_adr(c_qsort.funcsym)
+        funcbox = ConstInt(heaptracker.adr2int(func_adr))
+        calldescr = cpu.calldescrof_dynamic([types.pointer, types_size_t,
+                                             types_size_t, types.pointer],
+                                            types.void)
+        i0 = BoxInt()
+        i1 = BoxInt()
+        i2 = BoxInt()
+        i3 = BoxInt()
+        tok = BoxInt()
+        faildescr = BasicFailDescr(1)
+        ops = [
+        ResOperation(rop.CALL_RELEASE_GIL, [funcbox, i0, i1, i2, i3], None,
+                     descr=calldescr),
+        ResOperation(rop.GUARD_NOT_FORCED, [], None, descr=faildescr),
+        ResOperation(rop.FINISH, [], None, descr=BasicFailDescr(0))
+        ]
+        ops[1].setfailargs([])
+        looptoken = LoopToken()
+        self.cpu.compile_loop([i0, i1, i2, i3], ops, looptoken)
+        self.cpu.set_future_value_int(0, rffi.cast(lltype.Signed, raw))
+        self.cpu.set_future_value_int(1, 2)
+        self.cpu.set_future_value_int(2, 4)
+        self.cpu.set_future_value_int(3, rffi.cast(lltype.Signed, fn))
+        assert glob.lst == []
+        fail = self.cpu.execute_token(looptoken)
+        assert fail.identifier == 0
+        assert len(glob.lst) > 0
+        lltype.free(raw, flavor='raw')
+
     def test_guard_not_invalidated(self):
         cpu = self.cpu
         i0 = BoxInt()
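The two tests re-added above drive the new CALL_RELEASE_GIL operation, which
performs the foreign call with the GIL released and is always immediately
followed by GUARD_NOT_FORCED.  Outside the JIT, plain ctypes shows the same
observable behaviour, since ctypes also releases the GIL around foreign calls;
a rough, backend-free illustration of the tolower case (assuming a libc can be
located on the host) is:

    # Rough illustration only, using the standard ctypes module rather than
    # pypy.rlib.libffi; ctypes likewise releases the GIL around the call.
    import ctypes, ctypes.util

    libc = ctypes.CDLL(ctypes.util.find_library('c'))   # assumes a Unix-like libc
    assert libc.tolower(ord('A')) == ord('a')
    assert libc.tolower(ord('G')) == ord('g')
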
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -128,6 +128,8 @@
         if gc_ll_descr.get_malloc_slowpath_addr is not None:
             self._build_malloc_slowpath()
         self._build_stack_check_slowpath()
+        if gc_ll_descr.gcrootmap:
+            self._build_release_gil(gc_ll_descr.gcrootmap)
         debug_start('jit-backend-counts')
         self.set_debug(have_debug_prints())
         debug_stop('jit-backend-counts')
@@ -306,7 +308,66 @@
         rawstart = mc.materialize(self.cpu.asmmemmgr, [])
         self.stack_check_slowpath = rawstart
 
-    def assemble_loop(self, inputargs, operations, looptoken, log):
+    @staticmethod
+    def _release_gil_asmgcc(css):
+        # similar to trackgcroot.py:pypy_asm_stackwalk, first part
+        from pypy.rpython.memory.gctransform import asmgcroot
+        new = rffi.cast(asmgcroot.ASM_FRAMEDATA_HEAD_PTR, css)
+        next = asmgcroot.gcrootanchor.next
+        new.next = next
+        new.prev = asmgcroot.gcrootanchor
+        asmgcroot.gcrootanchor.next = new
+        next.prev = new
+        # and now release the GIL
+        before = rffi.aroundstate.before
+        if before:
+            before()
+
+    @staticmethod
+    def _reacquire_gil_asmgcc(css):
+        # first reacquire the GIL
+        after = rffi.aroundstate.after
+        if after:
+            after()
+        # similar to trackgcroot.py:pypy_asm_stackwalk, second part
+        from pypy.rpython.memory.gctransform import asmgcroot
+        old = rffi.cast(asmgcroot.ASM_FRAMEDATA_HEAD_PTR, css)
+        prev = old.prev
+        next = old.next
+        prev.next = next
+        next.prev = prev
+
+    @staticmethod
+    def _release_gil_shadowstack():
+        before = rffi.aroundstate.before
+        if before:
+            before()
+
+    @staticmethod
+    def _reacquire_gil_shadowstack():
+        after = rffi.aroundstate.after
+        if after:
+            after()
+
+    _NOARG_FUNC = lltype.Ptr(lltype.FuncType([], lltype.Void))
+    _CLOSESTACK_FUNC = lltype.Ptr(lltype.FuncType([rffi.LONGP],
+                                                  lltype.Void))
+
+    def _build_release_gil(self, gcrootmap):
+        if gcrootmap.is_shadow_stack:
+            releasegil_func = llhelper(self._NOARG_FUNC,
+                                       self._release_gil_shadowstack)
+            reacqgil_func = llhelper(self._NOARG_FUNC,
+                                     self._reacquire_gil_shadowstack)
+        else:
+            releasegil_func = llhelper(self._CLOSESTACK_FUNC,
+                                       self._release_gil_asmgcc)
+            reacqgil_func = llhelper(self._CLOSESTACK_FUNC,
+                                     self._reacquire_gil_asmgcc)
+        self.releasegil_addr  = self.cpu.cast_ptr_to_int(releasegil_func)
+        self.reacqgil_addr = self.cpu.cast_ptr_to_int(reacqgil_func)
+
+    def assemble_loop(self, loopname, inputargs, operations, looptoken, log):
         '''adds the following attributes to looptoken:
                _x86_loop_code       (an integer giving an address)
                _x86_bootstrap_code  (an integer giving an address)
@@ -322,6 +383,7 @@
         # for the duration of compiling one loop or a one bridge.
 
         clt = CompiledLoopToken(self.cpu, looptoken.number)
+        clt.allgcrefs = []
         looptoken.compiled_loop_token = clt
         if not we_are_translated():
             # Arguments should be unique
@@ -329,13 +391,13 @@
 
         self.setup(looptoken)
         self.currently_compiling_loop = looptoken
-        funcname = self._find_debug_merge_point(operations)
         if log:
             self._register_counter()
             operations = self._inject_debugging_code(looptoken, operations)
 
         regalloc = RegAlloc(self, self.cpu.translate_support_code)
-        arglocs, operations = regalloc.prepare_loop(inputargs, operations, looptoken)
+        arglocs, operations = regalloc.prepare_loop(inputargs, operations,
+                                                    looptoken, clt.allgcrefs)
         looptoken._x86_arglocs = arglocs
 
         bootstrappos = self.mc.get_relative_pos()
@@ -355,7 +417,7 @@
         #
         rawstart = self.materialize_loop(looptoken)
         debug_print("Loop #%d (%s) has address %x to %x" % (
-            looptoken.number, funcname,
+            looptoken.number, loopname,
             rawstart + self.looppos,
             rawstart + directbootstrappos))
         self._patch_stackadjust(rawstart + stackadjustpos,
@@ -375,7 +437,7 @@
         self.teardown()
         # oprofile support
         if self.cpu.profile_agent is not None:
-            name = "Loop # %s: %s" % (looptoken.number, funcname)
+            name = "Loop # %s: %s" % (looptoken.number, loopname)
             self.cpu.profile_agent.native_code_written(name,
                                                        rawstart, fullsize)
         return ops_offset
@@ -395,7 +457,6 @@
             return
 
         self.setup(original_loop_token)
-        funcname = self._find_debug_merge_point(operations)
         if log:
             self._register_counter()
             operations = self._inject_debugging_code(faildescr, operations)
@@ -407,7 +468,8 @@
         regalloc = RegAlloc(self, self.cpu.translate_support_code)
         fail_depths = faildescr._x86_current_depths
         operations = regalloc.prepare_bridge(fail_depths, inputargs, arglocs,
-                                             operations)
+                                             operations,
+                                             self.current_clt.allgcrefs)
 
         stackadjustpos = self._patchable_stackadjust()
         frame_depth, param_depth = self._assemble(regalloc, operations)
@@ -417,8 +479,8 @@
         #
         rawstart = self.materialize_loop(original_loop_token)
 
-        debug_print("Bridge out of guard %d (%s) has address %x to %x" %
-                    (descr_number, funcname, rawstart, rawstart + codeendpos))
+        debug_print("Bridge out of guard %d has address %x to %x" %
+                    (descr_number, rawstart, rawstart + codeendpos))
         self._patch_stackadjust(rawstart + stackadjustpos,
                                 frame_depth + param_depth)
         self.patch_pending_failure_recoveries(rawstart)
@@ -432,7 +494,7 @@
         self.teardown()
         # oprofile support
         if self.cpu.profile_agent is not None:
-            name = "Bridge # %s: %s" % (descr_number, funcname)
+            name = "Bridge # %s" % (descr_number,)
             self.cpu.profile_agent.native_code_written(name,
                                                        rawstart, fullsize)
         return ops_offset
@@ -492,17 +554,6 @@
         return self.mc.materialize(self.cpu.asmmemmgr, allblocks,
                                    self.cpu.gc_ll_descr.gcrootmap)
 
-    def _find_debug_merge_point(self, operations):
-
-        for op in operations:
-            if op.getopnum() == rop.DEBUG_MERGE_POINT:
-                funcname = op.getarg(0)._get_str()
-                break
-        else:
-            funcname = "<loop %d>" % len(self.loop_run_counters)
-        # invent the counter, so we don't get too confused
-        return funcname
-
     def _register_counter(self):
         if self._debug:
             # YYY very minor leak -- we need the counters to stay alive
@@ -1987,6 +2038,102 @@
         self.mc.CMP_bi(FORCE_INDEX_OFS, 0)
         self.implement_guard(guard_token, 'L')
 
+    def genop_guard_call_release_gil(self, op, guard_op, guard_token,
+                                     arglocs, result_loc):
+        # first, close the stack in the sense of the asmgcc GC root tracker
+        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
+        if gcrootmap:
+            self.call_release_gil(gcrootmap, arglocs)
+        # do the call
+        faildescr = guard_op.getdescr()
+        fail_index = self.cpu.get_fail_descr_number(faildescr)
+        self.mc.MOV_bi(FORCE_INDEX_OFS, fail_index)
+        self._genop_call(op, arglocs, result_loc, fail_index)
+        # then reopen the stack
+        if gcrootmap:
+            self.call_reacquire_gil(gcrootmap, result_loc)
+        # finally, the guard_not_forced
+        self.mc.CMP_bi(FORCE_INDEX_OFS, 0)
+        self.implement_guard(guard_token, 'L')
+
+    def call_release_gil(self, gcrootmap, save_registers):
+        # First, we need to save away the registers listed in
+        # 'save_registers' that are not callee-save.  XXX We assume that
+        # the XMM registers won't be modified.  We store them in
+        # [ESP+4], [ESP+8], etc., leaving enough room in [ESP] for the
+        # single argument to closestack_addr below.
+        p = WORD
+        for reg in self._regalloc.rm.save_around_call_regs:
+            if reg in save_registers:
+                self.mc.MOV_sr(p, reg.value)
+                p += WORD
+        self._regalloc.reserve_param(p//WORD)
+        #
+        if gcrootmap.is_shadow_stack:
+            args = []
+        else:
+            # note that regalloc.py used save_all_regs=True to save all
+            # registers, so we don't have to care about saving them (other
+            # than ebp) in the close_stack_struct.  But if they are registers
+            # like %eax that would be destroyed by this call, *and* they are
+            # used by arglocs for the *next* call, then trouble; for now we
+            # will just push/pop them.
+            from pypy.rpython.memory.gctransform import asmgcroot
+            css = self._regalloc.close_stack_struct
+            if css == 0:
+                use_words = (2 + max(asmgcroot.INDEX_OF_EBP,
+                                     asmgcroot.FRAME_PTR) + 1)
+                pos = self._regalloc.fm.reserve_location_in_frame(use_words)
+                css = get_ebp_ofs(pos + use_words - 1)
+                self._regalloc.close_stack_struct = css
+            # The location where the future CALL will put its return address
+            # will be [ESP-WORD], so save that as the next frame's top address
+            self.mc.LEA_rs(eax.value, -WORD)        # LEA EAX, [ESP-4]
+            frame_ptr = css + WORD * (2+asmgcroot.FRAME_PTR)
+            self.mc.MOV_br(frame_ptr, eax.value)    # MOV [css.frame], EAX
+            # Save ebp
+            index_of_ebp = css + WORD * (2+asmgcroot.INDEX_OF_EBP)
+            self.mc.MOV_br(index_of_ebp, ebp.value) # MOV [css.ebp], EBP
+            # Call the closestack() function (also releasing the GIL)
+            if IS_X86_32:
+                reg = eax
+            elif IS_X86_64:
+                reg = edi
+            self.mc.LEA_rb(reg.value, css)
+            args = [reg]
+        #
+        self._emit_call(-1, imm(self.releasegil_addr), args)
+        # Finally, restore the registers saved above.
+        p = WORD
+        for reg in self._regalloc.rm.save_around_call_regs:
+            if reg in save_registers:
+                self.mc.MOV_rs(reg.value, p)
+                p += WORD
+
+    def call_reacquire_gil(self, gcrootmap, save_loc):
+        # save the previous result (eax/xmm0) into the stack temporarily.
+        # XXX like with call_release_gil(), we assume that we don't need
+        # to save xmm0 in this case.
+        if isinstance(save_loc, RegLoc) and not save_loc.is_xmm:
+            self.mc.MOV_sr(WORD, save_loc.value)
+            self._regalloc.reserve_param(2)
+        # call the reopenstack() function (also reacquiring the GIL)
+        if gcrootmap.is_shadow_stack:
+            args = []
+        else:
+            css = self._regalloc.close_stack_struct
+            assert css != 0
+            if IS_X86_32:
+                reg = eax
+            elif IS_X86_64:
+                reg = edi
+            self.mc.LEA_rb(reg.value, css)
+            args = [reg]
+        self._emit_call(-1, imm(self.reacqgil_addr), args)
+        # restore the result from the stack
+        if isinstance(save_loc, RegLoc) and not save_loc.is_xmm:
+            self.mc.MOV_rs(save_loc.value, WORD)
+
     def genop_guard_call_assembler(self, op, guard_op, guard_token,
                                    arglocs, result_loc):
         faildescr = guard_op.getdescr()
@@ -2076,13 +2223,26 @@
     def genop_discard_cond_call_gc_wb(self, op, arglocs):
         # Write code equivalent to write_barrier() in the GC: it checks
         # a flag in the object at arglocs[0], and if set, it calls the
-        # function remember_young_pointer() from the GC.  The two arguments
-        # to the call are in arglocs[:2].  The rest, arglocs[2:], contains
+        # function remember_young_pointer() from the GC.  The arguments
+        # to the call are in arglocs[:N].  The rest, arglocs[N:], contains
         # registers that need to be saved and restored across the call.
+        # N is either 2 (regular write barrier) or 3 (array write barrier).
         descr = op.getdescr()
         if we_are_translated():
             cls = self.cpu.gc_ll_descr.has_write_barrier_class()
             assert cls is not None and isinstance(descr, cls)
+        #
+        opnum = op.getopnum()
+        if opnum == rop.COND_CALL_GC_WB:
+            N = 2
+            func = descr.get_write_barrier_fn(self.cpu)
+        elif opnum == rop.COND_CALL_GC_WB_ARRAY:
+            N = 3
+            func = descr.get_write_barrier_from_array_fn(self.cpu)
+            assert func != 0
+        else:
+            raise AssertionError(opnum)
+        #
         loc_base = arglocs[0]
         self.mc.TEST8(addr_add_const(loc_base, descr.jit_wb_if_flag_byteofs),
                       imm(descr.jit_wb_if_flag_singlebyte))
@@ -2093,33 +2253,37 @@
         if IS_X86_32:
             limit = -1      # push all arglocs on the stack
         elif IS_X86_64:
-            limit = 1       # push only arglocs[2:] on the stack
+            limit = N - 1   # push only arglocs[N:] on the stack
         for i in range(len(arglocs)-1, limit, -1):
             loc = arglocs[i]
             if isinstance(loc, RegLoc):
                 self.mc.PUSH_r(loc.value)
             else:
-                assert not IS_X86_64 # there should only be regs in arglocs[2:]
+                assert not IS_X86_64 # there should only be regs in arglocs[N:]
                 self.mc.PUSH_i32(loc.getint())
         if IS_X86_64:
             # We clobber these registers to pass the arguments, but that's
             # okay, because consider_cond_call_gc_wb makes sure that any
             # caller-save registers with values in them are present in
-            # arglocs[2:] too, so they are saved on the stack above and
+            # arglocs[N:] too, so they are saved on the stack above and
             # restored below.
-            remap_frame_layout(self, arglocs[:2], [edi, esi],
+            if N == 2:
+                callargs = [edi, esi]
+            else:
+                callargs = [edi, esi, edx]
+            remap_frame_layout(self, arglocs[:N], callargs,
                                X86_64_SCRATCH_REG)
-
+        #
         # misaligned stack in the call, but it's ok because the write barrier
         # is not going to call anything more.  Also, this assumes that the
         # write barrier does not touch the xmm registers.  (Slightly delicate
         # assumption, given that the write barrier can end up calling the
         # platform's malloc() from AddressStack.append().  XXX may need to
         # be done properly)
-        self.mc.CALL(imm(descr.get_write_barrier_fn(self.cpu)))
+        self.mc.CALL(imm(func))
         if IS_X86_32:
-            self.mc.ADD_ri(esp.value, 2*WORD)
-        for i in range(2, len(arglocs)):
+            self.mc.ADD_ri(esp.value, N*WORD)
+        for i in range(N, len(arglocs)):
             loc = arglocs[i]
             assert isinstance(loc, RegLoc)
             self.mc.POP_r(loc.value)
@@ -2128,6 +2292,8 @@
         assert 0 < offset <= 127
         self.mc.overwrite(jz_location-1, chr(offset))
 
+    genop_discard_cond_call_gc_wb_array = genop_discard_cond_call_gc_wb
+
     def genop_force_token(self, op, arglocs, resloc):
         # RegAlloc.consider_force_token ensures this:
         assert isinstance(resloc, RegLoc)
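_release_gil_asmgcc and _reacquire_gil_asmgcc above do two things: they link
(resp. unlink) the close-stack structure into the doubly-linked list anchored
at asmgcroot.gcrootanchor, and they call the rffi.aroundstate.before/after
hooks that actually release and reacquire the GIL.  The list manipulation
itself is ordinary insertion at the head of a circular doubly-linked list; a
standalone sketch of just that part:

    # Standalone sketch of the list discipline used by _release_gil_asmgcc /
    # _reacquire_gil_asmgcc: link the frame record in front of the anchor
    # before the call, unlink it afterwards.
    class Node(object):
        def __init__(self):
            self.prev = self.next = self

    anchor = Node()          # plays the role of asmgcroot.gcrootanchor

    def link(new):           # _release_gil_asmgcc, first part
        nxt = anchor.next
        new.next = nxt
        new.prev = anchor
        anchor.next = new
        nxt.prev = new

    def unlink(old):         # _reacquire_gil_asmgcc, second part
        prev, nxt = old.prev, old.next
        prev.next = nxt
        nxt.prev = prev

    frame = Node()
    link(frame)
    assert anchor.next is frame
    unlink(frame)
    assert anchor.next is anchor
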
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -156,12 +156,14 @@
         self.translate_support_code = translate_support_code
         # to be read/used by the assembler too
         self.jump_target_descr = None
+        self.close_stack_struct = 0
 
-    def _prepare(self, inputargs, operations):
+    def _prepare(self, inputargs, operations, allgcrefs):
         self.fm = X86FrameManager()
         self.param_depth = 0
         cpu = self.assembler.cpu
-        operations = cpu.gc_ll_descr.rewrite_assembler(cpu, operations)
+        operations = cpu.gc_ll_descr.rewrite_assembler(cpu, operations,
+                                                       allgcrefs)
         # compute longevity of variables
         longevity = self._compute_vars_longevity(inputargs, operations)
         self.longevity = longevity
@@ -172,15 +174,16 @@
                                    assembler = self.assembler)
         return operations
 
-    def prepare_loop(self, inputargs, operations, looptoken):
-        operations = self._prepare(inputargs, operations)
+    def prepare_loop(self, inputargs, operations, looptoken, allgcrefs):
+        operations = self._prepare(inputargs, operations, allgcrefs)
         jump = operations[-1]
         loop_consts = self._compute_loop_consts(inputargs, jump, looptoken)
         self.loop_consts = loop_consts
         return self._process_inputargs(inputargs), operations
 
-    def prepare_bridge(self, prev_depths, inputargs, arglocs, operations):
-        operations = self._prepare(inputargs, operations)
+    def prepare_bridge(self, prev_depths, inputargs, arglocs, operations,
+                       allgcrefs):
+        operations = self._prepare(inputargs, operations, allgcrefs)
         self.loop_consts = {}
         self._update_bindings(arglocs, inputargs)
         self.fm.frame_depth = prev_depths[0]
@@ -388,7 +391,9 @@
         self.assembler.regalloc_perform_discard(op, arglocs)
 
     def can_merge_with_next_guard(self, op, i, operations):
-        if op.getopnum() == rop.CALL_MAY_FORCE or op.getopnum() == rop.CALL_ASSEMBLER:
+        if (op.getopnum() == rop.CALL_MAY_FORCE or
+            op.getopnum() == rop.CALL_ASSEMBLER or
+            op.getopnum() == rop.CALL_RELEASE_GIL):
             assert operations[i + 1].getopnum() == rop.GUARD_NOT_FORCED
             return True
         if not op.is_comparison():
@@ -779,6 +784,19 @@
         self.xrm.possibly_free_var(op.getarg(1))
 
     def _call(self, op, arglocs, force_store=[], guard_not_forced_op=None):
+        # we need to save registers on the stack:
+        #
+        #  - at least the non-callee-saved registers
+        #
+        #  - for shadowstack, we assume that any call can collect, and we
+        #    save also the callee-saved registers that contain GC pointers,
+        #    so that they can be found by follow_stack_frame_of_assembler()
+        #
+        #  - for CALL_MAY_FORCE or CALL_ASSEMBLER, we have to save all regs
+        #    anyway, in case we need to do cpu.force().  The issue is that
+        #    grab_frame_values() would not be able to locate values in
+        #    callee-saved registers.
+        #
         save_all_regs = guard_not_forced_op is not None
         self.xrm.before_call(force_store, save_all_regs=save_all_regs)
         if not save_all_regs:
@@ -845,6 +863,8 @@
         assert guard_op is not None
         self._consider_call(op, guard_op)
 
+    consider_call_release_gil = consider_call_may_force
+
     def consider_call_assembler(self, op, guard_op):
         descr = op.getdescr()
         assert isinstance(descr, LoopToken)
@@ -864,12 +884,12 @@
     def consider_cond_call_gc_wb(self, op):
         assert op.result is None
         args = op.getarglist()
-        loc_newvalue = self.rm.make_sure_var_in_reg(op.getarg(1), args)
-        # ^^^ we force loc_newvalue in a reg (unless it's a Const),
-        # because it will be needed anyway by the following setfield_gc.
-        # It avoids loading it twice from the memory.
-        loc_base = self.rm.make_sure_var_in_reg(op.getarg(0), args)
-        arglocs = [loc_base, loc_newvalue]
+        N = len(args)
+        # we force all arguments in a reg (unless they are Consts),
+        # because they will be needed anyway by the following setfield_gc
+        # or setarrayitem_gc.  This avoids loading them twice from memory.
+        arglocs = [self.rm.make_sure_var_in_reg(op.getarg(i), args)
+                   for i in range(N)]
         # add eax, ecx and edx as extra "arguments" to ensure they are
         # saved and restored.  Fish in self.rm to know which of these
         # registers really need to be saved (a bit of a hack).  Moreover,
@@ -883,6 +903,8 @@
         self.PerformDiscard(op, arglocs)
         self.rm.possibly_free_vars_for_op(op)
 
+    consider_cond_call_gc_wb_array = consider_cond_call_gc_wb
+
     def fastpath_malloc_fixedsize(self, op, descr):
         assert isinstance(descr, BaseSizeDescr)
         self._do_fastpath_malloc(op, descr.size, descr.tid)
@@ -1358,7 +1380,9 @@
         name = name[len('consider_'):]
         num = getattr(rop, name.upper())
         if (is_comparison_or_ovf_op(num)
-            or num == rop.CALL_MAY_FORCE or num == rop.CALL_ASSEMBLER):
+            or num == rop.CALL_MAY_FORCE
+            or num == rop.CALL_ASSEMBLER
+            or num == rop.CALL_RELEASE_GIL):
             oplist_with_guard[num] = value
             oplist[num] = add_none_argument(value)
         else:
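Both the regalloc and assembler changes above rely on the WriteBarrierDescr
fields jit_wb_if_flag_byteofs and jit_wb_if_flag_singlebyte: the generated
fast path is a single TEST8 of one byte of the object's tid against a one-byte
mask, and only if the flag is set does it call the (array) write barrier.  The
following sketch recomputes those two fields for flag 4096 the same way the
new test does, assuming a little-endian machine and Python 2 string semantics
for struct.pack:

    import struct

    jit_wb_if_flag = 4096                       # 0x1000
    packed = struct.pack("i", jit_wb_if_flag)   # '\x00\x10\x00\x00' on little-endian
    byteofs = packed.index('\x10')              # which byte of the word holds the bit
    singlebyte = 0x10                           # the bit's value within that byte
    assert (byteofs, singlebyte) == (1, 0x10)

    def tid_has_flag(tid):
        # what the emitted TEST8 [obj + byteofs], singlebyte checks
        return bool(ord(struct.pack("i", tid)[byteofs]) & singlebyte)

    assert tid_has_flag(0x1000) and not tid_has_flag(0x0800)
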
diff --git a/pypy/jit/backend/x86/runner.py b/pypy/jit/backend/x86/runner.py
--- a/pypy/jit/backend/x86/runner.py
+++ b/pypy/jit/backend/x86/runner.py
@@ -22,6 +22,7 @@
 
     BOOTSTRAP_TP = lltype.FuncType([], lltype.Signed)
     dont_keepalive_stuff = False # for tests
+    with_threads = False
 
     def __init__(self, rtyper, stats, opts=None, translate_support_code=False,
                  gcdescr=None):
@@ -38,6 +39,7 @@
                 if not oprofile.OPROFILE_AVAILABLE:
                     log.WARNING('oprofile support was explicitly enabled, but oprofile headers seem not to be available')
                 profile_agent = oprofile.OProfileAgent()
+            self.with_threads = config.translation.thread
 
         self.profile_agent = profile_agent
 
@@ -77,9 +79,9 @@
         lines = machine_code_dump(data, addr, self.backend_name, label_list)
         print ''.join(lines)
 
-    def compile_loop(self, inputargs, operations, looptoken, log=True):
-        return self.assembler.assemble_loop(inputargs, operations, looptoken,
-                                            log=log)
+    def compile_loop(self, inputargs, operations, looptoken, log=True, name=''):
+        return self.assembler.assemble_loop(name, inputargs, operations,
+                                            looptoken, log=log)
 
     def compile_bridge(self, faildescr, inputargs, operations,
                        original_loop_token, log=True):
@@ -122,8 +124,8 @@
         addr = executable_token._x86_bootstrap_code
         #llop.debug_print(lltype.Void, ">>>> Entering", addr)
         func = rffi.cast(lltype.Ptr(self.BOOTSTRAP_TP), addr)
+        fail_index = self._execute_call(func)
         #llop.debug_print(lltype.Void, "<<<< Back")
-        fail_index = self._execute_call(func)
         return self.get_fail_descr_from_number(fail_index)
 
     def _execute_call(self, func):
@@ -140,10 +142,11 @@
                 LLInterpreter.current_interpreter = prev_interpreter
         return res
 
-    @staticmethod
     def cast_ptr_to_int(x):
         adr = llmemory.cast_ptr_to_adr(x)
         return CPU386.cast_adr_to_int(adr)
+    cast_ptr_to_int._annspecialcase_ = 'specialize:arglltype(0)'
+    cast_ptr_to_int = staticmethod(cast_ptr_to_int)
 
     all_null_registers = lltype.malloc(rffi.LONGP.TO, 24,
                                        flavor='raw', zero=True,
diff --git a/pypy/jit/backend/x86/rx86.py b/pypy/jit/backend/x86/rx86.py
--- a/pypy/jit/backend/x86/rx86.py
+++ b/pypy/jit/backend/x86/rx86.py
@@ -530,6 +530,7 @@
     POP_b = insn(rex_nw, '\x8F', orbyte(0<<3), stack_bp(1))
 
     LEA_rb = insn(rex_w, '\x8D', register(1,8), stack_bp(2))
+    LEA_rs = insn(rex_w, '\x8D', register(1,8), stack_sp(2))
     LEA32_rb = insn(rex_w, '\x8D', register(1,8),stack_bp(2,force_32bits=True))
     LEA_ra = insn(rex_w, '\x8D', register(1, 8), mem_reg_plus_scaled_reg_plus_const(2))
     LEA_rm = insn(rex_w, '\x8D', register(1, 8), mem_reg_plus_const(2))
diff --git a/pypy/jit/backend/x86/test/test_gc_integration.py b/pypy/jit/backend/x86/test/test_gc_integration.py
--- a/pypy/jit/backend/x86/test/test_gc_integration.py
+++ b/pypy/jit/backend/x86/test/test_gc_integration.py
@@ -16,7 +16,7 @@
 from pypy.rpython.lltypesystem import lltype, llmemory, rffi
 from pypy.rpython.annlowlevel import llhelper
 from pypy.rpython.lltypesystem import rclass, rstr
-from pypy.jit.backend.llsupport.gc import GcLLDescr_framework, GcRefList, GcPtrFieldDescr
+from pypy.jit.backend.llsupport.gc import GcLLDescr_framework, GcPtrFieldDescr
 
 from pypy.jit.backend.x86.test.test_regalloc import MockAssembler
 from pypy.jit.backend.x86.test.test_regalloc import BaseTestRegalloc
@@ -51,11 +51,9 @@
     gcrootmap = MockGcRootMap()
 
     def initialize(self):
-        self.gcrefs = GcRefList()
-        self.gcrefs.initialize()
-        self.single_gcref_descr = GcPtrFieldDescr('', 0)
+        pass
 
-    replace_constptrs_with_getfield_raw = GcLLDescr_framework.replace_constptrs_with_getfield_raw.im_func
+    record_constptrs = GcLLDescr_framework.record_constptrs.im_func
     rewrite_assembler = GcLLDescr_framework.rewrite_assembler.im_func
 
 class TestRegallocDirectGcIntegration(object):
diff --git a/pypy/jit/backend/x86/test/test_runner.py b/pypy/jit/backend/x86/test/test_runner.py
--- a/pypy/jit/backend/x86/test/test_runner.py
+++ b/pypy/jit/backend/x86/test/test_runner.py
@@ -330,6 +330,7 @@
                         assert result != expected
 
     def test_compile_bridge_check_profile_info(self):
+        py.test.skip("does not work, reinvestigate")
         class FakeProfileAgent(object):
             def __init__(self):
                 self.functions = []
@@ -362,7 +363,7 @@
         operations[3].setfailargs([i1])
         self.cpu.compile_loop(inputargs, operations, looptoken)
         name, loopaddress, loopsize = agent.functions[0]
-        assert name == "Loop # 17: hello"
+        assert name == "Loop # 17: hello (loop counter 0)"
         assert loopaddress <= looptoken._x86_loop_code
         assert loopsize >= 40 # randomish number
 
@@ -378,7 +379,7 @@
 
         self.cpu.compile_bridge(faildescr1, [i1b], bridge, looptoken)
         name, address, size = agent.functions[1]
-        assert name == "Bridge # 0: bye"
+        assert name == "Bridge # 0: bye (loop counter 1)"
         # Would be exactly ==, but there are some guard failure recovery
         # stubs in-between
         assert address >= loopaddress + loopsize
diff --git a/pypy/jit/backend/x86/test/test_zrpy_gc.py b/pypy/jit/backend/x86/test/test_zrpy_gc.py
--- a/pypy/jit/backend/x86/test/test_zrpy_gc.py
+++ b/pypy/jit/backend/x86/test/test_zrpy_gc.py
@@ -1,8 +1,7 @@
 """
-This is a test that translates a complete JIT to C and runs it.  It is
-not testing much, expect that it basically works.  What it *is* testing,
-however, is the correct handling of GC, i.e. if objects are freed as
-soon as possible (at least in a simple case).
+This is a test that translates a complete JIT together with a GC and runs it.
+It is testing that the GC-dependent aspects basically work, mostly the mallocs
+and the various cases of the write barrier.
 """
 
 import weakref
@@ -10,16 +9,11 @@
 from pypy.annotation import policy as annpolicy
 from pypy.rlib import rgc
 from pypy.rpython.lltypesystem import lltype, llmemory, rffi
-from pypy.rpython.lltypesystem.lloperation import llop
 from pypy.rlib.jit import JitDriver, dont_look_inside
 from pypy.rlib.jit import purefunction, unroll_safe
-from pypy.jit.backend.x86.runner import CPU386
-from pypy.jit.backend.llsupport.gc import GcRefList, GcRootMap_asmgcc
 from pypy.jit.backend.llsupport.gc import GcLLDescr_framework
 from pypy.tool.udir import udir
-from pypy.jit.backend.x86.arch import IS_X86_64
 from pypy.config.translationoption import DEFL_GC
-import py.test
 
 class X(object):
     def __init__(self, x=0):
@@ -86,7 +80,7 @@
     #
     return {(gc.GcLLDescr_framework, 'can_inline_malloc'): can_inline_malloc2}
 
-def compile(f, gc, **kwds):
+def compile(f, gc, enable_opts='', **kwds):
     from pypy.annotation.listdef import s_list_of_strings
     from pypy.translator.translator import TranslationContext
     from pypy.jit.metainterp.warmspot import apply_jit
@@ -110,14 +104,14 @@
                 old_value[obj, attr] = getattr(obj, attr)
                 setattr(obj, attr, value)
             #
-            apply_jit(t, enable_opts='')
+            apply_jit(t, enable_opts=enable_opts)
             #
         finally:
             for (obj, attr), oldvalue in old_value.items():
                 setattr(obj, attr, oldvalue)
 
     cbuilder = genc.CStandaloneBuilder(t, f, t.config)
-    cbuilder.generate_source()
+    cbuilder.generate_source(defines=cbuilder.DEBUG_DEFINES)
     cbuilder.compile()
     return cbuilder
 
@@ -154,8 +148,10 @@
 
 # ______________________________________________________________________
 
-class CompileFrameworkTests(object):
-    # Test suite using (so far) the minimark GC.
+
+class BaseFrameworkTests(object):
+    compile_kwds = {}
+
     def setup_class(cls):
         funcs = []
         name_to_func = {}
@@ -205,7 +201,8 @@
         try:
             GcLLDescr_framework.DEBUG = True
             cls.cbuilder = compile(get_entry(allfuncs), DEFL_GC,
-                                   gcrootfinder=cls.gcrootfinder, jit=True)
+                                   gcrootfinder=cls.gcrootfinder, jit=True,
+                                   **cls.compile_kwds)
         finally:
             GcLLDescr_framework.DEBUG = OLD_DEBUG
 
@@ -224,32 +221,36 @@
     def run_orig(self, name, n, x):
         self.main_allfuncs(name, n, x)
 
-    def define_libffi_workaround(cls):
-        # XXX: this is a workaround for a bug in database.py.  It seems that
-        # the problem is triggered by optimizeopt/fficall.py, and in
-        # particular by the ``cast_base_ptr_to_instance(Func, llfunc)``: in
-        # these tests, that line is the only place where libffi.Func is
-        # referenced.
-        #
-        # The problem occurs because the gctransformer tries to annotate a
-        # low-level helper to call the __del__ of libffi.Func when it's too
-        # late.
-        #
-        # This workaround works by forcing the annotator (and all the rest of
-        # the toolchain) to see libffi.Func in a "proper" context, not just as
-        # the target of cast_base_ptr_to_instance.  Note that the function
-        # below is *never* called by any actual test, it's just annotated.
-        #
-        from pypy.rlib.libffi import get_libc_name, CDLL, types, ArgChain
-        libc_name = get_libc_name()
-        def f(n, x, *args):
-            libc = CDLL(libc_name)
-            ptr = libc.getpointer('labs', [types.slong], types.slong)
-            chain = ArgChain()
-            chain.arg(n)
-            n = ptr.call(chain, lltype.Signed)
-            return (n, x) + args
-        return None, f, None
+
+class CompileFrameworkTests(BaseFrameworkTests):
+    # Test suite using (so far) the minimark GC.
+
+##    def define_libffi_workaround(cls):
+##        # XXX: this is a workaround for a bug in database.py.  It seems that
+##        # the problem is triggered by optimizeopt/fficall.py, and in
+##        # particular by the ``cast_base_ptr_to_instance(Func, llfunc)``: in
+##        # these tests, that line is the only place where libffi.Func is
+##        # referenced.
+##        #
+##        # The problem occurs because the gctransformer tries to annotate a
+##        # low-level helper to call the __del__ of libffi.Func when it's too
+##        # late.
+##        #
+##        # This workaround works by forcing the annotator (and all the rest of
+##        # the toolchain) to see libffi.Func in a "proper" context, not just as
+##        # the target of cast_base_ptr_to_instance.  Note that the function
+##        # below is *never* called by any actual test, it's just annotated.
+##        #
+##        from pypy.rlib.libffi import get_libc_name, CDLL, types, ArgChain
+##        libc_name = get_libc_name()
+##        def f(n, x, *args):
+##            libc = CDLL(libc_name)
+##            ptr = libc.getpointer('labs', [types.slong], types.slong)
+##            chain = ArgChain()
+##            chain.arg(n)
+##            n = ptr.call(chain, lltype.Signed)
+##            return (n, x) + args
+##        return None, f, None
 
     def define_compile_framework_1(cls):
         # a moving GC.  Supports malloc_varsize_nonmovable.  Simple test, works
@@ -456,6 +457,73 @@
     def test_compile_framework_7(self):
         self.run('compile_framework_7')
 
+    def define_compile_framework_8(cls):
+        # Array of pointers, of unknown length (test write_barrier_from_array)
+        def before(n, x):
+            return n, x, None, None, None, None, None, None, None, None, [X(123)], None
+        def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
+            if n < 1900:
+                check(l[0].x == 123)
+                l = [None] * (16 + (n & 7))
+                l[0] = X(123)
+                l[1] = X(n)
+                l[2] = X(n+10)
+                l[3] = X(n+20)
+                l[4] = X(n+30)
+                l[5] = X(n+40)
+                l[6] = X(n+50)
+                l[7] = X(n+60)
+                l[8] = X(n+70)
+                l[9] = X(n+80)
+                l[10] = X(n+90)
+                l[11] = X(n+100)
+                l[12] = X(n+110)
+                l[13] = X(n+120)
+                l[14] = X(n+130)
+                l[15] = X(n+140)
+            if n < 1800:
+                check(len(l) == 16 + (n & 7))
+                check(l[0].x == 123)
+                check(l[1].x == n)
+                check(l[2].x == n+10)
+                check(l[3].x == n+20)
+                check(l[4].x == n+30)
+                check(l[5].x == n+40)
+                check(l[6].x == n+50)
+                check(l[7].x == n+60)
+                check(l[8].x == n+70)
+                check(l[9].x == n+80)
+                check(l[10].x == n+90)
+                check(l[11].x == n+100)
+                check(l[12].x == n+110)
+                check(l[13].x == n+120)
+                check(l[14].x == n+130)
+                check(l[15].x == n+140)
+            n -= x.foo
+            return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
+        def after(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
+            check(len(l) >= 16)
+            check(l[0].x == 123)
+            check(l[1].x == 2)
+            check(l[2].x == 12)
+            check(l[3].x == 22)
+            check(l[4].x == 32)
+            check(l[5].x == 42)
+            check(l[6].x == 52)
+            check(l[7].x == 62)
+            check(l[8].x == 72)
+            check(l[9].x == 82)
+            check(l[10].x == 92)
+            check(l[11].x == 102)
+            check(l[12].x == 112)
+            check(l[13].x == 122)
+            check(l[14].x == 132)
+            check(l[15].x == 142)
+        return before, f, after
+
+    def test_compile_framework_8(self):
+        self.run('compile_framework_8')
+
     def define_compile_framework_external_exception_handling(cls):
         def before(n, x):
             x = X(0)
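define_compile_framework_8 above stresses write_barrier_from_array by
repeatedly allocating a list whose length depends on n, via 16 + (n & 7),
matching its comment about an array of pointers of unknown length.  A quick
check of the lengths that expression actually produces over a stretch of the
loop:

    # 16 + (n & 7) cycles through every length from 16 to 23 as n counts down
    lengths = sorted(set(16 + (n & 7) for n in range(1900, 2000)))
    assert lengths == range(16, 24)
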
diff --git a/pypy/jit/backend/x86/test/test_zrpy_gc.py b/pypy/jit/backend/x86/test/test_zrpy_releasegil.py
copy from pypy/jit/backend/x86/test/test_zrpy_gc.py
copy to pypy/jit/backend/x86/test/test_zrpy_releasegil.py
--- a/pypy/jit/backend/x86/test/test_zrpy_gc.py
+++ b/pypy/jit/backend/x86/test/test_zrpy_releasegil.py
@@ -1,618 +1,110 @@
-"""
-This is a test that translates a complete JIT to C and runs it.  It is
-not testing much, expect that it basically works.  What it *is* testing,
-however, is the correct handling of GC, i.e. if objects are freed as
-soon as possible (at least in a simple case).
-"""
+from pypy.rpython.lltypesystem import lltype, llmemory, rffi
+from pypy.rlib.jit import dont_look_inside
+from pypy.jit.metainterp.optimizeopt import ALL_OPTS_NAMES
 
-import weakref
-import py, os
-from pypy.annotation import policy as annpolicy
-from pypy.rlib import rgc
-from pypy.rpython.lltypesystem import lltype, llmemory, rffi
-from pypy.rpython.lltypesystem.lloperation import llop
-from pypy.rlib.jit import JitDriver, dont_look_inside
-from pypy.rlib.jit import purefunction, unroll_safe
-from pypy.jit.backend.x86.runner import CPU386
-from pypy.jit.backend.llsupport.gc import GcRefList, GcRootMap_asmgcc
-from pypy.jit.backend.llsupport.gc import GcLLDescr_framework
-from pypy.tool.udir import udir
-from pypy.jit.backend.x86.arch import IS_X86_64
-from pypy.config.translationoption import DEFL_GC
-import py.test
+from pypy.rlib.libffi import CDLL, types, ArgChain, clibffi
+from pypy.rpython.lltypesystem.ll2ctypes import libc_name
+from pypy.rpython.annlowlevel import llhelper
 
-class X(object):
-    def __init__(self, x=0):
-        self.x = x
+from pypy.jit.backend.x86.test.test_zrpy_gc import BaseFrameworkTests
+from pypy.jit.backend.x86.test.test_zrpy_gc import check
 
-    next = None
 
-class CheckError(Exception):
-    pass
+class ReleaseGILTests(BaseFrameworkTests):
+    compile_kwds = dict(enable_opts=ALL_OPTS_NAMES, thread=True)
 
-def check(flag):
-    if not flag:
-        raise CheckError
-
-def get_g(main):
-    main._dont_inline_ = True
-    def g(name, n):
-        x = X()
-        x.foo = 2
-        main(n, x)
-        x.foo = 5
-        return weakref.ref(x)
-    g._dont_inline_ = True
-    return g
-
-
-def get_entry(g):
-
-    def entrypoint(args):
-        name = ''
-        n = 2000
-        argc = len(args)
-        if argc > 1:
-            name = args[1]
-        if argc > 2:
-            n = int(args[2])
-        r_list = []
-        for i in range(20):
-            r = g(name, n)
-            r_list.append(r)
-            rgc.collect()
-        rgc.collect(); rgc.collect()
-        freed = 0
-        for r in r_list:
-            if r() is None:
-                freed += 1
-        print freed
-        return 0
-
-    return entrypoint
-
-
-def get_functions_to_patch():
-    from pypy.jit.backend.llsupport import gc
-    #
-    can_inline_malloc1 = gc.GcLLDescr_framework.can_inline_malloc
-    def can_inline_malloc2(*args):
-        try:
-            if os.environ['PYPY_NO_INLINE_MALLOC']:
-                return False
-        except KeyError:
+    def define_simple(self):
+        class Glob:
             pass
-        return can_inline_malloc1(*args)
-    #
-    return {(gc.GcLLDescr_framework, 'can_inline_malloc'): can_inline_malloc2}
-
-def compile(f, gc, **kwds):
-    from pypy.annotation.listdef import s_list_of_strings
-    from pypy.translator.translator import TranslationContext
-    from pypy.jit.metainterp.warmspot import apply_jit
-    from pypy.translator.c import genc
-    #
-    t = TranslationContext()
-    t.config.translation.gc = gc
-    if gc != 'boehm':
-        t.config.translation.gcremovetypeptr = True
-    for name, value in kwds.items():
-        setattr(t.config.translation, name, value)
-    ann = t.buildannotator(policy=annpolicy.StrictAnnotatorPolicy())
-    ann.build_types(f, [s_list_of_strings], main_entry_point=True)
-    t.buildrtyper().specialize()
-
-    if kwds['jit']:
-        patch = get_functions_to_patch()
-        old_value = {}
-        try:
-            for (obj, attr), value in patch.items():
-                old_value[obj, attr] = getattr(obj, attr)
-                setattr(obj, attr, value)
-            #
-            apply_jit(t, enable_opts='')
-            #
-        finally:
-            for (obj, attr), oldvalue in old_value.items():
-                setattr(obj, attr, oldvalue)
-
-    cbuilder = genc.CStandaloneBuilder(t, f, t.config)
-    cbuilder.generate_source()
-    cbuilder.compile()
-    return cbuilder
-
-def run(cbuilder, args=''):
-    #
-    pypylog = udir.join('test_zrpy_gc.log')
-    data = cbuilder.cmdexec(args, env={'PYPYLOG': ':%s' % pypylog})
-    return data.strip()
-
-def compile_and_run(f, gc, **kwds):
-    cbuilder = compile(f, gc, **kwds)
-    return run(cbuilder)
-
-
-
-def test_compile_boehm():
-    myjitdriver = JitDriver(greens = [], reds = ['n', 'x'])
-    @dont_look_inside
-    def see(lst, n):
-        assert len(lst) == 3
-        assert lst[0] == n+10
-        assert lst[1] == n+20
-        assert lst[2] == n+30
-    def main(n, x):
-        while n > 0:
-            myjitdriver.can_enter_jit(n=n, x=x)
-            myjitdriver.jit_merge_point(n=n, x=x)
-            y = X()
-            y.foo = x.foo
-            n -= y.foo
-            see([n+10, n+20, n+30], n)
-    res = compile_and_run(get_entry(get_g(main)), "boehm", jit=True)
-    assert int(res) >= 16
-
-# ______________________________________________________________________
-
-class CompileFrameworkTests(object):
-    # Test suite using (so far) the minimark GC.
-    def setup_class(cls):
-        funcs = []
-        name_to_func = {}
-        for fullname in dir(cls):
-            if not fullname.startswith('define'):
-                continue
-            definefunc = getattr(cls, fullname)
-            _, name = fullname.split('_', 1)
-            beforefunc, loopfunc, afterfunc = definefunc.im_func(cls)
-            if beforefunc is None:
-                def beforefunc(n, x):
-                    return n, x, None, None, None, None, None, None, None, None, None, ''
-            if afterfunc is None:
-                def afterfunc(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-                    pass
-            beforefunc.func_name = 'before_'+name
-            loopfunc.func_name = 'loop_'+name
-            afterfunc.func_name = 'after_'+name
-            funcs.append((beforefunc, loopfunc, afterfunc))
-            assert name not in name_to_func
-            name_to_func[name] = len(name_to_func)
-        print name_to_func
-        def allfuncs(name, n):
-            x = X()
-            x.foo = 2
-            main_allfuncs(name, n, x)
-            x.foo = 5
-            return weakref.ref(x)
-        def main_allfuncs(name, n, x):
-            num = name_to_func[name]
-            n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s = funcs[num][0](n, x)
-            while n > 0:
-                myjitdriver.can_enter_jit(num=num, n=n, x=x, x0=x0, x1=x1,
-                        x2=x2, x3=x3, x4=x4, x5=x5, x6=x6, x7=x7, l=l, s=s)
-                myjitdriver.jit_merge_point(num=num, n=n, x=x, x0=x0, x1=x1,
-                        x2=x2, x3=x3, x4=x4, x5=x5, x6=x6, x7=x7, l=l, s=s)
-
-                n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s = funcs[num][1](
-                        n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s)
-            funcs[num][2](n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s)
-        myjitdriver = JitDriver(greens = ['num'],
-                                reds = ['n', 'x', 'x0', 'x1', 'x2', 'x3', 'x4',
-                                        'x5', 'x6', 'x7', 'l', 's'])
-        cls.main_allfuncs = staticmethod(main_allfuncs)
-        cls.name_to_func = name_to_func
-        OLD_DEBUG = GcLLDescr_framework.DEBUG
-        try:
-            GcLLDescr_framework.DEBUG = True
-            cls.cbuilder = compile(get_entry(allfuncs), DEFL_GC,
-                                   gcrootfinder=cls.gcrootfinder, jit=True)
-        finally:
-            GcLLDescr_framework.DEBUG = OLD_DEBUG
-
-    def _run(self, name, n, env):
-        res = self.cbuilder.cmdexec("%s %d" %(name, n), env=env)
-        assert int(res) == 20
-
-    def run(self, name, n=2000):
-        pypylog = udir.join('TestCompileFramework.log')
-        env = {'PYPYLOG': ':%s' % pypylog,
-               'PYPY_NO_INLINE_MALLOC': '1'}
-        self._run(name, n, env)
-        env['PYPY_NO_INLINE_MALLOC'] = ''
-        self._run(name, n, env)
-
-    def run_orig(self, name, n, x):
-        self.main_allfuncs(name, n, x)
-
-    def define_libffi_workaround(cls):
-        # XXX: this is a workaround for a bug in database.py.  It seems that
-        # the problem is triggered by optimizeopt/fficall.py, and in
-        # particular by the ``cast_base_ptr_to_instance(Func, llfunc)``: in
-        # these tests, that line is the only place where libffi.Func is
-        # referenced.
+        glob = Glob()
         #
-        # The problem occurs because the gctransformer tries to annotate a
-        # low-level helper to call the __del__ of libffi.Func when it's too
-        # late.
-        #
-        # This workaround works by forcing the annotator (and all the rest of
-        # the toolchain) to see libffi.Func in a "proper" context, not just as
-        # the target of cast_base_ptr_to_instance.  Note that the function
-        # below is *never* called by any actual test, it's just annotated.
-        #
-        from pypy.rlib.libffi import get_libc_name, CDLL, types, ArgChain
-        libc_name = get_libc_name()
-        def f(n, x, *args):
-            libc = CDLL(libc_name)
-            ptr = libc.getpointer('labs', [types.slong], types.slong)
-            chain = ArgChain()
-            chain.arg(n)
-            n = ptr.call(chain, lltype.Signed)
-            return (n, x) + args
-        return None, f, None
-
-    def define_compile_framework_1(cls):
-        # a moving GC.  Supports malloc_varsize_nonmovable.  Simple test, works
-        # without write_barriers and root stack enumeration.
-        def f(n, x, *args):
-            y = X()
-            y.foo = x.foo
-            n -= y.foo
-            return (n, x) + args
-        return None, f, None
-
-    def test_compile_framework_1(self):
-        self.run('compile_framework_1')
-
-    def define_compile_framework_2(cls):
-        # More complex test, requires root stack enumeration but
-        # not write_barriers.
-        def f(n, x, *args):
-            prev = x
-            for j in range(101):    # f() runs 20'000 times, thus allocates
-                y = X()             # a total of 2'020'000 objects
-                y.foo = prev.foo
-                prev = y
-            n -= prev.foo
-            return (n, x) + args
-        return None, f, None
-
-    def test_compile_framework_2(self):
-        self.run('compile_framework_2')
-
-    def define_compile_framework_3(cls):
-        # Third version of the test.  Really requires write_barriers.
-        def f(n, x, *args):
-            x.next = None
-            for j in range(101):    # f() runs 20'000 times, thus allocates
-                y = X()             # a total of 2'020'000 objects
-                y.foo = j+1
-                y.next = x.next
-                x.next = y
-            check(x.next.foo == 101)
-            total = 0
-            y = x
-            for j in range(101):
-                y = y.next
-                total += y.foo
-            check(not y.next)
-            check(total == 101*102/2)
-            n -= x.foo
-            return (n, x) + args
-        return None, f, None
-
-
-
-    def test_compile_framework_3(self):
-        x_test = X()
-        x_test.foo = 5
-        self.run_orig('compile_framework_3', 6, x_test)     # check that it does not raise CheckError
-        self.run('compile_framework_3')
-
-    def define_compile_framework_3_extra(cls):
-        # Extra version of the test, with tons of live vars around the residual
-        # call that all contain a GC pointer.
-        @dont_look_inside
-        def residual(n=26):
-            x = X()
-            x.next = X()
-            x.next.foo = n
-            return x
+        def f42(n):
+            c_strchr = glob.c_strchr
+            raw = rffi.str2charp("foobar" + chr((n & 63) + 32))
+            argchain = ArgChain()
+            argchain = argchain.arg(rffi.cast(lltype.Signed, raw))
+            argchain = argchain.arg(rffi.cast(rffi.INT, ord('b')))
+            res = c_strchr.call(argchain, rffi.CCHARP)
+            check(rffi.charp2str(res) == "bar" + chr((n & 63) + 32))
+            rffi.free_charp(raw)
         #
         def before(n, x):
-            residual(5)
-            x0 = residual()
-            x1 = residual()
-            x2 = residual()
-            x3 = residual()
-            x4 = residual()
-            x5 = residual()
-            x6 = residual()
-            x7 = residual()
-            n *= 19
-            return n, None, x0, x1, x2, x3, x4, x5, x6, x7, None, None
-        def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            x8 = residual()
-            x9 = residual()
-            check(x0.next.foo == 26)
-            check(x1.next.foo == 26)
-            check(x2.next.foo == 26)
-            check(x3.next.foo == 26)
-            check(x4.next.foo == 26)
-            check(x5.next.foo == 26)
-            check(x6.next.foo == 26)
-            check(x7.next.foo == 26)
-            check(x8.next.foo == 26)
-            check(x9.next.foo == 26)
-            x0, x1, x2, x3, x4, x5, x6, x7 = x7, x4, x6, x5, x3, x2, x9, x8
+            libc = CDLL(libc_name)
+            c_strchr = libc.getpointer('strchr', [types.pointer, types.sint],
+                                       types.pointer)
+            glob.c_strchr = c_strchr
+            return (n, None, None, None, None, None,
+                    None, None, None, None, None, None)
+        #
+        def f(n, x, *args):
+            f42(n)
             n -= 1
-            return n, None, x0, x1, x2, x3, x4, x5, x6, x7, None, None
-        return before, f, None
-
-    def test_compile_framework_3_extra(self):
-        self.run_orig('compile_framework_3_extra', 6, None)     # check that it does not raise CheckError
-        self.run('compile_framework_3_extra')
-
-    def define_compile_framework_4(cls):
-        # Fourth version of the test, with __del__.
-        from pypy.rlib.debug import debug_print
-        class Counter:
-            cnt = 0
-        counter = Counter()
-        class Z:
-            def __del__(self):
-                counter.cnt -= 1
-        def before(n, x):
-            debug_print('counter.cnt =', counter.cnt)
-            check(counter.cnt < 5)
-            counter.cnt = n // x.foo
-            return n, x, None, None, None, None, None, None, None, None, None, None
-        def f(n, x, *args):
-            Z()
-            n -= x.foo
             return (n, x) + args
         return before, f, None
 
-    def test_compile_framework_4(self):
-        self.run('compile_framework_4')
+    def test_simple(self):
+        self.run('simple')
 
-    def define_compile_framework_5(cls):
-        # Test string manipulation.
-        def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            n -= x.foo
-            s += str(n)
-            return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
-        def after(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            check(len(s) == 1*5 + 2*45 + 3*450 + 4*500)
-        return None, f, after
-
-    def test_compile_framework_5(self):
-        self.run('compile_framework_5')
-
-    def define_compile_framework_7(cls):
-        # Array of pointers (test the write barrier for setarrayitem_gc)
+    def define_close_stack(self):
+        #
+        class Glob(object):
+            pass
+        glob = Glob()
+        class X(object):
+            pass
+        #
+        def callback(p1, p2):
+            for i in range(100):
+                glob.lst.append(X())
+            return rffi.cast(rffi.INT, 1)
+        CALLBACK = lltype.Ptr(lltype.FuncType([lltype.Signed,
+                                               lltype.Signed], rffi.INT))
+        #
+        @dont_look_inside
+        def alloc1():
+            return llmemory.raw_malloc(16)
+        @dont_look_inside
+        def free1(p):
+            llmemory.raw_free(p)
+        #
+        def f42():
+            length = len(glob.lst)
+            c_qsort = glob.c_qsort
+            raw = alloc1()
+            fn = llhelper(CALLBACK, rffi._make_wrapper_for(CALLBACK, callback))
+            argchain = ArgChain()
+            argchain = argchain.arg(rffi.cast(lltype.Signed, raw))
+            argchain = argchain.arg(rffi.cast(rffi.SIZE_T, 2))
+            argchain = argchain.arg(rffi.cast(rffi.SIZE_T, 8))
+            argchain = argchain.arg(rffi.cast(lltype.Signed, fn))
+            c_qsort.call(argchain, lltype.Void)
+            free1(raw)
+            check(len(glob.lst) > length)
+            del glob.lst[:]
+        #
         def before(n, x):
-            return n, x, None, None, None, None, None, None, None, None, [X(123)], None
-        def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            if n < 1900:
-                check(l[0].x == 123)
-                l = [None] * 16
-                l[0] = X(123)
-                l[1] = X(n)
-                l[2] = X(n+10)
-                l[3] = X(n+20)
-                l[4] = X(n+30)
-                l[5] = X(n+40)
-                l[6] = X(n+50)
-                l[7] = X(n+60)
-                l[8] = X(n+70)
-                l[9] = X(n+80)
-                l[10] = X(n+90)
-                l[11] = X(n+100)
-                l[12] = X(n+110)
-                l[13] = X(n+120)
-                l[14] = X(n+130)
-                l[15] = X(n+140)
-            if n < 1800:
-                check(len(l) == 16)
-                check(l[0].x == 123)
-                check(l[1].x == n)
-                check(l[2].x == n+10)
-                check(l[3].x == n+20)
-                check(l[4].x == n+30)
-                check(l[5].x == n+40)
-                check(l[6].x == n+50)
-                check(l[7].x == n+60)
-                check(l[8].x == n+70)
-                check(l[9].x == n+80)
-                check(l[10].x == n+90)
-                check(l[11].x == n+100)
-                check(l[12].x == n+110)
-                check(l[13].x == n+120)
-                check(l[14].x == n+130)
-                check(l[15].x == n+140)
-            n -= x.foo
-            return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
-        def after(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            check(len(l) == 16)
-            check(l[0].x == 123)
-            check(l[1].x == 2)
-            check(l[2].x == 12)
-            check(l[3].x == 22)
-            check(l[4].x == 32)
-            check(l[5].x == 42)
-            check(l[6].x == 52)
-            check(l[7].x == 62)
-            check(l[8].x == 72)
-            check(l[9].x == 82)
-            check(l[10].x == 92)
-            check(l[11].x == 102)
-            check(l[12].x == 112)
-            check(l[13].x == 122)
-            check(l[14].x == 132)
-            check(l[15].x == 142)
-        return before, f, after
-
-    def test_compile_framework_7(self):
-        self.run('compile_framework_7')
-
-    def define_compile_framework_external_exception_handling(cls):
-        def before(n, x):
-            x = X(0)
-            return n, x, None, None, None, None, None, None, None, None, None, None
-
-        @dont_look_inside
-        def g(x):
-            if x > 200:
-                return 2
-            raise ValueError
-        @dont_look_inside
-        def h(x):
-            if x > 150:
-                raise ValueError
-            return 2
-
-        def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            try:
-                x.x += g(n)
-            except ValueError:
-                x.x += 1
-            try:
-                x.x += h(n)
-            except ValueError:
-                x.x -= 1
+            libc = CDLL(libc_name)
+            types_size_t = clibffi.cast_type_to_ffitype(rffi.SIZE_T)
+            c_qsort = libc.getpointer('qsort', [types.pointer, types_size_t,
+                                                types_size_t, types.pointer],
+                                      types.void)
+            glob.c_qsort = c_qsort
+            glob.lst = []
+            return (n, None, None, None, None, None,
+                    None, None, None, None, None, None)
+        #
+        def f(n, x, *args):
+            f42()
             n -= 1
-            return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
-
-        def after(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            check(x.x == 1800 * 2 + 1850 * 2 + 200 - 150)
-
+            return (n, x) + args
         return before, f, None
 
-    def test_compile_framework_external_exception_handling(self):
-        self.run('compile_framework_external_exception_handling')
+    def test_close_stack(self):
+        self.run('close_stack')
 
-    def define_compile_framework_bug1(self):
-        @purefunction
-        def nonmoving():
-            x = X(1)
-            for i in range(7):
-                rgc.collect()
-            return x
 
-        @dont_look_inside
-        def do_more_stuff():
-            x = X(5)
-            for i in range(7):
-                rgc.collect()
-            return x
-
-        def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            x0 = do_more_stuff()
-            check(nonmoving().x == 1)
-            n -= 1
-            return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
-
-        return None, f, None
-
-    def test_compile_framework_bug1(self):
-        self.run('compile_framework_bug1', 200)
-
-    def define_compile_framework_vref(self):
-        from pypy.rlib.jit import virtual_ref, virtual_ref_finish
-        class A:
-            pass
-        glob = A()
-        def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            a = A()
-            glob.v = vref = virtual_ref(a)
-            virtual_ref_finish(vref, a)
-            n -= 1
-            return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
-        return None, f, None
-
-    def test_compile_framework_vref(self):
-        self.run('compile_framework_vref', 200)
-
-    def define_compile_framework_float(self):
-        # test for a bug: the fastpath_malloc does not save and restore
-        # xmm registers around the actual call to the slow path
-        class A:
-            x0 = x1 = x2 = x3 = x4 = x5 = x6 = x7 = 0
-        @dont_look_inside
-        def escape1(a):
-            a.x0 += 0
-            a.x1 += 6
-            a.x2 += 12
-            a.x3 += 18
-            a.x4 += 24
-            a.x5 += 30
-            a.x6 += 36
-            a.x7 += 42
-        @dont_look_inside
-        def escape2(n, f0, f1, f2, f3, f4, f5, f6, f7):
-            check(f0 == n + 0.0)
-            check(f1 == n + 0.125)
-            check(f2 == n + 0.25)
-            check(f3 == n + 0.375)
-            check(f4 == n + 0.5)
-            check(f5 == n + 0.625)
-            check(f6 == n + 0.75)
-            check(f7 == n + 0.875)
-        @unroll_safe
-        def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            i = 0
-            while i < 42:
-                m = n + i
-                f0 = m + 0.0
-                f1 = m + 0.125
-                f2 = m + 0.25
-                f3 = m + 0.375
-                f4 = m + 0.5
-                f5 = m + 0.625
-                f6 = m + 0.75
-                f7 = m + 0.875
-                a1 = A()
-                # at this point, all or most f's are still in xmm registers
-                escape1(a1)
-                escape2(m, f0, f1, f2, f3, f4, f5, f6, f7)
-                i += 1
-            n -= 1
-            return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
-        return None, f, None
-
-    def test_compile_framework_float(self):
-        self.run('compile_framework_float')
-
-    def define_compile_framework_minimal_size_in_nursery(self):
-        S = lltype.GcStruct('S')    # no fields!
-        T = lltype.GcStruct('T', ('i', lltype.Signed))
-        @unroll_safe
-        def f42(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            lst1 = []
-            lst2 = []
-            i = 0
-            while i < 42:
-                s1 = lltype.malloc(S)
-                t1 = lltype.malloc(T)
-                t1.i = 10000 + i + n
-                lst1.append(s1)
-                lst2.append(t1)
-                i += 1
-            i = 0
-            while i < 42:
-                check(lst2[i].i == 10000 + i + n)
-                i += 1
-            n -= 1
-            return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
-        return None, f42, None
-
-    def test_compile_framework_minimal_size_in_nursery(self):
-        self.run('compile_framework_minimal_size_in_nursery')
-
-
-class TestShadowStack(CompileFrameworkTests):
+class TestShadowStack(ReleaseGILTests):
     gcrootfinder = "shadowstack"
 
-class TestAsmGcc(CompileFrameworkTests):
+class TestAsmGcc(ReleaseGILTests):
     gcrootfinder = "asmgcc"
diff --git a/pypy/jit/codewriter/assembler.py b/pypy/jit/codewriter/assembler.py
--- a/pypy/jit/codewriter/assembler.py
+++ b/pypy/jit/codewriter/assembler.py
@@ -76,7 +76,8 @@
                 TYPE = llmemory.Address
             if TYPE == llmemory.Address:
                 value = heaptracker.adr2int(value)
-            elif not isinstance(value, ComputedIntSymbolic):
+            if not isinstance(value, (llmemory.AddressAsInt,
+                                      ComputedIntSymbolic)):
                 value = lltype.cast_primitive(lltype.Signed, value)
                 if allow_short and -128 <= value <= 127:
                     # emit the constant as a small integer
diff --git a/pypy/jit/codewriter/call.py b/pypy/jit/codewriter/call.py
--- a/pypy/jit/codewriter/call.py
+++ b/pypy/jit/codewriter/call.py
@@ -237,6 +237,8 @@
             self.readwrite_analyzer.analyze(op), self.cpu, extraeffect,
             oopspecindex, can_invalidate)
         #
+        if oopspecindex != EffectInfo.OS_NONE:
+            assert effectinfo is not None
         if pure or loopinvariant:
             assert effectinfo is not None
             assert extraeffect != EffectInfo.EF_FORCES_VIRTUAL_OR_VIRTUALIZABLE
diff --git a/pypy/jit/codewriter/effectinfo.py b/pypy/jit/codewriter/effectinfo.py
--- a/pypy/jit/codewriter/effectinfo.py
+++ b/pypy/jit/codewriter/effectinfo.py
@@ -108,6 +108,9 @@
     def check_forces_virtual_or_virtualizable(self):
         return self.extraeffect >= self.EF_FORCES_VIRTUAL_OR_VIRTUALIZABLE
 
+    def has_random_effects(self):
+        return self.oopspecindex == self.OS_LIBFFI_CALL
+
 def effectinfo_from_writeanalyze(effects, cpu,
                                  extraeffect=EffectInfo.EF_CAN_RAISE,
                                  oopspecindex=EffectInfo.OS_NONE,
diff --git a/pypy/jit/codewriter/jtransform.py b/pypy/jit/codewriter/jtransform.py
--- a/pypy/jit/codewriter/jtransform.py
+++ b/pypy/jit/codewriter/jtransform.py
@@ -768,10 +768,10 @@
         from pypy.rpython.lltypesystem.rffi import size_and_sign, sizeof
         from pypy.rlib.rarithmetic import intmask
         assert not self._is_gc(op.args[0])
-        size1, unsigned1 = size_and_sign(op.args[0].concretetype)
         size2, unsigned2 = size_and_sign(op.result.concretetype)
         if size2 >= sizeof(lltype.Signed):
             return     # the target type is LONG or ULONG
+        size1, unsigned1 = size_and_sign(op.args[0].concretetype)
         #
         def bounds(size, unsigned):
             if unsigned:
diff --git a/pypy/jit/metainterp/compile.py b/pypy/jit/metainterp/compile.py
--- a/pypy/jit/metainterp/compile.py
+++ b/pypy/jit/metainterp/compile.py
@@ -4,6 +4,7 @@
 from pypy.objspace.flow.model import Constant, Variable
 from pypy.rlib.objectmodel import we_are_translated
 from pypy.rlib.debug import debug_start, debug_stop, debug_print
+from pypy.rlib import rstack
 from pypy.conftest import option
 from pypy.tool.sourcetools import func_with_new_name
 
@@ -13,7 +14,7 @@
 from pypy.jit.metainterp.history import BoxPtr, BoxObj, BoxFloat, Const
 from pypy.jit.metainterp import history
 from pypy.jit.metainterp.typesystem import llhelper, oohelper
-from pypy.jit.metainterp.optimizeutil import InvalidLoop
+from pypy.jit.metainterp.optimize import InvalidLoop
 from pypy.jit.metainterp.resume import NUMBERING
 from pypy.jit.codewriter import heaptracker, longlong
 
@@ -156,6 +157,7 @@
 def send_loop_to_backend(greenkey, jitdriver_sd, metainterp_sd, loop, type):
     jitdriver_sd.on_compile(metainterp_sd.logger_ops, loop.token,
                             loop.operations, type, greenkey)
+    loopname = jitdriver_sd.warmstate.get_location_str(greenkey)
     globaldata = metainterp_sd.globaldata
     loop_token = loop.token
     loop_token.number = n = globaldata.loopnumbering
@@ -170,7 +172,7 @@
     debug_start("jit-backend")
     try:
         ops_offset = metainterp_sd.cpu.compile_loop(loop.inputargs, operations,
-                                                    loop.token)
+                                                    loop.token, name=loopname)
     finally:
         debug_stop("jit-backend")
     metainterp_sd.profiler.end_backend()
@@ -452,9 +454,17 @@
         # Called during a residual call from the assembler, if the code
         # actually needs to force one of the virtualrefs or the virtualizable.
         # Implemented by forcing *all* virtualrefs and the virtualizable.
-        faildescr = cpu.force(token)
-        assert isinstance(faildescr, ResumeGuardForcedDescr)
-        faildescr.handle_async_forcing(token)
+
+        # don't interrupt me! If the stack runs out in force_from_resumedata()
+        # then we have seen cpu.force() but not self.save_data(), leaving us
+        # in an inconsistent state
+        rstack._stack_criticalcode_start()
+        try:
+            faildescr = cpu.force(token)
+            assert isinstance(faildescr, ResumeGuardForcedDescr)
+            faildescr.handle_async_forcing(token)
+        finally:
+            rstack._stack_criticalcode_stop()
 
     def handle_async_forcing(self, force_token):
         from pypy.jit.metainterp.resume import force_from_resumedata
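
[Editor's note: the rstack bracket added above follows a simple shape: open the critical section before the first irreversible step, close it in a finally: block. A minimal, self-contained model of that shape, with all names hypothetical and no PyPy imports:

    # The stop callback runs even if force() or save() raises, so the
    # critical region is never left half-open.
    def guarded_force(start_critical, stop_critical, force, save):
        start_critical()
        try:
            save(force())
        finally:
            stop_critical()

    events = []
    guarded_force(lambda: events.append('start'),
                  lambda: events.append('stop'),
                  lambda: 'faildescr',
                  events.append)
    assert events == ['start', 'faildescr', 'stop']
]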
diff --git a/pypy/jit/metainterp/executor.py b/pypy/jit/metainterp/executor.py
--- a/pypy/jit/metainterp/executor.py
+++ b/pypy/jit/metainterp/executor.py
@@ -82,9 +82,6 @@
 do_call_loopinvariant = do_call
 do_call_may_force = do_call
 
-def do_call_c(cpu, metainterp, argboxes, descr):
-    raise NotImplementedError("Should never be called directly")
-
 def do_getarrayitem_gc(cpu, _, arraybox, indexbox, arraydescr):
     array = arraybox.getref_base()
     index = indexbox.getint()
@@ -319,9 +316,11 @@
             if value in (rop.FORCE_TOKEN,
                          rop.CALL_ASSEMBLER,
                          rop.COND_CALL_GC_WB,
+                         rop.COND_CALL_GC_WB_ARRAY,
                          rop.DEBUG_MERGE_POINT,
                          rop.JIT_DEBUG,
                          rop.SETARRAYITEM_RAW,
+                         rop.CALL_RELEASE_GIL,
                          rop.QUASIIMMUT_FIELD,
                          ):      # list of opcodes never executed by pyjitpl
                 continue
diff --git a/pypy/jit/metainterp/history.py b/pypy/jit/metainterp/history.py
--- a/pypy/jit/metainterp/history.py
+++ b/pypy/jit/metainterp/history.py
@@ -712,10 +712,14 @@
         return -2      # xxx risk of changing hash...
 
 def make_hashable_int(i):
+    from pypy.rpython.lltypesystem.ll2ctypes import NotCtypesAllocatedStructure
     if not we_are_translated() and isinstance(i, llmemory.AddressAsInt):
         # Warning: such a hash changes at the time of translation
         adr = heaptracker.int2adr(i)
-        return llmemory.cast_adr_to_int(adr, "emulated")
+        try:
+            return llmemory.cast_adr_to_int(adr, "emulated")
+        except NotCtypesAllocatedStructure:
+            return 12345 # use an arbitrary number for the hash
     return i
 
 def get_const_ptr_for_string(s):
@@ -793,6 +797,7 @@
     operations = None
     token = None
     call_pure_results = None
+    logops = None
     quasi_immutable_deps = None
 
     def __init__(self, name):
diff --git a/pypy/jit/metainterp/logger.py b/pypy/jit/metainterp/logger.py
--- a/pypy/jit/metainterp/logger.py
+++ b/pypy/jit/metainterp/logger.py
@@ -11,47 +11,71 @@
 
     def __init__(self, metainterp_sd, guard_number=False):
         self.metainterp_sd = metainterp_sd
-        self.ts = metainterp_sd.cpu.ts
         self.guard_number = guard_number
 
     def log_loop(self, inputargs, operations, number=0, type=None, ops_offset=None):
         if type is None:
             debug_start("jit-log-noopt-loop")
-            self._log_operations(inputargs, operations, ops_offset)
+            logops = self._log_operations(inputargs, operations, ops_offset)
             debug_stop("jit-log-noopt-loop")
         else:
             debug_start("jit-log-opt-loop")
             debug_print("# Loop", number, ":", type,
                         "with", len(operations), "ops")
-            self._log_operations(inputargs, operations, ops_offset)
+            logops = self._log_operations(inputargs, operations, ops_offset)
             debug_stop("jit-log-opt-loop")
+        return logops
 
     def log_bridge(self, inputargs, operations, number=-1, ops_offset=None):
         if number == -1:
             debug_start("jit-log-noopt-bridge")
-            self._log_operations(inputargs, operations, ops_offset)
+            logops = self._log_operations(inputargs, operations, ops_offset)
             debug_stop("jit-log-noopt-bridge")
         else:
             debug_start("jit-log-opt-bridge")
             debug_print("# bridge out of Guard", number,
                         "with", len(operations), "ops")
-            self._log_operations(inputargs, operations, ops_offset)
+            logops = self._log_operations(inputargs, operations, ops_offset)
             debug_stop("jit-log-opt-bridge")
+        return logops
 
     def log_short_preamble(self, inputargs, operations):
         debug_start("jit-log-short-preamble")
-        self._log_operations(inputargs, operations, ops_offset=None)
-        debug_stop("jit-log-short-preamble")            
+        logops = self._log_operations(inputargs, operations, ops_offset=None)
+        debug_stop("jit-log-short-preamble")
+        return logops
+
+    def _log_operations(self, inputargs, operations, ops_offset):
+        if not have_debug_prints():
+            return None
+        logops = self._make_log_operations()
+        logops._log_operations(inputargs, operations, ops_offset)
+        return logops
+
+    def _make_log_operations(self):
+        return LogOperations(self.metainterp_sd, self.guard_number)
+
+
+class LogOperations(object):
+    """
+    ResOperation logger.  Each instance contains a memo giving numbers
+    to boxes, and is typically used to log a single loop.
+    """
+    def __init__(self, metainterp_sd, guard_number):
+        self.metainterp_sd = metainterp_sd
+        self.ts = metainterp_sd.cpu.ts
+        self.guard_number = guard_number
+        self.memo = {}
 
     def repr_of_descr(self, descr):
         return descr.repr_of_descr()
 
-    def repr_of_arg(self, memo, arg):
+    def repr_of_arg(self, arg):
         try:
-            mv = memo[arg]
+            mv = self.memo[arg]
         except KeyError:
-            mv = len(memo)
-            memo[arg] = mv
+            mv = len(self.memo)
+            self.memo[arg] = mv
         if isinstance(arg, ConstInt):
             if int_could_be_an_address(arg.value):
                 addr = arg.getaddr()
@@ -75,11 +99,12 @@
         else:
             return '?'
 
-    def repr_of_resop(self, memo, op, ops_offset=None):
+    def repr_of_resop(self, op, ops_offset=None):
         if op.getopnum() == rop.DEBUG_MERGE_POINT:
-            loc = op.getarg(0)._get_str()
-            reclev = op.getarg(1).getint()
-            return "debug_merge_point('%s', %s)" % (loc, reclev)
+            jd_sd = self.metainterp_sd.jitdrivers_sd[op.getarg(0).getint()]
+            s = jd_sd.warmstate.get_location_str(op.getarglist()[2:])
+            s = s.replace(',', '.') # we use comma for argument splitting
+            return "debug_merge_point(%d, '%s')" % (op.getarg(1).getint(), s)
         if ops_offset is None:
             offset = -1
         else:
@@ -88,9 +113,10 @@
             s_offset = ""
         else:
             s_offset = "+%d: " % offset
-        args = ", ".join([self.repr_of_arg(memo, op.getarg(i)) for i in range(op.numargs())])
+        args = ", ".join([self.repr_of_arg(op.getarg(i)) for i in range(op.numargs())])
+
         if op.result is not None:
-            res = self.repr_of_arg(memo, op.result) + " = "
+            res = self.repr_of_arg(op.result) + " = "
         else:
             res = ""
         is_guard = op.is_guard()
@@ -103,7 +129,7 @@
                 r = self.repr_of_descr(descr)
             args += ', descr=' +  r
         if is_guard and op.getfailargs() is not None:
-            fail_args = ' [' + ", ".join([self.repr_of_arg(memo, arg)
+            fail_args = ' [' + ", ".join([self.repr_of_arg(arg)
                                           for arg in op.getfailargs()]) + ']'
         else:
             fail_args = ''
@@ -114,13 +140,12 @@
             return
         if ops_offset is None:
             ops_offset = {}
-        memo = {}
         if inputargs is not None:
-            args = ", ".join([self.repr_of_arg(memo, arg) for arg in inputargs])
+            args = ", ".join([self.repr_of_arg(arg) for arg in inputargs])
             debug_print('[' + args + ']')
         for i in range(len(operations)):
             op = operations[i]
-            debug_print(self.repr_of_resop(memo, operations[i], ops_offset))
+            debug_print(self.repr_of_resop(operations[i], ops_offset))
         if ops_offset and None in ops_offset:
             offset = ops_offset[None]
             debug_print("+%d: --end of the loop--" % offset)
diff --git a/pypy/jit/metainterp/optimize.py b/pypy/jit/metainterp/optimize.py
--- a/pypy/jit/metainterp/optimize.py
+++ b/pypy/jit/metainterp/optimize.py
@@ -1,9 +1,20 @@
 from pypy.rlib.debug import debug_start, debug_stop, debug_print
+from pypy.jit.metainterp.jitexc import JitException
+
+class InvalidLoop(JitException):
+    """Raised when the optimize*.py detect that the loop that
+    we are trying to build cannot possibly make sense as a
+    long-running loop (e.g. it cannot run 2 complete iterations)."""
+
+class RetraceLoop(JitException):
+    """ Raised when inlining a short preamble resulted in an
+        InvalidLoop. This means the optimized loop is too specialized
+        to be useful here, so we trace it again and produce a second
+        copy specialized in some different way.
+    """
 
 # ____________________________________________________________
 
-from pypy.jit.metainterp.optimizeopt import optimize_loop_1, optimize_bridge_1
-
 def optimize_loop(metainterp_sd, old_loop_tokens, loop, enable_opts):
     debug_start("jit-optimize")
     try:
@@ -13,8 +24,10 @@
         debug_stop("jit-optimize")
 
 def _optimize_loop(metainterp_sd, old_loop_tokens, loop, enable_opts):
+    from pypy.jit.metainterp.optimizeopt import optimize_loop_1
     cpu = metainterp_sd.cpu
-    metainterp_sd.logger_noopt.log_loop(loop.inputargs, loop.operations)
+    loop.logops = metainterp_sd.logger_noopt.log_loop(loop.inputargs,
+                                                      loop.operations)
     # XXX do we really still need a list?
     if old_loop_tokens:
         return old_loop_tokens[0]
@@ -35,8 +48,10 @@
 
 def _optimize_bridge(metainterp_sd, old_loop_tokens, bridge, enable_opts,
                      inline_short_preamble, retraced=False):
+    from pypy.jit.metainterp.optimizeopt import optimize_bridge_1
     cpu = metainterp_sd.cpu
-    metainterp_sd.logger_noopt.log_loop(bridge.inputargs, bridge.operations)
+    bridge.logops = metainterp_sd.logger_noopt.log_loop(bridge.inputargs,
+                                                        bridge.operations)
     if old_loop_tokens:
         old_loop_token = old_loop_tokens[0]
         bridge.operations[-1].setdescr(old_loop_token)   # patch jump target
diff --git a/pypy/jit/metainterp/optimizeopt/__init__.py b/pypy/jit/metainterp/optimizeopt/__init__.py
--- a/pypy/jit/metainterp/optimizeopt/__init__.py
+++ b/pypy/jit/metainterp/optimizeopt/__init__.py
@@ -15,7 +15,7 @@
             ('virtualize', OptVirtualize),
             ('string', OptString),
             ('heap', OptHeap),
-            ('ffi', OptFfiCall),
+            ('ffi', None),
             ('unroll', None)]
 # no direct instantiation of unroll
 unroll_all_opts = unrolling_iterable(ALL_OPTS)
@@ -25,10 +25,9 @@
 ALL_OPTS_NAMES = ':'.join([name for name, _ in ALL_OPTS])
 PARAMETERS['enable_opts'] = ALL_OPTS_NAMES
 
-def optimize_loop_1(metainterp_sd, loop, enable_opts,
+def build_opt_chain(metainterp_sd, enable_opts,
                     inline_short_preamble=True, retraced=False):
-    """Optimize loop.operations to remove internal overheadish operations.
-    """
+    config = metainterp_sd.config
     optimizations = []
     unroll = 'unroll' in enable_opts
     for name, opt in unroll_all_opts:
@@ -36,6 +35,11 @@
             if opt is not None:
                 o = opt()
                 optimizations.append(o)
+            elif name == 'ffi' and config.translation.jit_ffi:
+                # we cannot put the class directly in the unrolling_iterable,
+                # because we do not want it to be seen at all (to avoid
+                # introducing a dependency on libffi in case we do not need it)
+                optimizations.append(OptFfiCall())
 
     if ('rewrite' not in enable_opts or 'virtualize' not in enable_opts
         or 'heap' not in enable_opts):
@@ -44,6 +48,17 @@
     if inline_short_preamble:
         optimizations = [OptInlineShortPreamble(retraced)] + optimizations
 
+    return optimizations, unroll
+
+
+def optimize_loop_1(metainterp_sd, loop, enable_opts,
+                    inline_short_preamble=True, retraced=False):
+    """Optimize loop.operations to remove internal overheadish operations.
+    """
+
+    optimizations, unroll = build_opt_chain(metainterp_sd, enable_opts,
+                                            inline_short_preamble, retraced)
+
     if unroll:
         optimize_unroll(metainterp_sd, loop, optimizations)
     else:
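
[Editor's note: the interesting bit of build_opt_chain above is that the 'ffi' entry is deliberately kept out of the unrolled table and only instantiated when config.translation.jit_ffi is set, so translations that do not need libffi never touch it. A small generic model of that kind of conditional stage registration; the names below are made up for illustration:

    # Stages with a factory are always built; the optional one is built
    # only when its flag is on, mirroring the 'ffi' special case.
    ALL_STAGES = [('intbounds', dict), ('heap', dict), ('ffi', None)]

    def build_chain(enable, jit_ffi, make_ffi_stage):
        chain = []
        for name, factory in ALL_STAGES:
            if name not in enable:
                continue
            if factory is not None:
                chain.append(factory())
            elif name == 'ffi' and jit_ffi:
                chain.append(make_ffi_stage())   # constructed lazily
        return chain

    assert len(build_chain(['intbounds', 'ffi'], False, list)) == 1
    assert len(build_chain(['intbounds', 'ffi'], True, list)) == 2
]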
diff --git a/pypy/jit/metainterp/optimizeopt/fficall.py b/pypy/jit/metainterp/optimizeopt/fficall.py
--- a/pypy/jit/metainterp/optimizeopt/fficall.py
+++ b/pypy/jit/metainterp/optimizeopt/fficall.py
@@ -1,10 +1,13 @@
 from pypy.rpython.annlowlevel import cast_base_ptr_to_instance
 from pypy.rlib.objectmodel import we_are_translated
 from pypy.rlib.libffi import Func
+from pypy.rlib.debug import debug_start, debug_stop, debug_print, have_debug_prints
 from pypy.jit.codewriter.effectinfo import EffectInfo
 from pypy.jit.metainterp.resoperation import rop, ResOperation
-from pypy.jit.metainterp.optimizeutil import _findall
+from pypy.jit.metainterp.optimizeopt.util import _findall
 from pypy.jit.metainterp.optimizeopt.optimizer import Optimization
+from pypy.jit.backend.llsupport.ffisupport import UnsupportedKind
+
 
 class FuncInfo(object):
 
@@ -12,14 +15,18 @@
     restype = None
     descr = None
     prepare_op = None
-    force_token_op = None
 
     def __init__(self, funcval, cpu, prepare_op):
         self.funcval = funcval
         self.opargs = []
         argtypes, restype = self._get_signature(funcval)
-        self.descr = cpu.calldescrof_dynamic(argtypes, restype)
+        try:
+            self.descr = cpu.calldescrof_dynamic(argtypes, restype)
+        except UnsupportedKind:
+            # e.g., I or U for long longs
+            self.descr = None
         self.prepare_op = prepare_op
+        self.delayed_ops = []
 
     def _get_signature(self, funcval):
         """
@@ -64,8 +71,20 @@
 
 class OptFfiCall(Optimization):
 
-    def __init__(self):
+    def setup(self):
         self.funcinfo = None
+        if self.optimizer.loop is not None:
+            self.logops = self.optimizer.loop.logops
+        else:
+            self.logops = None
+
+    def propagate_begin_forward(self):
+        debug_start('jit-log-ffiopt')
+        Optimization.propagate_begin_forward(self)
+
+    def propagate_end_forward(self):
+        debug_stop('jit-log-ffiopt')
+        Optimization.propagate_end_forward(self)
 
     def reconstruct_for_next_iteration(self, short_boxes, surviving_boxes,
                                        optimizer, valuemap):
@@ -73,29 +92,31 @@
         # FIXME: Should any status be saved for next iteration?
 
     def begin_optimization(self, funcval, op):
-        self.rollback_maybe()
+        self.rollback_maybe('begin_optimization', op)
         self.funcinfo = FuncInfo(funcval, self.optimizer.cpu, op)
 
     def commit_optimization(self):
         self.funcinfo = None
 
-    def rollback_maybe(self):
+    def rollback_maybe(self, msg, op):
         if self.funcinfo is None:
             return # nothing to rollback
         #
         # we immediately set funcinfo to None to prevent recursion when
         # calling emit_op
+        if self.logops is not None:
+            debug_print('rollback: ' + msg + ': ', self.logops.repr_of_resop(op))
         funcinfo = self.funcinfo
         self.funcinfo = None
         self.emit_operation(funcinfo.prepare_op)
         for op in funcinfo.opargs:
             self.emit_operation(op)
-        if funcinfo.force_token_op:
-            self.emit_operation(funcinfo.force_token_op)
+        for delayed_op in funcinfo.delayed_ops:
+            self.emit_operation(delayed_op)
 
     def emit_operation(self, op):
         # we cannot emit any operation during the optimization
-        self.rollback_maybe()
+        self.rollback_maybe('invalid op', op)
         Optimization.emit_operation(self, op)
 
     def optimize_CALL(self, op):
@@ -136,13 +157,18 @@
         # call_may_force and the setfield_gc, so the final result we get is
         # again force_token/setfield_gc/call_may_force.
         #
+        # However, note that nowadays we also allow any setfield_gc between
+        # libffi_prepare and libffi_call, so while the comment above is a bit
+        # superfluous, it has been left there for future reference.
         if self.funcinfo is None:
             self.emit_operation(op)
         else:
-            self.funcinfo.force_token_op = op
+            self.funcinfo.delayed_ops.append(op)
+
+    optimize_SETFIELD_GC = optimize_FORCE_TOKEN
 
     def do_prepare_call(self, op):
-        self.rollback_maybe()
+        self.rollback_maybe('prepare call', op)
         funcval = self._get_funcval(op)
         if not funcval.is_constant():
             return [op] # cannot optimize
@@ -166,16 +192,18 @@
         for push_op in funcinfo.opargs:
             argval = self.getvalue(push_op.getarg(2))
             arglist.append(argval.force_box())
-        newop = ResOperation(rop.CALL_MAY_FORCE, arglist, op.result,
+        newop = ResOperation(rop.CALL_RELEASE_GIL, arglist, op.result,
                              descr=funcinfo.descr)
         self.commit_optimization()
         ops = []
-        if funcinfo.force_token_op:
-            ops.append(funcinfo.force_token_op)
+        for delayed_op in funcinfo.delayed_ops:
+            ops.append(delayed_op)
         ops.append(newop)
         return ops
 
     def propagate_forward(self, op):
+        if self.logops is not None:
+            debug_print(self.logops.repr_of_resop(op))
         opnum = op.getopnum()
         for value, func in optimize_ops:
             if opnum == value:
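
[Editor's note: OptFfiCall's single force_token_op slot has become a delayed_ops list: everything seen between libffi_prepare and libffi_call (force_token, and now also setfield_gc) is buffered, then either re-emitted just before the rewritten CALL_RELEASE_GIL or flushed unchanged when rollback_maybe fires. A self-contained toy version of that buffer-then-commit-or-rollback pattern, with hypothetical names and operations represented as strings:

    class DelayBuffer(object):
        def __init__(self, emit):
            self.emit = emit        # callback that outputs one operation
            self.delayed = None     # None means "not currently buffering"

        def begin(self):
            self.delayed = []

        def add(self, op):
            if self.delayed is None:
                self.emit(op)       # not buffering: pass straight through
            else:
                self.delayed.append(op)

        def rollback(self):
            ops, self.delayed = self.delayed, None
            for op in ops:
                self.emit(op)       # give up: emit the buffer unchanged

        def commit(self):
            ops, self.delayed = self.delayed, None
            return ops              # caller appends the optimized call

    out = []
    buf = DelayBuffer(out.append)
    buf.begin()
    buf.add('setfield_gc')
    buf.add('force_token')
    out.extend(buf.commit() + ['call_release_gil'])
    assert out == ['setfield_gc', 'force_token', 'call_release_gil']
]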
diff --git a/pypy/jit/metainterp/optimizeopt/heap.py b/pypy/jit/metainterp/optimizeopt/heap.py
--- a/pypy/jit/metainterp/optimizeopt/heap.py
+++ b/pypy/jit/metainterp/optimizeopt/heap.py
@@ -1,5 +1,5 @@
 import os
-from pypy.jit.metainterp.optimizeutil import _findall
+from pypy.jit.metainterp.optimizeopt.util import _findall
 from pypy.jit.metainterp.resoperation import rop, ResOperation
 from pypy.rlib.objectmodel import we_are_translated
 from pypy.jit.metainterp.jitexc import JitException
@@ -278,6 +278,7 @@
         assert opnum != rop.CALL_PURE
         if (opnum == rop.CALL or
             opnum == rop.CALL_MAY_FORCE or
+            opnum == rop.CALL_RELEASE_GIL or
             opnum == rop.CALL_ASSEMBLER):
             if opnum == rop.CALL_ASSEMBLER:
                 effectinfo = None
@@ -285,7 +286,7 @@
                 effectinfo = op.getdescr().get_extra_info()
             if effectinfo is None or effectinfo.check_can_invalidate():
                 self._seen_guard_not_invalidated = False
-            if effectinfo is not None:
+            if effectinfo is not None and not effectinfo.has_random_effects():
                 # XXX we can get the wrong complexity here, if the lists
                 # XXX stored on effectinfo are large
                 for fielddescr in effectinfo.readonly_descrs_fields:
diff --git a/pypy/jit/metainterp/optimizeopt/intbounds.py b/pypy/jit/metainterp/optimizeopt/intbounds.py
--- a/pypy/jit/metainterp/optimizeopt/intbounds.py
+++ b/pypy/jit/metainterp/optimizeopt/intbounds.py
@@ -1,5 +1,5 @@
 from pypy.jit.metainterp.optimizeopt.optimizer import Optimization, CONST_1, CONST_0
-from pypy.jit.metainterp.optimizeutil import _findall
+from pypy.jit.metainterp.optimizeopt.util import _findall
 from pypy.jit.metainterp.optimizeopt.intutils import IntBound, IntUnbounded, \
     IntLowerBound, IntUpperBound
 from pypy.jit.metainterp.history import Const, ConstInt
@@ -20,6 +20,14 @@
 
     def flush(self):
         assert self.posponedop is None
+
+    def setup(self):
+        self.posponedop = None
+        self.nextop = None
+
+    def reconstruct_for_next_iteration(self, optimizer, valuemap):
+        assert self.posponedop is None
+        return self
         
     def propagate_forward(self, op):
         if op.is_ovf():
diff --git a/pypy/jit/metainterp/optimizeopt/optimizer.py b/pypy/jit/metainterp/optimizeopt/optimizer.py
--- a/pypy/jit/metainterp/optimizeopt/optimizer.py
+++ b/pypy/jit/metainterp/optimizeopt/optimizer.py
@@ -4,9 +4,9 @@
 from pypy.jit.metainterp.resoperation import rop, ResOperation
 from pypy.jit.metainterp import jitprof
 from pypy.jit.metainterp.executor import execute_nonspec
-from pypy.jit.metainterp.optimizeutil import _findall, sort_descrs
-from pypy.jit.metainterp.optimizeutil import descrlist_dict
-from pypy.jit.metainterp.optimizeutil import InvalidLoop, args_dict
+from pypy.jit.metainterp.optimizeopt.util import _findall, sort_descrs
+from pypy.jit.metainterp.optimizeopt.util import descrlist_dict, args_dict
+from pypy.jit.metainterp.optimize import InvalidLoop
 from pypy.jit.metainterp import resume, compile
 from pypy.jit.metainterp.typesystem import llhelper, oohelper
 from pypy.rpython.lltypesystem import lltype
@@ -232,6 +232,14 @@
     def __init__(self):
         pass # make rpython happy
 
+    def propagate_begin_forward(self):
+        if self.next_optimization:
+            self.next_optimization.propagate_begin_forward()
+
+    def propagate_end_forward(self):
+        if self.next_optimization:
+            self.next_optimization.propagate_end_forward()
+
     def propagate_forward(self, op):
         raise NotImplementedError
 
@@ -538,11 +546,13 @@
         # ^^^ at least at the start of bridges.  For loops, we could set
         # it to False, but we probably don't care
         self.newoperations = []
+        self.first_optimization.propagate_begin_forward()
         self.i = 0
         while self.i < len(self.loop.operations):
             op = self.loop.operations[self.i]
             self.first_optimization.propagate_forward(op)
             self.i += 1
+        self.first_optimization.propagate_end_forward()
         self.loop.operations = self.newoperations
         self.loop.quasi_immutable_deps = self.quasi_immutable_deps
         # accumulate counters
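
[Editor's note: propagate_begin_forward/propagate_end_forward simply walk the chain via next_optimization, which is what lets OptFfiCall open and close its 'jit-log-ffiopt' debug section exactly once per pass. A minimal model of the forwarding, hypothetical class, no PyPy imports:

    class Stage(object):
        def __init__(self, name, log, next=None):
            self.name, self.log, self.next = name, log, next
        def begin(self):
            self.log.append('begin ' + self.name)
            if self.next:
                self.next.begin()
        def end(self):
            self.log.append('end ' + self.name)
            if self.next:
                self.next.end()

    log = []
    chain = Stage('ffi', log, Stage('heap', log))
    chain.begin(); chain.end()
    assert log == ['begin ffi', 'begin heap', 'end ffi', 'end heap']
]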
diff --git a/pypy/jit/metainterp/optimizeopt/rewrite.py b/pypy/jit/metainterp/optimizeopt/rewrite.py
--- a/pypy/jit/metainterp/optimizeopt/rewrite.py
+++ b/pypy/jit/metainterp/optimizeopt/rewrite.py
@@ -1,7 +1,7 @@
 from pypy.jit.metainterp.optimizeopt.optimizer import *
 from pypy.jit.metainterp.resoperation import opboolinvers, opboolreflex
 from pypy.jit.metainterp.history import ConstInt
-from pypy.jit.metainterp.optimizeutil import _findall
+from pypy.jit.metainterp.optimizeopt.util import _findall
 from pypy.jit.metainterp.resoperation import rop, ResOperation
 from pypy.jit.codewriter.effectinfo import EffectInfo
 from pypy.jit.metainterp.optimizeopt.intutils import IntBound
@@ -197,6 +197,32 @@
         else:
             self.emit_operation(op)
 
+    def optimize_FLOAT_MUL(self, op):
+        arg1 = op.getarg(0)
+        arg2 = op.getarg(1)
+
+        # Constant fold f0 * 1.0 and turn f0 * -1.0 into a FLOAT_NEG, these
+        # work in all cases, including NaN and inf
+        for lhs, rhs in [(arg1, arg2), (arg2, arg1)]:
+            v1 = self.getvalue(lhs)
+            v2 = self.getvalue(rhs)
+
+            if v1.is_constant():
+                if v1.box.getfloat() == 1.0:
+                    self.make_equal_to(op.result, v2)
+                    return
+                elif v1.box.getfloat() == -1.0:
+                    self.emit_operation(ResOperation(
+                        rop.FLOAT_NEG, [rhs], op.result
+                    ))
+                    return
+        self.emit_operation(op)
+
+    def optimize_FLOAT_NEG(self, op):
+        v1 = op.getarg(0)
+        self.emit_operation(op)
+        self.pure(rop.FLOAT_NEG, [op.result], v1)
+
     def optimize_CALL_PURE(self, op):
         arg_consts = []
         for i in range(op.numargs()):
@@ -430,14 +456,22 @@
         dest_start_box = self.get_constant_box(op.getarg(4))
         length = self.get_constant_box(op.getarg(5))
         if (source_value.is_virtual() and source_start_box and dest_start_box
-            and length and dest_value.is_virtual()):
-            # XXX optimize the case where dest value is not virtual,
-            #     but we still can avoid a mess
+            and length and (dest_value.is_virtual() or length.getint() <= 8)):
+            from pypy.jit.metainterp.optimizeopt.virtualize import VArrayValue
+            assert isinstance(source_value, VArrayValue)
             source_start = source_start_box.getint()
             dest_start = dest_start_box.getint()
             for index in range(length.getint()):
                 val = source_value.getitem(index + source_start)
-                dest_value.setitem(index + dest_start, val)
+                if dest_value.is_virtual():
+                    dest_value.setitem(index + dest_start, val)
+                else:
+                    newop = ResOperation(rop.SETARRAYITEM_GC,
+                                         [op.getarg(2),
+                                          ConstInt(index + dest_start),
+                                          val.force_box()], None,
+                                         descr=source_value.arraydescr)
+                    self.emit_operation(newop)
             return True
         if length and length.getint() == 0:
             return True # 0-length arraycopy
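
[Editor's note: the comment in optimize_FLOAT_MUL claims the two rewrites are safe "in all cases, including NaN and inf". That is easy to spot-check with ordinary IEEE doubles; the snippet below is just such a sanity check (including the sign of zero), not part of the test suite:

    # x * 1.0 returns x, and x * -1.0 equals -x, for every double:
    # signed zeroes, infinities and NaN included.
    import math

    specials = [0.0, -0.0, 1.5, -2.25,
                float('inf'), float('-inf'), float('nan')]
    for f in specials:
        for got, expected in [(f * 1.0, f), (f * -1.0, -f)]:
            if math.isnan(expected):
                assert math.isnan(got)
            else:
                assert got == expected
                # the sign of zero must be preserved as well
                assert math.copysign(1.0, got) == math.copysign(1.0, expected)
]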
diff --git a/pypy/jit/metainterp/optimizeopt/simplify.py b/pypy/jit/metainterp/optimizeopt/simplify.py
--- a/pypy/jit/metainterp/optimizeopt/simplify.py
+++ b/pypy/jit/metainterp/optimizeopt/simplify.py
@@ -1,7 +1,7 @@
 
 from pypy.jit.metainterp.resoperation import ResOperation, rop
 from pypy.jit.metainterp.optimizeopt.optimizer import Optimization
-from pypy.jit.metainterp.optimizeutil import _findall
+from pypy.jit.metainterp.optimizeopt.util import _findall
 
 class OptSimplify(Optimization):
     def optimize_CALL_PURE(self, op):
diff --git a/pypy/jit/metainterp/optimizeopt/string.py b/pypy/jit/metainterp/optimizeopt/string.py
--- a/pypy/jit/metainterp/optimizeopt/string.py
+++ b/pypy/jit/metainterp/optimizeopt/string.py
@@ -8,7 +8,7 @@
 from pypy.jit.metainterp.optimizeopt import optimizer, virtualize
 from pypy.jit.metainterp.optimizeopt.optimizer import CONST_0, CONST_1
 from pypy.jit.metainterp.optimizeopt.optimizer import llhelper
-from pypy.jit.metainterp.optimizeutil import _findall
+from pypy.jit.metainterp.optimizeopt.util import _findall
 from pypy.jit.codewriter.effectinfo import EffectInfo
 from pypy.jit.codewriter import heaptracker
 from pypy.rlib.unroll import unrolling_iterable
diff --git a/pypy/jit/metainterp/optimizeopt/test/__init__.py b/pypy/jit/metainterp/optimizeopt/test/__init__.py
new file mode 100644
diff --git a/pypy/jit/metainterp/test/test_optimizebasic.py b/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
rename from pypy/jit/metainterp/test/test_optimizebasic.py
rename to pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
--- a/pypy/jit/metainterp/test/test_optimizebasic.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
@@ -1,37 +1,15 @@
 import py
 from pypy.rlib.objectmodel import instantiate
-from pypy.jit.metainterp.test.test_optimizeutil import (LLtypeMixin,
-                                                        #OOtypeMixin,
-                                                        BaseTest)
+from pypy.jit.metainterp.optimizeopt.test.test_util import (
+    LLtypeMixin, BaseTest, FakeMetaInterpStaticData)
+from pypy.jit.metainterp.test.test_compile import FakeLogger
 import pypy.jit.metainterp.optimizeopt.optimizer as optimizeopt
 import pypy.jit.metainterp.optimizeopt.virtualize as virtualize
-from pypy.jit.metainterp.optimizeutil import InvalidLoop
+from pypy.jit.metainterp.optimize import InvalidLoop
 from pypy.jit.metainterp.history import AbstractDescr, ConstInt, BoxInt
-from pypy.jit.metainterp.jitprof import EmptyProfiler
 from pypy.jit.metainterp import executor, compile, resume, history
 from pypy.jit.metainterp.resoperation import rop, opname, ResOperation
-from pypy.jit.tool.oparser import pure_parse
-from pypy.jit.metainterp.optimizeutil import args_dict
-
-##class FakeFrame(object):
-##    parent_resumedata_snapshot = None
-##    parent_resumedata_frame_info_list = None
-
-##    def __init__(self, code="", pc=0):
-##        self.jitcode = code
-##        self.pc = pc
-
-class Fake(object):
-    failargs_limit = 1000
-    storedebug = None
-
-class FakeMetaInterpStaticData(object):
-
-    def __init__(self, cpu):
-        self.cpu = cpu
-        self.profiler = EmptyProfiler()
-        self.options = Fake()
-        self.globaldata = Fake()
+
 
 def test_store_final_boxes_in_guard():
     from pypy.jit.metainterp.compile import ResumeGuardDescr
@@ -101,7 +79,7 @@
     assert vinfo3 is vinfo4
 
 def test_descrlist_dict():
-    from pypy.jit.metainterp import optimizeutil
+    from pypy.jit.metainterp.optimizeopt import util as optimizeutil
     h1 = optimizeutil.descrlist_hash([])
     h2 = optimizeutil.descrlist_hash([LLtypeMixin.valuedescr])
     h3 = optimizeutil.descrlist_hash(
@@ -130,160 +108,21 @@
 
 # ____________________________________________________________
 
-def equaloplists(oplist1, oplist2, strict_fail_args=True, remap={},
-                 text_right=None):
-    # try to use the full width of the terminal to display the list
-    # unfortunately, does not work with the default capture method of py.test
-    # (which is fd), you you need to use either -s or --capture=sys, else you
-    # get the standard 80 columns width
-    totwidth = py.io.get_terminal_width()
-    width = totwidth / 2 - 1
-    print ' Comparing lists '.center(totwidth, '-')
-    text_right = text_right or 'expected'
-    print '%s| %s' % ('optimized'.center(width), text_right.center(width))
-    for op1, op2 in zip(oplist1, oplist2):
-        txt1 = str(op1)
-        txt2 = str(op2)
-        while txt1 or txt2:
-            print '%s| %s' % (txt1[:width].ljust(width), txt2[:width])
-            txt1 = txt1[width:]
-            txt2 = txt2[width:]
-        assert op1.getopnum() == op2.getopnum()
-        assert op1.numargs() == op2.numargs()
-        for i in range(op1.numargs()):
-            x = op1.getarg(i)
-            y = op2.getarg(i)
-            assert x == remap.get(y, y)
-        if op2.result in remap:
-            assert op1.result == remap[op2.result]
-        else:
-            remap[op2.result] = op1.result
-        if op1.getopnum() != rop.JUMP:      # xxx obscure
-            assert op1.getdescr() == op2.getdescr()
-        if op1.getfailargs() or op2.getfailargs():
-            assert len(op1.getfailargs()) == len(op2.getfailargs())
-            if strict_fail_args:
-                for x, y in zip(op1.getfailargs(), op2.getfailargs()):
-                    assert x == remap.get(y, y)
-            else:
-                fail_args1 = set(op1.getfailargs())
-                fail_args2 = set([remap.get(y, y) for y in op2.getfailargs()])
-                assert fail_args1 == fail_args2
-    assert len(oplist1) == len(oplist2)
-    print '-'*totwidth
-    return True
-
-def test_equaloplists():
-    ops = """
-    [i0]
-    i1 = int_add(i0, 1)
-    i2 = int_add(i1, 1)
-    guard_true(i1) [i2]
-    jump(i1)
-    """
-    namespace = {}
-    loop1 = pure_parse(ops, namespace=namespace)
-    loop2 = pure_parse(ops, namespace=namespace)
-    loop3 = pure_parse(ops.replace("i2 = int_add", "i2 = int_sub"),
-                       namespace=namespace)
-    assert equaloplists(loop1.operations, loop2.operations)
-    py.test.raises(AssertionError,
-                   "equaloplists(loop1.operations, loop3.operations)")
-
-def test_equaloplists_fail_args():
-    ops = """
-    [i0]
-    i1 = int_add(i0, 1)
-    i2 = int_add(i1, 1)
-    guard_true(i1) [i2, i1]
-    jump(i1)
-    """
-    namespace = {}
-    loop1 = pure_parse(ops, namespace=namespace)
-    loop2 = pure_parse(ops.replace("[i2, i1]", "[i1, i2]"),
-                       namespace=namespace)
-    py.test.raises(AssertionError,
-                   "equaloplists(loop1.operations, loop2.operations)")
-    assert equaloplists(loop1.operations, loop2.operations,
-                        strict_fail_args=False)
-    loop3 = pure_parse(ops.replace("[i2, i1]", "[i2, i0]"),
-                       namespace=namespace)
-    py.test.raises(AssertionError,
-                   "equaloplists(loop1.operations, loop3.operations)")
-
-# ____________________________________________________________
-
-class Storage(compile.ResumeGuardDescr):
-    "for tests."
-    def __init__(self, metainterp_sd=None, original_greenkey=None):
-        self.metainterp_sd = metainterp_sd
-        self.original_greenkey = original_greenkey
-    def store_final_boxes(self, op, boxes):
-        op.setfailargs(boxes)
-    def __eq__(self, other):
-        return type(self) is type(other)      # xxx obscure
-
-def _sortboxes(boxes):
-    _kind2count = {history.INT: 1, history.REF: 2, history.FLOAT: 3}
-    return sorted(boxes, key=lambda box: _kind2count[box.type])
 
 class BaseTestBasic(BaseTest):
 
-    def invent_fail_descr(self, fail_args):
-        if fail_args is None:
-            return None
-        descr = Storage()
-        descr.rd_frame_info_list = resume.FrameInfo(None, "code", 11)
-        descr.rd_snapshot = resume.Snapshot(None, _sortboxes(fail_args))
-        return descr
-
-    def assert_equal(self, optimized, expected):
-        assert len(optimized.inputargs) == len(expected.inputargs)
-        remap = {}
-        for box1, box2 in zip(optimized.inputargs, expected.inputargs):
-            assert box1.__class__ == box2.__class__
-            remap[box2] = box1
-        assert equaloplists(optimized.operations,
-                            expected.operations, False, remap)
+    enable_opts = "intbounds:rewrite:virtualize:string:heap"
 
     def optimize_loop(self, ops, optops, call_pure_results=None):
+
         loop = self.parse(ops)
-        #
-        self.loop = loop
-        loop.call_pure_results = args_dict()
-        if call_pure_results is not None:
-            for k, v in call_pure_results.items():
-                loop.call_pure_results[list(k)] = v
-        metainterp_sd = FakeMetaInterpStaticData(self.cpu)
-        if hasattr(self, 'vrefinfo'):
-            metainterp_sd.virtualref_info = self.vrefinfo
-        if hasattr(self, 'callinfocollection'):
-            metainterp_sd.callinfocollection = self.callinfocollection
-        #
-        # XXX list the exact optimizations that are needed for each test
-        from pypy.jit.metainterp.optimizeopt import (OptIntBounds,
-                                                     OptRewrite,
-                                                     OptVirtualize,
-                                                     OptString,
-                                                     OptHeap,
-                                                     Optimizer)
-        from pypy.jit.metainterp.optimizeopt.fficall import OptFfiCall
-
-        optimizations = [OptIntBounds(),
-                         OptRewrite(),
-                         OptVirtualize(),
-                         OptString(),
-                         OptHeap(),
-                         OptFfiCall(),
-                         ]
-        optimizer = Optimizer(metainterp_sd, loop, optimizations)
-        optimizer.propagate_all_forward()
-        #
         expected = self.parse(optops)
+        self._do_optimize_loop(loop, call_pure_results)
         print '\n'.join([str(o) for o in loop.operations])
         self.assert_equal(loop, expected)
 
 
+
 class BaseTestOptimizeBasic(BaseTestBasic):
 
     def test_simple(self):
@@ -2287,6 +2126,81 @@
         """
         self.optimize_loop(ops, expected)
 
+    def test_fold_constant_partial_ops_float(self):
+        ops = """
+        [f0]
+        f1 = float_mul(f0, 1.0)
+        f2 = escape(f1)
+        jump(f2)
+        """
+        expected = """
+        [f0]
+        f2 = escape(f0)
+        jump(f2)
+        """
+        self.optimize_loop(ops, expected)
+
+        ops = """
+        [f0]
+        f1 = float_mul(1.0, f0)
+        f2 = escape(f1)
+        jump(f2)
+        """
+        expected = """
+        [f0]
+        f2 = escape(f0)
+        jump(f2)
+        """
+        self.optimize_loop(ops, expected)
+
+
+        ops = """
+        [f0]
+        f1 = float_mul(f0, -1.0)
+        f2 = escape(f1)
+        jump(f2)
+        """
+        expected = """
+        [f0]
+        f1 = float_neg(f0)
+        f2 = escape(f1)
+        jump(f2)
+        """
+        self.optimize_loop(ops, expected)
+
+        ops = """
+        [f0]
+        f1 = float_mul(-1.0, f0)
+        f2 = escape(f1)
+        jump(f2)
+        """
+        expected = """
+        [f0]
+        f1 = float_neg(f0)
+        f2 = escape(f1)
+        jump(f2)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_fold_repeated_float_neg(self):
+        ops = """
+        [f0]
+        f1 = float_neg(f0)
+        f2 = float_neg(f1)
+        f3 = float_neg(f2)
+        f4 = float_neg(f3)
+        escape(f4)
+        jump(f4)
+        """
+        expected = """
+        [f0]
+        # The backend removes this dead op.
+        f1 = float_neg(f0)
+        escape(f0)
+        jump(f0)
+        """
+        self.optimize_loop(ops, expected)
+
     # ----------
 
     def make_fail_descr(self):
diff --git a/pypy/jit/metainterp/test/test_optimizefficall.py b/pypy/jit/metainterp/optimizeopt/test/test_optimizefficall.py
rename from pypy/jit/metainterp/test/test_optimizefficall.py
rename to pypy/jit/metainterp/optimizeopt/test/test_optimizefficall.py
--- a/pypy/jit/metainterp/test/test_optimizefficall.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_optimizefficall.py
@@ -2,8 +2,8 @@
 from pypy.rlib.libffi import Func, types
 from pypy.jit.metainterp.history import AbstractDescr
 from pypy.jit.codewriter.effectinfo import EffectInfo
-from pypy.jit.metainterp.test.test_optimizebasic import BaseTestBasic
-from pypy.jit.metainterp.test.test_optimizebasic import LLtypeMixin
+from pypy.jit.metainterp.optimizeopt.test.test_optimizebasic import BaseTestBasic
+from pypy.jit.metainterp.optimizeopt.test.test_optimizebasic import LLtypeMixin
 
 class MyCallDescr(AbstractDescr):
     """
@@ -32,12 +32,15 @@
 
 
 class TestFfiCall(BaseTestBasic, LLtypeMixin):
-    jit_ffi = True
+
+    enable_opts = "intbounds:rewrite:virtualize:string:heap:ffi"
 
     class namespace:
         cpu = LLtypeMixin.cpu
         FUNC = LLtypeMixin.FUNC
         vable_token_descr = LLtypeMixin.valuedescr
+        valuedescr = LLtypeMixin.valuedescr
+
         int_float__int = MyCallDescr('if', 'i')
         funcptr = FakeLLObject()
         func = FakeLLObject(_fake_class=Func,
@@ -76,7 +79,7 @@
         """
         expected = """
         [i0, f1]
-        i3 = call_may_force(12345, i0, f1, descr=int_float__int)
+        i3 = call_release_gil(12345, i0, f1, descr=int_float__int)
         guard_not_forced() []
         guard_no_exception() []
         jump(i3, f1)
@@ -99,7 +102,7 @@
 
     def test_handle_virtualizables(self):
         # this test needs an explanation to understand what goes on: see the
-        # coment in optimize_FORCE_TOKEN
+        # comment in optimize_FORCE_TOKEN
         ops = """
         [i0, f1, p2]
         call(0, ConstPtr(func),                       descr=libffi_prepare)
@@ -116,7 +119,7 @@
         [i0, f1, p2]
         i4 = force_token()
         setfield_gc(p2, i4, descr=vable_token_descr)
-        i3 = call_may_force(12345, i0, f1, descr=int_float__int)
+        i3 = call_release_gil(12345, i0, f1, descr=int_float__int)
         guard_not_forced() [p2]
         guard_no_exception() [p2]
         jump(i3, f1, p2)
@@ -213,7 +216,7 @@
         call(0, ConstPtr(func),                        descr=libffi_prepare)
         #
         # this "nested" call is nicely optimized
-        i4 = call_may_force(67890, i0, f1, descr=int_float__int)
+        i4 = call_release_gil(67890, i0, f1, descr=int_float__int)
         guard_not_forced() []
         guard_no_exception() []
         #
@@ -242,3 +245,25 @@
         """
         expected = ops
         loop = self.optimize_loop(ops, expected)
+
+    def test_allow_setfields_in_between(self):
+        ops = """
+        [i0, f1, p2]
+        call(0, ConstPtr(func),                       descr=libffi_prepare)
+        call(0, ConstPtr(func), i0,                   descr=libffi_push_arg)
+        call(0, ConstPtr(func), f1,                   descr=libffi_push_arg)
+        setfield_gc(p2, i0,                           descr=valuedescr)
+        i3 = call_may_force(0, ConstPtr(func), 12345, descr=libffi_call)
+        guard_not_forced() []
+        guard_no_exception() []
+        jump(i3, f1, p2)
+        """
+        expected = """
+        [i0, f1, p2]
+        setfield_gc(p2, i0, descr=valuedescr)
+        i3 = call_release_gil(12345, i0, f1, descr=int_float__int)
+        guard_not_forced() []
+        guard_no_exception() []
+        jump(i3, f1, p2)
+        """
+        loop = self.optimize_loop(ops, expected)
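
The colon-separated enable_opts string used by TestFfiCall above selects the
optimization passes by name; the parsing lives in build_opt_chain, which
test_build_opt_chain exercises in the next file. Below is only a minimal
sketch of inspecting the resulting chain, using names visible in this diff;
the helper opt_names itself is illustrative and not part of the patch.

from pypy.jit.metainterp.optimizeopt import build_opt_chain

def opt_names(metainterp_sd, spec):
    # build the chain for a spec such as "heap:intbounds" and return the
    # class names of the selected passes plus the unroll flag
    chain, unroll = build_opt_chain(metainterp_sd, spec,
                                    inline_short_preamble=False)
    return [opt.__class__.__name__ for opt in chain], unroll
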
diff --git a/pypy/jit/metainterp/test/test_optimizeopt.py b/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
rename from pypy/jit/metainterp/test/test_optimizeopt.py
rename to pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
--- a/pypy/jit/metainterp/test/test_optimizeopt.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
@@ -1,206 +1,91 @@
 import py
 from pypy.rlib.objectmodel import instantiate
-from pypy.jit.metainterp.test.test_optimizeutil import (LLtypeMixin,
-                                                        #OOtypeMixin,
-                                                        BaseTest)
+from pypy.jit.metainterp.optimizeopt.test.test_util import (
+    LLtypeMixin, BaseTest, Storage, _sortboxes)
 import pypy.jit.metainterp.optimizeopt.optimizer as optimizeopt
 import pypy.jit.metainterp.optimizeopt.virtualize as virtualize
-from pypy.jit.metainterp.optimizeopt import optimize_loop_1, ALL_OPTS_DICT
-from pypy.jit.metainterp.optimizeutil import InvalidLoop
+from pypy.jit.metainterp.optimizeopt import optimize_loop_1, ALL_OPTS_DICT, build_opt_chain
+from pypy.jit.metainterp.optimize import InvalidLoop
 from pypy.jit.metainterp.history import AbstractDescr, ConstInt, BoxInt
 from pypy.jit.metainterp.history import TreeLoop, LoopToken
 from pypy.jit.metainterp.jitprof import EmptyProfiler
 from pypy.jit.metainterp import executor, compile, resume, history
 from pypy.jit.metainterp.resoperation import rop, opname, ResOperation
 from pypy.jit.tool.oparser import pure_parse
-from pypy.jit.metainterp.test.test_optimizebasic import equaloplists
-from pypy.jit.metainterp.optimizeutil import args_dict
-
-class Fake(object):
-    failargs_limit = 1000
-    storedebug = None
-
-class FakeMetaInterpStaticData(object):
-
-    def __init__(self, cpu, jit_ffi=False):
-        self.cpu = cpu
-        self.profiler = EmptyProfiler()
-        self.options = Fake()
-        self.globaldata = Fake()
-        self.jit_ffi = jit_ffi
-
-def test_store_final_boxes_in_guard():
-    from pypy.jit.metainterp.compile import ResumeGuardDescr
-    from pypy.jit.metainterp.resume import tag, TAGBOX
-    b0 = BoxInt()
-    b1 = BoxInt()
-    opt = optimizeopt.Optimizer(FakeMetaInterpStaticData(LLtypeMixin.cpu),
-                                None)
-    fdescr = ResumeGuardDescr()
-    op = ResOperation(rop.GUARD_TRUE, ['dummy'], None, descr=fdescr)
-    # setup rd data
-    fi0 = resume.FrameInfo(None, "code0", 11)
-    fdescr.rd_frame_info_list = resume.FrameInfo(fi0, "code1", 33)
-    snapshot0 = resume.Snapshot(None, [b0])
-    fdescr.rd_snapshot = resume.Snapshot(snapshot0, [b1])
+from pypy.jit.metainterp.optimizeopt.util import args_dict
+from pypy.jit.metainterp.optimizeopt.test.test_optimizebasic import FakeMetaInterpStaticData
+from pypy.config.pypyoption import get_pypy_config
+
+
+def test_build_opt_chain():
+    def check(chain, expected_names):
+        names = [opt.__class__.__name__ for opt in chain]
+        assert names == expected_names
     #
-    opt.store_final_boxes_in_guard(op)
-    if op.getfailargs() == [b0, b1]:
-        assert list(fdescr.rd_numb.nums)      == [tag(1, TAGBOX)]
-        assert list(fdescr.rd_numb.prev.nums) == [tag(0, TAGBOX)]
-    else:
-        assert op.getfailargs() == [b1, b0]
-        assert list(fdescr.rd_numb.nums)      == [tag(0, TAGBOX)]
-        assert list(fdescr.rd_numb.prev.nums) == [tag(1, TAGBOX)]
-    assert fdescr.rd_virtuals is None
-    assert fdescr.rd_consts == []
-
-def test_sharing_field_lists_of_virtual():
-    class FakeOptimizer(object):
-        class cpu(object):
-            pass
-    opt = FakeOptimizer()
-    virt1 = virtualize.AbstractVirtualStructValue(opt, None)
-    lst1 = virt1._get_field_descr_list()
-    assert lst1 == []
-    lst2 = virt1._get_field_descr_list()
-    assert lst1 is lst2
-    virt1.setfield(LLtypeMixin.valuedescr, optimizeopt.OptValue(None))
-    lst3 = virt1._get_field_descr_list()
-    assert lst3 == [LLtypeMixin.valuedescr]
-    lst4 = virt1._get_field_descr_list()
-    assert lst3 is lst4
-
-    virt2 = virtualize.AbstractVirtualStructValue(opt, None)
-    lst5 = virt2._get_field_descr_list()
-    assert lst5 is lst1
-    virt2.setfield(LLtypeMixin.valuedescr, optimizeopt.OptValue(None))
-    lst6 = virt1._get_field_descr_list()
-    assert lst6 is lst3
-
-def test_reuse_vinfo():
-    class FakeVInfo(object):
-        def set_content(self, fieldnums):
-            self.fieldnums = fieldnums
-        def equals(self, fieldnums):
-            return self.fieldnums == fieldnums
-    class FakeVirtualValue(virtualize.AbstractVirtualValue):
-        def _make_virtual(self, *args):
-            return FakeVInfo()
-    v1 = FakeVirtualValue(None, None, None)
-    vinfo1 = v1.make_virtual_info(None, [1, 2, 4])
-    vinfo2 = v1.make_virtual_info(None, [1, 2, 4])
-    assert vinfo1 is vinfo2
-    vinfo3 = v1.make_virtual_info(None, [1, 2, 6])
-    assert vinfo3 is not vinfo2
-    vinfo4 = v1.make_virtual_info(None, [1, 2, 6])
-    assert vinfo3 is vinfo4
-
-def test_descrlist_dict():
-    from pypy.jit.metainterp import optimizeutil
-    h1 = optimizeutil.descrlist_hash([])
-    h2 = optimizeutil.descrlist_hash([LLtypeMixin.valuedescr])
-    h3 = optimizeutil.descrlist_hash(
-            [LLtypeMixin.valuedescr, LLtypeMixin.nextdescr])
-    assert h1 != h2
-    assert h2 != h3
-    assert optimizeutil.descrlist_eq([], [])
-    assert not optimizeutil.descrlist_eq([], [LLtypeMixin.valuedescr])
-    assert optimizeutil.descrlist_eq([LLtypeMixin.valuedescr],
-                                     [LLtypeMixin.valuedescr])
-    assert not optimizeutil.descrlist_eq([LLtypeMixin.valuedescr],
-                                         [LLtypeMixin.nextdescr])
-    assert optimizeutil.descrlist_eq([LLtypeMixin.valuedescr, LLtypeMixin.nextdescr],
-                                     [LLtypeMixin.valuedescr, LLtypeMixin.nextdescr])
-    assert not optimizeutil.descrlist_eq([LLtypeMixin.nextdescr, LLtypeMixin.valuedescr],
-                                         [LLtypeMixin.valuedescr, LLtypeMixin.nextdescr])
-
-    # descrlist_eq should compare by identity of the descrs, not by the result
-    # of sort_key
-    class FakeDescr(object):
-        def sort_key(self):
-            return 1
-
-    assert not optimizeutil.descrlist_eq([FakeDescr()], [FakeDescr()])
+    metainterp_sd = FakeMetaInterpStaticData(None)
+    chain, _ = build_opt_chain(metainterp_sd, "", inline_short_preamble=False)
+    check(chain, ["OptSimplify"])
+    #
+    chain, _ = build_opt_chain(metainterp_sd, "")
+    check(chain, ["OptInlineShortPreamble", "OptSimplify"])
+    #
+    chain, _ = build_opt_chain(metainterp_sd, "")
+    check(chain, ["OptInlineShortPreamble", "OptSimplify"])
+    #
+    chain, _ = build_opt_chain(metainterp_sd, "heap:intbounds")
+    check(chain, ["OptInlineShortPreamble", "OptIntBounds", "OptHeap", "OptSimplify"])
+    #
+    chain, unroll = build_opt_chain(metainterp_sd, "unroll")
+    check(chain, ["OptInlineShortPreamble", "OptSimplify"])
+    assert unroll
+    #
+    chain, _ = build_opt_chain(metainterp_sd, "aaa:bbb", inline_short_preamble=False)
+    check(chain, ["OptSimplify"])
+    #
+    chain, _ = build_opt_chain(metainterp_sd, "ffi", inline_short_preamble=False)
+    check(chain, ["OptFfiCall", "OptSimplify"])
+    #
+    metainterp_sd.config = get_pypy_config(translating=True)
+    assert not metainterp_sd.config.translation.jit_ffi
+    chain, _ = build_opt_chain(metainterp_sd, "ffi", inline_short_preamble=False)
+    check(chain, ["OptSimplify"])
+
 
 # ____________________________________________________________
-class Storage(compile.ResumeGuardDescr):
-    "for tests."
-    def __init__(self, metainterp_sd=None, original_greenkey=None):
-        self.metainterp_sd = metainterp_sd
-        self.original_greenkey = original_greenkey
-    def store_final_boxes(self, op, boxes):
-        op.setfailargs(boxes)
-    def __eq__(self, other):
-        return type(self) is type(other)      # xxx obscure
+
+
+class FakeDescr(compile.ResumeGuardDescr):
+    class rd_snapshot:
+        class prev:
+            prev = None
+            boxes = []
+        boxes = []
     def clone_if_mutable(self):
-        res = Storage(self.metainterp_sd, self.original_greenkey)
-        self.copy_all_attributes_into(res)
-        return res
-
-def _sortboxes(boxes):
-    _kind2count = {history.INT: 1, history.REF: 2, history.FLOAT: 3}
-    return sorted(boxes, key=lambda box: _kind2count[box.type])
-
-class BaseTestOptimizeOpt(BaseTest):
-    jit_ffi = False
-
-    def invent_fail_descr(self, fail_args):
-        if fail_args is None:
-            return None
-        descr = Storage()
-        descr.rd_frame_info_list = resume.FrameInfo(None, "code", 11)
-        descr.rd_snapshot = resume.Snapshot(None, _sortboxes(fail_args))
-        return descr
-
-    def assert_equal(self, optimized, expected, text_right=None):
-        assert len(optimized.inputargs) == len(expected.inputargs)
-        remap = {}
-        for box1, box2 in zip(optimized.inputargs, expected.inputargs):
-            assert box1.__class__ == box2.__class__
-            remap[box2] = box1
-        assert equaloplists(optimized.operations,
-                            expected.operations, False, remap, text_right)
-
-    def optimize_loop(self, ops, optops, expected_preamble=None,
+        return self
+
+
+class BaseTestWithUnroll(BaseTest):
+
+    enable_opts = "intbounds:rewrite:virtualize:string:heap:unroll"
+
+    def optimize_loop(self, ops, expected, expected_preamble=None,
                       call_pure_results=None, expected_short=None):
         loop = self.parse(ops)
-        if optops != "crash!":
-            expected = self.parse(optops)
-        else:
-            expected = "crash!"
+        if expected != "crash!":
+            expected = self.parse(expected)
         if expected_preamble:
             expected_preamble = self.parse(expected_preamble)
         if expected_short:
             expected_short = self.parse(expected_short)
-        #
-        self.loop = loop
-        loop.call_pure_results = args_dict()
-        if call_pure_results is not None:
-            for k, v in call_pure_results.items():
-                loop.call_pure_results[list(k)] = v
         loop.preamble = TreeLoop('preamble')
         loop.preamble.inputargs = loop.inputargs
         loop.preamble.token = LoopToken()
-        metainterp_sd = FakeMetaInterpStaticData(self.cpu, self.jit_ffi)
-        if hasattr(self, 'vrefinfo'):
-            metainterp_sd.virtualref_info = self.vrefinfo
-        if hasattr(self, 'callinfocollection'):
-            metainterp_sd.callinfocollection = self.callinfocollection
-        class FakeDescr(compile.ResumeGuardDescr):
-            class rd_snapshot:
-                class prev:
-                    prev = None
-                    boxes = []
-                boxes = []
-            def clone_if_mutable(self):
-                return self
             def __eq__(self, other):
                 return isinstance(other, Storage) or isinstance(other, FakeDescr)
         loop.preamble.start_resumedescr = FakeDescr()
-        optimize_loop_1(metainterp_sd, loop, ALL_OPTS_DICT)
         #
-
+        self._do_optimize_loop(loop, call_pure_results)
+        #
         print
         print "Preamble:"
         print loop.preamble.inputargs
@@ -220,7 +105,6 @@
             print '\n'.join([str(o) for o in short.operations])        
             print
         
-
         assert expected != "crash!", "should have raised an exception"
         self.assert_equal(loop, expected)
         if expected_preamble:
@@ -230,10 +114,9 @@
             self.assert_equal(short, expected_short,
                               text_right='expected short preamble')
             
-
         return loop
 
-class OptimizeOptTest(BaseTestOptimizeOpt):
+class OptimizeOptTest(BaseTestWithUnroll):
 
     def setup_method(self, meth=None):
         class FailDescr(compile.ResumeGuardDescr):
@@ -3652,6 +3535,56 @@
         '''
         self.optimize_loop(ops, expected)
 
+    def test_arraycopy_dest_not_virtual(self):
+        ops = '''
+        []
+        p1 = new_array(3, descr=arraydescr)
+        p2 = new_array(3, descr=arraydescr)
+        setarrayitem_gc(p1, 2, 10, descr=arraydescr)
+        setarrayitem_gc(p2, 2, 13, descr=arraydescr)
+        escape(p2)
+        call(0, p1, p2, 0, 0, 3, descr=arraycopydescr)
+        escape(p2)
+        jump()
+        '''
+        expected = '''
+        []
+        p2 = new_array(3, descr=arraydescr)
+        setarrayitem_gc(p2, 2, 13, descr=arraydescr)
+        escape(p2)
+        setarrayitem_gc(p2, 0, 0, descr=arraydescr)
+        setarrayitem_gc(p2, 1, 0, descr=arraydescr)
+        setarrayitem_gc(p2, 2, 10, descr=arraydescr)
+        escape(p2)
+        jump()
+        '''
+        self.optimize_loop(ops, expected)
+
+    def test_arraycopy_dest_not_virtual_too_long(self):
+        ops = '''
+        []
+        p1 = new_array(10, descr=arraydescr)
+        p2 = new_array(10, descr=arraydescr)
+        setarrayitem_gc(p1, 2, 10, descr=arraydescr)
+        setarrayitem_gc(p2, 2, 13, descr=arraydescr)
+        escape(p2)
+        call(0, p1, p2, 0, 0, 10, descr=arraycopydescr)
+        escape(p2)
+        jump()
+        '''
+        expected = '''
+        []
+        p2 = new_array(10, descr=arraydescr)
+        setarrayitem_gc(p2, 2, 13, descr=arraydescr)
+        escape(p2)
+        p1 = new_array(10, descr=arraydescr)
+        setarrayitem_gc(p1, 2, 10, descr=arraydescr)
+        call(0, p1, p2, 0, 0, 10, descr=arraycopydescr)
+        escape(p2)
+        jump()
+        '''
+        self.optimize_loop(ops, expected)
+
     def test_bound_lt(self):
         ops = """
         [i0]
diff --git a/pypy/jit/metainterp/test/test_optimizeutil.py b/pypy/jit/metainterp/optimizeopt/test/test_util.py
rename from pypy/jit/metainterp/test/test_optimizeutil.py
rename to pypy/jit/metainterp/optimizeopt/test/test_util.py
--- a/pypy/jit/metainterp/test/test_optimizeutil.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_util.py
@@ -9,11 +9,15 @@
 from pypy.jit.metainterp.history import (BoxInt, BoxPtr, ConstInt, ConstPtr,
                                          Const, TreeLoop, BoxObj,
                                          ConstObj, AbstractDescr)
-from pypy.jit.metainterp.optimizeutil import sort_descrs, InvalidLoop
+from pypy.jit.metainterp.optimizeopt.util import sort_descrs, equaloplists
+from pypy.jit.metainterp.optimize import InvalidLoop
 from pypy.jit.codewriter.effectinfo import EffectInfo
 from pypy.jit.codewriter.heaptracker import register_known_gctype, adr2int
-from pypy.jit.tool.oparser import parse
+from pypy.jit.tool.oparser import parse, pure_parse
 from pypy.jit.metainterp.quasiimmut import QuasiImmutDescr
+from pypy.jit.metainterp import compile, resume, history
+from pypy.jit.metainterp.jitprof import EmptyProfiler
+from pypy.config.pypyoption import get_pypy_config
 
 def test_sort_descrs():
     class PseudoDescr(AbstractDescr):
@@ -28,6 +32,44 @@
         sort_descrs(lst2)
         assert lst2 == lst
 
+def test_equaloplists():
+    ops = """
+    [i0]
+    i1 = int_add(i0, 1)
+    i2 = int_add(i1, 1)
+    guard_true(i1) [i2]
+    jump(i1)
+    """
+    namespace = {}
+    loop1 = pure_parse(ops, namespace=namespace)
+    loop2 = pure_parse(ops, namespace=namespace)
+    loop3 = pure_parse(ops.replace("i2 = int_add", "i2 = int_sub"),
+                       namespace=namespace)
+    assert equaloplists(loop1.operations, loop2.operations)
+    py.test.raises(AssertionError,
+                   "equaloplists(loop1.operations, loop3.operations)")
+
+def test_equaloplists_fail_args():
+    ops = """
+    [i0]
+    i1 = int_add(i0, 1)
+    i2 = int_add(i1, 1)
+    guard_true(i1) [i2, i1]
+    jump(i1)
+    """
+    namespace = {}
+    loop1 = pure_parse(ops, namespace=namespace)
+    loop2 = pure_parse(ops.replace("[i2, i1]", "[i1, i2]"),
+                       namespace=namespace)
+    py.test.raises(AssertionError,
+                   "equaloplists(loop1.operations, loop2.operations)")
+    assert equaloplists(loop1.operations, loop2.operations,
+                        strict_fail_args=False)
+    loop3 = pure_parse(ops.replace("[i2, i1]", "[i2, i0]"),
+                       namespace=namespace)
+    py.test.raises(AssertionError,
+                   "equaloplists(loop1.operations, loop3.operations)")
+
 # ____________________________________________________________
 
 class LLtypeMixin(object):
@@ -256,8 +298,45 @@
 ##                       u_vtable_adr: cpu.typedescrof(U)}
 ##    namespace = locals()
 
+# ____________________________________________________________
+
+
+
+class Fake(object):
+    failargs_limit = 1000
+    storedebug = None
+
+
+class FakeMetaInterpStaticData(object):
+
+    def __init__(self, cpu):
+        self.cpu = cpu
+        self.profiler = EmptyProfiler()
+        self.options = Fake()
+        self.globaldata = Fake()
+        self.config = get_pypy_config(translating=True)
+        self.config.translation.jit_ffi = True
+
+
+class Storage(compile.ResumeGuardDescr):
+    "for tests."
+    def __init__(self, metainterp_sd=None, original_greenkey=None):
+        self.metainterp_sd = metainterp_sd
+        self.original_greenkey = original_greenkey
+    def store_final_boxes(self, op, boxes):
+        op.setfailargs(boxes)
+    def __eq__(self, other):
+        return type(self) is type(other)      # xxx obscure
+    def clone_if_mutable(self):
+        res = Storage(self.metainterp_sd, self.original_greenkey)
+        self.copy_all_attributes_into(res)
+        return res
+
+def _sortboxes(boxes):
+    _kind2count = {history.INT: 1, history.REF: 2, history.FLOAT: 3}
+    return sorted(boxes, key=lambda box: _kind2count[box.type])
+
 class BaseTest(object):
-    invent_fail_descr = None
 
     def parse(self, s, boxkinds=None):
         return parse(s, self.cpu, self.namespace,
@@ -265,5 +344,40 @@
                      boxkinds=boxkinds,
                      invent_fail_descr=self.invent_fail_descr)
 
+    def invent_fail_descr(self, model, fail_args):
+        if fail_args is None:
+            return None
+        descr = Storage()
+        descr.rd_frame_info_list = resume.FrameInfo(None, "code", 11)
+        descr.rd_snapshot = resume.Snapshot(None, _sortboxes(fail_args))
+        return descr
+
+    def assert_equal(self, optimized, expected, text_right=None):
+        from pypy.jit.metainterp.optimizeopt.util import equaloplists
+        assert len(optimized.inputargs) == len(expected.inputargs)
+        remap = {}
+        for box1, box2 in zip(optimized.inputargs, expected.inputargs):
+            assert box1.__class__ == box2.__class__
+            remap[box2] = box1
+        assert equaloplists(optimized.operations,
+                            expected.operations, False, remap, text_right)
+
+    def _do_optimize_loop(self, loop, call_pure_results):
+        from pypy.jit.metainterp.optimizeopt import optimize_loop_1
+        from pypy.jit.metainterp.optimizeopt.util import args_dict
+
+        self.loop = loop
+        loop.call_pure_results = args_dict()
+        if call_pure_results is not None:
+            for k, v in call_pure_results.items():
+                loop.call_pure_results[list(k)] = v
+        metainterp_sd = FakeMetaInterpStaticData(self.cpu)
+        if hasattr(self, 'vrefinfo'):
+            metainterp_sd.virtualref_info = self.vrefinfo
+        if hasattr(self, 'callinfocollection'):
+            metainterp_sd.callinfocollection = self.callinfocollection
+        #
+        optimize_loop_1(metainterp_sd, loop, self.enable_opts)
+
 # ____________________________________________________________
 
diff --git a/pypy/jit/metainterp/optimizeopt/unroll.py b/pypy/jit/metainterp/optimizeopt/unroll.py
--- a/pypy/jit/metainterp/optimizeopt/unroll.py
+++ b/pypy/jit/metainterp/optimizeopt/unroll.py
@@ -5,7 +5,7 @@
 from pypy.jit.metainterp.resume import Snapshot
 from pypy.jit.metainterp.history import TreeLoop, LoopToken
 from pypy.rlib.debug import debug_start, debug_stop, debug_print
-from pypy.jit.metainterp.optimizeutil import InvalidLoop, RetraceLoop
+from pypy.jit.metainterp.optimize import InvalidLoop, RetraceLoop
 from pypy.jit.metainterp.jitexc import JitException
 from pypy.jit.metainterp.history import make_hashable_int
 from pypy.jit.codewriter.effectinfo import EffectInfo
diff --git a/pypy/jit/metainterp/optimizeutil.py b/pypy/jit/metainterp/optimizeopt/util.py
rename from pypy/jit/metainterp/optimizeutil.py
rename to pypy/jit/metainterp/optimizeopt/util.py
--- a/pypy/jit/metainterp/optimizeutil.py
+++ b/pypy/jit/metainterp/optimizeopt/util.py
@@ -1,21 +1,10 @@
+import py
 from pypy.rlib.objectmodel import r_dict, compute_identity_hash
 from pypy.rlib.rarithmetic import intmask
 from pypy.rlib.unroll import unrolling_iterable
 from pypy.jit.metainterp import resoperation, history
-from pypy.jit.metainterp.jitexc import JitException
 from pypy.rlib.debug import make_sure_not_resized
-
-class InvalidLoop(JitException):
-    """Raised when the optimize*.py detect that the loop that
-    we are trying to build cannot possibly make sense as a
-    long-running loop (e.g. it cannot run 2 complete iterations)."""
-
-class RetraceLoop(JitException):
-    """ Raised when inlining a short preamble resulted in an
-        InvalidLoop. This means the optimized loop is too specialized
-        to be useful here, so we trace it again and produced a second
-        copy specialized in some different way.
-    """
+from pypy.jit.metainterp.resoperation import rop
 
 # ____________________________________________________________
 # Misc. utilities
@@ -113,3 +102,49 @@
 
 def args_dict_box():
     return r_dict(args_eq, args_hash)
+
+
+# ____________________________________________________________
+
+def equaloplists(oplist1, oplist2, strict_fail_args=True, remap={},
+                 text_right=None):
+    # try to use the full width of the terminal to display the list
+    # unfortunately, does not work with the default capture method of py.test
+    # (which is fd), so you need to use either -s or --capture=sys, else you
+    # get the standard 80 columns width
+    totwidth = py.io.get_terminal_width()
+    width = totwidth / 2 - 1
+    print ' Comparing lists '.center(totwidth, '-')
+    text_right = text_right or 'expected'
+    print '%s| %s' % ('optimized'.center(width), text_right.center(width))
+    for op1, op2 in zip(oplist1, oplist2):
+        txt1 = str(op1)
+        txt2 = str(op2)
+        while txt1 or txt2:
+            print '%s| %s' % (txt1[:width].ljust(width), txt2[:width])
+            txt1 = txt1[width:]
+            txt2 = txt2[width:]
+        assert op1.getopnum() == op2.getopnum()
+        assert op1.numargs() == op2.numargs()
+        for i in range(op1.numargs()):
+            x = op1.getarg(i)
+            y = op2.getarg(i)
+            assert x == remap.get(y, y)
+        if op2.result in remap:
+            assert op1.result == remap[op2.result]
+        else:
+            remap[op2.result] = op1.result
+        if op1.getopnum() != rop.JUMP:      # xxx obscure
+            assert op1.getdescr() == op2.getdescr()
+        if op1.getfailargs() or op2.getfailargs():
+            assert len(op1.getfailargs()) == len(op2.getfailargs())
+            if strict_fail_args:
+                for x, y in zip(op1.getfailargs(), op2.getfailargs()):
+                    assert x == remap.get(y, y)
+            else:
+                fail_args1 = set(op1.getfailargs())
+                fail_args2 = set([remap.get(y, y) for y in op2.getfailargs()])
+                assert fail_args1 == fail_args2
+    assert len(oplist1) == len(oplist2)
+    print '-'*totwidth
+    return True
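
For reference, the new util.equaloplists is driven the same way
BaseTest.assert_equal in test_util.py above drives it: each inputarg of the
expected loop is remapped onto the corresponding inputarg of the optimized
loop before the operation-by-operation comparison. A self-contained sketch,
assuming only names appearing in this diff (the helper loops_match is
illustrative, not part of the patch):

from pypy.jit.tool.oparser import pure_parse
from pypy.jit.metainterp.optimizeopt.util import equaloplists

def loops_match(optimized_src, expected_src):
    # parse both loops with a shared namespace, then map the expected loop's
    # inputargs onto the optimized loop's inputargs for the comparison
    namespace = {}
    optimized = pure_parse(optimized_src, namespace=namespace)
    expected = pure_parse(expected_src, namespace=namespace)
    remap = {}
    for box1, box2 in zip(optimized.inputargs, expected.inputargs):
        remap[box2] = box1
    return equaloplists(optimized.operations, expected.operations,
                        strict_fail_args=False, remap=remap)
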
diff --git a/pypy/jit/metainterp/optimizeopt/virtualize.py b/pypy/jit/metainterp/optimizeopt/virtualize.py
--- a/pypy/jit/metainterp/optimizeopt/virtualize.py
+++ b/pypy/jit/metainterp/optimizeopt/virtualize.py
@@ -1,7 +1,7 @@
 from pypy.jit.metainterp.history import Const, ConstInt, BoxInt
 from pypy.jit.metainterp.resoperation import rop, ResOperation
-from pypy.jit.metainterp.optimizeutil import _findall, sort_descrs
-from pypy.jit.metainterp.optimizeutil import descrlist_dict
+from pypy.jit.metainterp.optimizeopt.util import _findall, sort_descrs
+from pypy.jit.metainterp.optimizeopt.util import descrlist_dict
 from pypy.rlib.objectmodel import we_are_translated
 from pypy.jit.metainterp.optimizeopt import optimizer
 from pypy.jit.metainterp.optimizeopt.optimizer import OptValue
diff --git a/pypy/jit/metainterp/pyjitpl.py b/pypy/jit/metainterp/pyjitpl.py
--- a/pypy/jit/metainterp/pyjitpl.py
+++ b/pypy/jit/metainterp/pyjitpl.py
@@ -21,7 +21,8 @@
 from pypy.rlib.objectmodel import specialize
 from pypy.jit.codewriter.jitcode import JitCode, SwitchDictDescr, MissingLiveness
 from pypy.jit.codewriter import heaptracker, longlong
-from pypy.jit.metainterp.optimizeutil import RetraceLoop, args_dict_box, args_dict
+from pypy.jit.metainterp.optimizeopt.util import args_dict_box, args_dict
+from pypy.jit.metainterp.optimize import RetraceLoop
 
 # ____________________________________________________________
 
@@ -867,7 +868,7 @@
         any_operation = len(self.metainterp.history.operations) > 0
         jitdriver_sd = self.metainterp.staticdata.jitdrivers_sd[jdindex]
         self.verify_green_args(jitdriver_sd, greenboxes)
-        self.debug_merge_point(jitdriver_sd, self.metainterp.in_recursion,
+        self.debug_merge_point(jdindex, self.metainterp.in_recursion,
                                greenboxes)
 
         if self.metainterp.seen_loop_header_for_jdindex < 0:
@@ -914,13 +915,10 @@
                                     assembler_call=True)
             raise ChangeFrame
 
-    def debug_merge_point(self, jitdriver_sd, in_recursion, greenkey):
+    def debug_merge_point(self, jd_index, in_recursion, greenkey):
         # debugging: produce a DEBUG_MERGE_POINT operation
-        loc = jitdriver_sd.warmstate.get_location_str(greenkey)
-        debug_print(loc)
-        constloc = self.metainterp.cpu.ts.conststr(loc)
-        self.metainterp.history.record(rop.DEBUG_MERGE_POINT,
-                                       [constloc, ConstInt(in_recursion)], None)
+        args = [ConstInt(jd_index), ConstInt(in_recursion)] + greenkey
+        self.metainterp.history.record(rop.DEBUG_MERGE_POINT, args, None)
 
     @arguments("box", "label")
     def opimpl_goto_if_exception_mismatch(self, vtablebox, next_exc_target):
@@ -1265,8 +1263,7 @@
     logger_ops = None
 
     def __init__(self, cpu, options,
-                 ProfilerClass=EmptyProfiler, warmrunnerdesc=None,
-                 jit_ffi=True):
+                 ProfilerClass=EmptyProfiler, warmrunnerdesc=None):
         self.cpu = cpu
         self.stats = self.cpu.stats
         self.options = options
@@ -1276,7 +1273,11 @@
         self.profiler = ProfilerClass()
         self.profiler.cpu = cpu
         self.warmrunnerdesc = warmrunnerdesc
-        self.jit_ffi = jit_ffi
+        if warmrunnerdesc:
+            self.config = warmrunnerdesc.translator.config
+        else:
+            from pypy.config.pypyoption import get_pypy_config
+            self.config = get_pypy_config(translating=True)
 
         backendmodule = self.cpu.__module__
         backendmodule = backendmodule.split('.')[-2]
@@ -1927,7 +1928,6 @@
 
         self.history.inputargs = original_inputargs
         self.history.operations.pop()     # remove the JUMP
-        # FIXME: Why is self.history.inputargs not restored?
 
     def compile_bridge(self, live_arg_boxes):
         num_green_args = self.jitdriver_sd.num_green_args
@@ -1963,6 +1963,8 @@
                                               start_resumedescr, False)
         self.history.operations.pop()     # remove the JUMP
         if loop_token is None:
+            self.history.inputargs = original_inputargs
+            self.history.operations = original_operations
             return
 
         if loop_token.short_preamble:
diff --git a/pypy/jit/metainterp/resoperation.py b/pypy/jit/metainterp/resoperation.py
--- a/pypy/jit/metainterp/resoperation.py
+++ b/pypy/jit/metainterp/resoperation.py
@@ -191,9 +191,15 @@
         # of the operation.  It must inherit from AbstractDescr.  The
         # backend provides it with cpu.fielddescrof(), cpu.arraydescrof(),
         # cpu.calldescrof(), and cpu.typedescrof().
+        self._check_descr(descr)
+        self._descr = descr
+
+    def _check_descr(self, descr):
+        if not we_are_translated() and getattr(descr, 'I_am_a_descr', False):
+            return # needed for the mock case in oparser_model
         from pypy.jit.metainterp.history import check_descr
         check_descr(descr)
-        self._descr = descr
+
 
 class GuardResOp(ResOpWithDescr):
 
@@ -471,8 +477,9 @@
     'STRSETITEM/3',
     'UNICODESETITEM/3',
     #'RUNTIMENEW/1',     # ootype operation
-    'COND_CALL_GC_WB/2d', # [objptr, newvalue]   (for the write barrier)
-    'DEBUG_MERGE_POINT/2',      # debugging only
+    'COND_CALL_GC_WB/2d', # [objptr, newvalue] (for the write barrier)
+    'COND_CALL_GC_WB_ARRAY/3d', # [objptr, arrayindex, newvalue] (write barr.)
+    'DEBUG_MERGE_POINT/*',      # debugging only
     'JIT_DEBUG/*',              # debugging only
     'VIRTUAL_REF_FINISH/2',   # removed before it's passed to the backend
     'COPYSTRCONTENT/5',       # src, dst, srcstart, dststart, length
@@ -485,6 +492,7 @@
     'CALL_ASSEMBLER/*d',  # call already compiled assembler
     'CALL_MAY_FORCE/*d',
     'CALL_LOOPINVARIANT/*d',
+    'CALL_RELEASE_GIL/*d',  # release the GIL and "close the stack" for asmgcc
     #'OOSEND',                     # ootype operation
     #'OOSEND_PURE',                # ootype operation
     'CALL_PURE/*d',             # removed before it's passed to the backend
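
The switch of DEBUG_MERGE_POINT to variable arity goes together with the
pyjitpl.py change earlier in this diff: the operation now carries the
jitdriver index, the recursion depth and the green boxes instead of a
precomputed location string, and the location is only resolved when logging
(see test_logger.py below). A sketch of the new recording convention; the
helper name is illustrative only:

from pypy.jit.metainterp.resoperation import ResOperation, rop
from pypy.jit.metainterp.history import ConstInt

def make_debug_merge_point(jd_index, in_recursion, greenkey):
    # greenkey is the list of green boxes; no location string is computed here
    args = [ConstInt(jd_index), ConstInt(in_recursion)] + greenkey
    return ResOperation(rop.DEBUG_MERGE_POINT, args, None)
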
diff --git a/pypy/jit/metainterp/resume.py b/pypy/jit/metainterp/resume.py
--- a/pypy/jit/metainterp/resume.py
+++ b/pypy/jit/metainterp/resume.py
@@ -10,7 +10,7 @@
 from pypy.rlib.objectmodel import we_are_translated, specialize
 from pypy.rlib.debug import have_debug_prints, ll_assert
 from pypy.rlib.debug import debug_start, debug_stop, debug_print
-from pypy.jit.metainterp.optimizeutil import InvalidLoop
+from pypy.jit.metainterp.optimize import InvalidLoop
 
 # Logic to encode the chain of frames and the state of the boxes at a
 # guard operation, and to decode it again.  This is a bit advanced,
diff --git a/pypy/jit/metainterp/test/support.py b/pypy/jit/metainterp/test/support.py
--- a/pypy/jit/metainterp/test/support.py
+++ b/pypy/jit/metainterp/test/support.py
@@ -15,14 +15,14 @@
                   supports_longlong=False, **kwds):
     from pypy.jit.codewriter import support
 
-    class FakeJitCell:
+    class FakeJitCell(object):
         __compiled_merge_points = []
         def get_compiled_merge_points(self):
             return self.__compiled_merge_points[:]
         def set_compiled_merge_points(self, lst):
             self.__compiled_merge_points = lst
 
-    class FakeWarmRunnerState:
+    class FakeWarmRunnerState(object):
         def attach_unoptimized_bridge_from_interp(self, greenkey, newloop):
             pass
 
@@ -30,6 +30,9 @@
             from pypy.rpython.annlowlevel import llhelper
             return llhelper(FUNCPTR, func)
 
+        def get_location_str(self, args):
+            return 'location'
+
         def jit_cell_at_key(self, greenkey):
             assert greenkey == []
             return self._cell
diff --git a/pypy/jit/metainterp/test/test_ajit.py b/pypy/jit/metainterp/test/test_ajit.py
--- a/pypy/jit/metainterp/test/test_ajit.py
+++ b/pypy/jit/metainterp/test/test_ajit.py
@@ -534,7 +534,7 @@
                 y -= x
             return y
         #
-        res = self.meta_interp(f, [3, 6], repeat=7)
+        res = self.meta_interp(f, [3, 6], repeat=7, function_threshold=0)
         assert res == 6 - 4 - 5
         self.check_history(call=0)   # because the trace starts in the middle
         #
diff --git a/pypy/jit/metainterp/test/test_compile.py b/pypy/jit/metainterp/test/test_compile.py
--- a/pypy/jit/metainterp/test/test_compile.py
+++ b/pypy/jit/metainterp/test/test_compile.py
@@ -1,3 +1,4 @@
+from pypy.config.pypyoption import get_pypy_config
 from pypy.jit.metainterp.history import LoopToken, ConstInt, History, Stats
 from pypy.jit.metainterp.history import BoxInt, INT
 from pypy.jit.metainterp.compile import insert_loop_token, compile_new_loop
@@ -5,7 +6,7 @@
 from pypy.jit.metainterp.compile import ResumeGuardCountersInt
 from pypy.jit.metainterp.compile import compile_tmp_callback
 from pypy.jit.metainterp import jitprof, typesystem, compile
-from pypy.jit.metainterp.test.test_optimizeutil import LLtypeMixin
+from pypy.jit.metainterp.optimizeopt.test.test_util import LLtypeMixin
 from pypy.jit.tool.oparser import parse
 from pypy.jit.metainterp.optimizeopt import ALL_OPTS_DICT
 
@@ -30,13 +31,16 @@
     ts = typesystem.llhelper
     def __init__(self):
         self.seen = []
-    def compile_loop(self, inputargs, operations, token):
+    def compile_loop(self, inputargs, operations, token, name=''):
         self.seen.append((inputargs, operations, token))
 
 class FakeLogger(object):
     def log_loop(self, inputargs, operations, number=0, type=None, ops_offset=None):
         pass
 
+    def repr_of_resop(self, op):
+        return repr(op)
+
 class FakeState(object):
     enable_opts = ALL_OPTS_DICT.copy()
     enable_opts.pop('unroll')
@@ -44,6 +48,9 @@
     def attach_unoptimized_bridge_from_interp(*args):
         pass
 
+    def get_location_str(self, args):
+        return 'location'
+
 class FakeGlobalData(object):
     loopnumbering = 0
 
@@ -51,11 +58,11 @@
     
     logger_noopt = FakeLogger()
     logger_ops = FakeLogger()
+    config = get_pypy_config(translating=True)
 
     stats = Stats()
     profiler = jitprof.EmptyProfiler()
     warmrunnerdesc = None
-    jit_ffi = False
     def log(self, msg, event_kind=None):
         pass
 
@@ -63,6 +70,8 @@
     call_pure_results = {}
     class jitdriver_sd:
         warmstate = FakeState()
+        on_compile = staticmethod(lambda *args: None)
+        on_compile_bridge = staticmethod(lambda *args: None)
 
 def test_compile_new_loop():
     cpu = FakeCPU()
diff --git a/pypy/jit/metainterp/test/test_fficall.py b/pypy/jit/metainterp/test/test_fficall.py
--- a/pypy/jit/metainterp/test/test_fficall.py
+++ b/pypy/jit/metainterp/test/test_fficall.py
@@ -1,28 +1,46 @@
 
 import py
-from pypy.rlib.jit import JitDriver, hint
+from pypy.rlib.rarithmetic import r_singlefloat, r_longlong, r_ulonglong
+from pypy.rlib.jit import JitDriver, hint, dont_look_inside
 from pypy.rlib.unroll import unrolling_iterable
-from pypy.rlib.libffi import ArgChain
+from pypy.rlib.libffi import ArgChain, longlong2float, float2longlong
+from pypy.rlib.libffi import IS_32_BIT
 from pypy.rlib.test.test_libffi import TestLibffiCall as _TestLibffiCall
 from pypy.rpython.lltypesystem import lltype, rffi
+from pypy.rlib.objectmodel import specialize
+from pypy.tool.sourcetools import func_with_new_name
 from pypy.jit.metainterp.test.support import LLJitMixin
 
-
 class TestFfiCall(LLJitMixin, _TestLibffiCall):
 
     # ===> ../../../rlib/test/test_libffi.py
 
-    def call(self, funcspec, args, RESULT, init_result=0):
+    def call(self, funcspec, args, RESULT, init_result=0, is_struct=False):
         """
         Call the function specified by funcspec in a loop, and let the jit
         see and optimize it.
         """
         #
         lib, name, argtypes, restype = funcspec
-        args = unrolling_iterable(args)
+        method_and_args = []
+        for argval in args:
+            if type(argval) is r_singlefloat:
+                method_name = 'arg_singlefloat'
+                argval = float(argval)
+            elif IS_32_BIT and type(argval) in [r_longlong, r_ulonglong]:
+                method_name = 'arg_longlong'
+                argval = rffi.cast(rffi.LONGLONG, argval)
+                argval = longlong2float(argval)
+            elif isinstance(argval, tuple):
+                method_name, argval = argval
+            else:
+                method_name = 'arg'
+            method_and_args.append((method_name, argval))
+        method_and_args = unrolling_iterable(method_and_args)
         #
         reds = ['n', 'res', 'func']
-        if type(init_result) is float:
+        if (RESULT in [rffi.FLOAT, rffi.DOUBLE] or
+            IS_32_BIT and RESULT in [rffi.LONGLONG, rffi.ULONGLONG]):
             reds = ['n', 'func', 'res'] # floats must be *after* refs
         driver = JitDriver(reds=reds, greens=[])
         #
@@ -34,12 +52,17 @@
                 driver.can_enter_jit(n=n, res=res, func=func)
                 func = hint(func, promote=True)
                 argchain = ArgChain()
-                for argval in args: # this loop is unrolled
-                    argchain.arg(argval)
-                res = func.call(argchain, RESULT)
+                # this loop is unrolled
+                for method_name, argval in method_and_args:
+                    getattr(argchain, method_name)(argval)
+                res = func.call(argchain, RESULT, is_struct=is_struct)
                 n += 1
             return res
         #
-        res = self.meta_interp(f, [0])
+        res = self.meta_interp(f, [0], backendopt=True)
         return res
 
+    def test_byval_result(self):
+        _TestLibffiCall.test_byval_result(self)
+    test_byval_result.__doc__ = _TestLibffiCall.test_byval_result.__doc__
+    test_byval_result.dont_track_allocations = True
diff --git a/pypy/jit/metainterp/test/test_history.py b/pypy/jit/metainterp/test/test_history.py
--- a/pypy/jit/metainterp/test/test_history.py
+++ b/pypy/jit/metainterp/test/test_history.py
@@ -1,5 +1,5 @@
 from pypy.jit.metainterp.history import *
-from pypy.rpython.lltypesystem import lltype, llmemory
+from pypy.rpython.lltypesystem import lltype, llmemory, rffi
 
 
 def test_repr():
@@ -10,6 +10,18 @@
     const = ConstPtr(lltype.cast_opaque_ptr(llmemory.GCREF, s))
     assert const._getrepr_() == "*T"
 
+def test_repr_ll2ctypes():
+    ptr = lltype.malloc(rffi.VOIDPP.TO, 10, flavor='raw')
+    # force it to be a ll2ctypes object
+    ptr = rffi.cast(rffi.VOIDPP, rffi.cast(rffi.LONG, ptr))
+    adr = llmemory.cast_ptr_to_adr(ptr)
+    lltype.free(ptr, flavor='raw')
+    intval = llmemory.cast_adr_to_int(adr, 'symbolic')
+    box = BoxInt(intval)
+    s = box.repr_rpython()
+    assert s.startswith('12345/') # the arbitrary hash value used by
+                                  # make_hashable_int
+
 def test_same_constant():
     c1a = ConstInt(0)
     c1b = ConstInt(0)
diff --git a/pypy/jit/metainterp/test/test_jitdriver.py b/pypy/jit/metainterp/test/test_jitdriver.py
--- a/pypy/jit/metainterp/test/test_jitdriver.py
+++ b/pypy/jit/metainterp/test/test_jitdriver.py
@@ -113,6 +113,7 @@
             return n
         #
         def loop2(g, r):
+            myjitdriver1.set_param('function_threshold', 0)
             while r > 0:
                 myjitdriver2.can_enter_jit(g=g, r=r)
                 myjitdriver2.jit_merge_point(g=g, r=r)
diff --git a/pypy/jit/metainterp/test/test_logger.py b/pypy/jit/metainterp/test/test_logger.py
--- a/pypy/jit/metainterp/test/test_logger.py
+++ b/pypy/jit/metainterp/test/test_logger.py
@@ -4,7 +4,7 @@
 from pypy.jit.metainterp import logger
 from pypy.jit.metainterp.typesystem import llhelper
 from StringIO import StringIO
-from pypy.jit.metainterp.test.test_optimizeopt import equaloplists
+from pypy.jit.metainterp.optimizeopt.util import equaloplists
 from pypy.jit.metainterp.history import AbstractDescr, LoopToken, BasicFailDescr
 from pypy.jit.backend.model import AbstractCPU
 
@@ -36,19 +36,29 @@
         return capturing(logger.Logger.log_loop, self,
                          loop.inputargs, loop.operations, ops_offset=ops_offset)
 
-    def repr_of_descr(self, descr):
-        for k, v in self.namespace.items():
-            if v == descr:
-                return k
-        return descr.repr_of_descr()
+    def _make_log_operations(self1):
+        class LogOperations(logger.LogOperations):
+            def repr_of_descr(self, descr):
+                for k, v in self1.namespace.items():
+                    if v == descr:
+                        return k
+                return descr.repr_of_descr()
+        logops = LogOperations(self1.metainterp_sd, self1.guard_number)
+        self1.logops = logops
+        return logops
 
 class TestLogger(object):
     ts = llhelper
 
     def make_metainterp_sd(self):
+        class FakeJitDriver(object):
+            class warmstate(object):
+                get_location_str = staticmethod(lambda args: "dupa")
+        
         class FakeMetaInterpSd:
             cpu = AbstractCPU()
             cpu.ts = self.ts
+            jitdrivers_sd = [FakeJitDriver()]
             def get_name_from_address(self, addr):
                 return 'Name'
         return FakeMetaInterpSd()
@@ -66,7 +76,7 @@
         if check_equal:
             equaloplists(loop.operations, oloop.operations)
             assert oloop.inputargs == loop.inputargs
-        return loop, oloop
+        return logger, loop, oloop
     
     def test_simple(self):
         inp = '''
@@ -106,18 +116,18 @@
     def test_debug_merge_point(self):
         inp = '''
         []
-        debug_merge_point("info", 0)
+        debug_merge_point(0, 0)
         '''
-        loop, oloop = self.reparse(inp, check_equal=False)
-        assert loop.operations[0].getarg(0)._get_str() == 'info'
-        assert oloop.operations[0].getarg(0)._get_str() == 'info'
+        _, loop, oloop = self.reparse(inp, check_equal=False)
+        assert loop.operations[0].getarg(1).getint() == 0
+        assert oloop.operations[0].getarg(1)._get_str() == "dupa"
         
     def test_floats(self):
         inp = '''
         [f0]
         f1 = float_add(3.5, f0)
         '''
-        loop, oloop = self.reparse(inp)
+        _, loop, oloop = self.reparse(inp)
         equaloplists(loop.operations, oloop.operations)
 
     def test_jump(self):
@@ -179,6 +189,17 @@
         assert output.splitlines()[0] == "# bridge out of Guard 3 with 0 ops"
         pure_parse(output)
 
+    def test_repr_single_op(self):
+        inp = '''
+        [i0, i1, i2, p3, p4, p5]
+        i6 = int_add(i1, i2)
+        i8 = int_add(i6, 3)
+        jump(i0, i8, i6, p3, p4, p5)
+        '''
+        logger, loop, _ = self.reparse(inp)
+        op = loop.operations[1]
+        assert logger.logops.repr_of_resop(op) == "i8 = int_add(i6, 3)"
+
     def test_ops_offset(self):
         inp = '''
         [i0]
diff --git a/pypy/jit/metainterp/test/test_pyjitpl.py b/pypy/jit/metainterp/test/test_pyjitpl.py
--- a/pypy/jit/metainterp/test/test_pyjitpl.py
+++ b/pypy/jit/metainterp/test/test_pyjitpl.py
@@ -6,7 +6,7 @@
 from pypy.jit.metainterp.history import BoxInt, ConstInt
 from pypy.jit.metainterp.history import History
 from pypy.jit.metainterp.resoperation import ResOperation, rop
-from pypy.jit.metainterp.test.test_optimizeopt import equaloplists
+from pypy.jit.metainterp.optimizeopt.util import equaloplists
 from pypy.jit.codewriter.jitcode import JitCode
 
 
diff --git a/pypy/jit/metainterp/test/test_recursive.py b/pypy/jit/metainterp/test/test_recursive.py
--- a/pypy/jit/metainterp/test/test_recursive.py
+++ b/pypy/jit/metainterp/test/test_recursive.py
@@ -483,6 +483,7 @@
  
         def main(inline):
             myjitdriver.set_param("threshold", 10)
+            myjitdriver.set_param('function_threshold', 60)
             if inline:
                 myjitdriver.set_param('inlining', True)
             else:
@@ -1193,6 +1194,51 @@
                 i -= 1
         self.meta_interp(portal, [0, 10], inline=True)
 
+    def test_trace_from_start_always(self):
+        from pypy.rlib.nonconst import NonConstant
+        
+        driver = JitDriver(greens = ['c'], reds = ['i', 'v'])
+
+        def portal(c, i, v):
+            while i > 0:
+                driver.jit_merge_point(c=c, i=i, v=v)
+                portal(c, i - 1, v)
+                if v:
+                    driver.can_enter_jit(c=c, i=i, v=v)
+                break
+
+        def main(c, i, set_param, v):
+            if set_param:
+                driver.set_param('function_threshold', 0)
+            portal(c, i, v)
+
+        self.meta_interp(main, [10, 10, False, False], inline=True)
+        self.check_tree_loop_count(1)
+        self.check_loop_count(0)
+        self.meta_interp(main, [3, 10, True, False], inline=True)
+        self.check_tree_loop_count(0)
+        self.check_loop_count(0)
+
+    def test_trace_from_start_does_not_prevent_inlining(self):
+        driver = JitDriver(greens = ['c', 'bc'], reds = ['i'])
+        
+        def portal(bc, c, i):
+            while True:
+                driver.jit_merge_point(c=c, bc=bc, i=i)
+                if bc == 0:
+                    portal(1, 8, 0)
+                    c += 1
+                else:
+                    return
+                if c == 10: # bc == 0                    
+                    c = 0
+                    if i >= 100:
+                        return
+                    driver.can_enter_jit(c=c, bc=bc, i=i)
+                i += 1
+
+        self.meta_interp(portal, [0, 0, 0], inline=True)
+        self.check_loops(call=0, call_may_force=0)
 
 class TestLLtype(RecursiveTests, LLJitMixin):
     pass
diff --git a/pypy/jit/metainterp/test/test_resume.py b/pypy/jit/metainterp/test/test_resume.py
--- a/pypy/jit/metainterp/test/test_resume.py
+++ b/pypy/jit/metainterp/test/test_resume.py
@@ -6,7 +6,7 @@
 from pypy.jit.metainterp.resume import *
 from pypy.jit.metainterp.history import BoxInt, BoxPtr, ConstInt
 from pypy.jit.metainterp.history import ConstPtr, ConstFloat
-from pypy.jit.metainterp.test.test_optimizeutil import LLtypeMixin
+from pypy.jit.metainterp.optimizeopt.test.test_util import LLtypeMixin
 from pypy.jit.metainterp import executor
 from pypy.jit.codewriter import heaptracker, longlong
 
diff --git a/pypy/jit/metainterp/test/test_virtualizable.py b/pypy/jit/metainterp/test/test_virtualizable.py
--- a/pypy/jit/metainterp/test/test_virtualizable.py
+++ b/pypy/jit/metainterp/test/test_virtualizable.py
@@ -11,7 +11,7 @@
 from pypy.rpython.rclass import FieldListAccessor
 from pypy.jit.metainterp.warmspot import get_stats, get_translator
 from pypy.jit.metainterp import history
-from pypy.jit.metainterp.test.test_optimizeutil import LLtypeMixin
+from pypy.jit.metainterp.optimizeopt.test.test_util import LLtypeMixin
 
 def promote_virtualizable(*args):
     pass
diff --git a/pypy/jit/metainterp/test/test_warmspot.py b/pypy/jit/metainterp/test/test_warmspot.py
--- a/pypy/jit/metainterp/test/test_warmspot.py
+++ b/pypy/jit/metainterp/test/test_warmspot.py
@@ -80,7 +80,7 @@
         self.meta_interp(f, [123, 10])
         assert len(get_stats().locations) >= 4
         for loc in get_stats().locations:
-            assert loc == 'GREEN IS 123.'
+            assert loc == (0, 123)
 
     def test_set_param_enable_opts(self):
         from pypy.rpython.annlowlevel import llstr, hlstr
diff --git a/pypy/jit/metainterp/test/test_warmstate.py b/pypy/jit/metainterp/test/test_warmstate.py
--- a/pypy/jit/metainterp/test/test_warmstate.py
+++ b/pypy/jit/metainterp/test/test_warmstate.py
@@ -181,6 +181,7 @@
         cpu = None
         memory_manager = None
     class FakeJitDriverSD:
+        jitdriver = None
         _green_args_spec = [lltype.Signed, lltype.Float]
         _get_printable_location_ptr = None
         _confirm_enter_jit_ptr = None
@@ -207,6 +208,7 @@
         cpu = None
         memory_manager = None
     class FakeJitDriverSD:
+        jitdriver = None
         _green_args_spec = [lltype.Signed, lltype.Float]
         _get_printable_location_ptr = llhelper(GET_LOCATION, get_location)
         _confirm_enter_jit_ptr = None
@@ -230,6 +232,7 @@
         cpu = None
         memory_manager = None
     class FakeJitDriverSD:
+        jitdriver = None
         _green_args_spec = [lltype.Signed, lltype.Float]
         _get_printable_location_ptr = None
         _confirm_enter_jit_ptr = llhelper(ENTER_JIT, confirm_enter_jit)
@@ -253,6 +256,7 @@
         cpu = None
         memory_manager = None
     class FakeJitDriverSD:
+        jitdriver = None
         _green_args_spec = [lltype.Signed, lltype.Float]
         _get_printable_location_ptr = None
         _confirm_enter_jit_ptr = None
diff --git a/pypy/jit/metainterp/warmspot.py b/pypy/jit/metainterp/warmspot.py
--- a/pypy/jit/metainterp/warmspot.py
+++ b/pypy/jit/metainterp/warmspot.py
@@ -66,6 +66,7 @@
 def jittify_and_run(interp, graph, args, repeat=1,
                     backendopt=False, trace_limit=sys.maxint,
                     inline=False, loop_longevity=0, retrace_limit=5,
+                    function_threshold=4,
                     enable_opts=ALL_OPTS_NAMES, **kwds):
     from pypy.config.config import ConfigError
     translator = interp.typer.annotator.translator
@@ -77,9 +78,14 @@
         translator.config.translation.list_comprehension_operations = True
     except ConfigError:
         pass
+    try:
+        translator.config.translation.jit_ffi = True
+    except ConfigError:
+        pass
     warmrunnerdesc = WarmRunnerDesc(translator, backendopt=backendopt, **kwds)
     for jd in warmrunnerdesc.jitdrivers_sd:
         jd.warmstate.set_param_threshold(3)          # for tests
+        jd.warmstate.set_param_function_threshold(function_threshold)
         jd.warmstate.set_param_trace_eagerness(2)    # for tests
         jd.warmstate.set_param_trace_limit(trace_limit)
         jd.warmstate.set_param_inlining(inline)
@@ -422,7 +428,7 @@
         if self.translator.rtyper.type_system.name == 'lltypesystem':
             def maybe_enter_jit(*args):
                 try:
-                    maybe_compile_and_run(*args)
+                    maybe_compile_and_run(state.increment_threshold, *args)
                 except JitException:
                     raise     # go through
                 except Exception, e:
@@ -430,15 +436,13 @@
             maybe_enter_jit._always_inline_ = True
         else:
             def maybe_enter_jit(*args):
-                maybe_compile_and_run(*args)
+                maybe_compile_and_run(state.increment_threshold, *args)
             maybe_enter_jit._always_inline_ = True
         jd._maybe_enter_jit_fn = maybe_enter_jit
 
-        can_inline = state.can_inline_greenargs
         num_green_args = jd.num_green_args
         def maybe_enter_from_start(*args):
-            if not can_inline(*args[:num_green_args]):
-                maybe_compile_and_run(*args)
+            maybe_compile_and_run(state.increment_function_threshold, *args)
         maybe_enter_from_start._always_inline_ = True
         jd._maybe_enter_from_start_fn = maybe_enter_from_start
 
diff --git a/pypy/jit/metainterp/warmstate.py b/pypy/jit/metainterp/warmstate.py
--- a/pypy/jit/metainterp/warmstate.py
+++ b/pypy/jit/metainterp/warmstate.py
@@ -208,15 +208,20 @@
             meth = getattr(self, 'set_param_' + name)
             meth(default_value)
 
-    def set_param_threshold(self, threshold):
+    def _compute_threshold(self, threshold):
         if threshold <= 0:
-            self.increment_threshold = 0   # never reach the THRESHOLD_LIMIT
-            return
+            return 0 # never reach the THRESHOLD_LIMIT
         if threshold < 2:
             threshold = 2
-        self.increment_threshold = (self.THRESHOLD_LIMIT // threshold) + 1
+        return (self.THRESHOLD_LIMIT // threshold) + 1
         # the number is at least 1, and at most about half THRESHOLD_LIMIT
 
+    def set_param_threshold(self, threshold):
+        self.increment_threshold = self._compute_threshold(threshold)
+
+    def set_param_function_threshold(self, threshold):
+        self.increment_function_threshold = self._compute_threshold(threshold)
+
     def set_param_trace_eagerness(self, value):
         self.trace_eagerness = value
 
@@ -291,7 +296,7 @@
         self.make_jitdriver_callbacks()
         confirm_enter_jit = self.confirm_enter_jit
 
-        def maybe_compile_and_run(*args):
+        def maybe_compile_and_run(threshold, *args):
             """Entry point to the JIT.  Called at the point with the
             can_enter_jit() hint.
             """
@@ -307,7 +312,7 @@
 
             if cell.counter >= 0:
                 # update the profiling counter
-                n = cell.counter + self.increment_threshold
+                n = cell.counter + threshold
                 if n <= self.THRESHOLD_LIMIT:       # bound not reached
                     cell.counter = n
                     return
@@ -599,12 +604,8 @@
         get_location_ptr = self.jitdriver_sd._get_printable_location_ptr
         if get_location_ptr is None:
             missing = '(no jitdriver.get_printable_location!)'
-            missingll = llstr(missing)
             def get_location_str(greenkey):
-                if we_are_translated():
-                    return missingll
-                else:
-                    return missing
+                return missing
         else:
             rtyper = self.warmrunnerdesc.rtyper
             unwrap_greenkey = self.make_unwrap_greenkey()
@@ -612,10 +613,10 @@
             def get_location_str(greenkey):
                 greenargs = unwrap_greenkey(greenkey)
                 fn = support.maybe_on_top_of_llinterp(rtyper, get_location_ptr)
-                res = fn(*greenargs)
-                if not we_are_translated() and not isinstance(res, str):
-                    res = hlstr(res)
-                return res
+                llres = fn(*greenargs)
+                if not we_are_translated() and isinstance(llres, str):
+                    return llres
+                return hlstr(llres)
         self.get_location_str = get_location_str
         #
         confirm_enter_jit_ptr = self.jitdriver_sd._confirm_enter_jit_ptr
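
For reference, the _compute_threshold() helper factored out in this file turns a
user-visible threshold (and now also the new function_threshold) into a per-visit
counter increment, so that a counter capped at THRESHOLD_LIMIT overflows after
roughly that many visits.  A minimal standalone sketch of the arithmetic, assuming
an illustrative THRESHOLD_LIMIT (the real constant is defined elsewhere in
warmstate.py and is not part of this diff):

    import sys

    THRESHOLD_LIMIT = sys.maxint // 2        # assumed value, for illustration only

    def compute_increment(threshold):
        # mirrors _compute_threshold() above
        if threshold <= 0:
            return 0                         # never reach THRESHOLD_LIMIT
        if threshold < 2:
            threshold = 2
        return (THRESHOLD_LIMIT // threshold) + 1

    def visits_until_compilation(threshold):
        # simulate the per-cell counter updated in maybe_compile_and_run()
        increment = compute_increment(threshold)
        if increment == 0:
            return None                      # the counter can never overflow
        counter, visits = 0, 0
        while counter + increment <= THRESHOLD_LIMIT:
            counter += increment
            visits += 1
        return visits + 1                    # the overflowing visit triggers compilation

    print visits_until_compilation(1000)     # roughly 1000
    print visits_until_compilation(4)        # roughly 4: the function_threshold that
                                             # jittify_and_run() passes for tests

This is also why maybe_compile_and_run() now receives the increment as an argument:
loop headers keep using increment_threshold, while traces started from the
beginning of a function use the separate increment_function_threshold.
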
diff --git a/pypy/jit/tl/pypyjit.py b/pypy/jit/tl/pypyjit.py
--- a/pypy/jit/tl/pypyjit.py
+++ b/pypy/jit/tl/pypyjit.py
@@ -30,6 +30,7 @@
     BACKEND = 'c'
 
 config = get_pypy_config(translating=True)
+config.translation.backendopt.inline_threshold = 0.1
 config.translation.gc = 'boehm'
 config.objspace.nofaking = True
 config.translating = True
diff --git a/pypy/jit/tool/oparser.py b/pypy/jit/tool/oparser.py
--- a/pypy/jit/tool/oparser.py
+++ b/pypy/jit/tool/oparser.py
@@ -3,24 +3,15 @@
 in a nicer fashion
 """
 
-from pypy.jit.metainterp.history import TreeLoop, BoxInt, ConstInt,\
-     ConstObj, ConstPtr, Box, BasicFailDescr, BoxFloat, ConstFloat,\
-     LoopToken, get_const_ptr_for_string, get_const_ptr_for_unicode
+from pypy.jit.tool.oparser_model import get_model
+
 from pypy.jit.metainterp.resoperation import rop, ResOperation, \
                                             ResOpWithDescr, N_aryOp, \
                                             UnaryOp, PlainResOp
-from pypy.jit.metainterp.typesystem import llhelper
-from pypy.jit.codewriter.heaptracker import adr2int
-from pypy.jit.codewriter import longlong
-from pypy.rpython.lltypesystem import lltype, llmemory
-from pypy.rpython.ootypesystem import ootype
 
 class ParseError(Exception):
     pass
 
-class Boxes(object):
-    pass
-
 class ESCAPE_OP(N_aryOp, ResOpWithDescr):
 
     OPNUM = -123
@@ -54,37 +45,15 @@
     def clone(self):
         return FORCE_SPILL(self.OPNUM, self.getarglist()[:])
 
-class ExtendedTreeLoop(TreeLoop):
 
-    def getboxes(self):
-        def opboxes(operations):
-            for op in operations:
-                yield op.result
-                for box in op.getarglist():
-                    yield box
-        def allboxes():
-            for box in self.inputargs:
-                yield box
-            for box in opboxes(self.operations):
-                yield box
-
-        boxes = Boxes()
-        for box in allboxes():
-            if isinstance(box, Box):
-                name = str(box)
-                setattr(boxes, name, box)
-        return boxes
-
-    def setvalues(self, **kwds):
-        boxes = self.getboxes()
-        for name, value in kwds.iteritems():
-            getattr(boxes, name).value = value
-
-def default_fail_descr(fail_args=None):
-    return BasicFailDescr()
+def default_fail_descr(model, fail_args=None):
+    return model.BasicFailDescr()
 
 
 class OpParser(object):
+
+    use_mock_model = False
+
     def __init__(self, input, cpu, namespace, type_system, boxkinds,
                  invent_fail_descr=default_fail_descr,
                  nonstrict=False):
@@ -100,7 +69,8 @@
             self._cache = {}
         self.invent_fail_descr = invent_fail_descr
         self.nonstrict = nonstrict
-        self.looptoken = LoopToken()
+        self.model = get_model(self.use_mock_model)
+        self.looptoken = self.model.LoopToken()
 
     def get_const(self, name, typ):
         if self._consts is None:
@@ -108,16 +78,16 @@
         obj = self._consts[name]
         if self.type_system == 'lltype':
             if typ == 'ptr':
-                return ConstPtr(obj)
+                return self.model.ConstPtr(obj)
             else:
                 assert typ == 'class'
-                return ConstInt(adr2int(llmemory.cast_ptr_to_adr(obj)))
+                return self.model.ConstInt(self.model.ptr_to_int(obj))
         else:
             if typ == 'ptr':
-                return ConstObj(obj)
+                return self.model.ConstObj(obj)
             else:
                 assert typ == 'class'
-                return ConstObj(ootype.cast_to_object(obj))
+                return self.model.ConstObj(ootype.cast_to_object(obj))
 
     def get_descr(self, poss_descr):
         if poss_descr.startswith('<'):
@@ -132,16 +102,16 @@
             pass
         if elem.startswith('i'):
             # integer
-            box = BoxInt()
-            _box_counter_more_than(elem[1:])
+            box = self.model.BoxInt()
+            _box_counter_more_than(self.model, elem[1:])
         elif elem.startswith('f'):
-            box = BoxFloat()
-            _box_counter_more_than(elem[1:])
+            box = self.model.BoxFloat()
+            _box_counter_more_than(self.model, elem[1:])
         elif elem.startswith('p'):
             # pointer
-            ts = getattr(self.cpu, 'ts', llhelper)
+            ts = getattr(self.cpu, 'ts', self.model.llhelper)
             box = ts.BoxRef()
-            _box_counter_more_than(elem[1:])
+            _box_counter_more_than(self.model, elem[1:])
         else:
             for prefix, boxclass in self.boxkinds.iteritems():
                 if elem.startswith(prefix):
@@ -175,21 +145,21 @@
 
     def getvar(self, arg):
         if not arg:
-            return ConstInt(0)
+            return self.model.ConstInt(0)
         try:
-            return ConstInt(int(arg))
+            return self.model.ConstInt(int(arg))
         except ValueError:
             if self.is_float(arg):
-                return ConstFloat(longlong.getfloatstorage(float(arg)))
+                return self.model.ConstFloat(self.model.convert_to_floatstorage(arg))
             if (arg.startswith('"') or arg.startswith("'") or
                 arg.startswith('s"')):
                 # XXX ootype
                 info = arg[1:].strip("'\"")
-                return get_const_ptr_for_string(info)
+                return self.model.get_const_ptr_for_string(info)
             if arg.startswith('u"'):
                 # XXX ootype
                 info = arg[1:].strip("'\"")
-                return get_const_ptr_for_unicode(info)
+                return self.model.get_const_ptr_for_unicode(info)
             if arg.startswith('ConstClass('):
                 name = arg[len('ConstClass('):-1]
                 return self.get_const(name, 'class')
@@ -197,9 +167,9 @@
                 return None
             elif arg == 'NULL':
                 if self.type_system == 'lltype':
-                    return ConstPtr(ConstPtr.value)
+                    return self.model.ConstPtr(self.model.ConstPtr.value)
                 else:
-                    return ConstObj(ConstObj.value)
+                    return self.model.ConstObj(self.model.ConstObj.value)
             elif arg.startswith('ConstPtr('):
                 name = arg[len('ConstPtr('):-1]
                 return self.get_const(name, 'ptr')
@@ -211,11 +181,8 @@
         args = []
         descr = None
         if argspec.strip():
-            if opname == 'debug_merge_point':
-                allargs = argspec.rsplit(', ', 1)
-            else:
-                allargs = [arg for arg in argspec.split(",")
-                           if arg != '']
+            allargs = [arg for arg in argspec.split(",")
+                       if arg != '']
 
             poss_descr = allargs[-1].strip()
             if poss_descr.startswith('descr='):
@@ -266,14 +233,14 @@
                                 "Unknown var in fail_args: %s" % arg)
                     fail_args.append(fail_arg)
             if descr is None and self.invent_fail_descr:
-                descr = self.invent_fail_descr(fail_args)
+                descr = self.invent_fail_descr(self.model, fail_args)
             if hasattr(descr, '_oparser_uses_descr_of_guard'):
                 descr._oparser_uses_descr_of_guard(self, fail_args)
         else:
             fail_args = None
             if opnum == rop.FINISH:
                 if descr is None and self.invent_fail_descr:
-                    descr = self.invent_fail_descr()
+                    descr = self.invent_fail_descr(self.model)
             elif opnum == rop.JUMP:
                 if descr is None and self.invent_fail_descr:
                     descr = self.looptoken
@@ -338,7 +305,7 @@
         num, ops, last_offset = self.parse_ops(base_indent, newlines, 0)
         if num < len(newlines):
             raise ParseError("unexpected dedent at line: %s" % newlines[num])
-        loop = ExtendedTreeLoop("loop")
+        loop = self.model.ExtendedTreeLoop("loop")
         loop.comment = first_comment
         loop.token = self.looptoken
         loop.operations = ops
@@ -394,7 +361,7 @@
 
 def parse(input, cpu=None, namespace=None, type_system='lltype',
           boxkinds=None, invent_fail_descr=default_fail_descr,
-          no_namespace=False, nonstrict=False):
+          no_namespace=False, nonstrict=False, OpParser=OpParser):
     if namespace is None and not no_namespace:
         namespace = {}
     return OpParser(input, cpu, namespace, type_system, boxkinds,
@@ -405,6 +372,6 @@
     return parse(*args, **kwds)
 
 
-def _box_counter_more_than(s):
+def _box_counter_more_than(model, s):
     if s.isdigit():
-        Box._counter = max(Box._counter, int(s)+1)
+        model.Box._counter = max(model.Box._counter, int(s)+1)
diff --git a/pypy/jit/tool/oparser_model.py b/pypy/jit/tool/oparser_model.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/tool/oparser_model.py
@@ -0,0 +1,148 @@
+class Boxes(object):
+    pass
+
+def get_real_model():
+    class LoopModel(object):
+        from pypy.jit.metainterp.history import TreeLoop, LoopToken
+        from pypy.jit.metainterp.history import Box, BoxInt, BoxFloat
+        from pypy.jit.metainterp.history import ConstInt, ConstObj, ConstPtr, ConstFloat
+        from pypy.jit.metainterp.history import BasicFailDescr
+        from pypy.jit.metainterp.typesystem import llhelper
+
+        from pypy.jit.metainterp.history import get_const_ptr_for_string
+        from pypy.jit.metainterp.history import get_const_ptr_for_unicode
+        get_const_ptr_for_string = staticmethod(get_const_ptr_for_string)
+        get_const_ptr_for_unicode = staticmethod(get_const_ptr_for_unicode)
+
+        @staticmethod
+        def convert_to_floatstorage(arg):
+            from pypy.jit.codewriter import longlong
+            return longlong.getfloatstorage(float(arg))
+
+        @staticmethod
+        def ptr_to_int(obj):
+            from pypy.jit.codewriter.heaptracker import adr2int
+            from pypy.rpython.lltypesystem import llmemory
+            return adr2int(llmemory.cast_ptr_to_adr(obj))
+
+        @staticmethod
+        def ootype_cast_to_object(obj):
+            from pypy.rpython.ootypesystem import ootype
+            return ootype.cast_to_object(obj)
+
+    return LoopModel
+
+def get_mock_model():
+    class LoopModel(object):
+
+        class TreeLoop(object):
+            def __init__(self, name):
+                self.name = name
+
+        class LoopToken(object):
+            I_am_a_descr = True
+
+        class BasicFailDescr(object):
+            I_am_a_descr = True
+
+        class Box(object):
+            _counter = 0
+            type = 'b'
+
+            def __init__(self, value=0):
+                self.value = value
+
+            def __repr__(self):
+                result = str(self)
+                result += '(%s)' % self.value
+                return result
+
+            def __str__(self):
+                if not hasattr(self, '_str'):
+                    self._str = '%s%d' % (self.type, Box._counter)
+                    Box._counter += 1
+                return self._str
+
+        class BoxInt(Box):
+            type = 'i'
+
+        class BoxFloat(Box):
+            type = 'f'
+
+        class BoxRef(Box):
+            type = 'p'
+
+        class Const(object):
+            def __init__(self, value=None):
+                self.value = value
+
+            def _get_str(self):
+                return str(self.value)
+
+        class ConstInt(Const):
+            pass
+
+        class ConstPtr(Const):
+            pass
+
+        class ConstFloat(Const):
+            pass
+
+        @classmethod
+        def get_const_ptr_for_string(cls, s):
+            return cls.ConstPtr(s)
+
+        @classmethod
+        def get_const_ptr_for_unicode(cls, s):
+            return cls.ConstPtr(s)
+
+        @staticmethod
+        def convert_to_floatstorage(arg):
+            return float(arg)
+
+        @staticmethod
+        def ptr_to_int(obj):
+            return id(obj)
+
+        class llhelper(object):
+            pass
+
+    LoopModel.llhelper.BoxRef = LoopModel.BoxRef
+
+    return LoopModel
+
+
+def get_model(use_mock):
+    if use_mock:
+        model = get_mock_model()
+    else:
+        model = get_real_model()
+
+    class ExtendedTreeLoop(model.TreeLoop):
+
+        def getboxes(self):
+            def opboxes(operations):
+                for op in operations:
+                    yield op.result
+                    for box in op.getarglist():
+                        yield box
+            def allboxes():
+                for box in self.inputargs:
+                    yield box
+                for box in opboxes(self.operations):
+                    yield box
+
+            boxes = Boxes()
+            for box in allboxes():
+                if isinstance(box, model.Box):
+                    name = str(box)
+                    setattr(boxes, name, box)
+            return boxes
+
+        def setvalues(self, **kwds):
+            boxes = self.getboxes()
+            for name, value in kwds.iteritems():
+                getattr(boxes, name).value = value
+
+    model.ExtendedTreeLoop = ExtendedTreeLoop
+    return model
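
For reference, the split above means that importing pypy/jit/tool/oparser_model.py
by itself pulls in none of the heavy metainterp modules: the real classes are only
imported inside get_real_model(), while get_mock_model() builds lightweight
stand-ins.  A small usage sketch, assuming the interfaces introduced in this diff
(the same selection happens through OpParser.use_mock_model, as exercised by
test_oparser.py further down):

    from pypy.jit.tool.oparser import parse, OpParser
    from pypy.jit.tool.oparser_model import get_model

    MockModel = get_model(True)              # lightweight stand-in classes only

    class MockOpParser(OpParser):
        use_mock_model = True                # OpParser.__init__ then picks the mock model

    loop = parse("""
    [i0, i1]
    i2 = int_add(i0, i1)
    finish()
    """, OpParser=MockOpParser)
    print loop.operations[0].getopname()     # int_add
    print loop.inputargs                     # two mock BoxInt instances, no lltype needed
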
diff --git a/pypy/jit/tool/pypytrace-mode.el b/pypy/jit/tool/pypytrace-mode.el
--- a/pypy/jit/tool/pypytrace-mode.el
+++ b/pypy/jit/tool/pypytrace-mode.el
@@ -8,10 +8,16 @@
 (defun set-truncate-lines ()
   (setq truncate-lines t))
 
+;; to generate the list of keywords:
+;; from pypy.jit.metainterp import resoperation
+;; print ' '.join(sorted('"%s"' % op.lower() for op in resoperation.opname.values() if not op.startswith('GUARD')))
+
+
+
 (define-generic-mode 
   'pypytrace-mode                   ;; name of the mode to create
   nil
-  '("jump" "finish" "int_add" "int_sub" "int_mul" "int_floordiv" "uint_floordiv" "int_mod" "int_and" "int_or" "int_xor" "int_rshift" "int_lshift" "uint_rshift" "float_add" "float_sub" "float_mul" "float_truediv" "float_neg" "float_abs" "cast_float_to_int" "cast_int_to_float" "int_lt" "int_le" "int_eq" "int_ne" "int_gt" "int_ge" "uint_lt" "uint_le" "uint_gt" "uint_ge" "float_lt" "float_le" "float_eq" "float_ne" "float_gt" "float_ge" "int_is_zero" "int_is_true" "int_neg" "int_invert" "same_as" "ptr_eq" "ptr_ne" "arraylen_gc" "strlen" "strgetitem" "getfield_gc_pure" "getfield_raw_pure" "getarrayitem_gc_pure" "unicodelen" "unicodegetitem" "getarrayitem_gc" "getarrayitem_raw" "getfield_gc" "getfield_raw" "new" "new_with_vtable" "new_array" "force_token" "virtual_ref" "setarrayitem_gc" "setarrayitem_raw" "setfield_gc" "setfield_raw" "arraycopy" "newstr" "strsetitem" "unicodesetitem" "newunicode" "cond_call_gc_wb" "virtual_ref_finish" "call" "call_assembler" "call_may_force" "call_loopinvariant" "call_pure" "int_add_ovf" "int_sub_ovf" "int_mul_ovf") ;; keywords
+  '("arraylen_gc" "call" "call_assembler" "call_loopinvariant" "call_may_force" "call_pure" "call_release_gil" "cast_float_to_int" "cast_int_to_float" "cond_call_gc_wb" "copystrcontent" "copyunicodecontent" "debug_merge_point" "finish" "float_abs" "float_add" "float_eq" "float_ge" "float_gt" "float_le" "float_lt" "float_mul" "float_ne" "float_neg" "float_sub" "float_truediv" "force_token" "getarrayitem_gc" "getarrayitem_gc_pure" "getarrayitem_raw" "getfield_gc" "getfield_gc_pure" "getfield_raw" "getfield_raw_pure" "int_add" "int_add_ovf" "int_and" "int_eq" "int_floordiv" "int_ge" "int_gt" "int_invert" "int_is_true" "int_is_zero" "int_le" "int_lshift" "int_lt" "int_mod" "int_mul" "int_mul_ovf" "int_ne" "int_neg" "int_or" "int_rshift" "int_sub" "int_sub_ovf" "int_xor" "jit_debug" "jump" "new" "new_array" "new_with_vtable" "newstr" "newunicode" "ptr_eq" "ptr_ne" "quasiimmut_field" "read_timestamp" "same_as" "setarrayitem_gc" "setarrayitem_raw" "setfield_gc" "setfield_raw" "strgetitem" "strlen" "strsetitem" "uint_floordiv" "uint_ge" "uint_gt" "uint_le" "uint_lt" "uint_rshift" "unicodegetitem" "unicodelen" "unicodesetitem" "virtual_ref" "virtual_ref_finish") ;; keywords
   '( ;; additional regexps
     ("^# Loop.*" . 'hi-blue)
     ("\\[.*\\]" . 'font-lock-comment-face) ;; comment out argument lists
diff --git a/pypy/jit/tool/test/test_oparser.py b/pypy/jit/tool/test/test_oparser.py
--- a/pypy/jit/tool/test/test_oparser.py
+++ b/pypy/jit/tool/test/test_oparser.py
@@ -1,227 +1,274 @@
 import py
+import sys
 from pypy.rpython.lltypesystem import lltype, llmemory
 
-from pypy.jit.tool.oparser import parse, ParseError
+from pypy.jit.tool.oparser import parse, OpParser
 from pypy.jit.metainterp.resoperation import rop
-from pypy.jit.metainterp.history import AbstractDescr, BoxInt, LoopToken,\
-     BoxFloat
+from pypy.jit.metainterp.history import AbstractDescr, BoxInt, LoopToken
 
-def test_basic_parse():
-    x = """
-    [i0, i1]
-    # a comment
-    i2 = int_add(i0, i1)
-    i3 = int_sub(i2, 3) # another comment
-    finish() # (tricky)
-    """
-    loop = parse(x)
-    assert len(loop.operations) == 3
-    assert [op.getopnum() for op in loop.operations] == [rop.INT_ADD, rop.INT_SUB,
-                                                    rop.FINISH]
-    assert len(loop.inputargs) == 2
-    assert loop.operations[-1].getdescr()
+class BaseTestOparser(object):
 
-def test_const_ptr_subops():
-    x = """
-    [p0]
-    guard_class(p0, ConstClass(vtable)) []
-    """
-    S = lltype.Struct('S')
-    vtable = lltype.nullptr(S)
-    loop = parse(x, None, locals())
-    assert len(loop.operations) == 1
-    assert loop.operations[0].getdescr()
-    assert loop.operations[0].getfailargs() == []
+    OpParser = None
 
-def test_descr():
-    class Xyz(AbstractDescr):
-        pass
-    
-    x = """
-    [p0]
-    i1 = getfield_gc(p0, descr=stuff)
-    """
-    stuff = Xyz()
-    loop = parse(x, None, locals())
-    assert loop.operations[0].getdescr() is stuff
+    def parse(self, *args, **kwds):
+        kwds['OpParser'] = self.OpParser
+        return parse(*args, **kwds)
 
-def test_after_fail():
-    x = """
-    [i0]
-    guard_value(i0, 3) []
-    i1 = int_add(1, 2)
-    """
-    loop = parse(x, None, {})
-    assert len(loop.operations) == 2
+    def test_basic_parse(self):
+        x = """
+        [i0, i1]
+        # a comment
+        i2 = int_add(i0, i1)
+        i3 = int_sub(i2, 3) # another comment
+        finish() # (tricky)
+        """
+        loop = self.parse(x)
+        assert len(loop.operations) == 3
+        assert [op.getopnum() for op in loop.operations] == [rop.INT_ADD, rop.INT_SUB,
+                                                        rop.FINISH]
+        assert len(loop.inputargs) == 2
+        assert loop.operations[-1].getdescr()
 
-def test_descr_setfield():
-    class Xyz(AbstractDescr):
-        pass
-    
-    x = """
-    [p0]
-    setfield_gc(p0, 3, descr=stuff)
-    """
-    stuff = Xyz()
-    loop = parse(x, None, locals())
-    assert loop.operations[0].getdescr() is stuff
+    def test_const_ptr_subops(self):
+        x = """
+        [p0]
+        guard_class(p0, ConstClass(vtable)) []
+        """
+        S = lltype.Struct('S')
+        vtable = lltype.nullptr(S)
+        loop = self.parse(x, None, locals())
+        assert len(loop.operations) == 1
+        assert loop.operations[0].getdescr()
+        assert loop.operations[0].getfailargs() == []
 
-def test_boxname():
-    x = """
-    [i42]
-    i50 = int_add(i42, 1)
-    """
-    loop = parse(x, None, {})
-    assert str(loop.inputargs[0]) == 'i42'
-    assert str(loop.operations[0].result) == 'i50'
+    def test_descr(self):
+        class Xyz(AbstractDescr):
+            I_am_a_descr = True # for the mock case
 
-def test_getboxes():
-    x = """
-    [i0]
-    i1 = int_add(i0, 10)
-    """
-    loop = parse(x, None, {})
-    boxes = loop.getboxes()
-    assert boxes.i0 is loop.inputargs[0]
-    assert boxes.i1 is loop.operations[0].result
-    
-def test_setvalues():
-    x = """
-    [i0]
-    i1 = int_add(i0, 10)
-    """
-    loop = parse(x, None, {})
-    loop.setvalues(i0=32, i1=42)
-    assert loop.inputargs[0].value == 32
-    assert loop.operations[0].result.value == 42
+        x = """
+        [p0]
+        i1 = getfield_gc(p0, descr=stuff)
+        """
+        stuff = Xyz()
+        loop = self.parse(x, None, locals())
+        assert loop.operations[0].getdescr() is stuff
 
-def test_boxkind():
-    x = """
-    [sum0]
-    """
-    loop = parse(x, None, {}, boxkinds={'sum': BoxInt})
-    b = loop.getboxes()
-    assert isinstance(b.sum0, BoxInt)
-    
-def test_getvar_const_ptr():
-    x = '''
-    []
-    call(ConstPtr(func_ptr))
+    def test_after_fail(self):
+        x = """
+        [i0]
+        guard_value(i0, 3) []
+        i1 = int_add(1, 2)
+        """
+        loop = self.parse(x, None, {})
+        assert len(loop.operations) == 2
+
+    def test_descr_setfield(self):
+        class Xyz(AbstractDescr):
+            I_am_a_descr = True # for the mock case
+
+        x = """
+        [p0]
+        setfield_gc(p0, 3, descr=stuff)
+        """
+        stuff = Xyz()
+        loop = self.parse(x, None, locals())
+        assert loop.operations[0].getdescr() is stuff
+
+    def test_boxname(self):
+        x = """
+        [i42]
+        i50 = int_add(i42, 1)
+        """
+        loop = self.parse(x, None, {})
+        assert str(loop.inputargs[0]) == 'i42'
+        assert str(loop.operations[0].result) == 'i50'
+
+    def test_getboxes(self):
+        x = """
+        [i0]
+        i1 = int_add(i0, 10)
+        """
+        loop = self.parse(x, None, {})
+        boxes = loop.getboxes()
+        assert boxes.i0 is loop.inputargs[0]
+        assert boxes.i1 is loop.operations[0].result
+
+    def test_setvalues(self):
+        x = """
+        [i0]
+        i1 = int_add(i0, 10)
+        """
+        loop = self.parse(x, None, {})
+        loop.setvalues(i0=32, i1=42)
+        assert loop.inputargs[0].value == 32
+        assert loop.operations[0].result.value == 42
+
+    def test_getvar_const_ptr(self):
+        x = '''
+        []
+        call(ConstPtr(func_ptr))
+        '''
+        TP = lltype.GcArray(lltype.Signed)
+        NULL = lltype.cast_opaque_ptr(llmemory.GCREF, lltype.nullptr(TP))
+        loop = self.parse(x, None, {'func_ptr' : NULL})
+        assert loop.operations[0].getarg(0).value == NULL
+
+    def test_jump_target(self):
+        x = '''
+        []
+        jump()
+        '''
+        loop = self.parse(x)
+        assert loop.operations[0].getdescr() is loop.token
+
+    def test_jump_target_other(self):
+        looptoken = LoopToken()
+        looptoken.I_am_a_descr = True # for the mock case
+        x = '''
+        []
+        jump(descr=looptoken)
+        '''
+        loop = self.parse(x, namespace=locals())
+        assert loop.operations[0].getdescr() is looptoken
+
+    def test_floats(self):
+        x = '''
+        [f0]
+        f1 = float_add(f0, 3.5)
+        '''
+        loop = self.parse(x)
+        box = loop.operations[0].getarg(0)
+        # we cannot use isinstance, because in the mock case the class is
+        # constructed on the fly
+        assert box.__class__.__name__ == 'BoxFloat'
+
+    def test_debug_merge_point(self):
+        x = '''
+        []
+        debug_merge_point(0, "info")
+        debug_merge_point(0, 'info')
+        debug_merge_point(1, '<some ('other.')> info')
+        debug_merge_point(0, '(stuff) #1')
+        '''
+        loop = self.parse(x)
+        assert loop.operations[0].getarg(1)._get_str() == 'info'
+        assert loop.operations[1].getarg(1)._get_str() == 'info'
+        assert loop.operations[2].getarg(1)._get_str() == "<some ('other.')> info"
+        assert loop.operations[3].getarg(1)._get_str() == "(stuff) #1"
+
+
+    def test_descr_with_obj_print(self):
+        x = '''
+        [p0]
+        setfield_gc(p0, 1, descr=<SomeDescr>)
+        '''
+        loop = self.parse(x)
+        # assert did not explode
+
+    example_loop_log = '''\
+    # bridge out of Guard12, 6 ops
+    [i0, i1, i2]
+    i4 = int_add(i0, 2)
+    i6 = int_sub(i1, 1)
+    i8 = int_gt(i6, 3)
+    guard_true(i8, descr=<Guard15>) [i4, i6]
+    debug_merge_point('(no jitdriver.get_printable_location!)', 0)
+    jump(i6, i4, descr=<Loop0>)
     '''
-    TP = lltype.GcArray(lltype.Signed)
-    NULL = lltype.cast_opaque_ptr(llmemory.GCREF, lltype.nullptr(TP))
-    loop = parse(x, None, {'func_ptr' : NULL})
-    assert loop.operations[0].getarg(0).value == NULL
 
-def test_jump_target():
-    x = '''
-    []
-    jump()
-    '''
-    loop = parse(x)
-    assert loop.operations[0].getdescr() is loop.token
+    def test_parse_no_namespace(self):
+        loop = self.parse(self.example_loop_log, no_namespace=True)
 
-def test_jump_target_other():
-    looptoken = LoopToken()
-    x = '''
-    []
-    jump(descr=looptoken)
-    '''
-    loop = parse(x, namespace=locals())
-    assert loop.operations[0].getdescr() is looptoken
+    def test_attach_comment_to_loop(self):
+        loop = self.parse(self.example_loop_log, no_namespace=True)
+        assert loop.comment == '    # bridge out of Guard12, 6 ops'
 
-def test_floats():
-    x = '''
-    [f0]
-    f1 = float_add(f0, 3.5)
-    '''
-    loop = parse(x)
-    assert isinstance(loop.operations[0].getarg(0), BoxFloat)
-    
-def test_debug_merge_point():
-    x = '''
-    []
-    debug_merge_point("info", 0)
-    debug_merge_point('info', 1)
-    debug_merge_point('<some ('other,')> info', 1)
-    debug_merge_point('(stuff) #1', 1)
-    '''
-    loop = parse(x)
-    assert loop.operations[0].getarg(0)._get_str() == 'info'
-    assert loop.operations[1].getarg(0)._get_str() == 'info'
-    assert loop.operations[2].getarg(0)._get_str() == "<some ('other,')> info"
-    assert loop.operations[3].getarg(0)._get_str() == "(stuff) #1"
-    
+    def test_parse_new_with_comma(self):
+        # this is generated by PYPYJITLOG, check that we can handle it
+        x = '''
+        []
+        p0 = new(, descr=<SizeDescr 12>)
+        '''
+        loop = self.parse(x)
+        assert loop.operations[0].getopname() == 'new'
 
-def test_descr_with_obj_print():
-    x = '''
-    [p0]
-    setfield_gc(p0, 1, descr=<SomeDescr>)
-    '''
-    loop = parse(x)
-    # assert did not explode
+    def test_no_fail_args(self):
+        x = '''
+        [i0]
+        guard_true(i0, descr=<Guard0>)
+        '''
+        loop = self.parse(x, nonstrict=True)
+        assert loop.operations[0].getfailargs() == []
 
-example_loop_log = '''\
-# bridge out of Guard12, 6 ops
-[i0, i1, i2]
-i4 = int_add(i0, 2)
-i6 = int_sub(i1, 1)
-i8 = int_gt(i6, 3)
-guard_true(i8, descr=<Guard15>) [i4, i6]
-debug_merge_point('(no jitdriver.get_printable_location!)', 0)
-jump(i6, i4, descr=<Loop0>)
-'''
+    def test_no_inputargs(self):
+        x = '''
+        i2 = int_add(i0, i1)
+        '''
+        loop = self.parse(x, nonstrict=True)
+        assert loop.inputargs == []
+        assert loop.operations[0].getopname() == 'int_add'
 
-def test_parse_no_namespace():
-    loop = parse(example_loop_log, no_namespace=True)
+    def test_offsets(self):
+        x = """
+        [i0, i1]
+        +10: i2 = int_add(i0, i1)
+        i3 = int_add(i2, 3)
+        """
+        #    +30: --end of the loop--
+        loop = self.parse(x)
+        assert loop.operations[0].offset == 10
+        assert not hasattr(loop.operations[1], 'offset')
 
-def test_attach_comment_to_loop():
-    loop = parse(example_loop_log, no_namespace=True)
-    assert loop.comment == '# bridge out of Guard12, 6 ops'
+    def test_last_offset(self):
+        x = """
+        [i0, i1]
+        +10: i2 = int_add(i0, i1)
+        i3 = int_add(i2, 3)
+        +30: --end of the loop--
+        """
+        loop = self.parse(x)
+        assert len(loop.operations) == 2
+        assert loop.last_offset == 30
 
-def test_parse_new_with_comma():
-    # this is generated by PYPYJITLOG, check that we can handle it
-    x = '''
-    []
-    p0 = new(, descr=<SizeDescr 12>)
-    '''
-    loop = parse(x)
-    assert loop.operations[0].getopname() == 'new'
 
-def test_no_fail_args():
-    x = '''
-    [i0]
-    guard_true(i0, descr=<Guard0>)
-    '''
-    loop = parse(x, nonstrict=True)
-    assert loop.operations[0].getfailargs() == []
+class TestOpParser(BaseTestOparser):
 
-def test_no_inputargs():
-    x = '''
-    i2 = int_add(i0, i1)
-    '''
-    loop = parse(x, nonstrict=True)
-    assert loop.inputargs == []
-    assert loop.operations[0].getopname() == 'int_add'
+    OpParser = OpParser
 
-def test_offsets():
-    x = """
-    [i0, i1]
-    +10: i2 = int_add(i0, i1)
-    i3 = int_add(i2, 3)
-    """
-    #    +30: --end of the loop--
-    loop = parse(x)
-    assert loop.operations[0].offset == 10
-    assert not hasattr(loop.operations[1], 'offset')
+    def test_boxkind(self):
+        x = """
+        [sum0]
+        """
+        loop = self.parse(x, None, {}, boxkinds={'sum': BoxInt})
+        b = loop.getboxes()
+        assert isinstance(b.sum0, BoxInt)
 
-def test_last_offset():
-    x = """
-    [i0, i1]
-    +10: i2 = int_add(i0, i1)
-    i3 = int_add(i2, 3)
-    +30: --end of the loop--
-    """
-    loop = parse(x)
-    assert len(loop.operations) == 2
-    assert loop.last_offset == 30
+
+class ForbiddenModule(object):
+    def __init__(self, name, old_mod):
+        self.name = name
+        self.old_mod = old_mod
+
+    def __getattr__(self, attr):
+        assert False, "You should not import module %s" % self.name
+
+
+class TestOpParserWithMock(BaseTestOparser):
+
+    class OpParser(OpParser):
+        use_mock_model = True
+
+    def setup_class(cls):
+        forbidden_mods = [
+            'pypy.jit.metainterp.history',
+            'pypy.rpython.lltypesystem.lltype',
+            ]
+        for modname in forbidden_mods:
+            if modname in sys.modules:
+                newmod = ForbiddenModule(modname, sys.modules[modname])
+                sys.modules[modname] = newmod
+
+    def teardown_class(cls):
+        for modname, mod in sys.modules.iteritems():
+            if isinstance(mod, ForbiddenModule):
+                sys.modules[modname] = mod.old_mod
diff --git a/pypy/module/__builtin__/__init__.py b/pypy/module/__builtin__/__init__.py
--- a/pypy/module/__builtin__/__init__.py
+++ b/pypy/module/__builtin__/__init__.py
@@ -31,6 +31,8 @@
 
         'apply'         : 'app_functional.apply',
         'sorted'        : 'app_functional.sorted',
+        'any'           : 'app_functional.any',
+        'all'           : 'app_functional.all',
         'vars'          : 'app_inspect.vars',
         'dir'           : 'app_inspect.dir',
 
@@ -95,8 +97,6 @@
         'range'         : 'functional.range_int',
         'xrange'        : 'functional.W_XRange',
         'enumerate'     : 'functional.W_Enumerate',
-        'all'           : 'functional.all',
-        'any'           : 'functional.any',
         'min'           : 'functional.min',
         'max'           : 'functional.max',
         'sum'           : 'functional.sum',
diff --git a/pypy/module/__builtin__/app_functional.py b/pypy/module/__builtin__/app_functional.py
--- a/pypy/module/__builtin__/app_functional.py
+++ b/pypy/module/__builtin__/app_functional.py
@@ -16,3 +16,21 @@
     sorted_lst = list(lst)
     sorted_lst.sort(cmp, key, reverse)
     return sorted_lst
+
+def any(seq):
+    """any(iterable) -> bool
+
+Return True if bool(x) is True for any x in the iterable."""
+    for x in seq:
+        if x:
+            return True
+    return False
+
+def all(seq):
+    """all(iterable) -> bool
+
+Return True if bool(x) is True for all values x in the iterable."""
+    for x in seq:
+        if not x:
+            return False
+    return True
diff --git a/pypy/module/__builtin__/functional.py b/pypy/module/__builtin__/functional.py
--- a/pypy/module/__builtin__/functional.py
+++ b/pypy/module/__builtin__/functional.py
@@ -452,40 +452,6 @@
     w_empty = space.call_function(w_str_type)
     return space.call_method(w_empty, "join", space.newlist(result_w))
 
-def all(space, w_S):
-    """all(iterable) -> bool
-
-Return True if bool(x) is True for all values x in the iterable."""
-    w_iter = space.iter(w_S)
-    while True:
-        try:
-            w_next = space.next(w_iter)
-        except OperationError, e:
-            if not e.match(space, space.w_StopIteration):
-                raise       # re-raise other app-level exceptions
-            break
-        if not space.is_true(w_next):
-            return space.w_False
-    return space.w_True
-
-
-def any(space, w_S):
-    """any(iterable) -> bool
-
-Return True if bool(x) is True for any x in the iterable."""
-    w_iter = space.iter(w_S)
-    while True:
-        try:
-            w_next = space.next(w_iter)
-        except OperationError, e:
-            if not e.match(space, space.w_StopIteration):
-                raise       # re-raise other app-level exceptions
-            break
-        if space.is_true(w_next):
-            return space.w_True
-    return space.w_False
-
-
 class W_Enumerate(Wrappable):
 
     def __init__(self, w_iter, w_start):
diff --git a/pypy/module/__builtin__/test/test_builtin.py b/pypy/module/__builtin__/test/test_builtin.py
--- a/pypy/module/__builtin__/test/test_builtin.py
+++ b/pypy/module/__builtin__/test/test_builtin.py
@@ -1,5 +1,6 @@
 import autopath
 import sys
+from pypy import conftest
 
 class AppTestBuiltinApp:
     def setup_class(cls):
@@ -15,6 +16,15 @@
             cls.w_sane_lookup = cls.space.wrap(True)
         except KeyError:
             cls.w_sane_lookup = cls.space.wrap(False)
+        # starting with CPython 2.6, when the stack is almost exhausted, we
+        # can get an arbitrary error instead of just a RuntimeError.  For
+        # example, if an object x has a __getattr__, we can get an
+        # AttributeError if attempting to call x.__getattr__ runs out of
+        # stack.  That's annoying, so we just work around it.
+        if conftest.option.runappdirect:
+            cls.w_safe_runtimerror = cls.space.wrap(True)
+        else:
+            cls.w_safe_runtimerror = cls.space.wrap(sys.version_info < (2, 6))
 
     def test_bytes_alias(self):
         assert bytes is str
@@ -399,6 +409,8 @@
     def test_cmp_cyclic(self):
         if not self.sane_lookup:
             skip("underlying Python implementation has insane dict lookup")
+        if not self.safe_runtimerror:
+            skip("underlying Python may raise random exceptions on stack ovf")
         a = []; a.append(a)
         b = []; b.append(b)
         from UserList import UserList
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -32,15 +32,22 @@
                 space.wrap(reason))
             w_res = space.call_function(w_errorhandler, w_exc)
             if (not space.is_true(space.isinstance(w_res, space.w_tuple))
-                or space.len_w(w_res) != 2):
+                or space.len_w(w_res) != 2
+                or not space.is_true(space.isinstance(
+                                 space.getitem(w_res, space.wrap(0)),
+                                 space.w_unicode))):
+                if decode:
+                    msg = ("decoding error handler must return "
+                           "(unicode, int) tuple, not %s")
+                else:
+                    msg = ("encoding error handler must return "
+                           "(unicode, int) tuple, not %s")
                 raise operationerrfmt(
-                    space.w_TypeError,
-                    "encoding error handler must return "
-                    "(unicode, int) tuple, not %s",
+                    space.w_TypeError, msg,
                     space.str_w(space.repr(w_res)))
             w_replace, w_newpos = space.fixedview(w_res, 2)
             newpos = space.int_w(w_newpos)
-            if (newpos < 0):
+            if newpos < 0:
                 newpos = len(input) + newpos
             if newpos < 0 or newpos > len(input):
                 raise operationerrfmt(
@@ -50,7 +57,9 @@
                 replace = space.unicode_w(w_replace)
                 return replace, newpos
             else:
-                replace = space.str_w(w_replace)
+                from pypy.objspace.std.unicodetype import encode_object
+                w_str = encode_object(space, w_replace, encoding, None)
+                replace = space.str_w(w_str)
                 return replace, newpos
         return unicode_call_errorhandler
 
@@ -160,15 +169,7 @@
 def ignore_errors(space, w_exc):
     check_exception(space, w_exc)
     w_end = space.getattr(w_exc, space.wrap('end'))
-    if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
-        return space.newtuple([space.wrap(''), w_end])
-    elif (space.isinstance_w(w_exc, space.w_UnicodeDecodeError) or
-          space.isinstance_w(w_exc, space.w_UnicodeTranslateError)):
-        return space.newtuple([space.wrap(u''), w_end])
-    else:
-        typename = space.type(w_exc).getname(space, '?')
-        raise operationerrfmt(space.w_TypeError,
-            "don't know how to handle %s in error callback", typename)
+    return space.newtuple([space.wrap(u''), w_end])
 
 def replace_errors(space, w_exc):
     check_exception(space, w_exc)
@@ -176,7 +177,7 @@
     w_end = space.getattr(w_exc, space.wrap('end'))
     size = space.int_w(w_end) - space.int_w(w_start)
     if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
-        text = '?' * size
+        text = u'?' * size
         return space.newtuple([space.wrap(text), w_end])
     elif space.isinstance_w(w_exc, space.w_UnicodeDecodeError):
         text = u'\ufffd'
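
For reference, the user-visible effect of the change above: an encode error
handler must return a (unicode, int) tuple, and the unicode replacement is
re-encoded with the same codec before being spliced into the byte string, while
a handler returning a plain str now raises TypeError.  A small app-level sketch
using the standard codecs.register_error API; the handler names below are
arbitrary:

    import codecs

    def fallback(exc):
        # return a *unicode* replacement plus the position at which to resume
        return (u'?', exc.end)

    codecs.register_error('example.fallback', fallback)
    print u'caf\xe9 \u5678'.encode('ascii', 'example.fallback')   # caf? ?

    def bad(exc):
        return ('not unicode', exc.end)      # str instead of unicode

    codecs.register_error('example.bad', bad)
    try:
        u'\u1234'.encode('ascii', 'example.bad')
    except TypeError:
        print 'bad handler rejected'
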
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -540,6 +540,17 @@
         else:
             assert res == u"\x00\x00\x01\x00\x00" # UCS2 build
 
+    def test_encode_error_bad_handler(self):
+        import codecs
+        codecs.register_error("test.bad_handler", lambda e: (repl, 1))
+        assert u"xyz".encode("latin-1", "test.bad_handler") == "xyz"
+        repl = u"\u1234"
+        raises(UnicodeEncodeError, u"\u5678".encode, "latin-1",
+               "test.bad_handler")
+        repl = u"\u00E9"
+        s = u"\u5678".encode("latin-1", "test.bad_handler")
+        assert s == '\xe9'
+
     def test_charmap_encode(self):
         assert 'xxx'.encode('charmap') == 'xxx'
 
@@ -593,3 +604,11 @@
         assert u'caf\xe9'.encode('mbcs') == 'caf\xe9'
         assert u'\u040a'.encode('mbcs') == '?' # some cyrillic letter
         assert 'caf\xe9'.decode('mbcs') == u'caf\xe9'
+
+    def test_bad_handler_string_result(self):
+        import _codecs
+        def f(exc):
+            return ('foo', exc.end)
+        _codecs.register_error("test.test_codecs_not_a_string", f)
+        raises(TypeError, u'\u1234'.encode, 'ascii',
+               'test.test_codecs_not_a_string')
diff --git a/pypy/module/_ffi/__init__.py b/pypy/module/_ffi/__init__.py
--- a/pypy/module/_ffi/__init__.py
+++ b/pypy/module/_ffi/__init__.py
@@ -4,8 +4,10 @@
 class Module(MixedModule):
 
     interpleveldefs = {
-        'CDLL'               : 'interp_ffi.W_CDLL',
-        'types':             'interp_ffi.W_types',
+        'CDLL':    'interp_ffi.W_CDLL',
+        'types':   'interp_ffi.W_types',
+        'FuncPtr': 'interp_ffi.W_FuncPtr',
+        'get_libc':'interp_ffi.get_libc',
     }
 
     appleveldefs = {}
diff --git a/pypy/module/_ffi/interp_ffi.py b/pypy/module/_ffi/interp_ffi.py
--- a/pypy/module/_ffi/interp_ffi.py
+++ b/pypy/module/_ffi/interp_ffi.py
@@ -4,63 +4,170 @@
     operationerrfmt
 from pypy.interpreter.gateway import interp2app, NoneNotWrapped, unwrap_spec
 from pypy.interpreter.typedef import TypeDef, GetSetProperty
+from pypy.module._rawffi.structure import W_StructureInstance, W_Structure
 #
 from pypy.rpython.lltypesystem import lltype, rffi
 #
 from pypy.rlib import jit
 from pypy.rlib import libffi
 from pypy.rlib.rdynload import DLOpenError
-from pypy.rlib.rarithmetic import intmask
+from pypy.rlib.rarithmetic import intmask, r_uint
 
 class W_FFIType(Wrappable):
-    def __init__(self, name, ffitype):
+
+    _immutable_fields_ = ['name', 'ffitype', 'w_datashape', 'w_pointer_to']
+
+    def __init__(self, name, ffitype, w_datashape=None, w_pointer_to=None):
         self.name = name
         self.ffitype = ffitype
+        self.w_datashape = w_datashape
+        self.w_pointer_to = w_pointer_to
+        if self.is_struct():
+            assert w_datashape is not None
 
-    def str(self, space):
-        return space.wrap('<ffi type %s>' % self.name)
+    def descr_deref_pointer(self, space):
+        if self.w_pointer_to is None:
+            return space.w_None
+        return self.w_pointer_to
 
+    def repr(self, space):
+        return space.wrap(self.__repr__())
 
+    def __repr__(self):
+        return "<ffi type %s>" % self.name
+
+    def is_signed(self):
+        return (self is app_types.slong or
+                self is app_types.sint or
+                self is app_types.sshort or
+                self is app_types.sbyte or
+                self is app_types.slonglong)
+
+    def is_unsigned(self):
+        return (self is app_types.ulong or
+                self is app_types.uint or
+                self is app_types.ushort or
+                self is app_types.ubyte or
+                self is app_types.ulonglong)
+
+    def is_pointer(self):
+        return self.ffitype is libffi.types.pointer
+
+    def is_char(self):
+        return self is app_types.char
+
+    def is_unichar(self):
+        return self is app_types.unichar
+
+    def is_longlong(self):
+        return libffi.IS_32_BIT and (self is app_types.slonglong or
+                                     self is app_types.ulonglong)
+
+    def is_double(self):
+        return self is app_types.double
+
+    def is_singlefloat(self):
+        return self is app_types.float
+
+    def is_void(self):
+        return self is app_types.void
+
+    def is_struct(self):
+        return libffi.types.is_struct(self.ffitype)
 
 W_FFIType.typedef = TypeDef(
     'FFIType',
-    __str__ = interp2app(W_FFIType.str),
+    __repr__ = interp2app(W_FFIType.repr),
+    deref_pointer = interp2app(W_FFIType.descr_deref_pointer),
     )
 
 
+def build_ffi_types():
+    from pypy.rlib.clibffi import FFI_TYPE_P
+    types = [
+        # note: most of the type names come directly from their C equivalents,
+        # with the exception of bytes: in C, ubyte and char are equivalent,
+        # but for _ffi the former expects a number while the latter expects a
+        # 1-length string
+        W_FFIType('slong',     libffi.types.slong),
+        W_FFIType('sint',      libffi.types.sint),
+        W_FFIType('sshort',    libffi.types.sshort),
+        W_FFIType('sbyte',     libffi.types.schar),
+        W_FFIType('slonglong', libffi.types.slonglong),
+        #
+        W_FFIType('ulong',     libffi.types.ulong),
+        W_FFIType('uint',      libffi.types.uint),
+        W_FFIType('ushort',    libffi.types.ushort),
+        W_FFIType('ubyte',     libffi.types.uchar),
+        W_FFIType('ulonglong', libffi.types.ulonglong),
+        #
+        W_FFIType('char',      libffi.types.uchar),
+        W_FFIType('unichar',   libffi.types.wchar_t),
+        #
+        W_FFIType('double',    libffi.types.double),
+        W_FFIType('float',     libffi.types.float),
+        W_FFIType('void',      libffi.types.void),
+        W_FFIType('void_p',    libffi.types.pointer),
+        #
+        # missing types:
+
+        ## 's' : ffi_type_pointer,
+        ## 'z' : ffi_type_pointer,
+        ## 'O' : ffi_type_pointer,
+        ## 'Z' : ffi_type_pointer,
+
+        ]
+    return dict([(t.name, t) for t in types])
+
+class app_types:
+    pass
+app_types.__dict__ = build_ffi_types()
+
+def descr_new_pointer(space, w_cls, w_pointer_to):
+    try:
+        return descr_new_pointer.cache[w_pointer_to]
+    except KeyError:
+        w_pointer_to = space.interp_w(W_FFIType, w_pointer_to)
+        name = '(pointer to %s)' % w_pointer_to.name
+        w_result = W_FFIType(name, libffi.types.pointer, w_pointer_to = w_pointer_to)
+        descr_new_pointer.cache[w_pointer_to] = w_result
+        return w_result
+descr_new_pointer.cache = {}
+
 class W_types(Wrappable):
     pass
-
-def build_ffi_types():
-    from pypy.rlib.clibffi import FFI_TYPE_P
-    tdict = {}
-    for key, value in libffi.types.__dict__.iteritems():
-        if key == 'getkind' or key.startswith('__'):
-            continue
-        assert lltype.typeOf(value) == FFI_TYPE_P
-        tdict[key] = W_FFIType(key, value)
-    return tdict
-    
 W_types.typedef = TypeDef(
     'types',
-    **build_ffi_types())
+    Pointer = interp2app(descr_new_pointer, as_classmethod=True),
+    **app_types.__dict__)
+
+
+def unwrap_ffitype(space, w_argtype, allow_void=False):
+    res = w_argtype.ffitype
+    if res is libffi.types.void and not allow_void:
+        msg = 'void is not a valid argument type'
+        raise OperationError(space.w_TypeError, space.wrap(msg))
+    return res
+
 
 # ========================================================================
 
 class W_FuncPtr(Wrappable):
 
-    _immutable_fields_ = ['func']
+    _immutable_fields_ = ['func', 'argtypes_w[*]', 'w_restype']
     
-    def __init__(self, func):
+    def __init__(self, func, argtypes_w, w_restype):
         self.func = func
+        self.argtypes_w = argtypes_w
+        self.w_restype = w_restype
 
     @jit.unroll_safe
-    def build_argchain(self, space, argtypes, args_w):
-        expected = len(argtypes)
+    def build_argchain(self, space, args_w):
+        expected = len(self.argtypes_w)
         given = len(args_w)
         if given != expected:
             arg = 'arguments'
-            if len(argtypes) == 1:
+            if len(self.argtypes_w) == 1:
                 arg = 'argument'
             raise operationerrfmt(space.w_TypeError,
                                   '%s() takes exactly %d %s (%d given)',
@@ -68,34 +175,103 @@
         #
         argchain = libffi.ArgChain()
         for i in range(expected):
-            argtype = argtypes[i]
+            w_argtype = self.argtypes_w[i]
             w_arg = args_w[i]
-            kind = libffi.types.getkind(argtype)
-            if kind == 'i':
+            if w_argtype.is_longlong():
+                # note that we must check for longlong first, because a
+                # longlong also satisfies is_signed() or is_unsigned()
+                assert libffi.IS_32_BIT
+                kind = libffi.types.getkind(w_argtype.ffitype) # XXX: remove the kind
+                self.arg_longlong(space, argchain, kind, w_arg)
+            elif w_argtype.is_signed():
                 argchain.arg(space.int_w(w_arg))
-            elif kind == 'u':
+            elif w_argtype.is_pointer():
+                w_arg = self.convert_pointer_arg_maybe(space, w_arg, w_argtype)
                 argchain.arg(intmask(space.uint_w(w_arg)))
-            elif kind == 'f':
+            elif w_argtype.is_unsigned():
+                argchain.arg(intmask(space.uint_w(w_arg)))
+            elif w_argtype.is_char():
+                w_arg = space.ord(w_arg)
+                argchain.arg(space.int_w(w_arg))
+            elif w_argtype.is_unichar():
+                w_arg = space.ord(w_arg)
+                argchain.arg(space.int_w(w_arg))
+            elif w_argtype.is_double():
                 argchain.arg(space.float_w(w_arg))
+            elif w_argtype.is_singlefloat():
+                argchain.arg_singlefloat(space.float_w(w_arg))
+            elif w_argtype.is_struct():
+                # arg_raw directly takes value to put inside ll_args
+                w_arg = space.interp_w(W_StructureInstance, w_arg)
+                ptrval = w_arg.ll_buffer
+                argchain.arg_raw(ptrval)
             else:
-                assert False, "Argument kind '%s' not supported" % kind
+                assert False, "Argument shape '%s' not supported" % w_argtype
         return argchain
 
+    def convert_pointer_arg_maybe(self, space, w_arg, w_argtype):
+        """
+        Try to convert the argument by calling _as_ffi_pointer_()
+        """
+        meth = space.lookup(w_arg, '_as_ffi_pointer_') # this also promotes the type
+        if meth:
+            return space.call_function(meth, w_arg, w_argtype)
+        else:
+            return w_arg
+
+    @jit.dont_look_inside
+    def arg_longlong(self, space, argchain, kind, w_arg):
+        bigarg = space.bigint_w(w_arg)
+        if kind == 'I':
+            llval = bigarg.tolonglong()
+        elif kind == 'U':
+            ullval = bigarg.toulonglong()
+            llval = rffi.cast(rffi.LONGLONG, ullval)
+        else:
+            assert False
+        # this is a hack: we store the 64 bits of the long long into the
+        # 64 bits of a float (i.e., a C double)
+        floatval = libffi.longlong2float(llval)
+        argchain.arg_longlong(floatval)
+
     def call(self, space, args_w):
         self = jit.hint(self, promote=True)
-        argchain = self.build_argchain(space, self.func.argtypes, args_w)
-        reskind = libffi.types.getkind(self.func.restype)
-        if reskind == 'i':
+        argchain = self.build_argchain(space, args_w)
+        w_restype = self.w_restype
+        if w_restype.is_longlong():
+            # note that we must check for longlong first, because a longlong
+            # also satisfies is_signed() or is_unsigned()
+            assert libffi.IS_32_BIT
+            reskind = libffi.types.getkind(self.func.restype) # XXX: remove the kind
+            return self._call_longlong(space, argchain, reskind)
+        elif w_restype.is_signed():
             return self._call_int(space, argchain)
-        elif reskind == 'u':
+        elif w_restype.is_unsigned() or w_restype.is_pointer():
             return self._call_uint(space, argchain)
-        elif reskind == 'f':
+        elif w_restype.is_char():
+            intres = self.func.call(argchain, rffi.UCHAR)
+            return space.wrap(chr(intres))
+        elif w_restype.is_unichar():
+            intres = self.func.call(argchain, rffi.WCHAR_T)
+            return space.wrap(unichr(intres))
+        elif w_restype.is_double():
             floatres = self.func.call(argchain, rffi.DOUBLE)
             return space.wrap(floatres)
-        else:
+        elif w_restype.is_singlefloat():
+            # the result is a float, but widened to be inside a double
+            floatres = self.func.call(argchain, rffi.FLOAT)
+            return space.wrap(floatres)
+        elif w_restype.is_struct():
+            w_datashape = w_restype.w_datashape
+            assert isinstance(w_datashape, W_Structure)
+            ptrval = self.func.call(argchain, rffi.ULONG, is_struct=True)
+            return w_datashape.fromaddress(space, ptrval)
+        elif w_restype.is_void():
             voidres = self.func.call(argchain, lltype.Void)
             assert voidres is None
             return space.w_None
+        else:
+            assert False, "Return value shape '%s' not supported" % w_restype
 
     def _call_int(self, space, argchain):
         # if the declared return type of the function is smaller than LONG,
@@ -138,6 +314,10 @@
             # special case
             uintres = call(argchain, rffi.ULONG)
             return space.wrap(uintres)
+        elif restype is libffi.types.pointer:
+            ptrres = call(argchain, rffi.VOIDP)
+            uintres = rffi.cast(rffi.ULONG, ptrres)
+            return space.wrap(uintres)
         elif restype is libffi.types.uint:
             intres = rffi.cast(rffi.LONG, call(argchain, rffi.UINT))
         elif restype is libffi.types.ushort:
@@ -149,16 +329,52 @@
                                  space.wrap('Unsupported restype'))
         return space.wrap(intres)
 
+    @jit.dont_look_inside
+    def _call_longlong(self, space, argchain, reskind):
+        # this is a hack: we store the 64 bits of the long long into the 64
+        # bits of a float (i.e., a C double)
+        floatres = self.func.call(argchain, rffi.LONGLONG)
+        llres = libffi.float2longlong(floatres)
+        if reskind == 'I':
+            return space.wrap(llres)
+        elif reskind == 'U':
+            ullres = rffi.cast(rffi.ULONGLONG, llres)
+            return space.wrap(ullres)
+        else:
+            assert False
+
     def getaddr(self, space):
         """
         Return the physical address in memory of the function
         """
         return space.wrap(rffi.cast(rffi.LONG, self.func.funcsym))
 
+
+
+def unpack_argtypes(space, w_argtypes, w_restype):
+    argtypes_w = [space.interp_w(W_FFIType, w_argtype)
+                  for w_argtype in space.listview(w_argtypes)]
+    argtypes = [unwrap_ffitype(space, w_argtype) for w_argtype in
+                argtypes_w]
+    w_restype = space.interp_w(W_FFIType, w_restype)
+    restype = unwrap_ffitype(space, w_restype, allow_void=True)
+    return argtypes_w, argtypes, w_restype, restype
+
+@unwrap_spec(addr=r_uint, name=str)
+def descr_fromaddr(space, w_cls, addr, name, w_argtypes, w_restype):
+    argtypes_w, argtypes, w_restype, restype = unpack_argtypes(space,
+                                                               w_argtypes,
+                                                               w_restype)
+    addr = rffi.cast(rffi.VOIDP, addr)
+    func = libffi.Func(name, argtypes, restype, addr)
+    return W_FuncPtr(func, argtypes_w, w_restype)
+
+
 W_FuncPtr.typedef = TypeDef(
-    'FuncPtr',
+    '_ffi.FuncPtr',
     __call__ = interp2app(W_FuncPtr.call),
     getaddr = interp2app(W_FuncPtr.getaddr),
+    fromaddr = interp2app(descr_fromaddr, as_classmethod=True)
     )
 
 
@@ -167,40 +383,57 @@
 
 class W_CDLL(Wrappable):
     def __init__(self, space, name):
+        self.space = space
+        if name is None:
+            self.name = "<None>"
+        else:
+            self.name = name
         try:
             self.cdll = libffi.CDLL(name)
         except DLOpenError, e:
-            raise operationerrfmt(space.w_OSError, '%s: %s', name,
+            raise operationerrfmt(space.w_OSError, '%s: %s', self.name,
                                   e.msg or 'unspecified error')
-        self.name = name
-        self.space = space
-
-    def ffitype(self, w_argtype, allow_void=False):
-        res = self.space.interp_w(W_FFIType, w_argtype).ffitype
-        if res is libffi.types.void and not allow_void:
-            space = self.space
-            msg = 'void is not a valid argument type'
-            raise OperationError(space.w_TypeError, space.wrap(msg))
-        return res
 
     @unwrap_spec(name=str)
     def getfunc(self, space, name, w_argtypes, w_restype):
-        argtypes = [self.ffitype(w_argtype) for w_argtype in
-                    space.listview(w_argtypes)]
-        restype = self.ffitype(w_restype, allow_void=True)
-        func = self.cdll.getpointer(name, argtypes, restype)
-        return W_FuncPtr(func)
+        argtypes_w, argtypes, w_restype, restype = unpack_argtypes(space,
+                                                                   w_argtypes,
+                                                                   w_restype)
+        try:
+            func = self.cdll.getpointer(name, argtypes, restype)
+        except KeyError:
+            raise operationerrfmt(space.w_AttributeError,
+                                  "No symbol %s found in library %s", name, self.name)
+            
+        return W_FuncPtr(func, argtypes_w, w_restype)
 
+    @unwrap_spec(name=str)
+    def getaddressindll(self, space, name):
+        try:
+            address_as_uint = rffi.cast(lltype.Unsigned,
+                                        self.cdll.getaddressindll(name))
+        except KeyError:
+            raise operationerrfmt(space.w_ValueError,
+                                  "No symbol %s found in library %s", name, self.name)
+        return space.wrap(address_as_uint)
 
-@unwrap_spec(name=str)
+@unwrap_spec(name='str_or_None')
 def descr_new_cdll(space, w_type, name):
     return space.wrap(W_CDLL(space, name))
 
 
 W_CDLL.typedef = TypeDef(
-    'CDLL',
+    '_ffi.CDLL',
     __new__     = interp2app(descr_new_cdll),
     getfunc     = interp2app(W_CDLL.getfunc),
+    getaddressindll = interp2app(W_CDLL.getaddressindll),
     )
 
 # ========================================================================
+
+def get_libc(space):
+    from pypy.rlib.clibffi import get_libc_name
+    try:
+        return space.wrap(W_CDLL(space, get_libc_name()))
+    except OSError, e:
+        raise wrap_oserror(space, e)
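
A note on the trick used by arg_longlong() and _call_longlong() above: the 64
bits of a long long are smuggled through the argchain inside a C double and
recovered on the other side. The round-trip can be sketched in plain Python
with the stdlib struct module (a minimal illustration only, assuming IEEE-754
doubles; the two helpers below merely stand in for the real
libffi.longlong2float/float2longlong):

    import struct

    def longlong2float(llval):
        # reinterpret the 8 bytes of a signed 64-bit integer as a double
        return struct.unpack('<d', struct.pack('<q', llval))[0]

    def float2longlong(floatval):
        # inverse: recover the original 64-bit pattern from the double
        return struct.unpack('<q', struct.pack('<d', floatval))[0]

    assert float2longlong(longlong2float((1 << 40) + 3)) == (1 << 40) + 3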
diff --git a/pypy/module/_ffi/test/test__ffi.py b/pypy/module/_ffi/test/test__ffi.py
--- a/pypy/module/_ffi/test/test__ffi.py
+++ b/pypy/module/_ffi/test/test__ffi.py
@@ -17,7 +17,13 @@
 
         c_file = udir.ensure("test__ffi", dir=1).join("foolib.c")
         # automatically collect the C source from the docstrings of the tests
-        snippets = []
+        snippets = ["""
+        #ifdef _WIN32
+        #define DLLEXPORT __declspec(dllexport)
+        #else
+        #define DLLEXPORT
+        #endif
+        """]
         for name in dir(cls):
             if name.startswith('test_'):
                 meth = getattr(cls, name)
@@ -35,8 +41,9 @@
         from pypy.rpython.lltypesystem import rffi
         from pypy.rlib.libffi import get_libc_name, CDLL, types
         from pypy.rlib.test.test_libffi import get_libm_name
-        space = gettestobjspace(usemodules=('_ffi',))
+        space = gettestobjspace(usemodules=('_ffi', '_rawffi'))
         cls.space = space
+        cls.w_iswin32 = space.wrap(sys.platform == 'win32')
         cls.w_libfoo_name = space.wrap(cls.prepare_c_example())
         cls.w_libc_name = space.wrap(get_libc_name())
         libm_name = get_libm_name(sys.platform)
@@ -45,6 +52,13 @@
         pow = libm.getpointer('pow', [], types.void)
         pow_addr = rffi.cast(rffi.LONG, pow.funcsym)
         cls.w_pow_addr = space.wrap(pow_addr)
+        #
+        # these are needed for test_single_float_args
+        from ctypes import c_float
+        f_12_34 = c_float(12.34).value
+        f_56_78 = c_float(56.78).value
+        f_result = c_float(f_12_34 + f_56_78).value
+        cls.w_f_12_34_plus_56_78 = space.wrap(f_result)
 
     def test_libload(self):
         import _ffi
@@ -54,10 +68,20 @@
         import _ffi
         raises(OSError, _ffi.CDLL, "xxxxx_this_name_does_not_exist_xxxxx")
 
+    def test_libload_None(self):
+        if self.iswin32:
+            skip("unix specific")
+        from _ffi import CDLL, types
+        # this should return *all* loaded libs, dlopen(NULL)
+        dll = CDLL(None)
+        # Assume CPython, or PyPy compiled with cpyext
+        res = dll.getfunc('Py_IsInitialized', [], types.slong)()
+        assert res == 1
+
     def test_simple_types(self):
         from _ffi import types
-        assert str(types.sint) == '<ffi type sint>'
-        assert str(types.uint) == '<ffi type uint>'
+        assert str(types.sint) == "<ffi type sint>"
+        assert str(types.uint) == "<ffi type uint>"
         
     def test_callfunc(self):
         from _ffi import CDLL, types
@@ -70,10 +94,27 @@
         libm = CDLL(self.libm_name)
         pow = libm.getfunc('pow', [types.double, types.double], types.double)
         assert pow.getaddr() == self.pow_addr
-        
+
+    def test_getaddressindll(self):
+        import sys
+        from _ffi import CDLL, types
+        libm = CDLL(self.libm_name)
+        pow_addr = libm.getaddressindll('pow')
+        assert pow_addr == self.pow_addr & (sys.maxint*2-1)
+
+    def test_func_fromaddr(self):
+        import sys
+        from _ffi import CDLL, types, FuncPtr
+        libm = CDLL(self.libm_name)
+        pow_addr = libm.getaddressindll('pow')
+        pow = FuncPtr.fromaddr(pow_addr, 'pow', [types.double, types.double],
+                               types.double)
+        assert pow(2, 3) == 8
+
+
     def test_int_args(self):
         """
-            int sum_xy(int x, int y)
+            DLLEXPORT int sum_xy(int x, int y)
             {
                 return x+y;
             }
@@ -86,8 +127,8 @@
     def test_void_result(self):
         """
             int dummy = 0;
-            void set_dummy(int val) { dummy = val; }
-            int get_dummy() { return dummy; }
+            DLLEXPORT void set_dummy(int val) { dummy = val; }
+            DLLEXPORT int get_dummy() { return dummy; }
         """
         from _ffi import CDLL, types
         libfoo = CDLL(self.libfoo_name)
@@ -96,10 +137,105 @@
         assert get_dummy() == 0
         assert set_dummy(42) is None
         assert get_dummy() == 42
+        set_dummy(0)
+
+    def test_pointer_args(self):
+        """
+            extern int dummy; // defined in test_void_result 
+            DLLEXPORT int* get_dummy_ptr() { return &dummy; }
+            DLLEXPORT void set_val_to_ptr(int* ptr, int val) { *ptr = val; }
+        """
+        from _ffi import CDLL, types
+        libfoo = CDLL(self.libfoo_name)
+        get_dummy = libfoo.getfunc('get_dummy', [], types.sint)
+        get_dummy_ptr = libfoo.getfunc('get_dummy_ptr', [], types.void_p)
+        set_val_to_ptr = libfoo.getfunc('set_val_to_ptr',
+                                        [types.void_p, types.sint],
+                                        types.void)
+        assert get_dummy() == 0
+        ptr = get_dummy_ptr()
+        set_val_to_ptr(ptr, 123)
+        assert get_dummy() == 123
+        set_val_to_ptr(ptr, 0)
+
+    def test_convert_pointer_args(self):
+        """
+            extern int dummy; // defined in test_void_result 
+            DLLEXPORT int* get_dummy_ptr(); // defined in test_pointer_args
+            DLLEXPORT void set_val_to_ptr(int* ptr, int val); // ditto
+        """
+        from _ffi import CDLL, types
+
+        class MyPointerWrapper(object):
+            def __init__(self, value):
+                self.value = value
+            def _as_ffi_pointer_(self, ffitype):
+                assert ffitype is types.void_p
+                return self.value
+        
+        libfoo = CDLL(self.libfoo_name)
+        get_dummy = libfoo.getfunc('get_dummy', [], types.sint)
+        get_dummy_ptr = libfoo.getfunc('get_dummy_ptr', [], types.void_p)
+        set_val_to_ptr = libfoo.getfunc('set_val_to_ptr',
+                                        [types.void_p, types.sint],
+                                        types.void)
+        assert get_dummy() == 0
+        ptr = get_dummy_ptr()
+        assert type(ptr) in (int, long)
+        ptr2 = MyPointerWrapper(ptr)
+        set_val_to_ptr(ptr2, 123)
+        assert get_dummy() == 123
+        set_val_to_ptr(ptr2, 0)
+
+    def test_typed_pointer(self):
+        from _ffi import types
+        intptr = types.Pointer(types.sint) # create a typed pointer to sint
+        assert intptr.deref_pointer() is types.sint
+        assert str(intptr) == '<ffi type (pointer to sint)>'
+        assert types.sint.deref_pointer() is None
+        raises(TypeError, "types.Pointer(42)")
+
+    def test_pointer_identity(self):
+        from _ffi import types
+        x = types.Pointer(types.slong)
+        y = types.Pointer(types.slong)
+        z = types.Pointer(types.char)
+        assert x is y
+        assert x is not z
+
+    def test_typed_pointer_args(self):
+        """
+            extern int dummy; // defined in test_void_result 
+            DLLEXPORT int* get_dummy_ptr(); // defined in test_pointer_args
+            DLLEXPORT void set_val_to_ptr(int* ptr, int val); // ditto
+        """
+        from _ffi import CDLL, types
+
+        libfoo = CDLL(self.libfoo_name)
+        intptr = types.Pointer(types.sint)
+        get_dummy = libfoo.getfunc('get_dummy', [], types.sint)
+        get_dummy_ptr = libfoo.getfunc('get_dummy_ptr', [], intptr)
+        set_val_to_ptr = libfoo.getfunc('set_val_to_ptr', [intptr, types.sint], types.void)
+        assert get_dummy() == 0
+        ptr = get_dummy_ptr()
+        set_val_to_ptr(ptr, 123)
+        assert get_dummy() == 123
+        set_val_to_ptr(ptr, 0)
+
+    def test_huge_pointer_args(self):
+        """
+            #include <stdlib.h>
+            DLLEXPORT long is_null_ptr(void* ptr) { return ptr == NULL; }
+        """
+        import sys
+        from _ffi import CDLL, types
+        libfoo = CDLL(self.libfoo_name)
+        is_null_ptr = libfoo.getfunc('is_null_ptr', [types.void_p], types.ulong)
+        assert not is_null_ptr(sys.maxint+1)
 
     def test_unsigned_long_args(self):
         """
-            unsigned long sum_xy_ul(unsigned long x, unsigned long y)
+            DLLEXPORT unsigned long sum_xy_ul(unsigned long x, unsigned long y)
             {
                 return x+y;
             }
@@ -114,12 +250,11 @@
 
     def test_unsigned_short_args(self):
         """
-            unsigned short sum_xy_us(unsigned short x, unsigned short y)
+            DLLEXPORT unsigned short sum_xy_us(unsigned short x, unsigned short y)
             {
                 return x+y;
             }
         """
-        import sys
         from _ffi import CDLL, types
         libfoo = CDLL(self.libfoo_name)
         sum_xy = libfoo.getfunc('sum_xy_us', [types.ushort, types.ushort],
@@ -127,6 +262,166 @@
         assert sum_xy(32000, 8000) == 40000
         assert sum_xy(60000, 30000) == 90000 % 65536
 
+    def test_unsigned_byte_args(self):
+        """
+            DLLEXPORT unsigned char sum_xy_ub(unsigned char x, unsigned char y)
+            {
+                return x+y;
+            }
+        """
+        from _ffi import CDLL, types
+        libfoo = CDLL(self.libfoo_name)
+        sum_xy = libfoo.getfunc('sum_xy_ub', [types.ubyte, types.ubyte],
+                                types.ubyte)
+        assert sum_xy(100, 40) == 140
+        assert sum_xy(200, 60) == 260 % 256
+
+    def test_signed_byte_args(self):
+        """
+            DLLEXPORT signed char sum_xy_sb(signed char x, signed char y)
+            {
+                return x+y;
+            }
+        """
+        from _ffi import CDLL, types
+        libfoo = CDLL(self.libfoo_name)
+        sum_xy = libfoo.getfunc('sum_xy_sb', [types.sbyte, types.sbyte],
+                                types.sbyte)
+        assert sum_xy(10, 20) == 30
+        assert sum_xy(100, 28) == -128
+
+    def test_char_args(self):
+        """
+            DLLEXPORT char my_toupper(char x)
+            {
+                return x - ('a'-'A');
+            }
+        """
+        from _ffi import CDLL, types
+        libfoo = CDLL(self.libfoo_name)
+        my_toupper = libfoo.getfunc('my_toupper', [types.char],
+                                    types.char)
+        assert my_toupper('c') == 'C'
+
+    def test_unichar_args(self):
+        """
+            #include <stddef.h>
+            DLLEXPORT wchar_t sum_xy_wc(wchar_t x, wchar_t y)
+            {
+                return x + y;
+            }
+        """
+        from _ffi import CDLL, types
+        libfoo = CDLL(self.libfoo_name)
+        sum_xy = libfoo.getfunc('sum_xy_wc', [types.unichar, types.unichar],
+                                types.unichar)
+        res = sum_xy(unichr(1000), unichr(2000))
+        assert type(res) is unicode
+        assert ord(res) == 3000
+
+    def test_single_float_args(self):
+        """
+            DLLEXPORT float sum_xy_float(float x, float y)
+            {
+                return x+y;
+            }
+        """
+        from _ffi import CDLL, types
+        libfoo = CDLL(self.libfoo_name)
+        sum_xy = libfoo.getfunc('sum_xy_float', [types.float, types.float],
+                                types.float)
+        res = sum_xy(12.34, 56.78)
+        assert res == self.f_12_34_plus_56_78
+
+
+    def test_slonglong_args(self):
+        """
+            DLLEXPORT long long sum_xy_longlong(long long x, long long y)
+            {
+                return x+y;
+            }
+        """
+        from _ffi import CDLL, types
+        maxint32 = 2147483647 # we cannot really go above maxint on 64 bits
+                              # (and we would not test anything, as there long
+                              # is the same as long long)
+
+        libfoo = CDLL(self.libfoo_name)
+        sum_xy = libfoo.getfunc('sum_xy_longlong', [types.slonglong, types.slonglong],
+                                types.slonglong)
+        x = maxint32+1
+        y = maxint32+2
+        res = sum_xy(x, y)
+        expected = maxint32*2 + 3
+        assert res == expected
+
+    def test_ulonglong_args(self):
+        """
+            DLLEXPORT unsigned long long sum_xy_ulonglong(unsigned long long x,
+                                                unsigned long long y)
+            {
+                return x+y;
+            }
+        """
+        from _ffi import CDLL, types
+        maxint64 = 9223372036854775807 # maxint64+1 does not fit into a
+                                       # longlong, but it does into a
+                                       # ulonglong
+        libfoo = CDLL(self.libfoo_name)
+        sum_xy = libfoo.getfunc('sum_xy_ulonglong', [types.ulonglong, types.ulonglong],
+                                types.ulonglong)
+        x = maxint64+1
+        y = 2
+        res = sum_xy(x, y)
+        expected = maxint64 + 3
+        assert res == expected
+
+    def test_byval_argument(self):
+        """
+            struct Point {
+                long x;
+                long y;
+            };
+
+            DLLEXPORT long sum_point(struct Point p) {
+                return p.x + p.y;
+            }
+        """
+        import _rawffi
+        from _ffi import CDLL, types
+        POINT = _rawffi.Structure([('x', 'l'), ('y', 'l')])
+        ffi_point = POINT.get_ffi_type()
+        libfoo = CDLL(self.libfoo_name)
+        sum_point = libfoo.getfunc('sum_point', [ffi_point], types.slong)
+        #
+        p = POINT()
+        p.x = 30
+        p.y = 12
+        res = sum_point(p)
+        assert res == 42
+        p.free()
+
+    def test_byval_result(self):
+        """
+            DLLEXPORT struct Point make_point(long x, long y) {
+                struct Point p;
+                p.x = x;
+                p.y = y;
+                return p;
+            }
+        """
+        import _rawffi
+        from _ffi import CDLL, types
+        POINT = _rawffi.Structure([('x', 'l'), ('y', 'l')])
+        ffi_point = POINT.get_ffi_type()
+        libfoo = CDLL(self.libfoo_name)
+        make_point = libfoo.getfunc('make_point', [types.slong, types.slong], ffi_point)
+        #
+        p = make_point(12, 34)
+        assert p.x == 12
+        assert p.y == 34
+        p.free()
+
     def test_TypeError_numargs(self):
         from _ffi import CDLL, types
         libfoo = CDLL(self.libfoo_name)
@@ -142,3 +437,10 @@
     def test_OSError_loading(self):
         from _ffi import CDLL, types
         raises(OSError, "CDLL('I do not exist')")
+
+    def test_AttributeError_missing_function(self):
+        from _ffi import CDLL, types
+        libfoo = CDLL(self.libfoo_name)
+        raises(AttributeError, "libfoo.getfunc('I_do_not_exist', [], types.void)")
+        libnone = CDLL(None)
+        raises(AttributeError, "libnone.getfunc('I_do_not_exist', [], types.void)")
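
A note on the f_12_34_plus_56_78 fixture computed in setup_class() above:
12.34 and 56.78 have no exact single-precision representation, so the expected
result of test_single_float_args must itself be rounded through c_float;
comparing against the plain double 12.34 + 56.78 would fail. A minimal
illustration using only the stdlib ctypes module:

    from ctypes import c_float

    # narrowing to binary32 changes the value, so it no longer equals the double
    assert c_float(12.34).value != 12.34

    # mirror what setup_class() does: narrow both operands and their sum
    f_12_34 = c_float(12.34).value
    f_56_78 = c_float(56.78).value
    expected = c_float(f_12_34 + f_56_78).value
    assert expected != 12.34 + 56.78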
diff --git a/pypy/module/_file/interp_file.py b/pypy/module/_file/interp_file.py
--- a/pypy/module/_file/interp_file.py
+++ b/pypy/module/_file/interp_file.py
@@ -349,11 +349,11 @@
 may be returned, even if no size parameter was given.""")
 
     _decl(locals(), "readline",
-        """readlines([size]) -> list of strings, each a line from the file.
+        """readline([size]) -> next line from the file, as a string.
 
-Call readline() repeatedly and return a list of the lines so read.
-The optional size argument, if given, is an approximate bound on the
-total number of bytes in the lines returned.""")
+Retain newline.  A non-negative size argument limits the maximum
+number of bytes to return (an incomplete line may be returned then).
+Return an empty string at EOF.""")
 
     _decl(locals(), "readlines",
         """readlines([size]) -> list of strings, each a line from the file.
diff --git a/pypy/module/_multibytecodec/c_codecs.py b/pypy/module/_multibytecodec/c_codecs.py
--- a/pypy/module/_multibytecodec/c_codecs.py
+++ b/pypy/module/_multibytecodec/c_codecs.py
@@ -3,6 +3,8 @@
 from pypy.translator.tool.cbuild import ExternalCompilationInfo
 from pypy.tool.autopath import pypydir
 
+UNICODE_REPLACEMENT_CHARACTER = u'\uFFFD'
+
 
 class EncodeDecodeError(Exception):
     def __init__(self, start, end, reason):
@@ -103,8 +105,12 @@
                                           [DECODEBUF_P], rffi.SSIZE_T)
 pypy_cjk_dec_inbuf_consumed = llexternal('pypy_cjk_dec_inbuf_consumed',
                                          [DECODEBUF_P], rffi.SSIZE_T)
+pypy_cjk_dec_replace_on_error = llexternal('pypy_cjk_dec_replace_on_error',
+                                           [DECODEBUF_P, rffi.CWCHARP,
+                                            rffi.SSIZE_T, rffi.SSIZE_T],
+                                           rffi.SSIZE_T)
 
-def decode(codec, stringdata):
+def decode(codec, stringdata, errors="strict", errorcb=None, namecb=None):
     inleft = len(stringdata)
     inbuf = rffi.get_nonmovingbuffer(stringdata)
     try:
@@ -112,10 +118,12 @@
         if not decodebuf:
             raise MemoryError
         try:
-            r = pypy_cjk_dec_chunk(decodebuf)
-            if r != 0:
-                multibytecodec_decerror(decodebuf, r)
-                assert False
+            while True:
+                r = pypy_cjk_dec_chunk(decodebuf)
+                if r == 0:
+                    break
+                multibytecodec_decerror(decodebuf, r, errors,
+                                        errorcb, namecb, stringdata)
             src = pypy_cjk_dec_outbuf(decodebuf)
             length = pypy_cjk_dec_outlen(decodebuf)
             return rffi.wcharpsize2unicode(src, length)
@@ -126,7 +134,8 @@
     finally:
         rffi.free_nonmovingbuffer(stringdata, inbuf)
 
-def multibytecodec_decerror(decodebuf, e):
+def multibytecodec_decerror(decodebuf, e, errors,
+                            errorcb, namecb, stringdata):
     if e > 0:
         reason = "illegal multibyte sequence"
         esize = e
@@ -138,12 +147,27 @@
     else:
         raise RuntimeError
     #
-    # if errors == ERROR_REPLACE:...
-    # if errors == ERROR_IGNORE or errors == ERROR_REPLACE:...
+    # compute the unicode to use as a replacement -> 'replace', and
+    # the current position in the input 'stringdata' -> 'end'
     start = pypy_cjk_dec_inbuf_consumed(decodebuf)
     end = start + esize
-    if 1:  # errors == ERROR_STRICT:
+    if errors == "strict":
         raise EncodeDecodeError(start, end, reason)
+    elif errors == "ignore":
+        replace = u""
+    elif errors == "replace":
+        replace = UNICODE_REPLACEMENT_CHARACTER
+    else:
+        assert errorcb
+        replace, end = errorcb(errors, namecb, reason,
+                               stringdata, start, end)
+    inbuf = rffi.get_nonmoving_unicodebuffer(replace)
+    try:
+        r = pypy_cjk_dec_replace_on_error(decodebuf, inbuf, len(replace), end)
+    finally:
+        rffi.free_nonmoving_unicodebuffer(replace, inbuf)
+    if r == MBERR_NOMEMORY:
+        raise MemoryError
 
 # ____________________________________________________________
 # Encoding
@@ -165,8 +189,12 @@
                                           [ENCODEBUF_P], rffi.SSIZE_T)
 pypy_cjk_enc_inbuf_consumed = llexternal('pypy_cjk_enc_inbuf_consumed',
                                          [ENCODEBUF_P], rffi.SSIZE_T)
+pypy_cjk_enc_replace_on_error = llexternal('pypy_cjk_enc_replace_on_error',
+                                           [ENCODEBUF_P, rffi.CCHARP,
+                                            rffi.SSIZE_T, rffi.SSIZE_T],
+                                           rffi.SSIZE_T)
 
-def encode(codec, unicodedata):
+def encode(codec, unicodedata, errors="strict", errorcb=None, namecb=None):
     inleft = len(unicodedata)
     inbuf = rffi.get_nonmoving_unicodebuffer(unicodedata)
     try:
@@ -174,14 +202,18 @@
         if not encodebuf:
             raise MemoryError
         try:
-            r = pypy_cjk_enc_chunk(encodebuf)
-            if r != 0:
-                multibytecodec_encerror(encodebuf, r)
-                assert False
-            r = pypy_cjk_enc_reset(encodebuf)
-            if r != 0:
-                multibytecodec_encerror(encodebuf, r)
-                assert False
+            while True:
+                r = pypy_cjk_enc_chunk(encodebuf)
+                if r == 0:
+                    break
+                multibytecodec_encerror(encodebuf, r, errors,
+                                        codec, errorcb, namecb, unicodedata)
+            while True:
+                r = pypy_cjk_enc_reset(encodebuf)
+                if r == 0:
+                    break
+                multibytecodec_encerror(encodebuf, r, errors,
+                                        codec, errorcb, namecb, unicodedata)
             src = pypy_cjk_enc_outbuf(encodebuf)
             length = pypy_cjk_enc_outlen(encodebuf)
             return rffi.charpsize2str(src, length)
@@ -192,7 +224,8 @@
     finally:
         rffi.free_nonmoving_unicodebuffer(unicodedata, inbuf)
 
-def multibytecodec_encerror(encodebuf, e):
+def multibytecodec_encerror(encodebuf, e, errors,
+                            codec, errorcb, namecb, unicodedata):
     if e > 0:
         reason = "illegal multibyte sequence"
         esize = e
@@ -204,9 +237,27 @@
     else:
         raise RuntimeError
     #
-    # if errors == ERROR_REPLACE:...
-    # if errors == ERROR_IGNORE or errors == ERROR_REPLACE:...
+    # compute the string to use as a replacement -> 'replace', and
+    # the current position in the input 'unicodedata' -> 'end'
     start = pypy_cjk_enc_inbuf_consumed(encodebuf)
     end = start + esize
-    if 1:  # errors == ERROR_STRICT:
+    if errors == "strict":
         raise EncodeDecodeError(start, end, reason)
+    elif errors == "ignore":
+        replace = ""
+    elif errors == "replace":
+        try:
+            replace = encode(codec, u"?")
+        except EncodeDecodeError:
+            replace = "?"
+    else:
+        assert errorcb
+        replace, end = errorcb(errors, namecb, reason,
+                               unicodedata, start, end)
+    inbuf = rffi.get_nonmovingbuffer(replace)
+    try:
+        r = pypy_cjk_enc_replace_on_error(encodebuf, inbuf, len(replace), end)
+    finally:
+        rffi.free_nonmovingbuffer(replace, inbuf)
+    if r == MBERR_NOMEMORY:
+        raise MemoryError
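
The errorcb hook threaded through decode()/encode() above receives the
error-handler name, the codec name, the failure reason, the input and the
(start, end) positions, and must return a (replacement, resume-position) pair,
which is then pushed back into the C buffer via
pypy_cjk_dec_replace_on_error() / pypy_cjk_enc_replace_on_error(). At app
level this surfaces as the usual codecs error-handler protocol; a minimal
sketch (the handler name is made up for illustration, and it assumes the hz
codec from _codecs_cn is available):

    import codecs

    def replace_with_marker(exc):
        # return (replacement unicode, position at which decoding resumes);
        # this becomes the (replace, end) pair fed back into the decode buffer
        return (u'\ufffd', exc.end)

    codecs.register_error("example.marker", replace_with_marker)

    # behaves like errors='replace', which also substitutes U+FFFD
    u = "def~{}abc".decode("hz", "example.marker")
    assert u == "def~{}abc".decode("hz", "replace")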
diff --git a/pypy/module/_multibytecodec/interp_multibytecodec.py b/pypy/module/_multibytecodec/interp_multibytecodec.py
--- a/pypy/module/_multibytecodec/interp_multibytecodec.py
+++ b/pypy/module/_multibytecodec/interp_multibytecodec.py
@@ -3,6 +3,7 @@
 from pypy.interpreter.typedef import TypeDef
 from pypy.interpreter.error import OperationError
 from pypy.module._multibytecodec import c_codecs
+from pypy.module._codecs.interp_codecs import CodecState
 
 
 class MultibyteCodec(Wrappable):
@@ -13,13 +14,13 @@
 
     @unwrap_spec(input=str, errors="str_or_None")
     def decode(self, space, input, errors=None):
-        if errors is not None and errors != 'strict':
-            raise OperationError(space.w_NotImplementedError,    # XXX
-                                 space.wrap("errors='%s' in _multibytecodec"
-                                            % errors))
+        if errors is None:
+            errors = 'strict'
+        state = space.fromcache(CodecState)
         #
         try:
-            output = c_codecs.decode(self.codec, input)
+            output = c_codecs.decode(self.codec, input, errors,
+                                     state.decode_error_handler, self.name)
         except c_codecs.EncodeDecodeError, e:
             raise OperationError(
                 space.w_UnicodeDecodeError,
@@ -37,13 +38,13 @@
 
     @unwrap_spec(input=unicode, errors="str_or_None")
     def encode(self, space, input, errors=None):
-        if errors is not None and errors != 'strict':
-            raise OperationError(space.w_NotImplementedError,    # XXX
-                                 space.wrap("errors='%s' in _multibytecodec"
-                                            % errors))
+        if errors is None:
+            errors = 'strict'
+        state = space.fromcache(CodecState)
         #
         try:
-            output = c_codecs.encode(self.codec, input)
+            output = c_codecs.encode(self.codec, input, errors,
+                                     state.encode_error_handler, self.name)
         except c_codecs.EncodeDecodeError, e:
             raise OperationError(
                 space.w_UnicodeEncodeError,
diff --git a/pypy/module/_multibytecodec/test/test_app_codecs.py b/pypy/module/_multibytecodec/test/test_app_codecs.py
--- a/pypy/module/_multibytecodec/test/test_app_codecs.py
+++ b/pypy/module/_multibytecodec/test/test_app_codecs.py
@@ -36,6 +36,37 @@
         e = raises(UnicodeDecodeError, codec.decode, "~{xyz}").value
         assert e.args == ('hz', '~{xyz}', 2, 4, 'illegal multibyte sequence')
 
+    def test_decode_hz_ignore(self):
+        import _codecs_cn
+        codec = _codecs_cn.getcodec("hz")
+        r = codec.decode("def~{}abc", errors='ignore')
+        assert r == (u'def\u5fcf', 9)
+        r = codec.decode("def~{}abc", 'ignore')
+        assert r == (u'def\u5fcf', 9)
+
+    def test_decode_hz_replace(self):
+        import _codecs_cn
+        codec = _codecs_cn.getcodec("hz")
+        r = codec.decode("def~{}abc", errors='replace')
+        assert r == (u'def\ufffd\u5fcf', 9)
+        r = codec.decode("def~{}abc", 'replace')
+        assert r == (u'def\ufffd\u5fcf', 9)
+
+    def test_decode_custom_error_handler(self):
+        import codecs
+        codecs.register_error("test.decode_custom_error_handler",
+                              lambda e: (u'\u1234\u5678', e.end))
+        u = "abc\xDD".decode("hz", "test.decode_custom_error_handler")
+        assert u == u'abc\u1234\u5678'
+
+    def test_decode_custom_error_handler_overflow(self):
+        import codecs
+        import sys
+        codecs.register_error("test.test_decode_custom_error_handler_overflow",
+                              lambda e: (u'', sys.maxint + 1))
+        raises((IndexError, OverflowError), "abc\xDD".decode, "hz",
+               "test.test_decode_custom_error_handler_overflow")
+
     def test_encode_hz(self):
         import _codecs_cn
         codec = _codecs_cn.getcodec("hz")
@@ -54,3 +85,24 @@
         assert e.start == 3
         assert e.end == 4
         assert e.reason == 'illegal multibyte sequence'
+
+    def test_encode_hz_ignore(self):
+        import _codecs_cn
+        codec = _codecs_cn.getcodec("hz")
+        r = codec.encode(u'abc\u1234def', 'ignore')
+        assert r == ('abcdef', 7)
+        assert type(r[0]) is str
+
+    def test_encode_hz_replace(self):
+        import _codecs_cn
+        codec = _codecs_cn.getcodec("hz")
+        r = codec.encode(u'abc\u1234def', 'replace')
+        assert r == ('abc?def', 7)
+        assert type(r[0]) is str
+
+    def test_encode_custom_error_handler(self):
+        import codecs
+        codecs.register_error("test.multi_bad_handler", lambda e: (repl, 1))
+        repl = u"\u2014"
+        s = u"\uDDA1".encode("gbk", "test.multi_bad_handler")
+        assert s == '\xA1\xAA'
diff --git a/pypy/module/_multibytecodec/test/test_c_codecs.py b/pypy/module/_multibytecodec/test/test_c_codecs.py
--- a/pypy/module/_multibytecodec/test/test_c_codecs.py
+++ b/pypy/module/_multibytecodec/test/test_c_codecs.py
@@ -36,6 +36,16 @@
     assert e.end == 4
     assert e.reason == "illegal multibyte sequence"
 
+def test_decode_hz_ignore():
+    c = getcodec("hz")
+    u = decode(c, 'def~{}abc', 'ignore')
+    assert u == u'def\u5fcf'
+
+def test_decode_hz_replace():
+    c = getcodec("hz")
+    u = decode(c, 'def~{}abc', 'replace')
+    assert u == u'def\ufffd\u5fcf'
+
 def test_encode_hz():
     c = getcodec("hz")
     s = encode(c, u'foobar')
@@ -51,6 +61,16 @@
     assert e.end == 4
     assert e.reason == "illegal multibyte sequence"
 
+def test_encode_hz_ignore():
+    c = getcodec("hz")
+    s = encode(c, u'abc\u1234def', 'ignore')
+    assert s == 'abcdef'
+
+def test_encode_hz_replace():
+    c = getcodec("hz")
+    s = encode(c, u'abc\u1234def', 'replace')
+    assert s == 'abc?def'
+
 def test_encode_jisx0208():
     c = getcodec('iso2022_jp')
     s = encode(c, u'\u83ca\u5730\u6642\u592b')
diff --git a/pypy/module/_multiprocessing/test/test_memory.py b/pypy/module/_multiprocessing/test/test_memory.py
--- a/pypy/module/_multiprocessing/test/test_memory.py
+++ b/pypy/module/_multiprocessing/test/test_memory.py
@@ -3,7 +3,7 @@
 class AppTestMemory:
     def setup_class(cls):
         space = gettestobjspace(
-            usemodules=('_multiprocessing', 'mmap', '_rawffi'))
+            usemodules=('_multiprocessing', 'mmap', '_rawffi', '_ffi'))
         cls.space = space
 
     def test_address_of(self):
diff --git a/pypy/module/_rawffi/interp_rawffi.py b/pypy/module/_rawffi/interp_rawffi.py
--- a/pypy/module/_rawffi/interp_rawffi.py
+++ b/pypy/module/_rawffi/interp_rawffi.py
@@ -176,7 +176,7 @@
             except KeyError:
                 raise operationerrfmt(space.w_AttributeError,
                     "No symbol %s found in library %s", name, self.name)
-        
+
         elif (_MS_WINDOWS and
               space.is_true(space.isinstance(w_name, space.w_int))):
             ordinal = space.int_w(w_name)
@@ -250,11 +250,18 @@
     def get_basic_ffi_type(self):
         raise NotImplementedError
 
+    def descr_get_ffi_type(self, space):
+        # XXX: this assumes that you have the _ffi module enabled. In the long
+        # term, we will probably move the code for building structures and arrays
+        # from _rawffi to _ffi
+        from pypy.module._ffi.interp_ffi import W_FFIType
+        return W_FFIType('<unknown>', self.get_basic_ffi_type(), self)
+
     @unwrap_spec(n=int)
     def descr_size_alignment(self, space, n=1):
         return space.newtuple([space.wrap(self.size * n),
                                space.wrap(self.alignment)])
-    
+
 
 class W_DataInstance(Wrappable):
     def __init__(self, space, size, address=r_uint(0)):
@@ -420,7 +427,7 @@
                     if not (argletter in TYPEMAP_PTR_LETTERS and
                             letter in TYPEMAP_PTR_LETTERS):
                         msg = "Argument %d should be typecode %s, got %s"
-                        raise operationerrfmt(space.w_TypeError, msg, 
+                        raise operationerrfmt(space.w_TypeError, msg,
                                               i+1, argletter, letter)
             args_ll.append(arg.ll_buffer)
             # XXX we could avoid the intermediate list args_ll
@@ -473,17 +480,25 @@
 alignment = _create_new_accessor('alignment', 'c_alignment')
 
 @unwrap_spec(address=r_uint, maxlength=int)
-def charp2string(space, address, maxlength=sys.maxint):
+def charp2string(space, address, maxlength=-1):
     if address == 0:
         return space.w_None
-    s = rffi.charp2strn(rffi.cast(rffi.CCHARP, address), maxlength)
+    charp_addr = rffi.cast(rffi.CCHARP, address)
+    if maxlength == -1:
+        s = rffi.charp2str(charp_addr)
+    else:
+        s = rffi.charp2strn(charp_addr, maxlength)
     return space.wrap(s)
 
 @unwrap_spec(address=r_uint, maxlength=int)
-def wcharp2unicode(space, address, maxlength=sys.maxint):
+def wcharp2unicode(space, address, maxlength=-1):
     if address == 0:
         return space.w_None
-    s = rffi.wcharp2unicoden(rffi.cast(rffi.CWCHARP, address), maxlength)
+    wcharp_addr = rffi.cast(rffi.CWCHARP, address)
+    if maxlength == -1:
+        s = rffi.wcharp2unicode(wcharp_addr)
+    else:
+        s = rffi.wcharp2unicoden(wcharp_addr, maxlength)
     return space.wrap(s)
 
 @unwrap_spec(address=r_uint, maxlength=int)
diff --git a/pypy/module/_rawffi/structure.py b/pypy/module/_rawffi/structure.py
--- a/pypy/module/_rawffi/structure.py
+++ b/pypy/module/_rawffi/structure.py
@@ -248,7 +248,8 @@
     alignment   = interp_attrproperty('alignment', W_Structure),
     fieldoffset = interp2app(W_Structure.descr_fieldoffset),
     fieldsize   = interp2app(W_Structure.descr_fieldsize),
-    size_alignment = interp2app(W_Structure.descr_size_alignment)
+    size_alignment = interp2app(W_Structure.descr_size_alignment),
+    get_ffi_type   = interp2app(W_Structure.descr_get_ffi_type),
 )
 W_Structure.typedef.acceptable_as_base_class = False
 
diff --git a/pypy/module/_ssl/interp_ssl.py b/pypy/module/_ssl/interp_ssl.py
--- a/pypy/module/_ssl/interp_ssl.py
+++ b/pypy/module/_ssl/interp_ssl.py
@@ -1,3 +1,4 @@
+from __future__ import with_statement
 from pypy.rpython.lltypesystem import rffi, lltype
 from pypy.interpreter.error import OperationError
 from pypy.interpreter.baseobjspace import W_Root, ObjSpace, Wrappable
diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py
--- a/pypy/module/array/interp_array.py
+++ b/pypy/module/array/interp_array.py
@@ -526,15 +526,7 @@
 
     def array_tostring__Array(space, self):
         cbuf = self.charbuf()
-        s = ''.join([cbuf[i] for i in xrange(self.len * mytype.bytes)])
-        return self.space.wrap(s)
-##
-##         s = ''
-##         i = 0
-##         while i < self.len * mytype.bytes:
-##             s += cbuf[i]
-##             i += 1
-##         return self.space.wrap(s)
+        return self.space.wrap(rffi.charpsize2str(cbuf, self.len * mytype.bytes))
 
     def array_fromfile__Array_ANY_ANY(space, self, w_f, w_n):
         if not isinstance(w_f, W_File):
diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py
--- a/pypy/module/cpyext/api.py
+++ b/pypy/module/cpyext/api.py
@@ -348,6 +348,7 @@
     '_Py_TrueStruct#': ('PyObject*', 'space.w_True'),
     '_Py_ZeroStruct#': ('PyObject*', 'space.w_False'),
     '_Py_NotImplementedStruct#': ('PyObject*', 'space.w_NotImplemented'),
+    '_Py_EllipsisObject#': ('PyObject*', 'space.w_Ellipsis'),
     'PyDateTimeAPI': ('PyDateTime_CAPI*', 'None'),
     }
 FORWARD_DECLS = []
@@ -966,6 +967,7 @@
     state = space.fromcache(State)
     if state.find_extension(name, path) is not None:
         return
+    old_context = state.package_context
     state.package_context = name, path
     try:
         from pypy.rlib import rdynload
@@ -991,7 +993,7 @@
         generic_cpy_call(space, initfunc)
         state.check_and_raise_exception()
     finally:
-        state.package_context = None, None
+        state.package_context = old_context
     state.fixup_extension(name, path)
 
 @specialize.ll()
diff --git a/pypy/module/cpyext/classobject.py b/pypy/module/cpyext/classobject.py
--- a/pypy/module/cpyext/classobject.py
+++ b/pypy/module/cpyext/classobject.py
@@ -31,4 +31,9 @@
         return w_result
     return w_instance.w_class.lookup(space, name)
 
+@cpython_api([PyObject, PyObject, PyObject], PyObject)
+def PyClass_New(space, w_bases, w_dict, w_name):
+    w_classobj = space.gettypefor(W_ClassObject)
+    return space.call_function(w_classobj,
+                               w_name, w_bases, w_dict)
 
diff --git a/pypy/module/cpyext/frameobject.py b/pypy/module/cpyext/frameobject.py
--- a/pypy/module/cpyext/frameobject.py
+++ b/pypy/module/cpyext/frameobject.py
@@ -1,6 +1,7 @@
 from pypy.rpython.lltypesystem import rffi, lltype
 from pypy.module.cpyext.api import (
-    cpython_api, bootstrap_function, PyObjectFields, cpython_struct)
+    cpython_api, bootstrap_function, PyObjectFields, cpython_struct,
+    CANNOT_FAIL)
 from pypy.module.cpyext.pyobject import (
     PyObject, Py_DecRef, make_ref, from_ref, track_reference,
     make_typedescr, get_typedescr)
@@ -9,6 +10,7 @@
 from pypy.module.cpyext.funcobject import PyCodeObject
 from pypy.interpreter.pyframe import PyFrame
 from pypy.interpreter.pycode import PyCode
+from pypy.interpreter.pytraceback import PyTraceback
 
 PyFrameObjectStruct = lltype.ForwardReference()
 PyFrameObject = lltype.Ptr(PyFrameObjectStruct)
@@ -80,3 +82,8 @@
     frame = space.interp_w(PyFrame, w_frame)
     record_application_traceback(space, state.operror, frame, 0)
     return 0
+
+@cpython_api([PyObject], rffi.INT_real, error=CANNOT_FAIL)
+def PyTraceBack_Check(space, w_obj):
+    obj = space.interpclass_w(w_obj)
+    return obj is not None and isinstance(obj, PyTraceback)
diff --git a/pypy/module/cpyext/funcobject.py b/pypy/module/cpyext/funcobject.py
--- a/pypy/module/cpyext/funcobject.py
+++ b/pypy/module/cpyext/funcobject.py
@@ -69,6 +69,10 @@
     assert isinstance(w_method, Method)
     return borrow_from(w_method, w_method.w_class)
 
+@cpython_api([PyObject], PyObject)
+def PyClassMethod_New(space, w_function):
+    return space.call_method(space.builtin, "classmethod", w_function)
+
 def unwrap_list_of_strings(space, w_list):
     return [space.str_w(w_item) for w_item in space.fixedview(w_list)]
 
diff --git a/pypy/module/cpyext/intobject.py b/pypy/module/cpyext/intobject.py
--- a/pypy/module/cpyext/intobject.py
+++ b/pypy/module/cpyext/intobject.py
@@ -4,7 +4,7 @@
 from pypy.module.cpyext.api import (
     cpython_api, build_type_checkers, PyObject,
     CONST_STRING, CANNOT_FAIL, Py_ssize_t)
-from pypy.rlib.rarithmetic import r_uint
+from pypy.rlib.rarithmetic import r_uint, intmask, LONG_TEST
 import sys
 
 PyInt_Check, PyInt_CheckExact = build_type_checkers("Int")
@@ -73,13 +73,24 @@
                              space.wrap("an integer is required, got NULL"))
     return space.int_w(w_obj) # XXX this is wrong on win64
 
+LONG_MAX = int(LONG_TEST - 1)
+
+@cpython_api([rffi.SIZE_T], PyObject)
+def PyInt_FromSize_t(space, ival):
+    """Create a new integer object with a value of ival. If the value exceeds
+    LONG_MAX, a long integer object is returned.
+    """
+    if ival <= LONG_MAX:
+        return space.wrap(intmask(ival))
+    return space.wrap(ival)
+
 @cpython_api([Py_ssize_t], PyObject)
 def PyInt_FromSsize_t(space, ival):
     """Create a new integer object with a value of ival. If the value is larger
     than LONG_MAX or smaller than LONG_MIN, a long integer object is
     returned.
     """
-    return space.wrap(ival) # XXX this is wrong on win64
+    return space.wrap(ival)
 
 @cpython_api([CONST_STRING, rffi.CCHARPP, rffi.INT_real], PyObject)
 def PyInt_FromString(space, str, pend, base):
diff --git a/pypy/module/cpyext/number.py b/pypy/module/cpyext/number.py
--- a/pypy/module/cpyext/number.py
+++ b/pypy/module/cpyext/number.py
@@ -49,6 +49,13 @@
     failure.  This is the equivalent of the Python expression long(o)."""
     return space.long(w_obj)
 
+@cpython_api([PyObject], PyObject)
+def PyNumber_Index(space, w_obj):
+    """Returns the o converted to a Python int or long on success or NULL with a
+    TypeError exception raised on failure.
+    """
+    return space.index(w_obj)
+
 def func_rename(newname):
     return lambda func: func_with_new_name(func, newname)
 
diff --git a/pypy/module/cpyext/src/modsupport.c b/pypy/module/cpyext/src/modsupport.c
--- a/pypy/module/cpyext/src/modsupport.c
+++ b/pypy/module/cpyext/src/modsupport.c
@@ -611,8 +611,8 @@
 	if (result != NULL && n > 0) {
 		for (i = 0; i < n; ++i) {
 			tmp = (PyObject *)va_arg(va, PyObject *);
+			Py_INCREF(tmp);
 			PyTuple_SET_ITEM(result, i, tmp);
-			Py_INCREF(tmp);
 		}
 	}
 	return result;
diff --git a/pypy/module/cpyext/stringobject.py b/pypy/module/cpyext/stringobject.py
--- a/pypy/module/cpyext/stringobject.py
+++ b/pypy/module/cpyext/stringobject.py
@@ -2,7 +2,7 @@
 from pypy.rpython.lltypesystem import rffi, lltype
 from pypy.module.cpyext.api import (
     cpython_api, cpython_struct, bootstrap_function, build_type_checkers,
-    PyObjectFields, Py_ssize_t, CONST_STRING)
+    PyObjectFields, Py_ssize_t, CONST_STRING, CANNOT_FAIL)
 from pypy.module.cpyext.pyerrors import PyErr_BadArgument
 from pypy.module.cpyext.pyobject import (
     PyObject, PyObjectP, Py_DecRef, make_ref, from_ref, track_reference,
@@ -203,6 +203,10 @@
     ref[0] = rffi.cast(PyObject, py_newstr)
     return 0
 
+@cpython_api([PyObject, PyObject], rffi.INT, error=CANNOT_FAIL)
+def _PyString_Eq(space, w_str1, w_str2):
+    return space.eq_w(w_str1, w_str2)
+
 @cpython_api([PyObjectP, PyObject], lltype.Void)
 def PyString_Concat(space, ref, w_newpart):
     """Create a new string object in *string containing the contents of newpart
diff --git a/pypy/module/cpyext/stubs.py b/pypy/module/cpyext/stubs.py
--- a/pypy/module/cpyext/stubs.py
+++ b/pypy/module/cpyext/stubs.py
@@ -172,12 +172,6 @@
     This is equivalent to (PyBUF_ND)."""
     raise NotImplementedError
 
-@cpython_api([Py_buffer], lltype.Void)
-def PyBuffer_Release(space, view):
-    """Release the buffer view.  This should be called when the buffer
-    is no longer being used as it may free memory from it."""
-    raise NotImplementedError
-
 @cpython_api([rffi.CCHARP], Py_ssize_t, error=CANNOT_FAIL)
 def PyBuffer_SizeFromFormat(space, format):
     """Return the implied ~Py_buffer.itemsize from the struct-stype
@@ -198,13 +192,6 @@
     given shape with the given number of bytes per element."""
     raise NotImplementedError
 
-@cpython_api([Py_buffer, PyObject, rffi.VOIDP, Py_ssize_t, rffi.INT_real, rffi.INT_real], rffi.INT_real, error=-1)
-def PyBuffer_FillInfo(space, view, obj, buf, len, readonly, infoflags):
-    """Fill in a buffer-info structure, view, correctly for an exporter that can
-    only share a contiguous chunk of memory of "unsigned bytes" of the given
-    length.  Return 0 on success and -1 (with raising an error) on error."""
-    raise NotImplementedError
-
 @cpython_api([Py_buffer], PyObject)
 def PyMemoryView_FromBuffer(space, view):
     """Create a memoryview object wrapping the given buffer-info structure view.
@@ -1094,14 +1081,6 @@
     """
     raise NotImplementedError
 
-@cpython_api([PyObject], PyObject)
-def PyImport_ReloadModule(space, m):
-    """Reload a module.  This is best described by referring to the built-in
-    Python function reload(), as the standard reload() function calls this
-    function directly.  Return a new reference to the reloaded module, or NULL
-    with an exception set on failure (the module still exists in this case)."""
-    raise NotImplementedError
-
 @cpython_api([rffi.CCHARP, PyObject], PyObject)
 def PyImport_ExecCodeModule(space, name, co):
     """Given a module name (possibly of the form package.module) and a code
@@ -1140,13 +1119,6 @@
     of the bytecode file, in little-endian byte order."""
     raise NotImplementedError
 
-@cpython_api([], PyObject)
-def PyImport_GetModuleDict(space):
-    """Return the dictionary used for the module administration (a.k.a.
-    sys.modules).  Note that this is a per-interpreter variable."""
-    borrow_from()
-    raise NotImplementedError
-
 @cpython_api([PyObject], PyObject)
 def PyImport_GetImporter(space, path):
     """Return an importer object for a sys.path/pkg.__path__ item
@@ -1701,13 +1673,6 @@
     """
     raise NotImplementedError
 
-@cpython_api([rffi.SIZE_T], PyObject)
-def PyInt_FromSize_t(space, ival):
-    """Create a new integer object with a value of ival. If the value exceeds
-    LONG_MAX, a long integer object is returned.
-    """
-    raise NotImplementedError
-
 @cpython_api([PyObject], rffi.ULONGLONG, error=-1)
 def PyInt_AsUnsignedLongLongMask(space, io):
     """Will first attempt to cast the object to a PyIntObject or
@@ -1920,13 +1885,6 @@
     Reference counts are still not increased in this case."""
     raise NotImplementedError
 
-@cpython_api([PyObject], PyObject)
-def PyNumber_Index(space, o):
-    """Returns the o converted to a Python int or long on success or NULL with a
-    TypeError exception raised on failure.
-    """
-    raise NotImplementedError
-
 @cpython_api([PyObject, rffi.INT_real], PyObject)
 def PyNumber_ToBase(space, n, base):
     """Returns the integer n converted to base as a string with a base
@@ -2254,15 +2212,6 @@
     standard C library function exit(status)."""
     raise NotImplementedError
 
-@cpython_api([PyObject, Py_ssize_t, Py_ssize_t], PyObject)
-def PyTuple_GetSlice(space, p, low, high):
-    """Take a slice of the tuple pointed to by p from low to high and return it
-    as a new tuple.
-
-    This function used an int type for low and high. This might
-    require changes in your code for properly supporting 64-bit systems."""
-    raise NotImplementedError
-
 @cpython_api([], rffi.INT_real, error=CANNOT_FAIL)
 def PyTuple_ClearFreeList(space):
     """Clear the free list. Return the total number of freed items.
@@ -2275,14 +2224,6 @@
     """
     raise NotImplementedError
 
-@cpython_api([PyTypeObjectPtr], lltype.Void)
-def PyType_Modified(space, type):
-    """Invalidate the internal lookup cache for the type and all of its
-    subtypes.  This function must be called after any manual
-    modification of the attributes or base classes of the type.
-    """
-    raise NotImplementedError
-
 @cpython_api([PyObject], rffi.INT_real, error=CANNOT_FAIL)
 def PyType_IS_GC(space, o):
     """Return true if the type object includes support for the cycle detector; this
diff --git a/pypy/module/cpyext/test/test_classobject.py b/pypy/module/cpyext/test/test_classobject.py
--- a/pypy/module/cpyext/test/test_classobject.py
+++ b/pypy/module/cpyext/test/test_classobject.py
@@ -40,3 +40,14 @@
         assert not isinstance(api.PyObject_GetAttr(w_instance, space.wrap('f')), Function)
         # _PyInstance_Lookup returns the raw descriptor
         assert isinstance(api._PyInstance_Lookup(w_instance, space.wrap('f')), Function)
+
+    def test_pyclass_new(self, space, api):
+        w_bases = space.newtuple([])
+        w_dict = space.newdict()
+        w_name = space.wrap("C")
+        w_class = api.PyClass_New(w_bases, w_dict, w_name)
+        assert not space.isinstance_w(w_class, space.w_type)
+        w_instance = space.call_function(w_class)
+        assert api.PyInstance_Check(w_instance)
+        assert space.is_true(space.call_method(space.builtin, "isinstance",
+                                               w_instance, w_class))
diff --git a/pypy/module/cpyext/test/test_eval.py b/pypy/module/cpyext/test/test_eval.py
--- a/pypy/module/cpyext/test/test_eval.py
+++ b/pypy/module/cpyext/test/test_eval.py
@@ -193,3 +193,32 @@
             return args
         assert module.call_func(f) == ("text", 42, None)
         assert module.call_method("text") == 2
+
+    def test_CallFunctionObjArgs(self):
+        module = self.import_extension('foo', [
+            ("call_func", "METH_VARARGS",
+             """
+                PyObject *t = PyString_FromString("t");
+                PyObject *res = PyObject_CallFunctionObjArgs(
+                   PyTuple_GetItem(args, 0),
+                   Py_None, NULL);
+                Py_DECREF(t);
+                return res;
+             """),
+            ("call_method", "METH_VARARGS",
+             """
+                PyObject *t = PyString_FromString("t");
+                PyObject *count = PyString_FromString("count");
+                PyObject *res = PyObject_CallMethodObjArgs(
+                   PyTuple_GetItem(args, 0),
+                   count, t, NULL);
+                Py_DECREF(t);
+                Py_DECREF(count);
+                return res;
+             """),
+            ])
+        def f(*args):
+            return args
+        assert module.call_func(f) == (None,)
+        assert module.call_method("text") == 2
+        
diff --git a/pypy/module/cpyext/test/test_frameobject.py b/pypy/module/cpyext/test/test_frameobject.py
--- a/pypy/module/cpyext/test/test_frameobject.py
+++ b/pypy/module/cpyext/test/test_frameobject.py
@@ -64,3 +64,31 @@
         # Cython does not work on CPython as well...
         assert exc.traceback.tb_lineno == 42 # should be 48
         assert frame.f_lineno == 42
+
+    def test_traceback_check(self):
+        module = self.import_extension('foo', [
+            ("traceback_check", "METH_NOARGS",
+             """
+                 int check;
+                 PyObject *type, *value, *tb;
+                 PyObject *ret = PyRun_String("XXX", Py_eval_input, 
+                                              Py_None, Py_None);
+                 if (ret) {
+                     Py_DECREF(ret);
+                     PyErr_SetString(PyExc_AssertionError, "should raise");
+                     return NULL;
+                 }
+                 PyErr_Fetch(&type, &value, &tb);
+                 check = PyTraceBack_Check(tb);
+                 Py_XDECREF(type);
+                 Py_XDECREF(value);
+                 Py_XDECREF(tb);
+                 if (check) {
+                     Py_RETURN_TRUE;
+                 }
+                 else {
+                     Py_RETURN_FALSE;
+                 }
+             """),
+            ])
+        assert module.traceback_check()
diff --git a/pypy/module/cpyext/test/test_funcobject.py b/pypy/module/cpyext/test/test_funcobject.py
--- a/pypy/module/cpyext/test/test_funcobject.py
+++ b/pypy/module/cpyext/test/test_funcobject.py
@@ -44,3 +44,19 @@
         assert w_code.co_firstlineno == 3
         rffi.free_charp(filename)
         rffi.free_charp(funcname)
+
+    def test_classmethod(self, space, api):
+        w_function = space.appexec([], """():
+            def method(x): return x
+            return method
+        """)
+        w_class = space.call_function(space.w_type, space.wrap("C"),
+                                      space.newtuple([]), space.newdict())
+        w_instance = space.call_function(w_class)
+        # regular instance method
+        space.setattr(w_class, space.wrap("method"), w_function)
+        assert space.is_w(space.call_method(w_instance, "method"), w_instance)
+        # now a classmethod
+        w_classmethod = api.PyClassMethod_New(w_function)
+        space.setattr(w_class, space.wrap("classmethod"), w_classmethod)
+        assert space.is_w(space.call_method(w_instance, "classmethod"), w_class)
diff --git a/pypy/module/cpyext/test/test_intobject.py b/pypy/module/cpyext/test/test_intobject.py
--- a/pypy/module/cpyext/test/test_intobject.py
+++ b/pypy/module/cpyext/test/test_intobject.py
@@ -50,3 +50,19 @@
             ])
         assert module.from_string() == 0x1234
         assert type(module.from_string()) is int
+
+    def test_size_t(self):
+        module = self.import_extension('foo', [
+            ("values", "METH_NOARGS",
+             """
+                 return Py_BuildValue("NNNN",
+                     PyInt_FromSize_t(123),
+                     PyInt_FromSize_t((size_t)-1),
+                     PyInt_FromSsize_t(123),
+                     PyInt_FromSsize_t((size_t)-1));
+             """),
+            ])
+        values = module.values()
+        types = [type(x) for x in values]
+        assert types == [int, long, int, int]
+        
diff --git a/pypy/module/cpyext/test/test_number.py b/pypy/module/cpyext/test/test_number.py
--- a/pypy/module/cpyext/test/test_number.py
+++ b/pypy/module/cpyext/test/test_number.py
@@ -25,6 +25,15 @@
         assert api.PyInt_CheckExact(w_l)
         w_l = api.PyNumber_Int(space.wrap(2 << 65))
         assert api.PyLong_CheckExact(w_l)
+        w_l = api.PyNumber_Int(space.wrap(42.3))
+        assert api.PyInt_CheckExact(w_l)
+
+    def test_number_index(self, space, api):
+        w_l = api.PyNumber_Index(space.wrap(123L))
+        assert api.PyLong_CheckExact(w_l)
+        w_l = api.PyNumber_Index(space.wrap(42.3))
+        assert w_l is None
+        api.PyErr_Clear()
 
     def test_numbermethods(self, space, api):
         assert "ab" == space.unwrap(
diff --git a/pypy/module/cpyext/test/test_sliceobject.py b/pypy/module/cpyext/test/test_sliceobject.py
--- a/pypy/module/cpyext/test/test_sliceobject.py
+++ b/pypy/module/cpyext/test/test_sliceobject.py
@@ -67,3 +67,14 @@
              """),
             ])
         assert module.nullslice() == slice(None, None, None)
+
+    def test_ellipsis(self):
+        module = self.import_extension('foo', [
+            ("get_ellipsis", "METH_NOARGS",
+             """
+                 PyObject *ret = Py_Ellipsis;
+                 Py_INCREF(ret);
+                 return ret;
+             """),
+            ])
+        assert module.get_ellipsis() is Ellipsis
diff --git a/pypy/module/cpyext/test/test_stringobject.py b/pypy/module/cpyext/test/test_stringobject.py
--- a/pypy/module/cpyext/test/test_stringobject.py
+++ b/pypy/module/cpyext/test/test_stringobject.py
@@ -283,3 +283,7 @@
         self.raises(space, api, TypeError, api.PyString_AsEncodedObject,
             space.wrap(2), lltype.nullptr(rffi.CCHARP.TO), lltype.nullptr(rffi.CCHARP.TO)
         )
+
+    def test_eq(self, space, api):
+        assert 1 == api._PyString_Eq(space.wrap("hello"), space.wrap("hello"))
+        assert 0 == api._PyString_Eq(space.wrap("hello"), space.wrap("world"))
diff --git a/pypy/module/cpyext/test/test_tupleobject.py b/pypy/module/cpyext/test/test_tupleobject.py
--- a/pypy/module/cpyext/test/test_tupleobject.py
+++ b/pypy/module/cpyext/test/test_tupleobject.py
@@ -42,3 +42,9 @@
         assert api.PyTuple_Size(atuple) == 2
         assert space.eq_w(space.getitem(atuple, space.wrap(0)), space.wrap(0))
         assert space.eq_w(space.getitem(atuple, space.wrap(1)), space.wrap(1))
+
+    def test_getslice(self, space, api):
+        w_tuple = space.newtuple([space.wrap(i) for i in range(10)])
+        w_slice = api.PyTuple_GetSlice(w_tuple, 3, -3)
+        assert space.eq_w(w_slice,
+                          space.newtuple([space.wrap(i) for i in range(3, 7)]))
diff --git a/pypy/module/cpyext/tupleobject.py b/pypy/module/cpyext/tupleobject.py
--- a/pypy/module/cpyext/tupleobject.py
+++ b/pypy/module/cpyext/tupleobject.py
@@ -79,3 +79,10 @@
     Py_DecRef(space, ref[0])
     ref[0] = make_ref(space, py_newtuple)
     return 0
+
+ at cpython_api([PyObject, Py_ssize_t, Py_ssize_t], PyObject)
+def PyTuple_GetSlice(space, w_obj, low, high):
+    """Take a slice of the tuple pointed to by p from low to high and return it
+    as a new tuple.
+    """
+    return space.getslice(w_obj, space.wrap(low), space.wrap(high))
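
Since the implementation simply delegates to space.getslice(), the new function follows
app-level slicing semantics, including the negative upper bound exercised by the test above.
A minimal Python sketch of the expected result for a 10-element tuple, as in the test:

    t = tuple(range(10))
    # equivalent of the PyTuple_GetSlice(w_tuple, 3, -3) call in the test
    assert t[3:-3] == (3, 4, 5, 6)
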
diff --git a/pypy/module/cpyext/typeobject.py b/pypy/module/cpyext/typeobject.py
--- a/pypy/module/cpyext/typeobject.py
+++ b/pypy/module/cpyext/typeobject.py
@@ -650,3 +650,13 @@
     name = space.str_w(w_name)
     w_obj = w_type.lookup(name)
     return borrow_from(w_type, w_obj)
+
+ at cpython_api([PyTypeObjectPtr], lltype.Void)
+def PyType_Modified(space, w_obj):
+    """Invalidate the internal lookup cache for the type and all of its
+    subtypes.  This function must be called after any manual
+    modification of the attributes or base classes of the type.
+    """
+    # PyPy already takes care of direct modifications to type.__dict__
+    # (which is a W_DictProxyObject).
+    pass
diff --git a/pypy/module/micronumpy/__init__.py b/pypy/module/micronumpy/__init__.py
--- a/pypy/module/micronumpy/__init__.py
+++ b/pypy/module/micronumpy/__init__.py
@@ -8,8 +8,11 @@
     interpleveldefs = {
         'array': 'interp_numarray.SingleDimArray',
         'zeros': 'interp_numarray.zeros',
+        'empty': 'interp_numarray.zeros',
+        'ones': 'interp_numarray.ones',
 
         # ufuncs
+        'abs': 'interp_ufuncs.absolute',
         'absolute': 'interp_ufuncs.absolute',
         'copysign': 'interp_ufuncs.copysign',
         'exp': 'interp_ufuncs.exp',
@@ -20,4 +23,7 @@
         'sign': 'interp_ufuncs.sign',
     }
 
-    appleveldefs = {}
+    appleveldefs = {
+        'average': 'app_numpy.average',
+        'mean': 'app_numpy.mean',
+    }
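With these additions the module exposes empty() and ones() next to zeros(), an abs alias for
absolute, and the app-level average()/mean() helpers. A short usage sketch (assuming a pypy
build with the micronumpy module enabled):

    import numpy

    assert len(numpy.empty(4)) == 4   # 'empty' currently reuses the zeros() implementation
    assert numpy.ones(3)[0] == 1.0
    assert numpy.abs(-3.0) == 3.0     # 'abs' is an alias for 'absolute'
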
diff --git a/pypy/module/micronumpy/app_numpy.py b/pypy/module/micronumpy/app_numpy.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/micronumpy/app_numpy.py
@@ -0,0 +1,11 @@
+import numpy
+
+def average(a):
+    # This is meant to implement a weighted average; the weighting is not
+    # implemented yet, so for now it is just a plain mean.
+    return mean(a)
+
+def mean(a):
+    if not hasattr(a, "mean"):
+        a = numpy.array(a)
+    return a.mean()
\ No newline at end of file
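The expected app-level behaviour of these helpers, mirroring test_module.py further down
(a sketch; requires a pypy with micronumpy enabled):

    import numpy

    assert numpy.mean(numpy.array(range(5))) == 2.0
    assert numpy.mean(range(5)) == 2.0        # plain sequences are wrapped into an array first
    assert numpy.average(range(10)) == 4.5    # currently just an unweighted mean
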
diff --git a/pypy/module/micronumpy/compile.py b/pypy/module/micronumpy/compile.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/micronumpy/compile.py
@@ -0,0 +1,49 @@
+
+""" This is a set of tools for standalone compiling of numpy expressions.
+It should not be imported by the module itself
+"""
+
+from pypy.module.micronumpy.interp_numarray import FloatWrapper, SingleDimArray
+
+class BogusBytecode(Exception):
+    pass
+
+def create_array(size):
+    a = SingleDimArray(size)
+    for i in range(size):
+        a.storage[i] = float(i % 10)
+    return a
+
+class TrivialSpace(object):
+    def wrap(self, x):
+        return x
+
+def numpy_compile(bytecode, array_size):
+    space = TrivialSpace()
+    stack = []
+    i = 0
+    for b in bytecode:
+        if b == 'a':
+            stack.append(create_array(array_size))
+            i += 1
+        elif b == 'f':
+            stack.append(FloatWrapper(1.2))
+        elif b == '+':
+            right = stack.pop()
+            stack.append(stack.pop().descr_add(space, right))
+        elif b == '-':
+            right = stack.pop()
+            stack.append(stack.pop().descr_sub(space, right))
+        elif b == '*':
+            right = stack.pop()
+            stack.append(stack.pop().descr_mul(space, right))
+        elif b == '/':
+            right = stack.pop()
+            stack.append(stack.pop().descr_div(space, right))
+        else:
+            print "Unknown opcode: %s" % b
+            raise BogusBytecode()
+    if len(stack) != 1:
+        print "Bogus bytecode, stack does not contain exactly one element"
+        raise BogusBytecode()
+    return stack[0]
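
The helper interprets a tiny stack-based bytecode: 'a' pushes a fresh array of the requested
size (element i holds float(i % 10)), 'f' pushes the float constant 1.2, and the four arithmetic
opcodes pop two operands and push the corresponding lazy element-wise operation. A minimal sketch
of how it is meant to be driven (the bytecode string here is made up; see
TestTranslation.test_compile below for the real usage):

    from pypy.module.micronumpy.compile import numpy_compile

    expr = numpy_compile('aa+f*', 4)   # builds the lazy expression (a + a) * 1.2
    result = expr.compute()            # force it into a concrete SingleDimArray
    assert result.storage[0] == 0.0
    assert result.storage[1] == (1 + 1) * 1.2
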
diff --git a/pypy/module/micronumpy/interp_numarray.py b/pypy/module/micronumpy/interp_numarray.py
--- a/pypy/module/micronumpy/interp_numarray.py
+++ b/pypy/module/micronumpy/interp_numarray.py
@@ -1,7 +1,7 @@
 from pypy.interpreter.baseobjspace import ObjSpace, W_Root, Wrappable
 from pypy.interpreter.error import operationerrfmt
 from pypy.interpreter.gateway import interp2app, unwrap_spec
-from pypy.interpreter.typedef import TypeDef
+from pypy.interpreter.typedef import TypeDef, GetSetProperty
 from pypy.rlib import jit
 from pypy.rpython.lltypesystem import lltype
 from pypy.tool.sourcetools import func_with_new_name
@@ -46,7 +46,7 @@
     def invalidated(self):
         for arr in self.invalidates:
             arr.force_if_needed()
-        self.invalidates = []
+        del self.invalidates[:]
 
     def _binop_impl(function):
         signature = Signature()
@@ -80,18 +80,36 @@
     def get_concrete(self):
         raise NotImplementedError
 
+    def descr_get_shape(self, space):
+        return space.newtuple([self.descr_len(space)])
+
     def descr_len(self, space):
         return self.get_concrete().descr_len(space)
 
-    @unwrap_spec(item=int)
-    def descr_getitem(self, space, item):
-        return self.get_concrete().descr_getitem(space, item)
+    def descr_getitem(self, space, w_idx):
+        # TODO: indexing by tuples
+        start, stop, step, slice_length = space.decode_index4(w_idx, self.find_size())
+        if step == 0:
+            # Single index
+            return space.wrap(self.get_concrete().getitem(start))
+        else:
+            # Slice
+            res = SingleDimSlice(start, stop, step, slice_length, self, self.signature.transition(SingleDimSlice.static_signature))
+            return space.wrap(res)
 
     @unwrap_spec(item=int, value=float)
     def descr_setitem(self, space, item, value):
         self.invalidated()
         return self.get_concrete().descr_setitem(space, item, value)
 
+    def descr_mean(self, space):
+        s = 0
+        concrete = self.get_concrete()
+        size = concrete.find_size()
+        for i in xrange(size):
+            s += concrete.getitem(i)
+        return space.wrap(s / size)
+
 
 class FloatWrapper(BaseArray):
     """
@@ -119,6 +137,10 @@
         self.forced_result = None
         self.signature = signature
 
+    def _del_sources(self):
+        # Function for deleting references to source arrays, to allow garbage-collecting them
+        raise NotImplementedError
+
     def compute(self):
         i = 0
         signature = self.signature
@@ -135,6 +157,7 @@
     def force_if_needed(self):
         if self.forced_result is None:
             self.forced_result = self.compute()
+            self._del_sources()
 
     def get_concrete(self):
         self.force_if_needed()
@@ -145,6 +168,13 @@
             return self.forced_result.eval(i)
         return self._eval(i)
 
+    def find_size(self):
+        if self.forced_result is not None:
+            # The result has been computed and sources may be unavailable
+            return self.forced_result.find_size()
+        return self._find_size()
+
+
 class Call1(VirtualArray):
     _immutable_fields_ = ["function", "values"]
 
@@ -153,7 +183,10 @@
         self.function = function
         self.values = values
 
-    def find_size(self):
+    def _del_sources(self):
+        self.values = None
+
+    def _find_size(self):
         return self.values.find_size()
 
     def _eval(self, i):
@@ -164,13 +197,18 @@
     Intermediate class for performing binary operations.
     """
     _immutable_fields_ = ["function", "left", "right"]
+
     def __init__(self, function, left, right, signature):
         VirtualArray.__init__(self, signature)
         self.function = function
         self.left = left
         self.right = right
 
-    def find_size(self):
+    def _del_sources(self):
+        self.left = None
+        self.right = None
+
+    def _find_size(self):
         try:
             return self.left.find_size()
         except ValueError:
@@ -181,6 +219,58 @@
         lhs, rhs = self.left.eval(i), self.right.eval(i)
         return self.function(lhs, rhs)
 
+class ViewArray(BaseArray):
+    """
+    Class for representing views of arrays; they reflect changes made to the
+    parent arrays.  Example: slices.
+    """
+    _immutable_fields_ = ["parent"]
+
+    def __init__(self, parent, signature):
+        BaseArray.__init__(self)
+        self.signature = signature
+        self.parent = parent
+        self.invalidates = parent.invalidates
+
+    def get_concrete(self):
+        # In fact, a ViewArray never gets "concrete" as it never stores data.
+        # This implementation is only needed to make the BaseArray
+        # getitem/setitem machinery work; it could be refactored.
+        return self
+
+    def eval(self, i):
+        return self.parent.eval(self.calc_index(i))
+
+    def getitem(self, item):
+        return self.parent.getitem(self.calc_index(item))
+
+    @unwrap_spec(item=int, value=float)
+    def descr_setitem(self, space, item, value):
+        return self.parent.descr_setitem(space, self.calc_index(item), value)
+
+    def descr_len(self, space):
+        return space.wrap(self.find_size())
+
+    def calc_index(self, item):
+        raise NotImplementedError
+
+class SingleDimSlice(ViewArray):
+    _immutable_fields_ = ["start", "stop", "step", "size"]
+    static_signature = Signature()
+
+    def __init__(self, start, stop, step, slice_length, parent, signature):
+        ViewArray.__init__(self, parent, signature)
+        self.start = start
+        self.stop = stop
+        self.step = step
+        self.size = slice_length
+
+    def find_size(self):
+        return self.size
+
+    def calc_index(self, item):
+        return (self.start + item * self.step)
+
 
 class SingleDimArray(BaseArray):
     signature = Signature()
@@ -215,10 +305,8 @@
     def descr_len(self, space):
         return space.wrap(self.size)
 
-    @unwrap_spec(item=int)
-    def descr_getitem(self, space, item):
-        item = self.getindex(space, item)
-        return space.wrap(self.storage[item])
+    def getitem(self, item):
+        return self.storage[item]
 
     @unwrap_spec(item=int, value=float)
     def descr_setitem(self, space, item, value):
@@ -238,14 +326,23 @@
         i += 1
     return space.wrap(arr)
 
- at unwrap_spec(ObjSpace, int)
+ at unwrap_spec(size=int)
 def zeros(space, size):
     return space.wrap(SingleDimArray(size))
 
+ at unwrap_spec(size=int)
+def ones(space, size):
+    arr = SingleDimArray(size)
+    for i in xrange(size):
+        arr.storage[i] = 1.0
+    return space.wrap(arr)
 
 BaseArray.typedef = TypeDef(
     'numarray',
     __new__ = interp2app(descr_new_numarray),
+
+    shape = GetSetProperty(BaseArray.descr_get_shape),
+
     __len__ = interp2app(BaseArray.descr_len),
     __getitem__ = interp2app(BaseArray.descr_getitem),
     __setitem__ = interp2app(BaseArray.descr_setitem),
@@ -254,4 +351,6 @@
     __sub__ = interp2app(BaseArray.descr_sub),
     __mul__ = interp2app(BaseArray.descr_mul),
     __div__ = interp2app(BaseArray.descr_div),
+
+    mean = interp2app(BaseArray.descr_mean),
 )
\ No newline at end of file
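
The new SingleDimSlice is a lazy view: it stores only (start, stop, step, size) and maps an
index i to the parent index start + i*step, so reads and writes go straight through to the
parent's storage. A short app-level sketch of the intended semantics (mirroring
test_getslice_step and test_slice_update below; requires micronumpy):

    from numpy import array

    a = array(range(10))
    s = a[1:9:2]               # a view over indices 1, 3, 5, 7 of a -- no copy is made
    assert len(s) == 4
    assert s[2] == a[1 + 2*2]  # calc_index(): start + item * step
    a[5] = 42.0
    assert s[2] == 42.0        # the view reflects changes made to its parent
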
diff --git a/pypy/module/micronumpy/interp_ufuncs.py b/pypy/module/micronumpy/interp_ufuncs.py
--- a/pypy/module/micronumpy/interp_ufuncs.py
+++ b/pypy/module/micronumpy/interp_ufuncs.py
@@ -8,22 +8,24 @@
 
 def ufunc(func):
     signature = Signature()
-    @unwrap_spec(array=BaseArray)
-    def impl(space, array):
-        w_res = Call1(func, array, array.signature.transition(signature))
-        array.invalidates.append(w_res)
-        return w_res
+    def impl(space, w_obj):
+        if isinstance(w_obj, BaseArray):
+            w_res = Call1(func, w_obj, w_obj.signature.transition(signature))
+            w_obj.invalidates.append(w_res)
+            return w_res
+        return space.wrap(func(space.float_w(w_obj)))
     return func_with_new_name(impl, "%s_dispatcher" % func.__name__)
 
 def ufunc2(func):
     signature = Signature()
-    @unwrap_spec(larray=BaseArray, rarray=BaseArray)
-    def impl(space, larray, rarray):
-        new_sig = larray.signature.transition(signature).transition(rarray.signature)
-        w_res = Call2(func, larray, rarray, new_sig)
-        larray.invalidates.append(w_res)
-        rarray.invalidates.append(w_res)
-        return w_res
+    def impl(space, w_lhs, w_rhs):
+        if isinstance(w_lhs, BaseArray) and isinstance(w_rhs, BaseArray):
+            new_sig = w_lhs.signature.transition(signature).transition(w_rhs.signature)
+            w_res = Call2(func, w_lhs, w_rhs, new_sig)
+            w_lhs.invalidates.append(w_res)
+            w_rhs.invalidates.append(w_res)
+            return w_res
+        return space.wrap(func(space.float_w(w_lhs), space.float_w(w_rhs)))
     return func_with_new_name(impl, "%s_dispatcher" % func.__name__)
 
 @ufunc
diff --git a/pypy/module/micronumpy/test/test_base.py b/pypy/module/micronumpy/test/test_base.py
--- a/pypy/module/micronumpy/test/test_base.py
+++ b/pypy/module/micronumpy/test/test_base.py
@@ -16,4 +16,14 @@
         v3 = ar.descr_add(space, FloatWrapper(1.0))
         assert v2.signature is v3.signature
         v4 = ar.descr_add(space, ar)
-        assert v1.signature is v4.signature
\ No newline at end of file
+        assert v1.signature is v4.signature
+
+    def test_slice_signature(self, space):
+        ar = SingleDimArray(10)
+        v1 = ar.descr_getitem(space, space.wrap(slice(1, 5, 1)))
+        v2 = ar.descr_getitem(space, space.wrap(slice(4, 6, 1)))
+        assert v1.signature is v2.signature
+
+        v3 = ar.descr_add(space, v1)
+        v4 = ar.descr_add(space, v2)
+        assert v3.signature is v4.signature
\ No newline at end of file
diff --git a/pypy/module/micronumpy/test/test_module.py b/pypy/module/micronumpy/test/test_module.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/micronumpy/test/test_module.py
@@ -0,0 +1,13 @@
+from pypy.module.micronumpy.test.test_base import BaseNumpyAppTest
+
+
+class AppTestNumPyModule(BaseNumpyAppTest):
+    def test_mean(self):
+        from numpy import array, mean
+        assert mean(array(range(5))) == 2.0
+        assert mean(range(5)) == 2.0
+
+    def test_average(self):
+        from numpy import array, average
+        assert average(range(10)) == 4.5
+        assert average(array(range(10))) == 4.5
\ No newline at end of file
diff --git a/pypy/module/micronumpy/test/test_numarray.py b/pypy/module/micronumpy/test/test_numarray.py
--- a/pypy/module/micronumpy/test/test_numarray.py
+++ b/pypy/module/micronumpy/test/test_numarray.py
@@ -18,6 +18,25 @@
         a[13] = 5.3
         assert a[13] == 5.3
 
+    def test_empty(self):
+        """
+        Test that empty() works.
+        """
+
+        from numpy import empty
+        a = empty(2)
+        a[1] = 1.0
+        assert a[1] == 1.0
+
+    def test_ones(self):
+        from numpy import ones
+        a = ones(3)
+        assert len(a) == 3
+        assert a[0] == 1
+        raises(IndexError, "a[3]")
+        a[2] = 4
+        assert a[2] == 4
+
     def test_iterator_init(self):
         from numpy import array
         a = array(range(5))
@@ -46,6 +65,15 @@
         assert len(a) == 5
         assert len(a + a) == 5
 
+    def test_shape(self):
+        from numpy import array
+        a = array(range(5))
+        assert a.shape == (5,)
+        b = a + a
+        assert b.shape == (5,)
+        c = a[:3]
+        assert c.shape == (3,)
+
     def test_add(self):
         from numpy import array
         a = array(range(5))
@@ -138,4 +166,51 @@
         b = a + a
         c = b + b
         b[1] = 5
-        assert c[1] == 4
\ No newline at end of file
+        assert c[1] == 4
+
+    def test_getslice(self):
+        from numpy import array
+        a = array(range(5))
+        s = a[1:5]
+        assert len(s) == 4
+        for i in range(4):
+            assert s[i] == a[i+1]
+
+    def test_getslice_step(self):
+        from numpy import array
+        a = array(range(10))
+        s = a[1:9:2]
+        assert len(s) == 4
+        for i in range(4):
+            assert s[i] == a[2*i+1]
+
+    def test_slice_update(self):
+        from numpy import array
+        a = array(range(5))
+        s = a[0:3]
+        s[1] = 10
+        assert a[1] == 10
+        a[2] = 20
+        assert s[2] == 20
+
+
+    def test_slice_invalidate(self):
+        # check that a slice shares its invalidation list with its parent array
+        from numpy import array
+        a = array(range(5))
+        s = a[0:2]
+        b = array([10,11])
+        c = s + b
+        a[0] = 100
+        assert c[0] == 10
+        assert c[1] == 12
+        d = s + b
+        a[1] = 101
+        assert d[0] == 110
+        assert d[1] == 12
+
+    def test_mean(self):
+        from numpy import array, mean
+        a = array(range(5))
+        assert a.mean() == 2.0
+        assert a[:4].mean() == 1.5
\ No newline at end of file
diff --git a/pypy/module/micronumpy/test/test_ufuncs.py b/pypy/module/micronumpy/test/test_ufuncs.py
--- a/pypy/module/micronumpy/test/test_ufuncs.py
+++ b/pypy/module/micronumpy/test/test_ufuncs.py
@@ -3,6 +3,13 @@
 
 
 class AppTestUfuncs(BaseNumpyAppTest):
+    def test_single_item(self):
+        from numpy import negative, sign, minimum
+
+        assert negative(5.0) == -5.0
+        assert sign(-0.0) == 0.0
+        assert minimum(2.0, 3.0) == 2.0
+
     def test_negative(self):
         from numpy import array, negative
 
diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -1,8 +1,9 @@
 from pypy.jit.metainterp.test.support import LLJitMixin
+from pypy.rpython.test.test_llinterp import interpret
 from pypy.module.micronumpy.interp_numarray import (SingleDimArray, Signature,
-    FloatWrapper, Call1, Call2, add, mul)
+    FloatWrapper, Call1, Call2, SingleDimSlice, add, mul)
 from pypy.module.micronumpy.interp_ufuncs import negative
-
+from pypy.module.micronumpy.compile import numpy_compile
 
 class FakeSpace(object):
     pass
@@ -91,4 +92,54 @@
 
         self.meta_interp(f, [5], listops=True, backendopt=True)
         # This is 3, not 2 because there is a bridge for the exit.
-        self.check_loop_count(3)
\ No newline at end of file
+        self.check_loop_count(3)
+
+    def test_slice(self):
+        space = self.space
+
+        def f(i):
+            step = 3
+            ar = SingleDimArray(step*i)
+            s = SingleDimSlice(0, step*i, step, i, ar, ar.signature.transition(SingleDimSlice.static_signature))
+            v = Call2(add, s, s, Signature())
+            return v.get_concrete().storage[3]
+
+        result = self.meta_interp(f, [5], listops=True, backendopt=True)
+        self.check_loops({'int_mul': 1, 'getarrayitem_raw': 2, 'float_add': 1,
+                          'setarrayitem_raw': 1, 'int_add': 1,
+                          'int_lt': 1, 'guard_true': 1, 'jump': 1})
+        assert result == f(5)
+
+    def test_slice2(self):
+        space = self.space
+
+        def f(i):
+            step1 = 2
+            step2 = 3
+            ar = SingleDimArray(step2*i)
+            s1 = SingleDimSlice(0, step1*i, step1, i, ar, ar.signature.transition(SingleDimSlice.static_signature))
+            s2 = SingleDimSlice(0, step2*i, step2, i, ar, ar.signature.transition(SingleDimSlice.static_signature))
+            v = Call2(add, s1, s2, Signature())
+            return v.get_concrete().storage[3]
+
+        result = self.meta_interp(f, [5], listops=True, backendopt=True)
+        self.check_loops({'int_mul': 2, 'getarrayitem_raw': 2, 'float_add': 1,
+                          'setarrayitem_raw': 1, 'int_add': 1,
+                          'int_lt': 1, 'guard_true': 1, 'jump': 1})
+        assert result == f(5)
+
+class TestTranslation(object):
+    def test_compile(self):
+        x = numpy_compile('aa+f*f/a-', 10)
+        x = x.compute()
+        assert isinstance(x, SingleDimArray)
+        assert x.size == 10
+        assert x.storage[0] == 0
+        assert x.storage[1] == ((1 + 1) * 1.2) / 1.2 - 1
+    
+    def test_translation(self):
+        # we import main to check if the target compiles
+        from pypy.translator.goal.targetnumpystandalone import main
+        from pypy.rpython.annlowlevel import llstr
+        
+        interpret(main, [llstr('af+'), 100])
diff --git a/pypy/module/operator/app_operator.py b/pypy/module/operator/app_operator.py
--- a/pypy/module/operator/app_operator.py
+++ b/pypy/module/operator/app_operator.py
@@ -4,6 +4,7 @@
 This module exports a set of operators as functions. E.g. operator.add(x,y) is
 equivalent to x+y.
 '''
+from __pypy__ import builtinify
 
 def countOf(a,b): 
     'countOf(a, b) -- Return the number of times b occurs in a.'
@@ -66,50 +67,56 @@
     a[b:c] = d 
 __setslice__ = setslice
 
-class attrgetter(object):
 
-    def __init__(self, attr, *attrs):
-        self.attrs = (attr,) + attrs
+def attrgetter(attr, *attrs):
+    if attrs:
+        getters = [single_attr_getter(a) for a in (attr,) + attrs]
+        def getter(obj):
+            return tuple([getter(obj) for getter in getters])
+    else:
+        getter = single_attr_getter(attr)
+    return builtinify(getter)
 
-    def _resolve_attr(self, obj, attr):
-        last = 0
-        while True:
-            try:
-                dot = attr.find(".", last)
-            except AttributeError:
-                raise TypeError
-            if dot > 0:
-                obj = getattr(obj, attr[last:dot])
-                last = dot + 1
-            else:
-                return getattr(obj, attr[last:])
+def single_attr_getter(attr):
+    if not isinstance(attr, str):
+        if not isinstance(attr, unicode):
+            def _raise_typeerror(obj):
+                raise TypeError("argument must be a string, not %r" %
+                                (type(attr).__name__,))
+            return _raise_typeerror
+        attr = attr.encode('ascii')
+    #
+    def make_getter(name, prevfn=None):
+        if prevfn is None:
+            def getter(obj):
+                return getattr(obj, name)
+        else:
+            def getter(obj):
+                return getattr(prevfn(obj), name)
+        return getter
+    #
+    last = 0
+    getter = None
+    while True:
+        dot = attr.find(".", last)
+        if dot < 0: break
+        getter = make_getter(attr[last:dot], getter)
+        last = dot + 1
+    return make_getter(attr[last:], getter)
 
-    def __call__(self, obj):
-        if len(self.attrs) == 1:
-            return self._resolve_attr(obj, self.attrs[0])
-        return tuple(self._resolve_attr(obj, attr) for attr in self.attrs)
 
-class itemgetter(object):
+def itemgetter(item, *items):
+    if items:
+        list_of_indices = [item] + list(items)
+        def getter(obj):
+            return tuple([obj[i] for i in list_of_indices])
+    else:
+        def getter(obj):
+            return obj[item]
+    return builtinify(getter)
 
-    def __init__(self, item, *args):
-        self.items = args
-        self.item = item
 
-    def __call__(self, obj):
-        result = obj[self.item]
-
-        if self.items:
-            list = [result] + [obj[item] for item in self.items]
-            return tuple(list)
-
-        return result
-
-class methodcaller(object):
-
-    def __init__(self, method_name, *args, **kwargs):
-        self.method_name = method_name
-        self.args = args
-        self.kwargs = kwargs
-
-    def __call__(self, obj):
-        return getattr(obj, self.method_name)(*self.args, **self.kwargs)
+def methodcaller(method_name, *args, **kwargs):
+    def call(obj):
+        return getattr(obj, method_name)(*args, **kwargs)
+    return builtinify(call)
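
The class-based implementations are replaced by closures (passed through __pypy__.builtinify so
they still behave like built-in functions at app level); single_attr_getter() builds a chain of
small getter closures, one per dotted component. The observable behaviour matches the stdlib
operator module; a minimal sketch:

    from operator import attrgetter, itemgetter, methodcaller

    class Box(object):
        pass

    inner = Box(); inner.value = 42
    outer = Box(); outer.inner = inner

    assert attrgetter('inner.value')(outer) == 42        # getattr(getattr(outer, 'inner'), 'value')
    assert itemgetter(1, 3)('abcd') == ('b', 'd')        # tuple of the requested items
    assert methodcaller('strip', '-')('--hi--') == 'hi'  # calls '--hi--'.strip('-')
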
diff --git a/pypy/module/oracle/__init__.py b/pypy/module/oracle/__init__.py
--- a/pypy/module/oracle/__init__.py
+++ b/pypy/module/oracle/__init__.py
@@ -28,6 +28,7 @@
 
     appleveldefs = {
         'version': 'app_oracle.version',
+        'paramstyle': 'app_oracle.paramstyle',
         'makedsn': 'app_oracle.makedsn',
         'TimestampFromTicks': 'app_oracle.TimestampFromTicks',
     }
diff --git a/pypy/module/oracle/app_oracle.py b/pypy/module/oracle/app_oracle.py
--- a/pypy/module/oracle/app_oracle.py
+++ b/pypy/module/oracle/app_oracle.py
@@ -1,4 +1,5 @@
 version = '5.0.0'
+paramstyle = 'named'
 
 class Warning(StandardError):
     pass
diff --git a/pypy/module/oracle/interp_connect.py b/pypy/module/oracle/interp_connect.py
--- a/pypy/module/oracle/interp_connect.py
+++ b/pypy/module/oracle/interp_connect.py
@@ -159,9 +159,20 @@
         # set the internal and external names; these are needed for global
         # transactions but are limited in terms of the lengths of the strings
         if twophase:
-            raise OperationError(
-                interp_error.get(space).w_NotSupportedError,
-                space.wrap("XXX write me"))
+            status = roci.OCIAttrSet(
+                self.serverHandle, roci.OCI_HTYPE_SERVER,
+                "cx_Oracle", 0,
+                roci.OCI_ATTR_INTERNAL_NAME,
+                self.environment.errorHandle)
+            self.environment.checkForError(
+                status, "Connection_Connect(): set internal name")
+            status = roci.OCIAttrSet(
+                self.serverHandle, roci.OCI_HTYPE_SERVER,
+                "cx_Oracle", 0,
+                roci.OCI_ATTR_EXTERNAL_NAME,
+                self.environment.errorHandle)
+            self.environment.checkForError(
+                status, "Connection_Connect(): set external name")
 
         # allocate the session handle
         handleptr = lltype.malloc(rffi.CArrayPtr(roci.OCISession).TO,
diff --git a/pypy/module/oracle/roci.py b/pypy/module/oracle/roci.py
--- a/pypy/module/oracle/roci.py
+++ b/pypy/module/oracle/roci.py
@@ -73,7 +73,8 @@
     defines = '''
     OCI_ATTR_SERVER OCI_ATTR_SESSION OCI_ATTR_USERNAME OCI_ATTR_PASSWORD
     OCI_ATTR_STMT_TYPE OCI_ATTR_PARAM OCI_ATTR_PARAM_COUNT OCI_ATTR_ROW_COUNT
-    OCI_ATTR_NAME OCI_ATTR_SCALE OCI_ATTR_PRECISION OCI_ATTR_IS_NULL
+    OCI_ATTR_NAME OCI_ATTR_INTERNAL_NAME OCI_ATTR_EXTERNAL_NAME
+    OCI_ATTR_SCALE OCI_ATTR_PRECISION OCI_ATTR_IS_NULL
     OCI_ATTR_DATA_SIZE OCI_ATTR_DATA_TYPE OCI_ATTR_REF_TDO
     OCI_ATTR_SCHEMA_NAME OCI_ATTR_TYPE_NAME OCI_ATTR_TYPECODE
     OCI_ATTR_NUM_TYPE_ATTRS OCI_ATTR_LIST_TYPE_ATTRS
diff --git a/pypy/module/oracle/test/test_connect.py b/pypy/module/oracle/test/test_connect.py
--- a/pypy/module/oracle/test/test_connect.py
+++ b/pypy/module/oracle/test/test_connect.py
@@ -41,6 +41,10 @@
         if hasattr(self, 'cnx'):
             self.cnx.close()
 
+    def test_constants(self):
+        assert '.' in oracle.version
+        assert oracle.paramstyle == 'named'
+
     def test_connect(self):
         self.cnx = oracle.connect(self.username, self.password,
                                   self.tnsentry, threaded=True)
@@ -49,6 +53,13 @@
         assert self.cnx.tnsentry == self.tnsentry
         assert isinstance(self.cnx.version, str)
 
+    def test_connect_twophase(self):
+        self.cnx = oracle.connect(self.username, self.password,
+                                  self.tnsentry, twophase=True)
+        assert self.cnx.username == self.username
+        assert self.cnx.password == self.password
+        assert self.cnx.tnsentry == self.tnsentry
+
     def test_singleArg(self):
         self.cnx = oracle.connect("%s/%s@%s" % (self.username, self.password,
                                                 self.tnsentry))
diff --git a/pypy/module/posix/app_posix.py b/pypy/module/posix/app_posix.py
--- a/pypy/module/posix/app_posix.py
+++ b/pypy/module/posix/app_posix.py
@@ -107,6 +107,9 @@
 def tmpnam():
     """Return an absolute pathname of a file that did not exist at the
     time the call is made."""
+    from warnings import warn
+    warn(RuntimeWarning("tmpnam is a potential security risk to your program"))
+
     import tempfile
     return tempfile.mktemp()
 
@@ -114,6 +117,9 @@
     """Return an absolute pathname of a file that did not exist at the
     time the call is made.  The directory and a prefix may be specified
     as strings; they may be omitted or None if not needed."""
+    from warnings import warn
+    warn(RuntimeWarning("tempnam is a potential security risk to your program"))
+
     import tempfile
     return tempfile.mktemp('', prefix or 'tmp', dir)
 
diff --git a/pypy/module/posix/test/test_posix2.py b/pypy/module/posix/test/test_posix2.py
--- a/pypy/module/posix/test/test_posix2.py
+++ b/pypy/module/posix/test/test_posix2.py
@@ -847,6 +847,21 @@
                 assert os.path.basename(s1).startswith(prefix or 'tmp')
                 assert os.path.basename(s2).startswith(prefix or 'tmp')
 
+    def test_tmpnam_warning(self):
+        import warnings, os
+        #
+        def f_tmpnam_warning(): os.tmpnam()    # a single line
+        #
+        with warnings.catch_warnings(record=True) as w:
+            warnings.simplefilter("always")
+            f_tmpnam_warning()
+            assert len(w) == 1
+            assert issubclass(w[-1].category, RuntimeWarning)
+            assert "potential security risk" in str(w[-1].message)
+            # check that the warning points to the call to os.tmpnam(),
+            # not to some code inside app_posix.py
+            assert w[-1].lineno == f_tmpnam_warning.func_code.co_firstlineno
+
 
 class AppTestEnvironment(object):
     def setup_class(cls):
diff --git a/pypy/module/pypyjit/__init__.py b/pypy/module/pypyjit/__init__.py
--- a/pypy/module/pypyjit/__init__.py
+++ b/pypy/module/pypyjit/__init__.py
@@ -8,6 +8,7 @@
         'set_param':    'interp_jit.set_param',
         'residual_call': 'interp_jit.residual_call',
         'set_compile_hook': 'interp_jit.set_compile_hook',
+        'DebugMergePoint': 'interp_resop.W_DebugMergePoint',
     }
 
     def setup_after_space_initialization(self):
diff --git a/pypy/module/pypyjit/interp_jit.py b/pypy/module/pypyjit/interp_jit.py
--- a/pypy/module/pypyjit/interp_jit.py
+++ b/pypy/module/pypyjit/interp_jit.py
@@ -16,6 +16,9 @@
 from pypy.interpreter.baseobjspace import ObjSpace, W_Root
 from opcode import opmap
 from pypy.rlib.objectmodel import we_are_translated
+from pypy.rlib.nonconst import NonConstant
+from pypy.jit.metainterp.resoperation import rop
+from pypy.module.pypyjit.interp_resop import W_DebugMergePoint
 
 PyFrame._virtualizable2_ = ['last_instr', 'pycode',
                             'valuestackdepth', 'valuestack_w[*]',
@@ -46,6 +49,15 @@
     return (bytecode.co_flags & CO_GENERATOR) != 0
 
 
+def wrap_oplist(space, logops, operations):
+    list_w = []
+    for op in operations:
+        if op.getopnum() == rop.DEBUG_MERGE_POINT:
+            list_w.append(space.wrap(W_DebugMergePoint(op.getarglist())))
+        else:
+            list_w.append(space.wrap(logops.repr_of_resop(op)))
+    return list_w
+
 class PyPyJitDriver(JitDriver):
     reds = ['frame', 'ec']
     greens = ['next_instr', 'is_being_profiled', 'pycode']
@@ -57,11 +69,13 @@
         
         space = self.space
         cache = space.fromcache(Cache)
+        if cache.in_recursion:
+            return
         if space.is_true(cache.w_compile_hook):
-            memo = {}
-            list_w = [space.wrap(logger.repr_of_resop(memo, op))
-                      for op in operations]
+            logops = logger._make_log_operations()
+            list_w = wrap_oplist(space, logops, operations)
             pycode = cast_base_ptr_to_instance(PyCode, ll_pycode)
+            cache.in_recursion = True
             try:
                 space.call_function(cache.w_compile_hook,
                                     space.wrap('main'),
@@ -72,14 +86,17 @@
                                     space.newlist(list_w))
             except OperationError, e:
                 e.write_unraisable(space, "jit hook ", cache.w_compile_hook)
+            cache.in_recursion = False
 
     def on_compile_bridge(self, logger, orig_looptoken, operations, n):
         space = self.space
         cache = space.fromcache(Cache)
+        if cache.in_recursion:
+            return
         if space.is_true(cache.w_compile_hook):
-            memo = {}
-            list_w = [space.wrap(logger.repr_of_resop(memo, op))
-                      for op in operations]
+            logops = logger._make_log_operations()
+            list_w = wrap_oplist(space, logops, operations)
+            cache.in_recursion = True
             try:
                 space.call_function(cache.w_compile_hook,
                                     space.wrap('main'),
@@ -88,6 +105,7 @@
                                     space.newlist(list_w))
             except OperationError, e:
                 e.write_unraisable(space, "jit hook ", cache.w_compile_hook)
+            cache.in_recursion = False
 
 pypyjitdriver = PyPyJitDriver(get_printable_location = get_printable_location,
                               get_jitcell_at = get_jitcell_at,
@@ -191,6 +209,8 @@
     return space.call_args(w_callable, __args__)
 
 class Cache(object):
+    in_recursion = False
+    
     def __init__(self, space):
         self.w_compile_hook = space.w_None
 
@@ -209,8 +229,13 @@
     for jit merge point. in case it's `main` it'll be a tuple
     (code, offset, is_being_profiled)
 
+    Note that the jit hook is not reentrant: if the code inside the jit
+    hook is itself jitted, it will get compiled, but the jit hook won't
+    be called for that compilation.
+
     XXX write down what else
     """
     cache = space.fromcache(Cache)
     cache.w_compile_hook = w_hook
+    cache.in_recursion = NonConstant(False)
     return space.w_None
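
A minimal usage sketch of the hook (requires a pypy with the pypyjit module; the argument layout
follows the docstring above and test_jit_hook.py below). With this change, debug_merge_point
operations are delivered as pypyjit.DebugMergePoint instances rather than plain strings, and the
hook is not re-entered while it is running:

    import pypyjit

    def compile_hook(*args):
        # args describe the freshly compiled loop or bridge; the last argument
        # is the list of operations, possibly containing DebugMergePoint objects.
        print args

    pypyjit.set_compile_hook(compile_hook)
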
diff --git a/pypy/module/pypyjit/interp_resop.py b/pypy/module/pypyjit/interp_resop.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/interp_resop.py
@@ -0,0 +1,31 @@
+
+from pypy.interpreter.typedef import TypeDef, interp_attrproperty
+from pypy.interpreter.baseobjspace import Wrappable, ObjSpace
+from pypy.interpreter.gateway import unwrap_spec, interp2app
+from pypy.interpreter.pycode import PyCode
+from pypy.rpython.lltypesystem import lltype, llmemory
+from pypy.rpython.annlowlevel import cast_base_ptr_to_instance
+from pypy.rpython.lltypesystem.rclass import OBJECT
+
+class W_DebugMergePoint(Wrappable):
+    """ A class representing a debug_merge_point JIT operation.
+    """
+    
+    def __init__(self, boxes):
+        self.mp_no = boxes[0].getint()
+        self.offset = boxes[2].getint()
+        llcode = lltype.cast_opaque_ptr(lltype.Ptr(OBJECT),
+                                        boxes[4].getref_base())
+        self.pycode = cast_base_ptr_to_instance(PyCode, llcode)
+
+    @unwrap_spec('self', ObjSpace)
+    def descr_repr(self, space):
+        return space.wrap('DebugMergePoint()')
+
+W_DebugMergePoint.typedef = TypeDef(
+    'DebugMergePoint',
+    __doc__ = W_DebugMergePoint.__doc__,
+    __repr__ = interp2app(W_DebugMergePoint.descr_repr),
+    code = interp_attrproperty('pycode', W_DebugMergePoint),
+)
+
diff --git a/pypy/module/pypyjit/test/test_jit_hook.py b/pypy/module/pypyjit/test/test_jit_hook.py
--- a/pypy/module/pypyjit/test/test_jit_hook.py
+++ b/pypy/module/pypyjit/test/test_jit_hook.py
@@ -8,12 +8,13 @@
 from pypy.jit.metainterp.logger import Logger
 from pypy.rpython.annlowlevel import (cast_instance_to_base_ptr,
                                       cast_base_ptr_to_instance)
+from pypy.rpython.lltypesystem import lltype, llmemory
 from pypy.module.pypyjit.interp_jit import pypyjitdriver
 from pypy.jit.tool.oparser import parse
 from pypy.jit.metainterp.typesystem import llhelper
 
 class MockSD(object):
-    class cpu:
+    class cpu(object):
         ts = llhelper
 
 class AppTestJitHook(object):
@@ -27,14 +28,17 @@
             pass
         return f
         """)
+        cls.w_f = w_f
         ll_code = cast_instance_to_base_ptr(w_f.code)
+        code_gcref = lltype.cast_opaque_ptr(llmemory.GCREF, ll_code)
         logger = Logger(MockSD())
 
         oplist = parse("""
         [i1, i2]
         i3 = int_add(i1, i2)
+        debug_merge_point(0, 0, 0, 0, ConstPtr(ptr0))
         guard_true(i3) []
-        """).operations
+        """, namespace={'ptr0': code_gcref}).operations
 
         def interp_on_compile():
             pypyjitdriver.on_compile(logger, LoopToken(), oplist, 'loop',
@@ -63,7 +67,7 @@
         assert all[0][0][0].co_name == 'f'
         assert all[0][0][1] == 0
         assert all[0][0][2] == False
-        assert len(all[0][1]) == 2
+        assert len(all[0][1]) == 3
         assert 'int_add' in all[0][1][0]
         self.on_compile_bridge()
         assert len(all) == 2
@@ -87,3 +91,31 @@
             sys.stderr = prev
         assert 'jit hook' in s.getvalue()
         assert 'ZeroDivisionError' in s.getvalue()
+
+    def test_non_reentrant(self):
+        import pypyjit
+        l = []
+        
+        def hook(*args):
+            l.append(None)
+            self.on_compile()
+            self.on_compile_bridge()
+        
+        pypyjit.set_compile_hook(hook)
+        self.on_compile()
+        assert len(l) == 1 # and did not crash
+        self.on_compile_bridge()
+        assert len(l) == 2 # and did not crash
+        
+    def test_on_compile_types(self):
+        import pypyjit
+        l = []
+
+        def hook(*args):
+            l.append(args)
+
+        pypyjit.set_compile_hook(hook)
+        self.on_compile()
+        dmp = l[0][3][1]
+        assert isinstance(dmp, pypyjit.DebugMergePoint)
+        assert dmp.code is self.f.func_code
diff --git a/pypy/module/pypyjit/test/test_jit_setup.py b/pypy/module/pypyjit/test/test_jit_setup.py
--- a/pypy/module/pypyjit/test/test_jit_setup.py
+++ b/pypy/module/pypyjit/test/test_jit_setup.py
@@ -24,3 +24,13 @@
                 i += 1
 
         assert list(gen(3)) == [0, 1, 4]
+
+def test_interface_residual_call():
+    space = gettestobjspace(usemodules=['pypyjit'])
+    space.appexec([], """():
+        import pypyjit
+        def f(*args, **kwds):
+            return (args, kwds)
+        res = pypyjit.residual_call(f, 4, x=6)
+        assert res == ((4,), {'x': 6})
+    """)
diff --git a/pypy/module/pypyjit/test/test_pypy_c.py b/pypy/module/pypyjit/test/test_pypy_c.py
deleted file mode 100644
--- a/pypy/module/pypyjit/test/test_pypy_c.py
+++ /dev/null
@@ -1,430 +0,0 @@
-from pypy.conftest import gettestobjspace, option
-from pypy.tool.udir import udir
-import py
-from py.test import skip
-import sys, os, re
-import subprocess
-
-class BytecodeTrace(list):
-    def get_opnames(self, prefix=""):
-        return [op.getopname() for op in self
-                    if op.getopname().startswith(prefix)]
-
-    def __repr__(self):
-        return "%s%s" % (self.bytecode, list.__repr__(self))
-
-ZERO_OP_BYTECODES = [
-    'POP_TOP',
-    'ROT_TWO',
-    'ROT_THREE',
-    'DUP_TOP',
-    'ROT_FOUR',
-    'NOP',
-    'DUP_TOPX',
-    'LOAD_CONST',
-    'JUMP_FORWARD',
-    #'JUMP_ABSOLUTE' in theory, but contains signals stuff
-    #'LOAD_FAST' should be here, but currently needs a guard for nonzeroness
-    'STORE_FAST',
-    ]
-
-
-r_bridge = re.compile(r"bridge out of Guard (\d+)")
-
-def from_entry_bridge(text, allparts):
-    firstline = text.splitlines()[0]
-    if 'entry bridge' in firstline:
-        return True
-    match = r_bridge.search(firstline)
-    if match:
-        search = '<Guard' + match.group(1) + '>'
-        for part in allparts:
-            if search in part:
-                break
-        else:
-            raise AssertionError, "%s not found??" % (search,)
-        return from_entry_bridge(part, allparts)
-    return False
-
-def test_from_entry_bridge():
-    assert from_entry_bridge(
-        "# Loop 4 : entry bridge with 31 ops\n[p0, etc", [])
-    assert not from_entry_bridge(
-        "# Loop 1 : loop with 31 ops\n[p0, p1, etc", [])
-    assert not from_entry_bridge(
-        "# bridge out of Guard 5 with 24 ops\n[p0, p1, etc",
-        ["# Loop 1 : loop with 31 ops\n"
-             "[p0, p1]\n"
-             "guard_stuff(descr=<Guard5>)\n"])
-    assert from_entry_bridge(
-        "# bridge out of Guard 5 with 24 ops\n[p0, p1, etc",
-        ["# Loop 1 : entry bridge with 31 ops\n"
-             "[p0, p1]\n"
-             "guard_stuff(descr=<Guard5>)\n"])
-    assert not from_entry_bridge(
-        "# bridge out of Guard 51 with 24 ops\n[p0, p1, etc",
-        ["# Loop 1 : loop with 31 ops\n"
-             "[p0, p1]\n"
-             "guard_stuff(descr=<Guard5>)\n",
-         "# bridge out of Guard 5 with 13 ops\n"
-             "[p0, p1]\n"
-             "guard_other(p1, descr=<Guard51>)\n"])
-    assert from_entry_bridge(
-        "# bridge out of Guard 51 with 24 ops\n[p0, p1, etc",
-        ["# Loop 1 : entry bridge with 31 ops\n"
-             "[p0, p1]\n"
-             "guard_stuff(descr=<Guard5>)\n",
-         "# bridge out of Guard 5 with 13 ops\n"
-             "[p0, p1]\n"
-             "guard_other(p1, descr=<Guard51>)\n"])
-
-
-class PyPyCJITTests(object):
-    def run_source(self, source, expected_max_ops, *testcases, **kwds):
-        assert isinstance(expected_max_ops, int)
-        threshold = kwds.pop('threshold', 3)
-        self.count_debug_merge_point = \
-                                     kwds.pop('count_debug_merge_point', True)
-        if kwds:
-            raise TypeError, 'Unsupported keyword arguments: %s' % kwds.keys()
-        source = py.code.Source(source)
-        filepath = self.tmpdir.join('case%d.py' % self.counter)
-        logfilepath = filepath.new(ext='.log')
-        self.__class__.counter += 1
-        f = filepath.open('w')
-        print >> f, source
-        # some support code...
-        print >> f, py.code.Source("""
-            import sys
-            # we don't want to see the small bridges created
-            # by the checkinterval reaching the limit
-            sys.setcheckinterval(10000000)
-            try: # make the file runnable by CPython
-                import pypyjit
-                pypyjit.set_param(threshold=%d)
-            except ImportError:
-                pass
-
-            def check(args, expected):
-                #print >> sys.stderr, 'trying:', args
-                result = main(*args)
-                #print >> sys.stderr, 'got:', repr(result)
-                assert result == expected
-                assert type(result) is type(expected)
-        """ % threshold)
-        for testcase in testcases * 2:
-            print >> f, "check(%r, %r)" % testcase
-        print >> f, "print 'OK :-)'"
-        f.close()
-
-        print logfilepath
-        env = os.environ.copy()
-        env['PYPYLOG'] = ":%s" % (logfilepath,)
-        p = subprocess.Popen([self.pypy_c, str(filepath)],
-                             env=env, stdout=subprocess.PIPE)
-        result, _ = p.communicate()
-        assert result
-        if result.strip().startswith('SKIP:'):
-            py.test.skip(result.strip())
-        assert result.splitlines()[-1].strip() == 'OK :-)'
-        self.parse_loops(logfilepath)
-        self.print_loops()
-        print logfilepath
-        if self.total_ops > expected_max_ops:
-            assert 0, "too many operations: got %d, expected maximum %d" % (
-                self.total_ops, expected_max_ops)
-        return result
-
-    def parse_loops(self, opslogfile):
-        from pypy.tool import logparser
-        assert opslogfile.check()
-        log = logparser.parse_log_file(str(opslogfile))
-        parts = logparser.extract_category(log, 'jit-log-opt-')
-        self.rawloops = [part for part in parts
-                         if not from_entry_bridge(part, parts)]
-        self.loops, self.sliced_loops, self.total_ops = \
-                                           self.parse_rawloops(self.rawloops)
-        self.check_0_op_bytecodes()
-        self.rawentrybridges = [part for part in parts
-                                if from_entry_bridge(part, parts)]
-        _, self.sliced_entrybridge, _ = \
-                                    self.parse_rawloops(self.rawentrybridges)
-
-        from pypy.jit.tool.jitoutput import parse_prof
-        summaries  = logparser.extract_category(log, 'jit-summary')
-        if len(summaries) > 0:
-            self.jit_summary = parse_prof(summaries[-1])
-        else:
-            self.jit_summary = None
-        
-
-    def parse_rawloops(self, rawloops):
-        from pypy.jit.tool.oparser import parse
-        loops = [parse(part, no_namespace=True) for part in rawloops]
-        sliced_loops = [] # contains all bytecodes of all loops
-        total_ops = 0
-        for loop in loops:
-            for op in loop.operations:
-                if op.getopname() == "debug_merge_point":
-                    sliced_loop = BytecodeTrace()
-                    sliced_loop.bytecode = op.getarg(0)._get_str().rsplit(" ", 1)[1]
-                    sliced_loops.append(sliced_loop)
-                    if self.count_debug_merge_point:
-                        total_ops += 1
-                else:
-                    sliced_loop.append(op)
-                    total_ops += 1
-        return loops, sliced_loops, total_ops
-
-    def check_0_op_bytecodes(self):
-        for bytecodetrace in self.sliced_loops:
-            if bytecodetrace.bytecode not in ZERO_OP_BYTECODES:
-                continue
-            assert not bytecodetrace
-
-    def get_by_bytecode(self, name, from_entry_bridge=False):
-        if from_entry_bridge:
-            sliced_loops = self.sliced_entrybridge
-        else:
-            sliced_loops = self.sliced_loops
-        return [ops for ops in sliced_loops if ops.bytecode == name]
-
-    def print_loops(self):
-        for rawloop in self.rawloops:
-            print
-            print '@' * 79
-            print
-            print rawloop.rstrip()
-        print
-        print '@' * 79
-
-
-    def test_richards(self):
-        self.run_source('''
-            import sys; sys.path[:] = %r
-            from pypy.translator.goal import richards
-
-            def main():
-                return richards.main(iterations = 1)
-        ''' % (sys.path,), 7200,
-                   ([], 42))
-
-
-    def test_overflow_checking(self):
-        startvalue = sys.maxint - 2147483647
-        self.run_source('''
-        def main():
-            def f(a,b):
-                if a < 0: return -1
-                return a-b
-            total = %d
-            for i in range(100000):
-                total += f(i, 5)
-            return total
-        ''' % startvalue, 170, ([], startvalue + 4999450000L))
-
-    def test_shift(self):
-        from sys import maxint
-        maxvals = (-maxint-1, -maxint, maxint-1, maxint)
-        for a in (-4, -3, -2, -1, 0, 1, 2, 3, 4) + maxvals:
-            for b in (0, 1, 2, 31, 32, 33, 61, 62, 63):
-                r = 0
-                if (a >> b) >= 0:
-                    r += 2000
-                if (a << b) > 2:
-                    r += 20000000
-                if abs(a) < 10 and b < 5:
-                    ops = 13
-                else:
-                    ops = 29
-
-                self.run_source('''
-                def main(a, b):
-                    i = sa = 0
-                    while i < 2000:
-                        if a > 0: # Specialises the loop
-                            pass
-                        if b < 2 and b > 0:
-                            pass
-                        if (a >> b) >= 0:
-                            sa += 1
-                        if (a << b) > 2:
-                            sa += 10000
-                        i += 1
-                    return sa
-                ''', ops, ([a, b], r), count_debug_merge_point=False)
-
-    def test_revert_shift(self):
-        from sys import maxint
-        tests = []
-        for a in (1, 4, 8, 100):
-            for b in (-10, 10, -201, 201, -maxint/3, maxint/3):
-                for c in (-10, 10, -maxint/3, maxint/3):
-                    tests.append(([a, b, c], long(4000*(a+b+c))))
-        self.run_source('''
-        def main(a, b, c):
-            from sys import maxint
-            i = sa = 0
-            while i < 2000:
-                if 0 < a < 10: pass
-                if -100 < b < 100: pass
-                if -maxint/2 < c < maxint/2: pass
-                sa += (a<<a)>>a
-                sa += (b<<a)>>a
-                sa += (c<<a)>>a
-                sa += (a<<100)>>100
-                sa += (b<<100)>>100
-                sa += (c<<100)>>100
-                i += 1
-            return long(sa)
-        ''', 93, count_debug_merge_point=False, *tests)
-        
-    def test_division_to_rshift(self):
-        avalues = ('a', 'b', 7, -42, 8)
-        bvalues = ['b'] + range(-10, 0) + range(1,10)
-        code = ''
-        a1, b1, res1 = 10, 20, 0
-        a2, b2, res2 = 10, -20, 0
-        a3, b3, res3 = -10, -20, 0
-        def dd(a, b, aval, bval):
-            m = {'a': aval, 'b': bval}
-            if not isinstance(a, int):
-                a=m[a]
-            if not isinstance(b, int):
-                b=m[b]
-            return a/b
-        for a in avalues:
-            for b in bvalues:
-                code += '                sa += %s / %s\n' % (a, b)
-                res1 += dd(a, b, a1, b1)
-                res2 += dd(a, b, a2, b2)
-                res3 += dd(a, b, a3, b3)
-        # The purpose of this test is to check that we get
-        # the correct results, not really to count operations.
-        self.run_source('''
-        def main(a, b):
-            i = sa = 0
-            while i < 2000:
-%s                
-                i += 1
-            return sa
-        ''' % code, sys.maxint, ([a1, b1], 2000 * res1),
-                                ([a2, b2], 2000 * res2),
-                                ([a3, b3], 2000 * res3))
-
-    def test_mod(self):
-        avalues = ('a', 'b', 7, -42, 8)
-        bvalues = ['b'] + range(-10, 0) + range(1,10)
-        code = ''
-        a1, b1, res1 = 10, 20, 0
-        a2, b2, res2 = 10, -20, 0
-        a3, b3, res3 = -10, -20, 0
-        def dd(a, b, aval, bval):
-            m = {'a': aval, 'b': bval}
-            if not isinstance(a, int):
-                a=m[a]
-            if not isinstance(b, int):
-                b=m[b]
-            return a % b
-        for a in avalues:
-            for b in bvalues:
-                code += '                sa += %s %% %s\n' % (a, b)
-                res1 += dd(a, b, a1, b1)
-                res2 += dd(a, b, a2, b2)
-                res3 += dd(a, b, a3, b3)
-        # The purpose of this test is to check that we get
-        # the correct results, not really to count operations.
-        self.run_source('''
-        def main(a, b):
-            i = sa = 0
-            while i < 2000:
-                if a > 0: pass
-                if 1 < b < 2: pass
-%s
-                i += 1
-            return sa
-        ''' % code, sys.maxint, ([a1, b1], 2000 * res1),
-                                ([a2, b2], 2000 * res2),
-                                ([a3, b3], 2000 * res3))
-
-    def test_dont_trace_every_iteration(self):
-        self.run_source('''
-        def main(a, b):
-            i = sa = 0
-            while i < 200:
-                if a > 0: pass
-                if 1 < b < 2: pass
-                sa += a % b
-                i += 1
-            return sa
-        ''', 22,  ([10, 20], 200 * (10 % 20)),
-                 ([-10, -20], 200 * (-10 % -20)),
-                        count_debug_merge_point=False)
-        assert self.jit_summary.tracing_no == 2
-    def test_id_compare_optimization(self):
-        # XXX: lower the instruction count, 35 is the old value.
-        self.run_source("""
-        class A(object):
-            pass
-        def main():
-            i = 0
-            a = A()
-            while i < 5:
-                if A() != a:
-                    pass
-                i += 1
-        """, 35, ([], None))
-        _, compare = self.get_by_bytecode("COMPARE_OP")
-        assert "call" not in compare.get_opnames()
-
-class AppTestJIT(PyPyCJITTests):
-    def setup_class(cls):
-        if not option.runappdirect:
-            py.test.skip("meant only for pypy-c")
-        # the next line skips stuff if the pypy-c is not a jit build
-        cls.space = gettestobjspace(usemodules=['pypyjit'])
-        cls.tmpdir = udir.join('pypy-jit')
-        cls.tmpdir.ensure(dir=1)
-        cls.counter = 0
-        cls.pypy_c = sys.executable
-
-class TestJIT(PyPyCJITTests):
-    def setup_class(cls):
-        if option.pypy_c is None:
-            py.test.skip("pass --pypy!")
-        if not has_info(option.pypy_c, 'translation.jit'):
-            py.test.skip("must give a pypy-c with the jit enabled")
-        cls.tmpdir = udir.join('pypy-jit')
-        cls.tmpdir.ensure(dir=1)
-        cls.counter = 0
-        cls.pypy_c = option.pypy_c
-
-
-def test_interface_residual_call():
-    space = gettestobjspace(usemodules=['pypyjit'])
-    space.appexec([], """():
-        import pypyjit
-        def f(*args, **kwds):
-            return (args, kwds)
-        res = pypyjit.residual_call(f, 4, x=6)
-        assert res == ((4,), {'x': 6})
-    """)
-
-
-def has_info(pypy_c, option):
-    g = os.popen('"%s" --info' % pypy_c, 'r')
-    lines = g.readlines()
-    g.close()
-    if not lines:
-        raise ValueError("cannot execute %r" % pypy_c)
-    for line in lines:
-        line = line.strip()
-        if line.startswith(option + ':'):
-            line = line[len(option)+1:].strip()
-            if line == 'True':
-                return True
-            elif line == 'False':
-                return False
-            else:
-                return line
-    raise ValueError(option + ' not found in ' + pypy_c)
diff --git a/pypy/module/pypyjit/test_pypy_c/model.py b/pypy/module/pypyjit/test_pypy_c/model.py
--- a/pypy/module/pypyjit/test_pypy_c/model.py
+++ b/pypy/module/pypyjit/test_pypy_c/model.py
@@ -2,6 +2,7 @@
 import sys
 import re
 import os.path
+from _pytest.assertion import newinterpret
 from pypy.tool.jitlogparser.parser import SimpleParser, Function, TraceForOpcode
 from pypy.tool.jitlogparser.storage import LoopStorage
 
@@ -199,7 +200,7 @@
             # transform self._assert(x, 'foo') into assert x, 'foo'
             source = source.replace('self._assert(', 'assert ')
             source = source[:-1] # remove the trailing ')'
-            self.msg = py.code._reinterpret(source, f, should_fail=True)
+            self.msg = newinterpret.interpret(source, f, should_fail=True)
         else:
             self.msg = "<could not determine information>"
 
diff --git a/pypy/module/pypyjit/test_pypy_c/test_model.py b/pypy/module/pypyjit/test_pypy_c/test_00_model.py
rename from pypy/module/pypyjit/test_pypy_c/test_model.py
rename to pypy/module/pypyjit/test_pypy_c/test_00_model.py
--- a/pypy/module/pypyjit/test_pypy_c/test_model.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_00_model.py
@@ -5,6 +5,7 @@
 from lib_pypy import disassembler
 from pypy.tool.udir import udir
 from pypy.tool import logparser
+from pypy.jit.tool.jitoutput import parse_prof
 from pypy.module.pypyjit.test_pypy_c.model import Log, find_ids_range, find_ids, \
     LoopWithIds, OpMatcher
 
@@ -21,6 +22,7 @@
         self.filepath = self.tmpdir.join(meth.im_func.func_name + '.py')
 
     def run(self, func_or_src, args=[], import_site=False, **jitopts):
+        jitopts.setdefault('threshold', 200)
         src = py.code.Source(func_or_src)
         if isinstance(func_or_src, types.FunctionType):
             funcname = func_or_src.func_name
@@ -63,6 +65,13 @@
         rawtraces = logparser.extract_category(rawlog, 'jit-log-opt-')
         log = Log(rawtraces)
         log.result = eval(stdout)
+        #
+        summaries  = logparser.extract_category(rawlog, 'jit-summary')
+        if len(summaries) > 0:
+            log.jit_summary = parse_prof(summaries[-1])
+        else:
+            log.jit_summary = None
+        #
         return log
 
     def run_and_check(self, src, args=[], **jitopts):
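
(Aside, not part of the changeset: the jit-summary handling added above can also be driven by hand. A minimal sketch, assuming a log file that was produced with a PYPYLOG setting that includes a jit-summary section; the tracing_no counter name follows the removed test_dont_trace_every_iteration, and 'logfile' is a placeholder path.)

    from pypy.tool import logparser
    from pypy.jit.tool.jitoutput import parse_prof

    rawlog = logparser.parse_log_file('logfile')
    summaries = logparser.extract_category(rawlog, 'jit-summary')
    if summaries:
        summary = parse_prof(summaries[-1])
        # e.g. check that tracing happened only a couple of times
        assert summary.tracing_no <= 2
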
diff --git a/pypy/module/pypyjit/test_pypy_c/test__ffi.py b/pypy/module/pypyjit/test_pypy_c/test__ffi.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test__ffi.py
@@ -0,0 +1,133 @@
+import py
+import sys
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class Test__ffi(BaseTestPyPyC):
+
+    def test__ffi_call(self):
+        from pypy.rlib.test.test_libffi import get_libm_name
+        def main(libm_name):
+            try:
+                from _ffi import CDLL, types
+            except ImportError:
+                sys.stderr.write('SKIP: cannot import _ffi\n')
+                return 0
+
+            libm = CDLL(libm_name)
+            pow = libm.getfunc('pow', [types.double, types.double],
+                               types.double)
+            i = 0
+            res = 0
+            while i < 300:
+                tmp = pow(2, 3)   # ID: fficall
+                res += tmp
+                i += 1
+            return pow.getaddr(), res
+        #
+        libm_name = get_libm_name(sys.platform)
+        log = self.run(main, [libm_name])
+        pow_addr, res = log.result
+        assert res == 8.0 * 300
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match_by_id('fficall', """
+            p16 = getfield_gc(ConstPtr(ptr15), descr=<.* .*Function.inst_name .*>)
+            guard_not_invalidated(descr=...)
+            i17 = force_token()
+            setfield_gc(p0, i17, descr=<.* .*PyFrame.vable_token .*>)
+            f21 = call_release_gil(%d, 2.000000, 3.000000, descr=<FloatCallDescr>)
+            guard_not_forced(descr=...)
+            guard_no_exception(descr=...)
+        """ % pow_addr)
+
+
+    def test__ffi_call_frame_does_not_escape(self):
+        from pypy.rlib.test.test_libffi import get_libm_name
+        def main(libm_name):
+            try:
+                from _ffi import CDLL, types
+            except ImportError:
+                sys.stderr.write('SKIP: cannot import _ffi\n')
+                return 0
+
+            libm = CDLL(libm_name)
+            pow = libm.getfunc('pow', [types.double, types.double],
+                               types.double)
+
+            def mypow(a, b):
+                return pow(a, b)
+
+            i = 0
+            res = 0
+            while i < 300:
+                tmp = mypow(2, 3)
+                res += tmp
+                i += 1
+            return pow.getaddr(), res
+        #
+        libm_name = get_libm_name(sys.platform)
+        log = self.run(main, [libm_name])
+        pow_addr, res = log.result
+        assert res == 8.0 * 300
+        loop, = log.loops_by_filename(self.filepath)
+        opnames = log.opnames(loop.allops())
+        # we only force the virtualref, not its content
+        assert opnames.count('new_with_vtable') == 1
+
+    def test__ffi_call_releases_gil(self):
+        from pypy.rlib.test.test_libffi import get_libc_name
+        def main(libc_name, n):
+            import time
+            from threading import Thread
+            from _ffi import CDLL, types
+            #
+            libc = CDLL(libc_name)
+            sleep = libc.getfunc('sleep', [types.uint], types.uint)
+            delays = [0]*n + [1]
+            #
+            def loop_of_sleeps(i, delays):
+                for delay in delays:
+                    sleep(delay)    # ID: sleep
+            #
+            threads = [Thread(target=loop_of_sleeps, args=[i, delays]) for i in range(5)]
+            start = time.time()
+            for i, thread in enumerate(threads):
+                thread.start()
+            for thread in threads:
+                thread.join()
+            end = time.time()
+            return end - start
+        #
+        log = self.run(main, [get_libc_name(), 200], threshold=150)
+        assert 1 <= log.result <= 1.5 # at most 0.5 seconds of overhead
+        loops = log.loops_by_id('sleep')
+        assert len(loops) == 1 # make sure that we actually JITted the loop
+
+
+    def test_ctypes_call(self):
+        from pypy.rlib.test.test_libffi import get_libm_name
+        def main(libm_name):
+            import ctypes
+            libm = ctypes.CDLL(libm_name)
+            fabs = libm.fabs
+            fabs.argtypes = [ctypes.c_double]
+            fabs.restype = ctypes.c_double
+            x = -4
+            i = 0
+            while i < 300:
+                x = fabs(x)
+                x = x - 100
+                i += 1
+            return fabs._ptr.getaddr(), x
+
+        libm_name = get_libm_name(sys.platform)
+        log = self.run(main, [libm_name])
+        fabs_addr, res = log.result
+        assert res == -4.0
+        loop, = log.loops_by_filename(self.filepath)
+        ops = loop.allops()
+        opnames = log.opnames(ops)
+        assert opnames.count('new_with_vtable') == 1 # only the virtualref
+        assert opnames.count('call_release_gil') == 1
+        idx = opnames.index('call_release_gil')
+        call = ops[idx]
+        assert int(call.args[0]) == fabs_addr
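
(Aside: for reference, the call pattern that Test__ffi above pushes through the JIT, as a standalone sketch. It runs only on a pypy-c that ships the _ffi module; get_libm_name is reused from pypy.rlib.test.test_libffi exactly as in the tests.)

    import sys
    from pypy.rlib.test.test_libffi import get_libm_name
    from _ffi import CDLL, types

    libm = CDLL(get_libm_name(sys.platform))
    pow = libm.getfunc('pow', [types.double, types.double], types.double)
    # this is the call that shows up as a single call_release_gil in the trace
    assert pow(2.0, 3.0) == 8.0
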
diff --git a/pypy/module/pypyjit/test_pypy_c/test_array.py b/pypy/module/pypyjit/test_pypy_c/test_array.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_array.py
@@ -0,0 +1,192 @@
+import py
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class TestArray(BaseTestPyPyC):
+
+    def test_arraycopy_disappears(self):
+        def main(n):
+            i = 0
+            while i < n:
+                t = (1, 2, 3, i + 1)
+                t2 = t[:]
+                del t
+                i = t2[3]
+                del t2
+            return i
+        #
+        log = self.run(main, [500])
+        assert log.result == 500
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i7 = int_lt(i5, i6)
+            guard_true(i7, descr=<Guard3>)
+            i9 = int_add(i5, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, i9, i6, descr=<Loop0>)
+        """)
+
+    def test_array_sum(self):
+        def main():
+            from array import array
+            img = array("i", range(128) * 5) * 480
+            l, i = 0, 0
+            while i < len(img):
+                l += img[i]
+                i += 1
+            return l
+        #
+        log = self.run(main, [])
+        assert log.result == 19507200
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i13 = int_lt(i7, i9)
+            guard_true(i13, descr=<Guard3>)
+            i15 = getarrayitem_raw(i10, i7, descr=<.*ArrayNoLengthDescr>)
+            i16 = int_add_ovf(i8, i15)
+            guard_no_overflow(descr=<Guard4>)
+            i18 = int_add(i7, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, p5, p6, i18, i16, i9, i10, descr=<Loop0>)
+        """)
+
+    def test_array_intimg(self):
+        def main():
+            from array import array
+            img = array('i', range(3)) * (350 * 480)
+            intimg = array('i', (0,)) * (640 * 480)
+            l, i = 0, 640
+            while i < 640 * 480:
+                assert len(img) == 3*350*480
+                assert len(intimg) == 640*480
+                l = l + img[i]
+                intimg[i] = (intimg[i-640] + l)
+                i += 1
+            return intimg[i - 1]
+        #
+        log = self.run(main, [])
+        assert log.result == 73574560
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i13 = int_lt(i8, 307200)
+            guard_true(i13, descr=<Guard3>)
+        # the bound check guard on img has been killed (thanks to the asserts)
+            i14 = getarrayitem_raw(i10, i8, descr=<.*ArrayNoLengthDescr>)
+            i15 = int_add_ovf(i9, i14)
+            guard_no_overflow(descr=<Guard4>)
+            i17 = int_sub(i8, 640)
+        # the bound check guard on intimg has been killed (thanks to the asserts)
+            i18 = getarrayitem_raw(i11, i17, descr=<.*ArrayNoLengthDescr>)
+            i19 = int_add_ovf(i18, i15)
+            guard_no_overflow(descr=<Guard5>)
+        # on 64bit, there is a guard checking that i19 actually fits into 32bit
+            ...
+            setarrayitem_raw(i11, i8, _, descr=<.*ArrayNoLengthDescr>)
+            i28 = int_add(i8, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, p5, p6, p7, i28, i15, i10, i11, descr=<Loop0>)
+        """)
+
+
+    def test_zeropadded(self):
+        def main():
+            from array import array
+            class ZeroPadded(array):
+                def __new__(cls, l):
+                    self = array.__new__(cls, 'd', range(l))
+                    return self
+
+                def __getitem__(self, i):
+                    if i < 0 or i >= len(self):
+                        return 0
+                    return array.__getitem__(self, i) # ID: get
+            #
+            buf = ZeroPadded(2000)
+            i = 10
+            sa = 0
+            while i < 2000 - 10:
+                sa += buf[i-2] + buf[i-1] + buf[i] + buf[i+1] + buf[i+2]
+                i += 1
+            return sa
+
+        log = self.run(main, [])
+        assert log.result == 9895050.0
+        loop, = log.loops_by_filename(self.filepath)
+        #
+        # check that the overloaded __getitem__ does not introduce double
+        # array bound checks.
+        #
+        # The force_token()s are still there, but will be eliminated by the
+        # backend regalloc, so they are harmless
+        assert loop.match(ignore_ops=['force_token'],
+                          expected_src="""
+            ...
+            i20 = int_ge(i18, i8)
+            guard_false(i20, descr=...)
+            f21 = getarrayitem_raw(i13, i18, descr=...)
+            i14 = int_sub(i6, 1)
+            i15 = int_ge(i14, i8)
+            guard_false(i15, descr=...)
+            f23 = getarrayitem_raw(i13, i14, descr=...)
+            f24 = float_add(f21, f23)
+            f26 = getarrayitem_raw(i13, i6, descr=...)
+            f27 = float_add(f24, f26)
+            i29 = int_add(i6, 1)
+            i31 = int_ge(i29, i8)
+            guard_false(i31, descr=...)
+            f33 = getarrayitem_raw(i13, i29, descr=...)
+            f34 = float_add(f27, f33)
+            i36 = int_add(i6, 2)
+            i38 = int_ge(i36, i8)
+            guard_false(i38, descr=...)
+            f39 = getarrayitem_raw(i13, i36, descr=...)
+            ...
+        """)
+
+    def test_circular(self):
+        def main():
+            from array import array
+            class Circular(array):
+                def __new__(cls):
+                    self = array.__new__(cls, 'd', range(256))
+                    return self
+                def __getitem__(self, i):
+                    assert len(self) == 256
+                    return array.__getitem__(self, i & 255)
+            #
+            buf = Circular()
+            i = 10
+            sa = 0
+            while i < 2000 - 10:
+                sa += buf[i-2] + buf[i-1] + buf[i] + buf[i+1] + buf[i+2]
+                i += 1
+            return sa
+        #
+        log = self.run(main, [])
+        assert log.result == 1239690.0
+        loop, = log.loops_by_filename(self.filepath)
+        #
+        # check that the array bound checks are removed
+        #
+        # The force_token()s are still there, but will be eliminated by the
+        # backend regalloc, so they are harmless
+        assert loop.match(ignore_ops=['force_token'],
+                          expected_src="""
+            ...
+            i17 = int_and(i14, 255)
+            f18 = getarrayitem_raw(i8, i17, descr=...)
+            i19s = int_sub_ovf(i6, 1)
+            guard_no_overflow(descr=...)
+            i22s = int_and(i19s, 255)
+            f20 = getarrayitem_raw(i8, i22s, descr=...)
+            f21 = float_add(f18, f20)
+            f23 = getarrayitem_raw(i8, i10, descr=...)
+            f24 = float_add(f21, f23)
+            i26 = int_add(i6, 1)
+            i29 = int_and(i26, 255)
+            f30 = getarrayitem_raw(i8, i29, descr=...)
+            f31 = float_add(f24, f30)
+            i33 = int_add(i6, 2)
+            i36 = int_and(i33, 255)
+            f37 = getarrayitem_raw(i8, i36, descr=...)
+            ...
+        """)
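
(Aside: a quick sanity check of the constant expected by test_array_sum above; plain Python 2 is enough, no JIT involved.)

    from array import array
    img = array("i", range(128) * 5) * 480
    # sum(range(128)) == 8128, repeated 5 * 480 times
    assert sum(img) == 8128 * 5 * 480 == 19507200
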
diff --git a/pypy/module/pypyjit/test_pypy_c/test_boolrewrite.py b/pypy/module/pypyjit/test_pypy_c/test_boolrewrite.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_boolrewrite.py
@@ -0,0 +1,233 @@
+import py
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class TestBoolRewrite(BaseTestPyPyC):
+
+    def test_boolrewrite_inverse(self):
+        """
+        Test for this case::
+            guard(i < x)
+            ...
+            guard(i >= y)
+
+        where x and y can be either constants or variables. There are cases in
+        which the second guard is proven to be always true.
+        """
+
+        for a, b, res, opt_expected in (('2000', '2000', 20001000, True),
+                                        ( '500',  '500', 15001500, True),
+                                        ( '300',  '600', 16001700, False),
+                                        (   'a',    'b', 16001700, False),
+                                        (   'a',    'a', 13001700, True)):
+            src = """
+                def main():
+                    sa = 0
+                    a = 300
+                    b = 600
+                    for i in range(1000):
+                        if i < %s:         # ID: lt
+                            sa += 1
+                        else:
+                            sa += 2
+                        #
+                        if i >= %s:        # ID: ge
+                            sa += 10000
+                        else:
+                            sa += 20000
+                    return sa
+            """ % (a, b)
+            #
+            log = self.run(src, [], threshold=400)
+            assert log.result == res
+            for loop in log.loops_by_filename(self.filepath):
+                le_ops = log.opnames(loop.ops_by_id('lt'))
+                ge_ops = log.opnames(loop.ops_by_id('ge'))
+                assert le_ops.count('int_lt') == 1
+                #
+                if opt_expected:
+                    assert ge_ops.count('int_ge') == 0
+                else:
+                    # if this assert fails it means that the optimization was
+                    # applied even though we did not expect it to be. Check
+                    # whether the optimization is valid, and either fix the
+                    # code or fix the test :-)
+                    assert ge_ops.count('int_ge') == 1
+
+    def test_boolrewrite_reflex(self):
+        """
+        Test for this case::
+            guard(i < x)
+            ...
+            guard(y > i)
+
+        where x and y can be either constants or variables. There are cases in
+        which the second guard is proven to be always true.
+        """
+        for a, b, res, opt_expected in (('2000', '2000', 10001000, True),
+                                        ( '500',  '500', 15001500, True),
+                                        ( '300',  '600', 14001700, False),
+                                        (   'a',    'b', 14001700, False),
+                                        (   'a',    'a', 17001700, True)):
+
+            src = """
+                def main():
+                    sa = 0
+                    a = 300
+                    b = 600
+                    for i in range(1000):
+                        if i < %s:        # ID: lt
+                            sa += 1
+                        else:
+                            sa += 2
+                        if %s > i:        # ID: gt
+                            sa += 10000
+                        else:
+                            sa += 20000
+                    return sa
+            """ % (a, b)
+            log = self.run(src, [], threshold=400)
+            assert log.result == res
+            for loop in log.loops_by_filename(self.filepath):
+                le_ops = log.opnames(loop.ops_by_id('lt'))
+                gt_ops = log.opnames(loop.ops_by_id('gt'))
+                assert le_ops.count('int_lt') == 1
+                #
+                if opt_expected:
+                    assert gt_ops.count('int_gt') == 0
+                else:
+                    # if this assert fails it means that the optimization was
+                    # applied even though we did not expect it to be. Check
+                    # whether the optimization is valid, and either fix the
+                    # code or fix the test :-)
+                    assert gt_ops.count('int_gt') == 1
+
+
+    def test_boolrewrite_allcases_inverse(self):
+        """
+        Test for this case::
+            guard(i < x)
+            ...
+            guard(i > y)
+
+        with all possible combinations of binary comparison operators.  This
+        test only checks that we get the expected result, not that any
+        optimization has been applied.
+        """
+        ops = ('<', '>', '<=', '>=', '==', '!=')
+        for op1 in ops:
+            for op2 in ops:
+                for a,b in ((500, 500), (300, 600)):
+                    src = """
+                        def main():
+                            sa = 0
+                            for i in range(300):
+                                if i %s %d:
+                                    sa += 1
+                                else:
+                                    sa += 2
+                                if i %s %d:
+                                    sa += 10000
+                                else:
+                                    sa += 20000
+                            return sa
+                    """ % (op1, a, op2, b)
+                    yield self.run_and_check, src
+
+                    src = """
+                        def main():
+                            sa = 0
+                            i = 0.0
+                            while i < 250.0:
+                                if i %s %f:
+                                    sa += 1
+                                else:
+                                    sa += 2
+                                if i %s %f:
+                                    sa += 10000
+                                else:
+                                    sa += 20000
+                                i += 0.25
+                            return sa
+                    """ % (op1, float(a)/4.0, op2, float(b)/4.0)
+                    yield self.run_and_check, src
+
+
+    def test_boolrewrite_allcases_reflex(self):
+        """
+        Test for this case::
+            guard(i < x)
+            ...
+            guard(x > i)
+
+        with all possible combinations of binary comparison operators.  This
+        test only checks that we get the expected result, not that any
+        optimization has been applied.
+        """
+        ops = ('<', '>', '<=', '>=', '==', '!=')
+        for op1 in ops:
+            for op2 in ops:
+                for a,b in ((500, 500), (300, 600)):
+                    src = """
+                        def main():
+                            sa = 0
+                            for i in range(300):
+                                if i %s %d:
+                                    sa += 1
+                                else:
+                                    sa += 2
+                                if %d %s i:
+                                    sa += 10000
+                                else:
+                                    sa += 20000
+                            return sa
+                    """ % (op1, a, b, op2)
+                    yield self.run_and_check, src
+
+                    src = """
+                        def main():
+                            sa = 0
+                            i = 0.0
+                            while i < 250.0:
+                                if i %s %f:
+                                    sa += 1
+                                else:
+                                    sa += 2
+                                if %f %s i:
+                                    sa += 10000
+                                else:
+                                    sa += 20000
+                                i += 0.25
+                            return sa
+                    """ % (op1, float(a)/4.0, float(b)/4.0, op2)
+                    yield self.run_and_check, src
+
+    def test_boolrewrite_ptr(self):
+        """
+        This test only checks that we get the expected result, not that any
+        optimization has been applied.
+        """
+        compares = ('a == b', 'b == a', 'a != b', 'b != a', 'a == c', 'c != b')
+        for e1 in compares:
+            for e2 in compares:
+                src = """
+                    class tst(object):
+                        pass
+                    def main():
+                        a = tst()
+                        b = tst()
+                        c = tst()
+                        sa = 0
+                        for i in range(300):
+                            if %s:
+                                sa += 1
+                            else:
+                                sa += 2
+                            if %s:
+                                sa += 10000
+                            else:
+                                sa += 20000
+                            if i > 750:
+                                a = b
+                        return sa
+                """ % (e1, e2)
+                yield self.run_and_check, src
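
(Aside: the property the boolrewrite tests revolve around can be stated without any JIT machinery. A plain-Python illustration, not PyPy code, of the a == b case: the outcome of the first comparison fully determines the second, so the second guard can be dropped.)

    def implied(i, c, lt_outcome):
        # once the outcome of i < c is known, the outcome of i >= c is forced
        assert (i < c) == lt_outcome
        return not lt_outcome

    for i in range(1000):
        assert (i >= 500) == implied(i, 500, i < 500)
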
diff --git a/pypy/module/pypyjit/test_pypy_c/test_call.py b/pypy/module/pypyjit/test_pypy_c/test_call.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_call.py
@@ -0,0 +1,381 @@
+import py
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class TestCall(BaseTestPyPyC):
+
+    def test_recursive_call(self):
+        def fn():
+            def rec(n):
+                if n == 0:
+                    return 0
+                return 1 + rec(n-1)
+            #
+            # this loop is traced and then aborted, because the trace is too
+            # long. But then "rec" is marked as "don't inline"
+            i = 0
+            j = 0
+            while i < 20:
+                i += 1
+                j += rec(100)
+            #
+            # next time we try to trace "rec", instead of inlining we compile
+            # it separately and generate a call_assembler
+            i = 0
+            j = 0
+            while i < 20:
+                i += 1
+                j += rec(100) # ID: call_rec
+                a = 0
+            return j
+        #
+        log = self.run(fn, [], threshold=18)
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match_by_id('call_rec', """
+            ...
+            p53 = call_assembler(..., descr=...)
+            guard_not_forced(descr=...)
+            guard_no_exception(descr=...)
+            ...
+        """)
+
+    def test_simple_call(self):
+        src = """
+            OFFSET = 0
+            def f(i):
+                return i + 1 + OFFSET # ID: add
+            def main(n):
+                i = 0
+                while i < n+OFFSET:   # ID: cond
+                    i = f(f(i))       # ID: call
+                    a = 0
+                return i
+        """
+        log = self.run(src, [1000])
+        assert log.result == 1000
+        # first, we test what is inside the entry bridge
+        # -----------------------------------------------
+        entry_bridge, = log.loops_by_id('call', is_entry_bridge=True)
+        # LOAD_GLOBAL of OFFSET
+        ops = entry_bridge.ops_by_id('cond', opcode='LOAD_GLOBAL')
+        assert log.opnames(ops) == ["guard_value",
+                                    "getfield_gc", "guard_value",
+                                    "getfield_gc", "guard_isnull",
+                                    "getfield_gc", "guard_nonnull_class"]
+        # LOAD_GLOBAL of OFFSET, but in a different function; it is
+        # partially folded away
+        # XXX could be improved
+        ops = entry_bridge.ops_by_id('add', opcode='LOAD_GLOBAL')
+        assert log.opnames(ops) == ["guard_value", "getfield_gc", "guard_isnull"]
+        #
+        # two LOAD_GLOBAL of f, the second is folded away
+        ops = entry_bridge.ops_by_id('call', opcode='LOAD_GLOBAL')
+        assert log.opnames(ops) == ["getfield_gc", "guard_nonnull_class"]
+        #
+        assert entry_bridge.match_by_id('call', """
+            p29 = getfield_gc(ConstPtr(ptr28), descr=<GcPtrFieldDescr pypy.objspace.std.celldict.ModuleCell.inst_w_value .*>)
+            guard_nonnull_class(p29, ConstClass(Function), descr=<Guard18>)
+            p33 = getfield_gc(p29, descr=<GcPtrFieldDescr pypy.interpreter.function.Function.inst_code .*>)
+            guard_value(p33, ConstPtr(ptr34), descr=<Guard19>)
+            p35 = getfield_gc(p29, descr=<GcPtrFieldDescr pypy.interpreter.function.Function.inst_w_func_globals .*>)
+            p36 = getfield_gc(p29, descr=<GcPtrFieldDescr pypy.interpreter.function.Function.inst_closure .*>)
+            p38 = call(ConstClass(getexecutioncontext), descr=<GcPtrCallDescr>)
+            p39 = getfield_gc(p38, descr=<GcPtrFieldDescr pypy.interpreter.executioncontext.ExecutionContext.inst_topframeref .*>)
+            i40 = force_token()
+            p41 = getfield_gc(p38, descr=<GcPtrFieldDescr pypy.interpreter.executioncontext.ExecutionContext.inst_w_tracefunc .*>)
+            guard_isnull(p41, descr=<Guard20>)
+            i42 = getfield_gc(p38, descr=<NonGcPtrFieldDescr pypy.interpreter.executioncontext.ExecutionContext.inst_profilefunc .*>)
+            i43 = int_is_zero(i42)
+            guard_true(i43, descr=<Guard21>)
+            i50 = force_token()
+        """)
+        #
+        # then, we test the actual loop
+        # -----------------------------
+        loop, = log.loops_by_id('call')
+        assert loop.match("""
+            i12 = int_lt(i5, i6)
+            guard_true(i12, descr=<Guard3>)
+            i13 = force_token()
+            i15 = int_add(i5, 1)
+            i16 = int_add_ovf(i15, i7)
+            guard_no_overflow(descr=<Guard4>)
+            i18 = force_token()
+            i20 = int_add_ovf(i16, 1)
+            guard_no_overflow(descr=<Guard5>)
+            i21 = int_add_ovf(i20, i7)
+            guard_no_overflow(descr=<Guard6>)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, i21, i6, i7, p8, p9, p10, p11, descr=<Loop0>)
+        """)
+
+    def test_method_call(self):
+        def fn(n):
+            class A(object):
+                def __init__(self, a):
+                    self.a = a
+                def f(self, i):
+                    return self.a + i
+            i = 0
+            a = A(1)
+            while i < n:
+                x = a.f(i)    # ID: meth1
+                i = a.f(x)    # ID: meth2
+            return i
+        #
+        log = self.run(fn, [1000])
+        assert log.result == 1000
+        #
+        # first, we test the entry bridge
+        # -------------------------------
+        entry_bridge, = log.loops_by_filename(self.filepath, is_entry_bridge=True)
+        ops = entry_bridge.ops_by_id('meth1', opcode='LOOKUP_METHOD')
+        assert log.opnames(ops) == ['guard_value', 'getfield_gc', 'guard_value',
+                                    'guard_not_invalidated']
+        # the second LOOKUP_METHOD is folded away
+        assert list(entry_bridge.ops_by_id('meth2', opcode='LOOKUP_METHOD')) == []
+        #
+        # then, the actual loop
+        # ----------------------
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i15 = int_lt(i6, i9)
+            guard_true(i15, descr=<Guard3>)
+            guard_not_invalidated(descr=<Guard4>)
+            i16 = force_token()
+            i17 = int_add_ovf(i10, i6)
+            guard_no_overflow(descr=<Guard5>)
+            i18 = force_token()
+            i19 = int_add_ovf(i10, i17)
+            guard_no_overflow(descr=<Guard6>)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, p5, i19, p7, i17, i9, i10, p11, p12, p13, descr=<Loop0>)
+        """)
+
+    def test_static_classmethod_call(self):
+        def fn(n):
+            class A(object):
+                @classmethod
+                def f(cls, i):
+                    return i + (cls is A) + 1
+                @staticmethod
+                def g(i):
+                    return i - 1
+            #
+            i = 0
+            a = A()
+            while i < n:
+                x = a.f(i)
+                i = a.g(x)
+            return i
+        #
+        log = self.run(fn, [1000])
+        assert log.result == 1000
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i14 = int_lt(i6, i9)
+            guard_true(i14, descr=<Guard3>)
+            guard_not_invalidated(descr=<Guard4>)
+            i15 = force_token()
+            i17 = int_add_ovf(i8, 1)
+            guard_no_overflow(descr=<Guard5>)
+            i18 = force_token()
+            --TICK--
+            jump(p0, p1, p2, p3, p4, p5, i8, p7, i17, i9, p10, p11, p12, descr=<Loop0>)
+        """)
+
+    def test_default_and_kw(self):
+        def main(n):
+            def f(i, j=1):
+                return i + j
+            #
+            i = 0
+            while i < n:
+                i = f(f(i), j=1) # ID: call
+                a = 0
+            return i
+        #
+        log = self.run(main, [1000])
+        assert log.result == 1000
+        loop, = log.loops_by_id('call')
+        assert loop.match_by_id('call', """
+            i14 = force_token()
+            i16 = force_token()
+        """)
+
+    def test_kwargs(self):
+        # this is not a very precise test; it could be improved
+        def main(x):
+            def g(**args):
+                return len(args)
+            #
+            s = 0
+            d = {}
+            for i in range(x):
+                s += g(**d)       # ID: call
+                d[str(i)] = i
+                if i % 100 == 99:
+                    d = {}
+            return s
+        #
+        log = self.run(main, [1000])
+        assert log.result == 49500
+        loop, = log.loops_by_id('call')
+        ops = log.opnames(loop.ops_by_id('call'))
+        guards = [ops for ops in ops if ops.startswith('guard')]
+        assert len(guards) <= 5
+
+    def test_stararg_virtual(self):
+        def main(x):
+            def g(*args):
+                return len(args)
+            def h(a, b, c):
+                return c
+            #
+            s = 0
+            for i in range(x):
+                l = [i, x, 2]
+                s += g(*l)       # ID: g1
+                s += h(*l)       # ID: h1
+                s += g(i, x, 2)  # ID: g2
+                a = 0
+            for i in range(x):
+                l = [x, 2]
+                s += g(i, *l)    # ID: g3
+                s += h(i, *l)    # ID: h2
+                a = 0
+            return s
+        #
+        log = self.run(main, [1000])
+        assert log.result == 13000
+        loop0, = log.loops_by_id('g1')
+        assert loop0.match_by_id('g1', """
+            i20 = force_token()
+            setfield_gc(p4, i19, descr=<.*W_AbstractSeqIterObject.inst_index .*>)
+            i22 = int_add_ovf(i8, 3)
+            guard_no_overflow(descr=<Guard4>)
+        """)
+        assert loop0.match_by_id('h1', """
+            i20 = force_token()
+            i22 = int_add_ovf(i8, 2)
+            guard_no_overflow(descr=<Guard5>)
+        """)
+        assert loop0.match_by_id('g2', """
+            i27 = force_token()
+            i29 = int_add_ovf(i26, 3)
+            guard_no_overflow(descr=<Guard6>)
+        """)
+        #
+        loop1, = log.loops_by_id('g3')
+        assert loop1.match_by_id('g3', """
+            i21 = force_token()
+            setfield_gc(p4, i20, descr=<.* .*W_AbstractSeqIterObject.inst_index .*>)
+            i23 = int_add_ovf(i9, 3)
+            guard_no_overflow(descr=<Guard37>)
+        """)
+        assert loop1.match_by_id('h2', """
+            i25 = force_token()
+            i27 = int_add_ovf(i23, 2)
+            guard_no_overflow(descr=<Guard38>)
+        """)
+
+    def test_stararg(self):
+        def main(x):
+            def g(*args):
+                return args[-1]
+            def h(*args):
+                return len(args)
+            #
+            s = 0
+            l = []
+            i = 0
+            while i < x:
+                l.append(1)
+                s += g(*l)     # ID: g
+                i = h(*l)      # ID: h
+                a = 0
+            return s
+        #
+        log = self.run(main, [1000])
+        assert log.result == 1000
+        loop, = log.loops_by_id('g')
+        ops_g = log.opnames(loop.ops_by_id('g'))
+        ops_h = log.opnames(loop.ops_by_id('h'))
+        ops = ops_g + ops_h
+        assert 'new_with_vtable' not in ops
+        assert 'call_may_force' not in ops
+
+    def test_call_builtin_function(self):
+        def main(n):
+            i = 2
+            l = []
+            while i < n:
+                i += 1
+                l.append(i)    # ID: append
+                a = 0
+            return i, len(l)
+        #
+        log = self.run(main, [1000])
+        assert log.result == (1000, 998)
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match_by_id('append', """
+            i13 = getfield_gc(p8, descr=<SignedFieldDescr list.length .*>)
+            i15 = int_add(i13, 1)
+            call(ConstClass(_ll_list_resize_ge__listPtr_Signed), p8, i15, descr=<VoidCallDescr>)
+            guard_no_exception(descr=<Guard4>)
+            p17 = getfield_gc(p8, descr=<GcPtrFieldDescr list.items .*>)
+            p19 = new_with_vtable(ConstClass(W_IntObject))
+            setfield_gc(p19, i12, descr=<SignedFieldDescr .*W_IntObject.inst_intval .*>)
+            setarrayitem_gc(p17, i13, p19, descr=<GcPtrArrayDescr>)
+        """)
+
+    def test_blockstack_virtualizable(self):
+        def main(n):
+            from pypyjit import residual_call
+            i = 0
+            while i < n:
+                try:
+                    residual_call(len, [])   # ID: call
+                except:
+                    pass
+                i += 1
+            return i
+        #
+        log = self.run(main, [500])
+        assert log.result == 500
+        loop, = log.loops_by_id('call')
+        assert loop.match_by_id('call', opcode='CALL_FUNCTION', expected_src="""
+            # make sure that the "block" is not allocated
+            ...
+            i20 = force_token()
+            setfield_gc(p0, i20, descr=<SignedFieldDescr .*PyFrame.vable_token .*>)
+            p22 = new_with_vtable(19511408)
+            p24 = new_array(1, descr=<GcPtrArrayDescr>)
+            p26 = new_with_vtable(ConstClass(W_ListObject))
+            p27 = new(descr=<SizeDescr .*>)
+            p29 = new_array(0, descr=<GcPtrArrayDescr>)
+            setfield_gc(p27, p29, descr=<GcPtrFieldDescr list.items .*>)
+            setfield_gc(p26, p27, descr=<.* .*W_ListObject.inst_wrappeditems .*>)
+            setarrayitem_gc(p24, 0, p26, descr=<GcPtrArrayDescr>)
+            setfield_gc(p22, p24, descr=<GcPtrFieldDescr .*Arguments.inst_arguments_w .*>)
+            p32 = call_may_force(11376960, p18, p22, descr=<GcPtrCallDescr>)
+            ...
+        """)
+
+    def test_func_defaults(self):
+        def main(n):
+            i = 1
+            while i < n:
+                i += len(xrange(i+1)) - i
+            return i
+
+        log = self.run(main, [10000])
+        assert log.result == 10000
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i10 = int_lt(i5, i6)
+            guard_true(i10, descr=<Guard3>)
+            i120 = int_add(i5, 1)
+            guard_not_invalidated(descr=<Guard4>)
+            --TICK--
+            jump(..., descr=<Loop0>)
+        """)
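
(Aside: the pypyjit.residual_call helper used in test_blockstack_virtualizable simply forwards its arguments through a call that the JIT does not inline; the removed test_interface_residual_call above showed the behaviour, repeated here as a standalone sketch that needs a pypy-c with the pypyjit module enabled.)

    import pypyjit

    def f(*args, **kwds):
        return (args, kwds)

    assert pypyjit.residual_call(f, 4, x=6) == ((4,), {'x': 6})
    assert pypyjit.residual_call(len, []) == 0
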
diff --git a/pypy/module/pypyjit/test_pypy_c/test_exception.py b/pypy/module/pypyjit/test_pypy_c/test_exception.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_exception.py
@@ -0,0 +1,93 @@
+import py
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class TestException(BaseTestPyPyC):
+
+    def test_cmp_exc(self):
+        def f1(n):
+            # So we don't get a LOAD_GLOBAL op
+            KE = KeyError
+            i = 0
+            while i < n:
+                try:
+                    raise KE
+                except KE: # ID: except
+                    i += 1
+            return i
+
+        log = self.run(f1, [10000])
+        assert log.result == 10000
+        loop, = log.loops_by_id("except")
+        ops = list(loop.ops_by_id("except", opcode="COMPARE_OP"))
+        assert ops == []
+
+    def test_exception_inside_loop_1(self):
+        def main(n):
+            while n:
+                try:
+                    raise ValueError
+                except ValueError:
+                    pass
+                n -= 1
+            return n
+        #
+        log = self.run(main, [1000])
+        assert log.result == 0
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+        i5 = int_is_true(i3)
+        guard_true(i5, descr=<Guard3>)
+        guard_not_invalidated(descr=<Guard4>)
+        --EXC-TICK--
+        i12 = int_sub_ovf(i3, 1)
+        guard_no_overflow(descr=<Guard6>)
+        --TICK--
+        jump(..., descr=<Loop0>)
+        """)
+
+    def test_exception_inside_loop_2(self):
+        def main(n):
+            def g(n):
+                raise ValueError(n)  # ID: raise
+            def f(n):
+                g(n)
+            #
+            while n:
+                try:
+                    f(n)
+                except ValueError:
+                    pass
+                n -= 1
+            return n
+        #
+        log = self.run(main, [1000])
+        assert log.result == 0
+        loop, = log.loops_by_filename(self.filepath)
+        ops = log.opnames(loop.ops_by_id('raise'))
+        assert 'new' not in ops
+
+    def test_reraise(self):
+        def f(n):
+            i = 0
+            while i < n:
+                try:
+                    try:
+                        raise KeyError
+                    except KeyError:
+                        raise
+                except KeyError:
+                    i += 1
+            return i
+
+        log = self.run(f, [100000])
+        assert log.result == 100000
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i7 = int_lt(i4, i5)
+            guard_true(i7, descr=<Guard3>)
+            guard_not_invalidated(descr=<Guard4>)
+            --EXC-TICK--
+            i14 = int_add(i4, 1)
+            --TICK--
+            jump(..., descr=<Loop0>)
+        """)
diff --git a/pypy/module/pypyjit/test_pypy_c/test_import.py b/pypy/module/pypyjit/test_pypy_c/test_import.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_import.py
@@ -0,0 +1,46 @@
+import py
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class TestImport(BaseTestPyPyC):
+
+    def test_import_in_function(self):
+        def main(n):
+            i = 0
+            while i < n:
+                from sys import version  # ID: import
+                i += 1
+            return i
+        #
+        log = self.run(main, [500])
+        assert log.result == 500
+        loop, = log.loops_by_id('import')
+        assert loop.match_by_id('import', """
+            p11 = getfield_gc(ConstPtr(ptr10), descr=<GcPtrFieldDescr pypy.objspace.std.celldict.ModuleCell.inst_w_value 8>)
+            guard_value(p11, ConstPtr(ptr12), descr=<Guard4>)
+            guard_not_invalidated(descr=<Guard5>)
+            p14 = getfield_gc(ConstPtr(ptr13), descr=<GcPtrFieldDescr pypy.objspace.std.celldict.ModuleCell.inst_w_value 8>)
+            p16 = getfield_gc(ConstPtr(ptr15), descr=<GcPtrFieldDescr pypy.objspace.std.celldict.ModuleCell.inst_w_value 8>)
+            guard_value(p14, ConstPtr(ptr17), descr=<Guard6>)
+            guard_isnull(p16, descr=<Guard7>)
+        """)
+
+    def test_import_fast_path(self, tmpdir):
+        pkg = tmpdir.join('mypkg').ensure(dir=True)
+        pkg.join('__init__.py').write("")
+        pkg.join('mod.py').write(str(py.code.Source("""
+            def do_the_import():
+                import sys
+        """)))
+        def main(path, n):
+            import sys
+            sys.path.append(path)
+            from mypkg.mod import do_the_import
+            for i in range(n):
+                do_the_import()
+        #
+        log = self.run(main, [str(tmpdir), 300])
+        loop, = log.loops_by_filename(self.filepath)
+        # this is a check for a slow-down that introduced a
+        # call_may_force(absolute_import_with_lock).
+        for opname in log.opnames(loop.allops(opcode="IMPORT_NAME")):
+            assert 'call' not in opname    # no call-like opcode
diff --git a/pypy/module/pypyjit/test_pypy_c/test_instance.py b/pypy/module/pypyjit/test_pypy_c/test_instance.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_instance.py
@@ -0,0 +1,202 @@
+import py
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class TestInstance(BaseTestPyPyC):
+
+    def test_virtual_instance(self):
+        def main(n):
+            class A(object):
+                pass
+            #
+            i = 0
+            while i < n:
+                a = A()
+                assert isinstance(a, A)
+                assert not isinstance(a, int)
+                a.x = 2
+                i = i + a.x
+            return i
+        #
+        log = self.run(main, [1000], threshold = 400)
+        assert log.result == 1000
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i7 = int_lt(i5, i6)
+            guard_true(i7, descr=<Guard3>)
+            guard_not_invalidated(descr=<Guard4>)
+            i9 = int_add_ovf(i5, 2)
+            guard_no_overflow(descr=<Guard5>)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, i9, i6, descr=<Loop0>)
+        """)
+
+    def test_load_attr(self):
+        src = '''
+            class A(object):
+                pass
+            a = A()
+            a.x = 2
+            def main(n):
+                i = 0
+                while i < n:
+                    i = i + a.x
+                return i
+        '''
+        log = self.run(src, [1000])
+        assert log.result == 1000
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i9 = int_lt(i5, i6)
+            guard_true(i9, descr=<Guard3>)
+            guard_not_invalidated(descr=<Guard4>)
+            i10 = int_add_ovf(i5, i7)
+            guard_no_overflow(descr=<Guard5>)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, i10, i6, p7, i7, p8, descr=<Loop0>)
+        """)
+
+    def test_getattr_with_dynamic_attribute(self):
+        src = """
+        class A(object):
+            pass
+
+        l = ["x", "y"]
+
+        def main():
+            sum = 0
+            a = A()
+            a.a1 = 0
+            a.a2 = 0
+            a.a3 = 0
+            a.a4 = 0
+            a.a5 = 0 # workaround, because the first five attributes need a promotion
+            a.x = 1
+            a.y = 2
+            i = 0
+            while i < 500:
+                name = l[i % 2]
+                sum += getattr(a, name)
+                i += 1
+            return sum
+        """
+        log = self.run(src, [])
+        assert log.result == 250 + 250*2
+        loops = log.loops_by_filename(self.filepath)
+        assert len(loops) == 1
+
+    def test_mutate_class(self):
+        def fn(n):
+            class A(object):
+                count = 1
+                def __init__(self, a):
+                    self.a = a
+                def f(self):
+                    return self.count
+            i = 0
+            a = A(1)
+            while i < n:
+                A.count += 1 # ID: mutate
+                i = a.f()    # ID: meth1
+            return i
+        #
+        log = self.run(fn, [1000], threshold=10)
+        assert log.result == 1000
+        #
+        # first, we test the entry bridge
+        # -------------------------------
+        entry_bridge, = log.loops_by_filename(self.filepath, is_entry_bridge=True)
+        ops = entry_bridge.ops_by_id('mutate', opcode='LOAD_ATTR')
+        assert log.opnames(ops) == ['guard_value', 'guard_not_invalidated',
+                                    'getfield_gc', 'guard_nonnull_class']
+        # the STORE_ATTR is folded away
+        assert list(entry_bridge.ops_by_id('meth1', opcode='STORE_ATTR')) == []
+        #
+        # then, the actual loop
+        # ----------------------
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i8 = getfield_gc_pure(p5, descr=<SignedFieldDescr .*W_IntObject.inst_intval.*>)
+            i9 = int_lt(i8, i7)
+            guard_true(i9, descr=.*)
+            guard_not_invalidated(descr=.*)
+            i11 = int_add(i8, 1)
+            i12 = force_token()
+            --TICK--
+            p20 = new_with_vtable(ConstClass(W_IntObject))
+            setfield_gc(p20, i11, descr=<SignedFieldDescr.*W_IntObject.inst_intval .*>)
+            setfield_gc(ConstPtr(ptr21), p20, descr=<GcPtrFieldDescr .*TypeCell.inst_w_value .*>)
+            jump(p0, p1, p2, p3, p4, p20, p6, i7, descr=<Loop.>)
+        """)
+
+    def test_oldstyle_newstyle_mix(self):
+        def main():
+            class A:
+                pass
+
+            class B(object, A):
+                def __init__(self, x):
+                    self.x = x
+
+            i = 0
+            b = B(1)
+            while i < 100:
+                v = b.x # ID: loadattr
+                i += v
+            return i
+
+        log = self.run(main, [], threshold=80)
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match_by_id('loadattr',
+        '''
+        guard_not_invalidated(descr=...)
+        i19 = call(ConstClass(ll_dict_lookup), _, _, _, descr=...)
+        guard_no_exception(descr=...)
+        i21 = int_and(i19, _)
+        i22 = int_is_true(i21)
+        guard_true(i22, descr=...)
+        i26 = call(ConstClass(ll_dict_lookup), _, _, _, descr=...)
+        guard_no_exception(descr=...)
+        i28 = int_and(i26, _)
+        i29 = int_is_true(i28)
+        guard_true(i29, descr=...)
+        ''')
+
+    def test_python_contains(self):
+        def main():
+            class A(object):
+                def __contains__(self, v):
+                    return True
+
+            i = 0
+            a = A()
+            while i < 100:
+                i += i in a # ID: contains
+                b = 0       # to make sure that JUMP_ABSOLUTE is not part of the ID
+
+        log = self.run(main, [], threshold=80)
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match_by_id("contains", """
+            guard_not_invalidated(descr=...)
+            i11 = force_token()
+            i12 = int_add_ovf(i5, i7)
+            guard_no_overflow(descr=...)
+        """)
+
+    def test_id_compare_optimization(self):
+        def main():
+            class A(object):
+                pass
+            #
+            i = 0
+            a = A()
+            while i < 300:
+                new_a = A()
+                if new_a != a:  # ID: compare
+                    pass
+                i += 1
+            return i
+        #
+        log = self.run(main, [])
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match_by_id("compare", "") # optimized away
+
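
(Aside: test_python_contains above relies on a bool result being usable as an int; the same arithmetic under plain Python, for reference.)

    class A(object):
        def __contains__(self, v):
            return True

    i = 0
    a = A()
    while i < 100:
        i += i in a      # True is added as 1, so the loop just counts to 100
    assert i == 100
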
diff --git a/pypy/module/pypyjit/test_pypy_c/test_intbound.py b/pypy/module/pypyjit/test_pypy_c/test_intbound.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_intbound.py
@@ -0,0 +1,297 @@
+import py
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class TestIntbound(BaseTestPyPyC):
+
+    def test_intbound_simple(self):
+        """
+        This test only checks that we get the expected result, not that any
+        optimization has been applied.
+        """
+        ops = ('<', '>', '<=', '>=', '==', '!=')
+        nbr = (3, 7)
+        for o1 in ops:
+            for o2 in ops:
+                for n1 in nbr:
+                    for n2 in nbr:
+                        src = '''
+                        def f(i):
+                            a, b = 3, 3
+                            if i %s %d:
+                                a = 0
+                            else:
+                                a = 1
+                            if i %s %d:
+                                b = 0
+                            else:
+                                b = 1
+                            return a + b * 2
+
+                        def main():
+                            res = [0] * 4
+                            idx = []
+                            for i in range(15):
+                                idx.extend([i] * 15)
+                            for i in idx:
+                                res[f(i)] += 1
+                            return res
+
+                        ''' % (o1, n1, o2, n2)
+                        yield self.run_and_check, src
+
+    def test_intbound_addsub_mix(self):
+        """
+        This test only checks that we get the expected result, not that any
+        optimization has been applied.
+        """
+        tests = ('i > 4', 'i > 2', 'i + 1 > 2', '1 + i > 4',
+                 'i - 1 > 1', '1 - i > 1', '1 - i < -3',
+                 'i == 1', 'i == 5', 'i != 1', '-2 * i < -4')
+        for t1 in tests:
+            for t2 in tests:
+                src = '''
+                def f(i):
+                    a, b = 3, 3
+                    if %s:
+                        a = 0
+                    else:
+                        a = 1
+                    if %s:
+                        b = 0
+                    else:
+                        b = 1
+                    return a + b * 2
+
+                def main():
+                    res = [0] * 4
+                    idx = []
+                    for i in range(15):
+                        idx.extend([i] * 15)
+                    for i in idx:
+                        res[f(i)] += 1
+                    return res
+
+                ''' % (t1, t2)
+                yield self.run_and_check, src
+
+    def test_intbound_gt(self):
+        def main(n):
+            i, a, b = 0, 0, 0
+            while i < n:
+                if i > -1:
+                    a += 1
+                if i > -2:
+                    b += 1
+                i += 1
+            return (a, b)
+        #
+        log = self.run(main, [300])
+        assert log.result == (300, 300)
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i10 = int_lt(i8, i9)
+            guard_true(i10, descr=...)
+            i12 = int_add_ovf(i7, 1)
+            guard_no_overflow(descr=...)
+            i14 = int_add_ovf(i6, 1)
+            guard_no_overflow(descr=...)
+            i17 = int_add(i8, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, p5, i14, i12, i17, i9, descr=<Loop0>)
+        """)
+
+    def test_intbound_sub_lt(self):
+        def main():
+            i, a = 0, 0
+            while i < 300:
+                if i - 10 < 295:
+                    a += 1
+                i += 1
+            return a
+        #
+        log = self.run(main, [])
+        assert log.result == 300
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i7 = int_lt(i5, 300)
+            guard_true(i7, descr=...)
+            i9 = int_sub_ovf(i5, 10)
+            guard_no_overflow(descr=...)
+            i11 = int_add_ovf(i4, 1)
+            guard_no_overflow(descr=...)
+            i13 = int_add(i5, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, i11, i13, descr=<Loop0>)
+        """)
+
+    def test_intbound_addsub_ge(self):
+        def main(n):
+            i, a, b = 0, 0, 0
+            while i < n:
+                if i + 5 >= 5:
+                    a += 1
+                if i - 1 >= -1:
+                    b += 1
+                i += 1
+            return (a, b)
+        #
+        log = self.run(main, [300])
+        assert log.result == (300, 300)
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i10 = int_lt(i8, i9)
+            guard_true(i10, descr=...)
+            i12 = int_add_ovf(i8, 5)
+            guard_no_overflow(descr=...)
+            i14 = int_add_ovf(i7, 1)
+            guard_no_overflow(descr=...)
+            i16s = int_sub(i8, 1)
+            i16 = int_add_ovf(i6, 1)
+            guard_no_overflow(descr=...)
+            i19 = int_add(i8, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, p5, i16, i14, i19, i9, descr=<Loop0>)
+        """)
+
+    def test_intbound_addmul_ge(self):
+        def main(n):
+            i, a, b = 0, 0, 0
+            while i < 300:
+                if i + 5 >= 5:
+                    a += 1
+                if 2 * i >= 0:
+                    b += 1
+                i += 1
+            return (a, b)
+        #
+        log = self.run(main, [300])
+        assert log.result == (300, 300)
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i10 = int_lt(i8, 300)
+            guard_true(i10, descr=...)
+            i12 = int_add(i8, 5)
+            i14 = int_add_ovf(i7, 1)
+            guard_no_overflow(descr=...)
+            i16 = int_lshift(i8, 1)
+            i18 = int_add_ovf(i6, 1)
+            guard_no_overflow(descr=...)
+            i21 = int_add(i8, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, p5, i18, i14, i21, descr=<Loop0>)
+        """)
+
+    def test_intbound_eq(self):
+        def main(a, n):
+            i, s = 0, 0
+            while i < 300:
+                if a == 7:
+                    s += a + 1
+                elif i == 10:
+                    s += i
+                else:
+                    s += 1
+                i += 1
+            return s
+        #
+        log = self.run(main, [7, 300])
+        assert log.result == main(7, 300)
+        log = self.run(main, [10, 300])
+        assert log.result == main(10, 300)
+        log = self.run(main, [42, 300])
+        assert log.result == main(42, 300)
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i10 = int_lt(i8, 300)
+            guard_true(i10, descr=...)
+            i12 = int_eq(i8, 10)
+            guard_false(i12, descr=...)
+            i14 = int_add_ovf(i7, 1)
+            guard_no_overflow(descr=...)
+            i16 = int_add(i8, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, p5, p6, i14, i16, descr=<Loop0>)
+        """)
+
+    def test_intbound_mul(self):
+        def main(a):
+            i, s = 0, 0
+            while i < 300:
+                assert i >= 0
+                if 2 * i < 30000:
+                    s += 1
+                else:
+                    s += a
+                i += 1
+            return s
+        #
+        log = self.run(main, [7])
+        assert log.result == 300
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i8 = int_lt(i6, 300)
+            guard_true(i8, descr=...)
+            i10 = int_lshift(i6, 1)
+            i12 = int_add_ovf(i5, 1)
+            guard_no_overflow(descr=...)
+            i14 = int_add(i6, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, i12, i14, descr=<Loop0>)
+        """)
+
+    def test_assert(self):
+        def main(a):
+            i, s = 0, 0
+            while i < 300:
+                assert a == 7
+                s += a + 1
+                i += 1
+            return s
+        log = self.run(main, [7])
+        assert log.result == 300*8
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i8 = int_lt(i6, 300)
+            guard_true(i8, descr=...)
+            i10 = int_add_ovf(i5, 8)
+            guard_no_overflow(descr=...)
+            i12 = int_add(i6, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, i10, i12, descr=<Loop0>)
+        """)
+
+    def test_xor(self):
+        def main(b):
+            a = sa = 0
+            while a < 300:
+                if a > 0: # Specialises the loop
+                    pass
+                if b > 10:
+                    pass
+                if a^b >= 0:  # ID: guard
+                    sa += 1
+                sa += a^a     # ID: a_xor_a
+                a += 1
+            return sa
+
+        log = self.run(main, [11])
+        assert log.result == 300
+        loop, = log.loops_by_filename(self.filepath)
+        # if both are >=0, a^b is known to be >=0
+        # note that we know that b>10
+        assert loop.match_by_id('guard', """
+            i10 = int_xor(i5, i7)
+        """)
+        #
+        # x^x is always optimized to 0
+        assert loop.match_by_id('a_xor_a', "")
+
+        log = self.run(main, [9])
+        assert log.result == 300
+        loop, = log.loops_by_filename(self.filepath)
+        # we don't know that b>10, hence we cannot optimize it
+        assert loop.match_by_id('guard', """
+            i10 = int_xor(i5, i7)
+            i12 = int_ge(i10, 0)
+            guard_true(i12, descr=...)
+        """)
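
The guard removal checked above rests on a simple bit-level fact: the xor of
two nonnegative machine integers cannot set the sign bit, so once the
optimizer knows that both operands are nonnegative it can drop the >= 0
guard entirely.  A tiny demonstration of that property in plain Python
(illustration only, not part of the patch):

    def xor_is_nonnegative(a, b):
        # for nonnegative a and b the sign bit of a ^ b is 0, so a ^ b >= 0;
        # this is the bound the JIT exploits to remove the guard
        assert a >= 0 and b >= 0
        return (a ^ b) >= 0

    assert all(xor_is_nonnegative(a, b) for a in range(64) for b in range(64))
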
diff --git a/pypy/module/pypyjit/test_pypy_c/test_min_max.py b/pypy/module/pypyjit/test_pypy_c/test_min_max.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_min_max.py
@@ -0,0 +1,67 @@
+import py
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class TestMinMax(BaseTestPyPyC):
+
+    def test_min_max(self):
+        def main():
+            i=0
+            sa=0
+            while i < 300:
+                sa+=min(max(i, 3000), 4000)
+                i+=1
+            return sa
+        log = self.run(main, [])
+        assert log.result == 300*3000
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i7 = int_lt(i4, 300)
+            guard_true(i7, descr=...)
+            i9 = int_add_ovf(i5, 3000)
+            guard_no_overflow(descr=...)
+            i11 = int_add(i4, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, i11, i9, descr=<Loop0>)
+        """)
+
+    def test_silly_max(self):
+        def main():
+            i = 2
+            sa = 0
+            while i < 300:
+                lst = range(i)
+                sa += max(*lst) # ID: max
+                i += 1
+            return sa
+        log = self.run(main, [])
+        assert log.result == main()
+        loop, = log.loops_by_filename(self.filepath)
+        # We don't want too many guards, but we do expect a residual call to min_max_loop
+        guards = [n for n in log.opnames(loop.ops_by_id("max")) if n.startswith('guard')]
+        assert len(guards) < 20
+        assert loop.match_by_id('max',"""
+            ...
+            p76 = call_may_force(ConstClass(min_max_loop__max), _, _, descr=...)
+            ...
+        """)
+
+    def test_iter_max(self):
+        def main():
+            i = 2
+            sa = 0
+            while i < 300:
+                lst = range(i)
+                sa += max(lst) # ID: max
+                i += 1
+            return sa
+        log = self.run(main, [])
+        assert log.result == main()
+        loop, = log.loops_by_filename(self.filepath)
+        # We don't want too many guards, but we do expect a residual call to min_max_loop
+        guards = [n for n in log.opnames(loop.ops_by_id("max")) if n.startswith('guard')]
+        assert len(guards) < 20
+        assert loop.match_by_id('max',"""
+            ...
+            p76 = call_may_force(ConstClass(min_max_loop__max), _, _, descr=...)
+            ...
+        """)
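
A quick sanity check of the arithmetic behind test_min_max (illustration only,
not part of the patch): inside the loop i stays below 300, so max(i, 3000) is
always 3000 and min(3000, 4000) is always 3000.  The whole min/max pair
therefore folds away, which is why the matched trace contains a single
int_add_ovf(i5, 3000) and nothing else:

    # the min/max pair is constant for every i reached by the loop
    for i in range(300):
        assert min(max(i, 3000), 4000) == 3000
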
diff --git a/pypy/module/pypyjit/test_pypy_c/test_pypy_c_new.py b/pypy/module/pypyjit/test_pypy_c/test_misc.py
rename from pypy/module/pypyjit/test_pypy_c/test_pypy_c_new.py
rename to pypy/module/pypyjit/test_pypy_c/test_misc.py
--- a/pypy/module/pypyjit/test_pypy_c/test_pypy_c_new.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_misc.py
@@ -1389,111 +1389,6 @@
             jump(p0, p1, p2, p3, p4, i10, i12, descr=<Loop0>)
         """)
 
-    def test_zeropadded(self):
-        def main():
-            from array import array
-            class ZeroPadded(array):
-                def __new__(cls, l):
-                    self = array.__new__(cls, 'd', range(l))
-                    return self
-
-                def __getitem__(self, i):
-                    if i < 0 or i >= len(self):
-                        return 0
-                    return array.__getitem__(self, i) # ID: get
-            #
-            buf = ZeroPadded(2000)
-            i = 10
-            sa = 0
-            while i < 2000 - 10:
-                sa += buf[i-2] + buf[i-1] + buf[i] + buf[i+1] + buf[i+2]
-                i += 1
-            return sa
-
-        log = self.run(main, [], threshold=200)
-        assert log.result == 9895050.0
-        loop, = log.loops_by_filename(self.filepath)
-        #
-        # check that the overloaded __getitem__ does not introduce double
-        # array bound checks.
-        #
-        # The force_token()s are still there, but will be eliminated by the
-        # backend regalloc, so they are harmless
-        assert loop.match(ignore_ops=['force_token'],
-                          expected_src="""
-            ...
-            i20 = int_ge(i18, i8)
-            guard_false(i20, descr=...)
-            f21 = getarrayitem_raw(i13, i18, descr=...)
-            i14 = int_sub(i6, 1)
-            i15 = int_ge(i14, i8)
-            guard_false(i15, descr=...)
-            f23 = getarrayitem_raw(i13, i14, descr=...)
-            f24 = float_add(f21, f23)
-            f26 = getarrayitem_raw(i13, i6, descr=...)
-            f27 = float_add(f24, f26)
-            i29 = int_add(i6, 1)
-            i31 = int_ge(i29, i8)
-            guard_false(i31, descr=...)
-            f33 = getarrayitem_raw(i13, i29, descr=...)
-            f34 = float_add(f27, f33)
-            i36 = int_add(i6, 2)
-            i38 = int_ge(i36, i8)
-            guard_false(i38, descr=...)
-            f39 = getarrayitem_raw(i13, i36, descr=...)
-            ...
-        """)
-
-
-    def test_circular(self):
-        def main():
-            from array import array
-            class Circular(array):
-                def __new__(cls):
-                    self = array.__new__(cls, 'd', range(256))
-                    return self
-                def __getitem__(self, i):
-                    assert len(self) == 256
-                    return array.__getitem__(self, i & 255)
-            #
-            buf = Circular()
-            i = 10
-            sa = 0
-            while i < 2000 - 10:
-                sa += buf[i-2] + buf[i-1] + buf[i] + buf[i+1] + buf[i+2]
-                i += 1
-            return sa
-        #
-        log = self.run(main, [], threshold=200)
-        assert log.result == 1239690.0
-        loop, = log.loops_by_filename(self.filepath)
-        #
-        # check that the array bound checks are removed
-        #
-        # The force_token()s are still there, but will be eliminated by the
-        # backend regalloc, so they are harmless
-        assert loop.match(ignore_ops=['force_token'],
-                          expected_src="""
-            ...
-            i17 = int_and(i14, 255)
-            f18 = getarrayitem_raw(i8, i17, descr=...)
-            i19s = int_sub_ovf(i6, 1)
-            guard_no_overflow(descr=...)
-            i22s = int_and(i19s, 255)
-            f20 = getarrayitem_raw(i8, i22s, descr=...)
-            f21 = float_add(f18, f20)
-            f23 = getarrayitem_raw(i8, i10, descr=...)
-            f24 = float_add(f21, f23)
-            i26 = int_add(i6, 1)
-            i29 = int_and(i26, 255)
-            f30 = getarrayitem_raw(i8, i29, descr=...)
-            f31 = float_add(f24, f30)
-            i33 = int_add(i6, 2)
-            i36 = int_and(i33, 255)
-            f37 = getarrayitem_raw(i8, i36, descr=...)
-            ...
-        """)
-
     def test_min_max(self):
         def main():
             i=0
diff --git a/pypy/module/pypyjit/test_pypy_c/test_shift.py b/pypy/module/pypyjit/test_pypy_c/test_shift.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_shift.py
@@ -0,0 +1,166 @@
+import py
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class TestShift(BaseTestPyPyC):
+
+    def test_shift_intbound(self):
+        def main(b):
+            res = 0
+            a = 0
+            while a < 300:
+                assert a >= 0
+                assert 0 <= b <= 10
+                val = a >> b
+                if val >= 0:    # ID: rshift
+                    res += 1
+                val = a << b
+                if val >= 0:    # ID: lshift
+                    res += 2
+                a += 1
+            return res
+        #
+        log = self.run(main, [2])
+        assert log.result == 300*3
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match_by_id('rshift', "")  # guard optimized away
+        assert loop.match_by_id('lshift', "")  # guard optimized away
+
+    def test_lshift_and_then_rshift(self):
+        py.test.skip('fixme, this optimization is disabled')
+        def main(b):
+            res = 0
+            a = 0
+            while res < 300:
+                assert a >= 0
+                assert 0 <= b <= 10
+                res = (a << b) >> b     # ID: shift
+                a += 1
+            return res
+        #
+        log = self.run(main, [2])
+        assert log.result == 300
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match_by_id('shift', "")  # optimized away
+
+    def test_division_to_rshift(self):
+        def main(b):
+            res = 0
+            a = 0
+            while a < 300:
+                assert a >= 0
+                assert 0 <= b <= 10
+                res = a/b     # ID: div
+                a += 1
+            return res
+        #
+        log = self.run(main, [3])
+        assert log.result == 99
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match_by_id('div', """
+            i10 = int_floordiv(i6, i7)
+            i11 = int_mul(i10, i7)
+            i12 = int_sub(i6, i11)
+            i14 = int_rshift(i12, 63)
+            i15 = int_add(i10, i14)
+        """)
+
+    def test_division_to_rshift_allcases(self):
+        """
+        This test only checks that we get the expected result, not that any
+        optimization has been applied.
+        """
+        avalues = ('a', 'b', 7, -42, 8)
+        bvalues = ['b'] + range(-10, 0) + range(1,10)
+        code = ''
+        for a in avalues:
+            for b in bvalues:
+                code += '                sa += %s / %s\n' % (a, b)
+        src = """
+        def main(a, b):
+            i = sa = 0
+            while i < 300:
+%s
+                i += 1
+            return sa
+        """ % code
+        self.run_and_check(src, [ 10,  20])
+        self.run_and_check(src, [ 10, -20])
+        self.run_and_check(src, [-10, -20])
+
+    def test_mod(self):
+        """
+        This test only checks that we get the expected result, not that any
+        optimization has been applied.
+        """
+        avalues = ('a', 'b', 7, -42, 8)
+        bvalues = ['b'] + range(-10, 0) + range(1,10)
+        code = ''
+        for a in avalues:
+            for b in bvalues:
+                code += '                sa += %s %% %s\n' % (a, b)
+        src = """
+        def main(a, b):
+            i = sa = 0
+            while i < 2000:
+                if a > 0: pass
+                if 1 < b < 2: pass
+%s
+                i += 1
+            return sa
+        """ % code
+        self.run_and_check(src, [ 10,  20])
+        self.run_and_check(src, [ 10, -20])
+        self.run_and_check(src, [-10, -20])
+
+    def test_shift_allcases(self):
+        """
+        This test only checks that we get the expected result, not that any
+        optimization has been applied.
+        """
+        from sys import maxint
+        def main(a, b):
+            i = sa = 0
+            while i < 300:
+                if a > 0: # Specialises the loop
+                    pass
+                if b < 2 and b > 0:
+                    pass
+                if (a >> b) >= 0:
+                    sa += 1
+                if (a << b) > 2:
+                    sa += 10000
+                i += 1
+            return sa
+        #
+        maxvals = (-maxint-1, -maxint, maxint-1, maxint)
+        for a in (-4, -3, -2, -1, 0, 1, 2, 3, 4) + maxvals:
+            for b in (0, 1, 2, 31, 32, 33, 61, 62, 63):
+                yield self.run_and_check, main, [a, b]
+
+    def test_revert_shift_allcases(self):
+        """
+        This test only checks that we get the expected result, not that any
+        optimization has been applied.
+        """
+        from sys import maxint
+
+        def main(a, b, c):
+            from sys import maxint
+            i = sa = 0
+            while i < 300:
+                if 0 < a < 10: pass
+                if -100 < b < 100: pass
+                if -maxint/2 < c < maxint/2: pass
+                sa += (a<<a)>>a
+                sa += (b<<a)>>a
+                sa += (c<<a)>>a
+                sa += (a<<100)>>100
+                sa += (b<<100)>>100
+                sa += (c<<100)>>100
+                i += 1
+            return long(sa)
+
+        for a in (1, 4, 8, 100):
+            for b in (-10, 10, -201, 201, -maxint/3, maxint/3):
+                for c in (-10, 10, -maxint/3, maxint/3):
+                    yield self.run_and_check, main, [a, b, c]
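
The trace matched in test_division_to_rshift rebuilds the application-level
a/b (floor division) from a truncating division plus a correction based on the
sign of the remainder: int_rshift(remainder, 63) is -1 for a negative
remainder and 0 otherwise, and adding it to the truncated quotient gives the
floored result.  A minimal sketch of the same correction in plain Python,
assuming a positive divisor (the test asserts 0 <= b <= 10) and a 64-bit word
(illustration only, not part of the patch):

    def floordiv_with_positive_divisor(x, y):
        assert y > 0
        q = -((-x) // y) if x < 0 else x // y  # truncating division, as in the trace
        r = x - q * y                          # remainder, takes the sign of x
        return q + (r >> 63)                   # arithmetic shift: -1 if r < 0 else 0

    assert all(floordiv_with_positive_divisor(x, 3) == x // 3
               for x in range(-30, 30))
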
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test.c b/pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test.c
--- a/pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test.c
+++ b/pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test.c
@@ -43,6 +43,12 @@
 	qsort(base, num, width, compare);
 }
 
+EXPORT(char) deref_LP_c_char_p(char** argv)
+{
+    char* s = *argv;
+    return s[0];
+}
+
 EXPORT(int *) _testfunc_ai8(int a[8])
 {
 	return a;
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/support.py b/pypy/module/test_lib_pypy/ctypes_tests/support.py
--- a/pypy/module/test_lib_pypy/ctypes_tests/support.py
+++ b/pypy/module/test_lib_pypy/ctypes_tests/support.py
@@ -1,4 +1,5 @@
 import py
+import sys
 import ctypes
 
 py.test.importorskip("ctypes", "1.0.2")
@@ -14,6 +15,16 @@
         if _rawffi:
             py.test.skip("white-box tests for pypy _rawffi based ctypes impl")
 
+def del_funcptr_refs_maybe(obj, attrname):
+    dll = getattr(obj, attrname, None)
+    if not dll:
+        return
+    _FuncPtr = dll._FuncPtr
+    for name in dir(dll):
+        obj = getattr(dll, name, None)
+        if isinstance(obj, _FuncPtr):
+            delattr(dll, name)
+
 class BaseCTypesTestChecker:
     def setup_class(cls):
         if _rawffi:
@@ -21,8 +32,21 @@
             for _ in range(4):
                 gc.collect()
             cls.old_num = _rawffi._num_of_allocated_objects()
-    
+
+
     def teardown_class(cls):
+        if sys.pypy_translation_info['translation.gc'] == 'boehm':
+            return # it seems that boehm has problems with __del__, so not
+                   # everything is freed
+        #
+        mod = sys.modules[cls.__module__]
+        del_funcptr_refs_maybe(mod, 'dll')
+        del_funcptr_refs_maybe(mod, 'dll2')
+        del_funcptr_refs_maybe(mod, 'lib')
+        del_funcptr_refs_maybe(mod, 'testdll')
+        del_funcptr_refs_maybe(mod, 'ctdll')
+        del_funcptr_refs_maybe(cls, '_dll')
+        #
         if hasattr(cls, 'old_num'):
             import gc
             for _ in range(4):
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_fastpath.py b/pypy/module/test_lib_pypy/ctypes_tests/test_fastpath.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_fastpath.py
@@ -0,0 +1,103 @@
+from ctypes import CDLL, POINTER, pointer, c_byte, c_int, c_char_p
+import sys
+import py
+from support import BaseCTypesTestChecker
+
+class MyCDLL(CDLL):
+    def __getattr__(self, attr):
+        fn = self[attr] # this way it's not cached as an attribute
+        fn._slowpath_allowed = False
+        return fn
+
+def setup_module(mod):
+    import conftest
+    _ctypes_test = str(conftest.sofile)
+    mod.dll = MyCDLL(_ctypes_test)  # slowpath not allowed
+    mod.dll2 = CDLL(_ctypes_test)   # slowpath allowed
+
+
+class TestFastpath(BaseCTypesTestChecker):
+
+    def test_fastpath_forbidden(self):
+        def myfunc():
+            pass
+        #
+        tf_b = dll.tf_b
+        tf_b.restype = c_byte
+        #
+        # so far, it's still using the slowpath
+        assert not tf_b._is_fastpath
+        tf_b.callable = myfunc
+        tf_b.argtypes = (c_byte,)
+        # the callable prevented the fastpath from kicking in
+        assert not tf_b._is_fastpath
+        #
+        del tf_b.callable
+        tf_b.argtypes = (c_byte,) # try to re-enable the fastpath
+        assert tf_b._is_fastpath
+        #
+        assert not tf_b._slowpath_allowed
+        py.test.raises(AssertionError, "tf_b.callable = myfunc")
+        py.test.raises(AssertionError, "tf_b('aaa')") # force a TypeError
+
+    def test_simple_args(self):
+        tf_b = dll.tf_b
+        tf_b.restype = c_byte
+        tf_b.argtypes = (c_byte,)
+        assert tf_b(-126) == -42
+
+    def test_pointer_args(self):
+        f = dll._testfunc_p_p
+        f.restype = POINTER(c_int)
+        f.argtypes = [POINTER(c_int)]
+        v = c_int(42)
+        result = f(pointer(v))
+        assert type(result) == POINTER(c_int)
+        assert result.contents.value == 42
+
+    def test_simple_pointer_args(self):
+        f = dll.my_strchr
+        f.argtypes = [c_char_p, c_int]
+        f.restype = c_char_p
+        mystr = c_char_p("abcd")
+        result = f(mystr, ord("b"))
+        assert result == "bcd"
+
+    @py.test.mark.xfail
+    def test_strings(self):
+        f = dll.my_strchr
+        f.argtypes = [c_char_p, c_int]
+        f.restype = c_char_p
+        # python strings need to be converted to c_char_p, but this is
+        # supported only in the slow path so far
+        result = f("abcd", ord("b"))
+        assert result == "bcd"
+
+    def test_errcheck(self):
+        def errcheck(result, func, args):
+            return 'hello'
+        tf_b = dll.tf_b
+        tf_b.restype = c_byte
+        tf_b.argtypes = (c_byte,)
+        tf_b.errcheck = errcheck
+        assert tf_b(-126) == 'hello'
+
+
+class TestFallbackToSlowpath(BaseCTypesTestChecker):
+
+    def test_argtypes_is_None(self):
+        tf_b = dll2.tf_b
+        tf_b.restype = c_byte
+        tf_b.argtypes = (c_char_p,)  # this is intentionally wrong
+        tf_b.argtypes = None # kill the fast path
+        assert not tf_b._is_fastpath
+        assert tf_b(-126) == -42
+
+    def test_callable_is_None(self):
+        tf_b = dll2.tf_b
+        tf_b.restype = c_byte
+        tf_b.argtypes = (c_byte,)
+        tf_b.callable = lambda x: x+1
+        assert not tf_b._is_fastpath
+        assert tf_b(-126) == -125
+        tf_b.callable = None
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_functions.py b/pypy/module/test_lib_pypy/ctypes_tests/test_functions.py
--- a/pypy/module/test_lib_pypy/ctypes_tests/test_functions.py
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_functions.py
@@ -91,6 +91,13 @@
         result = f(0, 0, 0, 0, 0, 0)
         assert result == u'\x00'
 
+    def test_char_result(self):
+        f = dll._testfunc_i_bhilfd
+        f.argtypes = [c_byte, c_short, c_int, c_long, c_float, c_double]
+        f.restype = c_char
+        result = f(0, 0, 0, 0, 0, 0)
+        assert result == '\x00'
+
     def test_voidresult(self):
         f = dll._testfunc_v
         f.restype = None
@@ -211,8 +218,19 @@
         result = f(byref(c_int(99)))
         assert not result.contents == 99
 
+    def test_convert_pointers(self):
+        f = dll.deref_LP_c_char_p
+        f.restype = c_char
+        f.argtypes = [POINTER(c_char_p)]
+        #
+        s = c_char_p('hello world')
+        ps = pointer(s)
+        assert f(ps) == 'h'
+        assert f(s) == 'h'  # automatic conversion from char** to char*
+
     def test_errors_1(self):
         f = dll._testfunc_p_p
+        f.argtypes = [POINTER(c_int)]
         f.restype = c_int
 
         class X(Structure):
@@ -428,6 +446,16 @@
         u = dll.ret_un_func(a[1])
         assert u.y == 33*10000
 
+    def test_cache_funcptr(self):
+        tf_b = dll.tf_b
+        tf_b.restype = c_byte
+        tf_b.argtypes = (c_byte,)
+        assert tf_b(-126) == -42
+        ptr = tf_b._ptr
+        assert ptr is not None
+        assert tf_b(-126) == -42
+        assert tf_b._ptr is ptr
+
     def test_warnings(self):
         import warnings
         warnings.simplefilter("always")
@@ -439,6 +467,22 @@
             assert "C function without declared arguments called" in str(w[0].message)
             assert "C function without declared return type called" in str(w[1].message)
 
+    def test_errcheck(self):
+        py.test.skip('fixme')
+        def errcheck(result, func, args):
+            assert result == -42
+            assert type(result) is int
+            arg, = args
+            assert arg == -126
+            assert type(arg) is int
+            return result
+        #
+        tf_b = dll.tf_b
+        tf_b.restype = c_byte
+        tf_b.argtypes = (c_byte,)
+        tf_b.errcheck = errcheck
+        assert tf_b(-126) == -42
+        del tf_b.errcheck
         with warnings.catch_warnings(record=True) as w:
             dll.get_an_integer.argtypes = []
             dll.get_an_integer()
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_guess_argtypes.py b/pypy/module/test_lib_pypy/ctypes_tests/test_guess_argtypes.py
--- a/pypy/module/test_lib_pypy/ctypes_tests/test_guess_argtypes.py
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_guess_argtypes.py
@@ -12,8 +12,10 @@
     from _ctypes.function import CFuncPtr
 
     def guess(value):
-        cobj = CFuncPtr._conv_param(None, value)
-        return type(cobj)
+        cobj, ctype = CFuncPtr._conv_param(None, value)
+        return ctype
+        ## cobj = CFuncPtr._conv_param(None, value)
+        ## return type(cobj)
 
     assert guess(13) == c_int
     assert guess(0) == c_int
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_numbers.py b/pypy/module/test_lib_pypy/ctypes_tests/test_numbers.py
--- a/pypy/module/test_lib_pypy/ctypes_tests/test_numbers.py
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_numbers.py
@@ -125,6 +125,9 @@
             if t is c_longdouble:   # no support for 'g' in the struct module
                 continue
             code = t._type_ # the typecode
+            if code == 'g':
+                # typecode not supported by "struct"
+                continue
             align = struct.calcsize("c%c" % code) - struct.calcsize(code)
 
             # alignment of the type...
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_pointers.py b/pypy/module/test_lib_pypy/ctypes_tests/test_pointers.py
--- a/pypy/module/test_lib_pypy/ctypes_tests/test_pointers.py
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_pointers.py
@@ -12,6 +12,13 @@
     mod._ctypes_test = str(conftest.sofile)
 
 class TestPointers(BaseCTypesTestChecker):
+
+    def test_get_ffi_argtype(self):
+        P = POINTER(c_int)
+        ffitype = P.get_ffi_argtype()
+        assert P.get_ffi_argtype() is ffitype
+        assert ffitype.deref_pointer() is c_int.get_ffi_argtype()
+    
     def test_pointer_crash(self):
 
         class A(POINTER(c_ulong)):
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_unicode.py b/pypy/module/test_lib_pypy/ctypes_tests/test_unicode.py
--- a/pypy/module/test_lib_pypy/ctypes_tests/test_unicode.py
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_unicode.py
@@ -15,6 +15,10 @@
         mod.wcslen.argtypes = [ctypes.c_wchar_p]
         mod.func = dll._testfunc_p_p
 
+    def teardown_module(mod):
+        del mod.func
+        del mod.wcslen
+
     class TestUnicode(BaseCTypesTestChecker):
         def setup_method(self, method):
             self.prev_conv_mode = ctypes.set_conversion_mode("ascii", "strict")
diff --git a/pypy/pytest.ini b/pypy/pytest.ini
new file mode 100644
--- /dev/null
+++ b/pypy/pytest.ini
@@ -0,0 +1,2 @@
+[pytest]
+addopts = --assertmode=old
\ No newline at end of file
diff --git a/pypy/rlib/clibffi.py b/pypy/rlib/clibffi.py
--- a/pypy/rlib/clibffi.py
+++ b/pypy/rlib/clibffi.py
@@ -10,6 +10,7 @@
 from pypy.rlib.rmmap import alloc
 from pypy.rlib.rdynload import dlopen, dlclose, dlsym, dlsym_byordinal
 from pypy.rlib.rdynload import DLOpenError, DLLHANDLE
+from pypy.rlib import jit
 from pypy.tool.autopath import pypydir
 from pypy.translator.tool.cbuild import ExternalCompilationInfo
 from pypy.translator.platform import platform
@@ -18,6 +19,10 @@
 import sys
 import ctypes.util
 
+from pypy.tool.ansi_print import ansi_log
+log = py.log.Producer("libffi")
+py.log.setconsumer("libffi", ansi_log)
+
 # maaaybe isinstance here would be better. Think
 _MSVC = platform.name == "msvc"
 _MINGW = platform.name == "mingw32"
@@ -67,12 +72,17 @@
             result = os.path.join(dir, 'libffi.a')
             if os.path.exists(result):
                 return result
-        raise ImportError("'libffi.a' not found in %s" % (dirlist,))
+        log.WARNING("'libffi.a' not found in %s" % (dirlist,))
+        log.WARNING("trying to use the dynamic library instead...")
+        return None
 
+    path_libffi_a = None
     if hasattr(platform, 'library_dirs_for_libffi_a'):
+        path_libffi_a = find_libffi_a()
+    if path_libffi_a is not None:
         # platforms on which we want static linking
         libraries = []
-        link_files = [find_libffi_a()]
+        link_files = [path_libffi_a]
     else:
         # platforms on which we want dynamic linking
         libraries = ['ffi']
@@ -261,6 +271,7 @@
 elif _MSVC:
     get_libc_handle = external('pypy_get_libc_handle', [], DLLHANDLE)
 
+    @jit.dont_look_inside
     def get_libc_name():
         return rwin32.GetModuleFileName(get_libc_handle())
 
diff --git a/pypy/rlib/jit.py b/pypy/rlib/jit.py
--- a/pypy/rlib/jit.py
+++ b/pypy/rlib/jit.py
@@ -275,7 +275,8 @@
 class JitHintError(Exception):
     """Inconsistency in the JIT hints."""
 
-PARAMETERS = {'threshold': 1000,
+PARAMETERS = {'threshold': 1032, # just above 1024
+              'function_threshold': 1617, # slightly more than the one above
               'trace_eagerness': 200,
               'trace_limit': 12000,
               'inlining': 0,
diff --git a/pypy/rlib/libffi.py b/pypy/rlib/libffi.py
--- a/pypy/rlib/libffi.py
+++ b/pypy/rlib/libffi.py
@@ -1,12 +1,15 @@
+from __future__ import with_statement
+
 from pypy.rpython.lltypesystem import rffi, lltype
 from pypy.rlib.objectmodel import specialize, enforceargs, we_are_translated
-from pypy.rlib.rarithmetic import intmask, r_uint
+from pypy.rlib.rarithmetic import intmask, r_uint, r_singlefloat
 from pypy.rlib import jit
 from pypy.rlib import clibffi
 from pypy.rlib.clibffi import get_libc_name, FUNCFLAG_CDECL, AbstractFuncPtr, \
-    push_arg_as_ffiptr, c_ffi_call
+    push_arg_as_ffiptr, c_ffi_call, FFI_TYPE_STRUCT
 from pypy.rlib.rdynload import dlopen, dlclose, dlsym, dlsym_byordinal
 from pypy.rlib.rdynload import DLLHANDLE
+from pypy.rlib.longlong2float import longlong2float, float2longlong
 
 class types(object):
     """
@@ -31,6 +34,9 @@
                 setattr(cls, name, value)
         cls.slong = clibffi.cast_type_to_ffitype(rffi.LONG)
         cls.ulong = clibffi.cast_type_to_ffitype(rffi.ULONG)
+        cls.slonglong = clibffi.cast_type_to_ffitype(rffi.LONGLONG)
+        cls.ulonglong = clibffi.cast_type_to_ffitype(rffi.ULONGLONG)
+        cls.wchar_t = clibffi.cast_type_to_ffitype(lltype.UniChar)
         del cls._import
 
     @staticmethod
@@ -41,7 +47,8 @@
         """
         if   ffi_type is types.void:    return 'v'
         elif ffi_type is types.double:  return 'f'
-        elif ffi_type is types.pointer: return 'i'
+        elif ffi_type is types.float:   return 's'
+        elif ffi_type is types.pointer: return 'u'
         #
         elif ffi_type is types.schar:   return 'i'
         elif ffi_type is types.uchar:   return 'u'
@@ -58,13 +65,19 @@
         elif ffi_type is types.uint16:  return 'u'
         elif ffi_type is types.sint32:  return 'i'
         elif ffi_type is types.uint32:  return 'u'
-        ## we only support integers that fit in a lltype.Signed (==rffi.LONG)
-        ## (on 64-bit platforms, types.sint64 is types.slong and the case is
-        ## caught above)
-        ## elif ffi_type is types.sint64:  return 'i'
-        ## elif ffi_type is types.uint64:  return 'u'
+        ## (note that on 64-bit platforms, types.sint64 is types.slong and the
+        ## case is caught above)
+        elif ffi_type is types.sint64:  return 'I'
+        elif ffi_type is types.uint64:  return 'U'
+        #
+        elif types.is_struct(ffi_type): return 'S'
         raise KeyError
 
+    @staticmethod
+    @jit.purefunction
+    def is_struct(ffi_type):
+        return intmask(ffi_type.c_type) == intmask(FFI_TYPE_STRUCT)
+
 types._import()
 
 @specialize.arg(0)
@@ -78,8 +91,11 @@
     sz = rffi.sizeof(TYPE)
     return sz <= rffi.sizeof(rffi.LONG)
 
+
 # ======================================================================
 
+IS_32_BIT = (r_uint.BITS == 32)
+
 @specialize.memo()
 def _check_type(TYPE):
     if isinstance(TYPE, lltype.Ptr):
@@ -105,11 +121,37 @@
             val = rffi.cast(rffi.LONG, val)
         elif TYPE is rffi.DOUBLE:
             cls = FloatArg
+        elif TYPE is rffi.LONGLONG or TYPE is rffi.ULONGLONG:
+            raise TypeError, 'r_(u)longlong not supported by arg(), use arg_(u)longlong()'
+        elif TYPE is rffi.FLOAT:
+            raise TypeError, 'r_singlefloat not supported by arg(), use arg_singlefloat()'
         else:
             raise TypeError, 'Unsupported argument type: %s' % TYPE
         self._append(cls(val))
         return self
 
+    def arg_raw(self, val):
+        self._append(RawArg(val))
+
+    def arg_longlong(self, val):
+        """
+        Note: this is a hack. So far, the JIT does not support long longs, so
+        you must pass the value as if it were a python Float (rffi.DOUBLE).  You can
+        use the convenience functions longlong2float and float2longlong to do
+        the conversions.  Note that if you use long longs, the call won't
+        be jitted at all.
+        """
+        assert IS_32_BIT      # use a normal integer on 64-bit platforms
+        self._append(LongLongArg(val))
+
+    def arg_singlefloat(self, val):
+        """
+        Note: you must pass a python Float (rffi.DOUBLE), not a r_singlefloat
+        (else the jit complains).  Note that if you use single floats, the
+        call won't be jitted at all.
+        """
+        self._append(SingleFloatArg(val))
+
     def _append(self, arg):
         if self.first is None:
             self.first = self.last = arg
@@ -132,8 +174,9 @@
     def push(self, func, ll_args, i):
         func._push_int(self.intval, ll_args, i)
 
+
 class FloatArg(AbstractArg):
-    """ An argument holding a float
+    """ An argument holding a python float (i.e. a C double)
     """
 
     def __init__(self, floatval):
@@ -142,6 +185,37 @@
     def push(self, func, ll_args, i):
         func._push_float(self.floatval, ll_args, i)
 
+class RawArg(AbstractArg):
+    """ An argument holding a raw pointer to put inside ll_args
+    """
+
+    def __init__(self, ptrval):
+        self.ptrval = ptrval
+
+    def push(self, func, ll_args, i):
+        func._push_raw(self.ptrval, ll_args, i)
+
+class SingleFloatArg(AbstractArg):
+    """ An argument representing a C float (but holding a C double)
+    """
+
+    def __init__(self, floatval):
+        self.floatval = floatval
+
+    def push(self, func, ll_args, i):
+        func._push_single_float(self.floatval, ll_args, i)
+
+
+class LongLongArg(AbstractArg):
+    """ An argument representing a C long long (but holding a C double)
+    """
+
+    def __init__(self, floatval):
+        self.floatval = floatval
+
+    def push(self, func, ll_args, i):
+        func._push_longlong(self.floatval, ll_args, i)
+
 
 # ======================================================================
 
@@ -164,8 +238,8 @@
     # ========================================================================
 
     @jit.unroll_safe
-    @specialize.arg(2)
-    def call(self, argchain, RESULT):
+    @specialize.arg(2, 3)
+    def call(self, argchain, RESULT, is_struct=False):
         # WARNING!  This code is written carefully in a way that the JIT
         # optimizer will see a sequence of calls like the following:
         #
@@ -179,6 +253,7 @@
         # the optimizer will fail to recognize the pattern and won't turn it
         # into a fast CALL.  Note that "arg = arg.next" is optimized away,
         # assuming that argchain is completely virtual.
+        self = jit.hint(self, promote=True)
         if argchain.numargs != len(self.argtypes):
             raise TypeError, 'Wrong number of arguments: %d expected, got %d' %\
                 (argchain.numargs, len(self.argtypes))
@@ -190,10 +265,24 @@
             i += 1
             arg = arg.next
         #
-        if _fits_into_long(RESULT):
+        if is_struct:
+            assert types.is_struct(self.restype)
+            res = self._do_call_raw(self.funcsym, ll_args)
+        elif _fits_into_long(RESULT):
+            assert not types.is_struct(self.restype)
             res = self._do_call_int(self.funcsym, ll_args)
         elif RESULT is rffi.DOUBLE:
             return self._do_call_float(self.funcsym, ll_args)
+        elif RESULT is rffi.FLOAT:
+            # XXX: even if RESULT is FLOAT, we still return a DOUBLE, else the
+            # jit complains. Note that the jit is disabled in this case
+            return self._do_call_single_float(self.funcsym, ll_args)
+        elif RESULT is rffi.LONGLONG or RESULT is rffi.ULONGLONG:
+            # XXX: even if RESULT is LONGLONG, we still return a DOUBLE, else the
+            # jit complains. Note that the jit is disabled in this case
+            # (it's not a typo, we really return a DOUBLE)
+            assert IS_32_BIT
+            return self._do_call_longlong(self.funcsym, ll_args)
         elif RESULT is lltype.Void:
             return self._do_call_void(self.funcsym, ll_args)
         else:
@@ -222,11 +311,26 @@
     def _push_int(self, value, ll_args, i):
         self._push_arg(value, ll_args, i)
 
+    @jit.dont_look_inside
+    def _push_raw(self, value, ll_args, i):
+        ll_args[i] = value
+
     @jit.oopspec('libffi_push_float(self, value, ll_args, i)')
     @enforceargs(   None, float, None,    int) # fix the annotation for tests
     def _push_float(self, value, ll_args, i):
         self._push_arg(value, ll_args, i)
 
+    @jit.dont_look_inside
+    def _push_single_float(self, value, ll_args, i):
+        self._push_arg(r_singlefloat(value), ll_args, i)
+
+    @jit.dont_look_inside
+    def _push_longlong(self, floatval, ll_args, i):
+        """
+        Takes a longlong represented as a python Float.  This is a hack for
+        the jit: without it, the JIT could not look inside the libffi module
+        at all."""
+        self._push_arg(float2longlong(floatval), ll_args, i)
+
     @jit.oopspec('libffi_call_int(self, funcsym, ll_args)')
     def _do_call_int(self, funcsym, ll_args):
         return self._do_call(funcsym, ll_args, rffi.LONG)
@@ -235,6 +339,21 @@
     def _do_call_float(self, funcsym, ll_args):
         return self._do_call(funcsym, ll_args, rffi.DOUBLE)
 
+    @jit.dont_look_inside
+    def _do_call_single_float(self, funcsym, ll_args):
+        single_res = self._do_call(funcsym, ll_args, rffi.FLOAT)
+        return float(single_res)
+
+    @jit.dont_look_inside
+    def _do_call_raw(self, funcsym, ll_args):
+        # same as _do_call_int, but marked as jit.dont_look_inside
+        return self._do_call(funcsym, ll_args, rffi.LONG)
+
+    @jit.dont_look_inside
+    def _do_call_longlong(self, funcsym, ll_args):
+        llres = self._do_call(funcsym, ll_args, rffi.LONGLONG)
+        return longlong2float(llres)
+
     @jit.oopspec('libffi_call_void(self, funcsym, ll_args)')
     def _do_call_void(self, funcsym, ll_args):
         return self._do_call(funcsym, ll_args, lltype.Void)
@@ -265,7 +384,14 @@
                             rffi.cast(rffi.VOIDPP, ll_args))
         if RESULT is not lltype.Void:
             TP = lltype.Ptr(rffi.CArray(RESULT))
-            res = rffi.cast(TP, ll_result)[0]
+            buf = rffi.cast(TP, ll_result)
+            if types.is_struct(self.restype):
+                assert RESULT == rffi.LONG
+                # for structs, we directly return the buffer and transfer the
+                # ownership
+                res = rffi.cast(RESULT, buf)
+            else:
+                res = buf[0]
         else:
             res = None
         self._free_buffers(ll_result, ll_args)
@@ -274,11 +400,19 @@
 
     def _free_buffers(self, ll_result, ll_args):
         if ll_result:
-            lltype.free(ll_result, flavor='raw')
+            self._free_buffer_maybe(rffi.cast(rffi.VOIDP, ll_result), self.restype)
         for i in range(len(self.argtypes)):
-            lltype.free(ll_args[i], flavor='raw')
+            argtype = self.argtypes[i]
+            self._free_buffer_maybe(ll_args[i], argtype)
         lltype.free(ll_args, flavor='raw')
 
+    def _free_buffer_maybe(self, buf, ffitype):
+        # if it's a struct, the buffer is not freed: its ownership either
+        # already belongs to the caller (for the ll_args buffers) or is
+        # transferred to it (for the ll_result buffer)
+        if not types.is_struct(ffitype):
+            lltype.free(buf, flavor='raw')
+
 
 # ======================================================================
 
@@ -288,11 +422,8 @@
     def __init__(self, libname):
         """Load the library, or raises DLOpenError."""
         self.lib = rffi.cast(DLLHANDLE, 0)
-        ll_libname = rffi.str2charp(libname)
-        try:
+        with rffi.scoped_str2charp(libname) as ll_libname:
             self.lib = dlopen(ll_libname)
-        finally:
-            lltype.free(ll_libname, flavor='raw')
 
     def __del__(self):
         if self.lib:
@@ -302,3 +433,6 @@
     def getpointer(self, name, argtypes, restype, flags=FUNCFLAG_CDECL):
         return Func(name, argtypes, restype, dlsym(self.lib, name),
                     flags=flags, keepalive=self)
+
+    def getaddressindll(self, name):
+        return dlsym(self.lib, name)
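
The calling convention that these libffi changes extend is unchanged: build an
ArgChain, push the arguments, and hand it to Func.call() together with the
expected low-level result type (plus the new is_struct flag for by-value
structs).  A minimal usage sketch, modeled on the tests further down; the
library name 'libm.so' is an assumption made for illustration, the real tests
resolve it portably via get_libm_name():

    from pypy.rpython.lltypesystem import rffi
    from pypy.rlib.libffi import CDLL, ArgChain, types

    libm = CDLL('libm.so')   # assumed name; see get_libm_name() in the tests
    pow_func = libm.getpointer('pow', [types.double, types.double], types.double)
    chain = ArgChain()
    chain.arg(2.0).arg(3.0)  # arg() returns self, so pushes can be chained
    assert pow_func.call(chain, rffi.DOUBLE) == 8.0
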
diff --git a/pypy/rlib/longlong2float.py b/pypy/rlib/longlong2float.py
--- a/pypy/rlib/longlong2float.py
+++ b/pypy/rlib/longlong2float.py
@@ -30,14 +30,19 @@
     return llval
 
 from pypy.translator.tool.cbuild import ExternalCompilationInfo
-eci = ExternalCompilationInfo(post_include_bits=["""
+eci = ExternalCompilationInfo(includes=['string.h', 'assert.h'],
+                              post_include_bits=["""
 static double pypy__longlong2float(long long x) {
-    char *p = (char*)&x;
-    return *((double*)p);
+    double dd;
+    assert(sizeof(double) == 8 && sizeof(long long) == 8);
+    memcpy(&dd, &x, 8);
+    return dd;
 }
 static long long pypy__float2longlong(double x) {
-    char *p = (char*)&x;
-    return *((long long*)p);
+    long long ll;
+    assert(sizeof(double) == 8 && sizeof(long long) == 8);
+    memcpy(&ll, &x, 8);
+    return ll;
 }
 """])
 
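
The C helpers above switch from pointer casts to memcpy(): the result is the
same bit-for-bit reinterpretation, but without reading a long long object
through a double lvalue, which falls foul of C's strict-aliasing rules under
optimizing compilers.  For illustration only (not part of the patch), the same
reinterpretation expressed with Python's struct module:

    import struct

    def longlong2float_py(ll):
        # reinterpret the 8 bytes of a signed 64-bit integer as a double
        return struct.unpack('d', struct.pack('q', ll))[0]

    def float2longlong_py(d):
        # reinterpret the 8 bytes of a double as a signed 64-bit integer
        return struct.unpack('q', struct.pack('d', d))[0]

    assert float2longlong_py(longlong2float_py(42)) == 42
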
diff --git a/pypy/rlib/rgc.py b/pypy/rlib/rgc.py
--- a/pypy/rlib/rgc.py
+++ b/pypy/rlib/rgc.py
@@ -191,6 +191,21 @@
         hop.exception_cannot_occur()
         return hop.genop('gc_can_move', hop.args_v, resulttype=hop.r_result)
 
+def _make_sure_does_not_move(p):
+    """'p' is a non-null GC object.  This (tries to) make sure that the
+    object does not move any more, by forcing collections if needed.
+    Warning: should ideally only be used with the minimark GC, and only
+    on objects that are already a bit old, so they have a chance to be
+    non-movable already."""
+    if not we_are_translated():
+        return
+    i = 0
+    while can_move(p):
+        if i > 6:
+            raise NotImplementedError("can't make object non-movable!")
+        collect(i)
+        i += 1
+
 def _heap_stats():
     raise NotImplementedError # can't be run directly
 
diff --git a/pypy/rlib/ropenssl.py b/pypy/rlib/ropenssl.py
--- a/pypy/rlib/ropenssl.py
+++ b/pypy/rlib/ropenssl.py
@@ -134,7 +134,8 @@
 
 def external(name, argtypes, restype, **kw):
     kw['compilation_info'] = eci
-    eci.export_symbols += (name,)
+    if not kw.get('macro', False):
+        eci.export_symbols += (name,)
     return rffi.llexternal(
         name, argtypes, restype, **kw)
 
diff --git a/pypy/rlib/rrandom.py b/pypy/rlib/rrandom.py
--- a/pypy/rlib/rrandom.py
+++ b/pypy/rlib/rrandom.py
@@ -24,8 +24,7 @@
     def __init__(self, seed=r_uint(0)):
         self.state = [r_uint(0)] * N
         self.index = 0
-        if seed:
-            self.init_genrand(seed)
+        self.init_genrand(seed)
 
     def init_genrand(self, s):
         mt = self.state
diff --git a/pypy/rlib/streamio.py b/pypy/rlib/streamio.py
--- a/pypy/rlib/streamio.py
+++ b/pypy/rlib/streamio.py
@@ -141,7 +141,8 @@
 def construct_stream_tower(stream, buffering, universal, reading, writing,
                            binary):
     if buffering == 0:   # no buffering
-        pass
+        if reading:      # force some minimal buffering for readline()
+            stream = ReadlineInputStream(stream)
     elif buffering == 1:   # line-buffering
         if writing:
             stream = LineBufferingOutputStream(stream)
@@ -749,6 +750,113 @@
                                               flush_buffers=False)
 
 
+class ReadlineInputStream(Stream):
+
+    """Minimal buffering input stream.
+
+    Only does buffering for readline().  The other kinds of reads, and
+    all writes, are not buffered at all.
+    """
+
+    bufsize = 2**13 # 8 K
+
+    def __init__(self, base, bufsize=-1):
+        self.base = base
+        self.do_read = base.read   # function to fill buffer some more
+        self.do_seek = base.seek   # seek to a byte offset
+        if bufsize == -1:     # Get default from the class
+            bufsize = self.bufsize
+        self.bufsize = bufsize  # buffer size (hint only)
+        self.buf = None         # raw data (may contain "\n")
+        self.bufstart = 0
+
+    def flush_buffers(self):
+        if self.buf is not None:
+            try:
+                self.do_seek(self.bufstart-len(self.buf), 1)
+            except MyNotImplementedError:
+                pass
+            else:
+                self.buf = None
+                self.bufstart = 0
+
+    def readline(self):
+        if self.buf is not None:
+            i = self.buf.find('\n', self.bufstart)
+        else:
+            self.buf = ''
+            i = -1
+        #
+        if i < 0:
+            self.buf = self.buf[self.bufstart:]
+            self.bufstart = 0
+            while True:
+                bufsize = max(self.bufsize, len(self.buf) >> 2)
+                data = self.do_read(bufsize)
+                if not data:
+                    result = self.buf              # end-of-file reached
+                    self.buf = None
+                    return result
+                startsearch = len(self.buf)   # there is no '\n' in buf so far
+                self.buf += data
+                i = self.buf.find('\n', startsearch)
+                if i >= 0:
+                    break
+        #
+        i += 1
+        result = self.buf[self.bufstart:i]
+        self.bufstart = i
+        return result
+
+    def peek(self):
+        if self.buf is None:
+            return ''
+        if self.bufstart > 0:
+            self.buf = self.buf[self.bufstart:]
+            self.bufstart = 0
+        return self.buf
+
+    def tell(self):
+        pos = self.base.tell()
+        if self.buf is not None:
+            pos -= (len(self.buf) - self.bufstart)
+        return pos
+
+    def readall(self):
+        result = self.base.readall()
+        if self.buf is not None:
+            result = self.buf[self.bufstart:] + result
+            self.buf = None
+            self.bufstart = 0
+        return result
+
+    def read(self, n):
+        if self.buf is None:
+            return self.do_read(n)
+        else:
+            m = n - (len(self.buf) - self.bufstart)
+            start = self.bufstart
+            if m > 0:
+                result = self.buf[start:] + self.do_read(m)
+                self.buf = None
+                self.bufstart = 0
+                return result
+            elif n >= 0:
+                self.bufstart = start + n
+                return self.buf[start : self.bufstart]
+            else:
+                return ''
+
+    seek       = PassThrough("seek",      flush_buffers=True)
+    write      = PassThrough("write",     flush_buffers=True)
+    truncate   = PassThrough("truncate",  flush_buffers=True)
+    flush      = PassThrough("flush",     flush_buffers=True)
+    flushable  = PassThrough("flushable", flush_buffers=False)
+    close      = PassThrough("close",     flush_buffers=False)
+    try_to_find_file_descriptor = PassThrough("try_to_find_file_descriptor",
+                                              flush_buffers=False)
+
+
 class BufferingOutputStream(Stream):
 
     """Standard buffering output stream.
diff --git a/pypy/rlib/test/test_libffi.py b/pypy/rlib/test/test_libffi.py
--- a/pypy/rlib/test/test_libffi.py
+++ b/pypy/rlib/test/test_libffi.py
@@ -2,8 +2,10 @@
 import sys
 from pypy.rpython.lltypesystem import rffi, lltype
 from pypy.rpython.lltypesystem.ll2ctypes import ALLOCATED
-from pypy.rlib.test.test_clibffi import BaseFfiTest, get_libm_name
+from pypy.rlib.rarithmetic import r_singlefloat, r_longlong, r_ulonglong
+from pypy.rlib.test.test_clibffi import BaseFfiTest, get_libm_name, make_struct_ffitype_e
 from pypy.rlib.libffi import CDLL, Func, get_libc_name, ArgChain, types
+from pypy.rlib.libffi import longlong2float, float2longlong, IS_32_BIT
 
 class TestLibffiMisc(BaseFfiTest):
 
@@ -50,6 +52,18 @@
         del lib
         assert not ALLOCATED
 
+    def test_longlong_as_float(self):
+        from pypy.translator.c.test.test_genc import compile
+        maxint64 = r_longlong(9223372036854775807)
+        def fn(x):
+            d = longlong2float(x)
+            ll = float2longlong(d)
+            return ll
+        assert fn(maxint64) == maxint64
+        #
+        fn2 = compile(fn, [r_longlong])
+        res = fn2(maxint64)
+        assert res == maxint64
 
 class TestLibffiCall(BaseFfiTest):
     """
@@ -97,7 +111,7 @@
     def get_libfoo(self):
         return self.CDLL(self.libfoo_name)
 
-    def call(self, funcspec, args, RESULT, init_result=0):
+    def call(self, funcspec, args, RESULT, init_result=0, is_struct=False):
         """
         Call the specified function after constructing an ArgChain with the
         arguments in ``args``.
@@ -114,8 +128,20 @@
         func = lib.getpointer(name, argtypes, restype)
         chain = ArgChain()
         for arg in args:
-            chain.arg(arg)
-        return func.call(chain, RESULT)
+            if isinstance(arg, r_singlefloat):
+                chain.arg_singlefloat(float(arg))
+            elif IS_32_BIT and isinstance(arg, r_longlong):
+                chain.arg_longlong(longlong2float(arg))
+            elif IS_32_BIT and isinstance(arg, r_ulonglong):
+                arg = rffi.cast(rffi.LONGLONG, arg)
+                chain.arg_longlong(longlong2float(arg))
+            elif isinstance(arg, tuple):
+                methname, arg = arg
+                meth = getattr(chain, methname)
+                meth(arg)
+            else:
+                chain.arg(arg)
+        return func.call(chain, RESULT, is_struct=is_struct)
 
     def check_loops(self, *args, **kwds):
         """
@@ -137,7 +163,7 @@
         res = self.call(func, [38, 4.2], rffi.LONG)
         assert res == 42
         self.check_loops({
-                'call_may_force': 1,
+                'call_release_gil': 1,
                 'guard_no_exception': 1,
                 'guard_not_forced': 1,
                 'int_add': 1,
@@ -150,7 +176,7 @@
         func = (libm, 'pow', [types.double, types.double], types.double)
         res = self.call(func, [2.0, 3.0], rffi.DOUBLE, init_result=0.0)
         assert res == 8.0
-        self.check_loops(call_may_force=1, guard_no_exception=1, guard_not_forced=1)
+        self.check_loops(call_release_gil=1, guard_no_exception=1, guard_not_forced=1)
 
     def test_cast_result(self):
         """
@@ -163,7 +189,7 @@
         func = (libfoo, 'cast_to_uchar_and_ovf', [types.sint], types.uchar)
         res = self.call(func, [0], rffi.UCHAR)
         assert res == 200
-        self.check_loops(call_may_force=1, guard_no_exception=1, guard_not_forced=1)
+        self.check_loops(call_release_gil=1, guard_no_exception=1, guard_not_forced=1)
 
     def test_cast_argument(self):
         """
@@ -267,6 +293,76 @@
         res = self.call(get_dummy, [], rffi.LONG)
         assert res == initval+1
 
+    def test_single_float_args(self):
+        """
+            float sum_xy_float(float x, float y)
+            {
+                return x+y;
+            }
+        """
+        from ctypes import c_float # this is used only to compute the expected result
+        libfoo = self.get_libfoo()
+        func = (libfoo, 'sum_xy_float', [types.float, types.float], types.float)
+        x = r_singlefloat(12.34)
+        y = r_singlefloat(56.78)
+        res = self.call(func, [x, y], rffi.FLOAT, init_result=0.0)
+        expected = c_float(c_float(12.34).value + c_float(56.78).value).value
+        assert res == expected
+
+    def test_slonglong_args(self):
+        """
+            long long sum_xy_longlong(long long x, long long y)
+            {
+                return x+y;
+            }
+        """
+        maxint32 = 2147483647 # we cannot really go above maxint on 64 bits
+                              # (and we would not test anything, as there long
+                              # is the same as long long)
+        libfoo = self.get_libfoo()
+        func = (libfoo, 'sum_xy_longlong', [types.slonglong, types.slonglong],
+                types.slonglong)
+        if IS_32_BIT:
+            x = r_longlong(maxint32+1)
+            y = r_longlong(maxint32+2)
+            zero = longlong2float(r_longlong(0))
+        else:
+            x = maxint32+1
+            y = maxint32+2
+            zero = 0
+        res = self.call(func, [x, y], rffi.LONGLONG, init_result=zero)
+        if IS_32_BIT:
+            # obscure, on 32bit it's really a long long, so it returns a
+            # DOUBLE because of the JIT hack
+            res = float2longlong(res)
+        expected = maxint32*2 + 3
+        assert res == expected
+
+    def test_ulonglong_args(self):
+        """
+            unsigned long long sum_xy_ulonglong(unsigned long long x,
+                                                unsigned long long y)
+            {
+                return x+y;
+            }
+        """
+        maxint64 = 9223372036854775807 # maxint64+1 does not fit into a
+                                       # longlong, but it does into a
+                                       # ulonglong
+        libfoo = self.get_libfoo()
+        func = (libfoo, 'sum_xy_ulonglong', [types.ulonglong, types.ulonglong],
+                types.ulonglong)
+        x = r_ulonglong(maxint64+1)
+        y = r_ulonglong(2)
+        res = self.call(func, [x, y], rffi.ULONGLONG, init_result=0)
+        if IS_32_BIT:
+            # obscure, on 32bit it's really a long long, so it returns a
+            # DOUBLE because of the JIT hack
+            res = float2longlong(res)
+            res = rffi.cast(rffi.ULONGLONG, res)
+        expected = maxint64 + 3
+        assert res == expected
+
     def test_wrong_number_of_arguments(self):
         from pypy.rpython.llinterp import LLException
         libfoo = self.get_libfoo() 
@@ -287,3 +383,57 @@
 
         my_raises("self.call(func, [38], rffi.LONG)") # one less
         my_raises("self.call(func, [38, 12.3, 42], rffi.LONG)") # one more
+
+
+    def test_byval_argument(self):
+        """
+            struct Point {
+                long x;
+                long y;
+            };
+
+            long sum_point(struct Point p) {
+                return p.x + p.y;
+            }
+        """
+        libfoo = CDLL(self.libfoo_name)
+        ffi_point_struct = make_struct_ffitype_e(0, 0, [types.slong, types.slong])
+        ffi_point = ffi_point_struct.ffistruct
+        sum_point = (libfoo, 'sum_point', [ffi_point], types.slong)
+        #
+        ARRAY = rffi.CArray(rffi.LONG)
+        buf = lltype.malloc(ARRAY, 2, flavor='raw')
+        buf[0] = 30
+        buf[1] = 12
+        adr = rffi.cast(rffi.VOIDP, buf)
+        res = self.call(sum_point, [('arg_raw', adr)], rffi.LONG, init_result=0)
+        assert res == 42
+        # check that we still have the ownership on the buffer
+        assert buf[0] == 30
+        assert buf[1] == 12
+        lltype.free(buf, flavor='raw')
+        lltype.free(ffi_point_struct, flavor='raw')
+
+    def test_byval_result(self):
+        """
+            struct Point make_point(long x, long y) {
+                struct Point p;
+                p.x = x;
+                p.y = y;
+                return p;
+            }
+        """
+        libfoo = CDLL(self.libfoo_name)
+        ffi_point_struct = make_struct_ffitype_e(0, 0, [types.slong, types.slong])
+        ffi_point = ffi_point_struct.ffistruct
+
+        libfoo = CDLL(self.libfoo_name)
+        make_point = (libfoo, 'make_point', [types.slong, types.slong], ffi_point)
+        #
+        PTR = lltype.Ptr(rffi.CArray(rffi.LONG))
+        p = self.call(make_point, [12, 34], PTR, init_result=lltype.nullptr(PTR.TO),
+                      is_struct=True)
+        assert p[0] == 12
+        assert p[1] == 34
+        lltype.free(p, flavor='raw')
+        lltype.free(ffi_point_struct, flavor='raw')
diff --git a/pypy/rlib/test/test_rrandom.py b/pypy/rlib/test/test_rrandom.py
--- a/pypy/rlib/test/test_rrandom.py
+++ b/pypy/rlib/test/test_rrandom.py
@@ -3,6 +3,12 @@
 
 # the numbers were created by using CPython's _randommodule.c
 
+def test_init_from_zero():
+    rnd = Random(0)
+    assert rnd.state[:14] == [0, 1, 1812433255, 1900727105, 1208447044,
+            2481403966, 4042607538, 337614300, 3232553940,
+            1018809052, 3202401494, 1775180719, 3192392114, 594215549]
+
 def test_init_from_seed():
     rnd = Random(1000)
     assert rnd.state[:14] == [1000, 4252021385, 1724402292, 571538732,
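
The expected state in test_init_from_zero follows from the standard Mersenne
Twister seeding recurrence that rlib.rrandom implements; a short plain-Python
sketch of just the seeding step:

    def init_genrand(seed, n=14):
        # mt[i] = (1812433253 * (mt[i-1] ^ (mt[i-1] >> 30)) + i) mod 2**32
        mt = [seed & 0xffffffff]
        for i in range(1, n):
            prev = mt[i - 1]
            mt.append((1812433253 * (prev ^ (prev >> 30)) + i) & 0xffffffff)
        return mt

    assert init_genrand(0)[:4] == [0, 1, 1812433255, 1900727105]
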
diff --git a/pypy/rlib/test/test_streamio.py b/pypy/rlib/test/test_streamio.py
--- a/pypy/rlib/test/test_streamio.py
+++ b/pypy/rlib/test/test_streamio.py
@@ -1008,6 +1008,75 @@
             assert base.buf == data
 
 
+class TestReadlineInputStream:
+
+    packets = ["a", "b", "\n", "def", "\nxy\npq\nuv", "wx"]
+    lines = ["ab\n", "def\n", "xy\n", "pq\n", "uvwx"]
+
+    def makeStream(self, seek=False, tell=False, bufsize=-1):
+        base = TSource(self.packets)
+        self.source = base
+        def f(*args):
+            if seek is False:
+                raise NotImplementedError     # a bug!
+            if seek is None:
+                raise streamio.MyNotImplementedError   # can be caught
+            raise ValueError(seek)  # uh?
+        if not tell:
+            base.tell = f
+        if not seek:
+            base.seek = f
+        return streamio.ReadlineInputStream(base, bufsize)
+
+    def test_readline(self):
+        for file in [self.makeStream(), self.makeStream(bufsize=2)]:
+            i = 0
+            while 1:
+                r = file.readline()
+                if r == "":
+                    break
+                assert self.lines[i] == r
+                i += 1
+            assert i == len(self.lines)
+
+    def test_readline_and_read_interleaved(self):
+        for file in [self.makeStream(seek=True),
+                     self.makeStream(seek=True, bufsize=2)]:
+            i = 0
+            while 1:
+                firstchar = file.read(1)
+                if firstchar == "":
+                    break
+                r = file.readline()
+                assert r != ""
+                assert self.lines[i] == firstchar + r
+                i += 1
+            assert i == len(self.lines)
+
+    def test_readline_and_read_interleaved_no_seek(self):
+        for file in [self.makeStream(seek=None),
+                     self.makeStream(seek=None, bufsize=2)]:
+            i = 0
+            while 1:
+                firstchar = file.read(1)
+                if firstchar == "":
+                    break
+                r = file.readline()
+                assert r != ""
+                assert self.lines[i] == firstchar + r
+                i += 1
+            assert i == len(self.lines)
+
+    def test_readline_and_readall(self):
+        file = self.makeStream(seek=True, tell=True, bufsize=2)
+        r = file.readline()
+        assert r == 'ab\n'
+        assert file.tell() == 3
+        r = file.readall()
+        assert r == 'def\nxy\npq\nuvwx'
+        r = file.readall()
+        assert r == ''
+
 
 # Speed test
 
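The TestReadlineInputStream class above feeds data in small packets and checks
that readline() still returns whole lines.  The core buffering idea, as a
self-contained sketch (not the actual streamio.ReadlineInputStream code):

    class SimpleReadlineStream(object):
        def __init__(self, base, bufsize=8192):
            self.base = base              # any object with a read(n) method
            self.bufsize = bufsize
            self.buf = ""

        def readline(self):
            # keep reading packets until a newline (or EOF) shows up
            while "\n" not in self.buf:
                data = self.base.read(self.bufsize)
                if not data:                       # EOF: flush what is left
                    line, self.buf = self.buf, ""
                    return line
                self.buf += data
            line, self.buf = self.buf.split("\n", 1)
            return line + "\n"
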
diff --git a/pypy/rpython/lltypesystem/ll2ctypes.py b/pypy/rpython/lltypesystem/ll2ctypes.py
--- a/pypy/rpython/lltypesystem/ll2ctypes.py
+++ b/pypy/rpython/lltypesystem/ll2ctypes.py
@@ -418,6 +418,9 @@
     instance._storage = ctypes_storage
     assert ctypes_storage   # null pointer?
 
+class NotCtypesAllocatedStructure(ValueError):
+    pass
+
 class _parentable_mixin(object):
     """Mixin added to _parentable containers when they become ctypes-based.
     (This is done by changing the __class__ of the instance to reference
@@ -436,7 +439,7 @@
     def _addressof_storage(self):
         "Returns the storage address as an int"
         if self._storage is None or self._storage is True:
-            raise ValueError("Not a ctypes allocated structure")
+            raise NotCtypesAllocatedStructure("Not a ctypes allocated structure")
         return intmask(ctypes.cast(self._storage, ctypes.c_void_p).value)
 
     def _free(self):
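
The new NotCtypesAllocatedStructure subclass lets callers tell "this object
has no ctypes storage" apart from any other ValueError.  A hypothetical usage
sketch:

    def addressof_or_none(obj):
        try:
            return obj._addressof_storage()
        except NotCtypesAllocatedStructure:
            return None   # not ctypes-backed; other ValueErrors still propagate
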
diff --git a/pypy/rpython/lltypesystem/lltype.py b/pypy/rpython/lltypesystem/lltype.py
--- a/pypy/rpython/lltypesystem/lltype.py
+++ b/pypy/rpython/lltypesystem/lltype.py
@@ -831,7 +831,7 @@
     raise TypeError, "unsupported cast"
 
 def _cast_whatever(TGT, value):
-    from pypy.rpython.lltypesystem import llmemory
+    from pypy.rpython.lltypesystem import llmemory, rffi
     ORIG = typeOf(value)
     if ORIG == TGT:
         return value
@@ -847,6 +847,8 @@
                 return cast_pointer(TGT, value)
         elif ORIG == llmemory.Address:
             return llmemory.cast_adr_to_ptr(value, TGT)
+        elif TGT == rffi.VOIDP and ORIG == Unsigned:
+            return rffi.cast(TGT, value)
         elif ORIG == Signed:
             return cast_int_to_ptr(TGT, value)
     elif TGT == llmemory.Address and isinstance(ORIG, Ptr):
diff --git a/pypy/rpython/lltypesystem/rffi.py b/pypy/rpython/lltypesystem/rffi.py
--- a/pypy/rpython/lltypesystem/rffi.py
+++ b/pypy/rpython/lltypesystem/rffi.py
@@ -139,10 +139,10 @@
         source = py.code.Source("""
             def call_external_function(%(argnames)s):
                 before = aroundstate.before
-                after = aroundstate.after
                 if before: before()
                 # NB. it is essential that no exception checking occurs here!
                 res = funcptr(%(argnames)s)
+                after = aroundstate.after
                 if after: after()
                 return res
         """ % locals())
@@ -244,7 +244,7 @@
     def __init__(self):
         self.callbacks = {}
 
-def _make_wrapper_for(TP, callable, callbackholder, aroundstate=None):
+def _make_wrapper_for(TP, callable, callbackholder=None, aroundstate=None):
     """ Function creating wrappers for callbacks. Note that this is
     cheating as we assume constant callbacks and we just memoize wrappers
     """
@@ -253,21 +253,18 @@
     if hasattr(callable, '_errorcode_'):
         errorcode = callable._errorcode_
     else:
-        errorcode = TP.TO.RESULT._example()
+        errorcode = TP.TO.RESULT._defl()
     callable_name = getattr(callable, '__name__', '?')
-    callbackholder.callbacks[callable] = True
+    if callbackholder is not None:
+        callbackholder.callbacks[callable] = True
     args = ', '.join(['a%d' % i for i in range(len(TP.TO.ARGS))])
     source = py.code.Source(r"""
         def wrapper(%s):    # no *args - no GIL for mallocing the tuple
             llop.gc_stack_bottom(lltype.Void)   # marker for trackgcroot.py
             if aroundstate is not None:
-                before = aroundstate.before
                 after = aroundstate.after
-            else:
-                before = None
-                after = None
-            if after:
-                after()
+                if after:
+                    after()
             # from now on we hold the GIL
             stackcounter.stacks_counter += 1
             try:
@@ -281,8 +278,10 @@
                     traceback.print_exc()
                 result = errorcode
             stackcounter.stacks_counter -= 1
-            if before:
-                before()
+            if aroundstate is not None:
+                before = aroundstate.before
+                if before:
+                    before()
             # here we don't hold the GIL any more. As in the wrapper() produced
             # by llexternal, it is essential that no exception checking occurs
             # after the call to before().
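
Both changes above reorder the reads of aroundstate so that nothing that could
involve exception checking happens between releasing the GIL and the C call.
In outline (a sketch of what the generated wrapper does, not the generated
code itself): for an outgoing call (first hunk), before() releases the GIL and
after() re-acquires it; in the callback wrapper (the hunks just above) the
order is mirrored.

    def call_external(funcptr, args, aroundstate):
        before = aroundstate.before
        if before:
            before()              # release the GIL
        res = funcptr(*args)      # no exception checking may occur here
        after = aroundstate.after
        if after:
            after()               # re-acquire the GIL
        return res
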
diff --git a/pypy/rpython/memory/gc/minimark.py b/pypy/rpython/memory/gc/minimark.py
--- a/pypy/rpython/memory/gc/minimark.py
+++ b/pypy/rpython/memory/gc/minimark.py
@@ -927,7 +927,7 @@
     def write_barrier_from_array(self, newvalue, addr_array, index):
         if self.header(addr_array).tid & GCFLAG_NO_YOUNG_PTRS:
             if self.card_page_indices > 0:     # <- constant-folded
-                self.remember_young_pointer_from_array(addr_array, index)
+                self.remember_young_pointer_from_array2(addr_array, index)
             else:
                 self.remember_young_pointer(addr_array, newvalue)
 
@@ -976,7 +976,7 @@
 
     def _init_writebarrier_with_card_marker(self):
         DEBUG = self.DEBUG
-        def remember_young_pointer_from_array(addr_array, index):
+        def remember_young_pointer_from_array2(addr_array, index):
             # 'addr_array' is the address of the object in which we write,
             # which must have an array part;  'index' is the index of the
             # item that is (or contains) the pointer that we write.
@@ -1011,7 +1011,7 @@
             #
             # We set the flag (even if the newly written address does not
             # actually point to the nursery, which seems to be ok -- actually
-            # it seems more important that remember_young_pointer_from_array()
+            # it seems more important that remember_young_pointer_from_array2()
             # does not take 3 arguments).
             addr_byte.char[0] = chr(byte | bitmask)
             #
@@ -1019,9 +1019,67 @@
                 self.old_objects_with_cards_set.append(addr_array)
                 objhdr.tid |= GCFLAG_CARDS_SET
 
-        remember_young_pointer_from_array._dont_inline_ = True
-        self.remember_young_pointer_from_array = (
-            remember_young_pointer_from_array)
+        remember_young_pointer_from_array2._dont_inline_ = True
+        assert self.card_page_indices > 0
+        self.remember_young_pointer_from_array2 = (
+            remember_young_pointer_from_array2)
+
+        # xxx trying it out for the JIT: a 3-arguments version of the above
+        def remember_young_pointer_from_array3(addr_array, index, newvalue):
+            if DEBUG:   # note: PYPY_GC_DEBUG=1 does not enable this
+                ll_assert(self.debug_is_old_object(addr_array),
+                          "young array with GCFLAG_NO_YOUNG_PTRS")
+            objhdr = self.header(addr_array)
+            #
+            # a single check for the common case of neither GCFLAG_HAS_CARDS
+            # nor GCFLAG_NO_HEAP_PTRS
+            if objhdr.tid & (GCFLAG_HAS_CARDS | GCFLAG_NO_HEAP_PTRS) == 0:
+                # common case: fast path, jump to the end of the function
+                pass
+            elif objhdr.tid & GCFLAG_HAS_CARDS == 0:
+                # no cards, but GCFLAG_NO_HEAP_PTRS is set.
+                objhdr.tid &= ~GCFLAG_NO_HEAP_PTRS
+                self.prebuilt_root_objects.append(addr_array)
+                # jump to the end of the function
+            else:
+                # case with cards.
+                #
+                # If the newly written address does not actually point to the
+                # nursery, leave now.
+                if not self.appears_to_be_young(newvalue):
+                    return
+                #
+                # 'addr_array' is a raw_malloc'ed array with card markers
+                # in front.  Compute the index of the bit to set:
+                bitindex = index >> self.card_page_shift
+                byteindex = bitindex >> 3
+                bitmask = 1 << (bitindex & 7)
+                #
+                # If the bit is already set, leave now.
+                size_gc_header = self.gcheaderbuilder.size_gc_header
+                addr_byte = addr_array - size_gc_header
+                addr_byte = llarena.getfakearenaaddress(addr_byte) + \
+                            (~byteindex)
+                byte = ord(addr_byte.char[0])
+                if byte & bitmask:
+                    return
+                addr_byte.char[0] = chr(byte | bitmask)
+                #
+                if objhdr.tid & GCFLAG_CARDS_SET == 0:
+                    self.old_objects_with_cards_set.append(addr_array)
+                    objhdr.tid |= GCFLAG_CARDS_SET
+                return
+            #
+            # Logic for the no-cards case, put here to minimize the number
+            # of checks done at the start of the function
+            if self.appears_to_be_young(newvalue):
+                self.old_objects_pointing_to_young.append(addr_array)
+                objhdr.tid &= ~GCFLAG_NO_YOUNG_PTRS
+
+        remember_young_pointer_from_array3._dont_inline_ = True
+        assert self.card_page_indices > 0
+        self.remember_young_pointer_from_array3 = (
+            remember_young_pointer_from_array3)
 
 
     def assume_young_pointers(self, addr_struct):
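
The card-marking arithmetic shared by both versions above, as a worked example
(card_page_shift == 7, i.e. one card per 128 array items, is an assumed value
for illustration):

    def card_bit(index, card_page_shift=7):
        bitindex = index >> card_page_shift   # which card the item falls into
        byteindex = bitindex >> 3             # which byte of the card area
        bitmask = 1 << (bitindex & 7)         # which bit inside that byte
        return byteindex, bitmask

    # writing to item 300 marks card 2, i.e. bit 2 of byte 0 of the card
    # area that is stored (negatively indexed) in front of the GC header
    assert card_bit(300) == (0, 4)
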
diff --git a/pypy/rpython/memory/gctransform/framework.py b/pypy/rpython/memory/gctransform/framework.py
--- a/pypy/rpython/memory/gctransform/framework.py
+++ b/pypy/rpython/memory/gctransform/framework.py
@@ -463,7 +463,7 @@
                                             annmodel.SomeInteger()],
                                            annmodel.s_None,
                                            inline=True)
-                func = getattr(gcdata.gc, 'remember_young_pointer_from_array',
+                func = getattr(gcdata.gc, 'remember_young_pointer_from_array3',
                                None)
                 if func is not None:
                     # func should not be a bound method, but a real function
@@ -471,7 +471,8 @@
                     self.write_barrier_from_array_failing_case_ptr = \
                                              getfn(func,
                                                    [annmodel.SomeAddress(),
-                                                    annmodel.SomeInteger()],
+                                                    annmodel.SomeInteger(),
+                                                    annmodel.SomeAddress()],
                                                    annmodel.s_None)
         self.statistics_ptr = getfn(GCClass.statistics.im_func,
                                     [s_gc, annmodel.SomeInteger()],
@@ -860,9 +861,9 @@
 
     def gct_get_write_barrier_from_array_failing_case(self, hop):
         op = hop.spaceop
-        hop.genop("same_as",
-                  [self.write_barrier_from_array_failing_case_ptr],
-                  resultvar=op.result)
+        v = getattr(self, 'write_barrier_from_array_failing_case_ptr',
+                    lltype.nullptr(op.result.concretetype.TO))
+        hop.genop("same_as", [v], resultvar=op.result)
 
     def gct_zero_gc_pointers_inside(self, hop):
         if not self.malloc_zero_filled:
diff --git a/pypy/rpython/module/test/test_posix.py b/pypy/rpython/module/test/test_posix.py
--- a/pypy/rpython/module/test/test_posix.py
+++ b/pypy/rpython/module/test/test_posix.py
@@ -43,6 +43,17 @@
         for i in range(len(stat)):
             assert long(getattr(func, 'item%d' % i)) == stat[i]
 
+    def test_stat_exception(self):
+        def fo():
+            try:
+                posix.stat('I/do/not/exist')
+            except OSError:
+                return True
+            else:
+                return False
+        res = self.interpret(fo,[])
+        assert res
+
     def test_times(self):
         import py; py.test.skip("llinterp does not like tuple returns")
         from pypy.rpython.test.test_llinterp import interpret
@@ -205,5 +216,8 @@
     def test_stat(self):
         py.test.skip("ootypesystem does not support os.stat")
 
+    def test_stat_exception(self):
+        py.test.skip("ootypesystem does not support os.stat")
+
     def test_chown(self):
         py.test.skip("ootypesystem does not support os.chown")
diff --git a/pypy/tool/gcc_cache.py b/pypy/tool/gcc_cache.py
--- a/pypy/tool/gcc_cache.py
+++ b/pypy/tool/gcc_cache.py
@@ -39,7 +39,16 @@
         data = ''
     if not (data.startswith('True') or data.startswith('FAIL\n')):
         try:
-            platform.compile(c_files, eci)
+            _previous = platform.log_errors
+            try:
+                platform.log_errors = False
+                platform.compile(c_files, eci)
+            finally:
+                del platform.log_errors
+                # ^^^remove from the instance --- needed so that it can
+                # compare equal to another instance without it
+                if platform.log_errors != _previous:
+                    platform.log_errors = _previous
             data = 'True'
             path.write(data)
         except CompilationError, e:
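
The pattern above shadows the class-level log_errors attribute on the platform
instance while compiling, then deletes the instance attribute again so the
object still compares equal to a pristine platform.  The same idea with a toy
class (a sketch, not the real platform object):

    class Platform(object):
        log_errors = True
        def __eq__(self, other):
            return self.__dict__ == other.__dict__

    p = Platform()
    p.log_errors = False          # shadow on the instance, compile quietly
    try:
        pass                      # ... platform.compile(...) would run here ...
    finally:
        del p.log_errors          # fall back to the class default
    assert p.log_errors is True
    assert p == Platform()        # equality with a fresh instance is restored
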
diff --git a/pypy/tool/jitlogparser/parser.py b/pypy/tool/jitlogparser/parser.py
--- a/pypy/tool/jitlogparser/parser.py
+++ b/pypy/tool/jitlogparser/parser.py
@@ -1,4 +1,5 @@
 import re, sys
+
 from pypy.jit.metainterp.resoperation import rop, opname
 from pypy.jit.tool.oparser import OpParser
 
@@ -51,6 +52,7 @@
 
     # factory method
     Op = Op
+    use_mock_model = True
 
     @classmethod
     def parse_from_input(cls, input):
@@ -61,7 +63,7 @@
         if not argspec.strip():
             return [], None
         if opname == 'debug_merge_point':
-            return argspec.rsplit(", ", 1), None
+            return argspec.split(", ", 1), None
         else:
             args = argspec.split(', ')
             descr = None
@@ -95,12 +97,12 @@
 
     def __init__(self, operations, storage):
         if operations[0].name == 'debug_merge_point':
-            self.inline_level = int(operations[0].args[1])
-            m = re.search('<code object ([<>\w]+), file \'(.+?)\', line (\d+)> #(\d+) (\w+)',
-                         operations[0].getarg(0))
+            self.inline_level = int(operations[0].args[0])
+            m = re.search('<code object ([<>\w]+)\. file \'(.+?)\'\. line (\d+)> #(\d+) (\w+)',
+                         operations[0].getarg(1))
             if m is None:
                 # a non-code loop, like StrLiteralSearch or something
-                self.bytecode_name = operations[0].args[0].split(" ")[0][1:]
+                self.bytecode_name = operations[0].args[1].split(" ")[0][1:]
             else:
                 self.name, self.filename, lineno, bytecode_no, self.bytecode_name = m.groups()
                 self.startlineno = int(lineno)
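
The parser now expects the new debug_merge_point layout: the inline level is
the first argument and the code-object string uses '.' instead of ',' as the
separator.  A standalone check of the same regular expression used above:

    import re

    arg = "<code object f. file 'x.py'. line 2> #0 LOAD_FAST"
    m = re.search(r"<code object ([<>\w]+)\. file '(.+?)'\. line (\d+)> #(\d+) (\w+)",
                  arg)
    assert m.groups() == ('f', 'x.py', '2', '0', 'LOAD_FAST')
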
diff --git a/pypy/tool/jitlogparser/test/test_parser.py b/pypy/tool/jitlogparser/test/test_parser.py
--- a/pypy/tool/jitlogparser/test/test_parser.py
+++ b/pypy/tool/jitlogparser/test/test_parser.py
@@ -29,7 +29,7 @@
 def test_parse_non_code():
     ops = parse('''
     []
-    debug_merge_point("SomeRandomStuff", 0)
+    debug_merge_point(0, "SomeRandomStuff")
     ''')
     res = Function.from_operations(ops.operations, LoopStorage())
     assert len(res.chunks) == 1
@@ -38,10 +38,10 @@
 def test_split():
     ops = parse('''
     [i0]
-    debug_merge_point("<code object stuff, file '/I/dont/exist.py', line 200> #10 ADD", 0)
-    debug_merge_point("<code object stuff, file '/I/dont/exist.py', line 200> #11 SUB", 0)
+    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 200> #10 ADD")
+    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 200> #11 SUB")
     i1 = int_add(i0, 1)
-    debug_merge_point("<code object stuff, file '/I/dont/exist.py', line 200> #11 SUB", 0)
+    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 200> #11 SUB")
     i2 = int_add(i1, 1)
     ''')
     res = Function.from_operations(ops.operations, LoopStorage())
@@ -54,12 +54,12 @@
 def test_inlined_call():
     ops = parse("""
     []
-    debug_merge_point('<code object inlined_call, file 'source.py', line 12> #28 CALL_FUNCTION', 0)
+    debug_merge_point(0, '<code object inlined_call. file 'source.py'. line 12> #28 CALL_FUNCTION')
     i18 = getfield_gc(p0, descr=<BoolFieldDescr pypy.interpreter.pyframe.PyFrame.inst_is_being_profiled 89>)
-    debug_merge_point('<code object inner, file 'source.py', line 9> #0 LOAD_FAST', 1)
-    debug_merge_point('<code object inner, file 'source.py', line 9> #3 LOAD_CONST', 1)
-    debug_merge_point('<code object inner, file 'source.py', line 9> #7 RETURN_VALUE', 1)
-    debug_merge_point('<code object inlined_call, file 'source.py', line 12> #31 STORE_FAST', 0)
+    debug_merge_point(1, '<code object inner. file 'source.py'. line 9> #0 LOAD_FAST')
+    debug_merge_point(1, '<code object inner. file 'source.py'. line 9> #3 LOAD_CONST')
+    debug_merge_point(1, '<code object inner. file 'source.py'. line 9> #7 RETURN_VALUE')
+    debug_merge_point(0, '<code object inlined_call. file 'source.py'. line 12> #31 STORE_FAST')
     """)
     res = Function.from_operations(ops.operations, LoopStorage())
     assert len(res.chunks) == 3 # two chunks + inlined call
@@ -72,10 +72,10 @@
 def test_name():
     ops = parse('''
     [i0]
-    debug_merge_point("<code object stuff, file '/I/dont/exist.py', line 200> #10 ADD", 0)
-    debug_merge_point("<code object stuff, file '/I/dont/exist.py', line 201> #11 SUB", 0)
+    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 200> #10 ADD")
+    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 201> #11 SUB")
     i1 = int_add(i0, 1)
-    debug_merge_point("<code object stuff, file '/I/dont/exist.py', line 202> #11 SUB", 0)
+    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 202> #11 SUB")
     i2 = int_add(i1, 1)
     ''')
     res = Function.from_operations(ops.operations, LoopStorage())
@@ -89,10 +89,10 @@
     ops = parse('''
     [i0]
     i3 = int_add(i0, 1)
-    debug_merge_point("<code object stuff, file '/I/dont/exist.py', line 200> #10 ADD", 0)
-    debug_merge_point("<code object stuff, file '/I/dont/exist.py', line 201> #11 SUB", 0)
+    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 200> #10 ADD")
+    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 201> #11 SUB")
     i1 = int_add(i0, 1)
-    debug_merge_point("<code object stuff, file '/I/dont/exist.py', line 202> #11 SUB", 0)
+    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 202> #11 SUB")
     i2 = int_add(i1, 1)
     ''')
     res = Function.from_operations(ops.operations, LoopStorage())
@@ -102,10 +102,10 @@
     fname = str(py.path.local(__file__).join('..', 'x.py'))
     ops = parse('''
     [i0, i1]
-    debug_merge_point("<code object f, file '%(fname)s', line 2> #0 LOAD_FAST", 0)
-    debug_merge_point("<code object f, file '%(fname)s', line 2> #3 LOAD_FAST", 0)
-    debug_merge_point("<code object f, file '%(fname)s', line 2> #6 BINARY_ADD", 0)
-    debug_merge_point("<code object f, file '%(fname)s', line 2> #7 RETURN_VALUE", 0)
+    debug_merge_point(0, "<code object f. file '%(fname)s'. line 2> #0 LOAD_FAST")
+    debug_merge_point(0, "<code object f. file '%(fname)s'. line 2> #3 LOAD_FAST")
+    debug_merge_point(0, "<code object f. file '%(fname)s'. line 2> #6 BINARY_ADD")
+    debug_merge_point(0, "<code object f. file '%(fname)s'. line 2> #7 RETURN_VALUE")
     ''' % locals())
     res = Function.from_operations(ops.operations, LoopStorage())
     assert res.chunks[1].lineno == 3
@@ -114,11 +114,11 @@
     fname = str(py.path.local(__file__).join('..', 'x.py'))
     ops = parse('''
     [i0, i1]
-    debug_merge_point("<code object g, file '%(fname)s', line 5> #9 LOAD_FAST", 0)
-    debug_merge_point("<code object g, file '%(fname)s', line 5> #12 LOAD_CONST", 0)
-    debug_merge_point("<code object g, file '%(fname)s', line 5> #22 LOAD_CONST", 0)
-    debug_merge_point("<code object g, file '%(fname)s', line 5> #28 LOAD_CONST", 0)
-    debug_merge_point("<code object g, file '%(fname)s', line 5> #6 SETUP_LOOP", 0)
+    debug_merge_point(0, "<code object g. file '%(fname)s'. line 5> #9 LOAD_FAST")
+    debug_merge_point(0, "<code object g. file '%(fname)s'. line 5> #12 LOAD_CONST")
+    debug_merge_point(0, "<code object g. file '%(fname)s'. line 5> #22 LOAD_CONST")
+    debug_merge_point(0, "<code object g. file '%(fname)s'. line 5> #28 LOAD_CONST")
+    debug_merge_point(0, "<code object g. file '%(fname)s'. line 5> #6 SETUP_LOOP")
     ''' % locals())
     res = Function.from_operations(ops.operations, LoopStorage())
     assert res.linerange == (7, 9)
@@ -128,7 +128,7 @@
     fname = str(py.path.local(__file__).join('..', 'x.py'))
     ops = parse("""
     [p6, p1]
-    debug_merge_point('<code object h, file '%(fname)s', line 11> #17 FOR_ITER', 0)
+    debug_merge_point(0, '<code object h. file '%(fname)s'. line 11> #17 FOR_ITER')
     guard_class(p6, 144264192, descr=<Guard2>)
     p12 = getfield_gc(p6, descr=<GcPtrFieldDescr pypy.objspace.std.iterobject.W_AbstractSeqIterObject.inst_w_seq 12>)
     """ % locals())
@@ -174,7 +174,7 @@
 
 def test_parsing_strliteral():
     loop = parse("""
-    debug_merge_point('StrLiteralSearch at 11/51 [17, 8, 3, 1, 1, 1, 1, 51, 0, 19, 51, 1]', 0)
+    debug_merge_point(0, 'StrLiteralSearch at 11/51 [17, 8, 3, 1, 1, 1, 1, 51, 0, 19, 51, 1]')
     """)
     ops = Function.from_operations(loop.operations, LoopStorage())
     chunk = ops.chunks[0]
diff --git a/pypy/tool/pytest/appsupport.py b/pypy/tool/pytest/appsupport.py
--- a/pypy/tool/pytest/appsupport.py
+++ b/pypy/tool/pytest/appsupport.py
@@ -1,8 +1,13 @@
 import autopath
 import py
-from pypy.interpreter import gateway
+from pypy.interpreter import gateway, pycode
 from pypy.interpreter.error import OperationError
 
+try:
+    from _pytest.assertion.newinterpret import interpret
+except ImportError:
+    from _pytest.assertion.oldinterpret import interpret
+
 # ____________________________________________________________
 
 class AppCode(object):
@@ -51,13 +56,11 @@
         space = self.space
         for key, w_value in vars.items():
             space.setitem(self.w_locals, space.wrap(key), w_value)
-        return space.eval(code, self.w_globals, self.w_locals)
-
-    def exec_(self, code, **vars):
-        space = self.space
-        for key, w_value in vars.items():
-            space.setitem(self.w_locals, space.wrap(key), w_value)
-        space.exec_(code, self.w_globals, self.w_locals)
+        if isinstance(code, str):
+            return space.eval(code, self.w_globals, self.w_locals)
+        pyc = pycode.PyCode._from_code(space, code)
+        return pyc.exec_host_bytecode(self.w_globals, self.w_locals)
+    exec_ = eval
 
     def repr(self, w_value):
         return self.space.unwrap(self.space.repr(w_value))
@@ -163,8 +166,8 @@
             except py.error.ENOENT: 
                 source = None
             from pypy import conftest
-            if source and not py.test.config.option.nomagic:
-                msg = py.code._reinterpret_old(source, runner, should_fail=True)
+            if source and py.test.config._assertstate.mode != "off":
+                msg = interpret(source, runner, should_fail=True)
                 space.setattr(w_self, space.wrap('args'),
                             space.newtuple([space.wrap(msg)]))
                 w_msg = space.wrap(msg)
diff --git a/pypy/tool/pytest/test/test_pytestsupport.py b/pypy/tool/pytest/test/test_pytestsupport.py
--- a/pypy/tool/pytest/test/test_pytestsupport.py
+++ b/pypy/tool/pytest/test/test_pytestsupport.py
@@ -4,7 +4,7 @@
 from pypy.interpreter.pycode import PyCode
 from pypy.interpreter.pyframe import PyFrame
 from pypy.tool.pytest.appsupport import (AppFrame, build_pytest_assertion,
-    AppExceptionInfo)
+    AppExceptionInfo, interpret)
 import py
 from pypy.tool.udir import udir
 import os
@@ -22,8 +22,8 @@
     co = PyCode._from_code(space, somefunc.func_code)
     pyframe = PyFrame(space, co, space.newdict(), None)
     runner = AppFrame(space, pyframe)
-    py.code._reinterpret_old("f = lambda x: x+1", runner, should_fail=False)
-    msg = py.code._reinterpret_old("assert isinstance(f(2), float)", runner)
+    interpret("f = lambda x: x+1", runner, should_fail=False)
+    msg = interpret("assert isinstance(f(2), float)", runner)
     assert msg.startswith("assert isinstance(3, float)\n"
                           " +  where 3 = ")
 
@@ -58,6 +58,12 @@
     except AssertionError, e:
         assert e.msg == "Failed"
 
+def app_test_comparison():
+    try:
+        assert 3 > 4
+    except AssertionError, e:
+        assert "3 > 4" in e.msg
+
 
 def test_appexecinfo(space):
     try:
diff --git a/pypy/tool/release/package.py b/pypy/tool/release/package.py
--- a/pypy/tool/release/package.py
+++ b/pypy/tool/release/package.py
@@ -3,9 +3,9 @@
 It uses 'pypy/translator/goal/pypy-c' and parts of the rest of the working
 copy.  Usage:
 
-    package.py root-pypy-dir [name-of-archive] [name-of-pypy-c]
+    package.py root-pypy-dir [name-of-archive] [name-of-pypy-c] [destination-for-tarball] [pypy-c-path]
 
-Usually you would do:   package.py ../../.. pypy-VER-PLATFORM.
+Usually you would do:   package.py ../../.. pypy-VER-PLATFORM
 The output is found in the directory /tmp/usession-YOURNAME/build/.
 """
 
@@ -122,7 +122,10 @@
             zf.close()
         else:
             archive = str(builddir.join(name + '.tar.bz2'))
-            e = os.system('tar --owner=root --group=root --numeric-owner -cvjf ' + archive + " " + name)
+            if sys.platform == 'darwin':
+                e = os.system('tar --numeric-owner -cvjf ' + archive + " " + name)
+            else:
+                e = os.system('tar --owner=root --group=root --numeric-owner -cvjf ' + archive + " " + name)
             if e:
                 raise OSError('"tar" returned exit status %r' % e)
     finally:
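
The packaging change above only passes --owner/--group when not on Darwin,
presumably because the BSD tar shipped with Mac OS X does not accept those GNU
options.  The same switch expressed with subprocess, as a sketch (package.py
itself uses os.system):

    import subprocess, sys

    def make_tarball(archive, name):
        cmd = ['tar', '--numeric-owner', '-cvjf', archive, name]
        if sys.platform != 'darwin':
            cmd[1:1] = ['--owner=root', '--group=root']
        return subprocess.call(cmd)
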
diff --git a/pypy/tool/test/test_gcc_cache.py b/pypy/tool/test/test_gcc_cache.py
--- a/pypy/tool/test/test_gcc_cache.py
+++ b/pypy/tool/test/test_gcc_cache.py
@@ -1,11 +1,13 @@
-
+import sys
 from pypy.tool.gcc_cache import *
 from pypy.tool.udir import udir
-import md5
+import md5, cStringIO
 from pypy.translator.tool.cbuild import ExternalCompilationInfo
 
+localudir = udir.join('test_gcc_cache').ensure(dir=1)
+
 def test_gcc_exec():
-    f = udir.join("x.c")
+    f = localudir.join("x.c")
     f.write("""
     #include <stdio.h>
     #include <test_gcc_exec.h>
@@ -15,8 +17,8 @@
        return 0;
     }
     """)
-    dir1 = udir.join('test_gcc_exec_dir1').ensure(dir=1)
-    dir2 = udir.join('test_gcc_exec_dir2').ensure(dir=1)
+    dir1 = localudir.join('test_gcc_exec_dir1').ensure(dir=1)
+    dir2 = localudir.join('test_gcc_exec_dir2').ensure(dir=1)
     dir1.join('test_gcc_exec.h').write('#define ANSWER 3\n')
     dir2.join('test_gcc_exec.h').write('#define ANSWER 42\n')
     eci = ExternalCompilationInfo(include_dirs=[str(dir1)])
@@ -36,7 +38,7 @@
     print '>>>'
 
 def test_gcc_ask():
-    f = udir.join("y.c")
+    f = localudir.join("y.c")
     f.write("""
     #include <stdio.h>
     #include <test_gcc_ask.h>
@@ -46,8 +48,8 @@
        return 0;
     }
     """)
-    dir1 = udir.join('test_gcc_ask_dir1').ensure(dir=1)
-    dir2 = udir.join('test_gcc_ask_dir2').ensure(dir=1)
+    dir1 = localudir.join('test_gcc_ask_dir1').ensure(dir=1)
+    dir2 = localudir.join('test_gcc_ask_dir2').ensure(dir=1)
     dir1.join('test_gcc_ask.h').write('/* hello world */\n')
     dir2.join('test_gcc_ask.h').write('#error boom\n')
     eci = ExternalCompilationInfo(include_dirs=[str(dir1)])
@@ -63,3 +65,15 @@
     print '<<<'
     print err
     print '>>>'
+
+def test_gcc_ask_doesnt_log_errors():
+    f = localudir.join('z.c')
+    f.write("""this file is not valid C code\n""")
+    eci = ExternalCompilationInfo()
+    oldstderr = sys.stderr
+    try:
+        sys.stderr = capture = cStringIO.StringIO()
+        py.test.raises(CompilationError, try_compile_cache, [f], eci)
+    finally:
+        sys.stderr = oldstderr
+    assert 'ERROR' not in capture.getvalue().upper()
diff --git a/pypy/translator/c/gc.py b/pypy/translator/c/gc.py
--- a/pypy/translator/c/gc.py
+++ b/pypy/translator/c/gc.py
@@ -297,6 +297,13 @@
 
     gc_startup_code = RefcountingGcPolicy.gc_startup_code.im_func
 
+    def compilation_info(self):
+        eci = BasicGcPolicy.compilation_info(self)
+        eci = eci.merge(ExternalCompilationInfo(
+            post_include_bits=['#define USING_NO_GC_AT_ALL'],
+            ))
+        return eci
+
 
 class FrameworkGcPolicy(BasicGcPolicy):
     transformerclass = framework.FrameworkGCTransformer
diff --git a/pypy/translator/c/gcc/instruction.py b/pypy/translator/c/gcc/instruction.py
--- a/pypy/translator/c/gcc/instruction.py
+++ b/pypy/translator/c/gcc/instruction.py
@@ -187,8 +187,8 @@
 
     def requestgcroots(self, tracker):
         # no need to track the value of these registers in the caller
-        # function if we are the main(), or if we are flagged as a
-        # "bottom" function (a callback from C code)
+        # function if we are flagged as a "bottom" function (a callback
+        # from C code, or pypy_main_function())
         if tracker.is_stack_bottom:
             return {}
         else:
diff --git a/pypy/translator/c/gcc/test/elf/track10.s b/pypy/translator/c/gcc/test/elf/track10.s
--- a/pypy/translator/c/gcc/test/elf/track10.s
+++ b/pypy/translator/c/gcc/test/elf/track10.s
@@ -1,5 +1,5 @@
-	.type	main, @function
-main:
+	.type	main1, @function
+main1:
 	pushl	%ebx
 	call	pypy_f
 	;; expected {4(%esp) | (%esp), %esi, %edi, %ebp | %ebx}
@@ -11,4 +11,4 @@
 	/* GCROOT %ebx */
 	popl	%ebx
 	ret
-	.size	main, .-main
+	.size	main1, .-main1
diff --git a/pypy/translator/c/gcc/test/elf/track12.s b/pypy/translator/c/gcc/test/elf/track12.s
new file mode 100644
--- /dev/null
+++ b/pypy/translator/c/gcc/test/elf/track12.s
@@ -0,0 +1,9 @@
+	.type	pypy_f, @function
+pypy_f:
+	pushl   4(%esp)
+	call    pypy_other
+	;; expected {4(%esp) | %ebx, %esi, %edi, %ebp | (%esp)}
+	popl    %eax
+	/* GCROOT %eax */
+	ret
+	.size	pypy_f, .-pypy_f
diff --git a/pypy/translator/c/gcc/test/elf/track13.s b/pypy/translator/c/gcc/test/elf/track13.s
new file mode 100644
--- /dev/null
+++ b/pypy/translator/c/gcc/test/elf/track13.s
@@ -0,0 +1,9 @@
+	.type	pypy_f, @function
+pypy_f:
+	call    pypy_other
+	;; expected {(%esp) | %ebx, %esi, %edi, %ebp | 8(%esp)}
+	pushl   8(%esp)
+	popl    %eax
+	/* GCROOT %eax */
+	ret
+	.size	pypy_f, .-pypy_f
diff --git a/pypy/translator/c/gcc/test/elf/track4.s b/pypy/translator/c/gcc/test/elf/track4.s
deleted file mode 100644
--- a/pypy/translator/c/gcc/test/elf/track4.s
+++ /dev/null
@@ -1,52 +0,0 @@
-	.type	main, @function
-main:
-	;; this is an artificial example showing what kind of code gcc
-	;; can produce for main()
-	pushl	%ebp
-	movl	%eax, $globalptr1
-	movl	%esp, %ebp
-	pushl	%edi
-	subl	$8, %esp
-	andl	$-16, %esp
-	movl	%ebx, -8(%ebp)
-	movl	8(%ebp), %edi
-	call	foobar
-	;; expected {4(%ebp) | -8(%ebp), %esi, -4(%ebp), (%ebp) | %edi}
-.L1:
-	cmpl	$0, %eax
-	je	.L3
-.L2:
-	;; inlined function here with -fomit-frame-pointer
-	movl	%eax, -12(%ebp)
-	movl	%edi, %edx
-	subl	$16, %esp
-	movl	%eax, (%esp)
-	movl	$42, %edi
-	movl	%edx, 4(%esp)
-	movl	%esi, %ebx
-	movl	$nonsense, %esi
-	call	foobar
-	;; expected {4(%ebp) | -8(%ebp), %ebx, -4(%ebp), (%ebp) | 4(%esp), -12(%ebp)}
-	addl	%edi, %eax
-	movl	4(%esp), %eax
-	movl	%ebx, %esi
-	addl	$16, %esp
-	movl	%eax, %edi
-	movl	-12(%ebp), %eax
-#APP
-	/* GCROOT %eax */
-#NO_APP
-	;; end of inlined function
-.L3:
-	call	foobar
-	;; expected {4(%ebp) | -8(%ebp), %esi, -4(%ebp), (%ebp) | %edi}
-#APP
-	/* GCROOT %edi */
-#NO_APP
-	movl	-8(%ebp), %ebx
-	movl	-4(%ebp), %edi
-	movl	%ebp, %esp
-	popl	%ebp
-	ret
-
-	.size	main, .-main
diff --git a/pypy/translator/c/gcc/test/elf/track6.s b/pypy/translator/c/gcc/test/elf/track6.s
deleted file mode 100644
--- a/pypy/translator/c/gcc/test/elf/track6.s
+++ /dev/null
@@ -1,26 +0,0 @@
-	.type	main, @function
-main:
-	;; a minimal example showing what kind of code gcc
-	;; can produce for main(): some local variable accesses
-	;; are relative to %ebp, while others are relative to
-	;; %esp, and the difference %ebp-%esp is not constant
-	;; because of the 'andl' to align the stack
-	pushl	%ebp
-	movl	%esp, %ebp
-	subl	$8, %esp
-	andl	$-16, %esp
-	movl	$globalptr1, -4(%ebp)
-	movl	$globalptr2, (%esp)
-	pushl	$0
-	call	foobar
-	;; expected {4(%ebp) | %ebx, %esi, %edi, (%ebp) | 4(%esp), -4(%ebp)}
-	popl	%eax
-#APP
-	/* GCROOT -4(%ebp) */
-	/* GCROOT (%esp) */
-#NO_APP
-	movl	%ebp, %esp
-	popl	%ebp
-	ret
-
-	.size	main, .-main
diff --git a/pypy/translator/c/gcc/test/elf/track7.s b/pypy/translator/c/gcc/test/elf/track7.s
--- a/pypy/translator/c/gcc/test/elf/track7.s
+++ b/pypy/translator/c/gcc/test/elf/track7.s
@@ -1,5 +1,5 @@
-	.type	main, @function
-main:
+	.type	main1, @function
+main1:
 	;; cmovCOND tests.
 	pushl	%ebx
 	movl	12(%esp), %ebx
@@ -16,4 +16,4 @@
 	popl	%ebx
 	ret
 
-	.size	main, .-main
+	.size	main1, .-main1
diff --git a/pypy/translator/c/gcc/test/msvc/track6.s b/pypy/translator/c/gcc/test/msvc/track6.s
deleted file mode 100644
--- a/pypy/translator/c/gcc/test/msvc/track6.s
+++ /dev/null
@@ -1,15 +0,0 @@
-_TEXT	SEGMENT
-_pypy_g_foo PROC					; COMDAT
-
-	push	ebp
-	mov	ebp, esp
-	and	esp, -64
-	sub	esp, 12
-	push	esi
-	call	_pypy_g_something_else
-	;; expected {4(%ebp) | %ebx, (%esp), %edi, (%ebp) | }
-	pop	esi
-	mov	esp, ebp
-	pop	ebp
-	ret	0
-_pypy_g_foo ENDP
diff --git a/pypy/translator/c/gcc/test/msvc/track_and_esp.s b/pypy/translator/c/gcc/test/msvc/track_and_esp.s
new file mode 100644
--- /dev/null
+++ b/pypy/translator/c/gcc/test/msvc/track_and_esp.s
@@ -0,0 +1,474 @@
+PUBLIC	??_C@_0BN@BIPHFGBC@pypy_g_ll_math_ll_math_frexp?$AA@ ; `string'
+PUBLIC	_pypy_g_ll_math_ll_math_frexp
+;	COMDAT ??_C@_0BN@BIPHFGBC@pypy_g_ll_math_ll_math_frexp?$AA@
+CONST	SEGMENT
+??_C@_0BN@BIPHFGBC@pypy_g_ll_math_ll_math_frexp?$AA@ DB 'pypy_g_ll_math_l'
+	DB	'l_math_frexp', 00H				; `string'
+; Function compile flags: /Ogtpy
+CONST	ENDS
+;	COMDAT _pypy_g_ll_math_ll_math_frexp
+_TEXT	SEGMENT
+_l_mantissa_0$ = -8					; size = 8
+_l_v21638$ = -8						; size = 8
+_l_x_14$ = 8						; size = 8
+_pypy_g_ll_math_ll_math_frexp PROC			; COMDAT
+
+; 58245: struct pypy_tuple2_0 *pypy_g_ll_math_ll_math_frexp(double l_x_14) {
+
+	push	ebp
+	mov	ebp, esp
+	and	esp, -64				; ffffffc0H
+
+; 58246: 	long *l_exp_p_0; double l_mantissa_0; bool_t l_v21641;
+; 58247: 	bool_t l_v21643; bool_t l_v21644; bool_t l_v21646; bool_t l_v21647;
+; 58248: 	bool_t l_v21652; bool_t l_v21653; bool_t l_v21660; bool_t l_v21666;
+; 58249: 	bool_t l_v21670; bool_t l_v21674; bool_t l_v21676; double l_v21638;
+; 58250: 	long l_v21637; long l_v21649; long l_v21651; long l_v21677;
+; 58251: 	long l_v21678; struct pypy_exceptions_Exception0 *l_v21687;
+; 58252: 	struct pypy_header0 *l_v21654; struct pypy_object0 *l_v21682;
+; 58253: 	struct pypy_object0 *l_v21691; struct pypy_object_vtable0 *l_v21665;
+; 58254: 	struct pypy_object_vtable0 *l_v21669;
+; 58255: 	struct pypy_object_vtable0 *l_v21675;
+; 58256: 	struct pypy_object_vtable0 *l_v21683; struct pypy_tuple2_0 *l_v21640;
+; 58257: 	struct pypy_tuple2_0 *l_v21695; void* l_v21639; void* l_v21648;
+; 58258: 	void* l_v21650; void* l_v21656; void* l_v21658; void* l_v21659;
+; 58259: 	void* l_v21668; void* l_v21672; void* l_v21679; void* l_v21688;
+; 58260: 	void* l_v21696;
+; 58261: 	goto block0;
+; 58262: 
+; 58263:     block0:
+; 58264: 	l_v21641 = pypy_g_ll_math_ll_math_isnan(l_x_14);
+
+	fld	QWORD PTR _l_x_14$[ebp]
+	sub	esp, 52					; 00000034H
+	push	ebx
+	push	esi
+	push	edi
+	sub	esp, 8
+	fstp	QWORD PTR [esp]
+$block0$88239:
+	call	_pypy_g_ll_math_ll_math_isnan
+
+; 58265: 	pypy_asm_gc_nocollect(pypy_g_ll_math_ll_math_isnan);
+; 58266: 	l_v21643 = l_v21641;
+; 58267: 	if (l_v21643) {
+; 58268: 		l_v21637 = 0L;
+; 58269: 		l_v21638 = l_x_14;
+
+	fld	QWORD PTR _l_x_14$[ebp]
+	add	esp, 8
+	test	al, al
+
+; 58270: 		goto block3;
+
+	jne	SHORT $LN10@pypy_g_ll_@159
+
+; 58271: 	}
+; 58272: 	goto block1;
+; 58273: 
+; 58274:     block1:
+; 58275: 	l_v21644 = pypy_g_ll_math_ll_math_isinf(l_x_14);
+
+	sub	esp, 8
+	fstp	QWORD PTR [esp]
+$block1$88243:
+	call	_pypy_g_ll_math_ll_math_isinf
+	add	esp, 8
+
+; 58276: 	pypy_asm_gc_nocollect(pypy_g_ll_math_ll_math_isinf);
+; 58277: 	l_v21646 = l_v21644;
+; 58278: 	if (l_v21646) {
+
+	test	al, al
+	je	SHORT $block2$88245
+
+; 58279: 		l_v21637 = 0L;
+; 58280: 		l_v21638 = l_x_14;
+
+	fld	QWORD PTR _l_x_14$[ebp]
+$LN10@pypy_g_ll_@159:
+
+; 58288: 		goto block14;
+; 58289: 	}
+; 58290: 	l_v21637 = 0L;
+
+	xor	edi, edi
+$LN30@pypy_g_ll_@159:
+
+; 58291: 	l_v21638 = l_x_14;
+; 58292: 	goto block3;
+; 58293: 
+; 58294:     block3:
+; 58295: 	l_v21648 = (&pypy_g_pypy_rpython_memory_gc_semispace_SemiSpaceGC)->ssgc_inst_free;
+
+	mov	esi, DWORD PTR _pypy_g_pypy_rpython_memory_gc_semispace_SemiSpaceGC+4
+	fstp	QWORD PTR _l_v21638$[esp+64]
+
+; 58296: 	OP_RAW_MALLOC_USAGE((0 + ROUND_UP_FOR_ALLOCATION(sizeof(struct pypy_tuple2_0), sizeof(struct pypy_forwarding_stub0))), l_v21649);
+; 58297: 	l_v21650 = (&pypy_g_pypy_rpython_memory_gc_semispace_SemiSpaceGC)->ssgc_inst_top_of_space;
+; 58298: 	OP_ADR_DELTA(l_v21650, l_v21648, l_v21651);
+
+	mov	eax, DWORD PTR _pypy_g_pypy_rpython_memory_gc_semispace_SemiSpaceGC+12
+	sub	eax, esi
+
+; 58299: 	OP_INT_GT(l_v21649, l_v21651, l_v21652);
+
+	cmp	eax, 24					; 00000018H
+$block3$88242:
+
+; 58300: 	if (l_v21652) {
+
+	jge	$block4$88260
+
+; 58334: 	l_v21695 = l_v21640;
+; 58335: 	goto block8;
+; 58336: 
+; 58337:     block8:
+; 58338: 	RPY_DEBUG_RETURN();
+; 58339: 	return l_v21695;
+; 58340: 
+; 58341:     block9:
+; 58342: 	PYPY_DEBUG_RECORD_TRACEBACK("ll_math_ll_math_frexp");
+; 58343: 	l_v21695 = ((struct pypy_tuple2_0 *) NULL);
+; 58344: 	goto block8;
+; 58345: 
+; 58346:     block10:
+; 58347: 	abort();  /* debug_llinterpcall should be unreachable */
+; 58348: 	l_v21665 = (&pypy_g_ExcData)->ed_exc_type;
+; 58349: 	l_v21666 = (l_v21665 == NULL);
+; 58350: 	if (!l_v21666) {
+; 58351: 		goto block11;
+; 58352: 	}
+; 58353: 	goto block5;
+; 58354: 
+; 58355:     block11:
+; 58356: 	PYPY_DEBUG_RECORD_TRACEBACK("ll_math_ll_math_frexp");
+; 58357: 	l_v21696 = NULL;
+; 58358: 	goto block6;
+; 58359: 
+; 58360:     block12:
+; 58361: 	l_v21668 = pypy_g_SemiSpaceGC_obtain_free_space((&pypy_g_pypy_rpython_memory_gc_semispace_SemiSpaceGC), (0 + ROUND_UP_FOR_ALLOCATION(sizeof(struct pypy_tuple2_0), sizeof(struct pypy_forwarding_stub0))));
+
+	push	24					; 00000018H
+	push	OFFSET _pypy_g_pypy_rpython_memory_gc_semispace_SemiSpaceGC
+$block12$88259:
+	call	_pypy_g_SemiSpaceGC_obtain_free_space
+    ;; expected {4(%ebp) | 16(%esp), 12(%esp), 8(%esp), (%ebp) | }
+
+; 58362: 	l_v21669 = (&pypy_g_ExcData)->ed_exc_type;
+; 58363: 	l_v21670 = (l_v21669 == NULL);
+
+	xor	ecx, ecx
+	add	esp, 8
+	cmp	DWORD PTR _pypy_g_ExcData, ecx
+
+; 58364: 	if (!l_v21670) {
+
+	je	$LN5@pypy_g_ll_@159
+
+; 58368: 	goto block4;
+; 58369: 
+; 58370:     block13:
+; 58371: 	PYPY_DEBUG_RECORD_TRACEBACK("ll_math_ll_math_frexp");
+
+	mov	eax, DWORD PTR _pypydtcount
+	mov	DWORD PTR _pypy_debug_tracebacks[eax*8], OFFSET ?loc@?N@??pypy_g_ll_math_ll_math_frexp@@9@9
+	mov	DWORD PTR _pypy_debug_tracebacks[eax*8+4], ecx
+	inc	eax
+	and	eax, 8191				; 00001fffH
+	mov	DWORD PTR _pypy_debug_tracebacks[eax*8], OFFSET ?loc@?8??pypy_g_ll_math_ll_math_frexp@@9@9
+	mov	DWORD PTR _pypy_debug_tracebacks[eax*8+4], ecx
+	inc	eax
+	and	eax, 8191				; 00001fffH
+	mov	DWORD PTR _pypydtcount, eax
+$block13$88313:
+$block9$88285:
+	xor	eax, eax
+
+; 58423: 	goto block8;
+; 58424: }
+
+	pop	edi
+	pop	esi
+	pop	ebx
+	mov	esp, ebp
+	pop	ebp
+	ret	0
+$block2$88245:
+
+; 58281: 		goto block3;
+; 58282: 	}
+; 58283: 	goto block2;
+; 58284: 
+; 58285:     block2:
+; 58286: 	OP_FLOAT_IS_TRUE(l_x_14, l_v21647);
+
+	fldz
+	fld	QWORD PTR _l_x_14$[ebp]
+	fucom	ST(1)
+	fnstsw	ax
+	fstp	ST(1)
+	test	ah, 68					; 00000044H
+
+; 58287: 	if (l_v21647) {
+
+	jnp	$LN10@pypy_g_ll_@159
+
+; 58372: 	l_v21696 = NULL;
+; 58373: 	goto block6;
+; 58374: 
+; 58375:     block14:
+; 58376: 	l_v21672 = pypy_g__ll_malloc_varsize_no_length__Signed_Signed_Sign(1L, (0 + 0), sizeof(long));
+
+	push	4
+	fstp	ST(0)
+	push	0
+	push	1
+$block14$88247:
+	call	_pypy_g__ll_malloc_varsize_no_length__Signed_Signed_Sign
+    ;; expected {4(%ebp) | 20(%esp), 16(%esp), 12(%esp), (%ebp) | }
+	mov	esi, eax
+
+; 58377: 	OP_TRACK_ALLOC_START(l_v21672, /* nothing */);
+
+	push	OFFSET ??_C@_0BN@BIPHFGBC@pypy_g_ll_math_ll_math_frexp?$AA@
+	push	esi
+	call	_pypy_debug_alloc_start
+    ;; expected {4(%ebp) | 28(%esp), 24(%esp), 20(%esp), (%ebp) | }
+	add	esp, 20					; 00000014H
+
+; 58378: 	l_exp_p_0 = (long *)l_v21672;
+; 58379: 	l_v21674 = (l_exp_p_0 != NULL);
+
+	test	esi, esi
+
+; 58380: 	if (!l_v21674) {
+
+	jne	SHORT $block15$88324
+
+; 58418: 	goto block8;
+; 58419: 
+; 58420:     block18:
+; 58421: 	PYPY_DEBUG_RECORD_TRACEBACK("ll_math_ll_math_frexp");
+
+	mov	eax, DWORD PTR _pypydtcount
+	mov	DWORD PTR _pypy_debug_tracebacks[eax*8], OFFSET ?loc@?BB@??pypy_g_ll_math_ll_math_frexp@@9@9
+	mov	DWORD PTR _pypy_debug_tracebacks[eax*8+4], esi
+	inc	eax
+	and	eax, 8191				; 00001fffH
+	mov	DWORD PTR _pypydtcount, eax
+$block18$88323:
+
+; 58422: 	l_v21695 = ((struct pypy_tuple2_0 *) NULL);
+
+	xor	eax, eax
+
+; 58423: 	goto block8;
+; 58424: }
+
+	pop	edi
+	pop	esi
+	pop	ebx
+	mov	esp, ebp
+	pop	ebp
+	ret	0
+$block15$88324:
+
+; 58381: 		goto block18;
+; 58382: 	}
+; 58383: 	goto block15;
+; 58384: 
+; 58385:     block15:
+; 58386: 	l_mantissa_0 = pypy_g_frexp__Float_arrayPtr_star_2(l_x_14, l_exp_p_0);
+
+	fld	QWORD PTR _l_x_14$[ebp]
+	push	esi
+	sub	esp, 8
+	fstp	QWORD PTR [esp]
+	call	_pypy_g_frexp__Float_arrayPtr_star_2
+    ;; expected {4(%ebp) | 20(%esp), 16(%esp), 12(%esp), (%ebp) | }
+
+; 58387: 	l_v21675 = (&pypy_g_ExcData)->ed_exc_type;
+; 58388: 	l_v21676 = (l_v21675 == NULL);
+
+	mov	edi, DWORD PTR _pypy_g_ExcData
+	fstp	QWORD PTR _l_mantissa_0$[esp+76]
+	add	esp, 12					; 0000000cH
+	test	edi, edi
+
+; 58389: 	if (!l_v21676) {
+
+	je	SHORT $block16$88328
+
+; 58403: 
+; 58404:     block17:
+; 58405: 	l_v21682 = (&pypy_g_ExcData)->ed_exc_value;
+; 58406: 	l_v21683 = (&pypy_g_ExcData)->ed_exc_type;
+; 58407: 	PYPY_DEBUG_CATCH_EXCEPTION("ll_math_ll_math_frexp", l_v21683, l_v21683 == (&pypy_g_py__code_assertion_AssertionError_vtable.ae_super.ae_super.se_super.e_super) || l_v21683 == (&pypy_g_exceptions_NotImplementedError_vtable.nie_super.re_super.se_super.e_super));
+
+	mov	eax, DWORD PTR _pypydtcount
+	mov	ebx, DWORD PTR _pypy_g_ExcData+4
+	mov	DWORD PTR _pypy_debug_tracebacks[eax*8], OFFSET ?loc@?BA@??pypy_g_ll_math_ll_math_frexp@@9@9
+	mov	DWORD PTR _pypy_debug_tracebacks[eax*8+4], edi
+	inc	eax
+	and	eax, 8191				; 00001fffH
+$block17$88327:
+	mov	DWORD PTR _pypydtcount, eax
+	cmp	edi, OFFSET _pypy_g_py__code_assertion_AssertionError_vtable
+	je	SHORT $LN1@pypy_g_ll_@159
+	cmp	edi, OFFSET _pypy_g_exceptions_NotImplementedError_vtable
+	jne	SHORT $LN2@pypy_g_ll_@159
+$LN1@pypy_g_ll_@159:
+	call	_pypy_debug_catch_fatal_exception
+$LN2@pypy_g_ll_@159:
+
+; 58408: 	(&pypy_g_ExcData)->ed_exc_value = ((struct pypy_object0 *) NULL);
+
+	xor	eax, eax
+
+; 58409: 	(&pypy_g_ExcData)->ed_exc_type = ((struct pypy_object_vtable0 *) NULL);
+; 58410: 	l_v21687 = (struct pypy_exceptions_Exception0 *)l_v21682;
+; 58411: 	l_v21688 = (void*)l_exp_p_0;
+; 58412: 	OP_TRACK_ALLOC_STOP(l_v21688, /* nothing */);
+
+	push	esi
+	mov	DWORD PTR _pypy_g_ExcData+4, eax
+	mov	DWORD PTR _pypy_g_ExcData, eax
+	call	_pypy_debug_alloc_stop
+    ;; expected {4(%ebp) | 12(%esp), 8(%esp), 4(%esp), (%ebp) | }
+
+; 58413: 	OP_RAW_FREE(l_v21688, /* nothing */);
+
+	push	esi
+	call	_PyObject_Free
+    ;; expected {4(%ebp) | 16(%esp), 12(%esp), 8(%esp), (%ebp) | }
+
+; 58414: 	l_v21691 = (struct pypy_object0 *)l_v21687;
+; 58415: 	pypy_g_RPyReRaiseException(l_v21683, l_v21691);
+
+	push	ebx
+	push	edi
+	call	_pypy_g_RPyReRaiseException
+	add	esp, 16					; 00000010H
+
+; 58416: 	pypy_asm_gc_nocollect(pypy_g_RPyReRaiseException);
+; 58417: 	l_v21695 = ((struct pypy_tuple2_0 *) NULL);
+
+	xor	eax, eax
+
+; 58423: 	goto block8;
+; 58424: }
+
+	pop	edi
+	pop	esi
+	pop	ebx
+	mov	esp, ebp
+	pop	ebp
+	ret	0
+$block16$88328:
+
+; 58390: 		goto block17;
+; 58391: 	}
+; 58392: 	goto block16;
+; 58393: 
+; 58394:     block16:
+; 58395: 	l_v21677 = RPyBareItem(l_exp_p_0, 0L);
+; 58396: 	l_v21678 = (long)(l_v21677);
+
+	mov	edi, DWORD PTR [esi]
+
+; 58397: 	l_v21679 = (void*)l_exp_p_0;
+; 58398: 	OP_TRACK_ALLOC_STOP(l_v21679, /* nothing */);
+
+	push	esi
+	call	_pypy_debug_alloc_stop
+    ;; expected {4(%ebp) | 12(%esp), 8(%esp), 4(%esp), (%ebp) | }
+
+; 58399: 	OP_RAW_FREE(l_v21679, /* nothing */);
+
+	push	esi
+	call	_PyObject_Free
+    ;; expected {4(%ebp) | 16(%esp), 12(%esp), 8(%esp), (%ebp) | }
+
+; 58400: 	l_v21637 = l_v21678;
+; 58401: 	l_v21638 = l_mantissa_0;
+
+	fld	QWORD PTR _l_mantissa_0$[esp+72]
+	add	esp, 8
+
+; 58402: 	goto block3;
+
+	jmp	$LN30@pypy_g_ll_@159
+$LN5@pypy_g_ll_@159:
+
+; 58365: 		goto block13;
+; 58366: 	}
+; 58367: 	l_v21639 = l_v21668;
+
+	mov	esi, eax
+$block4$88260:
+$block5$88263:
+
+; 58301: 		goto block12;
+; 58302: 	}
+; 58303: 	l_v21639 = l_v21648;
+; 58304: 	goto block4;
+; 58305: 
+; 58306:     block4:
+; 58307: 	OP_INT_IS_TRUE(RUNNING_ON_LLINTERP, l_v21653);
+; 58308: 	if (l_v21653) {
+; 58309: 		goto block10;
+; 58310: 	}
+; 58311: 	goto block5;
+; 58312: 
+; 58313:     block5:
+; 58314: 	l_v21654 = (struct pypy_header0 *)l_v21639;
+; 58315: 	RPyField(l_v21654, h_tid) = (GROUP_MEMBER_OFFSET(struct group_pypy_g_typeinfo_s, member20)+0L);
+
+	test	esi, esi
+	jne	SHORT $LN18@pypy_g_ll_@159
+	call	_RPyAbort
+$LN18@pypy_g_ll_@159:
+
+; 58316: 	OP_ADR_ADD(l_v21639, (0 + ROUND_UP_FOR_ALLOCATION(sizeof(struct pypy_tuple2_0), sizeof(struct pypy_forwarding_stub0))), l_v21656);
+; 58317: 	(&pypy_g_pypy_rpython_memory_gc_semispace_SemiSpaceGC)->ssgc_inst_free = l_v21656;
+; 58318: 	OP_ADR_ADD(l_v21639, 0, l_v21658);
+; 58319: 	l_v21659 = (void*)l_v21658;
+; 58320: 	l_v21696 = l_v21659;
+; 58321: 	goto block6;
+; 58322: 
+; 58323:     block6:
+; 58324: 	l_v21640 = (struct pypy_tuple2_0 *)l_v21696;
+; 58325: 	l_v21660 = (l_v21640 != NULL);
+; 58326: 	if (!l_v21660) {
+; 58327: 		goto block9;
+; 58328: 	}
+; 58329: 	goto block7;
+; 58330: 
+; 58331:     block7:
+; 58332: 	RPyField(l_v21640, t_item0) = l_v21638;
+
+	fld	QWORD PTR _l_v21638$[esp+64]
+	mov	DWORD PTR [esi], 81			; 00000051H
+	lea	ecx, DWORD PTR [esi+24]
+	mov	DWORD PTR _pypy_g_pypy_rpython_memory_gc_semispace_SemiSpaceGC+4, ecx
+	fstp	QWORD PTR [esi+8]
+
+; 58333: 	RPyField(l_v21640, t_item1) = l_v21637;
+
+	mov	DWORD PTR [esi+16], edi
+
+; 58423: 	goto block8;
+; 58424: }
+
+	pop	edi
+	mov	eax, esi
+	pop	esi
+$block6$88281:
+$block8$88289:
+	pop	ebx
+	mov	esp, ebp
+	pop	ebp
+	ret	0
+_pypy_g_ll_math_ll_math_frexp ENDP
+_TEXT	ENDS
diff --git a/pypy/translator/c/gcc/trackgcroot.py b/pypy/translator/c/gcc/trackgcroot.py
--- a/pypy/translator/c/gcc/trackgcroot.py
+++ b/pypy/translator/c/gcc/trackgcroot.py
@@ -39,10 +39,15 @@
         self.uses_frame_pointer = False
         self.r_localvar = self.r_localvarnofp
         self.filetag = filetag
-        # a "stack bottom" function is either main() or a callback from C code
+        # a "stack bottom" function is either pypy_main_function() or a
+        # callback from C code.  In both cases they are identified by
+        # the presence of pypy_asm_stack_bottom().
         self.is_stack_bottom = False
 
     def computegcmaptable(self, verbose=0):
+        if self.funcname in ['main', '_main']:
+            return []     # don't analyze main(), its prologue may contain
+                          # strange instructions
         self.findlabels()
         self.parse_instructions()
         try:
@@ -226,7 +231,7 @@
         # in the frame at this point.  This doesn't count the return address
         # which is the word immediately following the frame in memory.
         # The 'framesize' is set to an odd value if it is only an estimate
-        # (see visit_andl()).
+        # (see InsnCannotFollowEsp).
 
         def walker(insn, size_delta):
             check = deltas.setdefault(insn, size_delta)
@@ -266,7 +271,8 @@
 
             match = self.r_localvar_esp.match(localvar)
             if match:
-                if localvar == self.TOP_OF_STACK: # for pushl and popl, by
+                if localvar == self.TOP_OF_STACK_MINUS_WORD:
+                                                  # for pushl and popl, by
                     hint = None                   # default ebp addressing is
                 else:                             # a bit nicer
                     hint = 'esp'
@@ -521,9 +527,8 @@
         target = match.group("target")
         if target == self.ESP:
             # only for  andl $-16, %esp  used to align the stack in main().
-            # The exact amount of adjutment is not known yet, so we use
-            # an odd-valued estimate to make sure the real value is not used
-            # elsewhere by the FunctionGcRootTracker.
+            # main() should not be seen at all.  But on e.g. MSVC we see
+            # the instruction somewhere else too...
             return InsnCannotFollowEsp()
         else:
             return self.binary_insn(line)
@@ -588,10 +593,12 @@
     def _visit_push(self, line):
         match = self.r_unaryinsn.match(line)
         source = match.group(1)
-        return [InsnStackAdjust(-self.WORD)] + self.insns_for_copy(source, self.TOP_OF_STACK)
+        return self.insns_for_copy(source, self.TOP_OF_STACK_MINUS_WORD) + \
+               [InsnStackAdjust(-self.WORD)]
 
     def _visit_pop(self, target):
-        return self.insns_for_copy(self.TOP_OF_STACK, target) + [InsnStackAdjust(+self.WORD)]
+        return [InsnStackAdjust(+self.WORD)] + \
+               self.insns_for_copy(self.TOP_OF_STACK_MINUS_WORD, target)
 
     def _visit_prologue(self):
         # for the prologue of functions that use %ebp as frame pointer
@@ -983,15 +990,15 @@
     OPERAND = r'(?:[-\w$%+.:@"]+(?:[(][\w%,]+[)])?|[(][\w%,]+[)])'
     LABEL   = r'([a-zA-Z_$.][a-zA-Z0-9_$@.]*)'
     OFFSET_LABELS   = 2**30
-    TOP_OF_STACK = '0(%esp)'
+    TOP_OF_STACK_MINUS_WORD = '-4(%esp)'
 
     r_functionstart = re.compile(r"\t.type\s+"+LABEL+",\s*[@]function\s*$")
     r_functionend   = re.compile(r"\t.size\s+"+LABEL+",\s*[.]-"+LABEL+"\s*$")
-    LOCALVAR        = r"%eax|%edx|%ecx|%ebx|%esi|%edi|%ebp|\d*[(]%esp[)]"
+    LOCALVAR        = r"%eax|%edx|%ecx|%ebx|%esi|%edi|%ebp|-?\d*[(]%esp[)]"
     LOCALVARFP      = LOCALVAR + r"|-?\d*[(]%ebp[)]"
     r_localvarnofp  = re.compile(LOCALVAR)
     r_localvarfp    = re.compile(LOCALVARFP)
-    r_localvar_esp  = re.compile(r"(\d*)[(]%esp[)]")
+    r_localvar_esp  = re.compile(r"(-?\d*)[(]%esp[)]")
     r_localvar_ebp  = re.compile(r"(-?\d*)[(]%ebp[)]")
 
     r_rel_label      = re.compile(r"(\d+):\s*$")
@@ -1044,7 +1051,7 @@
     OPERAND = r'(?:[-\w$%+.:@"]+(?:[(][\w%,]+[)])?|[(][\w%,]+[)])'
     LABEL   = r'([a-zA-Z_$.][a-zA-Z0-9_$@.]*)'
     OFFSET_LABELS   = 2**30
-    TOP_OF_STACK = '0(%rsp)'
+    TOP_OF_STACK_MINUS_WORD = '-8(%rsp)'
 
     r_functionstart = re.compile(r"\t.type\s+"+LABEL+",\s*[@]function\s*$")
     r_functionend   = re.compile(r"\t.size\s+"+LABEL+",\s*[.]-"+LABEL+"\s*$")
@@ -1140,7 +1147,7 @@
     CALLEE_SAVE_REGISTERS = ['ebx', 'esi', 'edi', 'ebp']
     REG2LOC = dict((_reg, LOC_REG | ((_i+1)<<2))
                    for _i, _reg in enumerate(CALLEE_SAVE_REGISTERS))
-    TOP_OF_STACK = 'DWORD PTR [esp]'
+    TOP_OF_STACK_MINUS_WORD = 'DWORD PTR [esp-4]'
 
     OPERAND = r'(?:(:?WORD|DWORD|BYTE) PTR |OFFSET )?[_\w?:@$]*(?:[-+0-9]+)?(:?\[[-+*\w0-9]+\])?'
     LABEL   = r'([a-zA-Z_$@.][a-zA-Z0-9_$@.]*)'
@@ -1170,7 +1177,7 @@
     r_gcroot_marker = re.compile(r"$1") # never matches
     r_gcroot_marker_var = re.compile(r"DWORD PTR .+_constant_always_one_.+pypy_asm_gcroot")
     r_gcnocollect_marker = re.compile(r"\spypy_asm_gc_nocollect\(("+OPERAND+")\);")
-    r_bottom_marker = re.compile(r"; .+\tpypy_asm_stack_bottom\(\);")
+    r_bottom_marker = re.compile(r"; .+\spypy_asm_stack_bottom\(\);")
 
     FUNCTIONS_NOT_RETURNING = {
         '__exit': None,
@@ -1323,12 +1330,11 @@
         self.verbose = verbose
         self.shuffle = shuffle
         self.gcmaptable = []
-        self.seen_main = False
 
-    def process(self, iterlines, newfile, entrypoint='main', filename='?'):
+    def process(self, iterlines, newfile, filename='?'):
         for in_function, lines in self.find_functions(iterlines):
             if in_function:
-                tracker = self.process_function(lines, entrypoint, filename)
+                tracker = self.process_function(lines, filename)
                 lines = tracker.lines
             self.write_newfile(newfile, lines, filename.split('.')[0])
         if self.verbose == 1:
@@ -1337,11 +1343,9 @@
     def write_newfile(self, newfile, lines, grist):
         newfile.writelines(lines)
 
-    def process_function(self, lines, entrypoint, filename):
+    def process_function(self, lines, filename):
         tracker = self.FunctionGcRootTracker(
             lines, filetag=getidentifier(filename))
-        is_main = tracker.funcname == entrypoint
-        tracker.is_stack_bottom = is_main
         if self.verbose == 1:
             sys.stderr.write('.')
         elif self.verbose > 1:
@@ -1356,7 +1360,6 @@
             self.gcmaptable[:0] = table
         else:
             self.gcmaptable.extend(table)
-        self.seen_main |= is_main
         return tracker
 
 class ElfAssemblerParser(AssemblerParser):
@@ -1432,11 +1435,6 @@
         if functionlines:
             yield in_function, functionlines
 
-    def process_function(self, lines, entrypoint, filename):
-        entrypoint = '_' + entrypoint
-        return super(DarwinAssemblerParser, self).process_function(
-            lines, entrypoint, filename)
-
 class DarwinAssemblerParser64(DarwinAssemblerParser):
     format = "darwin64"
     FunctionGcRootTracker = DarwinFunctionGcRootTracker64
@@ -1494,11 +1492,6 @@
             "missed the end of the previous function")
         yield False, functionlines
 
-    def process_function(self, lines, entrypoint, filename):
-        entrypoint = '_' + entrypoint
-        return super(MsvcAssemblerParser, self).process_function(
-            lines, entrypoint, filename)
-
     def write_newfile(self, newfile, lines, grist):
         newlines = []
         for line in lines:
@@ -1560,24 +1553,21 @@
         self.shuffle = shuffle     # to debug the sorting logic in asmgcroot.py
         self.format = format
         self.gcmaptable = []
-        self.seen_main = False
 
     def dump_raw_table(self, output):
-        print >> output, "seen_main = %d" % (self.seen_main,)
+        print >> output, 'raw table'
         for entry in self.gcmaptable:
             print >> output, entry
 
     def reload_raw_table(self, input):
         firstline = input.readline()
-        assert firstline.startswith("seen_main = ")
-        self.seen_main |= bool(int(firstline[len("seen_main = "):].strip()))
+        assert firstline == 'raw table\n'
         for line in input:
             entry = eval(line)
             assert type(entry) is tuple
             self.gcmaptable.append(entry)
 
     def dump(self, output):
-        assert self.seen_main
 
         def _globalname(name, disp=""):
             return tracker_cls.function_names_prefix + name
@@ -1649,8 +1639,8 @@
             s = """\
             /* See description in asmgcroot.py */
             .cfi_startproc
-            movq\t%rdi, %rdx\t/* 1st argument, which is the callback */
-            movq\t%rsi, %rcx\t/* 2nd argument, which is gcrootanchor */
+            /* %rdi is the 1st argument, which is the callback */
+            /* %rsi is the 2nd argument, which is gcrootanchor */
             movq\t%rsp, %rax\t/* my frame top address */
             pushq\t%rax\t\t/* ASM_FRAMEDATA[8] */
             pushq\t%rbp\t\t/* ASM_FRAMEDATA[7] */
@@ -1663,15 +1653,15 @@
             /* Add this ASM_FRAMEDATA to the front of the circular linked */
             /* list.  Let's call it 'self'.                               */
 
-            movq\t8(%rcx), %rax\t/* next = gcrootanchor->next */
+            movq\t8(%rsi), %rax\t/* next = gcrootanchor->next */
             pushq\t%rax\t\t\t\t/* self->next = next */
-            pushq\t%rcx\t\t\t/* self->prev = gcrootanchor */
-            movq\t%rsp, 8(%rcx)\t/* gcrootanchor->next = self */
+            pushq\t%rsi\t\t\t/* self->prev = gcrootanchor */
+            movq\t%rsp, 8(%rsi)\t/* gcrootanchor->next = self */
             movq\t%rsp, 0(%rax)\t\t\t/* next->prev = self */
             .cfi_def_cfa_offset 80\t/* 9 pushes + the retaddr = 80 bytes */
 
             /* note: the Mac OS X 16 bytes alignment must be respected. */
-            call\t*%rdx\t\t/* invoke the callback */
+            call\t*%rdi\t\t/* invoke the callback */
 
             /* Detach this ASM_FRAMEDATA from the circular linked list */
             popq\t%rsi\t\t/* prev = self->prev */
@@ -1688,7 +1678,7 @@
             popq\t%rcx\t\t/* ignored      ASM_FRAMEDATA[8] */
 
             /* the return value is the one of the 'call' above, */
-            /* because %rax (and possibly %rdx) are unmodified  */
+            /* because %rax is unmodified  */
             ret
             .cfi_endproc
             """
@@ -1835,11 +1825,11 @@
             """.replace("__gccallshapes", _globalname("__gccallshapes"))
             output.writelines(shapelines)
 
-    def process(self, iterlines, newfile, entrypoint='main', filename='?'):
+    def process(self, iterlines, newfile, filename='?'):
         parser = PARSERS[format](verbose=self.verbose, shuffle=self.shuffle)
         for in_function, lines in parser.find_functions(iterlines):
             if in_function:
-                tracker = parser.process_function(lines, entrypoint, filename)
+                tracker = parser.process_function(lines, filename)
                 lines = tracker.lines
             parser.write_newfile(newfile, lines, filename.split('.')[0])
         if self.verbose == 1:
@@ -1848,7 +1838,6 @@
             self.gcmaptable[:0] = parser.gcmaptable
         else:
             self.gcmaptable.extend(parser.gcmaptable)
-        self.seen_main |= parser.seen_main
 
 
 class UnrecognizedOperation(Exception):
@@ -1915,7 +1904,6 @@
             format = 'elf64'
         else:
             format = 'elf'
-    entrypoint = 'main'
     while len(sys.argv) > 1:
         if sys.argv[1] == '-v':
             del sys.argv[1]
@@ -1929,9 +1917,9 @@
         elif sys.argv[1].startswith('-f'):
             format = sys.argv[1][2:]
             del sys.argv[1]
-        elif sys.argv[1].startswith('-m'):
-            entrypoint = sys.argv[1][2:]
-            del sys.argv[1]
+        elif sys.argv[1].startswith('-'):
+            print >> sys.stderr, "unrecognized option:", sys.argv[1]
+            sys.exit(1)
         else:
             break
     tracker = GcRootTracker(verbose=verbose, shuffle=shuffle, format=format)
@@ -1940,7 +1928,7 @@
         firstline = f.readline()
         f.seek(0)
         assert firstline, "file %r is empty!" % (fn,)
-        if firstline.startswith('seen_main = '):
+        if firstline == 'raw table\n':
             tracker.reload_raw_table(f)
             f.close()
         else:
@@ -1948,7 +1936,7 @@
             lblfn = fn[:-2] + '.lbl.s'
             g = open(lblfn, 'w')
             try:
-                tracker.process(f, g, entrypoint=entrypoint, filename=fn)
+                tracker.process(f, g, filename=fn)
             except:
                 g.close()
                 os.unlink(lblfn)
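
The raw-table files written by dump_raw_table() and read back by reload_raw_table() now start with a plain 'raw table' marker line instead of the old 'seen_main = N' header, followed by one tuple per line. Below is a minimal, self-contained sketch of that round trip; the in-memory table is made up purely for illustration, and the real methods live on GcRootTracker:

    from StringIO import StringIO   # Python 2, matching the tree

    def dump_raw_table(gcmaptable, output):
        # first line is a fixed marker instead of the old "seen_main = N" header
        print >> output, 'raw table'
        for entry in gcmaptable:
            print >> output, entry

    def reload_raw_table(input):
        gcmaptable = []
        firstline = input.readline()
        assert firstline == 'raw table\n'
        for line in input:
            entry = eval(line)          # entries are printed as Python tuples
            assert type(entry) is tuple
            gcmaptable.append(entry)
        return gcmaptable

    # hypothetical table, used only for this example
    table = [('label_1', 0x3), ('label_2', 0x7)]
    f = StringIO()
    dump_raw_table(table, f)
    f.seek(0)
    assert reload_raw_table(f) == table
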
diff --git a/pypy/translator/c/genc.py b/pypy/translator/c/genc.py
--- a/pypy/translator/c/genc.py
+++ b/pypy/translator/c/genc.py
@@ -570,7 +570,10 @@
             mk.definition('ASMFILES', sfiles)
             mk.definition('ASMLBLFILES', lblsfiles)
             mk.definition('GCMAPFILES', gcmapfiles)
-            mk.definition('DEBUGFLAGS', '-O2 -fomit-frame-pointer -g')
+            if sys.platform == 'win32':
+                mk.definition('DEBUGFLAGS', '/Zi')
+            else:
+                mk.definition('DEBUGFLAGS', '-O2 -fomit-frame-pointer -g')
 
             if self.config.translation.shared:
                 mk.definition('PYPY_MAIN_FUNCTION', "pypy_main_startup")
@@ -602,7 +605,7 @@
                         'cmd /c $(MASM) /nologo /Cx /Cp /Zm /coff /Fo$@ /c $< $(INCLUDEDIRS)')
                 mk.rule('.c.gcmap', '',
                         ['$(CC) /nologo $(ASM_CFLAGS) /c /FAs /Fa$*.s $< $(INCLUDEDIRS)',
-                         'cmd /c ' + python + '$(PYPYDIR)/translator/c/gcc/trackgcroot.py -fmsvc -m$(PYPY_MAIN_FUNCTION) -t $*.s > $@']
+                         'cmd /c ' + python + '$(PYPYDIR)/translator/c/gcc/trackgcroot.py -fmsvc -t $*.s > $@']
                         )
                 mk.rule('gcmaptable.c', '$(GCMAPFILES)',
                         'cmd /c ' + python + '$(PYPYDIR)/translator/c/gcc/trackgcroot.py -fmsvc $(GCMAPFILES) > $@')
@@ -613,7 +616,7 @@
                 mk.rule('%.lbl.s %.gcmap', '%.s',
                         [python +
                              '$(PYPYDIR)/translator/c/gcc/trackgcroot.py '
-                             '-m$(PYPY_MAIN_FUNCTION) -t $< > $*.gctmp',
+                             '-t $< > $*.gctmp',
                          'mv $*.gctmp $*.gcmap'])
                 mk.rule('gcmaptable.s', '$(GCMAPFILES)',
                         [python +
@@ -623,7 +626,10 @@
                 mk.rule('.PRECIOUS', '%.s', "# don't remove .s files if Ctrl-C'ed")
 
         else:
-            mk.definition('DEBUGFLAGS', '-O1 -g')
+            if sys.platform == 'win32':
+                mk.definition('DEBUGFLAGS', '/Zi')
+            else:
+                mk.definition('DEBUGFLAGS', '-O1 -g')
         mk.write()
         #self.translator.platform,
         #                           ,
@@ -900,8 +906,9 @@
     print >> f, '}'
 
 def commondefs(defines):
-    from pypy.rlib.rarithmetic import LONG_BIT
+    from pypy.rlib.rarithmetic import LONG_BIT, LONGLONG_BIT
     defines['PYPY_LONG_BIT'] = LONG_BIT
+    defines['PYPY_LONGLONG_BIT'] = LONGLONG_BIT
 
 def add_extra_files(eci):
     srcdir = py.path.local(autopath.pypydir).join('translator', 'c', 'src')
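
commondefs() now exports PYPY_LONGLONG_BIT alongside PYPY_LONG_BIT, so the generated C (for example the new shift checks in int.h) can see both widths. A rough stand-alone approximation of what those two values end up being, using struct.calcsize as an assumed stand-in for rarithmetic's LONG_BIT / LONGLONG_BIT:

    import struct

    def commondefs_sketch():
        # mirrors commondefs(): expose both integer widths to the generated C code
        defines = {}
        defines['PYPY_LONG_BIT'] = struct.calcsize('l') * 8       # width of a C long
        defines['PYPY_LONGLONG_BIT'] = struct.calcsize('q') * 8   # width of a C long long
        return defines

    # typically {'PYPY_LONG_BIT': 64, 'PYPY_LONGLONG_BIT': 64} on a 64-bit Linux host
    print commondefs_sketch()
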
diff --git a/pypy/translator/c/node.py b/pypy/translator/c/node.py
--- a/pypy/translator/c/node.py
+++ b/pypy/translator/c/node.py
@@ -1031,7 +1031,7 @@
             if (issubclass(value, BaseException) and
                 value.__module__ == 'exceptions'):
                 return 'PyExc_' + value.__name__
-            if value is py.code._AssertionError:
+            if issubclass(value, AssertionError):
                 return 'PyExc_AssertionError'
             if value is _StackOverflow:
                 return 'PyExc_RuntimeError'
diff --git a/pypy/translator/c/src/cjkcodecs/multibytecodec.c b/pypy/translator/c/src/cjkcodecs/multibytecodec.c
--- a/pypy/translator/c/src/cjkcodecs/multibytecodec.c
+++ b/pypy/translator/c/src/cjkcodecs/multibytecodec.c
@@ -1,4 +1,5 @@
 #include <stdlib.h>
+#include <string.h>
 #include "src/cjkcodecs/multibytecodec.h"
 
 
@@ -93,6 +94,22 @@
   return d->inbuf - d->inbuf_start;
 }
 
+Py_ssize_t pypy_cjk_dec_replace_on_error(struct pypy_cjk_dec_s* d,
+                                         Py_UNICODE *newbuf, Py_ssize_t newlen,
+                                         Py_ssize_t in_offset)
+{
+  if (newlen > 0)
+    {
+      if (d->outbuf + newlen > d->outbuf_end)
+        if (expand_decodebuffer(d, newlen) == -1)
+          return MBERR_NOMEMORY;
+      memcpy(d->outbuf, newbuf, newlen * sizeof(Py_UNICODE));
+      d->outbuf += newlen;
+    }
+  d->inbuf = d->inbuf_start + in_offset;
+  return 0;
+}
+
 /************************************************************/
 
 struct pypy_cjk_enc_s *pypy_cjk_enc_init(const MultibyteCodec *codec,
@@ -209,3 +226,19 @@
 {
   return d->inbuf - d->inbuf_start;
 }
+
+Py_ssize_t pypy_cjk_enc_replace_on_error(struct pypy_cjk_enc_s* d,
+                                         char *newbuf, Py_ssize_t newlen,
+                                         Py_ssize_t in_offset)
+{
+  if (newlen > 0)
+    {
+      if (d->outbuf + newlen > d->outbuf_end)
+        if (expand_encodebuffer(d, newlen) == -1)
+          return MBERR_NOMEMORY;
+      memcpy(d->outbuf, newbuf, newlen);
+      d->outbuf += newlen;
+    }
+  d->inbuf = d->inbuf_start + in_offset;
+  return 0;
+}
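
Both new helpers share the same control flow: if the replacement text does not fit, grow the output buffer; copy the replacement in; then move the input pointer to in_offset. The Python sketch below mirrors that flow for illustration only; the class, names and bytearray buffer are stand-ins, not the C API:

    MBERR_NOMEMORY = -1   # illustrative error code

    class DecodeState(object):
        def __init__(self, inbuf, outsize):
            self.inbuf_start = inbuf
            self.inbuf_pos = 0          # stands in for d->inbuf
            self.outbuf = bytearray()   # stands in for d->outbuf .. d->outbuf_end
            self.outsize = outsize

        def expand_decodebuffer(self, extra):
            self.outsize += extra       # pretend the reallocation always succeeds
            return 0

    def replace_on_error(d, newbuf, in_offset):
        # same shape as pypy_cjk_dec_replace_on_error(): append the replacement,
        # growing the buffer first if needed, then rewind the input pointer
        if newbuf:
            if len(d.outbuf) + len(newbuf) > d.outsize:
                if d.expand_decodebuffer(len(newbuf)) == -1:
                    return MBERR_NOMEMORY
            d.outbuf.extend(newbuf)
        d.inbuf_pos = in_offset
        return 0

    d = DecodeState(b'\xff\xfeabc', outsize=2)
    assert replace_on_error(d, b'?', in_offset=2) == 0
    assert bytes(d.outbuf) == b'?' and d.inbuf_pos == 2
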
diff --git a/pypy/translator/c/src/cjkcodecs/multibytecodec.h b/pypy/translator/c/src/cjkcodecs/multibytecodec.h
--- a/pypy/translator/c/src/cjkcodecs/multibytecodec.h
+++ b/pypy/translator/c/src/cjkcodecs/multibytecodec.h
@@ -102,6 +102,8 @@
 Py_ssize_t pypy_cjk_dec_outlen(struct pypy_cjk_dec_s *);
 Py_ssize_t pypy_cjk_dec_inbuf_remaining(struct pypy_cjk_dec_s *d);
 Py_ssize_t pypy_cjk_dec_inbuf_consumed(struct pypy_cjk_dec_s* d);
+Py_ssize_t pypy_cjk_dec_replace_on_error(struct pypy_cjk_dec_s* d,
+                                         Py_UNICODE *, Py_ssize_t, Py_ssize_t);
 
 struct pypy_cjk_enc_s {
   const MultibyteCodec *codec;
@@ -119,6 +121,8 @@
 Py_ssize_t pypy_cjk_enc_outlen(struct pypy_cjk_enc_s *);
 Py_ssize_t pypy_cjk_enc_inbuf_remaining(struct pypy_cjk_enc_s *d);
 Py_ssize_t pypy_cjk_enc_inbuf_consumed(struct pypy_cjk_enc_s* d);
+Py_ssize_t pypy_cjk_enc_replace_on_error(struct pypy_cjk_enc_s* d,
+                                         char *, Py_ssize_t, Py_ssize_t);
 
 /* list of codecs defined in the .c files */
 
diff --git a/pypy/translator/c/src/int.h b/pypy/translator/c/src/int.h
--- a/pypy/translator/c/src/int.h
+++ b/pypy/translator/c/src/int.h
@@ -73,15 +73,28 @@
 
 /* NB. shifting has same limitations as C: the shift count must be
        >= 0 and < LONG_BITS. */
-#define OP_INT_RSHIFT(x,y,r)    r = Py_ARITHMETIC_RIGHT_SHIFT(long, x, y)
-#define OP_UINT_RSHIFT(x,y,r)   r = (x) >> (y)
-#define OP_LLONG_RSHIFT(x,y,r)  r = Py_ARITHMETIC_RIGHT_SHIFT(PY_LONG_LONG,x,y)
-#define OP_ULLONG_RSHIFT(x,y,r) r = (x) >> (y)
+#define CHECK_SHIFT_RANGE(y, bits) RPyAssert(y >= 0 && y < bits, \
+	       "The shift count is outside of the supported range")
 
-#define OP_INT_LSHIFT(x,y,r)    r = (x) << (y)
-#define OP_UINT_LSHIFT(x,y,r)   r = (x) << (y)
-#define OP_LLONG_LSHIFT(x,y,r)  r = (x) << (y)
-#define OP_ULLONG_LSHIFT(x,y,r) r = (x) << (y)
+
+#define OP_INT_RSHIFT(x,y,r)    CHECK_SHIFT_RANGE(y, PYPY_LONG_BIT); \
+						r = Py_ARITHMETIC_RIGHT_SHIFT(long, x, (y))
+#define OP_UINT_RSHIFT(x,y,r)   CHECK_SHIFT_RANGE(y, PYPY_LONG_BIT); \
+						r = (x) >> (y)
+#define OP_LLONG_RSHIFT(x,y,r)  CHECK_SHIFT_RANGE(y, PYPY_LONGLONG_BIT); \
+						r = Py_ARITHMETIC_RIGHT_SHIFT(PY_LONG_LONG,x, (y))
+#define OP_ULLONG_RSHIFT(x,y,r) CHECK_SHIFT_RANGE(y, PYPY_LONGLONG_BIT); \
+						r = (x) >> (y)
+
+
+#define OP_INT_LSHIFT(x,y,r)    CHECK_SHIFT_RANGE(y, PYPY_LONG_BIT); \
+							r = (x) << (y)
+#define OP_UINT_LSHIFT(x,y,r)   CHECK_SHIFT_RANGE(y, PYPY_LONG_BIT); \
+							r = (x) << (y)
+#define OP_LLONG_LSHIFT(x,y,r)  CHECK_SHIFT_RANGE(y, PYPY_LONGLONG_BIT); \
+							r = (x) << (y)
+#define OP_ULLONG_LSHIFT(x,y,r) CHECK_SHIFT_RANGE(y, PYPY_LONGLONG_BIT); \
+							r = (x) << (y)
 
 #define OP_INT_LSHIFT_OVF(x,y,r) \
 	OP_INT_LSHIFT(x,y,r); \
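
The rewritten shift macros wrap every shift in CHECK_SHIFT_RANGE, so a count outside [0, width) now trips an RPyAssert with the message exercised below in test_standalone, instead of silently hitting C undefined behaviour. A small Python model of the checked left shift; the 64-bit widths are an assumption about the build machine:

    PYPY_LONG_BIT = 64        # assumed width of a C long on the build machine
    PYPY_LONGLONG_BIT = 64    # width of a C long long

    def checked_lshift(x, y, bits=PYPY_LONG_BIT):
        # CHECK_SHIFT_RANGE: the count must satisfy 0 <= y < bits
        assert 0 <= y < bits, "The shift count is outside of the supported range"
        # the shift itself; the mask only models the fixed-width C result
        mask = (1 << bits) - 1
        return (x << y) & mask

    print checked_lshift(10, 2)          # 40, same as the C macro
    try:
        checked_lshift(1, PYPY_LONG_BIT) # count == width -> rejected
    except AssertionError, e:
        print e
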
diff --git a/pypy/translator/c/src/main.h b/pypy/translator/c/src/main.h
--- a/pypy/translator/c/src/main.h
+++ b/pypy/translator/c/src/main.h
@@ -23,12 +23,19 @@
 #include "src/winstuff.c"
 #endif
 
-int PYPY_MAIN_FUNCTION(int argc, char *argv[])
+#ifdef __GNUC__
+/* Hack to prevent this function from being inlined.  Helps asmgcc
+   because the main() function often has a different prologue/epilogue. */
+int pypy_main_function(int argc, char *argv[]) __attribute__((__noinline__));
+#endif
+
+int pypy_main_function(int argc, char *argv[])
 {
     char *errmsg;
     int i, exitcode;
     RPyListOfString *list;
 
+    pypy_asm_stack_bottom();
     instrument_setup();
 
     if (sizeof(void*) != SIZEOF_LONG) {
@@ -72,6 +79,12 @@
     fprintf(stderr, "Fatal error during initialization: %s\n", errmsg);
 #endif
     abort();
+    return 1;
+}
+
+int PYPY_MAIN_FUNCTION(int argc, char *argv[])
+{
+    return pypy_main_function(argc, argv);
 }
 
 #endif /* PYPY_NOT_MAIN_FILE */
diff --git a/pypy/translator/c/src/mem.h b/pypy/translator/c/src/mem.h
--- a/pypy/translator/c/src/mem.h
+++ b/pypy/translator/c/src/mem.h
@@ -222,6 +222,15 @@
 
 #endif /* USING_BOEHM_GC */
 
+
+#ifdef USING_NO_GC_AT_ALL
+#define OP_BOEHM_ZERO_MALLOC(size, r, restype, is_atomic, is_varsize)  \
+  r = (restype) calloc(1, size);
+#define OP_BOEHM_DISAPPEARING_LINK(link, obj, r)  /* nothing */
+#define OP_GC__DISABLE_FINALIZERS(r)  /* nothing */
+#define OP_GC__ENABLE_FINALIZERS(r)  /* nothing */
+#endif
+
 /************************************************************/
 /* weakref support */
 
diff --git a/pypy/translator/c/test/test_standalone.py b/pypy/translator/c/test/test_standalone.py
--- a/pypy/translator/c/test/test_standalone.py
+++ b/pypy/translator/c/test/test_standalone.py
@@ -596,6 +596,42 @@
         # The traceback stops at f() because it's the first function that
         # captures the AssertionError, which makes the program abort.
 
+    def test_int_lshift_too_large(self):
+        from pypy.rlib.rarithmetic import LONG_BIT, LONGLONG_BIT
+        def entry_point(argv):
+            a = int(argv[1])
+            b = int(argv[2])
+            print a << b
+            return 0
+
+        t, cbuilder = self.compile(entry_point, debug=True)
+        out = cbuilder.cmdexec("10 2", expect_crash=False)
+        assert out.strip() == str(10 << 2)
+        cases = [-4, LONG_BIT, LONGLONG_BIT]
+        for x in cases:
+            out, err = cbuilder.cmdexec("%s %s" % (1, x), expect_crash=True)
+            lines = err.strip()
+            assert 'The shift count is outside of the supported range' in lines
+
+    def test_llong_rshift_too_large(self):
+        from pypy.rlib.rarithmetic import LONG_BIT, LONGLONG_BIT, r_longlong
+        def entry_point(argv):
+            a = r_longlong(int(argv[1]))
+            b = r_longlong(int(argv[2]))
+            print a >> b
+            return 0
+
+        t, cbuilder = self.compile(entry_point, debug=True)
+        out = cbuilder.cmdexec("10 2", expect_crash=False)
+        assert out.strip() == str(10 >> 2)
+        out = cbuilder.cmdexec("%s %s" % (-42, LONGLONG_BIT - 1), expect_crash=False)
+        assert out.strip() == '-1'
+        cases = [-4, LONGLONG_BIT]
+        for x in cases:
+            out, err = cbuilder.cmdexec("%s %s" % (1, x), expect_crash=True)
+            lines = err.strip()
+            assert 'The shift count is outside of the supported range' in lines
+
     def test_ll_assert_error_debug(self):
         def entry_point(argv):
             ll_assert(len(argv) != 1, "foobar")
diff --git a/pypy/translator/driver.py b/pypy/translator/driver.py
--- a/pypy/translator/driver.py
+++ b/pypy/translator/driver.py
@@ -559,6 +559,7 @@
                 shutil.copy(str(soname), str(newsoname))
                 self.log.info("copied: %s" % (newsoname,))
             self.c_entryp = newexename
+        self.log.info('usession directory: %s' % (udir,))
         self.log.info("created: %s" % (self.c_entryp,))
 
     def task_compile_c(self):
diff --git a/pypy/translator/goal/targetnumpystandalone.py b/pypy/translator/goal/targetnumpystandalone.py
--- a/pypy/translator/goal/targetnumpystandalone.py
+++ b/pypy/translator/goal/targetnumpystandalone.py
@@ -10,46 +10,32 @@
 """
 
 import time
-from pypy.module.micronumpy.numarray import SingleDimArray, Code, compute
+from pypy.module.micronumpy.compile import numpy_compile
 from pypy.jit.codewriter.policy import JitPolicy
-
-def create_array(size):
-    a = SingleDimArray(size)
-    for i in range(size):
-        a.storage[i] = float(i % 10)
-    return a
+from pypy.rpython.annlowlevel import hlstr
 
 def entry_point(argv):
     if len(argv) != 3:
         print __doc__
         return 1
-    bytecode = argv[1]
-    for b in bytecode:
-        if b not in 'alf':
-            print "WRONG BYTECODE"
-            print __doc__
-            return 2
     try:
         size = int(argv[2])
     except ValueError:
         print "INVALID LITERAL FOR INT:", argv[2]
         print __doc__
         return 3
-    no_arrays = bytecode.count('l')
-    no_floats = bytecode.count('f')
-    arrays = []
-    floats = []
-    for i in range(no_arrays):
-        arrays.append(create_array(size))
-    for i in range(no_floats):
-        floats.append(float(i + 1))
-    code = Code(bytecode, arrays, floats)
     t0 = time.time()
-    compute(code)
-    print "bytecode:", bytecode, "size:", size
+    main(argv[1], size)
+    print "bytecode:", argv[1], "size:", size
     print "took:", time.time() - t0
     return 0
 
+def main(bc, size):
+    if not isinstance(bc, str):
+        bc = hlstr(bc) # for tests
+    a = numpy_compile(bc, size)
+    a = a.compute()
+
 def target(*args):
     return entry_point, None
 
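
entry_point() now only parses its arguments and defers to main(), which compiles the bytecode with numpy_compile() and forces the result; hlstr() is there so tests can pass an RPython string. A toy sketch of the same benchmark pattern; the fake array class, the compile_fn parameter and the 'af' bytecode are placeholders, not the micronumpy API:

    import time

    def run_once(bc, size, compile_fn):
        # the target's pattern: compile the bytecode, force the result, report timing
        t0 = time.time()
        array = compile_fn(bc, size)   # stands in for numpy_compile(bc, size)
        array.compute()                # force evaluation; the result is discarded
        print "bytecode:", bc, "size:", size
        print "took:", time.time() - t0

    class _FakeArray(object):
        def __init__(self, size):
            self.size = size
        def compute(self):
            return [0.0] * self.size

    run_once('af', 100, lambda bc, size: _FakeArray(size))
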
diff --git a/pypy/translator/goal/translate.py b/pypy/translator/goal/translate.py
--- a/pypy/translator/goal/translate.py
+++ b/pypy/translator/goal/translate.py
@@ -149,6 +149,9 @@
             log.ERROR("Could not find target %r" % (arg, ))
             sys.exit(1)
 
+    # apply the platform settings
+    set_platform(config)
+
     targetspec = translateconfig.targetspec
     targetspec_dic = load_target(targetspec)
 
@@ -164,9 +167,6 @@
                 existing_config=config,
                 translating=True)
 
-    # apply the platform settings
-    set_platform(config)
-
     # apply the optimization level settings
     set_opt_level(config, translateconfig.opt)
 
diff --git a/pypy/translator/platform/__init__.py b/pypy/translator/platform/__init__.py
--- a/pypy/translator/platform/__init__.py
+++ b/pypy/translator/platform/__init__.py
@@ -38,6 +38,7 @@
     c_environ = None
 
     relevant_environ = ()
+    log_errors = True
 
     so_prefixes = ('',)
 
@@ -120,11 +121,12 @@
         if returncode != 0:
             errorfile = outname.new(ext='errors')
             errorfile.write(stderr, 'wb')
-            stderrlines = stderr.splitlines()
-            for line in stderrlines:
-                log.Error(line)
-            # ^^^ don't use ERROR, because it might actually be fine.
-            # Also, ERROR confuses lib-python/conftest.py.
+            if self.log_errors:
+                stderrlines = stderr.splitlines()
+                for line in stderrlines:
+                    log.Error(line)
+                # ^^^ don't use ERROR, because it might actually be fine.
+                # Also, ERROR confuses lib-python/conftest.py.
             raise CompilationError(stdout, stderr)
         else:
             for line in stderr.splitlines():
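
The new log_errors class attribute lets a Platform subclass silence the per-line log.Error() output for compilations that are expected to fail, while still raising CompilationError. A minimal sketch of that pattern with stub classes standing in for the real hierarchy (the real code also writes stderr to an .errors file, which is omitted here):

    class CompilationError(Exception):
        def __init__(self, out, err):
            self.out, self.err = out, err

    class BasePlatform(object):
        log_errors = True                    # default: same behaviour as before

        def _handle_error(self, returncode, stdout, stderr, outname):
            if returncode != 0:
                if self.log_errors:
                    for line in stderr.splitlines():
                        print 'Error:', line     # stands in for log.Error(line)
                raise CompilationError(stdout, stderr)
            return stdout

    class QuietPlatform(BasePlatform):
        # e.g. a platform used by tests that compile code expected to fail
        log_errors = False

    try:
        QuietPlatform()._handle_error(1, '', 'undefined reference to foo\n', None)
    except CompilationError, e:
        assert 'undefined reference' in e.err
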
diff --git a/pytest.py b/pytest.py
old mode 100644
new mode 100755
--- a/pytest.py
+++ b/pytest.py
@@ -1,7 +1,6 @@
+#!/usr/bin/env python
 """
 unit and functional testing with Python.
-(pypy version of startup script)
-see http://pytest.org for details.
 """
 __all__ = ['main']
 
@@ -9,23 +8,6 @@
 from _pytest import core as cmdline
 from _pytest import __version__
 
-# This pytest.py script is located in the pypy source tree
-# which has a copy of pytest and py within its source tree.
-# If the environment also has an installed version of pytest/py
-# we are bound to get warnings so we disable them.
-# XXX eventually pytest and py should not be inlined shipped
-# with the pypy source code but become a requirement for installation.
-
-import warnings
-warnings.filterwarnings("ignore",
-    "Module py was already imported", category=UserWarning)
-warnings.filterwarnings("ignore",
-    "Module _pytest was already imported",
-    category=UserWarning)
-warnings.filterwarnings("ignore",
-    "Module pytest was already imported",
-    category=UserWarning)
-
 if __name__ == '__main__': # if run as a script or by 'python -m pytest'
     raise SystemExit(main())
 else:

